ntb_hw.c revision 300516
1/*-
2 * Copyright (C) 2013 Intel Corporation
3 * Copyright (C) 2015 EMC Corporation
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 *    notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 *    notice, this list of conditions and the following disclaimer in the
13 *    documentation and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25 * SUCH DAMAGE.
26 */
27
28#include <sys/cdefs.h>
29__FBSDID("$FreeBSD: stable/10/sys/dev/ntb/ntb_hw/ntb_hw.c 300516 2016-05-23 16:12:11Z mav $");
30
31#include <sys/param.h>
32#include <sys/kernel.h>
33#include <sys/systm.h>
34#include <sys/bus.h>
35#include <sys/endian.h>
36#include <sys/malloc.h>
37#include <sys/module.h>
38#include <sys/queue.h>
39#include <sys/rman.h>
40#include <sys/sbuf.h>
41#include <sys/sysctl.h>
42#include <vm/vm.h>
43#include <vm/pmap.h>
44#include <machine/bus.h>
45#include <machine/pmap.h>
46#include <machine/resource.h>
47#include <dev/pci/pcireg.h>
48#include <dev/pci/pcivar.h>
49
50#include "ntb_regs.h"
51#include "ntb_hw.h"
52
/*
 * The Non-Transparent Bridge (NTB) is a device on some Intel processors that
 * allows you to connect two systems using a PCI-e link.
 *
 * This module contains the hardware abstraction layer for the NTB. It allows
 * you to send and receive interrupts, map the memory windows and send and
 * receive messages in the scratch-pad registers.
 *
 * NOTE: Much of the code in this module is shared with Linux. Any patches may
 * be picked up and redistributed in Linux with a dual GPL/BSD license.
 */
64
/* Sizes the per-device interrupt table: the larger of the two doorbell counts. */
#define	MAX_MSIX_INTERRUPTS	MAX(XEON_DB_COUNT, ATOM_DB_COUNT)

#define	NTB_HB_TIMEOUT		1	/* second */
#define	ATOM_LINK_RECOVERY_TIME	500	/* ms */
/* All bits set except the low 12. */
#define	BAR_HIGH_MASK		(~0xfffull)

/* Fetch the driver softc attached to a device_t. */
#define	DEVICE2SOFTC(dev)	((struct ntb_softc *)device_get_softc(dev))
72
/* Device family, as recorded per PCI ID in the pci_ids table. */
enum ntb_device_type {
	NTB_XEON = 0,
	NTB_ATOM = 1
};
77
/*
 * Connection topology.  These values are hardware numbers and therefore
 * cannot change.
 */
enum ntb_conn_type {
	NTB_CONN_TRANSPARENT	= 0,
	NTB_CONN_B2B		= 1,
	NTB_CONN_RP		= 2,
};
84
/* Which side of a back-to-back link this device is on (USD or DSD). */
enum ntb_b2b_direction {
	NTB_DEV_USD	= 0,
	NTB_DEV_DSD	= 1,
};
89
/*
 * Index into ntb_softc.bar_info[].  NTB_MAX_BARS is the array size, not a
 * valid BAR.
 */
enum ntb_bar {
	NTB_CONFIG_BAR	= 0,
	NTB_B2B_BAR_1	= 1,
	NTB_B2B_BAR_2	= 2,
	NTB_B2B_BAR_3	= 3,
	NTB_MAX_BARS	= 4
};
97
/*
 * Device features and workarounds.
 *
 * Evaluates to non-zero when any bit of `feature` is set in ntb->features.
 * Expects a variable named `ntb` to be in scope at the point of use.
 */
#define	HAS_FEATURE(feature)	((ntb->features & (feature)) != 0)
101
102struct ntb_hw_info {
103	uint32_t		device_id;
104	const char		*desc;
105	enum ntb_device_type	type;
106	uint32_t		features;
107};
108
109struct ntb_pci_bar_info {
110	bus_space_tag_t		pci_bus_tag;
111	bus_space_handle_t	pci_bus_handle;
112	int			pci_resource_id;
113	struct resource		*pci_resource;
114	vm_paddr_t		pbase;
115	caddr_t			vbase;
116	vm_size_t		size;
117	vm_memattr_t		map_mode;
118
119	/* Configuration register offsets */
120	uint32_t		psz_off;
121	uint32_t		ssz_off;
122	uint32_t		pbarxlat_off;
123};
124
/* State for one allocated interrupt: its IRQ resource, rid and handler cookie. */
struct ntb_int_info {
	struct resource	*res;	/* from bus_alloc_resource_any(SYS_RES_IRQ) */
	int		rid;
	void		*tag;	/* cookie filled in by bus_setup_intr() */
};
130
/* Argument handed to the per-vector MSI-X handler (see ntb_setup_msix). */
struct ntb_vec {
	struct ntb_softc	*ntb;	/* owning device */
	uint32_t		num;
};
135
136struct ntb_reg {
137	uint32_t	ntb_ctl;
138	uint32_t	lnk_sta;
139	uint8_t		db_size;
140	unsigned	mw_bar[NTB_MAX_BARS];
141};
142
/* Offsets of the doorbell, doorbell-mask and scratchpad registers. */
struct ntb_alt_reg {
	uint32_t	db_bell;
	uint32_t	db_mask;
	uint32_t	spad;
};
148
/* Offsets of the per-BAR base, translation and limit registers. */
struct ntb_xlat_reg {
	/* Base registers */
	uint32_t	bar0_base;
	uint32_t	bar2_base;
	uint32_t	bar4_base;
	uint32_t	bar5_base;

	/* Translation registers */
	uint32_t	bar2_xlat;
	uint32_t	bar4_xlat;
	uint32_t	bar5_xlat;

	/* Limit registers */
	uint32_t	bar2_limit;
	uint32_t	bar4_limit;
	uint32_t	bar5_limit;
};
163
/* Addresses used for the back-to-back windows, one field per BAR/mode. */
struct ntb_b2b_addr {
	uint64_t	bar0_addr;
	uint64_t	bar2_addr64;
	uint64_t	bar4_addr64;
	uint64_t	bar4_addr32;	/* split-BAR mode */
	uint64_t	bar5_addr32;	/* split-BAR mode */
};
171
172struct ntb_softc {
173	device_t		device;
174	enum ntb_device_type	type;
175	uint32_t		features;
176
177	struct ntb_pci_bar_info	bar_info[NTB_MAX_BARS];
178	struct ntb_int_info	int_info[MAX_MSIX_INTERRUPTS];
179	uint32_t		allocated_interrupts;
180
181	struct callout		heartbeat_timer;
182	struct callout		lr_timer;
183
184	void			*ntb_ctx;
185	const struct ntb_ctx_ops *ctx_ops;
186	struct ntb_vec		*msix_vec;
187#define CTX_LOCK(sc)		mtx_lock(&(sc)->ctx_lock)
188#define CTX_UNLOCK(sc)		mtx_unlock(&(sc)->ctx_lock)
189#define CTX_ASSERT(sc,f)	mtx_assert(&(sc)->ctx_lock, (f))
190	struct mtx		ctx_lock;
191
192	uint32_t		ppd;
193	enum ntb_conn_type	conn_type;
194	enum ntb_b2b_direction	dev_type;
195
196	/* Offset of peer bar0 in B2B BAR */
197	uint64_t			b2b_off;
198	/* Memory window used to access peer bar0 */
199#define B2B_MW_DISABLED			UINT8_MAX
200	uint8_t				b2b_mw_idx;
201
202	uint8_t				mw_count;
203	uint8_t				spad_count;
204	uint8_t				db_count;
205	uint8_t				db_vec_count;
206	uint8_t				db_vec_shift;
207
208	/* Protects local db_mask. */
209#define DB_MASK_LOCK(sc)	mtx_lock_spin(&(sc)->db_mask_lock)
210#define DB_MASK_UNLOCK(sc)	mtx_unlock_spin(&(sc)->db_mask_lock)
211#define DB_MASK_ASSERT(sc,f)	mtx_assert(&(sc)->db_mask_lock, (f))
212	struct mtx			db_mask_lock;
213
214	volatile uint32_t		ntb_ctl;
215	volatile uint32_t		lnk_sta;
216
217	uint64_t			db_valid_mask;
218	uint64_t			db_link_mask;
219	uint64_t			db_mask;
220
221	int				last_ts;	/* ticks @ last irq */
222
223	const struct ntb_reg		*reg;
224	const struct ntb_alt_reg	*self_reg;
225	const struct ntb_alt_reg	*peer_reg;
226	const struct ntb_xlat_reg	*xlat_reg;
227};
228
#ifdef __i386__
/*
 * 64-bit register access built from two 32-bit accesses, provided here only
 * when compiling for i386.
 */

/*
 * Read a 64-bit value as two 32-bit reads: the word at `offset` supplies
 * bits 0-31 and the word at `offset + 4` supplies bits 32-63.
 *
 * The two reads are separate statements so that they are always issued
 * low word first; as operands of a single `|` expression their relative
 * order would be unspecified.
 */
static __inline uint64_t
bus_space_read_8(bus_space_tag_t tag, bus_space_handle_t handle,
    bus_size_t offset)
{
	uint32_t lo, hi;

	lo = bus_space_read_4(tag, handle, offset);
	hi = bus_space_read_4(tag, handle, offset + 4);
	return (((uint64_t)hi << 32) | lo);
}

/*
 * Write a 64-bit value as two 32-bit writes: bits 0-31 go to `offset`
 * first, then bits 32-63 go to `offset + 4`.
 */
static __inline void
bus_space_write_8(bus_space_tag_t tag, bus_space_handle_t handle,
    bus_size_t offset, uint64_t val)
{

	bus_space_write_4(tag, handle, offset, (uint32_t)val);
	bus_space_write_4(tag, handle, offset + 4, (uint32_t)(val >> 32));
}
#endif
248
/*
 * Register accessors.  SIZE is the access width suffix pasted onto
 * bus_space_read_/bus_space_write_ (e.g. 2, 4, 8).  All of these expect a
 * variable named `ntb` (struct ntb_softc *) in scope.
 *
 *   ntb_bar_*: access `offset` within an arbitrary BAR.
 *   ntb_reg_*: access `offset` within NTB_CONFIG_BAR.
 *   ntb_mw_*:  access `offset` within the BAR backing memory window
 *              ntb->b2b_mw_idx.
 */
#define	ntb_bar_read(SIZE, bar, offset)					\
	bus_space_read_ ## SIZE (ntb->bar_info[(bar)].pci_bus_tag,	\
	    ntb->bar_info[(bar)].pci_bus_handle, (offset))
#define	ntb_bar_write(SIZE, bar, offset, val)				\
	bus_space_write_ ## SIZE (ntb->bar_info[(bar)].pci_bus_tag,	\
	    ntb->bar_info[(bar)].pci_bus_handle, (offset), (val))

#define	ntb_reg_read(SIZE, offset)					\
	ntb_bar_read(SIZE, NTB_CONFIG_BAR, offset)
#define	ntb_reg_write(SIZE, offset, val)				\
	ntb_bar_write(SIZE, NTB_CONFIG_BAR, offset, val)

#define	ntb_mw_read(SIZE, offset)					\
	ntb_bar_read(SIZE, ntb_mw_to_bar(ntb, ntb->b2b_mw_idx), offset)
#define	ntb_mw_write(SIZE, offset, val)					\
	ntb_bar_write(SIZE, ntb_mw_to_bar(ntb, ntb->b2b_mw_idx),	\
	    offset, val)
263
264static int ntb_probe(device_t device);
265static int ntb_attach(device_t device);
266static int ntb_detach(device_t device);
267static unsigned ntb_user_mw_to_idx(struct ntb_softc *, unsigned uidx);
268static inline enum ntb_bar ntb_mw_to_bar(struct ntb_softc *, unsigned mw);
269static inline bool bar_is_64bit(struct ntb_softc *, enum ntb_bar);
270static inline void bar_get_xlat_params(struct ntb_softc *, enum ntb_bar,
271    uint32_t *base, uint32_t *xlat, uint32_t *lmt);
272static int ntb_map_pci_bars(struct ntb_softc *ntb);
273static int ntb_mw_set_wc_internal(struct ntb_softc *, unsigned idx,
274    vm_memattr_t);
275static void print_map_success(struct ntb_softc *, struct ntb_pci_bar_info *,
276    const char *);
277static int map_mmr_bar(struct ntb_softc *ntb, struct ntb_pci_bar_info *bar);
278static int map_memory_window_bar(struct ntb_softc *ntb,
279    struct ntb_pci_bar_info *bar);
280static void ntb_unmap_pci_bar(struct ntb_softc *ntb);
281static int ntb_remap_msix(device_t, uint32_t desired, uint32_t avail);
282static int ntb_init_isr(struct ntb_softc *ntb);
283static int ntb_setup_legacy_interrupt(struct ntb_softc *ntb);
284static int ntb_setup_msix(struct ntb_softc *ntb, uint32_t num_vectors);
285static void ntb_teardown_interrupts(struct ntb_softc *ntb);
286static inline uint64_t ntb_vec_mask(struct ntb_softc *, uint64_t db_vector);
287static void ntb_interrupt(struct ntb_softc *, uint32_t vec);
288static void ndev_vec_isr(void *arg);
289static void ndev_irq_isr(void *arg);
290static inline uint64_t db_ioread(struct ntb_softc *, uint64_t regoff);
291static inline void db_iowrite(struct ntb_softc *, uint64_t regoff, uint64_t);
292static inline void db_iowrite_raw(struct ntb_softc *, uint64_t regoff, uint64_t);
293static int ntb_create_msix_vec(struct ntb_softc *ntb, uint32_t num_vectors);
294static void ntb_free_msix_vec(struct ntb_softc *ntb);
295static struct ntb_hw_info *ntb_get_device_info(uint32_t device_id);
296static void ntb_detect_max_mw(struct ntb_softc *ntb);
297static int ntb_detect_xeon(struct ntb_softc *ntb);
298static int ntb_detect_atom(struct ntb_softc *ntb);
299static int ntb_xeon_init_dev(struct ntb_softc *ntb);
300static int ntb_atom_init_dev(struct ntb_softc *ntb);
301static void ntb_teardown_xeon(struct ntb_softc *ntb);
302static void configure_atom_secondary_side_bars(struct ntb_softc *ntb);
303static void xeon_reset_sbar_size(struct ntb_softc *, enum ntb_bar idx,
304    enum ntb_bar regbar);
305static void xeon_set_sbar_base_and_limit(struct ntb_softc *,
306    uint64_t base_addr, enum ntb_bar idx, enum ntb_bar regbar);
307static void xeon_set_pbar_xlat(struct ntb_softc *, uint64_t base_addr,
308    enum ntb_bar idx);
309static int xeon_setup_b2b_mw(struct ntb_softc *,
310    const struct ntb_b2b_addr *addr, const struct ntb_b2b_addr *peer_addr);
311static inline bool link_is_up(struct ntb_softc *ntb);
312static inline bool atom_link_is_err(struct ntb_softc *ntb);
313static inline enum ntb_speed ntb_link_sta_speed(struct ntb_softc *);
314static inline enum ntb_width ntb_link_sta_width(struct ntb_softc *);
315static void atom_link_hb(void *arg);
316static void ntb_db_event(struct ntb_softc *ntb, uint32_t vec);
317static void recover_atom_link(void *arg);
318static bool ntb_poll_link(struct ntb_softc *ntb);
319static void save_bar_parameters(struct ntb_pci_bar_info *bar);
320static void ntb_sysctl_init(struct ntb_softc *);
321static int sysctl_handle_features(SYSCTL_HANDLER_ARGS);
322static int sysctl_handle_link_status(SYSCTL_HANDLER_ARGS);
323static int sysctl_handle_register(SYSCTL_HANDLER_ARGS);
324
325static unsigned g_ntb_hw_debug_level;
326TUNABLE_INT("hw.ntb.debug_level", &g_ntb_hw_debug_level);
327SYSCTL_UINT(_hw_ntb, OID_AUTO, debug_level, CTLFLAG_RWTUN,
328    &g_ntb_hw_debug_level, 0, "ntb_hw log level -- higher is more verbose");
329#define ntb_printf(lvl, ...) do {				\
330	if ((lvl) <= g_ntb_hw_debug_level) {			\
331		device_printf(ntb->device, __VA_ARGS__);	\
332	}							\
333} while (0)
334
335#define	_NTB_PAT_UC	0
336#define	_NTB_PAT_WC	1
337#define	_NTB_PAT_WT	4
338#define	_NTB_PAT_WP	5
339#define	_NTB_PAT_WB	6
340#define	_NTB_PAT_UCM	7
341static unsigned g_ntb_mw_pat = _NTB_PAT_UC;
342TUNABLE_INT("hw.ntb.default_mw_pat", &g_ntb_mw_pat);
343SYSCTL_UINT(_hw_ntb, OID_AUTO, default_mw_pat, CTLFLAG_RDTUN,
344    &g_ntb_mw_pat, 0, "Configure the default memory window cache flags (PAT): "
345    "UC: "  __XSTRING(_NTB_PAT_UC) ", "
346    "WC: "  __XSTRING(_NTB_PAT_WC) ", "
347    "WT: "  __XSTRING(_NTB_PAT_WT) ", "
348    "WP: "  __XSTRING(_NTB_PAT_WP) ", "
349    "WB: "  __XSTRING(_NTB_PAT_WB) ", "
350    "UC-: " __XSTRING(_NTB_PAT_UCM));
351
352static inline vm_memattr_t
353ntb_pat_flags(void)
354{
355
356	switch (g_ntb_mw_pat) {
357	case _NTB_PAT_WC:
358		return (VM_MEMATTR_WRITE_COMBINING);
359	case _NTB_PAT_WT:
360		return (VM_MEMATTR_WRITE_THROUGH);
361	case _NTB_PAT_WP:
362		return (VM_MEMATTR_WRITE_PROTECTED);
363	case _NTB_PAT_WB:
364		return (VM_MEMATTR_WRITE_BACK);
365	case _NTB_PAT_UCM:
366		return (VM_MEMATTR_WEAK_UNCACHEABLE);
367	case _NTB_PAT_UC:
368		/* FALLTHROUGH */
369	default:
370		return (VM_MEMATTR_UNCACHEABLE);
371	}
372}
373
374/*
375 * Well, this obviously doesn't belong here, but it doesn't seem to exist
376 * anywhere better yet.
377 */
378static inline const char *
379ntb_vm_memattr_to_str(vm_memattr_t pat)
380{
381
382	switch (pat) {
383	case VM_MEMATTR_WRITE_COMBINING:
384		return ("WRITE_COMBINING");
385	case VM_MEMATTR_WRITE_THROUGH:
386		return ("WRITE_THROUGH");
387	case VM_MEMATTR_WRITE_PROTECTED:
388		return ("WRITE_PROTECTED");
389	case VM_MEMATTR_WRITE_BACK:
390		return ("WRITE_BACK");
391	case VM_MEMATTR_WEAK_UNCACHEABLE:
392		return ("UNCACHED");
393	case VM_MEMATTR_UNCACHEABLE:
394		return ("UNCACHEABLE");
395	default:
396		return ("UNKNOWN");
397	}
398}
399
400static int g_ntb_mw_idx = -1;
401TUNABLE_INT("hw.ntb.b2b_mw_idx", &g_ntb_mw_idx);
402SYSCTL_INT(_hw_ntb, OID_AUTO, b2b_mw_idx, CTLFLAG_RDTUN, &g_ntb_mw_idx,
403    0, "Use this memory window to access the peer NTB registers.  A "
404    "non-negative value starts from the first MW index; a negative value "
405    "starts from the last MW index.  The default is -1, i.e., the last "
406    "available memory window.  Both sides of the NTB MUST set the same "
407    "value here!  (Applies on Xeon platforms with SDOORBELL_LOCKUP errata.)");
408
409static struct ntb_hw_info pci_ids[] = {
410	/* XXX: PS/SS IDs left out until they are supported. */
411	{ 0x0C4E8086, "BWD Atom Processor S1200 Non-Transparent Bridge B2B",
412		NTB_ATOM, 0 },
413
414	{ 0x37258086, "JSF Xeon C35xx/C55xx Non-Transparent Bridge B2B",
415		NTB_XEON, NTB_SDOORBELL_LOCKUP | NTB_B2BDOORBELL_BIT14 },
416	{ 0x3C0D8086, "SNB Xeon E5/Core i7 Non-Transparent Bridge B2B",
417		NTB_XEON, NTB_SDOORBELL_LOCKUP | NTB_B2BDOORBELL_BIT14 },
418	{ 0x0E0D8086, "IVT Xeon E5 V2 Non-Transparent Bridge B2B", NTB_XEON,
419		NTB_SDOORBELL_LOCKUP | NTB_B2BDOORBELL_BIT14 |
420		    NTB_SB01BASE_LOCKUP | NTB_BAR_SIZE_4K },
421	{ 0x2F0D8086, "HSX Xeon E5 V3 Non-Transparent Bridge B2B", NTB_XEON,
422		NTB_SDOORBELL_LOCKUP | NTB_B2BDOORBELL_BIT14 |
423		    NTB_SB01BASE_LOCKUP },
424	{ 0x6F0D8086, "BDX Xeon E5 V4 Non-Transparent Bridge B2B", NTB_XEON,
425		NTB_SDOORBELL_LOCKUP | NTB_B2BDOORBELL_BIT14 |
426		    NTB_SB01BASE_LOCKUP },
427
428	{ 0x00000000, NULL, NTB_ATOM, 0 }
429};
430
431static const struct ntb_reg atom_reg = {
432	.ntb_ctl = ATOM_NTBCNTL_OFFSET,
433	.lnk_sta = ATOM_LINK_STATUS_OFFSET,
434	.db_size = sizeof(uint64_t),
435	.mw_bar = { NTB_B2B_BAR_1, NTB_B2B_BAR_2 },
436};
437
438static const struct ntb_alt_reg atom_pri_reg = {
439	.db_bell = ATOM_PDOORBELL_OFFSET,
440	.db_mask = ATOM_PDBMSK_OFFSET,
441	.spad = ATOM_SPAD_OFFSET,
442};
443
444static const struct ntb_alt_reg atom_b2b_reg = {
445	.db_bell = ATOM_B2B_DOORBELL_OFFSET,
446	.spad = ATOM_B2B_SPAD_OFFSET,
447};
448
449static const struct ntb_xlat_reg atom_sec_xlat = {
450#if 0
451	/* "FIXME" says the Linux driver. */
452	.bar0_base = ATOM_SBAR0BASE_OFFSET,
453	.bar2_base = ATOM_SBAR2BASE_OFFSET,
454	.bar4_base = ATOM_SBAR4BASE_OFFSET,
455
456	.bar2_limit = ATOM_SBAR2LMT_OFFSET,
457	.bar4_limit = ATOM_SBAR4LMT_OFFSET,
458#endif
459
460	.bar2_xlat = ATOM_SBAR2XLAT_OFFSET,
461	.bar4_xlat = ATOM_SBAR4XLAT_OFFSET,
462};
463
464static const struct ntb_reg xeon_reg = {
465	.ntb_ctl = XEON_NTBCNTL_OFFSET,
466	.lnk_sta = XEON_LINK_STATUS_OFFSET,
467	.db_size = sizeof(uint16_t),
468	.mw_bar = { NTB_B2B_BAR_1, NTB_B2B_BAR_2, NTB_B2B_BAR_3 },
469};
470
471static const struct ntb_alt_reg xeon_pri_reg = {
472	.db_bell = XEON_PDOORBELL_OFFSET,
473	.db_mask = XEON_PDBMSK_OFFSET,
474	.spad = XEON_SPAD_OFFSET,
475};
476
477static const struct ntb_alt_reg xeon_b2b_reg = {
478	.db_bell = XEON_B2B_DOORBELL_OFFSET,
479	.spad = XEON_B2B_SPAD_OFFSET,
480};
481
482static const struct ntb_xlat_reg xeon_sec_xlat = {
483	.bar0_base = XEON_SBAR0BASE_OFFSET,
484	.bar2_base = XEON_SBAR2BASE_OFFSET,
485	.bar4_base = XEON_SBAR4BASE_OFFSET,
486	.bar5_base = XEON_SBAR5BASE_OFFSET,
487
488	.bar2_limit = XEON_SBAR2LMT_OFFSET,
489	.bar4_limit = XEON_SBAR4LMT_OFFSET,
490	.bar5_limit = XEON_SBAR5LMT_OFFSET,
491
492	.bar2_xlat = XEON_SBAR2XLAT_OFFSET,
493	.bar4_xlat = XEON_SBAR4XLAT_OFFSET,
494	.bar5_xlat = XEON_SBAR5XLAT_OFFSET,
495};
496
497static struct ntb_b2b_addr xeon_b2b_usd_addr = {
498	.bar0_addr = XEON_B2B_BAR0_ADDR,
499	.bar2_addr64 = XEON_B2B_BAR2_ADDR64,
500	.bar4_addr64 = XEON_B2B_BAR4_ADDR64,
501	.bar4_addr32 = XEON_B2B_BAR4_ADDR32,
502	.bar5_addr32 = XEON_B2B_BAR5_ADDR32,
503};
504
505static struct ntb_b2b_addr xeon_b2b_dsd_addr = {
506	.bar0_addr = XEON_B2B_BAR0_ADDR,
507	.bar2_addr64 = XEON_B2B_BAR2_ADDR64,
508	.bar4_addr64 = XEON_B2B_BAR4_ADDR64,
509	.bar4_addr32 = XEON_B2B_BAR4_ADDR32,
510	.bar5_addr32 = XEON_B2B_BAR5_ADDR32,
511};
512
513SYSCTL_NODE(_hw_ntb, OID_AUTO, xeon_b2b, CTLFLAG_RW, 0,
514    "B2B MW segment overrides -- MUST be the same on both sides");
515
516TUNABLE_QUAD("hw.ntb.usd_bar2_addr64", &xeon_b2b_usd_addr.bar2_addr64);
517SYSCTL_UQUAD(_hw_ntb_xeon_b2b, OID_AUTO, usd_bar2_addr64, CTLFLAG_RDTUN,
518    &xeon_b2b_usd_addr.bar2_addr64, 0, "If using B2B topology on Xeon "
519    "hardware, use this 64-bit address on the bus between the NTB devices for "
520    "the window at BAR2, on the upstream side of the link.  MUST be the same "
521    "address on both sides.");
522TUNABLE_QUAD("hw.ntb.usd_bar4_addr64", &xeon_b2b_usd_addr.bar4_addr64);
523SYSCTL_UQUAD(_hw_ntb_xeon_b2b, OID_AUTO, usd_bar4_addr64, CTLFLAG_RDTUN,
524    &xeon_b2b_usd_addr.bar4_addr64, 0, "See usd_bar2_addr64, but BAR4.");
525TUNABLE_QUAD("hw.ntb.usd_bar4_addr32", &xeon_b2b_usd_addr.bar4_addr32);
526SYSCTL_UQUAD(_hw_ntb_xeon_b2b, OID_AUTO, usd_bar4_addr32, CTLFLAG_RDTUN,
527    &xeon_b2b_usd_addr.bar4_addr32, 0, "See usd_bar2_addr64, but BAR4 "
528    "(split-BAR mode).");
529TUNABLE_QUAD("hw.ntb.usd_bar5_addr32", &xeon_b2b_usd_addr.bar5_addr32);
530SYSCTL_UQUAD(_hw_ntb_xeon_b2b, OID_AUTO, usd_bar5_addr32, CTLFLAG_RDTUN,
531    &xeon_b2b_usd_addr.bar5_addr32, 0, "See usd_bar2_addr64, but BAR5 "
532    "(split-BAR mode).");
533
534TUNABLE_QUAD("hw.ntb.dsd_bar2_addr64", &xeon_b2b_dsd_addr.bar2_addr64);
535SYSCTL_UQUAD(_hw_ntb_xeon_b2b, OID_AUTO, dsd_bar2_addr64, CTLFLAG_RDTUN,
536    &xeon_b2b_dsd_addr.bar2_addr64, 0, "If using B2B topology on Xeon "
537    "hardware, use this 64-bit address on the bus between the NTB devices for "
538    "the window at BAR2, on the downstream side of the link.  MUST be the same"
539    " address on both sides.");
540TUNABLE_QUAD("hw.ntb.dsd_bar4_addr64", &xeon_b2b_dsd_addr.bar4_addr64);
541SYSCTL_UQUAD(_hw_ntb_xeon_b2b, OID_AUTO, dsd_bar4_addr64, CTLFLAG_RDTUN,
542    &xeon_b2b_dsd_addr.bar4_addr64, 0, "See dsd_bar2_addr64, but BAR4.");
543TUNABLE_QUAD("hw.ntb.dsd_bar4_addr32", &xeon_b2b_dsd_addr.bar4_addr32);
544SYSCTL_UQUAD(_hw_ntb_xeon_b2b, OID_AUTO, dsd_bar4_addr32, CTLFLAG_RDTUN,
545    &xeon_b2b_dsd_addr.bar4_addr32, 0, "See dsd_bar2_addr64, but BAR4 "
546    "(split-BAR mode).");
547TUNABLE_QUAD("hw.ntb.dsd_bar5_addr32", &xeon_b2b_dsd_addr.bar5_addr32);
548SYSCTL_UQUAD(_hw_ntb_xeon_b2b, OID_AUTO, dsd_bar5_addr32, CTLFLAG_RDTUN,
549    &xeon_b2b_dsd_addr.bar5_addr32, 0, "See dsd_bar2_addr64, but BAR5 "
550    "(split-BAR mode).");
551
552/*
553 * OS <-> Driver interface structures
554 */
555MALLOC_DEFINE(M_NTB, "ntb_hw", "ntb_hw driver memory allocations");
556
557static device_method_t ntb_pci_methods[] = {
558	/* Device interface */
559	DEVMETHOD(device_probe,     ntb_probe),
560	DEVMETHOD(device_attach,    ntb_attach),
561	DEVMETHOD(device_detach,    ntb_detach),
562	DEVMETHOD_END
563};
564
565static driver_t ntb_pci_driver = {
566	"ntb_hw",
567	ntb_pci_methods,
568	sizeof(struct ntb_softc),
569};
570
571static devclass_t ntb_devclass;
572DRIVER_MODULE(ntb_hw, pci, ntb_pci_driver, ntb_devclass, NULL, NULL);
573MODULE_VERSION(ntb_hw, 1);
574
575SYSCTL_NODE(_hw, OID_AUTO, ntb, CTLFLAG_RW, 0, "NTB sysctls");
576
577/*
578 * OS <-> Driver linkage functions
579 */
580static int
581ntb_probe(device_t device)
582{
583	struct ntb_hw_info *p;
584
585	p = ntb_get_device_info(pci_get_devid(device));
586	if (p == NULL)
587		return (ENXIO);
588
589	device_set_desc(device, p->desc);
590	return (0);
591}
592
593static int
594ntb_attach(device_t device)
595{
596	struct ntb_softc *ntb;
597	struct ntb_hw_info *p;
598	int error;
599
600	ntb = DEVICE2SOFTC(device);
601	p = ntb_get_device_info(pci_get_devid(device));
602
603	ntb->device = device;
604	ntb->type = p->type;
605	ntb->features = p->features;
606	ntb->b2b_mw_idx = B2B_MW_DISABLED;
607
608	/* Heartbeat timer for NTB_ATOM since there is no link interrupt */
609	callout_init(&ntb->heartbeat_timer, CALLOUT_MPSAFE);
610	callout_init(&ntb->lr_timer, CALLOUT_MPSAFE);
611	mtx_init(&ntb->db_mask_lock, "ntb hw bits", NULL, MTX_SPIN);
612	mtx_init(&ntb->ctx_lock, "ntb ctx", NULL, MTX_DEF);
613
614	if (ntb->type == NTB_ATOM)
615		error = ntb_detect_atom(ntb);
616	else
617		error = ntb_detect_xeon(ntb);
618	if (error != 0)
619		goto out;
620
621	ntb_detect_max_mw(ntb);
622
623	pci_enable_busmaster(ntb->device);
624
625	error = ntb_map_pci_bars(ntb);
626	if (error != 0)
627		goto out;
628	if (ntb->type == NTB_ATOM)
629		error = ntb_atom_init_dev(ntb);
630	else
631		error = ntb_xeon_init_dev(ntb);
632	if (error != 0)
633		goto out;
634
635	ntb_poll_link(ntb);
636
637	ntb_sysctl_init(ntb);
638
639out:
640	if (error != 0)
641		ntb_detach(device);
642	return (error);
643}
644
645static int
646ntb_detach(device_t device)
647{
648	struct ntb_softc *ntb;
649
650	ntb = DEVICE2SOFTC(device);
651
652	if (ntb->self_reg != NULL)
653		ntb_db_set_mask(ntb, ntb->db_valid_mask);
654	callout_drain(&ntb->heartbeat_timer);
655	callout_drain(&ntb->lr_timer);
656	pci_disable_busmaster(ntb->device);
657	if (ntb->type == NTB_XEON)
658		ntb_teardown_xeon(ntb);
659	ntb_teardown_interrupts(ntb);
660
661	mtx_destroy(&ntb->db_mask_lock);
662	mtx_destroy(&ntb->ctx_lock);
663
664	ntb_unmap_pci_bar(ntb);
665
666	return (0);
667}
668
669/*
670 * Driver internal routines
671 */
672static inline enum ntb_bar
673ntb_mw_to_bar(struct ntb_softc *ntb, unsigned mw)
674{
675
676	KASSERT(mw < ntb->mw_count,
677	    ("%s: mw:%u > count:%u", __func__, mw, (unsigned)ntb->mw_count));
678	KASSERT(ntb->reg->mw_bar[mw] != 0, ("invalid mw"));
679
680	return (ntb->reg->mw_bar[mw]);
681}
682
683static inline bool
684bar_is_64bit(struct ntb_softc *ntb, enum ntb_bar bar)
685{
686	/* XXX This assertion could be stronger. */
687	KASSERT(bar < NTB_MAX_BARS, ("bogus bar"));
688	return (bar < NTB_B2B_BAR_2 || !HAS_FEATURE(NTB_SPLIT_BAR));
689}
690
691static inline void
692bar_get_xlat_params(struct ntb_softc *ntb, enum ntb_bar bar, uint32_t *base,
693    uint32_t *xlat, uint32_t *lmt)
694{
695	uint32_t basev, lmtv, xlatv;
696
697	switch (bar) {
698	case NTB_B2B_BAR_1:
699		basev = ntb->xlat_reg->bar2_base;
700		lmtv = ntb->xlat_reg->bar2_limit;
701		xlatv = ntb->xlat_reg->bar2_xlat;
702		break;
703	case NTB_B2B_BAR_2:
704		basev = ntb->xlat_reg->bar4_base;
705		lmtv = ntb->xlat_reg->bar4_limit;
706		xlatv = ntb->xlat_reg->bar4_xlat;
707		break;
708	case NTB_B2B_BAR_3:
709		basev = ntb->xlat_reg->bar5_base;
710		lmtv = ntb->xlat_reg->bar5_limit;
711		xlatv = ntb->xlat_reg->bar5_xlat;
712		break;
713	default:
714		KASSERT(bar >= NTB_B2B_BAR_1 && bar < NTB_MAX_BARS,
715		    ("bad bar"));
716		basev = lmtv = xlatv = 0;
717		break;
718	}
719
720	if (base != NULL)
721		*base = basev;
722	if (xlat != NULL)
723		*xlat = xlatv;
724	if (lmt != NULL)
725		*lmt = lmtv;
726}
727
728static int
729ntb_map_pci_bars(struct ntb_softc *ntb)
730{
731	int rc;
732
733	ntb->bar_info[NTB_CONFIG_BAR].pci_resource_id = PCIR_BAR(0);
734	rc = map_mmr_bar(ntb, &ntb->bar_info[NTB_CONFIG_BAR]);
735	if (rc != 0)
736		goto out;
737
738	ntb->bar_info[NTB_B2B_BAR_1].pci_resource_id = PCIR_BAR(2);
739	rc = map_memory_window_bar(ntb, &ntb->bar_info[NTB_B2B_BAR_1]);
740	if (rc != 0)
741		goto out;
742	ntb->bar_info[NTB_B2B_BAR_1].psz_off = XEON_PBAR23SZ_OFFSET;
743	ntb->bar_info[NTB_B2B_BAR_1].ssz_off = XEON_SBAR23SZ_OFFSET;
744	ntb->bar_info[NTB_B2B_BAR_1].pbarxlat_off = XEON_PBAR2XLAT_OFFSET;
745
746	ntb->bar_info[NTB_B2B_BAR_2].pci_resource_id = PCIR_BAR(4);
747	rc = map_memory_window_bar(ntb, &ntb->bar_info[NTB_B2B_BAR_2]);
748	if (rc != 0)
749		goto out;
750	ntb->bar_info[NTB_B2B_BAR_2].psz_off = XEON_PBAR4SZ_OFFSET;
751	ntb->bar_info[NTB_B2B_BAR_2].ssz_off = XEON_SBAR4SZ_OFFSET;
752	ntb->bar_info[NTB_B2B_BAR_2].pbarxlat_off = XEON_PBAR4XLAT_OFFSET;
753
754	if (!HAS_FEATURE(NTB_SPLIT_BAR))
755		goto out;
756
757	ntb->bar_info[NTB_B2B_BAR_3].pci_resource_id = PCIR_BAR(5);
758	rc = map_memory_window_bar(ntb, &ntb->bar_info[NTB_B2B_BAR_3]);
759	ntb->bar_info[NTB_B2B_BAR_3].psz_off = XEON_PBAR5SZ_OFFSET;
760	ntb->bar_info[NTB_B2B_BAR_3].ssz_off = XEON_SBAR5SZ_OFFSET;
761	ntb->bar_info[NTB_B2B_BAR_3].pbarxlat_off = XEON_PBAR5XLAT_OFFSET;
762
763out:
764	if (rc != 0)
765		device_printf(ntb->device,
766		    "unable to allocate pci resource\n");
767	return (rc);
768}
769
770static void
771print_map_success(struct ntb_softc *ntb, struct ntb_pci_bar_info *bar,
772    const char *kind)
773{
774
775	device_printf(ntb->device,
776	    "Mapped BAR%d v:[%p-%p] p:[%p-%p] (0x%jx bytes) (%s)\n",
777	    PCI_RID2BAR(bar->pci_resource_id), bar->vbase,
778	    (char *)bar->vbase + bar->size - 1,
779	    (void *)bar->pbase, (void *)(bar->pbase + bar->size - 1),
780	    (uintmax_t)bar->size, kind);
781}
782
783static int
784map_mmr_bar(struct ntb_softc *ntb, struct ntb_pci_bar_info *bar)
785{
786
787	bar->pci_resource = bus_alloc_resource_any(ntb->device, SYS_RES_MEMORY,
788	    &bar->pci_resource_id, RF_ACTIVE);
789	if (bar->pci_resource == NULL)
790		return (ENXIO);
791
792	save_bar_parameters(bar);
793	bar->map_mode = VM_MEMATTR_UNCACHEABLE;
794	print_map_success(ntb, bar, "mmr");
795	return (0);
796}
797
798static int
799map_memory_window_bar(struct ntb_softc *ntb, struct ntb_pci_bar_info *bar)
800{
801	int rc;
802	vm_memattr_t mapmode;
803	uint8_t bar_size_bits = 0;
804
805	bar->pci_resource = bus_alloc_resource_any(ntb->device, SYS_RES_MEMORY,
806	    &bar->pci_resource_id, RF_ACTIVE);
807
808	if (bar->pci_resource == NULL)
809		return (ENXIO);
810
811	save_bar_parameters(bar);
812	/*
813	 * Ivytown NTB BAR sizes are misreported by the hardware due to a
814	 * hardware issue. To work around this, query the size it should be
815	 * configured to by the device and modify the resource to correspond to
816	 * this new size. The BIOS on systems with this problem is required to
817	 * provide enough address space to allow the driver to make this change
818	 * safely.
819	 *
820	 * Ideally I could have just specified the size when I allocated the
821	 * resource like:
822	 *  bus_alloc_resource(ntb->device,
823	 *	SYS_RES_MEMORY, &bar->pci_resource_id, 0ul, ~0ul,
824	 *	1ul << bar_size_bits, RF_ACTIVE);
825	 * but the PCI driver does not honor the size in this call, so we have
826	 * to modify it after the fact.
827	 */
828	if (HAS_FEATURE(NTB_BAR_SIZE_4K)) {
829		if (bar->pci_resource_id == PCIR_BAR(2))
830			bar_size_bits = pci_read_config(ntb->device,
831			    XEON_PBAR23SZ_OFFSET, 1);
832		else
833			bar_size_bits = pci_read_config(ntb->device,
834			    XEON_PBAR45SZ_OFFSET, 1);
835
836		rc = bus_adjust_resource(ntb->device, SYS_RES_MEMORY,
837		    bar->pci_resource, bar->pbase,
838		    bar->pbase + (1ul << bar_size_bits) - 1);
839		if (rc != 0) {
840			device_printf(ntb->device,
841			    "unable to resize bar\n");
842			return (rc);
843		}
844
845		save_bar_parameters(bar);
846	}
847
848	bar->map_mode = VM_MEMATTR_UNCACHEABLE;
849	print_map_success(ntb, bar, "mw");
850
851	/*
852	 * Optionally, mark MW BARs as anything other than UC to improve
853	 * performance.
854	 */
855	mapmode = ntb_pat_flags();
856	if (mapmode == bar->map_mode)
857		return (0);
858
859	rc = pmap_change_attr((vm_offset_t)bar->vbase, bar->size, mapmode);
860	if (rc == 0) {
861		bar->map_mode = mapmode;
862		device_printf(ntb->device,
863		    "Marked BAR%d v:[%p-%p] p:[%p-%p] as "
864		    "%s.\n",
865		    PCI_RID2BAR(bar->pci_resource_id), bar->vbase,
866		    (char *)bar->vbase + bar->size - 1,
867		    (void *)bar->pbase, (void *)(bar->pbase + bar->size - 1),
868		    ntb_vm_memattr_to_str(mapmode));
869	} else
870		device_printf(ntb->device,
871		    "Unable to mark BAR%d v:[%p-%p] p:[%p-%p] as "
872		    "%s: %d\n",
873		    PCI_RID2BAR(bar->pci_resource_id), bar->vbase,
874		    (char *)bar->vbase + bar->size - 1,
875		    (void *)bar->pbase, (void *)(bar->pbase + bar->size - 1),
876		    ntb_vm_memattr_to_str(mapmode), rc);
877		/* Proceed anyway */
878	return (0);
879}
880
881static void
882ntb_unmap_pci_bar(struct ntb_softc *ntb)
883{
884	struct ntb_pci_bar_info *current_bar;
885	int i;
886
887	for (i = 0; i < NTB_MAX_BARS; i++) {
888		current_bar = &ntb->bar_info[i];
889		if (current_bar->pci_resource != NULL)
890			bus_release_resource(ntb->device, SYS_RES_MEMORY,
891			    current_bar->pci_resource_id,
892			    current_bar->pci_resource);
893	}
894}
895
896static int
897ntb_setup_msix(struct ntb_softc *ntb, uint32_t num_vectors)
898{
899	uint32_t i;
900	int rc;
901
902	for (i = 0; i < num_vectors; i++) {
903		ntb->int_info[i].rid = i + 1;
904		ntb->int_info[i].res = bus_alloc_resource_any(ntb->device,
905		    SYS_RES_IRQ, &ntb->int_info[i].rid, RF_ACTIVE);
906		if (ntb->int_info[i].res == NULL) {
907			device_printf(ntb->device,
908			    "bus_alloc_resource failed\n");
909			return (ENOMEM);
910		}
911		ntb->int_info[i].tag = NULL;
912		ntb->allocated_interrupts++;
913		rc = bus_setup_intr(ntb->device, ntb->int_info[i].res,
914		    INTR_MPSAFE | INTR_TYPE_MISC, NULL, ndev_vec_isr,
915		    &ntb->msix_vec[i], &ntb->int_info[i].tag);
916		if (rc != 0) {
917			device_printf(ntb->device, "bus_setup_intr failed\n");
918			return (ENXIO);
919		}
920	}
921	return (0);
922}
923
924/*
925 * The Linux NTB driver drops from MSI-X to legacy INTx if a unique vector
926 * cannot be allocated for each MSI-X message.  JHB seems to think remapping
927 * should be okay.  This tunable should enable us to test that hypothesis
928 * when someone gets their hands on some Xeon hardware.
929 */
930static int ntb_force_remap_mode;
931TUNABLE_INT("hw.ntb.force_remap_mode", &ntb_force_remap_mode);
932SYSCTL_INT(_hw_ntb, OID_AUTO, force_remap_mode, CTLFLAG_RDTUN,
933    &ntb_force_remap_mode, 0, "If enabled, force MSI-X messages to be remapped"
934    " to a smaller number of ithreads, even if the desired number are "
935    "available");
936
937/*
938 * In case it is NOT ok, give consumers an abort button.
939 */
940static int ntb_prefer_intx;
941TUNABLE_INT("hw.ntb.prefer_intx_to_remap", &ntb_prefer_intx);
942SYSCTL_INT(_hw_ntb, OID_AUTO, prefer_intx_to_remap, CTLFLAG_RDTUN,
943    &ntb_prefer_intx, 0, "If enabled, prefer to use legacy INTx mode rather "
944    "than remapping MSI-X messages over available slots (match Linux driver "
945    "behavior)");
946
947/*
948 * Remap the desired number of MSI-X messages to available ithreads in a simple
949 * round-robin fashion.
950 */
951static int
952ntb_remap_msix(device_t dev, uint32_t desired, uint32_t avail)
953{
954	u_int *vectors;
955	uint32_t i;
956	int rc;
957
958	if (ntb_prefer_intx != 0)
959		return (ENXIO);
960
961	vectors = malloc(desired * sizeof(*vectors), M_NTB, M_ZERO | M_WAITOK);
962
963	for (i = 0; i < desired; i++)
964		vectors[i] = (i % avail) + 1;
965
966	rc = pci_remap_msix(dev, desired, vectors);
967	free(vectors, M_NTB);
968	return (rc);
969}
970
971static int
972ntb_init_isr(struct ntb_softc *ntb)
973{
974	uint32_t desired_vectors, num_vectors;
975	int rc;
976
977	ntb->allocated_interrupts = 0;
978	ntb->last_ts = ticks;
979
980	/*
981	 * Mask all doorbell interrupts.
982	 */
983	ntb_db_set_mask(ntb, ntb->db_valid_mask);
984
985	num_vectors = desired_vectors = MIN(pci_msix_count(ntb->device),
986	    ntb->db_count);
987	if (desired_vectors >= 1) {
988		rc = pci_alloc_msix(ntb->device, &num_vectors);
989
990		if (ntb_force_remap_mode != 0 && rc == 0 &&
991		    num_vectors == desired_vectors)
992			num_vectors--;
993
994		if (rc == 0 && num_vectors < desired_vectors) {
995			rc = ntb_remap_msix(ntb->device, desired_vectors,
996			    num_vectors);
997			if (rc == 0)
998				num_vectors = desired_vectors;
999			else
1000				pci_release_msi(ntb->device);
1001		}
1002		if (rc != 0)
1003			num_vectors = 1;
1004	} else
1005		num_vectors = 1;
1006
1007	if (ntb->type == NTB_XEON && num_vectors < ntb->db_vec_count) {
1008		ntb->db_vec_count = 1;
1009		ntb->db_vec_shift = XEON_DB_TOTAL_SHIFT;
1010		rc = ntb_setup_legacy_interrupt(ntb);
1011	} else {
1012		ntb_create_msix_vec(ntb, num_vectors);
1013		rc = ntb_setup_msix(ntb, num_vectors);
1014	}
1015	if (rc != 0) {
1016		device_printf(ntb->device,
1017		    "Error allocating interrupts: %d\n", rc);
1018		ntb_free_msix_vec(ntb);
1019	}
1020
1021	return (rc);
1022}
1023
1024static int
1025ntb_setup_legacy_interrupt(struct ntb_softc *ntb)
1026{
1027	int rc;
1028
1029	ntb->int_info[0].rid = 0;
1030	ntb->int_info[0].res = bus_alloc_resource_any(ntb->device, SYS_RES_IRQ,
1031	    &ntb->int_info[0].rid, RF_SHAREABLE|RF_ACTIVE);
1032	if (ntb->int_info[0].res == NULL) {
1033		device_printf(ntb->device, "bus_alloc_resource failed\n");
1034		return (ENOMEM);
1035	}
1036
1037	ntb->int_info[0].tag = NULL;
1038	ntb->allocated_interrupts = 1;
1039
1040	rc = bus_setup_intr(ntb->device, ntb->int_info[0].res,
1041	    INTR_MPSAFE | INTR_TYPE_MISC, NULL, ndev_irq_isr,
1042	    ntb, &ntb->int_info[0].tag);
1043	if (rc != 0) {
1044		device_printf(ntb->device, "bus_setup_intr failed\n");
1045		return (ENXIO);
1046	}
1047
1048	return (0);
1049}
1050
1051static void
1052ntb_teardown_interrupts(struct ntb_softc *ntb)
1053{
1054	struct ntb_int_info *current_int;
1055	int i;
1056
1057	for (i = 0; i < ntb->allocated_interrupts; i++) {
1058		current_int = &ntb->int_info[i];
1059		if (current_int->tag != NULL)
1060			bus_teardown_intr(ntb->device, current_int->res,
1061			    current_int->tag);
1062
1063		if (current_int->res != NULL)
1064			bus_release_resource(ntb->device, SYS_RES_IRQ,
1065			    rman_get_rid(current_int->res), current_int->res);
1066	}
1067
1068	ntb_free_msix_vec(ntb);
1069	pci_release_msi(ntb->device);
1070}
1071
1072/*
1073 * Doorbell register and mask are 64-bit on Atom, 16-bit on Xeon.  Abstract it
1074 * out to make code clearer.
1075 */
1076static inline uint64_t
1077db_ioread(struct ntb_softc *ntb, uint64_t regoff)
1078{
1079
1080	if (ntb->type == NTB_ATOM)
1081		return (ntb_reg_read(8, regoff));
1082
1083	KASSERT(ntb->type == NTB_XEON, ("bad ntb type"));
1084
1085	return (ntb_reg_read(2, regoff));
1086}
1087
1088static inline void
1089db_iowrite(struct ntb_softc *ntb, uint64_t regoff, uint64_t val)
1090{
1091
1092	KASSERT((val & ~ntb->db_valid_mask) == 0,
1093	    ("%s: Invalid bits 0x%jx (valid: 0x%jx)", __func__,
1094	     (uintmax_t)(val & ~ntb->db_valid_mask),
1095	     (uintmax_t)ntb->db_valid_mask));
1096
1097	if (regoff == ntb->self_reg->db_mask)
1098		DB_MASK_ASSERT(ntb, MA_OWNED);
1099	db_iowrite_raw(ntb, regoff, val);
1100}
1101
1102static inline void
1103db_iowrite_raw(struct ntb_softc *ntb, uint64_t regoff, uint64_t val)
1104{
1105
1106	if (ntb->type == NTB_ATOM) {
1107		ntb_reg_write(8, regoff, val);
1108		return;
1109	}
1110
1111	KASSERT(ntb->type == NTB_XEON, ("bad ntb type"));
1112	ntb_reg_write(2, regoff, (uint16_t)val);
1113}
1114
1115void
1116ntb_db_set_mask(struct ntb_softc *ntb, uint64_t bits)
1117{
1118
1119	DB_MASK_LOCK(ntb);
1120	ntb->db_mask |= bits;
1121	db_iowrite(ntb, ntb->self_reg->db_mask, ntb->db_mask);
1122	DB_MASK_UNLOCK(ntb);
1123}
1124
1125void
1126ntb_db_clear_mask(struct ntb_softc *ntb, uint64_t bits)
1127{
1128
1129	KASSERT((bits & ~ntb->db_valid_mask) == 0,
1130	    ("%s: Invalid bits 0x%jx (valid: 0x%jx)", __func__,
1131	     (uintmax_t)(bits & ~ntb->db_valid_mask),
1132	     (uintmax_t)ntb->db_valid_mask));
1133
1134	DB_MASK_LOCK(ntb);
1135	ntb->db_mask &= ~bits;
1136	db_iowrite(ntb, ntb->self_reg->db_mask, ntb->db_mask);
1137	DB_MASK_UNLOCK(ntb);
1138}
1139
1140uint64_t
1141ntb_db_read(struct ntb_softc *ntb)
1142{
1143
1144	return (db_ioread(ntb, ntb->self_reg->db_bell));
1145}
1146
1147void
1148ntb_db_clear(struct ntb_softc *ntb, uint64_t bits)
1149{
1150
1151	KASSERT((bits & ~ntb->db_valid_mask) == 0,
1152	    ("%s: Invalid bits 0x%jx (valid: 0x%jx)", __func__,
1153	     (uintmax_t)(bits & ~ntb->db_valid_mask),
1154	     (uintmax_t)ntb->db_valid_mask));
1155
1156	db_iowrite(ntb, ntb->self_reg->db_bell, bits);
1157}
1158
1159static inline uint64_t
1160ntb_vec_mask(struct ntb_softc *ntb, uint64_t db_vector)
1161{
1162	uint64_t shift, mask;
1163
1164	shift = ntb->db_vec_shift;
1165	mask = (1ull << shift) - 1;
1166	return (mask << (shift * db_vector));
1167}
1168
1169static void
1170ntb_interrupt(struct ntb_softc *ntb, uint32_t vec)
1171{
1172	uint64_t vec_mask;
1173
1174	ntb->last_ts = ticks;
1175	vec_mask = ntb_vec_mask(ntb, vec);
1176
1177	if ((vec_mask & ntb->db_link_mask) != 0) {
1178		if (ntb_poll_link(ntb))
1179			ntb_link_event(ntb);
1180	}
1181
1182	if ((vec_mask & ntb->db_valid_mask) != 0)
1183		ntb_db_event(ntb, vec);
1184}
1185
1186static void
1187ndev_vec_isr(void *arg)
1188{
1189	struct ntb_vec *nvec = arg;
1190
1191	ntb_interrupt(nvec->ntb, nvec->num);
1192}
1193
/*
 * Handler for the single shared interrupt; arg is the device softc.
 */
static void
ndev_irq_isr(void *arg)
{
	struct ntb_softc *ntb;

	ntb = arg;

	/* If we couldn't set up MSI-X, we only have the one vector. */
	ntb_interrupt(ntb, 0);
}
1200
1201static int
1202ntb_create_msix_vec(struct ntb_softc *ntb, uint32_t num_vectors)
1203{
1204	uint32_t i;
1205
1206	ntb->msix_vec = malloc(num_vectors * sizeof(*ntb->msix_vec), M_NTB,
1207	    M_ZERO | M_WAITOK);
1208	for (i = 0; i < num_vectors; i++) {
1209		ntb->msix_vec[i].num = i;
1210		ntb->msix_vec[i].ntb = ntb;
1211	}
1212
1213	return (0);
1214}
1215
1216static void
1217ntb_free_msix_vec(struct ntb_softc *ntb)
1218{
1219
1220	if (ntb->msix_vec == NULL)
1221		return;
1222
1223	free(ntb->msix_vec, M_NTB);
1224	ntb->msix_vec = NULL;
1225}
1226
1227static struct ntb_hw_info *
1228ntb_get_device_info(uint32_t device_id)
1229{
1230	struct ntb_hw_info *ep = pci_ids;
1231
1232	while (ep->device_id) {
1233		if (ep->device_id == device_id)
1234			return (ep);
1235		++ep;
1236	}
1237	return (NULL);
1238}
1239
1240static void
1241ntb_teardown_xeon(struct ntb_softc *ntb)
1242{
1243
1244	if (ntb->reg != NULL)
1245		ntb_link_disable(ntb);
1246}
1247
1248static void
1249ntb_detect_max_mw(struct ntb_softc *ntb)
1250{
1251
1252	if (ntb->type == NTB_ATOM) {
1253		ntb->mw_count = ATOM_MW_COUNT;
1254		return;
1255	}
1256
1257	if (HAS_FEATURE(NTB_SPLIT_BAR))
1258		ntb->mw_count = XEON_HSX_SPLIT_MW_COUNT;
1259	else
1260		ntb->mw_count = XEON_SNB_MW_COUNT;
1261}
1262
1263static int
1264ntb_detect_xeon(struct ntb_softc *ntb)
1265{
1266	uint8_t ppd, conn_type;
1267
1268	ppd = pci_read_config(ntb->device, NTB_PPD_OFFSET, 1);
1269	ntb->ppd = ppd;
1270
1271	if ((ppd & XEON_PPD_DEV_TYPE) != 0)
1272		ntb->dev_type = NTB_DEV_DSD;
1273	else
1274		ntb->dev_type = NTB_DEV_USD;
1275
1276	if ((ppd & XEON_PPD_SPLIT_BAR) != 0)
1277		ntb->features |= NTB_SPLIT_BAR;
1278
1279	/* SB01BASE_LOCKUP errata is a superset of SDOORBELL errata */
1280	if (HAS_FEATURE(NTB_SB01BASE_LOCKUP))
1281		ntb->features |= NTB_SDOORBELL_LOCKUP;
1282
1283	conn_type = ppd & XEON_PPD_CONN_TYPE;
1284	switch (conn_type) {
1285	case NTB_CONN_B2B:
1286		ntb->conn_type = conn_type;
1287		break;
1288	case NTB_CONN_RP:
1289	case NTB_CONN_TRANSPARENT:
1290	default:
1291		device_printf(ntb->device, "Unsupported connection type: %u\n",
1292		    (unsigned)conn_type);
1293		return (ENXIO);
1294	}
1295	return (0);
1296}
1297
1298static int
1299ntb_detect_atom(struct ntb_softc *ntb)
1300{
1301	uint32_t ppd, conn_type;
1302
1303	ppd = pci_read_config(ntb->device, NTB_PPD_OFFSET, 4);
1304	ntb->ppd = ppd;
1305
1306	if ((ppd & ATOM_PPD_DEV_TYPE) != 0)
1307		ntb->dev_type = NTB_DEV_DSD;
1308	else
1309		ntb->dev_type = NTB_DEV_USD;
1310
1311	conn_type = (ppd & ATOM_PPD_CONN_TYPE) >> 8;
1312	switch (conn_type) {
1313	case NTB_CONN_B2B:
1314		ntb->conn_type = conn_type;
1315		break;
1316	default:
1317		device_printf(ntb->device, "Unsupported NTB configuration\n");
1318		return (ENXIO);
1319	}
1320	return (0);
1321}
1322
1323static int
1324ntb_xeon_init_dev(struct ntb_softc *ntb)
1325{
1326	int rc;
1327
1328	ntb->spad_count		= XEON_SPAD_COUNT;
1329	ntb->db_count		= XEON_DB_COUNT;
1330	ntb->db_link_mask	= XEON_DB_LINK_BIT;
1331	ntb->db_vec_count	= XEON_DB_MSIX_VECTOR_COUNT;
1332	ntb->db_vec_shift	= XEON_DB_MSIX_VECTOR_SHIFT;
1333
1334	if (ntb->conn_type != NTB_CONN_B2B) {
1335		device_printf(ntb->device, "Connection type %d not supported\n",
1336		    ntb->conn_type);
1337		return (ENXIO);
1338	}
1339
1340	ntb->reg = &xeon_reg;
1341	ntb->self_reg = &xeon_pri_reg;
1342	ntb->peer_reg = &xeon_b2b_reg;
1343	ntb->xlat_reg = &xeon_sec_xlat;
1344
1345	/*
1346	 * There is a Xeon hardware errata related to writes to SDOORBELL or
1347	 * B2BDOORBELL in conjunction with inbound access to NTB MMIO space,
1348	 * which may hang the system.  To workaround this, use a memory
1349	 * window to access the interrupt and scratch pad registers on the
1350	 * remote system.
1351	 */
1352	if (HAS_FEATURE(NTB_SDOORBELL_LOCKUP)) {
1353		ntb->b2b_mw_idx = (ntb->mw_count + g_ntb_mw_idx) %
1354		    ntb->mw_count;
1355		ntb_printf(2, "Setting up b2b mw idx %d means %u\n",
1356		    g_ntb_mw_idx, ntb->b2b_mw_idx);
1357		rc = ntb_mw_set_wc_internal(ntb, ntb->b2b_mw_idx, VM_MEMATTR_UNCACHEABLE);
1358		KASSERT(rc == 0, ("shouldn't fail"));
1359	} else if (HAS_FEATURE(NTB_B2BDOORBELL_BIT14))
1360		/*
1361		 * HW Errata on bit 14 of b2bdoorbell register.  Writes will not be
1362		 * mirrored to the remote system.  Shrink the number of bits by one,
1363		 * since bit 14 is the last bit.
1364		 *
1365		 * On REGS_THRU_MW errata mode, we don't use the b2bdoorbell register
1366		 * anyway.  Nor for non-B2B connection types.
1367		 */
1368		ntb->db_count = XEON_DB_COUNT - 1;
1369
1370	ntb->db_valid_mask = (1ull << ntb->db_count) - 1;
1371
1372	if (ntb->dev_type == NTB_DEV_USD)
1373		rc = xeon_setup_b2b_mw(ntb, &xeon_b2b_dsd_addr,
1374		    &xeon_b2b_usd_addr);
1375	else
1376		rc = xeon_setup_b2b_mw(ntb, &xeon_b2b_usd_addr,
1377		    &xeon_b2b_dsd_addr);
1378	if (rc != 0)
1379		return (rc);
1380
1381	/* Enable Bus Master and Memory Space on the secondary side */
1382	ntb_reg_write(2, XEON_SPCICMD_OFFSET,
1383	    PCIM_CMD_MEMEN | PCIM_CMD_BUSMASTEREN);
1384
1385	/*
1386	 * Mask all doorbell interrupts.
1387	 */
1388	ntb_db_set_mask(ntb, ntb->db_valid_mask);
1389
1390	rc = ntb_init_isr(ntb);
1391	return (rc);
1392}
1393
1394static int
1395ntb_atom_init_dev(struct ntb_softc *ntb)
1396{
1397	int error;
1398
1399	KASSERT(ntb->conn_type == NTB_CONN_B2B,
1400	    ("Unsupported NTB configuration (%d)\n", ntb->conn_type));
1401
1402	ntb->spad_count		 = ATOM_SPAD_COUNT;
1403	ntb->db_count		 = ATOM_DB_COUNT;
1404	ntb->db_vec_count	 = ATOM_DB_MSIX_VECTOR_COUNT;
1405	ntb->db_vec_shift	 = ATOM_DB_MSIX_VECTOR_SHIFT;
1406	ntb->db_valid_mask	 = (1ull << ntb->db_count) - 1;
1407
1408	ntb->reg = &atom_reg;
1409	ntb->self_reg = &atom_pri_reg;
1410	ntb->peer_reg = &atom_b2b_reg;
1411	ntb->xlat_reg = &atom_sec_xlat;
1412
1413	/*
1414	 * FIXME - MSI-X bug on early Atom HW, remove once internal issue is
1415	 * resolved.  Mask transaction layer internal parity errors.
1416	 */
1417	pci_write_config(ntb->device, 0xFC, 0x4, 4);
1418
1419	configure_atom_secondary_side_bars(ntb);
1420
1421	/* Enable Bus Master and Memory Space on the secondary side */
1422	ntb_reg_write(2, ATOM_SPCICMD_OFFSET,
1423	    PCIM_CMD_MEMEN | PCIM_CMD_BUSMASTEREN);
1424
1425	error = ntb_init_isr(ntb);
1426	if (error != 0)
1427		return (error);
1428
1429	/* Initiate PCI-E link training */
1430	ntb_link_enable(ntb, NTB_SPEED_AUTO, NTB_WIDTH_AUTO);
1431
1432	callout_reset(&ntb->heartbeat_timer, 0, atom_link_hb, ntb);
1433
1434	return (0);
1435}
1436
1437/* XXX: Linux driver doesn't seem to do any of this for Atom. */
1438static void
1439configure_atom_secondary_side_bars(struct ntb_softc *ntb)
1440{
1441
1442	if (ntb->dev_type == NTB_DEV_USD) {
1443		ntb_reg_write(8, ATOM_PBAR2XLAT_OFFSET,
1444		    XEON_B2B_BAR2_ADDR64);
1445		ntb_reg_write(8, ATOM_PBAR4XLAT_OFFSET,
1446		    XEON_B2B_BAR4_ADDR64);
1447		ntb_reg_write(8, ATOM_MBAR23_OFFSET, XEON_B2B_BAR2_ADDR64);
1448		ntb_reg_write(8, ATOM_MBAR45_OFFSET, XEON_B2B_BAR4_ADDR64);
1449	} else {
1450		ntb_reg_write(8, ATOM_PBAR2XLAT_OFFSET,
1451		    XEON_B2B_BAR2_ADDR64);
1452		ntb_reg_write(8, ATOM_PBAR4XLAT_OFFSET,
1453		    XEON_B2B_BAR4_ADDR64);
1454		ntb_reg_write(8, ATOM_MBAR23_OFFSET, XEON_B2B_BAR2_ADDR64);
1455		ntb_reg_write(8, ATOM_MBAR45_OFFSET, XEON_B2B_BAR4_ADDR64);
1456	}
1457}
1458
1459
1460/*
1461 * When working around Xeon SDOORBELL errata by remapping remote registers in a
1462 * MW, limit the B2B MW to half a MW.  By sharing a MW, half the shared MW
1463 * remains for use by a higher layer.
1464 *
1465 * Will only be used if working around SDOORBELL errata and the BIOS-configured
1466 * MW size is sufficiently large.
1467 */
1468static unsigned int ntb_b2b_mw_share;
1469TUNABLE_INT("hw.ntb.b2b_mw_share", &ntb_b2b_mw_share);
1470SYSCTL_UINT(_hw_ntb, OID_AUTO, b2b_mw_share, CTLFLAG_RDTUN, &ntb_b2b_mw_share,
1471    0, "If enabled (non-zero), prefer to share half of the B2B peer register "
1472    "MW with higher level consumers.  Both sides of the NTB MUST set the same "
1473    "value here.");
1474
1475static void
1476xeon_reset_sbar_size(struct ntb_softc *ntb, enum ntb_bar idx,
1477    enum ntb_bar regbar)
1478{
1479	struct ntb_pci_bar_info *bar;
1480	uint8_t bar_sz;
1481
1482	if (!HAS_FEATURE(NTB_SPLIT_BAR) && idx >= NTB_B2B_BAR_3)
1483		return;
1484
1485	bar = &ntb->bar_info[idx];
1486	bar_sz = pci_read_config(ntb->device, bar->psz_off, 1);
1487	if (idx == regbar) {
1488		if (ntb->b2b_off != 0)
1489			bar_sz--;
1490		else
1491			bar_sz = 0;
1492	}
1493	pci_write_config(ntb->device, bar->ssz_off, bar_sz, 1);
1494	bar_sz = pci_read_config(ntb->device, bar->ssz_off, 1);
1495	(void)bar_sz;
1496}
1497
1498static void
1499xeon_set_sbar_base_and_limit(struct ntb_softc *ntb, uint64_t bar_addr,
1500    enum ntb_bar idx, enum ntb_bar regbar)
1501{
1502	uint64_t reg_val;
1503	uint32_t base_reg, lmt_reg;
1504
1505	bar_get_xlat_params(ntb, idx, &base_reg, NULL, &lmt_reg);
1506	if (idx == regbar)
1507		bar_addr += ntb->b2b_off;
1508
1509	if (!bar_is_64bit(ntb, idx)) {
1510		ntb_reg_write(4, base_reg, bar_addr);
1511		reg_val = ntb_reg_read(4, base_reg);
1512		(void)reg_val;
1513
1514		ntb_reg_write(4, lmt_reg, bar_addr);
1515		reg_val = ntb_reg_read(4, lmt_reg);
1516		(void)reg_val;
1517	} else {
1518		ntb_reg_write(8, base_reg, bar_addr);
1519		reg_val = ntb_reg_read(8, base_reg);
1520		(void)reg_val;
1521
1522		ntb_reg_write(8, lmt_reg, bar_addr);
1523		reg_val = ntb_reg_read(8, lmt_reg);
1524		(void)reg_val;
1525	}
1526}
1527
1528static void
1529xeon_set_pbar_xlat(struct ntb_softc *ntb, uint64_t base_addr, enum ntb_bar idx)
1530{
1531	struct ntb_pci_bar_info *bar;
1532
1533	bar = &ntb->bar_info[idx];
1534	if (HAS_FEATURE(NTB_SPLIT_BAR) && idx >= NTB_B2B_BAR_2) {
1535		ntb_reg_write(4, bar->pbarxlat_off, base_addr);
1536		base_addr = ntb_reg_read(4, bar->pbarxlat_off);
1537	} else {
1538		ntb_reg_write(8, bar->pbarxlat_off, base_addr);
1539		base_addr = ntb_reg_read(8, bar->pbarxlat_off);
1540	}
1541	(void)base_addr;
1542}
1543
1544static int
1545xeon_setup_b2b_mw(struct ntb_softc *ntb, const struct ntb_b2b_addr *addr,
1546    const struct ntb_b2b_addr *peer_addr)
1547{
1548	struct ntb_pci_bar_info *b2b_bar;
1549	vm_size_t bar_size;
1550	uint64_t bar_addr;
1551	enum ntb_bar b2b_bar_num, i;
1552
1553	if (ntb->b2b_mw_idx == B2B_MW_DISABLED) {
1554		b2b_bar = NULL;
1555		b2b_bar_num = NTB_CONFIG_BAR;
1556		ntb->b2b_off = 0;
1557	} else {
1558		b2b_bar_num = ntb_mw_to_bar(ntb, ntb->b2b_mw_idx);
1559		KASSERT(b2b_bar_num > 0 && b2b_bar_num < NTB_MAX_BARS,
1560		    ("invalid b2b mw bar"));
1561
1562		b2b_bar = &ntb->bar_info[b2b_bar_num];
1563		bar_size = b2b_bar->size;
1564
1565		if (ntb_b2b_mw_share != 0 &&
1566		    (bar_size >> 1) >= XEON_B2B_MIN_SIZE)
1567			ntb->b2b_off = bar_size >> 1;
1568		else if (bar_size >= XEON_B2B_MIN_SIZE) {
1569			ntb->b2b_off = 0;
1570		} else {
1571			device_printf(ntb->device,
1572			    "B2B bar size is too small!\n");
1573			return (EIO);
1574		}
1575	}
1576
1577	/*
1578	 * Reset the secondary bar sizes to match the primary bar sizes.
1579	 * (Except, disable or halve the size of the B2B secondary bar.)
1580	 */
1581	for (i = NTB_B2B_BAR_1; i < NTB_MAX_BARS; i++)
1582		xeon_reset_sbar_size(ntb, i, b2b_bar_num);
1583
1584	bar_addr = 0;
1585	if (b2b_bar_num == NTB_CONFIG_BAR)
1586		bar_addr = addr->bar0_addr;
1587	else if (b2b_bar_num == NTB_B2B_BAR_1)
1588		bar_addr = addr->bar2_addr64;
1589	else if (b2b_bar_num == NTB_B2B_BAR_2 && !HAS_FEATURE(NTB_SPLIT_BAR))
1590		bar_addr = addr->bar4_addr64;
1591	else if (b2b_bar_num == NTB_B2B_BAR_2)
1592		bar_addr = addr->bar4_addr32;
1593	else if (b2b_bar_num == NTB_B2B_BAR_3)
1594		bar_addr = addr->bar5_addr32;
1595	else
1596		KASSERT(false, ("invalid bar"));
1597
1598	ntb_reg_write(8, XEON_SBAR0BASE_OFFSET, bar_addr);
1599
1600	/*
1601	 * Other SBARs are normally hit by the PBAR xlat, except for the b2b
1602	 * register BAR.  The B2B BAR is either disabled above or configured
1603	 * half-size.  It starts at PBAR xlat + offset.
1604	 *
1605	 * Also set up incoming BAR limits == base (zero length window).
1606	 */
1607	xeon_set_sbar_base_and_limit(ntb, addr->bar2_addr64, NTB_B2B_BAR_1,
1608	    b2b_bar_num);
1609	if (HAS_FEATURE(NTB_SPLIT_BAR)) {
1610		xeon_set_sbar_base_and_limit(ntb, addr->bar4_addr32,
1611		    NTB_B2B_BAR_2, b2b_bar_num);
1612		xeon_set_sbar_base_and_limit(ntb, addr->bar5_addr32,
1613		    NTB_B2B_BAR_3, b2b_bar_num);
1614	} else
1615		xeon_set_sbar_base_and_limit(ntb, addr->bar4_addr64,
1616		    NTB_B2B_BAR_2, b2b_bar_num);
1617
1618	/* Zero incoming translation addrs */
1619	ntb_reg_write(8, XEON_SBAR2XLAT_OFFSET, 0);
1620	ntb_reg_write(8, XEON_SBAR4XLAT_OFFSET, 0);
1621
1622	/* Zero outgoing translation limits (whole bar size windows) */
1623	ntb_reg_write(8, XEON_PBAR2LMT_OFFSET, 0);
1624	ntb_reg_write(8, XEON_PBAR4LMT_OFFSET, 0);
1625
1626	/* Set outgoing translation offsets */
1627	xeon_set_pbar_xlat(ntb, peer_addr->bar2_addr64, NTB_B2B_BAR_1);
1628	if (HAS_FEATURE(NTB_SPLIT_BAR)) {
1629		xeon_set_pbar_xlat(ntb, peer_addr->bar4_addr32, NTB_B2B_BAR_2);
1630		xeon_set_pbar_xlat(ntb, peer_addr->bar5_addr32, NTB_B2B_BAR_3);
1631	} else
1632		xeon_set_pbar_xlat(ntb, peer_addr->bar4_addr64, NTB_B2B_BAR_2);
1633
1634	/* Set the translation offset for B2B registers */
1635	bar_addr = 0;
1636	if (b2b_bar_num == NTB_CONFIG_BAR)
1637		bar_addr = peer_addr->bar0_addr;
1638	else if (b2b_bar_num == NTB_B2B_BAR_1)
1639		bar_addr = peer_addr->bar2_addr64;
1640	else if (b2b_bar_num == NTB_B2B_BAR_2 && !HAS_FEATURE(NTB_SPLIT_BAR))
1641		bar_addr = peer_addr->bar4_addr64;
1642	else if (b2b_bar_num == NTB_B2B_BAR_2)
1643		bar_addr = peer_addr->bar4_addr32;
1644	else if (b2b_bar_num == NTB_B2B_BAR_3)
1645		bar_addr = peer_addr->bar5_addr32;
1646	else
1647		KASSERT(false, ("invalid bar"));
1648
1649	/*
1650	 * B2B_XLAT_OFFSET is a 64-bit register but can only be written 32 bits
1651	 * at a time.
1652	 */
1653	ntb_reg_write(4, XEON_B2B_XLAT_OFFSETL, bar_addr & 0xffffffff);
1654	ntb_reg_write(4, XEON_B2B_XLAT_OFFSETU, bar_addr >> 32);
1655	return (0);
1656}
1657
1658static inline bool
1659link_is_up(struct ntb_softc *ntb)
1660{
1661
1662	if (ntb->type == NTB_XEON) {
1663		if (ntb->conn_type == NTB_CONN_TRANSPARENT)
1664			return (true);
1665		return ((ntb->lnk_sta & NTB_LINK_STATUS_ACTIVE) != 0);
1666	}
1667
1668	KASSERT(ntb->type == NTB_ATOM, ("ntb type"));
1669	return ((ntb->ntb_ctl & ATOM_CNTL_LINK_DOWN) == 0);
1670}
1671
1672static inline bool
1673atom_link_is_err(struct ntb_softc *ntb)
1674{
1675	uint32_t status;
1676
1677	KASSERT(ntb->type == NTB_ATOM, ("ntb type"));
1678
1679	status = ntb_reg_read(4, ATOM_LTSSMSTATEJMP_OFFSET);
1680	if ((status & ATOM_LTSSMSTATEJMP_FORCEDETECT) != 0)
1681		return (true);
1682
1683	status = ntb_reg_read(4, ATOM_IBSTERRRCRVSTS0_OFFSET);
1684	return ((status & ATOM_IBIST_ERR_OFLOW) != 0);
1685}
1686
1687/* Atom does not have link status interrupt, poll on that platform */
1688static void
1689atom_link_hb(void *arg)
1690{
1691	struct ntb_softc *ntb = arg;
1692	sbintime_t timo, poll_ts;
1693
1694	timo = NTB_HB_TIMEOUT * hz;
1695	poll_ts = ntb->last_ts + timo;
1696
1697	/*
1698	 * Delay polling the link status if an interrupt was received, unless
1699	 * the cached link status says the link is down.
1700	 */
1701	if ((sbintime_t)ticks - poll_ts < 0 && link_is_up(ntb)) {
1702		timo = poll_ts - ticks;
1703		goto out;
1704	}
1705
1706	if (ntb_poll_link(ntb))
1707		ntb_link_event(ntb);
1708
1709	if (!link_is_up(ntb) && atom_link_is_err(ntb)) {
1710		/* Link is down with error, proceed with recovery */
1711		callout_reset(&ntb->lr_timer, 0, recover_atom_link, ntb);
1712		return;
1713	}
1714
1715out:
1716	callout_reset(&ntb->heartbeat_timer, timo, atom_link_hb, ntb);
1717}
1718
1719static void
1720atom_perform_link_restart(struct ntb_softc *ntb)
1721{
1722	uint32_t status;
1723
1724	/* Driver resets the NTB ModPhy lanes - magic! */
1725	ntb_reg_write(1, ATOM_MODPHY_PCSREG6, 0xe0);
1726	ntb_reg_write(1, ATOM_MODPHY_PCSREG4, 0x40);
1727	ntb_reg_write(1, ATOM_MODPHY_PCSREG4, 0x60);
1728	ntb_reg_write(1, ATOM_MODPHY_PCSREG6, 0x60);
1729
1730	/* Driver waits 100ms to allow the NTB ModPhy to settle */
1731	pause("ModPhy", hz / 10);
1732
1733	/* Clear AER Errors, write to clear */
1734	status = ntb_reg_read(4, ATOM_ERRCORSTS_OFFSET);
1735	status &= PCIM_AER_COR_REPLAY_ROLLOVER;
1736	ntb_reg_write(4, ATOM_ERRCORSTS_OFFSET, status);
1737
1738	/* Clear unexpected electrical idle event in LTSSM, write to clear */
1739	status = ntb_reg_read(4, ATOM_LTSSMERRSTS0_OFFSET);
1740	status |= ATOM_LTSSMERRSTS0_UNEXPECTEDEI;
1741	ntb_reg_write(4, ATOM_LTSSMERRSTS0_OFFSET, status);
1742
1743	/* Clear DeSkew Buffer error, write to clear */
1744	status = ntb_reg_read(4, ATOM_DESKEWSTS_OFFSET);
1745	status |= ATOM_DESKEWSTS_DBERR;
1746	ntb_reg_write(4, ATOM_DESKEWSTS_OFFSET, status);
1747
1748	status = ntb_reg_read(4, ATOM_IBSTERRRCRVSTS0_OFFSET);
1749	status &= ATOM_IBIST_ERR_OFLOW;
1750	ntb_reg_write(4, ATOM_IBSTERRRCRVSTS0_OFFSET, status);
1751
1752	/* Releases the NTB state machine to allow the link to retrain */
1753	status = ntb_reg_read(4, ATOM_LTSSMSTATEJMP_OFFSET);
1754	status &= ~ATOM_LTSSMSTATEJMP_FORCEDETECT;
1755	ntb_reg_write(4, ATOM_LTSSMSTATEJMP_OFFSET, status);
1756}
1757
1758/*
1759 * ntb_set_ctx() - associate a driver context with an ntb device
1760 * @ntb:        NTB device context
1761 * @ctx:        Driver context
1762 * @ctx_ops:    Driver context operations
1763 *
1764 * Associate a driver context and operations with a ntb device.  The context is
1765 * provided by the client driver, and the driver may associate a different
1766 * context with each ntb device.
1767 *
1768 * Return: Zero if the context is associated, otherwise an error number.
1769 */
1770int
1771ntb_set_ctx(struct ntb_softc *ntb, void *ctx, const struct ntb_ctx_ops *ops)
1772{
1773
1774	if (ctx == NULL || ops == NULL)
1775		return (EINVAL);
1776	if (ntb->ctx_ops != NULL)
1777		return (EINVAL);
1778
1779	CTX_LOCK(ntb);
1780	if (ntb->ctx_ops != NULL) {
1781		CTX_UNLOCK(ntb);
1782		return (EINVAL);
1783	}
1784	ntb->ntb_ctx = ctx;
1785	ntb->ctx_ops = ops;
1786	CTX_UNLOCK(ntb);
1787
1788	return (0);
1789}
1790
1791/*
1792 * It is expected that this will only be used from contexts where the ctx_lock
1793 * is not needed to protect ntb_ctx lifetime.
1794 */
1795void *
1796ntb_get_ctx(struct ntb_softc *ntb, const struct ntb_ctx_ops **ops)
1797{
1798
1799	KASSERT(ntb->ntb_ctx != NULL && ntb->ctx_ops != NULL, ("bogus"));
1800	if (ops != NULL)
1801		*ops = ntb->ctx_ops;
1802	return (ntb->ntb_ctx);
1803}
1804
1805/*
1806 * ntb_clear_ctx() - disassociate any driver context from an ntb device
1807 * @ntb:        NTB device context
1808 *
1809 * Clear any association that may exist between a driver context and the ntb
1810 * device.
1811 */
1812void
1813ntb_clear_ctx(struct ntb_softc *ntb)
1814{
1815
1816	CTX_LOCK(ntb);
1817	ntb->ntb_ctx = NULL;
1818	ntb->ctx_ops = NULL;
1819	CTX_UNLOCK(ntb);
1820}
1821
1822/*
1823 * ntb_link_event() - notify driver context of a change in link status
1824 * @ntb:        NTB device context
1825 *
1826 * Notify the driver context that the link status may have changed.  The driver
1827 * should call ntb_link_is_up() to get the current status.
1828 */
1829void
1830ntb_link_event(struct ntb_softc *ntb)
1831{
1832
1833	CTX_LOCK(ntb);
1834	if (ntb->ctx_ops != NULL && ntb->ctx_ops->link_event != NULL)
1835		ntb->ctx_ops->link_event(ntb->ntb_ctx);
1836	CTX_UNLOCK(ntb);
1837}
1838
1839/*
1840 * ntb_db_event() - notify driver context of a doorbell event
1841 * @ntb:        NTB device context
1842 * @vector:     Interrupt vector number
1843 *
1844 * Notify the driver context of a doorbell event.  If hardware supports
1845 * multiple interrupt vectors for doorbells, the vector number indicates which
1846 * vector received the interrupt.  The vector number is relative to the first
1847 * vector used for doorbells, starting at zero, and must be less than
1848 * ntb_db_vector_count().  The driver may call ntb_db_read() to check which
1849 * doorbell bits need service, and ntb_db_vector_mask() to determine which of
1850 * those bits are associated with the vector number.
1851 */
1852static void
1853ntb_db_event(struct ntb_softc *ntb, uint32_t vec)
1854{
1855
1856	CTX_LOCK(ntb);
1857	if (ntb->ctx_ops != NULL && ntb->ctx_ops->db_event != NULL)
1858		ntb->ctx_ops->db_event(ntb->ntb_ctx, vec);
1859	CTX_UNLOCK(ntb);
1860}
1861
1862/*
1863 * ntb_link_enable() - enable the link on the secondary side of the ntb
1864 * @ntb:        NTB device context
1865 * @max_speed:  The maximum link speed expressed as PCIe generation number[0]
1866 * @max_width:  The maximum link width expressed as the number of PCIe lanes[0]
1867 *
1868 * Enable the link on the secondary side of the ntb.  This can only be done
1869 * from the primary side of the ntb in primary or b2b topology.  The ntb device
1870 * should train the link to its maximum speed and width, or the requested speed
1871 * and width, whichever is smaller, if supported.
1872 *
1873 * Return: Zero on success, otherwise an error number.
1874 *
1875 * [0]: Only NTB_SPEED_AUTO and NTB_WIDTH_AUTO are valid inputs; other speed
1876 *      and width input will be ignored.
1877 */
1878int
1879ntb_link_enable(struct ntb_softc *ntb, enum ntb_speed s __unused,
1880    enum ntb_width w __unused)
1881{
1882	uint32_t cntl;
1883
1884	if (ntb->type == NTB_ATOM) {
1885		pci_write_config(ntb->device, NTB_PPD_OFFSET,
1886		    ntb->ppd | ATOM_PPD_INIT_LINK, 4);
1887		return (0);
1888	}
1889
1890	if (ntb->conn_type == NTB_CONN_TRANSPARENT) {
1891		ntb_link_event(ntb);
1892		return (0);
1893	}
1894
1895	cntl = ntb_reg_read(4, ntb->reg->ntb_ctl);
1896	cntl &= ~(NTB_CNTL_LINK_DISABLE | NTB_CNTL_CFG_LOCK);
1897	cntl |= NTB_CNTL_P2S_BAR23_SNOOP | NTB_CNTL_S2P_BAR23_SNOOP;
1898	cntl |= NTB_CNTL_P2S_BAR4_SNOOP | NTB_CNTL_S2P_BAR4_SNOOP;
1899	if (HAS_FEATURE(NTB_SPLIT_BAR))
1900		cntl |= NTB_CNTL_P2S_BAR5_SNOOP | NTB_CNTL_S2P_BAR5_SNOOP;
1901	ntb_reg_write(4, ntb->reg->ntb_ctl, cntl);
1902	return (0);
1903}
1904
1905/*
1906 * ntb_link_disable() - disable the link on the secondary side of the ntb
1907 * @ntb:        NTB device context
1908 *
1909 * Disable the link on the secondary side of the ntb.  This can only be done
1910 * from the primary side of the ntb in primary or b2b topology.  The ntb device
1911 * should disable the link.  Returning from this call must indicate that a
1912 * barrier has passed, though with no more writes may pass in either direction
1913 * across the link, except if this call returns an error number.
1914 *
1915 * Return: Zero on success, otherwise an error number.
1916 */
1917int
1918ntb_link_disable(struct ntb_softc *ntb)
1919{
1920	uint32_t cntl;
1921
1922	if (ntb->conn_type == NTB_CONN_TRANSPARENT) {
1923		ntb_link_event(ntb);
1924		return (0);
1925	}
1926
1927	cntl = ntb_reg_read(4, ntb->reg->ntb_ctl);
1928	cntl &= ~(NTB_CNTL_P2S_BAR23_SNOOP | NTB_CNTL_S2P_BAR23_SNOOP);
1929	cntl &= ~(NTB_CNTL_P2S_BAR4_SNOOP | NTB_CNTL_S2P_BAR4_SNOOP);
1930	if (HAS_FEATURE(NTB_SPLIT_BAR))
1931		cntl &= ~(NTB_CNTL_P2S_BAR5_SNOOP | NTB_CNTL_S2P_BAR5_SNOOP);
1932	cntl |= NTB_CNTL_LINK_DISABLE | NTB_CNTL_CFG_LOCK;
1933	ntb_reg_write(4, ntb->reg->ntb_ctl, cntl);
1934	return (0);
1935}
1936
1937static void
1938recover_atom_link(void *arg)
1939{
1940	struct ntb_softc *ntb = arg;
1941	unsigned speed, width, oldspeed, oldwidth;
1942	uint32_t status32;
1943
1944	atom_perform_link_restart(ntb);
1945
1946	/*
1947	 * There is a potential race between the 2 NTB devices recovering at
1948	 * the same time.  If the times are the same, the link will not recover
1949	 * and the driver will be stuck in this loop forever.  Add a random
1950	 * interval to the recovery time to prevent this race.
1951	 */
1952	status32 = arc4random() % ATOM_LINK_RECOVERY_TIME;
1953	pause("Link", (ATOM_LINK_RECOVERY_TIME + status32) * hz / 1000);
1954
1955	if (atom_link_is_err(ntb))
1956		goto retry;
1957
1958	status32 = ntb_reg_read(4, ntb->reg->ntb_ctl);
1959	if ((status32 & ATOM_CNTL_LINK_DOWN) != 0)
1960		goto out;
1961
1962	status32 = ntb_reg_read(4, ntb->reg->lnk_sta);
1963	width = NTB_LNK_STA_WIDTH(status32);
1964	speed = status32 & NTB_LINK_SPEED_MASK;
1965
1966	oldwidth = NTB_LNK_STA_WIDTH(ntb->lnk_sta);
1967	oldspeed = ntb->lnk_sta & NTB_LINK_SPEED_MASK;
1968	if (oldwidth != width || oldspeed != speed)
1969		goto retry;
1970
1971out:
1972	callout_reset(&ntb->heartbeat_timer, NTB_HB_TIMEOUT * hz, atom_link_hb,
1973	    ntb);
1974	return;
1975
1976retry:
1977	callout_reset(&ntb->lr_timer, NTB_HB_TIMEOUT * hz, recover_atom_link,
1978	    ntb);
1979}
1980
1981/*
1982 * Polls the HW link status register(s); returns true if something has changed.
1983 */
1984static bool
1985ntb_poll_link(struct ntb_softc *ntb)
1986{
1987	uint32_t ntb_cntl;
1988	uint16_t reg_val;
1989
1990	if (ntb->type == NTB_ATOM) {
1991		ntb_cntl = ntb_reg_read(4, ntb->reg->ntb_ctl);
1992		if (ntb_cntl == ntb->ntb_ctl)
1993			return (false);
1994
1995		ntb->ntb_ctl = ntb_cntl;
1996		ntb->lnk_sta = ntb_reg_read(4, ntb->reg->lnk_sta);
1997	} else {
1998		db_iowrite_raw(ntb, ntb->self_reg->db_bell, ntb->db_link_mask);
1999
2000		reg_val = pci_read_config(ntb->device, ntb->reg->lnk_sta, 2);
2001		if (reg_val == ntb->lnk_sta)
2002			return (false);
2003
2004		ntb->lnk_sta = reg_val;
2005	}
2006	return (true);
2007}
2008
2009static inline enum ntb_speed
2010ntb_link_sta_speed(struct ntb_softc *ntb)
2011{
2012
2013	if (!link_is_up(ntb))
2014		return (NTB_SPEED_NONE);
2015	return (ntb->lnk_sta & NTB_LINK_SPEED_MASK);
2016}
2017
2018static inline enum ntb_width
2019ntb_link_sta_width(struct ntb_softc *ntb)
2020{
2021
2022	if (!link_is_up(ntb))
2023		return (NTB_WIDTH_NONE);
2024	return (NTB_LNK_STA_WIDTH(ntb->lnk_sta));
2025}
2026
2027SYSCTL_NODE(_hw_ntb, OID_AUTO, debug_info, CTLFLAG_RW, 0,
2028    "Driver state, statistics, and HW registers");
2029
2030#define NTB_REGSZ_MASK	(3ul << 30)
2031#define NTB_REG_64	(1ul << 30)
2032#define NTB_REG_32	(2ul << 30)
2033#define NTB_REG_16	(3ul << 30)
2034#define NTB_REG_8	(0ul << 30)
2035
2036#define NTB_DB_READ	(1ul << 29)
2037#define NTB_PCI_REG	(1ul << 28)
2038#define NTB_REGFLAGS_MASK	(NTB_REGSZ_MASK | NTB_DB_READ | NTB_PCI_REG)
2039
2040static void
2041ntb_sysctl_init(struct ntb_softc *ntb)
2042{
2043	struct sysctl_oid_list *tree_par, *regpar, *statpar, *errpar;
2044	struct sysctl_ctx_list *ctx;
2045	struct sysctl_oid *tree, *tmptree;
2046
2047	ctx = device_get_sysctl_ctx(ntb->device);
2048
2049	tree = SYSCTL_ADD_NODE(ctx,
2050	    SYSCTL_CHILDREN(device_get_sysctl_tree(ntb->device)), OID_AUTO,
2051	    "debug_info", CTLFLAG_RD, NULL,
2052	    "Driver state, statistics, and HW registers");
2053	tree_par = SYSCTL_CHILDREN(tree);
2054
2055	SYSCTL_ADD_UINT(ctx, tree_par, OID_AUTO, "conn_type", CTLFLAG_RD,
2056	    &ntb->conn_type, 0, "0 - Transparent; 1 - B2B; 2 - Root Port");
2057	SYSCTL_ADD_UINT(ctx, tree_par, OID_AUTO, "dev_type", CTLFLAG_RD,
2058	    &ntb->dev_type, 0, "0 - USD; 1 - DSD");
2059	SYSCTL_ADD_UINT(ctx, tree_par, OID_AUTO, "ppd", CTLFLAG_RD,
2060	    &ntb->ppd, 0, "Raw PPD register (cached)");
2061
2062	if (ntb->b2b_mw_idx != B2B_MW_DISABLED) {
2063#ifdef notyet
2064		SYSCTL_ADD_U8(ctx, tree_par, OID_AUTO, "b2b_idx", CTLFLAG_RD,
2065		    &ntb->b2b_mw_idx, 0,
2066		    "Index of the MW used for B2B remote register access");
2067#endif
2068		SYSCTL_ADD_UQUAD(ctx, tree_par, OID_AUTO, "b2b_off",
2069		    CTLFLAG_RD, &ntb->b2b_off,
2070		    "If non-zero, offset of B2B register region in shared MW");
2071	}
2072
2073	SYSCTL_ADD_PROC(ctx, tree_par, OID_AUTO, "features",
2074	    CTLFLAG_RD | CTLTYPE_STRING, ntb, 0, sysctl_handle_features, "A",
2075	    "Features/errata of this NTB device");
2076
2077	SYSCTL_ADD_UINT(ctx, tree_par, OID_AUTO, "ntb_ctl", CTLFLAG_RD,
2078	    __DEVOLATILE(uint32_t *, &ntb->ntb_ctl), 0,
2079	    "NTB CTL register (cached)");
2080	SYSCTL_ADD_UINT(ctx, tree_par, OID_AUTO, "lnk_sta", CTLFLAG_RD,
2081	    __DEVOLATILE(uint32_t *, &ntb->lnk_sta), 0,
2082	    "LNK STA register (cached)");
2083
2084	SYSCTL_ADD_PROC(ctx, tree_par, OID_AUTO, "link_status",
2085	    CTLFLAG_RD | CTLTYPE_STRING, ntb, 0, sysctl_handle_link_status,
2086	    "A", "Link status");
2087
2088#ifdef notyet
2089	SYSCTL_ADD_U8(ctx, tree_par, OID_AUTO, "mw_count", CTLFLAG_RD,
2090	    &ntb->mw_count, 0, "MW count");
2091	SYSCTL_ADD_U8(ctx, tree_par, OID_AUTO, "spad_count", CTLFLAG_RD,
2092	    &ntb->spad_count, 0, "Scratchpad count");
2093	SYSCTL_ADD_U8(ctx, tree_par, OID_AUTO, "db_count", CTLFLAG_RD,
2094	    &ntb->db_count, 0, "Doorbell count");
2095	SYSCTL_ADD_U8(ctx, tree_par, OID_AUTO, "db_vec_count", CTLFLAG_RD,
2096	    &ntb->db_vec_count, 0, "Doorbell vector count");
2097	SYSCTL_ADD_U8(ctx, tree_par, OID_AUTO, "db_vec_shift", CTLFLAG_RD,
2098	    &ntb->db_vec_shift, 0, "Doorbell vector shift");
2099#endif
2100
2101	SYSCTL_ADD_UQUAD(ctx, tree_par, OID_AUTO, "db_valid_mask", CTLFLAG_RD,
2102	    &ntb->db_valid_mask, "Doorbell valid mask");
2103	SYSCTL_ADD_UQUAD(ctx, tree_par, OID_AUTO, "db_link_mask", CTLFLAG_RD,
2104	    &ntb->db_link_mask, "Doorbell link mask");
2105	SYSCTL_ADD_UQUAD(ctx, tree_par, OID_AUTO, "db_mask", CTLFLAG_RD,
2106	    &ntb->db_mask, "Doorbell mask (cached)");
2107
2108	tmptree = SYSCTL_ADD_NODE(ctx, tree_par, OID_AUTO, "registers",
2109	    CTLFLAG_RD, NULL, "Raw HW registers (big-endian)");
2110	regpar = SYSCTL_CHILDREN(tmptree);
2111
2112	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "ntbcntl",
2113	    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb, NTB_REG_32 |
2114	    ntb->reg->ntb_ctl, sysctl_handle_register, "IU",
2115	    "NTB Control register");
2116	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "lnkcap",
2117	    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb, NTB_REG_32 |
2118	    0x19c, sysctl_handle_register, "IU",
2119	    "NTB Link Capabilities");
2120	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "lnkcon",
2121	    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb, NTB_REG_32 |
2122	    0x1a0, sysctl_handle_register, "IU",
2123	    "NTB Link Control register");
2124
2125	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "db_mask",
2126	    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
2127	    NTB_REG_64 | NTB_DB_READ | ntb->self_reg->db_mask,
2128	    sysctl_handle_register, "QU", "Doorbell mask register");
2129	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "db_bell",
2130	    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
2131	    NTB_REG_64 | NTB_DB_READ | ntb->self_reg->db_bell,
2132	    sysctl_handle_register, "QU", "Doorbell register");
2133
2134	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "incoming_xlat23",
2135	    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
2136	    NTB_REG_64 | ntb->xlat_reg->bar2_xlat,
2137	    sysctl_handle_register, "QU", "Incoming XLAT23 register");
2138	if (HAS_FEATURE(NTB_SPLIT_BAR)) {
2139		SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "incoming_xlat4",
2140		    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
2141		    NTB_REG_32 | ntb->xlat_reg->bar4_xlat,
2142		    sysctl_handle_register, "IU", "Incoming XLAT4 register");
2143		SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "incoming_xlat5",
2144		    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
2145		    NTB_REG_32 | ntb->xlat_reg->bar5_xlat,
2146		    sysctl_handle_register, "IU", "Incoming XLAT5 register");
2147	} else {
2148		SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "incoming_xlat45",
2149		    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
2150		    NTB_REG_64 | ntb->xlat_reg->bar4_xlat,
2151		    sysctl_handle_register, "QU", "Incoming XLAT45 register");
2152	}
2153
2154	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "incoming_lmt23",
2155	    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
2156	    NTB_REG_64 | ntb->xlat_reg->bar2_limit,
2157	    sysctl_handle_register, "QU", "Incoming LMT23 register");
2158	if (HAS_FEATURE(NTB_SPLIT_BAR)) {
2159		SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "incoming_lmt4",
2160		    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
2161		    NTB_REG_32 | ntb->xlat_reg->bar4_limit,
2162		    sysctl_handle_register, "IU", "Incoming LMT4 register");
2163		SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "incoming_lmt5",
2164		    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
2165		    NTB_REG_32 | ntb->xlat_reg->bar5_limit,
2166		    sysctl_handle_register, "IU", "Incoming LMT5 register");
2167	} else {
2168		SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "incoming_lmt45",
2169		    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
2170		    NTB_REG_64 | ntb->xlat_reg->bar4_limit,
2171		    sysctl_handle_register, "QU", "Incoming LMT45 register");
2172	}
2173
2174	if (ntb->type == NTB_ATOM)
2175		return;
2176
2177	tmptree = SYSCTL_ADD_NODE(ctx, regpar, OID_AUTO, "xeon_stats",
2178	    CTLFLAG_RD, NULL, "Xeon HW statistics");
2179	statpar = SYSCTL_CHILDREN(tmptree);
2180	SYSCTL_ADD_PROC(ctx, statpar, OID_AUTO, "upstream_mem_miss",
2181	    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
2182	    NTB_REG_16 | XEON_USMEMMISS_OFFSET,
2183	    sysctl_handle_register, "SU", "Upstream Memory Miss");
2184
2185	tmptree = SYSCTL_ADD_NODE(ctx, regpar, OID_AUTO, "xeon_hw_err",
2186	    CTLFLAG_RD, NULL, "Xeon HW errors");
2187	errpar = SYSCTL_CHILDREN(tmptree);
2188
2189	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "ppd",
2190	    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
2191	    NTB_REG_8 | NTB_PCI_REG | NTB_PPD_OFFSET,
2192	    sysctl_handle_register, "CU", "PPD");
2193
2194	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "pbar23_sz",
2195	    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
2196	    NTB_REG_8 | NTB_PCI_REG | XEON_PBAR23SZ_OFFSET,
2197	    sysctl_handle_register, "CU", "PBAR23 SZ (log2)");
2198	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "pbar4_sz",
2199	    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
2200	    NTB_REG_8 | NTB_PCI_REG | XEON_PBAR4SZ_OFFSET,
2201	    sysctl_handle_register, "CU", "PBAR4 SZ (log2)");
2202	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "pbar5_sz",
2203	    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
2204	    NTB_REG_8 | NTB_PCI_REG | XEON_PBAR5SZ_OFFSET,
2205	    sysctl_handle_register, "CU", "PBAR5 SZ (log2)");
2206
2207	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "sbar23_sz",
2208	    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
2209	    NTB_REG_8 | NTB_PCI_REG | XEON_SBAR23SZ_OFFSET,
2210	    sysctl_handle_register, "CU", "SBAR23 SZ (log2)");
2211	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "sbar4_sz",
2212	    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
2213	    NTB_REG_8 | NTB_PCI_REG | XEON_SBAR4SZ_OFFSET,
2214	    sysctl_handle_register, "CU", "SBAR4 SZ (log2)");
2215	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "sbar5_sz",
2216	    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
2217	    NTB_REG_8 | NTB_PCI_REG | XEON_SBAR5SZ_OFFSET,
2218	    sysctl_handle_register, "CU", "SBAR5 SZ (log2)");
2219
2220	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "devsts",
2221	    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
2222	    NTB_REG_16 | NTB_PCI_REG | XEON_DEVSTS_OFFSET,
2223	    sysctl_handle_register, "SU", "DEVSTS");
2224	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "lnksts",
2225	    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
2226	    NTB_REG_16 | NTB_PCI_REG | XEON_LINK_STATUS_OFFSET,
2227	    sysctl_handle_register, "SU", "LNKSTS");
2228	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "slnksts",
2229	    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
2230	    NTB_REG_16 | NTB_PCI_REG | XEON_SLINK_STATUS_OFFSET,
2231	    sysctl_handle_register, "SU", "SLNKSTS");
2232
2233	SYSCTL_ADD_PROC(ctx, errpar, OID_AUTO, "uncerrsts",
2234	    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
2235	    NTB_REG_32 | NTB_PCI_REG | XEON_UNCERRSTS_OFFSET,
2236	    sysctl_handle_register, "IU", "UNCERRSTS");
2237	SYSCTL_ADD_PROC(ctx, errpar, OID_AUTO, "corerrsts",
2238	    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
2239	    NTB_REG_32 | NTB_PCI_REG | XEON_CORERRSTS_OFFSET,
2240	    sysctl_handle_register, "IU", "CORERRSTS");
2241
2242	if (ntb->conn_type != NTB_CONN_B2B)
2243		return;
2244
2245	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "outgoing_xlat23",
2246	    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
2247	    NTB_REG_64 | ntb->bar_info[NTB_B2B_BAR_1].pbarxlat_off,
2248	    sysctl_handle_register, "QU", "Outgoing XLAT23 register");
2249	if (HAS_FEATURE(NTB_SPLIT_BAR)) {
2250		SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "outgoing_xlat4",
2251		    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
2252		    NTB_REG_32 | ntb->bar_info[NTB_B2B_BAR_2].pbarxlat_off,
2253		    sysctl_handle_register, "IU", "Outgoing XLAT4 register");
2254		SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "outgoing_xlat5",
2255		    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
2256		    NTB_REG_32 | ntb->bar_info[NTB_B2B_BAR_3].pbarxlat_off,
2257		    sysctl_handle_register, "IU", "Outgoing XLAT5 register");
2258	} else {
2259		SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "outgoing_xlat45",
2260		    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
2261		    NTB_REG_64 | ntb->bar_info[NTB_B2B_BAR_2].pbarxlat_off,
2262		    sysctl_handle_register, "QU", "Outgoing XLAT45 register");
2263	}
2264
2265	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "outgoing_lmt23",
2266	    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
2267	    NTB_REG_64 | XEON_PBAR2LMT_OFFSET,
2268	    sysctl_handle_register, "QU", "Outgoing LMT23 register");
2269	if (HAS_FEATURE(NTB_SPLIT_BAR)) {
2270		SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "outgoing_lmt4",
2271		    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
2272		    NTB_REG_32 | XEON_PBAR4LMT_OFFSET,
2273		    sysctl_handle_register, "IU", "Outgoing LMT4 register");
2274		SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "outgoing_lmt5",
2275		    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
2276		    NTB_REG_32 | XEON_PBAR5LMT_OFFSET,
2277		    sysctl_handle_register, "IU", "Outgoing LMT5 register");
2278	} else {
2279		SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "outgoing_lmt45",
2280		    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
2281		    NTB_REG_64 | XEON_PBAR4LMT_OFFSET,
2282		    sysctl_handle_register, "QU", "Outgoing LMT45 register");
2283	}
2284
2285	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "sbar01_base",
2286	    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
2287	    NTB_REG_64 | ntb->xlat_reg->bar0_base,
2288	    sysctl_handle_register, "QU", "Secondary BAR01 base register");
2289	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "sbar23_base",
2290	    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
2291	    NTB_REG_64 | ntb->xlat_reg->bar2_base,
2292	    sysctl_handle_register, "QU", "Secondary BAR23 base register");
2293	if (HAS_FEATURE(NTB_SPLIT_BAR)) {
2294		SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "sbar4_base",
2295		    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
2296		    NTB_REG_32 | ntb->xlat_reg->bar4_base,
2297		    sysctl_handle_register, "IU",
2298		    "Secondary BAR4 base register");
2299		SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "sbar5_base",
2300		    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
2301		    NTB_REG_32 | ntb->xlat_reg->bar5_base,
2302		    sysctl_handle_register, "IU",
2303		    "Secondary BAR5 base register");
2304	} else {
2305		SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "sbar45_base",
2306		    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
2307		    NTB_REG_64 | ntb->xlat_reg->bar4_base,
2308		    sysctl_handle_register, "QU",
2309		    "Secondary BAR45 base register");
2310	}
2311}
2312
2313static int
2314sysctl_handle_features(SYSCTL_HANDLER_ARGS)
2315{
2316	struct ntb_softc *ntb;
2317	struct sbuf sb;
2318	int error;
2319
2320	error = 0;
2321	ntb = arg1;
2322
2323	sbuf_new_for_sysctl(&sb, NULL, 256, req);
2324
2325	sbuf_printf(&sb, "%b", ntb->features, NTB_FEATURES_STR);
2326	error = sbuf_finish(&sb);
2327	sbuf_delete(&sb);
2328
2329	if (error || !req->newptr)
2330		return (error);
2331	return (EINVAL);
2332}
2333
2334static int
2335sysctl_handle_link_status(SYSCTL_HANDLER_ARGS)
2336{
2337	struct ntb_softc *ntb;
2338	struct sbuf sb;
2339	enum ntb_speed speed;
2340	enum ntb_width width;
2341	int error;
2342
2343	error = 0;
2344	ntb = arg1;
2345
2346	sbuf_new_for_sysctl(&sb, NULL, 32, req);
2347
2348	if (ntb_link_is_up(ntb, &speed, &width))
2349		sbuf_printf(&sb, "up / PCIe Gen %u / Width x%u",
2350		    (unsigned)speed, (unsigned)width);
2351	else
2352		sbuf_printf(&sb, "down");
2353
2354	error = sbuf_finish(&sb);
2355	sbuf_delete(&sb);
2356
2357	if (error || !req->newptr)
2358		return (error);
2359	return (EINVAL);
2360}
2361
2362static int
2363sysctl_handle_register(SYSCTL_HANDLER_ARGS)
2364{
2365	struct ntb_softc *ntb;
2366	const void *outp;
2367	uintptr_t sz;
2368	uint64_t umv;
2369	char be[sizeof(umv)];
2370	size_t outsz;
2371	uint32_t reg;
2372	bool db, pci;
2373	int error;
2374
2375	ntb = arg1;
2376	reg = arg2 & ~NTB_REGFLAGS_MASK;
2377	sz = arg2 & NTB_REGSZ_MASK;
2378	db = (arg2 & NTB_DB_READ) != 0;
2379	pci = (arg2 & NTB_PCI_REG) != 0;
2380
2381	KASSERT(!(db && pci), ("bogus"));
2382
2383	if (db) {
2384		KASSERT(sz == NTB_REG_64, ("bogus"));
2385		umv = db_ioread(ntb, reg);
2386		outsz = sizeof(uint64_t);
2387	} else {
2388		switch (sz) {
2389		case NTB_REG_64:
2390			if (pci)
2391				umv = pci_read_config(ntb->device, reg, 8);
2392			else
2393				umv = ntb_reg_read(8, reg);
2394			outsz = sizeof(uint64_t);
2395			break;
2396		case NTB_REG_32:
2397			if (pci)
2398				umv = pci_read_config(ntb->device, reg, 4);
2399			else
2400				umv = ntb_reg_read(4, reg);
2401			outsz = sizeof(uint32_t);
2402			break;
2403		case NTB_REG_16:
2404			if (pci)
2405				umv = pci_read_config(ntb->device, reg, 2);
2406			else
2407				umv = ntb_reg_read(2, reg);
2408			outsz = sizeof(uint16_t);
2409			break;
2410		case NTB_REG_8:
2411			if (pci)
2412				umv = pci_read_config(ntb->device, reg, 1);
2413			else
2414				umv = ntb_reg_read(1, reg);
2415			outsz = sizeof(uint8_t);
2416			break;
2417		default:
2418			panic("bogus");
2419			break;
2420		}
2421	}
2422
2423	/* Encode bigendian so that sysctl -x is legible. */
2424	be64enc(be, umv);
2425	outp = ((char *)be) + sizeof(umv) - outsz;
2426
2427	error = SYSCTL_OUT(req, outp, outsz);
2428	if (error || !req->newptr)
2429		return (error);
2430	return (EINVAL);
2431}
2432
2433static unsigned
2434ntb_user_mw_to_idx(struct ntb_softc *ntb, unsigned uidx)
2435{
2436
2437	if (ntb->b2b_mw_idx != B2B_MW_DISABLED && ntb->b2b_off == 0 &&
2438	    uidx >= ntb->b2b_mw_idx)
2439		return (uidx + 1);
2440	return (uidx);
2441}
2442
2443/*
2444 * Public API to the rest of the OS
2445 */
2446
2447/**
2448 * ntb_get_max_spads() - get the total scratch regs usable
2449 * @ntb: pointer to ntb_softc instance
2450 *
2451 * This function returns the max 32bit scratchpad registers usable by the
2452 * upper layer.
2453 *
2454 * RETURNS: total number of scratch pad registers available
2455 */
2456uint8_t
2457ntb_get_max_spads(struct ntb_softc *ntb)
2458{
2459
2460	return (ntb->spad_count);
2461}
2462
2463/*
2464 * ntb_mw_count() - Get the number of memory windows available for KPI
2465 * consumers.
2466 *
2467 * (Excludes any MW wholly reserved for register access.)
2468 */
2469uint8_t
2470ntb_mw_count(struct ntb_softc *ntb)
2471{
2472
2473	if (ntb->b2b_mw_idx != B2B_MW_DISABLED && ntb->b2b_off == 0)
2474		return (ntb->mw_count - 1);
2475	return (ntb->mw_count);
2476}
2477
2478/**
2479 * ntb_spad_write() - write to the secondary scratchpad register
2480 * @ntb: pointer to ntb_softc instance
2481 * @idx: index to the scratchpad register, 0 based
2482 * @val: the data value to put into the register
2483 *
2484 * This function allows writing of a 32bit value to the indexed scratchpad
2485 * register. The register resides on the secondary (external) side.
2486 *
2487 * RETURNS: An appropriate ERRNO error value on error, or zero for success.
2488 */
2489int
2490ntb_spad_write(struct ntb_softc *ntb, unsigned int idx, uint32_t val)
2491{
2492
2493	if (idx >= ntb->spad_count)
2494		return (EINVAL);
2495
2496	ntb_reg_write(4, ntb->self_reg->spad + idx * 4, val);
2497
2498	return (0);
2499}
2500
2501/**
2502 * ntb_spad_read() - read from the primary scratchpad register
2503 * @ntb: pointer to ntb_softc instance
2504 * @idx: index to scratchpad register, 0 based
2505 * @val: pointer to 32bit integer for storing the register value
2506 *
2507 * This function allows reading of the 32bit scratchpad register on
2508 * the primary (internal) side.
2509 *
2510 * RETURNS: An appropriate ERRNO error value on error, or zero for success.
2511 */
2512int
2513ntb_spad_read(struct ntb_softc *ntb, unsigned int idx, uint32_t *val)
2514{
2515
2516	if (idx >= ntb->spad_count)
2517		return (EINVAL);
2518
2519	*val = ntb_reg_read(4, ntb->self_reg->spad + idx * 4);
2520
2521	return (0);
2522}
2523
2524/**
2525 * ntb_peer_spad_write() - write to the secondary scratchpad register
2526 * @ntb: pointer to ntb_softc instance
2527 * @idx: index to the scratchpad register, 0 based
2528 * @val: the data value to put into the register
2529 *
2530 * This function allows writing of a 32bit value to the indexed scratchpad
2531 * register. The register resides on the secondary (external) side.
2532 *
2533 * RETURNS: An appropriate ERRNO error value on error, or zero for success.
2534 */
2535int
2536ntb_peer_spad_write(struct ntb_softc *ntb, unsigned int idx, uint32_t val)
2537{
2538
2539	if (idx >= ntb->spad_count)
2540		return (EINVAL);
2541
2542	if (HAS_FEATURE(NTB_SDOORBELL_LOCKUP))
2543		ntb_mw_write(4, XEON_SPAD_OFFSET + idx * 4, val);
2544	else
2545		ntb_reg_write(4, ntb->peer_reg->spad + idx * 4, val);
2546
2547	return (0);
2548}
2549
2550/**
2551 * ntb_peer_spad_read() - read from the primary scratchpad register
2552 * @ntb: pointer to ntb_softc instance
2553 * @idx: index to scratchpad register, 0 based
2554 * @val: pointer to 32bit integer for storing the register value
2555 *
2556 * This function allows reading of the 32bit scratchpad register on
2557 * the primary (internal) side.
2558 *
2559 * RETURNS: An appropriate ERRNO error value on error, or zero for success.
2560 */
2561int
2562ntb_peer_spad_read(struct ntb_softc *ntb, unsigned int idx, uint32_t *val)
2563{
2564
2565	if (idx >= ntb->spad_count)
2566		return (EINVAL);
2567
2568	if (HAS_FEATURE(NTB_SDOORBELL_LOCKUP))
2569		*val = ntb_mw_read(4, XEON_SPAD_OFFSET + idx * 4);
2570	else
2571		*val = ntb_reg_read(4, ntb->peer_reg->spad + idx * 4);
2572
2573	return (0);
2574}
2575
2576/*
2577 * ntb_mw_get_range() - get the range of a memory window
2578 * @ntb:        NTB device context
2579 * @idx:        Memory window number
2580 * @base:       OUT - the base address for mapping the memory window
2581 * @size:       OUT - the size for mapping the memory window
2582 * @align:      OUT - the base alignment for translating the memory window
2583 * @align_size: OUT - the size alignment for translating the memory window
2584 *
2585 * Get the range of a memory window.  NULL may be given for any output
2586 * parameter if the value is not needed.  The base and size may be used for
2587 * mapping the memory window, to access the peer memory.  The alignment and
2588 * size may be used for translating the memory window, for the peer to access
2589 * memory on the local system.
2590 *
2591 * Return: Zero on success, otherwise an error number.
2592 */
2593int
2594ntb_mw_get_range(struct ntb_softc *ntb, unsigned mw_idx, vm_paddr_t *base,
2595    caddr_t *vbase, size_t *size, size_t *align, size_t *align_size,
2596    bus_addr_t *plimit)
2597{
2598	struct ntb_pci_bar_info *bar;
2599	bus_addr_t limit;
2600	size_t bar_b2b_off;
2601	enum ntb_bar bar_num;
2602
2603	if (mw_idx >= ntb_mw_count(ntb))
2604		return (EINVAL);
2605	mw_idx = ntb_user_mw_to_idx(ntb, mw_idx);
2606
2607	bar_num = ntb_mw_to_bar(ntb, mw_idx);
2608	bar = &ntb->bar_info[bar_num];
2609	bar_b2b_off = 0;
2610	if (mw_idx == ntb->b2b_mw_idx) {
2611		KASSERT(ntb->b2b_off != 0,
2612		    ("user shouldn't get non-shared b2b mw"));
2613		bar_b2b_off = ntb->b2b_off;
2614	}
2615
2616	if (bar_is_64bit(ntb, bar_num))
2617		limit = BUS_SPACE_MAXADDR;
2618	else
2619		limit = BUS_SPACE_MAXADDR_32BIT;
2620
2621	if (base != NULL)
2622		*base = bar->pbase + bar_b2b_off;
2623	if (vbase != NULL)
2624		*vbase = bar->vbase + bar_b2b_off;
2625	if (size != NULL)
2626		*size = bar->size - bar_b2b_off;
2627	if (align != NULL)
2628		*align = bar->size;
2629	if (align_size != NULL)
2630		*align_size = 1;
2631	if (plimit != NULL)
2632		*plimit = limit;
2633	return (0);
2634}
2635
2636/*
2637 * ntb_mw_set_trans() - set the translation of a memory window
2638 * @ntb:        NTB device context
2639 * @idx:        Memory window number
2640 * @addr:       The dma address local memory to expose to the peer
2641 * @size:       The size of the local memory to expose to the peer
2642 *
2643 * Set the translation of a memory window.  The peer may access local memory
2644 * through the window starting at the address, up to the size.  The address
2645 * must be aligned to the alignment specified by ntb_mw_get_range().  The size
2646 * must be aligned to the size alignment specified by ntb_mw_get_range().  The
2647 * address must be below the plimit specified by ntb_mw_get_range() (i.e. for
2648 * 32-bit BARs).
2649 *
2650 * Return: Zero on success, otherwise an error number.
2651 */
2652int
2653ntb_mw_set_trans(struct ntb_softc *ntb, unsigned idx, bus_addr_t addr,
2654    size_t size)
2655{
2656	struct ntb_pci_bar_info *bar;
2657	uint64_t base, limit, reg_val;
2658	size_t bar_size, mw_size;
2659	uint32_t base_reg, xlat_reg, limit_reg;
2660	enum ntb_bar bar_num;
2661
2662	if (idx >= ntb_mw_count(ntb))
2663		return (EINVAL);
2664	idx = ntb_user_mw_to_idx(ntb, idx);
2665
2666	bar_num = ntb_mw_to_bar(ntb, idx);
2667	bar = &ntb->bar_info[bar_num];
2668
2669	bar_size = bar->size;
2670	if (idx == ntb->b2b_mw_idx)
2671		mw_size = bar_size - ntb->b2b_off;
2672	else
2673		mw_size = bar_size;
2674
2675	/* Hardware requires that addr is aligned to bar size */
2676	if ((addr & (bar_size - 1)) != 0)
2677		return (EINVAL);
2678
2679	if (size > mw_size)
2680		return (EINVAL);
2681
2682	bar_get_xlat_params(ntb, bar_num, &base_reg, &xlat_reg, &limit_reg);
2683
2684	limit = 0;
2685	if (bar_is_64bit(ntb, bar_num)) {
2686		base = ntb_reg_read(8, base_reg) & BAR_HIGH_MASK;
2687
2688		if (limit_reg != 0 && size != mw_size)
2689			limit = base + size;
2690
2691		/* Set and verify translation address */
2692		ntb_reg_write(8, xlat_reg, addr);
2693		reg_val = ntb_reg_read(8, xlat_reg) & BAR_HIGH_MASK;
2694		if (reg_val != addr) {
2695			ntb_reg_write(8, xlat_reg, 0);
2696			return (EIO);
2697		}
2698
2699		/* Set and verify the limit */
2700		ntb_reg_write(8, limit_reg, limit);
2701		reg_val = ntb_reg_read(8, limit_reg) & BAR_HIGH_MASK;
2702		if (reg_val != limit) {
2703			ntb_reg_write(8, limit_reg, base);
2704			ntb_reg_write(8, xlat_reg, 0);
2705			return (EIO);
2706		}
2707	} else {
2708		/* Configure 32-bit (split) BAR MW */
2709
2710		if ((addr & UINT32_MAX) != addr)
2711			return (ERANGE);
2712		if (((addr + size) & UINT32_MAX) != (addr + size))
2713			return (ERANGE);
2714
2715		base = ntb_reg_read(4, base_reg) & BAR_HIGH_MASK;
2716
2717		if (limit_reg != 0 && size != mw_size)
2718			limit = base + size;
2719
2720		/* Set and verify translation address */
2721		ntb_reg_write(4, xlat_reg, addr);
2722		reg_val = ntb_reg_read(4, xlat_reg) & BAR_HIGH_MASK;
2723		if (reg_val != addr) {
2724			ntb_reg_write(4, xlat_reg, 0);
2725			return (EIO);
2726		}
2727
2728		/* Set and verify the limit */
2729		ntb_reg_write(4, limit_reg, limit);
2730		reg_val = ntb_reg_read(4, limit_reg) & BAR_HIGH_MASK;
2731		if (reg_val != limit) {
2732			ntb_reg_write(4, limit_reg, base);
2733			ntb_reg_write(4, xlat_reg, 0);
2734			return (EIO);
2735		}
2736	}
2737	return (0);
2738}
2739
/*
 * ntb_mw_clear_trans() - clear the translation of a memory window
 * @ntb:	NTB device context
 * @mw_idx:	Memory window number
 *
 * Implemented as ntb_mw_set_trans() with a zero address and zero size, after
 * which the peer may no longer access local memory through the window.
 *
 * Return: Zero on success, otherwise an error number.
 */
int
ntb_mw_clear_trans(struct ntb_softc *ntb, unsigned mw_idx)
{
	int error;

	error = ntb_mw_set_trans(ntb, mw_idx, 0, 0);
	return (error);
}
2756
2757/*
2758 * ntb_mw_get_wc - Get the write-combine status of a memory window
2759 *
2760 * Returns:  Zero on success, setting *wc; otherwise an error number (e.g. if
2761 * idx is an invalid memory window).
2762 *
2763 * Mode is a VM_MEMATTR_* type.
2764 */
2765int
2766ntb_mw_get_wc(struct ntb_softc *ntb, unsigned idx, vm_memattr_t *mode)
2767{
2768	struct ntb_pci_bar_info *bar;
2769
2770	if (idx >= ntb_mw_count(ntb))
2771		return (EINVAL);
2772	idx = ntb_user_mw_to_idx(ntb, idx);
2773
2774	bar = &ntb->bar_info[ntb_mw_to_bar(ntb, idx)];
2775	*mode = bar->map_mode;
2776	return (0);
2777}
2778
2779/*
2780 * ntb_mw_set_wc - Set the write-combine status of a memory window
2781 *
2782 * If 'mode' matches the current status, this does nothing and succeeds.  Mode
2783 * is a VM_MEMATTR_* type.
2784 *
2785 * Returns:  Zero on success, setting the caching attribute on the virtual
2786 * mapping of the BAR; otherwise an error number (e.g. if idx is an invalid
2787 * memory window, or if changing the caching attribute fails).
2788 */
2789int
2790ntb_mw_set_wc(struct ntb_softc *ntb, unsigned idx, vm_memattr_t mode)
2791{
2792
2793	if (idx >= ntb_mw_count(ntb))
2794		return (EINVAL);
2795
2796	idx = ntb_user_mw_to_idx(ntb, idx);
2797	return (ntb_mw_set_wc_internal(ntb, idx, mode));
2798}
2799
2800static int
2801ntb_mw_set_wc_internal(struct ntb_softc *ntb, unsigned idx, vm_memattr_t mode)
2802{
2803	struct ntb_pci_bar_info *bar;
2804	int rc;
2805
2806	bar = &ntb->bar_info[ntb_mw_to_bar(ntb, idx)];
2807	if (bar->map_mode == mode)
2808		return (0);
2809
2810	rc = pmap_change_attr((vm_offset_t)bar->vbase, bar->size, mode);
2811	if (rc == 0)
2812		bar->map_mode = mode;
2813
2814	return (rc);
2815}
2816
2817/**
2818 * ntb_peer_db_set() - Set the doorbell on the secondary/external side
2819 * @ntb: pointer to ntb_softc instance
2820 * @bit: doorbell bits to ring
2821 *
2822 * This function allows triggering of a doorbell on the secondary/external
2823 * side that will initiate an interrupt on the remote host
2824 */
2825void
2826ntb_peer_db_set(struct ntb_softc *ntb, uint64_t bit)
2827{
2828
2829	if (HAS_FEATURE(NTB_SDOORBELL_LOCKUP)) {
2830		ntb_mw_write(2, XEON_PDOORBELL_OFFSET, bit);
2831		return;
2832	}
2833
2834	db_iowrite(ntb, ntb->peer_reg->db_bell, bit);
2835}
2836
2837/*
2838 * ntb_get_peer_db_addr() - Return the address of the remote doorbell register,
2839 * as well as the size of the register (via *sz_out).
2840 *
2841 * This function allows a caller using I/OAT DMA to chain the remote doorbell
2842 * ring to its memory window write.
2843 *
2844 * Note that writing the peer doorbell via a memory window will *not* generate
2845 * an interrupt on the remote host; that must be done seperately.
2846 */
2847bus_addr_t
2848ntb_get_peer_db_addr(struct ntb_softc *ntb, vm_size_t *sz_out)
2849{
2850	struct ntb_pci_bar_info *bar;
2851	uint64_t regoff;
2852
2853	KASSERT(sz_out != NULL, ("must be non-NULL"));
2854
2855	if (!HAS_FEATURE(NTB_SDOORBELL_LOCKUP)) {
2856		bar = &ntb->bar_info[NTB_CONFIG_BAR];
2857		regoff = ntb->peer_reg->db_bell;
2858	} else {
2859		KASSERT(ntb->b2b_mw_idx != B2B_MW_DISABLED,
2860		    ("invalid b2b idx"));
2861
2862		bar = &ntb->bar_info[ntb_mw_to_bar(ntb, ntb->b2b_mw_idx)];
2863		regoff = XEON_PDOORBELL_OFFSET;
2864	}
2865	KASSERT(bar->pci_bus_tag != X86_BUS_SPACE_IO, ("uh oh"));
2866
2867	*sz_out = ntb->reg->db_size;
2868	/* HACK: Specific to current x86 bus implementation. */
2869	return ((uint64_t)bar->pci_bus_handle + regoff);
2870}
2871
2872/*
2873 * ntb_db_valid_mask() - get a mask of doorbell bits supported by the ntb
2874 * @ntb:	NTB device context
2875 *
2876 * Hardware may support different number or arrangement of doorbell bits.
2877 *
2878 * Return: A mask of doorbell bits supported by the ntb.
2879 */
2880uint64_t
2881ntb_db_valid_mask(struct ntb_softc *ntb)
2882{
2883
2884	return (ntb->db_valid_mask);
2885}
2886
2887/*
2888 * ntb_db_vector_mask() - get a mask of doorbell bits serviced by a vector
2889 * @ntb:	NTB device context
2890 * @vector:	Doorbell vector number
2891 *
2892 * Each interrupt vector may have a different number or arrangement of bits.
2893 *
2894 * Return: A mask of doorbell bits serviced by a vector.
2895 */
2896uint64_t
2897ntb_db_vector_mask(struct ntb_softc *ntb, uint32_t vector)
2898{
2899
2900	if (vector > ntb->db_vec_count)
2901		return (0);
2902	return (ntb->db_valid_mask & ntb_vec_mask(ntb, vector));
2903}
2904
2905/**
2906 * ntb_link_is_up() - get the current ntb link state
2907 * @ntb:        NTB device context
2908 * @speed:      OUT - The link speed expressed as PCIe generation number
2909 * @width:      OUT - The link width expressed as the number of PCIe lanes
2910 *
2911 * RETURNS: true or false based on the hardware link state
2912 */
2913bool
2914ntb_link_is_up(struct ntb_softc *ntb, enum ntb_speed *speed,
2915    enum ntb_width *width)
2916{
2917
2918	if (speed != NULL)
2919		*speed = ntb_link_sta_speed(ntb);
2920	if (width != NULL)
2921		*width = ntb_link_sta_width(ntb);
2922	return (link_is_up(ntb));
2923}
2924
2925static void
2926save_bar_parameters(struct ntb_pci_bar_info *bar)
2927{
2928
2929	bar->pci_bus_tag = rman_get_bustag(bar->pci_resource);
2930	bar->pci_bus_handle = rman_get_bushandle(bar->pci_resource);
2931	bar->pbase = rman_get_start(bar->pci_resource);
2932	bar->size = rman_get_size(bar->pci_resource);
2933	bar->vbase = rman_get_virtual(bar->pci_resource);
2934}
2935
2936device_t
2937ntb_get_device(struct ntb_softc *ntb)
2938{
2939
2940	return (ntb->device);
2941}
2942
/*
 * Export HW-specific errata information: evaluates HAS_FEATURE() for the
 * given feature mask against this device.
 */
bool
ntb_has_feature(struct ntb_softc *ntb, uint32_t feature)
{
	bool present;

	present = HAS_FEATURE(feature);
	return (present);
}
2950