ntb_hw.c revision 300373
1/*-
2 * Copyright (C) 2013 Intel Corporation
3 * Copyright (C) 2015 EMC Corporation
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 *    notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 *    notice, this list of conditions and the following disclaimer in the
13 *    documentation and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25 * SUCH DAMAGE.
26 */
27
28#include <sys/cdefs.h>
29__FBSDID("$FreeBSD: stable/10/sys/dev/ntb/ntb_hw/ntb_hw.c 300373 2016-05-21 15:15:11Z mav $");
30
31#include <sys/param.h>
32#include <sys/kernel.h>
33#include <sys/systm.h>
34#include <sys/bus.h>
35#include <sys/endian.h>
36#include <sys/malloc.h>
37#include <sys/module.h>
38#include <sys/queue.h>
39#include <sys/rman.h>
40#include <sys/sbuf.h>
41#include <sys/sysctl.h>
42#include <vm/vm.h>
43#include <vm/pmap.h>
44#include <machine/bus.h>
45#include <machine/pmap.h>
46#include <machine/resource.h>
47#include <dev/pci/pcireg.h>
48#include <dev/pci/pcivar.h>
49
50#include "ntb_regs.h"
51#include "ntb_hw.h"
52
/*
 * The Non-Transparent Bridge (NTB) is a device on some Intel processors that
 * allows you to connect two systems using a PCI-e link.
 *
 * This module contains the hardware abstraction layer for the NTB. It allows
 * you to send and receive interrupts, map the memory windows and send and
 * receive messages in the scratch-pad registers.
 *
 * NOTE: Much of the code in this module is shared with Linux. Any patches may
 * be picked up and redistributed in Linux with a dual GPL/BSD license.
 */
64
/* Sizes the per-vector interrupt table: MAX of the Xeon and Atom DB counts. */
#define MAX_MSIX_INTERRUPTS	MAX(XEON_DB_COUNT, ATOM_DB_COUNT)

#define NTB_HB_TIMEOUT		1	/* second */
#define ATOM_LINK_RECOVERY_TIME	500	/* ms */
/* 64-bit mask with the low 12 bits cleared. */
#define BAR_HIGH_MASK		(~0xfffull)

/* Fetch this driver's softc from a device_t. */
#define DEVICE2SOFTC(dev)	((struct ntb_softc *)device_get_softc(dev))
72
/* Hardware family; attach uses it to pick the Xeon or Atom detect/init path. */
enum ntb_device_type {
	NTB_XEON = 0,
	NTB_ATOM = 1
};
77
/*
 * Connection topology.  These values are hardware numbers and therefore
 * cannot change.
 */
enum ntb_conn_type {
	NTB_CONN_TRANSPARENT	= 0,
	NTB_CONN_B2B		= 1,
	NTB_CONN_RP		= 2
};
84
/* Which side of a back-to-back link this device is on (USD or DSD). */
enum ntb_b2b_direction {
	NTB_DEV_USD	= 0,
	NTB_DEV_DSD	= 1
};
89
/*
 * Driver-internal BAR slots, used to index ntb_softc.bar_info[].
 * NTB_MAX_BARS is the slot count, not a valid slot.
 */
enum ntb_bar {
	NTB_CONFIG_BAR	= 0,
	NTB_B2B_BAR_1	= 1,
	NTB_B2B_BAR_2	= 2,
	NTB_B2B_BAR_3	= 3,
	NTB_MAX_BARS	= 4
};
97
/*
 * Device features and workarounds.
 *
 * Evaluates to 1 when any bit of 'feature' is set in ntb->features, else 0.
 * A variable named 'ntb' must be in scope where the macro is used.
 */
#define HAS_FEATURE(feature)	(!!(ntb->features & (feature)))
101
102struct ntb_hw_info {
103	uint32_t		device_id;
104	const char		*desc;
105	enum ntb_device_type	type;
106	uint32_t		features;
107};
108
109struct ntb_pci_bar_info {
110	bus_space_tag_t		pci_bus_tag;
111	bus_space_handle_t	pci_bus_handle;
112	int			pci_resource_id;
113	struct resource		*pci_resource;
114	vm_paddr_t		pbase;
115	caddr_t			vbase;
116	vm_size_t		size;
117	vm_memattr_t		map_mode;
118
119	/* Configuration register offsets */
120	uint32_t		psz_off;
121	uint32_t		ssz_off;
122	uint32_t		pbarxlat_off;
123};
124
/* State of one allocated interrupt (legacy INTx or a single MSI-X vector). */
struct ntb_int_info {
	struct resource	*res;	/* IRQ resource from bus_alloc_resource_any() */
	int		 rid;	/* resource id: 0 for INTx, index + 1 for MSI-X */
	void		*tag;	/* cookie filled in by bus_setup_intr() */
};
130
/* Per-vector argument handed to the MSI-X interrupt handler. */
struct ntb_vec {
	struct ntb_softc	*ntb;	/* owning device */
	uint32_t		 num;	/* vector number */
};
135
136struct ntb_reg {
137	uint32_t	ntb_ctl;
138	uint32_t	lnk_sta;
139	uint8_t		db_size;
140	unsigned	mw_bar[NTB_MAX_BARS];
141};
142
/* Register offsets for one side's doorbell and scratchpad registers. */
struct ntb_alt_reg {
	uint32_t	db_bell;	/* doorbell register */
	uint32_t	db_mask;	/* doorbell mask register */
	uint32_t	spad;		/* first scratchpad register */
};
148
/* Register offsets of the BAR base, translation and limit registers. */
struct ntb_xlat_reg {
	/* Base address registers. */
	uint32_t	bar0_base;
	uint32_t	bar2_base;
	uint32_t	bar4_base;
	uint32_t	bar5_base;

	/* Translation registers. */
	uint32_t	bar2_xlat;
	uint32_t	bar4_xlat;
	uint32_t	bar5_xlat;

	/* Limit registers. */
	uint32_t	bar2_limit;
	uint32_t	bar4_limit;
	uint32_t	bar5_limit;
};
163
/* Addresses used for the BARs in a back-to-back (B2B) setup. */
struct ntb_b2b_addr {
	uint64_t	bar0_addr;
	uint64_t	bar2_addr64;
	uint64_t	bar4_addr64;
	uint64_t	bar4_addr32;	/* BAR4 in split-BAR mode */
	uint64_t	bar5_addr32;	/* BAR5 in split-BAR mode */
};
171
172struct ntb_softc {
173	device_t		device;
174	enum ntb_device_type	type;
175	uint32_t		features;
176
177	struct ntb_pci_bar_info	bar_info[NTB_MAX_BARS];
178	struct ntb_int_info	int_info[MAX_MSIX_INTERRUPTS];
179	uint32_t		allocated_interrupts;
180
181	struct callout		heartbeat_timer;
182	struct callout		lr_timer;
183
184	void			*ntb_ctx;
185	const struct ntb_ctx_ops *ctx_ops;
186	struct ntb_vec		*msix_vec;
187#define CTX_LOCK(sc)		mtx_lock(&(sc)->ctx_lock)
188#define CTX_UNLOCK(sc)		mtx_unlock(&(sc)->ctx_lock)
189#define CTX_ASSERT(sc,f)	mtx_assert(&(sc)->ctx_lock, (f))
190	struct mtx		ctx_lock;
191
192	uint32_t		ppd;
193	enum ntb_conn_type	conn_type;
194	enum ntb_b2b_direction	dev_type;
195
196	/* Offset of peer bar0 in B2B BAR */
197	uint64_t			b2b_off;
198	/* Memory window used to access peer bar0 */
199#define B2B_MW_DISABLED			UINT8_MAX
200	uint8_t				b2b_mw_idx;
201
202	uint8_t				mw_count;
203	uint8_t				spad_count;
204	uint8_t				db_count;
205	uint8_t				db_vec_count;
206	uint8_t				db_vec_shift;
207
208	/* Protects local db_mask. */
209#define DB_MASK_LOCK(sc)	mtx_lock_spin(&(sc)->db_mask_lock)
210#define DB_MASK_UNLOCK(sc)	mtx_unlock_spin(&(sc)->db_mask_lock)
211#define DB_MASK_ASSERT(sc,f)	mtx_assert(&(sc)->db_mask_lock, (f))
212	struct mtx			db_mask_lock;
213
214	volatile uint32_t		ntb_ctl;
215	volatile uint32_t		lnk_sta;
216
217	uint64_t			db_valid_mask;
218	uint64_t			db_link_mask;
219	uint64_t			db_mask;
220
221	int				last_ts;	/* ticks @ last irq */
222
223	const struct ntb_reg		*reg;
224	const struct ntb_alt_reg	*self_reg;
225	const struct ntb_alt_reg	*peer_reg;
226	const struct ntb_xlat_reg	*xlat_reg;
227};
228
#ifdef __i386__
/*
 * 8-byte bus_space accessors for i386 builds, composed from two 4-byte
 * accesses: the low word lives at 'offset', the high word at 'offset + 4'.
 * The two halves are separate bus accesses.
 */
static __inline uint64_t
bus_space_read_8(bus_space_tag_t tag, bus_space_handle_t handle,
    bus_size_t offset)
{
	uint64_t hi, lo;

	lo = bus_space_read_4(tag, handle, offset);
	hi = bus_space_read_4(tag, handle, offset + 4);
	return (lo | (hi << 32));
}

static __inline void
bus_space_write_8(bus_space_tag_t tag, bus_space_handle_t handle,
    bus_size_t offset, uint64_t val)
{
	uint32_t hi, lo;

	lo = (uint32_t)val;
	hi = (uint32_t)(val >> 32);
	bus_space_write_4(tag, handle, offset, lo);
	bus_space_write_4(tag, handle, offset + 4, hi);
}
#endif
248
/*
 * Register access helpers.  SIZE is the access width suffix of the
 * bus_space_read_N/bus_space_write_N routine to use.  All of these expect a
 * softc pointer named 'ntb' to be in scope at the point of use.
 */

/* Access 'offset' within an arbitrary BAR slot. */
#define ntb_bar_read(SIZE, bar, offset)					\
	bus_space_read_ ## SIZE (ntb->bar_info[(bar)].pci_bus_tag,	\
	    ntb->bar_info[(bar)].pci_bus_handle, (offset))
#define ntb_bar_write(SIZE, bar, offset, val)				\
	bus_space_write_ ## SIZE (ntb->bar_info[(bar)].pci_bus_tag,	\
	    ntb->bar_info[(bar)].pci_bus_handle, (offset), (val))

/* Access the device's own registers through the config BAR. */
#define ntb_reg_read(SIZE, offset)					\
	ntb_bar_read(SIZE, NTB_CONFIG_BAR, offset)
#define ntb_reg_write(SIZE, offset, val)				\
	ntb_bar_write(SIZE, NTB_CONFIG_BAR, offset, val)

/* Access through the BAR that backs memory window ntb->b2b_mw_idx. */
#define ntb_mw_read(SIZE, offset)					\
	ntb_bar_read(SIZE, ntb_mw_to_bar(ntb, ntb->b2b_mw_idx), offset)
#define ntb_mw_write(SIZE, offset, val)					\
	ntb_bar_write(SIZE, ntb_mw_to_bar(ntb, ntb->b2b_mw_idx),	\
	    offset, val)
263
264static int ntb_probe(device_t device);
265static int ntb_attach(device_t device);
266static int ntb_detach(device_t device);
267static unsigned ntb_user_mw_to_idx(struct ntb_softc *, unsigned uidx);
268static inline enum ntb_bar ntb_mw_to_bar(struct ntb_softc *, unsigned mw);
269static inline bool bar_is_64bit(struct ntb_softc *, enum ntb_bar);
270static inline void bar_get_xlat_params(struct ntb_softc *, enum ntb_bar,
271    uint32_t *base, uint32_t *xlat, uint32_t *lmt);
272static int ntb_map_pci_bars(struct ntb_softc *ntb);
273static int ntb_mw_set_wc_internal(struct ntb_softc *, unsigned idx,
274    vm_memattr_t);
275static void print_map_success(struct ntb_softc *, struct ntb_pci_bar_info *,
276    const char *);
277static int map_mmr_bar(struct ntb_softc *ntb, struct ntb_pci_bar_info *bar);
278static int map_memory_window_bar(struct ntb_softc *ntb,
279    struct ntb_pci_bar_info *bar);
280static void ntb_unmap_pci_bar(struct ntb_softc *ntb);
281static int ntb_remap_msix(device_t, uint32_t desired, uint32_t avail);
282static int ntb_init_isr(struct ntb_softc *ntb);
283static int ntb_setup_legacy_interrupt(struct ntb_softc *ntb);
284static int ntb_setup_msix(struct ntb_softc *ntb, uint32_t num_vectors);
285static void ntb_teardown_interrupts(struct ntb_softc *ntb);
286static inline uint64_t ntb_vec_mask(struct ntb_softc *, uint64_t db_vector);
287static void ntb_interrupt(struct ntb_softc *, uint32_t vec);
288static void ndev_vec_isr(void *arg);
289static void ndev_irq_isr(void *arg);
290static inline uint64_t db_ioread(struct ntb_softc *, uint64_t regoff);
291static inline void db_iowrite(struct ntb_softc *, uint64_t regoff, uint64_t);
292static inline void db_iowrite_raw(struct ntb_softc *, uint64_t regoff, uint64_t);
293static int ntb_create_msix_vec(struct ntb_softc *ntb, uint32_t num_vectors);
294static void ntb_free_msix_vec(struct ntb_softc *ntb);
295static struct ntb_hw_info *ntb_get_device_info(uint32_t device_id);
296static void ntb_detect_max_mw(struct ntb_softc *ntb);
297static int ntb_detect_xeon(struct ntb_softc *ntb);
298static int ntb_detect_atom(struct ntb_softc *ntb);
299static int ntb_xeon_init_dev(struct ntb_softc *ntb);
300static int ntb_atom_init_dev(struct ntb_softc *ntb);
301static void ntb_teardown_xeon(struct ntb_softc *ntb);
302static void configure_atom_secondary_side_bars(struct ntb_softc *ntb);
303static void xeon_reset_sbar_size(struct ntb_softc *, enum ntb_bar idx,
304    enum ntb_bar regbar);
305static void xeon_set_sbar_base_and_limit(struct ntb_softc *,
306    uint64_t base_addr, enum ntb_bar idx, enum ntb_bar regbar);
307static void xeon_set_pbar_xlat(struct ntb_softc *, uint64_t base_addr,
308    enum ntb_bar idx);
309static int xeon_setup_b2b_mw(struct ntb_softc *,
310    const struct ntb_b2b_addr *addr, const struct ntb_b2b_addr *peer_addr);
311static inline bool link_is_up(struct ntb_softc *ntb);
312static inline bool atom_link_is_err(struct ntb_softc *ntb);
313static inline enum ntb_speed ntb_link_sta_speed(struct ntb_softc *);
314static inline enum ntb_width ntb_link_sta_width(struct ntb_softc *);
315static void atom_link_hb(void *arg);
316static void ntb_db_event(struct ntb_softc *ntb, uint32_t vec);
317static void recover_atom_link(void *arg);
318static bool ntb_poll_link(struct ntb_softc *ntb);
319static void save_bar_parameters(struct ntb_pci_bar_info *bar);
320static void ntb_sysctl_init(struct ntb_softc *);
321static int sysctl_handle_features(SYSCTL_HANDLER_ARGS);
322static int sysctl_handle_link_status(SYSCTL_HANDLER_ARGS);
323static int sysctl_handle_register(SYSCTL_HANDLER_ARGS);
324
325static unsigned g_ntb_hw_debug_level;
326SYSCTL_UINT(_hw_ntb, OID_AUTO, debug_level, CTLFLAG_RWTUN,
327    &g_ntb_hw_debug_level, 0, "ntb_hw log level -- higher is more verbose");
328#define ntb_printf(lvl, ...) do {				\
329	if ((lvl) <= g_ntb_hw_debug_level) {			\
330		device_printf(ntb->device, __VA_ARGS__);	\
331	}							\
332} while (0)
333
334#define	_NTB_PAT_UC	0
335#define	_NTB_PAT_WC	1
336#define	_NTB_PAT_WT	4
337#define	_NTB_PAT_WP	5
338#define	_NTB_PAT_WB	6
339#define	_NTB_PAT_UCM	7
340static unsigned g_ntb_mw_pat = _NTB_PAT_UC;
341SYSCTL_UINT(_hw_ntb, OID_AUTO, default_mw_pat, CTLFLAG_RDTUN,
342    &g_ntb_mw_pat, 0, "Configure the default memory window cache flags (PAT): "
343    "UC: "  __XSTRING(_NTB_PAT_UC) ", "
344    "WC: "  __XSTRING(_NTB_PAT_WC) ", "
345    "WT: "  __XSTRING(_NTB_PAT_WT) ", "
346    "WP: "  __XSTRING(_NTB_PAT_WP) ", "
347    "WB: "  __XSTRING(_NTB_PAT_WB) ", "
348    "UC-: " __XSTRING(_NTB_PAT_UCM));
349
350static inline vm_memattr_t
351ntb_pat_flags(void)
352{
353
354	switch (g_ntb_mw_pat) {
355	case _NTB_PAT_WC:
356		return (VM_MEMATTR_WRITE_COMBINING);
357	case _NTB_PAT_WT:
358		return (VM_MEMATTR_WRITE_THROUGH);
359	case _NTB_PAT_WP:
360		return (VM_MEMATTR_WRITE_PROTECTED);
361	case _NTB_PAT_WB:
362		return (VM_MEMATTR_WRITE_BACK);
363	case _NTB_PAT_UCM:
364		return (VM_MEMATTR_WEAK_UNCACHEABLE);
365	case _NTB_PAT_UC:
366		/* FALLTHROUGH */
367	default:
368		return (VM_MEMATTR_UNCACHEABLE);
369	}
370}
371
372/*
373 * Well, this obviously doesn't belong here, but it doesn't seem to exist
374 * anywhere better yet.
375 */
376static inline const char *
377ntb_vm_memattr_to_str(vm_memattr_t pat)
378{
379
380	switch (pat) {
381	case VM_MEMATTR_WRITE_COMBINING:
382		return ("WRITE_COMBINING");
383	case VM_MEMATTR_WRITE_THROUGH:
384		return ("WRITE_THROUGH");
385	case VM_MEMATTR_WRITE_PROTECTED:
386		return ("WRITE_PROTECTED");
387	case VM_MEMATTR_WRITE_BACK:
388		return ("WRITE_BACK");
389	case VM_MEMATTR_WEAK_UNCACHEABLE:
390		return ("UNCACHED");
391	case VM_MEMATTR_UNCACHEABLE:
392		return ("UNCACHEABLE");
393	default:
394		return ("UNKNOWN");
395	}
396}
397
398static int g_ntb_mw_idx = -1;
399SYSCTL_INT(_hw_ntb, OID_AUTO, b2b_mw_idx, CTLFLAG_RDTUN, &g_ntb_mw_idx,
400    0, "Use this memory window to access the peer NTB registers.  A "
401    "non-negative value starts from the first MW index; a negative value "
402    "starts from the last MW index.  The default is -1, i.e., the last "
403    "available memory window.  Both sides of the NTB MUST set the same "
404    "value here!  (Applies on Xeon platforms with SDOORBELL_LOCKUP errata.)");
405
406static struct ntb_hw_info pci_ids[] = {
407	/* XXX: PS/SS IDs left out until they are supported. */
408	{ 0x0C4E8086, "BWD Atom Processor S1200 Non-Transparent Bridge B2B",
409		NTB_ATOM, 0 },
410
411	{ 0x37258086, "JSF Xeon C35xx/C55xx Non-Transparent Bridge B2B",
412		NTB_XEON, NTB_SDOORBELL_LOCKUP | NTB_B2BDOORBELL_BIT14 },
413	{ 0x3C0D8086, "SNB Xeon E5/Core i7 Non-Transparent Bridge B2B",
414		NTB_XEON, NTB_SDOORBELL_LOCKUP | NTB_B2BDOORBELL_BIT14 },
415	{ 0x0E0D8086, "IVT Xeon E5 V2 Non-Transparent Bridge B2B", NTB_XEON,
416		NTB_SDOORBELL_LOCKUP | NTB_B2BDOORBELL_BIT14 |
417		    NTB_SB01BASE_LOCKUP | NTB_BAR_SIZE_4K },
418	{ 0x2F0D8086, "HSX Xeon E5 V3 Non-Transparent Bridge B2B", NTB_XEON,
419		NTB_SDOORBELL_LOCKUP | NTB_B2BDOORBELL_BIT14 |
420		    NTB_SB01BASE_LOCKUP },
421	{ 0x6F0D8086, "BDX Xeon E5 V4 Non-Transparent Bridge B2B", NTB_XEON,
422		NTB_SDOORBELL_LOCKUP | NTB_B2BDOORBELL_BIT14 |
423		    NTB_SB01BASE_LOCKUP },
424
425	{ 0x00000000, NULL, NTB_ATOM, 0 }
426};
427
428static const struct ntb_reg atom_reg = {
429	.ntb_ctl = ATOM_NTBCNTL_OFFSET,
430	.lnk_sta = ATOM_LINK_STATUS_OFFSET,
431	.db_size = sizeof(uint64_t),
432	.mw_bar = { NTB_B2B_BAR_1, NTB_B2B_BAR_2 },
433};
434
435static const struct ntb_alt_reg atom_pri_reg = {
436	.db_bell = ATOM_PDOORBELL_OFFSET,
437	.db_mask = ATOM_PDBMSK_OFFSET,
438	.spad = ATOM_SPAD_OFFSET,
439};
440
441static const struct ntb_alt_reg atom_b2b_reg = {
442	.db_bell = ATOM_B2B_DOORBELL_OFFSET,
443	.spad = ATOM_B2B_SPAD_OFFSET,
444};
445
446static const struct ntb_xlat_reg atom_sec_xlat = {
447#if 0
448	/* "FIXME" says the Linux driver. */
449	.bar0_base = ATOM_SBAR0BASE_OFFSET,
450	.bar2_base = ATOM_SBAR2BASE_OFFSET,
451	.bar4_base = ATOM_SBAR4BASE_OFFSET,
452
453	.bar2_limit = ATOM_SBAR2LMT_OFFSET,
454	.bar4_limit = ATOM_SBAR4LMT_OFFSET,
455#endif
456
457	.bar2_xlat = ATOM_SBAR2XLAT_OFFSET,
458	.bar4_xlat = ATOM_SBAR4XLAT_OFFSET,
459};
460
461static const struct ntb_reg xeon_reg = {
462	.ntb_ctl = XEON_NTBCNTL_OFFSET,
463	.lnk_sta = XEON_LINK_STATUS_OFFSET,
464	.db_size = sizeof(uint16_t),
465	.mw_bar = { NTB_B2B_BAR_1, NTB_B2B_BAR_2, NTB_B2B_BAR_3 },
466};
467
468static const struct ntb_alt_reg xeon_pri_reg = {
469	.db_bell = XEON_PDOORBELL_OFFSET,
470	.db_mask = XEON_PDBMSK_OFFSET,
471	.spad = XEON_SPAD_OFFSET,
472};
473
474static const struct ntb_alt_reg xeon_b2b_reg = {
475	.db_bell = XEON_B2B_DOORBELL_OFFSET,
476	.spad = XEON_B2B_SPAD_OFFSET,
477};
478
479static const struct ntb_xlat_reg xeon_sec_xlat = {
480	.bar0_base = XEON_SBAR0BASE_OFFSET,
481	.bar2_base = XEON_SBAR2BASE_OFFSET,
482	.bar4_base = XEON_SBAR4BASE_OFFSET,
483	.bar5_base = XEON_SBAR5BASE_OFFSET,
484
485	.bar2_limit = XEON_SBAR2LMT_OFFSET,
486	.bar4_limit = XEON_SBAR4LMT_OFFSET,
487	.bar5_limit = XEON_SBAR5LMT_OFFSET,
488
489	.bar2_xlat = XEON_SBAR2XLAT_OFFSET,
490	.bar4_xlat = XEON_SBAR4XLAT_OFFSET,
491	.bar5_xlat = XEON_SBAR5XLAT_OFFSET,
492};
493
494static struct ntb_b2b_addr xeon_b2b_usd_addr = {
495	.bar0_addr = XEON_B2B_BAR0_ADDR,
496	.bar2_addr64 = XEON_B2B_BAR2_ADDR64,
497	.bar4_addr64 = XEON_B2B_BAR4_ADDR64,
498	.bar4_addr32 = XEON_B2B_BAR4_ADDR32,
499	.bar5_addr32 = XEON_B2B_BAR5_ADDR32,
500};
501
502static struct ntb_b2b_addr xeon_b2b_dsd_addr = {
503	.bar0_addr = XEON_B2B_BAR0_ADDR,
504	.bar2_addr64 = XEON_B2B_BAR2_ADDR64,
505	.bar4_addr64 = XEON_B2B_BAR4_ADDR64,
506	.bar4_addr32 = XEON_B2B_BAR4_ADDR32,
507	.bar5_addr32 = XEON_B2B_BAR5_ADDR32,
508};
509
510SYSCTL_NODE(_hw_ntb, OID_AUTO, xeon_b2b, CTLFLAG_RW, 0,
511    "B2B MW segment overrides -- MUST be the same on both sides");
512
513SYSCTL_UQUAD(_hw_ntb_xeon_b2b, OID_AUTO, usd_bar2_addr64, CTLFLAG_RDTUN,
514    &xeon_b2b_usd_addr.bar2_addr64, 0, "If using B2B topology on Xeon "
515    "hardware, use this 64-bit address on the bus between the NTB devices for "
516    "the window at BAR2, on the upstream side of the link.  MUST be the same "
517    "address on both sides.");
518SYSCTL_UQUAD(_hw_ntb_xeon_b2b, OID_AUTO, usd_bar4_addr64, CTLFLAG_RDTUN,
519    &xeon_b2b_usd_addr.bar4_addr64, 0, "See usd_bar2_addr64, but BAR4.");
520SYSCTL_UQUAD(_hw_ntb_xeon_b2b, OID_AUTO, usd_bar4_addr32, CTLFLAG_RDTUN,
521    &xeon_b2b_usd_addr.bar4_addr32, 0, "See usd_bar2_addr64, but BAR4 "
522    "(split-BAR mode).");
523SYSCTL_UQUAD(_hw_ntb_xeon_b2b, OID_AUTO, usd_bar5_addr32, CTLFLAG_RDTUN,
524    &xeon_b2b_usd_addr.bar5_addr32, 0, "See usd_bar2_addr64, but BAR5 "
525    "(split-BAR mode).");
526
527SYSCTL_UQUAD(_hw_ntb_xeon_b2b, OID_AUTO, dsd_bar2_addr64, CTLFLAG_RDTUN,
528    &xeon_b2b_dsd_addr.bar2_addr64, 0, "If using B2B topology on Xeon "
529    "hardware, use this 64-bit address on the bus between the NTB devices for "
530    "the window at BAR2, on the downstream side of the link.  MUST be the same"
531    " address on both sides.");
532SYSCTL_UQUAD(_hw_ntb_xeon_b2b, OID_AUTO, dsd_bar4_addr64, CTLFLAG_RDTUN,
533    &xeon_b2b_dsd_addr.bar4_addr64, 0, "See dsd_bar2_addr64, but BAR4.");
534SYSCTL_UQUAD(_hw_ntb_xeon_b2b, OID_AUTO, dsd_bar4_addr32, CTLFLAG_RDTUN,
535    &xeon_b2b_dsd_addr.bar4_addr32, 0, "See dsd_bar2_addr64, but BAR4 "
536    "(split-BAR mode).");
537SYSCTL_UQUAD(_hw_ntb_xeon_b2b, OID_AUTO, dsd_bar5_addr32, CTLFLAG_RDTUN,
538    &xeon_b2b_dsd_addr.bar5_addr32, 0, "See dsd_bar2_addr64, but BAR5 "
539    "(split-BAR mode).");
540
541/*
542 * OS <-> Driver interface structures
543 */
544MALLOC_DEFINE(M_NTB, "ntb_hw", "ntb_hw driver memory allocations");
545
546static device_method_t ntb_pci_methods[] = {
547	/* Device interface */
548	DEVMETHOD(device_probe,     ntb_probe),
549	DEVMETHOD(device_attach,    ntb_attach),
550	DEVMETHOD(device_detach,    ntb_detach),
551	DEVMETHOD_END
552};
553
554static driver_t ntb_pci_driver = {
555	"ntb_hw",
556	ntb_pci_methods,
557	sizeof(struct ntb_softc),
558};
559
560static devclass_t ntb_devclass;
561DRIVER_MODULE(ntb_hw, pci, ntb_pci_driver, ntb_devclass, NULL, NULL);
562MODULE_VERSION(ntb_hw, 1);
563
564SYSCTL_NODE(_hw, OID_AUTO, ntb, CTLFLAG_RW, 0, "NTB sysctls");
565
566/*
567 * OS <-> Driver linkage functions
568 */
569static int
570ntb_probe(device_t device)
571{
572	struct ntb_hw_info *p;
573
574	p = ntb_get_device_info(pci_get_devid(device));
575	if (p == NULL)
576		return (ENXIO);
577
578	device_set_desc(device, p->desc);
579	return (0);
580}
581
582static int
583ntb_attach(device_t device)
584{
585	struct ntb_softc *ntb;
586	struct ntb_hw_info *p;
587	int error;
588
589	ntb = DEVICE2SOFTC(device);
590	p = ntb_get_device_info(pci_get_devid(device));
591
592	ntb->device = device;
593	ntb->type = p->type;
594	ntb->features = p->features;
595	ntb->b2b_mw_idx = B2B_MW_DISABLED;
596
597	/* Heartbeat timer for NTB_ATOM since there is no link interrupt */
598	callout_init(&ntb->heartbeat_timer, CALLOUT_MPSAFE);
599	callout_init(&ntb->lr_timer, CALLOUT_MPSAFE);
600	mtx_init(&ntb->db_mask_lock, "ntb hw bits", NULL, MTX_SPIN);
601	mtx_init(&ntb->ctx_lock, "ntb ctx", NULL, MTX_DEF);
602
603	if (ntb->type == NTB_ATOM)
604		error = ntb_detect_atom(ntb);
605	else
606		error = ntb_detect_xeon(ntb);
607	if (error != 0)
608		goto out;
609
610	ntb_detect_max_mw(ntb);
611
612	pci_enable_busmaster(ntb->device);
613
614	error = ntb_map_pci_bars(ntb);
615	if (error != 0)
616		goto out;
617	if (ntb->type == NTB_ATOM)
618		error = ntb_atom_init_dev(ntb);
619	else
620		error = ntb_xeon_init_dev(ntb);
621	if (error != 0)
622		goto out;
623
624	ntb_poll_link(ntb);
625
626	ntb_sysctl_init(ntb);
627
628out:
629	if (error != 0)
630		ntb_detach(device);
631	return (error);
632}
633
634static int
635ntb_detach(device_t device)
636{
637	struct ntb_softc *ntb;
638
639	ntb = DEVICE2SOFTC(device);
640
641	if (ntb->self_reg != NULL)
642		ntb_db_set_mask(ntb, ntb->db_valid_mask);
643	callout_drain(&ntb->heartbeat_timer);
644	callout_drain(&ntb->lr_timer);
645	pci_disable_busmaster(ntb->device);
646	if (ntb->type == NTB_XEON)
647		ntb_teardown_xeon(ntb);
648	ntb_teardown_interrupts(ntb);
649
650	mtx_destroy(&ntb->db_mask_lock);
651	mtx_destroy(&ntb->ctx_lock);
652
653	ntb_unmap_pci_bar(ntb);
654
655	return (0);
656}
657
658/*
659 * Driver internal routines
660 */
661static inline enum ntb_bar
662ntb_mw_to_bar(struct ntb_softc *ntb, unsigned mw)
663{
664
665	KASSERT(mw < ntb->mw_count,
666	    ("%s: mw:%u > count:%u", __func__, mw, (unsigned)ntb->mw_count));
667	KASSERT(ntb->reg->mw_bar[mw] != 0, ("invalid mw"));
668
669	return (ntb->reg->mw_bar[mw]);
670}
671
672static inline bool
673bar_is_64bit(struct ntb_softc *ntb, enum ntb_bar bar)
674{
675	/* XXX This assertion could be stronger. */
676	KASSERT(bar < NTB_MAX_BARS, ("bogus bar"));
677	return (bar < NTB_B2B_BAR_2 || !HAS_FEATURE(NTB_SPLIT_BAR));
678}
679
680static inline void
681bar_get_xlat_params(struct ntb_softc *ntb, enum ntb_bar bar, uint32_t *base,
682    uint32_t *xlat, uint32_t *lmt)
683{
684	uint32_t basev, lmtv, xlatv;
685
686	switch (bar) {
687	case NTB_B2B_BAR_1:
688		basev = ntb->xlat_reg->bar2_base;
689		lmtv = ntb->xlat_reg->bar2_limit;
690		xlatv = ntb->xlat_reg->bar2_xlat;
691		break;
692	case NTB_B2B_BAR_2:
693		basev = ntb->xlat_reg->bar4_base;
694		lmtv = ntb->xlat_reg->bar4_limit;
695		xlatv = ntb->xlat_reg->bar4_xlat;
696		break;
697	case NTB_B2B_BAR_3:
698		basev = ntb->xlat_reg->bar5_base;
699		lmtv = ntb->xlat_reg->bar5_limit;
700		xlatv = ntb->xlat_reg->bar5_xlat;
701		break;
702	default:
703		KASSERT(bar >= NTB_B2B_BAR_1 && bar < NTB_MAX_BARS,
704		    ("bad bar"));
705		basev = lmtv = xlatv = 0;
706		break;
707	}
708
709	if (base != NULL)
710		*base = basev;
711	if (xlat != NULL)
712		*xlat = xlatv;
713	if (lmt != NULL)
714		*lmt = lmtv;
715}
716
717static int
718ntb_map_pci_bars(struct ntb_softc *ntb)
719{
720	int rc;
721
722	ntb->bar_info[NTB_CONFIG_BAR].pci_resource_id = PCIR_BAR(0);
723	rc = map_mmr_bar(ntb, &ntb->bar_info[NTB_CONFIG_BAR]);
724	if (rc != 0)
725		goto out;
726
727	ntb->bar_info[NTB_B2B_BAR_1].pci_resource_id = PCIR_BAR(2);
728	rc = map_memory_window_bar(ntb, &ntb->bar_info[NTB_B2B_BAR_1]);
729	if (rc != 0)
730		goto out;
731	ntb->bar_info[NTB_B2B_BAR_1].psz_off = XEON_PBAR23SZ_OFFSET;
732	ntb->bar_info[NTB_B2B_BAR_1].ssz_off = XEON_SBAR23SZ_OFFSET;
733	ntb->bar_info[NTB_B2B_BAR_1].pbarxlat_off = XEON_PBAR2XLAT_OFFSET;
734
735	ntb->bar_info[NTB_B2B_BAR_2].pci_resource_id = PCIR_BAR(4);
736	rc = map_memory_window_bar(ntb, &ntb->bar_info[NTB_B2B_BAR_2]);
737	if (rc != 0)
738		goto out;
739	ntb->bar_info[NTB_B2B_BAR_2].psz_off = XEON_PBAR4SZ_OFFSET;
740	ntb->bar_info[NTB_B2B_BAR_2].ssz_off = XEON_SBAR4SZ_OFFSET;
741	ntb->bar_info[NTB_B2B_BAR_2].pbarxlat_off = XEON_PBAR4XLAT_OFFSET;
742
743	if (!HAS_FEATURE(NTB_SPLIT_BAR))
744		goto out;
745
746	ntb->bar_info[NTB_B2B_BAR_3].pci_resource_id = PCIR_BAR(5);
747	rc = map_memory_window_bar(ntb, &ntb->bar_info[NTB_B2B_BAR_3]);
748	ntb->bar_info[NTB_B2B_BAR_3].psz_off = XEON_PBAR5SZ_OFFSET;
749	ntb->bar_info[NTB_B2B_BAR_3].ssz_off = XEON_SBAR5SZ_OFFSET;
750	ntb->bar_info[NTB_B2B_BAR_3].pbarxlat_off = XEON_PBAR5XLAT_OFFSET;
751
752out:
753	if (rc != 0)
754		device_printf(ntb->device,
755		    "unable to allocate pci resource\n");
756	return (rc);
757}
758
759static void
760print_map_success(struct ntb_softc *ntb, struct ntb_pci_bar_info *bar,
761    const char *kind)
762{
763
764	device_printf(ntb->device,
765	    "Mapped BAR%d v:[%p-%p] p:[%p-%p] (0x%jx bytes) (%s)\n",
766	    PCI_RID2BAR(bar->pci_resource_id), bar->vbase,
767	    (char *)bar->vbase + bar->size - 1,
768	    (void *)bar->pbase, (void *)(bar->pbase + bar->size - 1),
769	    (uintmax_t)bar->size, kind);
770}
771
772static int
773map_mmr_bar(struct ntb_softc *ntb, struct ntb_pci_bar_info *bar)
774{
775
776	bar->pci_resource = bus_alloc_resource_any(ntb->device, SYS_RES_MEMORY,
777	    &bar->pci_resource_id, RF_ACTIVE);
778	if (bar->pci_resource == NULL)
779		return (ENXIO);
780
781	save_bar_parameters(bar);
782	bar->map_mode = VM_MEMATTR_UNCACHEABLE;
783	print_map_success(ntb, bar, "mmr");
784	return (0);
785}
786
787static int
788map_memory_window_bar(struct ntb_softc *ntb, struct ntb_pci_bar_info *bar)
789{
790	int rc;
791	vm_memattr_t mapmode;
792	uint8_t bar_size_bits = 0;
793
794	bar->pci_resource = bus_alloc_resource_any(ntb->device, SYS_RES_MEMORY,
795	    &bar->pci_resource_id, RF_ACTIVE);
796
797	if (bar->pci_resource == NULL)
798		return (ENXIO);
799
800	save_bar_parameters(bar);
801	/*
802	 * Ivytown NTB BAR sizes are misreported by the hardware due to a
803	 * hardware issue. To work around this, query the size it should be
804	 * configured to by the device and modify the resource to correspond to
805	 * this new size. The BIOS on systems with this problem is required to
806	 * provide enough address space to allow the driver to make this change
807	 * safely.
808	 *
809	 * Ideally I could have just specified the size when I allocated the
810	 * resource like:
811	 *  bus_alloc_resource(ntb->device,
812	 *	SYS_RES_MEMORY, &bar->pci_resource_id, 0ul, ~0ul,
813	 *	1ul << bar_size_bits, RF_ACTIVE);
814	 * but the PCI driver does not honor the size in this call, so we have
815	 * to modify it after the fact.
816	 */
817	if (HAS_FEATURE(NTB_BAR_SIZE_4K)) {
818		if (bar->pci_resource_id == PCIR_BAR(2))
819			bar_size_bits = pci_read_config(ntb->device,
820			    XEON_PBAR23SZ_OFFSET, 1);
821		else
822			bar_size_bits = pci_read_config(ntb->device,
823			    XEON_PBAR45SZ_OFFSET, 1);
824
825		rc = bus_adjust_resource(ntb->device, SYS_RES_MEMORY,
826		    bar->pci_resource, bar->pbase,
827		    bar->pbase + (1ul << bar_size_bits) - 1);
828		if (rc != 0) {
829			device_printf(ntb->device,
830			    "unable to resize bar\n");
831			return (rc);
832		}
833
834		save_bar_parameters(bar);
835	}
836
837	bar->map_mode = VM_MEMATTR_UNCACHEABLE;
838	print_map_success(ntb, bar, "mw");
839
840	/*
841	 * Optionally, mark MW BARs as anything other than UC to improve
842	 * performance.
843	 */
844	mapmode = ntb_pat_flags();
845	if (mapmode == bar->map_mode)
846		return (0);
847
848	rc = pmap_change_attr((vm_offset_t)bar->vbase, bar->size, mapmode);
849	if (rc == 0) {
850		bar->map_mode = mapmode;
851		device_printf(ntb->device,
852		    "Marked BAR%d v:[%p-%p] p:[%p-%p] as "
853		    "%s.\n",
854		    PCI_RID2BAR(bar->pci_resource_id), bar->vbase,
855		    (char *)bar->vbase + bar->size - 1,
856		    (void *)bar->pbase, (void *)(bar->pbase + bar->size - 1),
857		    ntb_vm_memattr_to_str(mapmode));
858	} else
859		device_printf(ntb->device,
860		    "Unable to mark BAR%d v:[%p-%p] p:[%p-%p] as "
861		    "%s: %d\n",
862		    PCI_RID2BAR(bar->pci_resource_id), bar->vbase,
863		    (char *)bar->vbase + bar->size - 1,
864		    (void *)bar->pbase, (void *)(bar->pbase + bar->size - 1),
865		    ntb_vm_memattr_to_str(mapmode), rc);
866		/* Proceed anyway */
867	return (0);
868}
869
870static void
871ntb_unmap_pci_bar(struct ntb_softc *ntb)
872{
873	struct ntb_pci_bar_info *current_bar;
874	int i;
875
876	for (i = 0; i < NTB_MAX_BARS; i++) {
877		current_bar = &ntb->bar_info[i];
878		if (current_bar->pci_resource != NULL)
879			bus_release_resource(ntb->device, SYS_RES_MEMORY,
880			    current_bar->pci_resource_id,
881			    current_bar->pci_resource);
882	}
883}
884
885static int
886ntb_setup_msix(struct ntb_softc *ntb, uint32_t num_vectors)
887{
888	uint32_t i;
889	int rc;
890
891	for (i = 0; i < num_vectors; i++) {
892		ntb->int_info[i].rid = i + 1;
893		ntb->int_info[i].res = bus_alloc_resource_any(ntb->device,
894		    SYS_RES_IRQ, &ntb->int_info[i].rid, RF_ACTIVE);
895		if (ntb->int_info[i].res == NULL) {
896			device_printf(ntb->device,
897			    "bus_alloc_resource failed\n");
898			return (ENOMEM);
899		}
900		ntb->int_info[i].tag = NULL;
901		ntb->allocated_interrupts++;
902		rc = bus_setup_intr(ntb->device, ntb->int_info[i].res,
903		    INTR_MPSAFE | INTR_TYPE_MISC, NULL, ndev_vec_isr,
904		    &ntb->msix_vec[i], &ntb->int_info[i].tag);
905		if (rc != 0) {
906			device_printf(ntb->device, "bus_setup_intr failed\n");
907			return (ENXIO);
908		}
909	}
910	return (0);
911}
912
913/*
914 * The Linux NTB driver drops from MSI-X to legacy INTx if a unique vector
915 * cannot be allocated for each MSI-X message.  JHB seems to think remapping
916 * should be okay.  This tunable should enable us to test that hypothesis
917 * when someone gets their hands on some Xeon hardware.
918 */
919static int ntb_force_remap_mode;
920SYSCTL_INT(_hw_ntb, OID_AUTO, force_remap_mode, CTLFLAG_RDTUN,
921    &ntb_force_remap_mode, 0, "If enabled, force MSI-X messages to be remapped"
922    " to a smaller number of ithreads, even if the desired number are "
923    "available");
924
925/*
926 * In case it is NOT ok, give consumers an abort button.
927 */
928static int ntb_prefer_intx;
929SYSCTL_INT(_hw_ntb, OID_AUTO, prefer_intx_to_remap, CTLFLAG_RDTUN,
930    &ntb_prefer_intx, 0, "If enabled, prefer to use legacy INTx mode rather "
931    "than remapping MSI-X messages over available slots (match Linux driver "
932    "behavior)");
933
934/*
935 * Remap the desired number of MSI-X messages to available ithreads in a simple
936 * round-robin fashion.
937 */
938static int
939ntb_remap_msix(device_t dev, uint32_t desired, uint32_t avail)
940{
941	u_int *vectors;
942	uint32_t i;
943	int rc;
944
945	if (ntb_prefer_intx != 0)
946		return (ENXIO);
947
948	vectors = malloc(desired * sizeof(*vectors), M_NTB, M_ZERO | M_WAITOK);
949
950	for (i = 0; i < desired; i++)
951		vectors[i] = (i % avail) + 1;
952
953	rc = pci_remap_msix(dev, desired, vectors);
954	free(vectors, M_NTB);
955	return (rc);
956}
957
958static int
959ntb_init_isr(struct ntb_softc *ntb)
960{
961	uint32_t desired_vectors, num_vectors;
962	int rc;
963
964	ntb->allocated_interrupts = 0;
965	ntb->last_ts = ticks;
966
967	/*
968	 * Mask all doorbell interrupts.
969	 */
970	ntb_db_set_mask(ntb, ntb->db_valid_mask);
971
972	num_vectors = desired_vectors = MIN(pci_msix_count(ntb->device),
973	    ntb->db_count);
974	if (desired_vectors >= 1) {
975		rc = pci_alloc_msix(ntb->device, &num_vectors);
976
977		if (ntb_force_remap_mode != 0 && rc == 0 &&
978		    num_vectors == desired_vectors)
979			num_vectors--;
980
981		if (rc == 0 && num_vectors < desired_vectors) {
982			rc = ntb_remap_msix(ntb->device, desired_vectors,
983			    num_vectors);
984			if (rc == 0)
985				num_vectors = desired_vectors;
986			else
987				pci_release_msi(ntb->device);
988		}
989		if (rc != 0)
990			num_vectors = 1;
991	} else
992		num_vectors = 1;
993
994	if (ntb->type == NTB_XEON && num_vectors < ntb->db_vec_count) {
995		ntb->db_vec_count = 1;
996		ntb->db_vec_shift = XEON_DB_TOTAL_SHIFT;
997		rc = ntb_setup_legacy_interrupt(ntb);
998	} else {
999		ntb_create_msix_vec(ntb, num_vectors);
1000		rc = ntb_setup_msix(ntb, num_vectors);
1001	}
1002	if (rc != 0) {
1003		device_printf(ntb->device,
1004		    "Error allocating interrupts: %d\n", rc);
1005		ntb_free_msix_vec(ntb);
1006	}
1007
1008	return (rc);
1009}
1010
1011static int
1012ntb_setup_legacy_interrupt(struct ntb_softc *ntb)
1013{
1014	int rc;
1015
1016	ntb->int_info[0].rid = 0;
1017	ntb->int_info[0].res = bus_alloc_resource_any(ntb->device, SYS_RES_IRQ,
1018	    &ntb->int_info[0].rid, RF_SHAREABLE|RF_ACTIVE);
1019	if (ntb->int_info[0].res == NULL) {
1020		device_printf(ntb->device, "bus_alloc_resource failed\n");
1021		return (ENOMEM);
1022	}
1023
1024	ntb->int_info[0].tag = NULL;
1025	ntb->allocated_interrupts = 1;
1026
1027	rc = bus_setup_intr(ntb->device, ntb->int_info[0].res,
1028	    INTR_MPSAFE | INTR_TYPE_MISC, NULL, ndev_irq_isr,
1029	    ntb, &ntb->int_info[0].tag);
1030	if (rc != 0) {
1031		device_printf(ntb->device, "bus_setup_intr failed\n");
1032		return (ENXIO);
1033	}
1034
1035	return (0);
1036}
1037
1038static void
1039ntb_teardown_interrupts(struct ntb_softc *ntb)
1040{
1041	struct ntb_int_info *current_int;
1042	int i;
1043
1044	for (i = 0; i < ntb->allocated_interrupts; i++) {
1045		current_int = &ntb->int_info[i];
1046		if (current_int->tag != NULL)
1047			bus_teardown_intr(ntb->device, current_int->res,
1048			    current_int->tag);
1049
1050		if (current_int->res != NULL)
1051			bus_release_resource(ntb->device, SYS_RES_IRQ,
1052			    rman_get_rid(current_int->res), current_int->res);
1053	}
1054
1055	ntb_free_msix_vec(ntb);
1056	pci_release_msi(ntb->device);
1057}
1058
1059/*
1060 * Doorbell register and mask are 64-bit on Atom, 16-bit on Xeon.  Abstract it
1061 * out to make code clearer.
1062 */
1063static inline uint64_t
1064db_ioread(struct ntb_softc *ntb, uint64_t regoff)
1065{
1066
1067	if (ntb->type == NTB_ATOM)
1068		return (ntb_reg_read(8, regoff));
1069
1070	KASSERT(ntb->type == NTB_XEON, ("bad ntb type"));
1071
1072	return (ntb_reg_read(2, regoff));
1073}
1074
1075static inline void
1076db_iowrite(struct ntb_softc *ntb, uint64_t regoff, uint64_t val)
1077{
1078
1079	KASSERT((val & ~ntb->db_valid_mask) == 0,
1080	    ("%s: Invalid bits 0x%jx (valid: 0x%jx)", __func__,
1081	     (uintmax_t)(val & ~ntb->db_valid_mask),
1082	     (uintmax_t)ntb->db_valid_mask));
1083
1084	if (regoff == ntb->self_reg->db_mask)
1085		DB_MASK_ASSERT(ntb, MA_OWNED);
1086	db_iowrite_raw(ntb, regoff, val);
1087}
1088
1089static inline void
1090db_iowrite_raw(struct ntb_softc *ntb, uint64_t regoff, uint64_t val)
1091{
1092
1093	if (ntb->type == NTB_ATOM) {
1094		ntb_reg_write(8, regoff, val);
1095		return;
1096	}
1097
1098	KASSERT(ntb->type == NTB_XEON, ("bad ntb type"));
1099	ntb_reg_write(2, regoff, (uint16_t)val);
1100}
1101
1102void
1103ntb_db_set_mask(struct ntb_softc *ntb, uint64_t bits)
1104{
1105
1106	DB_MASK_LOCK(ntb);
1107	ntb->db_mask |= bits;
1108	db_iowrite(ntb, ntb->self_reg->db_mask, ntb->db_mask);
1109	DB_MASK_UNLOCK(ntb);
1110}
1111
1112void
1113ntb_db_clear_mask(struct ntb_softc *ntb, uint64_t bits)
1114{
1115
1116	KASSERT((bits & ~ntb->db_valid_mask) == 0,
1117	    ("%s: Invalid bits 0x%jx (valid: 0x%jx)", __func__,
1118	     (uintmax_t)(bits & ~ntb->db_valid_mask),
1119	     (uintmax_t)ntb->db_valid_mask));
1120
1121	DB_MASK_LOCK(ntb);
1122	ntb->db_mask &= ~bits;
1123	db_iowrite(ntb, ntb->self_reg->db_mask, ntb->db_mask);
1124	DB_MASK_UNLOCK(ntb);
1125}
1126
1127uint64_t
1128ntb_db_read(struct ntb_softc *ntb)
1129{
1130
1131	return (db_ioread(ntb, ntb->self_reg->db_bell));
1132}
1133
1134void
1135ntb_db_clear(struct ntb_softc *ntb, uint64_t bits)
1136{
1137
1138	KASSERT((bits & ~ntb->db_valid_mask) == 0,
1139	    ("%s: Invalid bits 0x%jx (valid: 0x%jx)", __func__,
1140	     (uintmax_t)(bits & ~ntb->db_valid_mask),
1141	     (uintmax_t)ntb->db_valid_mask));
1142
1143	db_iowrite(ntb, ntb->self_reg->db_bell, bits);
1144}
1145
1146static inline uint64_t
1147ntb_vec_mask(struct ntb_softc *ntb, uint64_t db_vector)
1148{
1149	uint64_t shift, mask;
1150
1151	shift = ntb->db_vec_shift;
1152	mask = (1ull << shift) - 1;
1153	return (mask << (shift * db_vector));
1154}
1155
1156static void
1157ntb_interrupt(struct ntb_softc *ntb, uint32_t vec)
1158{
1159	uint64_t vec_mask;
1160
1161	ntb->last_ts = ticks;
1162	vec_mask = ntb_vec_mask(ntb, vec);
1163
1164	if ((vec_mask & ntb->db_link_mask) != 0) {
1165		if (ntb_poll_link(ntb))
1166			ntb_link_event(ntb);
1167	}
1168
1169	if ((vec_mask & ntb->db_valid_mask) != 0)
1170		ntb_db_event(ntb, vec);
1171}
1172
1173static void
1174ndev_vec_isr(void *arg)
1175{
1176	struct ntb_vec *nvec = arg;
1177
1178	ntb_interrupt(nvec->ntb, nvec->num);
1179}
1180
/*
 * Interrupt handler used for the legacy interrupt; `arg' is the softc.
 */
static void
ndev_irq_isr(void *arg)
{
	struct ntb_softc *ntb;

	ntb = arg;

	/* If we couldn't set up MSI-X, we only have the one vector. */
	ntb_interrupt(ntb, 0);
}
1187
1188static int
1189ntb_create_msix_vec(struct ntb_softc *ntb, uint32_t num_vectors)
1190{
1191	uint32_t i;
1192
1193	ntb->msix_vec = malloc(num_vectors * sizeof(*ntb->msix_vec), M_NTB,
1194	    M_ZERO | M_WAITOK);
1195	for (i = 0; i < num_vectors; i++) {
1196		ntb->msix_vec[i].num = i;
1197		ntb->msix_vec[i].ntb = ntb;
1198	}
1199
1200	return (0);
1201}
1202
1203static void
1204ntb_free_msix_vec(struct ntb_softc *ntb)
1205{
1206
1207	if (ntb->msix_vec == NULL)
1208		return;
1209
1210	free(ntb->msix_vec, M_NTB);
1211	ntb->msix_vec = NULL;
1212}
1213
1214static struct ntb_hw_info *
1215ntb_get_device_info(uint32_t device_id)
1216{
1217	struct ntb_hw_info *ep = pci_ids;
1218
1219	while (ep->device_id) {
1220		if (ep->device_id == device_id)
1221			return (ep);
1222		++ep;
1223	}
1224	return (NULL);
1225}
1226
1227static void
1228ntb_teardown_xeon(struct ntb_softc *ntb)
1229{
1230
1231	if (ntb->reg != NULL)
1232		ntb_link_disable(ntb);
1233}
1234
1235static void
1236ntb_detect_max_mw(struct ntb_softc *ntb)
1237{
1238
1239	if (ntb->type == NTB_ATOM) {
1240		ntb->mw_count = ATOM_MW_COUNT;
1241		return;
1242	}
1243
1244	if (HAS_FEATURE(NTB_SPLIT_BAR))
1245		ntb->mw_count = XEON_HSX_SPLIT_MW_COUNT;
1246	else
1247		ntb->mw_count = XEON_SNB_MW_COUNT;
1248}
1249
1250static int
1251ntb_detect_xeon(struct ntb_softc *ntb)
1252{
1253	uint8_t ppd, conn_type;
1254
1255	ppd = pci_read_config(ntb->device, NTB_PPD_OFFSET, 1);
1256	ntb->ppd = ppd;
1257
1258	if ((ppd & XEON_PPD_DEV_TYPE) != 0)
1259		ntb->dev_type = NTB_DEV_DSD;
1260	else
1261		ntb->dev_type = NTB_DEV_USD;
1262
1263	if ((ppd & XEON_PPD_SPLIT_BAR) != 0)
1264		ntb->features |= NTB_SPLIT_BAR;
1265
1266	/* SB01BASE_LOCKUP errata is a superset of SDOORBELL errata */
1267	if (HAS_FEATURE(NTB_SB01BASE_LOCKUP))
1268		ntb->features |= NTB_SDOORBELL_LOCKUP;
1269
1270	conn_type = ppd & XEON_PPD_CONN_TYPE;
1271	switch (conn_type) {
1272	case NTB_CONN_B2B:
1273		ntb->conn_type = conn_type;
1274		break;
1275	case NTB_CONN_RP:
1276	case NTB_CONN_TRANSPARENT:
1277	default:
1278		device_printf(ntb->device, "Unsupported connection type: %u\n",
1279		    (unsigned)conn_type);
1280		return (ENXIO);
1281	}
1282	return (0);
1283}
1284
1285static int
1286ntb_detect_atom(struct ntb_softc *ntb)
1287{
1288	uint32_t ppd, conn_type;
1289
1290	ppd = pci_read_config(ntb->device, NTB_PPD_OFFSET, 4);
1291	ntb->ppd = ppd;
1292
1293	if ((ppd & ATOM_PPD_DEV_TYPE) != 0)
1294		ntb->dev_type = NTB_DEV_DSD;
1295	else
1296		ntb->dev_type = NTB_DEV_USD;
1297
1298	conn_type = (ppd & ATOM_PPD_CONN_TYPE) >> 8;
1299	switch (conn_type) {
1300	case NTB_CONN_B2B:
1301		ntb->conn_type = conn_type;
1302		break;
1303	default:
1304		device_printf(ntb->device, "Unsupported NTB configuration\n");
1305		return (ENXIO);
1306	}
1307	return (0);
1308}
1309
1310static int
1311ntb_xeon_init_dev(struct ntb_softc *ntb)
1312{
1313	int rc;
1314
1315	ntb->spad_count		= XEON_SPAD_COUNT;
1316	ntb->db_count		= XEON_DB_COUNT;
1317	ntb->db_link_mask	= XEON_DB_LINK_BIT;
1318	ntb->db_vec_count	= XEON_DB_MSIX_VECTOR_COUNT;
1319	ntb->db_vec_shift	= XEON_DB_MSIX_VECTOR_SHIFT;
1320
1321	if (ntb->conn_type != NTB_CONN_B2B) {
1322		device_printf(ntb->device, "Connection type %d not supported\n",
1323		    ntb->conn_type);
1324		return (ENXIO);
1325	}
1326
1327	ntb->reg = &xeon_reg;
1328	ntb->self_reg = &xeon_pri_reg;
1329	ntb->peer_reg = &xeon_b2b_reg;
1330	ntb->xlat_reg = &xeon_sec_xlat;
1331
1332	/*
1333	 * There is a Xeon hardware errata related to writes to SDOORBELL or
1334	 * B2BDOORBELL in conjunction with inbound access to NTB MMIO space,
1335	 * which may hang the system.  To workaround this, use a memory
1336	 * window to access the interrupt and scratch pad registers on the
1337	 * remote system.
1338	 */
1339	if (HAS_FEATURE(NTB_SDOORBELL_LOCKUP)) {
1340		ntb->b2b_mw_idx = (ntb->mw_count + g_ntb_mw_idx) %
1341		    ntb->mw_count;
1342		ntb_printf(2, "Setting up b2b mw idx %d means %u\n",
1343		    g_ntb_mw_idx, ntb->b2b_mw_idx);
1344		rc = ntb_mw_set_wc_internal(ntb, ntb->b2b_mw_idx, VM_MEMATTR_UNCACHEABLE);
1345		KASSERT(rc == 0, ("shouldn't fail"));
1346	} else if (HAS_FEATURE(NTB_B2BDOORBELL_BIT14))
1347		/*
1348		 * HW Errata on bit 14 of b2bdoorbell register.  Writes will not be
1349		 * mirrored to the remote system.  Shrink the number of bits by one,
1350		 * since bit 14 is the last bit.
1351		 *
1352		 * On REGS_THRU_MW errata mode, we don't use the b2bdoorbell register
1353		 * anyway.  Nor for non-B2B connection types.
1354		 */
1355		ntb->db_count = XEON_DB_COUNT - 1;
1356
1357	ntb->db_valid_mask = (1ull << ntb->db_count) - 1;
1358
1359	if (ntb->dev_type == NTB_DEV_USD)
1360		rc = xeon_setup_b2b_mw(ntb, &xeon_b2b_dsd_addr,
1361		    &xeon_b2b_usd_addr);
1362	else
1363		rc = xeon_setup_b2b_mw(ntb, &xeon_b2b_usd_addr,
1364		    &xeon_b2b_dsd_addr);
1365	if (rc != 0)
1366		return (rc);
1367
1368	/* Enable Bus Master and Memory Space on the secondary side */
1369	ntb_reg_write(2, XEON_SPCICMD_OFFSET,
1370	    PCIM_CMD_MEMEN | PCIM_CMD_BUSMASTEREN);
1371
1372	/*
1373	 * Mask all doorbell interrupts.
1374	 */
1375	ntb_db_set_mask(ntb, ntb->db_valid_mask);
1376
1377	rc = ntb_init_isr(ntb);
1378	return (rc);
1379}
1380
1381static int
1382ntb_atom_init_dev(struct ntb_softc *ntb)
1383{
1384	int error;
1385
1386	KASSERT(ntb->conn_type == NTB_CONN_B2B,
1387	    ("Unsupported NTB configuration (%d)\n", ntb->conn_type));
1388
1389	ntb->spad_count		 = ATOM_SPAD_COUNT;
1390	ntb->db_count		 = ATOM_DB_COUNT;
1391	ntb->db_vec_count	 = ATOM_DB_MSIX_VECTOR_COUNT;
1392	ntb->db_vec_shift	 = ATOM_DB_MSIX_VECTOR_SHIFT;
1393	ntb->db_valid_mask	 = (1ull << ntb->db_count) - 1;
1394
1395	ntb->reg = &atom_reg;
1396	ntb->self_reg = &atom_pri_reg;
1397	ntb->peer_reg = &atom_b2b_reg;
1398	ntb->xlat_reg = &atom_sec_xlat;
1399
1400	/*
1401	 * FIXME - MSI-X bug on early Atom HW, remove once internal issue is
1402	 * resolved.  Mask transaction layer internal parity errors.
1403	 */
1404	pci_write_config(ntb->device, 0xFC, 0x4, 4);
1405
1406	configure_atom_secondary_side_bars(ntb);
1407
1408	/* Enable Bus Master and Memory Space on the secondary side */
1409	ntb_reg_write(2, ATOM_SPCICMD_OFFSET,
1410	    PCIM_CMD_MEMEN | PCIM_CMD_BUSMASTEREN);
1411
1412	error = ntb_init_isr(ntb);
1413	if (error != 0)
1414		return (error);
1415
1416	/* Initiate PCI-E link training */
1417	ntb_link_enable(ntb, NTB_SPEED_AUTO, NTB_WIDTH_AUTO);
1418
1419	callout_reset(&ntb->heartbeat_timer, 0, atom_link_hb, ntb);
1420
1421	return (0);
1422}
1423
1424/* XXX: Linux driver doesn't seem to do any of this for Atom. */
1425static void
1426configure_atom_secondary_side_bars(struct ntb_softc *ntb)
1427{
1428
1429	if (ntb->dev_type == NTB_DEV_USD) {
1430		ntb_reg_write(8, ATOM_PBAR2XLAT_OFFSET,
1431		    XEON_B2B_BAR2_ADDR64);
1432		ntb_reg_write(8, ATOM_PBAR4XLAT_OFFSET,
1433		    XEON_B2B_BAR4_ADDR64);
1434		ntb_reg_write(8, ATOM_MBAR23_OFFSET, XEON_B2B_BAR2_ADDR64);
1435		ntb_reg_write(8, ATOM_MBAR45_OFFSET, XEON_B2B_BAR4_ADDR64);
1436	} else {
1437		ntb_reg_write(8, ATOM_PBAR2XLAT_OFFSET,
1438		    XEON_B2B_BAR2_ADDR64);
1439		ntb_reg_write(8, ATOM_PBAR4XLAT_OFFSET,
1440		    XEON_B2B_BAR4_ADDR64);
1441		ntb_reg_write(8, ATOM_MBAR23_OFFSET, XEON_B2B_BAR2_ADDR64);
1442		ntb_reg_write(8, ATOM_MBAR45_OFFSET, XEON_B2B_BAR4_ADDR64);
1443	}
1444}
1445
1446
1447/*
1448 * When working around Xeon SDOORBELL errata by remapping remote registers in a
1449 * MW, limit the B2B MW to half a MW.  By sharing a MW, half the shared MW
1450 * remains for use by a higher layer.
1451 *
1452 * Will only be used if working around SDOORBELL errata and the BIOS-configured
1453 * MW size is sufficiently large.
1454 */
1455static unsigned int ntb_b2b_mw_share;
1456SYSCTL_UINT(_hw_ntb, OID_AUTO, b2b_mw_share, CTLFLAG_RDTUN, &ntb_b2b_mw_share,
1457    0, "If enabled (non-zero), prefer to share half of the B2B peer register "
1458    "MW with higher level consumers.  Both sides of the NTB MUST set the same "
1459    "value here.");
1460
1461static void
1462xeon_reset_sbar_size(struct ntb_softc *ntb, enum ntb_bar idx,
1463    enum ntb_bar regbar)
1464{
1465	struct ntb_pci_bar_info *bar;
1466	uint8_t bar_sz;
1467
1468	if (!HAS_FEATURE(NTB_SPLIT_BAR) && idx >= NTB_B2B_BAR_3)
1469		return;
1470
1471	bar = &ntb->bar_info[idx];
1472	bar_sz = pci_read_config(ntb->device, bar->psz_off, 1);
1473	if (idx == regbar) {
1474		if (ntb->b2b_off != 0)
1475			bar_sz--;
1476		else
1477			bar_sz = 0;
1478	}
1479	pci_write_config(ntb->device, bar->ssz_off, bar_sz, 1);
1480	bar_sz = pci_read_config(ntb->device, bar->ssz_off, 1);
1481	(void)bar_sz;
1482}
1483
1484static void
1485xeon_set_sbar_base_and_limit(struct ntb_softc *ntb, uint64_t bar_addr,
1486    enum ntb_bar idx, enum ntb_bar regbar)
1487{
1488	uint64_t reg_val;
1489	uint32_t base_reg, lmt_reg;
1490
1491	bar_get_xlat_params(ntb, idx, &base_reg, NULL, &lmt_reg);
1492	if (idx == regbar)
1493		bar_addr += ntb->b2b_off;
1494
1495	if (!bar_is_64bit(ntb, idx)) {
1496		ntb_reg_write(4, base_reg, bar_addr);
1497		reg_val = ntb_reg_read(4, base_reg);
1498		(void)reg_val;
1499
1500		ntb_reg_write(4, lmt_reg, bar_addr);
1501		reg_val = ntb_reg_read(4, lmt_reg);
1502		(void)reg_val;
1503	} else {
1504		ntb_reg_write(8, base_reg, bar_addr);
1505		reg_val = ntb_reg_read(8, base_reg);
1506		(void)reg_val;
1507
1508		ntb_reg_write(8, lmt_reg, bar_addr);
1509		reg_val = ntb_reg_read(8, lmt_reg);
1510		(void)reg_val;
1511	}
1512}
1513
1514static void
1515xeon_set_pbar_xlat(struct ntb_softc *ntb, uint64_t base_addr, enum ntb_bar idx)
1516{
1517	struct ntb_pci_bar_info *bar;
1518
1519	bar = &ntb->bar_info[idx];
1520	if (HAS_FEATURE(NTB_SPLIT_BAR) && idx >= NTB_B2B_BAR_2) {
1521		ntb_reg_write(4, bar->pbarxlat_off, base_addr);
1522		base_addr = ntb_reg_read(4, bar->pbarxlat_off);
1523	} else {
1524		ntb_reg_write(8, bar->pbarxlat_off, base_addr);
1525		base_addr = ntb_reg_read(8, bar->pbarxlat_off);
1526	}
1527	(void)base_addr;
1528}
1529
1530static int
1531xeon_setup_b2b_mw(struct ntb_softc *ntb, const struct ntb_b2b_addr *addr,
1532    const struct ntb_b2b_addr *peer_addr)
1533{
1534	struct ntb_pci_bar_info *b2b_bar;
1535	vm_size_t bar_size;
1536	uint64_t bar_addr;
1537	enum ntb_bar b2b_bar_num, i;
1538
1539	if (ntb->b2b_mw_idx == B2B_MW_DISABLED) {
1540		b2b_bar = NULL;
1541		b2b_bar_num = NTB_CONFIG_BAR;
1542		ntb->b2b_off = 0;
1543	} else {
1544		b2b_bar_num = ntb_mw_to_bar(ntb, ntb->b2b_mw_idx);
1545		KASSERT(b2b_bar_num > 0 && b2b_bar_num < NTB_MAX_BARS,
1546		    ("invalid b2b mw bar"));
1547
1548		b2b_bar = &ntb->bar_info[b2b_bar_num];
1549		bar_size = b2b_bar->size;
1550
1551		if (ntb_b2b_mw_share != 0 &&
1552		    (bar_size >> 1) >= XEON_B2B_MIN_SIZE)
1553			ntb->b2b_off = bar_size >> 1;
1554		else if (bar_size >= XEON_B2B_MIN_SIZE) {
1555			ntb->b2b_off = 0;
1556		} else {
1557			device_printf(ntb->device,
1558			    "B2B bar size is too small!\n");
1559			return (EIO);
1560		}
1561	}
1562
1563	/*
1564	 * Reset the secondary bar sizes to match the primary bar sizes.
1565	 * (Except, disable or halve the size of the B2B secondary bar.)
1566	 */
1567	for (i = NTB_B2B_BAR_1; i < NTB_MAX_BARS; i++)
1568		xeon_reset_sbar_size(ntb, i, b2b_bar_num);
1569
1570	bar_addr = 0;
1571	if (b2b_bar_num == NTB_CONFIG_BAR)
1572		bar_addr = addr->bar0_addr;
1573	else if (b2b_bar_num == NTB_B2B_BAR_1)
1574		bar_addr = addr->bar2_addr64;
1575	else if (b2b_bar_num == NTB_B2B_BAR_2 && !HAS_FEATURE(NTB_SPLIT_BAR))
1576		bar_addr = addr->bar4_addr64;
1577	else if (b2b_bar_num == NTB_B2B_BAR_2)
1578		bar_addr = addr->bar4_addr32;
1579	else if (b2b_bar_num == NTB_B2B_BAR_3)
1580		bar_addr = addr->bar5_addr32;
1581	else
1582		KASSERT(false, ("invalid bar"));
1583
1584	ntb_reg_write(8, XEON_SBAR0BASE_OFFSET, bar_addr);
1585
1586	/*
1587	 * Other SBARs are normally hit by the PBAR xlat, except for the b2b
1588	 * register BAR.  The B2B BAR is either disabled above or configured
1589	 * half-size.  It starts at PBAR xlat + offset.
1590	 *
1591	 * Also set up incoming BAR limits == base (zero length window).
1592	 */
1593	xeon_set_sbar_base_and_limit(ntb, addr->bar2_addr64, NTB_B2B_BAR_1,
1594	    b2b_bar_num);
1595	if (HAS_FEATURE(NTB_SPLIT_BAR)) {
1596		xeon_set_sbar_base_and_limit(ntb, addr->bar4_addr32,
1597		    NTB_B2B_BAR_2, b2b_bar_num);
1598		xeon_set_sbar_base_and_limit(ntb, addr->bar5_addr32,
1599		    NTB_B2B_BAR_3, b2b_bar_num);
1600	} else
1601		xeon_set_sbar_base_and_limit(ntb, addr->bar4_addr64,
1602		    NTB_B2B_BAR_2, b2b_bar_num);
1603
1604	/* Zero incoming translation addrs */
1605	ntb_reg_write(8, XEON_SBAR2XLAT_OFFSET, 0);
1606	ntb_reg_write(8, XEON_SBAR4XLAT_OFFSET, 0);
1607
1608	/* Zero outgoing translation limits (whole bar size windows) */
1609	ntb_reg_write(8, XEON_PBAR2LMT_OFFSET, 0);
1610	ntb_reg_write(8, XEON_PBAR4LMT_OFFSET, 0);
1611
1612	/* Set outgoing translation offsets */
1613	xeon_set_pbar_xlat(ntb, peer_addr->bar2_addr64, NTB_B2B_BAR_1);
1614	if (HAS_FEATURE(NTB_SPLIT_BAR)) {
1615		xeon_set_pbar_xlat(ntb, peer_addr->bar4_addr32, NTB_B2B_BAR_2);
1616		xeon_set_pbar_xlat(ntb, peer_addr->bar5_addr32, NTB_B2B_BAR_3);
1617	} else
1618		xeon_set_pbar_xlat(ntb, peer_addr->bar4_addr64, NTB_B2B_BAR_2);
1619
1620	/* Set the translation offset for B2B registers */
1621	bar_addr = 0;
1622	if (b2b_bar_num == NTB_CONFIG_BAR)
1623		bar_addr = peer_addr->bar0_addr;
1624	else if (b2b_bar_num == NTB_B2B_BAR_1)
1625		bar_addr = peer_addr->bar2_addr64;
1626	else if (b2b_bar_num == NTB_B2B_BAR_2 && !HAS_FEATURE(NTB_SPLIT_BAR))
1627		bar_addr = peer_addr->bar4_addr64;
1628	else if (b2b_bar_num == NTB_B2B_BAR_2)
1629		bar_addr = peer_addr->bar4_addr32;
1630	else if (b2b_bar_num == NTB_B2B_BAR_3)
1631		bar_addr = peer_addr->bar5_addr32;
1632	else
1633		KASSERT(false, ("invalid bar"));
1634
1635	/*
1636	 * B2B_XLAT_OFFSET is a 64-bit register but can only be written 32 bits
1637	 * at a time.
1638	 */
1639	ntb_reg_write(4, XEON_B2B_XLAT_OFFSETL, bar_addr & 0xffffffff);
1640	ntb_reg_write(4, XEON_B2B_XLAT_OFFSETU, bar_addr >> 32);
1641	return (0);
1642}
1643
1644static inline bool
1645link_is_up(struct ntb_softc *ntb)
1646{
1647
1648	if (ntb->type == NTB_XEON) {
1649		if (ntb->conn_type == NTB_CONN_TRANSPARENT)
1650			return (true);
1651		return ((ntb->lnk_sta & NTB_LINK_STATUS_ACTIVE) != 0);
1652	}
1653
1654	KASSERT(ntb->type == NTB_ATOM, ("ntb type"));
1655	return ((ntb->ntb_ctl & ATOM_CNTL_LINK_DOWN) == 0);
1656}
1657
1658static inline bool
1659atom_link_is_err(struct ntb_softc *ntb)
1660{
1661	uint32_t status;
1662
1663	KASSERT(ntb->type == NTB_ATOM, ("ntb type"));
1664
1665	status = ntb_reg_read(4, ATOM_LTSSMSTATEJMP_OFFSET);
1666	if ((status & ATOM_LTSSMSTATEJMP_FORCEDETECT) != 0)
1667		return (true);
1668
1669	status = ntb_reg_read(4, ATOM_IBSTERRRCRVSTS0_OFFSET);
1670	return ((status & ATOM_IBIST_ERR_OFLOW) != 0);
1671}
1672
1673/* Atom does not have link status interrupt, poll on that platform */
1674static void
1675atom_link_hb(void *arg)
1676{
1677	struct ntb_softc *ntb = arg;
1678	sbintime_t timo, poll_ts;
1679
1680	timo = NTB_HB_TIMEOUT * hz;
1681	poll_ts = ntb->last_ts + timo;
1682
1683	/*
1684	 * Delay polling the link status if an interrupt was received, unless
1685	 * the cached link status says the link is down.
1686	 */
1687	if ((sbintime_t)ticks - poll_ts < 0 && link_is_up(ntb)) {
1688		timo = poll_ts - ticks;
1689		goto out;
1690	}
1691
1692	if (ntb_poll_link(ntb))
1693		ntb_link_event(ntb);
1694
1695	if (!link_is_up(ntb) && atom_link_is_err(ntb)) {
1696		/* Link is down with error, proceed with recovery */
1697		callout_reset(&ntb->lr_timer, 0, recover_atom_link, ntb);
1698		return;
1699	}
1700
1701out:
1702	callout_reset(&ntb->heartbeat_timer, timo, atom_link_hb, ntb);
1703}
1704
1705static void
1706atom_perform_link_restart(struct ntb_softc *ntb)
1707{
1708	uint32_t status;
1709
1710	/* Driver resets the NTB ModPhy lanes - magic! */
1711	ntb_reg_write(1, ATOM_MODPHY_PCSREG6, 0xe0);
1712	ntb_reg_write(1, ATOM_MODPHY_PCSREG4, 0x40);
1713	ntb_reg_write(1, ATOM_MODPHY_PCSREG4, 0x60);
1714	ntb_reg_write(1, ATOM_MODPHY_PCSREG6, 0x60);
1715
1716	/* Driver waits 100ms to allow the NTB ModPhy to settle */
1717	pause("ModPhy", hz / 10);
1718
1719	/* Clear AER Errors, write to clear */
1720	status = ntb_reg_read(4, ATOM_ERRCORSTS_OFFSET);
1721	status &= PCIM_AER_COR_REPLAY_ROLLOVER;
1722	ntb_reg_write(4, ATOM_ERRCORSTS_OFFSET, status);
1723
1724	/* Clear unexpected electrical idle event in LTSSM, write to clear */
1725	status = ntb_reg_read(4, ATOM_LTSSMERRSTS0_OFFSET);
1726	status |= ATOM_LTSSMERRSTS0_UNEXPECTEDEI;
1727	ntb_reg_write(4, ATOM_LTSSMERRSTS0_OFFSET, status);
1728
1729	/* Clear DeSkew Buffer error, write to clear */
1730	status = ntb_reg_read(4, ATOM_DESKEWSTS_OFFSET);
1731	status |= ATOM_DESKEWSTS_DBERR;
1732	ntb_reg_write(4, ATOM_DESKEWSTS_OFFSET, status);
1733
1734	status = ntb_reg_read(4, ATOM_IBSTERRRCRVSTS0_OFFSET);
1735	status &= ATOM_IBIST_ERR_OFLOW;
1736	ntb_reg_write(4, ATOM_IBSTERRRCRVSTS0_OFFSET, status);
1737
1738	/* Releases the NTB state machine to allow the link to retrain */
1739	status = ntb_reg_read(4, ATOM_LTSSMSTATEJMP_OFFSET);
1740	status &= ~ATOM_LTSSMSTATEJMP_FORCEDETECT;
1741	ntb_reg_write(4, ATOM_LTSSMSTATEJMP_OFFSET, status);
1742}
1743
1744/*
1745 * ntb_set_ctx() - associate a driver context with an ntb device
1746 * @ntb:        NTB device context
1747 * @ctx:        Driver context
1748 * @ctx_ops:    Driver context operations
1749 *
1750 * Associate a driver context and operations with a ntb device.  The context is
1751 * provided by the client driver, and the driver may associate a different
1752 * context with each ntb device.
1753 *
1754 * Return: Zero if the context is associated, otherwise an error number.
1755 */
1756int
1757ntb_set_ctx(struct ntb_softc *ntb, void *ctx, const struct ntb_ctx_ops *ops)
1758{
1759
1760	if (ctx == NULL || ops == NULL)
1761		return (EINVAL);
1762	if (ntb->ctx_ops != NULL)
1763		return (EINVAL);
1764
1765	CTX_LOCK(ntb);
1766	if (ntb->ctx_ops != NULL) {
1767		CTX_UNLOCK(ntb);
1768		return (EINVAL);
1769	}
1770	ntb->ntb_ctx = ctx;
1771	ntb->ctx_ops = ops;
1772	CTX_UNLOCK(ntb);
1773
1774	return (0);
1775}
1776
1777/*
1778 * It is expected that this will only be used from contexts where the ctx_lock
1779 * is not needed to protect ntb_ctx lifetime.
1780 */
1781void *
1782ntb_get_ctx(struct ntb_softc *ntb, const struct ntb_ctx_ops **ops)
1783{
1784
1785	KASSERT(ntb->ntb_ctx != NULL && ntb->ctx_ops != NULL, ("bogus"));
1786	if (ops != NULL)
1787		*ops = ntb->ctx_ops;
1788	return (ntb->ntb_ctx);
1789}
1790
1791/*
1792 * ntb_clear_ctx() - disassociate any driver context from an ntb device
1793 * @ntb:        NTB device context
1794 *
1795 * Clear any association that may exist between a driver context and the ntb
1796 * device.
1797 */
1798void
1799ntb_clear_ctx(struct ntb_softc *ntb)
1800{
1801
1802	CTX_LOCK(ntb);
1803	ntb->ntb_ctx = NULL;
1804	ntb->ctx_ops = NULL;
1805	CTX_UNLOCK(ntb);
1806}
1807
1808/*
1809 * ntb_link_event() - notify driver context of a change in link status
1810 * @ntb:        NTB device context
1811 *
1812 * Notify the driver context that the link status may have changed.  The driver
1813 * should call ntb_link_is_up() to get the current status.
1814 */
1815void
1816ntb_link_event(struct ntb_softc *ntb)
1817{
1818
1819	CTX_LOCK(ntb);
1820	if (ntb->ctx_ops != NULL && ntb->ctx_ops->link_event != NULL)
1821		ntb->ctx_ops->link_event(ntb->ntb_ctx);
1822	CTX_UNLOCK(ntb);
1823}
1824
1825/*
1826 * ntb_db_event() - notify driver context of a doorbell event
1827 * @ntb:        NTB device context
1828 * @vector:     Interrupt vector number
1829 *
1830 * Notify the driver context of a doorbell event.  If hardware supports
1831 * multiple interrupt vectors for doorbells, the vector number indicates which
1832 * vector received the interrupt.  The vector number is relative to the first
1833 * vector used for doorbells, starting at zero, and must be less than
1834 * ntb_db_vector_count().  The driver may call ntb_db_read() to check which
1835 * doorbell bits need service, and ntb_db_vector_mask() to determine which of
1836 * those bits are associated with the vector number.
1837 */
1838static void
1839ntb_db_event(struct ntb_softc *ntb, uint32_t vec)
1840{
1841
1842	CTX_LOCK(ntb);
1843	if (ntb->ctx_ops != NULL && ntb->ctx_ops->db_event != NULL)
1844		ntb->ctx_ops->db_event(ntb->ntb_ctx, vec);
1845	CTX_UNLOCK(ntb);
1846}
1847
1848/*
1849 * ntb_link_enable() - enable the link on the secondary side of the ntb
1850 * @ntb:        NTB device context
1851 * @max_speed:  The maximum link speed expressed as PCIe generation number[0]
1852 * @max_width:  The maximum link width expressed as the number of PCIe lanes[0]
1853 *
1854 * Enable the link on the secondary side of the ntb.  This can only be done
1855 * from the primary side of the ntb in primary or b2b topology.  The ntb device
1856 * should train the link to its maximum speed and width, or the requested speed
1857 * and width, whichever is smaller, if supported.
1858 *
1859 * Return: Zero on success, otherwise an error number.
1860 *
1861 * [0]: Only NTB_SPEED_AUTO and NTB_WIDTH_AUTO are valid inputs; other speed
1862 *      and width input will be ignored.
1863 */
1864int
1865ntb_link_enable(struct ntb_softc *ntb, enum ntb_speed s __unused,
1866    enum ntb_width w __unused)
1867{
1868	uint32_t cntl;
1869
1870	if (ntb->type == NTB_ATOM) {
1871		pci_write_config(ntb->device, NTB_PPD_OFFSET,
1872		    ntb->ppd | ATOM_PPD_INIT_LINK, 4);
1873		return (0);
1874	}
1875
1876	if (ntb->conn_type == NTB_CONN_TRANSPARENT) {
1877		ntb_link_event(ntb);
1878		return (0);
1879	}
1880
1881	cntl = ntb_reg_read(4, ntb->reg->ntb_ctl);
1882	cntl &= ~(NTB_CNTL_LINK_DISABLE | NTB_CNTL_CFG_LOCK);
1883	cntl |= NTB_CNTL_P2S_BAR23_SNOOP | NTB_CNTL_S2P_BAR23_SNOOP;
1884	cntl |= NTB_CNTL_P2S_BAR4_SNOOP | NTB_CNTL_S2P_BAR4_SNOOP;
1885	if (HAS_FEATURE(NTB_SPLIT_BAR))
1886		cntl |= NTB_CNTL_P2S_BAR5_SNOOP | NTB_CNTL_S2P_BAR5_SNOOP;
1887	ntb_reg_write(4, ntb->reg->ntb_ctl, cntl);
1888	return (0);
1889}
1890
1891/*
1892 * ntb_link_disable() - disable the link on the secondary side of the ntb
1893 * @ntb:        NTB device context
1894 *
1895 * Disable the link on the secondary side of the ntb.  This can only be done
1896 * from the primary side of the ntb in primary or b2b topology.  The ntb device
1897 * should disable the link.  Returning from this call must indicate that a
1898 * barrier has passed, though with no more writes may pass in either direction
1899 * across the link, except if this call returns an error number.
1900 *
1901 * Return: Zero on success, otherwise an error number.
1902 */
1903int
1904ntb_link_disable(struct ntb_softc *ntb)
1905{
1906	uint32_t cntl;
1907
1908	if (ntb->conn_type == NTB_CONN_TRANSPARENT) {
1909		ntb_link_event(ntb);
1910		return (0);
1911	}
1912
1913	cntl = ntb_reg_read(4, ntb->reg->ntb_ctl);
1914	cntl &= ~(NTB_CNTL_P2S_BAR23_SNOOP | NTB_CNTL_S2P_BAR23_SNOOP);
1915	cntl &= ~(NTB_CNTL_P2S_BAR4_SNOOP | NTB_CNTL_S2P_BAR4_SNOOP);
1916	if (HAS_FEATURE(NTB_SPLIT_BAR))
1917		cntl &= ~(NTB_CNTL_P2S_BAR5_SNOOP | NTB_CNTL_S2P_BAR5_SNOOP);
1918	cntl |= NTB_CNTL_LINK_DISABLE | NTB_CNTL_CFG_LOCK;
1919	ntb_reg_write(4, ntb->reg->ntb_ctl, cntl);
1920	return (0);
1921}
1922
1923static void
1924recover_atom_link(void *arg)
1925{
1926	struct ntb_softc *ntb = arg;
1927	unsigned speed, width, oldspeed, oldwidth;
1928	uint32_t status32;
1929
1930	atom_perform_link_restart(ntb);
1931
1932	/*
1933	 * There is a potential race between the 2 NTB devices recovering at
1934	 * the same time.  If the times are the same, the link will not recover
1935	 * and the driver will be stuck in this loop forever.  Add a random
1936	 * interval to the recovery time to prevent this race.
1937	 */
1938	status32 = arc4random() % ATOM_LINK_RECOVERY_TIME;
1939	pause("Link", (ATOM_LINK_RECOVERY_TIME + status32) * hz / 1000);
1940
1941	if (atom_link_is_err(ntb))
1942		goto retry;
1943
1944	status32 = ntb_reg_read(4, ntb->reg->ntb_ctl);
1945	if ((status32 & ATOM_CNTL_LINK_DOWN) != 0)
1946		goto out;
1947
1948	status32 = ntb_reg_read(4, ntb->reg->lnk_sta);
1949	width = NTB_LNK_STA_WIDTH(status32);
1950	speed = status32 & NTB_LINK_SPEED_MASK;
1951
1952	oldwidth = NTB_LNK_STA_WIDTH(ntb->lnk_sta);
1953	oldspeed = ntb->lnk_sta & NTB_LINK_SPEED_MASK;
1954	if (oldwidth != width || oldspeed != speed)
1955		goto retry;
1956
1957out:
1958	callout_reset(&ntb->heartbeat_timer, NTB_HB_TIMEOUT * hz, atom_link_hb,
1959	    ntb);
1960	return;
1961
1962retry:
1963	callout_reset(&ntb->lr_timer, NTB_HB_TIMEOUT * hz, recover_atom_link,
1964	    ntb);
1965}
1966
1967/*
1968 * Polls the HW link status register(s); returns true if something has changed.
1969 */
1970static bool
1971ntb_poll_link(struct ntb_softc *ntb)
1972{
1973	uint32_t ntb_cntl;
1974	uint16_t reg_val;
1975
1976	if (ntb->type == NTB_ATOM) {
1977		ntb_cntl = ntb_reg_read(4, ntb->reg->ntb_ctl);
1978		if (ntb_cntl == ntb->ntb_ctl)
1979			return (false);
1980
1981		ntb->ntb_ctl = ntb_cntl;
1982		ntb->lnk_sta = ntb_reg_read(4, ntb->reg->lnk_sta);
1983	} else {
1984		db_iowrite_raw(ntb, ntb->self_reg->db_bell, ntb->db_link_mask);
1985
1986		reg_val = pci_read_config(ntb->device, ntb->reg->lnk_sta, 2);
1987		if (reg_val == ntb->lnk_sta)
1988			return (false);
1989
1990		ntb->lnk_sta = reg_val;
1991	}
1992	return (true);
1993}
1994
1995static inline enum ntb_speed
1996ntb_link_sta_speed(struct ntb_softc *ntb)
1997{
1998
1999	if (!link_is_up(ntb))
2000		return (NTB_SPEED_NONE);
2001	return (ntb->lnk_sta & NTB_LINK_SPEED_MASK);
2002}
2003
2004static inline enum ntb_width
2005ntb_link_sta_width(struct ntb_softc *ntb)
2006{
2007
2008	if (!link_is_up(ntb))
2009		return (NTB_WIDTH_NONE);
2010	return (NTB_LNK_STA_WIDTH(ntb->lnk_sta));
2011}
2012
2013SYSCTL_NODE(_hw_ntb, OID_AUTO, debug_info, CTLFLAG_RW, 0,
2014    "Driver state, statistics, and HW registers");
2015
2016#define NTB_REGSZ_MASK	(3ul << 30)
2017#define NTB_REG_64	(1ul << 30)
2018#define NTB_REG_32	(2ul << 30)
2019#define NTB_REG_16	(3ul << 30)
2020#define NTB_REG_8	(0ul << 30)
2021
2022#define NTB_DB_READ	(1ul << 29)
2023#define NTB_PCI_REG	(1ul << 28)
2024#define NTB_REGFLAGS_MASK	(NTB_REGSZ_MASK | NTB_DB_READ | NTB_PCI_REG)
2025
2026static void
2027ntb_sysctl_init(struct ntb_softc *ntb)
2028{
2029	struct sysctl_oid_list *tree_par, *regpar, *statpar, *errpar;
2030	struct sysctl_ctx_list *ctx;
2031	struct sysctl_oid *tree, *tmptree;
2032
2033	ctx = device_get_sysctl_ctx(ntb->device);
2034
2035	tree = SYSCTL_ADD_NODE(ctx,
2036	    SYSCTL_CHILDREN(device_get_sysctl_tree(ntb->device)), OID_AUTO,
2037	    "debug_info", CTLFLAG_RD, NULL,
2038	    "Driver state, statistics, and HW registers");
2039	tree_par = SYSCTL_CHILDREN(tree);
2040
2041	SYSCTL_ADD_UINT(ctx, tree_par, OID_AUTO, "conn_type", CTLFLAG_RD,
2042	    &ntb->conn_type, 0, "0 - Transparent; 1 - B2B; 2 - Root Port");
2043	SYSCTL_ADD_UINT(ctx, tree_par, OID_AUTO, "dev_type", CTLFLAG_RD,
2044	    &ntb->dev_type, 0, "0 - USD; 1 - DSD");
2045	SYSCTL_ADD_UINT(ctx, tree_par, OID_AUTO, "ppd", CTLFLAG_RD,
2046	    &ntb->ppd, 0, "Raw PPD register (cached)");
2047
2048	if (ntb->b2b_mw_idx != B2B_MW_DISABLED) {
2049#ifdef notyet
2050		SYSCTL_ADD_U8(ctx, tree_par, OID_AUTO, "b2b_idx", CTLFLAG_RD,
2051		    &ntb->b2b_mw_idx, 0,
2052		    "Index of the MW used for B2B remote register access");
2053#endif
2054		SYSCTL_ADD_UQUAD(ctx, tree_par, OID_AUTO, "b2b_off",
2055		    CTLFLAG_RD, &ntb->b2b_off,
2056		    "If non-zero, offset of B2B register region in shared MW");
2057	}
2058
2059	SYSCTL_ADD_PROC(ctx, tree_par, OID_AUTO, "features",
2060	    CTLFLAG_RD | CTLTYPE_STRING, ntb, 0, sysctl_handle_features, "A",
2061	    "Features/errata of this NTB device");
2062
2063	SYSCTL_ADD_UINT(ctx, tree_par, OID_AUTO, "ntb_ctl", CTLFLAG_RD,
2064	    __DEVOLATILE(uint32_t *, &ntb->ntb_ctl), 0,
2065	    "NTB CTL register (cached)");
2066	SYSCTL_ADD_UINT(ctx, tree_par, OID_AUTO, "lnk_sta", CTLFLAG_RD,
2067	    __DEVOLATILE(uint32_t *, &ntb->lnk_sta), 0,
2068	    "LNK STA register (cached)");
2069
2070	SYSCTL_ADD_PROC(ctx, tree_par, OID_AUTO, "link_status",
2071	    CTLFLAG_RD | CTLTYPE_STRING, ntb, 0, sysctl_handle_link_status,
2072	    "A", "Link status");
2073
2074#ifdef notyet
2075	SYSCTL_ADD_U8(ctx, tree_par, OID_AUTO, "mw_count", CTLFLAG_RD,
2076	    &ntb->mw_count, 0, "MW count");
2077	SYSCTL_ADD_U8(ctx, tree_par, OID_AUTO, "spad_count", CTLFLAG_RD,
2078	    &ntb->spad_count, 0, "Scratchpad count");
2079	SYSCTL_ADD_U8(ctx, tree_par, OID_AUTO, "db_count", CTLFLAG_RD,
2080	    &ntb->db_count, 0, "Doorbell count");
2081	SYSCTL_ADD_U8(ctx, tree_par, OID_AUTO, "db_vec_count", CTLFLAG_RD,
2082	    &ntb->db_vec_count, 0, "Doorbell vector count");
2083	SYSCTL_ADD_U8(ctx, tree_par, OID_AUTO, "db_vec_shift", CTLFLAG_RD,
2084	    &ntb->db_vec_shift, 0, "Doorbell vector shift");
2085#endif
2086
2087	SYSCTL_ADD_UQUAD(ctx, tree_par, OID_AUTO, "db_valid_mask", CTLFLAG_RD,
2088	    &ntb->db_valid_mask, "Doorbell valid mask");
2089	SYSCTL_ADD_UQUAD(ctx, tree_par, OID_AUTO, "db_link_mask", CTLFLAG_RD,
2090	    &ntb->db_link_mask, "Doorbell link mask");
2091	SYSCTL_ADD_UQUAD(ctx, tree_par, OID_AUTO, "db_mask", CTLFLAG_RD,
2092	    &ntb->db_mask, "Doorbell mask (cached)");
2093
2094	tmptree = SYSCTL_ADD_NODE(ctx, tree_par, OID_AUTO, "registers",
2095	    CTLFLAG_RD, NULL, "Raw HW registers (big-endian)");
2096	regpar = SYSCTL_CHILDREN(tmptree);
2097
2098	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "ntbcntl",
2099	    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb, NTB_REG_32 |
2100	    ntb->reg->ntb_ctl, sysctl_handle_register, "IU",
2101	    "NTB Control register");
2102	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "lnkcap",
2103	    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb, NTB_REG_32 |
2104	    0x19c, sysctl_handle_register, "IU",
2105	    "NTB Link Capabilities");
2106	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "lnkcon",
2107	    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb, NTB_REG_32 |
2108	    0x1a0, sysctl_handle_register, "IU",
2109	    "NTB Link Control register");
2110
2111	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "db_mask",
2112	    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
2113	    NTB_REG_64 | NTB_DB_READ | ntb->self_reg->db_mask,
2114	    sysctl_handle_register, "QU", "Doorbell mask register");
2115	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "db_bell",
2116	    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
2117	    NTB_REG_64 | NTB_DB_READ | ntb->self_reg->db_bell,
2118	    sysctl_handle_register, "QU", "Doorbell register");
2119
2120	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "incoming_xlat23",
2121	    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
2122	    NTB_REG_64 | ntb->xlat_reg->bar2_xlat,
2123	    sysctl_handle_register, "QU", "Incoming XLAT23 register");
2124	if (HAS_FEATURE(NTB_SPLIT_BAR)) {
2125		SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "incoming_xlat4",
2126		    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
2127		    NTB_REG_32 | ntb->xlat_reg->bar4_xlat,
2128		    sysctl_handle_register, "IU", "Incoming XLAT4 register");
2129		SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "incoming_xlat5",
2130		    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
2131		    NTB_REG_32 | ntb->xlat_reg->bar5_xlat,
2132		    sysctl_handle_register, "IU", "Incoming XLAT5 register");
2133	} else {
2134		SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "incoming_xlat45",
2135		    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
2136		    NTB_REG_64 | ntb->xlat_reg->bar4_xlat,
2137		    sysctl_handle_register, "QU", "Incoming XLAT45 register");
2138	}
2139
2140	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "incoming_lmt23",
2141	    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
2142	    NTB_REG_64 | ntb->xlat_reg->bar2_limit,
2143	    sysctl_handle_register, "QU", "Incoming LMT23 register");
2144	if (HAS_FEATURE(NTB_SPLIT_BAR)) {
2145		SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "incoming_lmt4",
2146		    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
2147		    NTB_REG_32 | ntb->xlat_reg->bar4_limit,
2148		    sysctl_handle_register, "IU", "Incoming LMT4 register");
2149		SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "incoming_lmt5",
2150		    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
2151		    NTB_REG_32 | ntb->xlat_reg->bar5_limit,
2152		    sysctl_handle_register, "IU", "Incoming LMT5 register");
2153	} else {
2154		SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "incoming_lmt45",
2155		    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
2156		    NTB_REG_64 | ntb->xlat_reg->bar4_limit,
2157		    sysctl_handle_register, "QU", "Incoming LMT45 register");
2158	}
2159
2160	if (ntb->type == NTB_ATOM)
2161		return;
2162
2163	tmptree = SYSCTL_ADD_NODE(ctx, regpar, OID_AUTO, "xeon_stats",
2164	    CTLFLAG_RD, NULL, "Xeon HW statistics");
2165	statpar = SYSCTL_CHILDREN(tmptree);
2166	SYSCTL_ADD_PROC(ctx, statpar, OID_AUTO, "upstream_mem_miss",
2167	    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
2168	    NTB_REG_16 | XEON_USMEMMISS_OFFSET,
2169	    sysctl_handle_register, "SU", "Upstream Memory Miss");
2170
2171	tmptree = SYSCTL_ADD_NODE(ctx, regpar, OID_AUTO, "xeon_hw_err",
2172	    CTLFLAG_RD, NULL, "Xeon HW errors");
2173	errpar = SYSCTL_CHILDREN(tmptree);
2174
2175	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "ppd",
2176	    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
2177	    NTB_REG_8 | NTB_PCI_REG | NTB_PPD_OFFSET,
2178	    sysctl_handle_register, "CU", "PPD");
2179
2180	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "pbar23_sz",
2181	    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
2182	    NTB_REG_8 | NTB_PCI_REG | XEON_PBAR23SZ_OFFSET,
2183	    sysctl_handle_register, "CU", "PBAR23 SZ (log2)");
2184	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "pbar4_sz",
2185	    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
2186	    NTB_REG_8 | NTB_PCI_REG | XEON_PBAR4SZ_OFFSET,
2187	    sysctl_handle_register, "CU", "PBAR4 SZ (log2)");
2188	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "pbar5_sz",
2189	    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
2190	    NTB_REG_8 | NTB_PCI_REG | XEON_PBAR5SZ_OFFSET,
2191	    sysctl_handle_register, "CU", "PBAR5 SZ (log2)");
2192
2193	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "sbar23_sz",
2194	    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
2195	    NTB_REG_8 | NTB_PCI_REG | XEON_SBAR23SZ_OFFSET,
2196	    sysctl_handle_register, "CU", "SBAR23 SZ (log2)");
2197	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "sbar4_sz",
2198	    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
2199	    NTB_REG_8 | NTB_PCI_REG | XEON_SBAR4SZ_OFFSET,
2200	    sysctl_handle_register, "CU", "SBAR4 SZ (log2)");
2201	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "sbar5_sz",
2202	    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
2203	    NTB_REG_8 | NTB_PCI_REG | XEON_SBAR5SZ_OFFSET,
2204	    sysctl_handle_register, "CU", "SBAR5 SZ (log2)");
2205
2206	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "devsts",
2207	    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
2208	    NTB_REG_16 | NTB_PCI_REG | XEON_DEVSTS_OFFSET,
2209	    sysctl_handle_register, "SU", "DEVSTS");
2210	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "lnksts",
2211	    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
2212	    NTB_REG_16 | NTB_PCI_REG | XEON_LINK_STATUS_OFFSET,
2213	    sysctl_handle_register, "SU", "LNKSTS");
2214	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "slnksts",
2215	    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
2216	    NTB_REG_16 | NTB_PCI_REG | XEON_SLINK_STATUS_OFFSET,
2217	    sysctl_handle_register, "SU", "SLNKSTS");
2218
2219	SYSCTL_ADD_PROC(ctx, errpar, OID_AUTO, "uncerrsts",
2220	    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
2221	    NTB_REG_32 | NTB_PCI_REG | XEON_UNCERRSTS_OFFSET,
2222	    sysctl_handle_register, "IU", "UNCERRSTS");
2223	SYSCTL_ADD_PROC(ctx, errpar, OID_AUTO, "corerrsts",
2224	    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
2225	    NTB_REG_32 | NTB_PCI_REG | XEON_CORERRSTS_OFFSET,
2226	    sysctl_handle_register, "IU", "CORERRSTS");
2227
2228	if (ntb->conn_type != NTB_CONN_B2B)
2229		return;
2230
2231	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "outgoing_xlat23",
2232	    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
2233	    NTB_REG_64 | ntb->bar_info[NTB_B2B_BAR_1].pbarxlat_off,
2234	    sysctl_handle_register, "QU", "Outgoing XLAT23 register");
2235	if (HAS_FEATURE(NTB_SPLIT_BAR)) {
2236		SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "outgoing_xlat4",
2237		    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
2238		    NTB_REG_32 | ntb->bar_info[NTB_B2B_BAR_2].pbarxlat_off,
2239		    sysctl_handle_register, "IU", "Outgoing XLAT4 register");
2240		SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "outgoing_xlat5",
2241		    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
2242		    NTB_REG_32 | ntb->bar_info[NTB_B2B_BAR_3].pbarxlat_off,
2243		    sysctl_handle_register, "IU", "Outgoing XLAT5 register");
2244	} else {
2245		SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "outgoing_xlat45",
2246		    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
2247		    NTB_REG_64 | ntb->bar_info[NTB_B2B_BAR_2].pbarxlat_off,
2248		    sysctl_handle_register, "QU", "Outgoing XLAT45 register");
2249	}
2250
2251	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "outgoing_lmt23",
2252	    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
2253	    NTB_REG_64 | XEON_PBAR2LMT_OFFSET,
2254	    sysctl_handle_register, "QU", "Outgoing LMT23 register");
2255	if (HAS_FEATURE(NTB_SPLIT_BAR)) {
2256		SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "outgoing_lmt4",
2257		    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
2258		    NTB_REG_32 | XEON_PBAR4LMT_OFFSET,
2259		    sysctl_handle_register, "IU", "Outgoing LMT4 register");
2260		SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "outgoing_lmt5",
2261		    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
2262		    NTB_REG_32 | XEON_PBAR5LMT_OFFSET,
2263		    sysctl_handle_register, "IU", "Outgoing LMT5 register");
2264	} else {
2265		SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "outgoing_lmt45",
2266		    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
2267		    NTB_REG_64 | XEON_PBAR4LMT_OFFSET,
2268		    sysctl_handle_register, "QU", "Outgoing LMT45 register");
2269	}
2270
2271	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "sbar01_base",
2272	    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
2273	    NTB_REG_64 | ntb->xlat_reg->bar0_base,
2274	    sysctl_handle_register, "QU", "Secondary BAR01 base register");
2275	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "sbar23_base",
2276	    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
2277	    NTB_REG_64 | ntb->xlat_reg->bar2_base,
2278	    sysctl_handle_register, "QU", "Secondary BAR23 base register");
2279	if (HAS_FEATURE(NTB_SPLIT_BAR)) {
2280		SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "sbar4_base",
2281		    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
2282		    NTB_REG_32 | ntb->xlat_reg->bar4_base,
2283		    sysctl_handle_register, "IU",
2284		    "Secondary BAR4 base register");
2285		SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "sbar5_base",
2286		    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
2287		    NTB_REG_32 | ntb->xlat_reg->bar5_base,
2288		    sysctl_handle_register, "IU",
2289		    "Secondary BAR5 base register");
2290	} else {
2291		SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "sbar45_base",
2292		    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
2293		    NTB_REG_64 | ntb->xlat_reg->bar4_base,
2294		    sysctl_handle_register, "QU",
2295		    "Secondary BAR45 base register");
2296	}
2297}
2298
2299static int
2300sysctl_handle_features(SYSCTL_HANDLER_ARGS)
2301{
2302	struct ntb_softc *ntb;
2303	struct sbuf sb;
2304	int error;
2305
2306	error = 0;
2307	ntb = arg1;
2308
2309	sbuf_new_for_sysctl(&sb, NULL, 256, req);
2310
2311	sbuf_printf(&sb, "%b", ntb->features, NTB_FEATURES_STR);
2312	error = sbuf_finish(&sb);
2313	sbuf_delete(&sb);
2314
2315	if (error || !req->newptr)
2316		return (error);
2317	return (EINVAL);
2318}
2319
2320static int
2321sysctl_handle_link_status(SYSCTL_HANDLER_ARGS)
2322{
2323	struct ntb_softc *ntb;
2324	struct sbuf sb;
2325	enum ntb_speed speed;
2326	enum ntb_width width;
2327	int error;
2328
2329	error = 0;
2330	ntb = arg1;
2331
2332	sbuf_new_for_sysctl(&sb, NULL, 32, req);
2333
2334	if (ntb_link_is_up(ntb, &speed, &width))
2335		sbuf_printf(&sb, "up / PCIe Gen %u / Width x%u",
2336		    (unsigned)speed, (unsigned)width);
2337	else
2338		sbuf_printf(&sb, "down");
2339
2340	error = sbuf_finish(&sb);
2341	sbuf_delete(&sb);
2342
2343	if (error || !req->newptr)
2344		return (error);
2345	return (EINVAL);
2346}
2347
2348static int
2349sysctl_handle_register(SYSCTL_HANDLER_ARGS)
2350{
2351	struct ntb_softc *ntb;
2352	const void *outp;
2353	uintptr_t sz;
2354	uint64_t umv;
2355	char be[sizeof(umv)];
2356	size_t outsz;
2357	uint32_t reg;
2358	bool db, pci;
2359	int error;
2360
2361	ntb = arg1;
2362	reg = arg2 & ~NTB_REGFLAGS_MASK;
2363	sz = arg2 & NTB_REGSZ_MASK;
2364	db = (arg2 & NTB_DB_READ) != 0;
2365	pci = (arg2 & NTB_PCI_REG) != 0;
2366
2367	KASSERT(!(db && pci), ("bogus"));
2368
2369	if (db) {
2370		KASSERT(sz == NTB_REG_64, ("bogus"));
2371		umv = db_ioread(ntb, reg);
2372		outsz = sizeof(uint64_t);
2373	} else {
2374		switch (sz) {
2375		case NTB_REG_64:
2376			if (pci)
2377				umv = pci_read_config(ntb->device, reg, 8);
2378			else
2379				umv = ntb_reg_read(8, reg);
2380			outsz = sizeof(uint64_t);
2381			break;
2382		case NTB_REG_32:
2383			if (pci)
2384				umv = pci_read_config(ntb->device, reg, 4);
2385			else
2386				umv = ntb_reg_read(4, reg);
2387			outsz = sizeof(uint32_t);
2388			break;
2389		case NTB_REG_16:
2390			if (pci)
2391				umv = pci_read_config(ntb->device, reg, 2);
2392			else
2393				umv = ntb_reg_read(2, reg);
2394			outsz = sizeof(uint16_t);
2395			break;
2396		case NTB_REG_8:
2397			if (pci)
2398				umv = pci_read_config(ntb->device, reg, 1);
2399			else
2400				umv = ntb_reg_read(1, reg);
2401			outsz = sizeof(uint8_t);
2402			break;
2403		default:
2404			panic("bogus");
2405			break;
2406		}
2407	}
2408
2409	/* Encode bigendian so that sysctl -x is legible. */
2410	be64enc(be, umv);
2411	outp = ((char *)be) + sizeof(umv) - outsz;
2412
2413	error = SYSCTL_OUT(req, outp, outsz);
2414	if (error || !req->newptr)
2415		return (error);
2416	return (EINVAL);
2417}
2418
2419static unsigned
2420ntb_user_mw_to_idx(struct ntb_softc *ntb, unsigned uidx)
2421{
2422
2423	if (ntb->b2b_mw_idx != B2B_MW_DISABLED && ntb->b2b_off == 0 &&
2424	    uidx >= ntb->b2b_mw_idx)
2425		return (uidx + 1);
2426	return (uidx);
2427}
2428
2429/*
2430 * Public API to the rest of the OS
2431 */
2432
2433/**
2434 * ntb_get_max_spads() - get the total scratch regs usable
2435 * @ntb: pointer to ntb_softc instance
2436 *
2437 * This function returns the max 32bit scratchpad registers usable by the
2438 * upper layer.
2439 *
2440 * RETURNS: total number of scratch pad registers available
2441 */
2442uint8_t
2443ntb_get_max_spads(struct ntb_softc *ntb)
2444{
2445
2446	return (ntb->spad_count);
2447}
2448
2449/*
2450 * ntb_mw_count() - Get the number of memory windows available for KPI
2451 * consumers.
2452 *
2453 * (Excludes any MW wholly reserved for register access.)
2454 */
2455uint8_t
2456ntb_mw_count(struct ntb_softc *ntb)
2457{
2458
2459	if (ntb->b2b_mw_idx != B2B_MW_DISABLED && ntb->b2b_off == 0)
2460		return (ntb->mw_count - 1);
2461	return (ntb->mw_count);
2462}
2463
2464/**
2465 * ntb_spad_write() - write to the secondary scratchpad register
2466 * @ntb: pointer to ntb_softc instance
2467 * @idx: index to the scratchpad register, 0 based
2468 * @val: the data value to put into the register
2469 *
2470 * This function allows writing of a 32bit value to the indexed scratchpad
2471 * register. The register resides on the secondary (external) side.
2472 *
2473 * RETURNS: An appropriate ERRNO error value on error, or zero for success.
2474 */
2475int
2476ntb_spad_write(struct ntb_softc *ntb, unsigned int idx, uint32_t val)
2477{
2478
2479	if (idx >= ntb->spad_count)
2480		return (EINVAL);
2481
2482	ntb_reg_write(4, ntb->self_reg->spad + idx * 4, val);
2483
2484	return (0);
2485}
2486
2487/**
2488 * ntb_spad_read() - read from the primary scratchpad register
2489 * @ntb: pointer to ntb_softc instance
2490 * @idx: index to scratchpad register, 0 based
2491 * @val: pointer to 32bit integer for storing the register value
2492 *
2493 * This function allows reading of the 32bit scratchpad register on
2494 * the primary (internal) side.
2495 *
2496 * RETURNS: An appropriate ERRNO error value on error, or zero for success.
2497 */
2498int
2499ntb_spad_read(struct ntb_softc *ntb, unsigned int idx, uint32_t *val)
2500{
2501
2502	if (idx >= ntb->spad_count)
2503		return (EINVAL);
2504
2505	*val = ntb_reg_read(4, ntb->self_reg->spad + idx * 4);
2506
2507	return (0);
2508}
2509
2510/**
2511 * ntb_peer_spad_write() - write to the secondary scratchpad register
2512 * @ntb: pointer to ntb_softc instance
2513 * @idx: index to the scratchpad register, 0 based
2514 * @val: the data value to put into the register
2515 *
2516 * This function allows writing of a 32bit value to the indexed scratchpad
2517 * register. The register resides on the secondary (external) side.
2518 *
2519 * RETURNS: An appropriate ERRNO error value on error, or zero for success.
2520 */
2521int
2522ntb_peer_spad_write(struct ntb_softc *ntb, unsigned int idx, uint32_t val)
2523{
2524
2525	if (idx >= ntb->spad_count)
2526		return (EINVAL);
2527
2528	if (HAS_FEATURE(NTB_SDOORBELL_LOCKUP))
2529		ntb_mw_write(4, XEON_SPAD_OFFSET + idx * 4, val);
2530	else
2531		ntb_reg_write(4, ntb->peer_reg->spad + idx * 4, val);
2532
2533	return (0);
2534}
2535
2536/**
2537 * ntb_peer_spad_read() - read from the primary scratchpad register
2538 * @ntb: pointer to ntb_softc instance
2539 * @idx: index to scratchpad register, 0 based
2540 * @val: pointer to 32bit integer for storing the register value
2541 *
2542 * This function allows reading of the 32bit scratchpad register on
2543 * the primary (internal) side.
2544 *
2545 * RETURNS: An appropriate ERRNO error value on error, or zero for success.
2546 */
2547int
2548ntb_peer_spad_read(struct ntb_softc *ntb, unsigned int idx, uint32_t *val)
2549{
2550
2551	if (idx >= ntb->spad_count)
2552		return (EINVAL);
2553
2554	if (HAS_FEATURE(NTB_SDOORBELL_LOCKUP))
2555		*val = ntb_mw_read(4, XEON_SPAD_OFFSET + idx * 4);
2556	else
2557		*val = ntb_reg_read(4, ntb->peer_reg->spad + idx * 4);
2558
2559	return (0);
2560}
2561
2562/*
2563 * ntb_mw_get_range() - get the range of a memory window
2564 * @ntb:        NTB device context
2565 * @idx:        Memory window number
2566 * @base:       OUT - the base address for mapping the memory window
2567 * @size:       OUT - the size for mapping the memory window
2568 * @align:      OUT - the base alignment for translating the memory window
2569 * @align_size: OUT - the size alignment for translating the memory window
2570 *
2571 * Get the range of a memory window.  NULL may be given for any output
2572 * parameter if the value is not needed.  The base and size may be used for
2573 * mapping the memory window, to access the peer memory.  The alignment and
2574 * size may be used for translating the memory window, for the peer to access
2575 * memory on the local system.
2576 *
2577 * Return: Zero on success, otherwise an error number.
2578 */
2579int
2580ntb_mw_get_range(struct ntb_softc *ntb, unsigned mw_idx, vm_paddr_t *base,
2581    caddr_t *vbase, size_t *size, size_t *align, size_t *align_size,
2582    bus_addr_t *plimit)
2583{
2584	struct ntb_pci_bar_info *bar;
2585	bus_addr_t limit;
2586	size_t bar_b2b_off;
2587	enum ntb_bar bar_num;
2588
2589	if (mw_idx >= ntb_mw_count(ntb))
2590		return (EINVAL);
2591	mw_idx = ntb_user_mw_to_idx(ntb, mw_idx);
2592
2593	bar_num = ntb_mw_to_bar(ntb, mw_idx);
2594	bar = &ntb->bar_info[bar_num];
2595	bar_b2b_off = 0;
2596	if (mw_idx == ntb->b2b_mw_idx) {
2597		KASSERT(ntb->b2b_off != 0,
2598		    ("user shouldn't get non-shared b2b mw"));
2599		bar_b2b_off = ntb->b2b_off;
2600	}
2601
2602	if (bar_is_64bit(ntb, bar_num))
2603		limit = BUS_SPACE_MAXADDR;
2604	else
2605		limit = BUS_SPACE_MAXADDR_32BIT;
2606
2607	if (base != NULL)
2608		*base = bar->pbase + bar_b2b_off;
2609	if (vbase != NULL)
2610		*vbase = bar->vbase + bar_b2b_off;
2611	if (size != NULL)
2612		*size = bar->size - bar_b2b_off;
2613	if (align != NULL)
2614		*align = bar->size;
2615	if (align_size != NULL)
2616		*align_size = 1;
2617	if (plimit != NULL)
2618		*plimit = limit;
2619	return (0);
2620}
2621
2622/*
2623 * ntb_mw_set_trans() - set the translation of a memory window
2624 * @ntb:        NTB device context
2625 * @idx:        Memory window number
2626 * @addr:       The dma address local memory to expose to the peer
2627 * @size:       The size of the local memory to expose to the peer
2628 *
2629 * Set the translation of a memory window.  The peer may access local memory
2630 * through the window starting at the address, up to the size.  The address
2631 * must be aligned to the alignment specified by ntb_mw_get_range().  The size
2632 * must be aligned to the size alignment specified by ntb_mw_get_range().  The
2633 * address must be below the plimit specified by ntb_mw_get_range() (i.e. for
2634 * 32-bit BARs).
2635 *
2636 * Return: Zero on success, otherwise an error number.
2637 */
2638int
2639ntb_mw_set_trans(struct ntb_softc *ntb, unsigned idx, bus_addr_t addr,
2640    size_t size)
2641{
2642	struct ntb_pci_bar_info *bar;
2643	uint64_t base, limit, reg_val;
2644	size_t bar_size, mw_size;
2645	uint32_t base_reg, xlat_reg, limit_reg;
2646	enum ntb_bar bar_num;
2647
2648	if (idx >= ntb_mw_count(ntb))
2649		return (EINVAL);
2650	idx = ntb_user_mw_to_idx(ntb, idx);
2651
2652	bar_num = ntb_mw_to_bar(ntb, idx);
2653	bar = &ntb->bar_info[bar_num];
2654
2655	bar_size = bar->size;
2656	if (idx == ntb->b2b_mw_idx)
2657		mw_size = bar_size - ntb->b2b_off;
2658	else
2659		mw_size = bar_size;
2660
2661	/* Hardware requires that addr is aligned to bar size */
2662	if ((addr & (bar_size - 1)) != 0)
2663		return (EINVAL);
2664
2665	if (size > mw_size)
2666		return (EINVAL);
2667
2668	bar_get_xlat_params(ntb, bar_num, &base_reg, &xlat_reg, &limit_reg);
2669
2670	limit = 0;
2671	if (bar_is_64bit(ntb, bar_num)) {
2672		base = ntb_reg_read(8, base_reg) & BAR_HIGH_MASK;
2673
2674		if (limit_reg != 0 && size != mw_size)
2675			limit = base + size;
2676
2677		/* Set and verify translation address */
2678		ntb_reg_write(8, xlat_reg, addr);
2679		reg_val = ntb_reg_read(8, xlat_reg) & BAR_HIGH_MASK;
2680		if (reg_val != addr) {
2681			ntb_reg_write(8, xlat_reg, 0);
2682			return (EIO);
2683		}
2684
2685		/* Set and verify the limit */
2686		ntb_reg_write(8, limit_reg, limit);
2687		reg_val = ntb_reg_read(8, limit_reg) & BAR_HIGH_MASK;
2688		if (reg_val != limit) {
2689			ntb_reg_write(8, limit_reg, base);
2690			ntb_reg_write(8, xlat_reg, 0);
2691			return (EIO);
2692		}
2693	} else {
2694		/* Configure 32-bit (split) BAR MW */
2695
2696		if ((addr & UINT32_MAX) != addr)
2697			return (ERANGE);
2698		if (((addr + size) & UINT32_MAX) != (addr + size))
2699			return (ERANGE);
2700
2701		base = ntb_reg_read(4, base_reg) & BAR_HIGH_MASK;
2702
2703		if (limit_reg != 0 && size != mw_size)
2704			limit = base + size;
2705
2706		/* Set and verify translation address */
2707		ntb_reg_write(4, xlat_reg, addr);
2708		reg_val = ntb_reg_read(4, xlat_reg) & BAR_HIGH_MASK;
2709		if (reg_val != addr) {
2710			ntb_reg_write(4, xlat_reg, 0);
2711			return (EIO);
2712		}
2713
2714		/* Set and verify the limit */
2715		ntb_reg_write(4, limit_reg, limit);
2716		reg_val = ntb_reg_read(4, limit_reg) & BAR_HIGH_MASK;
2717		if (reg_val != limit) {
2718			ntb_reg_write(4, limit_reg, base);
2719			ntb_reg_write(4, xlat_reg, 0);
2720			return (EIO);
2721		}
2722	}
2723	return (0);
2724}
2725
/*
 * ntb_mw_clear_trans() - clear the translation of a memory window
 * @ntb:	NTB device context
 * @mw_idx:	Memory window number
 *
 * Clear the translation of a memory window.  The peer may no longer access
 * local memory through the window.  Implemented as ntb_mw_set_trans() with
 * a zero address and a zero size.
 *
 * Return: Zero on success, otherwise an error number.
 */
int
ntb_mw_clear_trans(struct ntb_softc *ntb, unsigned mw_idx)
{
	int error;

	error = ntb_mw_set_trans(ntb, mw_idx, 0, 0);
	return (error);
}
2742
2743/*
2744 * ntb_mw_get_wc - Get the write-combine status of a memory window
2745 *
2746 * Returns:  Zero on success, setting *wc; otherwise an error number (e.g. if
2747 * idx is an invalid memory window).
2748 *
2749 * Mode is a VM_MEMATTR_* type.
2750 */
2751int
2752ntb_mw_get_wc(struct ntb_softc *ntb, unsigned idx, vm_memattr_t *mode)
2753{
2754	struct ntb_pci_bar_info *bar;
2755
2756	if (idx >= ntb_mw_count(ntb))
2757		return (EINVAL);
2758	idx = ntb_user_mw_to_idx(ntb, idx);
2759
2760	bar = &ntb->bar_info[ntb_mw_to_bar(ntb, idx)];
2761	*mode = bar->map_mode;
2762	return (0);
2763}
2764
2765/*
2766 * ntb_mw_set_wc - Set the write-combine status of a memory window
2767 *
2768 * If 'mode' matches the current status, this does nothing and succeeds.  Mode
2769 * is a VM_MEMATTR_* type.
2770 *
2771 * Returns:  Zero on success, setting the caching attribute on the virtual
2772 * mapping of the BAR; otherwise an error number (e.g. if idx is an invalid
2773 * memory window, or if changing the caching attribute fails).
2774 */
2775int
2776ntb_mw_set_wc(struct ntb_softc *ntb, unsigned idx, vm_memattr_t mode)
2777{
2778
2779	if (idx >= ntb_mw_count(ntb))
2780		return (EINVAL);
2781
2782	idx = ntb_user_mw_to_idx(ntb, idx);
2783	return (ntb_mw_set_wc_internal(ntb, idx, mode));
2784}
2785
2786static int
2787ntb_mw_set_wc_internal(struct ntb_softc *ntb, unsigned idx, vm_memattr_t mode)
2788{
2789	struct ntb_pci_bar_info *bar;
2790	int rc;
2791
2792	bar = &ntb->bar_info[ntb_mw_to_bar(ntb, idx)];
2793	if (bar->map_mode == mode)
2794		return (0);
2795
2796	rc = pmap_change_attr((vm_offset_t)bar->vbase, bar->size, mode);
2797	if (rc == 0)
2798		bar->map_mode = mode;
2799
2800	return (rc);
2801}
2802
2803/**
2804 * ntb_peer_db_set() - Set the doorbell on the secondary/external side
2805 * @ntb: pointer to ntb_softc instance
2806 * @bit: doorbell bits to ring
2807 *
2808 * This function allows triggering of a doorbell on the secondary/external
2809 * side that will initiate an interrupt on the remote host
2810 */
2811void
2812ntb_peer_db_set(struct ntb_softc *ntb, uint64_t bit)
2813{
2814
2815	if (HAS_FEATURE(NTB_SDOORBELL_LOCKUP)) {
2816		ntb_mw_write(2, XEON_PDOORBELL_OFFSET, bit);
2817		return;
2818	}
2819
2820	db_iowrite(ntb, ntb->peer_reg->db_bell, bit);
2821}
2822
2823/*
2824 * ntb_get_peer_db_addr() - Return the address of the remote doorbell register,
2825 * as well as the size of the register (via *sz_out).
2826 *
2827 * This function allows a caller using I/OAT DMA to chain the remote doorbell
2828 * ring to its memory window write.
2829 *
2830 * Note that writing the peer doorbell via a memory window will *not* generate
2831 * an interrupt on the remote host; that must be done seperately.
2832 */
2833bus_addr_t
2834ntb_get_peer_db_addr(struct ntb_softc *ntb, vm_size_t *sz_out)
2835{
2836	struct ntb_pci_bar_info *bar;
2837	uint64_t regoff;
2838
2839	KASSERT(sz_out != NULL, ("must be non-NULL"));
2840
2841	if (!HAS_FEATURE(NTB_SDOORBELL_LOCKUP)) {
2842		bar = &ntb->bar_info[NTB_CONFIG_BAR];
2843		regoff = ntb->peer_reg->db_bell;
2844	} else {
2845		KASSERT(ntb->b2b_mw_idx != B2B_MW_DISABLED,
2846		    ("invalid b2b idx"));
2847
2848		bar = &ntb->bar_info[ntb_mw_to_bar(ntb, ntb->b2b_mw_idx)];
2849		regoff = XEON_PDOORBELL_OFFSET;
2850	}
2851	KASSERT(bar->pci_bus_tag != X86_BUS_SPACE_IO, ("uh oh"));
2852
2853	*sz_out = ntb->reg->db_size;
2854	/* HACK: Specific to current x86 bus implementation. */
2855	return ((uint64_t)bar->pci_bus_handle + regoff);
2856}
2857
2858/*
2859 * ntb_db_valid_mask() - get a mask of doorbell bits supported by the ntb
2860 * @ntb:	NTB device context
2861 *
2862 * Hardware may support different number or arrangement of doorbell bits.
2863 *
2864 * Return: A mask of doorbell bits supported by the ntb.
2865 */
2866uint64_t
2867ntb_db_valid_mask(struct ntb_softc *ntb)
2868{
2869
2870	return (ntb->db_valid_mask);
2871}
2872
2873/*
2874 * ntb_db_vector_mask() - get a mask of doorbell bits serviced by a vector
2875 * @ntb:	NTB device context
2876 * @vector:	Doorbell vector number
2877 *
2878 * Each interrupt vector may have a different number or arrangement of bits.
2879 *
2880 * Return: A mask of doorbell bits serviced by a vector.
2881 */
2882uint64_t
2883ntb_db_vector_mask(struct ntb_softc *ntb, uint32_t vector)
2884{
2885
2886	if (vector > ntb->db_vec_count)
2887		return (0);
2888	return (ntb->db_valid_mask & ntb_vec_mask(ntb, vector));
2889}
2890
2891/**
2892 * ntb_link_is_up() - get the current ntb link state
2893 * @ntb:        NTB device context
2894 * @speed:      OUT - The link speed expressed as PCIe generation number
2895 * @width:      OUT - The link width expressed as the number of PCIe lanes
2896 *
2897 * RETURNS: true or false based on the hardware link state
2898 */
2899bool
2900ntb_link_is_up(struct ntb_softc *ntb, enum ntb_speed *speed,
2901    enum ntb_width *width)
2902{
2903
2904	if (speed != NULL)
2905		*speed = ntb_link_sta_speed(ntb);
2906	if (width != NULL)
2907		*width = ntb_link_sta_width(ntb);
2908	return (link_is_up(ntb));
2909}
2910
2911static void
2912save_bar_parameters(struct ntb_pci_bar_info *bar)
2913{
2914
2915	bar->pci_bus_tag = rman_get_bustag(bar->pci_resource);
2916	bar->pci_bus_handle = rman_get_bushandle(bar->pci_resource);
2917	bar->pbase = rman_get_start(bar->pci_resource);
2918	bar->size = rman_get_size(bar->pci_resource);
2919	bar->vbase = rman_get_virtual(bar->pci_resource);
2920}
2921
2922device_t
2923ntb_get_device(struct ntb_softc *ntb)
2924{
2925
2926	return (ntb->device);
2927}
2928
/*
 * Export HW-specific errata information: report whether 'feature' is set
 * for this device, as evaluated by HAS_FEATURE().
 */
bool
ntb_has_feature(struct ntb_softc *ntb, uint32_t feature)
{
	bool present;

	present = HAS_FEATURE(feature);
	return (present);
}
2936