ntb_hw.c revision 289645
1/*-
2 * Copyright (C) 2013 Intel Corporation
3 * Copyright (C) 2015 EMC Corporation
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 *    notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 *    notice, this list of conditions and the following disclaimer in the
13 *    documentation and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25 * SUCH DAMAGE.
26 */
27
28#include <sys/cdefs.h>
29__FBSDID("$FreeBSD: head/sys/dev/ntb/ntb_hw/ntb_hw.c 289645 2015-10-20 19:19:48Z cem $");
30
31#include <sys/param.h>
32#include <sys/kernel.h>
33#include <sys/systm.h>
34#include <sys/bus.h>
35#include <sys/malloc.h>
36#include <sys/module.h>
37#include <sys/queue.h>
38#include <sys/rman.h>
39#include <sys/sysctl.h>
40#include <vm/vm.h>
41#include <vm/pmap.h>
42#include <machine/bus.h>
43#include <machine/pmap.h>
44#include <machine/resource.h>
45#include <dev/pci/pcireg.h>
46#include <dev/pci/pcivar.h>
47
48#include "ntb_regs.h"
49#include "ntb_hw.h"
50
51/*
52 * The Non-Transparent Bridge (NTB) is a device on some Intel processors that
53 * allows you to connect two systems using a PCI-e link.
54 *
55 * This module contains the hardware abstraction layer for the NTB. It allows
 * you to send and receive interrupts, map the memory windows and send and
57 * receive messages in the scratch-pad registers.
58 *
59 * NOTE: Much of the code in this module is shared with Linux. Any patches may
60 * be picked up and redistributed in Linux with a dual GPL/BSD license.
61 */
62
/* Worst-case vector count across supported hardware (Xeon vs. SoC). */
#define MAX_MSIX_INTERRUPTS MAX(XEON_DB_COUNT, SOC_DB_COUNT)

#define NTB_HB_TIMEOUT		1 /* second */
#define SOC_LINK_RECOVERY_TIME	500 /* ms */

/* Fetch this driver's softc from a newbus device_t. */
#define DEVICE2SOFTC(dev) ((struct ntb_softc *) device_get_softc(dev))
69
/* Hardware flavor: Xeon NTB or Atom SoC NTB. */
enum ntb_device_type {
	NTB_XEON,
	NTB_SOC
};

/* ntb_conn_type are hardware numbers, cannot change. */
enum ntb_conn_type {
	NTB_CONN_TRANSPARENT = 0,
	NTB_CONN_B2B = 1,
	NTB_CONN_RP = 2,
};

/* Which side of the B2B link this device sits on. */
enum ntb_b2b_direction {
	NTB_DEV_USD = 0,	/* upstream */
	NTB_DEV_DSD = 1,	/* downstream */
};

/* Logical BAR slots used by this driver. */
enum ntb_bar {
	NTB_CONFIG_BAR = 0,
	NTB_B2B_BAR_1,
	NTB_B2B_BAR_2,
	NTB_B2B_BAR_3,		/* only populated in split-BAR mode */
	NTB_MAX_BARS
};

/* Device features and workarounds */
/* NB: expands against a local 'ntb' softc pointer in the calling scope. */
#define HAS_FEATURE(feature)	\
	((ntb->features & (feature)) != 0)
98
/* Static per-device-ID description: ID, name, type, feature/workaround bits. */
struct ntb_hw_info {
	uint32_t		device_id;
	const char		*desc;
	enum ntb_device_type	type;
	uint32_t		features;
};

/* State for one mapped PCI BAR. */
struct ntb_pci_bar_info {
	bus_space_tag_t		pci_bus_tag;
	bus_space_handle_t	pci_bus_handle;
	int			pci_resource_id;
	struct resource		*pci_resource;
	vm_paddr_t		pbase;		/* physical base address */
	void			*vbase;		/* kernel virtual mapping */
	u_long			size;

	/* Configuration register offsets */
	uint32_t		psz_off;	/* primary-side BAR size reg */
	uint32_t		ssz_off;	/* secondary-side BAR size reg */
	uint32_t		pbarxlat_off;	/* primary-side xlat reg */
};

/* One allocated interrupt (MSI-X vector or legacy INTx). */
struct ntb_int_info {
	struct resource	*res;
	int		rid;
	void		*tag;
};

/* Per-vector ISR argument: softc back-pointer plus vector number. */
struct ntb_vec {
	struct ntb_softc	*ntb;
	uint32_t		num;
};

/* Hardware-flavor-specific register geometry (Xeon vs. SoC). */
struct ntb_reg {
	uint32_t	ntb_ctl;		/* control register offset */
	uint32_t	lnk_sta;		/* link status register offset */
	uint8_t		db_size;		/* doorbell width in bytes */
	unsigned	mw_bar[NTB_MAX_BARS];	/* MW index -> enum ntb_bar */
};

/* Doorbell/scratchpad register offsets for one side of the link. */
struct ntb_alt_reg {
	uint32_t	db_bell;
	uint32_t	db_mask;
	uint32_t	spad;
};

/* Secondary-side BAR base/translate/limit register offsets. */
struct ntb_xlat_reg {
	uint32_t	bar0_base;
	uint32_t	bar2_base;
	uint32_t	bar4_base;
	uint32_t	bar5_base;

	uint32_t	bar2_xlat;
	uint32_t	bar4_xlat;
	uint32_t	bar5_xlat;

	uint32_t	bar2_limit;
	uint32_t	bar4_limit;
	uint32_t	bar5_limit;
};

/* Bus addresses for the B2B peer windows (overridable via sysctl below). */
struct ntb_b2b_addr {
	uint64_t	bar0_addr;
	uint64_t	bar2_addr64;
	uint64_t	bar4_addr64;
	uint64_t	bar4_addr32;	/* used in split-BAR mode */
	uint64_t	bar5_addr32;	/* used in split-BAR mode */
};
167
/* Per-device driver state. */
struct ntb_softc {
	device_t		device;
	enum ntb_device_type	type;
	uint64_t		features;	/* NTB_* feature/quirk bits */

	struct ntb_pci_bar_info	bar_info[NTB_MAX_BARS];
	struct ntb_int_info	int_info[MAX_MSIX_INTERRUPTS];
	uint32_t		allocated_interrupts;

	struct callout		heartbeat_timer;	/* SoC link poll */
	struct callout		lr_timer;		/* SoC link recovery */

	void			*ntb_ctx;
	const struct ntb_ctx_ops *ctx_ops;
	struct ntb_vec		*msix_vec;
#define CTX_LOCK(sc)		mtx_lock_spin(&(sc)->ctx_lock)
#define CTX_UNLOCK(sc)		mtx_unlock_spin(&(sc)->ctx_lock)
#define CTX_ASSERT(sc,f)	mtx_assert(&(sc)->ctx_lock, (f))
	struct mtx		ctx_lock;

	uint32_t		ppd;
	enum ntb_conn_type	conn_type;
	enum ntb_b2b_direction	dev_type;

	/* Offset of peer bar0 in B2B BAR */
	uint64_t			b2b_off;
	/* Memory window used to access peer bar0 */
#define B2B_MW_DISABLED			UINT8_MAX
	uint8_t				b2b_mw_idx;

	uint8_t				mw_count;
	uint8_t				spad_count;
	uint8_t				db_count;	/* total doorbell bits */
	uint8_t				db_vec_count;	/* interrupt vectors */
	uint8_t				db_vec_shift;	/* db bits per vector */

	/* Protects local db_mask. */
#define DB_MASK_LOCK(sc)	mtx_lock_spin(&(sc)->db_mask_lock)
#define DB_MASK_UNLOCK(sc)	mtx_unlock_spin(&(sc)->db_mask_lock)
#define DB_MASK_ASSERT(sc,f)	mtx_assert(&(sc)->db_mask_lock, (f))
	struct mtx			db_mask_lock;

	uint32_t			ntb_ctl;	/* cached control reg */
	uint32_t			lnk_sta;	/* cached link status */

	uint64_t			db_valid_mask;
	uint64_t			db_link_mask;	/* link-event db bits */
	uint64_t			db_mask;	/* currently masked bits */

	int				last_ts;	/* ticks @ last irq */

	/* Hardware-flavor-specific register descriptions. */
	const struct ntb_reg		*reg;
	const struct ntb_alt_reg	*self_reg;
	const struct ntb_alt_reg	*peer_reg;
	const struct ntb_xlat_reg	*xlat_reg;
};
224
#ifdef __i386__
/*
 * i386 bus space lacks 64-bit accessors; synthesize them from two 32-bit
 * accesses (low dword at 'offset', high dword at 'offset + 4').  Note the
 * two halves are not read or written atomically.
 */
static __inline uint64_t
bus_space_read_8(bus_space_tag_t tag, bus_space_handle_t handle,
    bus_size_t offset)
{

	return (bus_space_read_4(tag, handle, offset) |
	    ((uint64_t)bus_space_read_4(tag, handle, offset + 4)) << 32);
}

static __inline void
bus_space_write_8(bus_space_tag_t tag, bus_space_handle_t handle,
    bus_size_t offset, uint64_t val)
{

	bus_space_write_4(tag, handle, offset, val);
	bus_space_write_4(tag, handle, offset + 4, val >> 32);
}
#endif
244
/*
 * Register accessors; SIZE is the access width in bytes (1/2/4/8).
 * All expand against a local 'ntb' softc pointer in the calling scope.
 */
#define ntb_bar_read(SIZE, bar, offset) \
	    bus_space_read_ ## SIZE (ntb->bar_info[(bar)].pci_bus_tag, \
	    ntb->bar_info[(bar)].pci_bus_handle, (offset))
#define ntb_bar_write(SIZE, bar, offset, val) \
	    bus_space_write_ ## SIZE (ntb->bar_info[(bar)].pci_bus_tag, \
	    ntb->bar_info[(bar)].pci_bus_handle, (offset), (val))
/* Shorthands for the config BAR (BAR 0)... */
#define ntb_reg_read(SIZE, offset) ntb_bar_read(SIZE, NTB_CONFIG_BAR, offset)
#define ntb_reg_write(SIZE, offset, val) \
	    ntb_bar_write(SIZE, NTB_CONFIG_BAR, offset, val)
/* ...and for the BAR backing the B2B memory window. */
#define ntb_mw_read(SIZE, offset) \
	    ntb_bar_read(SIZE, ntb_mw_to_bar(ntb, ntb->b2b_mw_idx), offset)
#define ntb_mw_write(SIZE, offset, val) \
	    ntb_bar_write(SIZE, ntb_mw_to_bar(ntb, ntb->b2b_mw_idx), \
		offset, val)
259
260static int ntb_probe(device_t device);
261static int ntb_attach(device_t device);
262static int ntb_detach(device_t device);
263static inline enum ntb_bar ntb_mw_to_bar(struct ntb_softc *, unsigned mw);
264static inline bool bar_is_64bit(struct ntb_softc *, enum ntb_bar);
265static inline void bar_get_xlat_params(struct ntb_softc *, enum ntb_bar,
266    uint32_t *base, uint32_t *xlat, uint32_t *lmt);
267static int ntb_map_pci_bars(struct ntb_softc *ntb);
268static void print_map_success(struct ntb_softc *, struct ntb_pci_bar_info *);
269static int map_mmr_bar(struct ntb_softc *ntb, struct ntb_pci_bar_info *bar);
270static int map_memory_window_bar(struct ntb_softc *ntb,
271    struct ntb_pci_bar_info *bar);
272static void ntb_unmap_pci_bar(struct ntb_softc *ntb);
273static int ntb_remap_msix(device_t, uint32_t desired, uint32_t avail);
274static int ntb_init_isr(struct ntb_softc *ntb);
275static int ntb_setup_legacy_interrupt(struct ntb_softc *ntb);
276static int ntb_setup_msix(struct ntb_softc *ntb, uint32_t num_vectors);
277static void ntb_teardown_interrupts(struct ntb_softc *ntb);
278static inline uint64_t ntb_vec_mask(struct ntb_softc *, uint64_t db_vector);
279static void ntb_interrupt(struct ntb_softc *, uint32_t vec);
280static void ndev_vec_isr(void *arg);
281static void ndev_irq_isr(void *arg);
282static inline uint64_t db_ioread(struct ntb_softc *, uint64_t regoff);
283static inline void db_iowrite(struct ntb_softc *, uint64_t regoff, uint64_t val);
284static int ntb_create_msix_vec(struct ntb_softc *ntb, uint32_t num_vectors);
285static void ntb_free_msix_vec(struct ntb_softc *ntb);
286static struct ntb_hw_info *ntb_get_device_info(uint32_t device_id);
287static void ntb_detect_max_mw(struct ntb_softc *ntb);
288static int ntb_detect_xeon(struct ntb_softc *ntb);
289static int ntb_detect_soc(struct ntb_softc *ntb);
290static int ntb_xeon_init_dev(struct ntb_softc *ntb);
291static int ntb_soc_init_dev(struct ntb_softc *ntb);
292static void ntb_teardown_xeon(struct ntb_softc *ntb);
293static void configure_soc_secondary_side_bars(struct ntb_softc *ntb);
294static void xeon_reset_sbar_size(struct ntb_softc *, enum ntb_bar idx,
295    enum ntb_bar regbar);
296static void xeon_set_sbar_base_and_limit(struct ntb_softc *,
297    uint64_t base_addr, enum ntb_bar idx, enum ntb_bar regbar);
298static void xeon_set_pbar_xlat(struct ntb_softc *, uint64_t base_addr,
299    enum ntb_bar idx);
300static int xeon_setup_b2b_mw(struct ntb_softc *,
301    const struct ntb_b2b_addr *addr, const struct ntb_b2b_addr *peer_addr);
302static inline bool link_is_up(struct ntb_softc *ntb);
303static inline bool soc_link_is_err(struct ntb_softc *ntb);
304static inline enum ntb_speed ntb_link_sta_speed(struct ntb_softc *);
305static inline enum ntb_width ntb_link_sta_width(struct ntb_softc *);
306static void soc_link_hb(void *arg);
307static void ntb_db_event(struct ntb_softc *ntb, uint32_t vec);
308static void recover_soc_link(void *arg);
309static bool ntb_poll_link(struct ntb_softc *ntb);
310static void save_bar_parameters(struct ntb_pci_bar_info *bar);
311
/*
 * Supported devices, keyed by the packed devid/vendor value returned by
 * pci_get_devid() (device ID in the high word, vendor 0x8086 in the low).
 * Terminated by the all-zero sentinel entry.
 */
static struct ntb_hw_info pci_ids[] = {
	/* XXX: PS/SS IDs left out until they are supported. */
	{ 0x0C4E8086, "BWD Atom Processor S1200 Non-Transparent Bridge B2B",
		NTB_SOC, 0 },

	{ 0x37258086, "JSF Xeon C35xx/C55xx Non-Transparent Bridge B2B",
		NTB_XEON, NTB_SDOORBELL_LOCKUP | NTB_B2BDOORBELL_BIT14 },
	{ 0x3C0D8086, "SNB Xeon E5/Core i7 Non-Transparent Bridge B2B",
		NTB_XEON, NTB_SDOORBELL_LOCKUP | NTB_B2BDOORBELL_BIT14 },
	{ 0x0E0D8086, "IVT Xeon E5 V2 Non-Transparent Bridge B2B", NTB_XEON,
		NTB_SDOORBELL_LOCKUP | NTB_B2BDOORBELL_BIT14 |
		    NTB_SB01BASE_LOCKUP | NTB_BAR_SIZE_4K },
	{ 0x2F0D8086, "HSX Xeon E5 V3 Non-Transparent Bridge B2B", NTB_XEON,
		NTB_SDOORBELL_LOCKUP | NTB_B2BDOORBELL_BIT14 |
		    NTB_SB01BASE_LOCKUP },
	{ 0x6F0D8086, "BDX Xeon E5 V4 Non-Transparent Bridge B2B", NTB_XEON,
		NTB_SDOORBELL_LOCKUP | NTB_B2BDOORBELL_BIT14 |
		    NTB_SB01BASE_LOCKUP },

	{ 0x00000000, NULL, NTB_SOC, 0 }	/* sentinel */
};
333
/* SoC register geometry: 64-bit doorbells, two memory-window BARs. */
static const struct ntb_reg soc_reg = {
	.ntb_ctl = SOC_NTBCNTL_OFFSET,
	.lnk_sta = SOC_LINK_STATUS_OFFSET,
	.db_size = sizeof(uint64_t),
	.mw_bar = { NTB_B2B_BAR_1, NTB_B2B_BAR_2 },
};

/* Local (primary) side doorbell/scratchpad offsets. */
static const struct ntb_alt_reg soc_pri_reg = {
	.db_bell = SOC_PDOORBELL_OFFSET,
	.db_mask = SOC_PDBMSK_OFFSET,
	.spad = SOC_SPAD_OFFSET,
};

/* Peer (B2B) side offsets; no peer doorbell-mask register is defined. */
static const struct ntb_alt_reg soc_b2b_reg = {
	.db_bell = SOC_B2B_DOORBELL_OFFSET,
	.spad = SOC_B2B_SPAD_OFFSET,
};

/* Secondary-side translation registers; base/limit regs disabled (below). */
static const struct ntb_xlat_reg soc_sec_xlat = {
#if 0
	/* "FIXME" says the Linux driver. */
	.bar0_base = SOC_SBAR0BASE_OFFSET,
	.bar2_base = SOC_SBAR2BASE_OFFSET,
	.bar4_base = SOC_SBAR4BASE_OFFSET,

	.bar2_limit = SOC_SBAR2LMT_OFFSET,
	.bar4_limit = SOC_SBAR4LMT_OFFSET,
#endif

	.bar2_xlat = SOC_SBAR2XLAT_OFFSET,
	.bar4_xlat = SOC_SBAR4XLAT_OFFSET,
};
366
/* Xeon register geometry: 16-bit doorbells, up to three MW BARs. */
static const struct ntb_reg xeon_reg = {
	.ntb_ctl = XEON_NTBCNTL_OFFSET,
	.lnk_sta = XEON_LINK_STATUS_OFFSET,
	.db_size = sizeof(uint16_t),
	.mw_bar = { NTB_B2B_BAR_1, NTB_B2B_BAR_2, NTB_B2B_BAR_3 },
};

/* Local (primary) side doorbell/scratchpad offsets. */
static const struct ntb_alt_reg xeon_pri_reg = {
	.db_bell = XEON_PDOORBELL_OFFSET,
	.db_mask = XEON_PDBMSK_OFFSET,
	.spad = XEON_SPAD_OFFSET,
};

/* Peer (B2B) side offsets; no peer doorbell-mask register is defined. */
static const struct ntb_alt_reg xeon_b2b_reg = {
	.db_bell = XEON_B2B_DOORBELL_OFFSET,
	.spad = XEON_B2B_SPAD_OFFSET,
};

/* Secondary-side base/limit/translation register offsets. */
static const struct ntb_xlat_reg xeon_sec_xlat = {
	.bar0_base = XEON_SBAR0BASE_OFFSET,
	.bar2_base = XEON_SBAR2BASE_OFFSET,
	.bar4_base = XEON_SBAR4BASE_OFFSET,
	.bar5_base = XEON_SBAR5BASE_OFFSET,

	.bar2_limit = XEON_SBAR2LMT_OFFSET,
	.bar4_limit = XEON_SBAR4LMT_OFFSET,
	.bar5_limit = XEON_SBAR5LMT_OFFSET,

	.bar2_xlat = XEON_SBAR2XLAT_OFFSET,
	.bar4_xlat = XEON_SBAR4XLAT_OFFSET,
	.bar5_xlat = XEON_SBAR5XLAT_OFFSET,
};

/* Default upstream-side B2B segment addresses (tunable via sysctl). */
static struct ntb_b2b_addr xeon_b2b_usd_addr = {
	.bar0_addr = XEON_B2B_BAR0_USD_ADDR,
	.bar2_addr64 = XEON_B2B_BAR2_USD_ADDR64,
	.bar4_addr64 = XEON_B2B_BAR4_USD_ADDR64,
	.bar4_addr32 = XEON_B2B_BAR4_USD_ADDR32,
	.bar5_addr32 = XEON_B2B_BAR5_USD_ADDR32,
};

/* Default downstream-side B2B segment addresses (tunable via sysctl). */
static struct ntb_b2b_addr xeon_b2b_dsd_addr = {
	.bar0_addr = XEON_B2B_BAR0_DSD_ADDR,
	.bar2_addr64 = XEON_B2B_BAR2_DSD_ADDR64,
	.bar4_addr64 = XEON_B2B_BAR4_DSD_ADDR64,
	.bar4_addr32 = XEON_B2B_BAR4_DSD_ADDR32,
	.bar5_addr32 = XEON_B2B_BAR5_DSD_ADDR32,
};
415
416SYSCTL_NODE(_hw_ntb, OID_AUTO, xeon_b2b, CTLFLAG_RW, 0,
417    "B2B MW segment overrides -- MUST be the same on both sides");
418
419SYSCTL_UQUAD(_hw_ntb_xeon_b2b, OID_AUTO, usd_bar2_addr64, CTLFLAG_RDTUN,
420    &xeon_b2b_usd_addr.bar2_addr64, 0, "If using B2B topology on Xeon "
421    "hardware, use this 64-bit address on the bus between the NTB devices for "
422    "the window at BAR2, on the upstream side of the link.  MUST be the same "
423    "address on both sides.");
424SYSCTL_UQUAD(_hw_ntb_xeon_b2b, OID_AUTO, usd_bar4_addr64, CTLFLAG_RDTUN,
425    &xeon_b2b_usd_addr.bar4_addr64, 0, "See usd_bar2_addr64, but BAR4.");
426SYSCTL_UQUAD(_hw_ntb_xeon_b2b, OID_AUTO, usd_bar4_addr32, CTLFLAG_RDTUN,
427    &xeon_b2b_usd_addr.bar4_addr32, 0, "See usd_bar2_addr64, but BAR4 "
428    "(split-BAR mode).");
429SYSCTL_UQUAD(_hw_ntb_xeon_b2b, OID_AUTO, usd_bar5_addr32, CTLFLAG_RDTUN,
430    &xeon_b2b_usd_addr.bar4_addr32, 0, "See usd_bar2_addr64, but BAR5 "
431    "(split-BAR mode).");
432
433SYSCTL_UQUAD(_hw_ntb_xeon_b2b, OID_AUTO, dsd_bar2_addr64, CTLFLAG_RDTUN,
434    &xeon_b2b_dsd_addr.bar2_addr64, 0, "If using B2B topology on Xeon "
435    "hardware, use this 64-bit address on the bus between the NTB devices for "
436    "the window at BAR2, on the downstream side of the link.  MUST be the same"
437    " address on both sides.");
438SYSCTL_UQUAD(_hw_ntb_xeon_b2b, OID_AUTO, dsd_bar4_addr64, CTLFLAG_RDTUN,
439    &xeon_b2b_dsd_addr.bar4_addr64, 0, "See dsd_bar2_addr64, but BAR4.");
440SYSCTL_UQUAD(_hw_ntb_xeon_b2b, OID_AUTO, dsd_bar4_addr32, CTLFLAG_RDTUN,
441    &xeon_b2b_dsd_addr.bar4_addr32, 0, "See dsd_bar2_addr64, but BAR4 "
442    "(split-BAR mode).");
443SYSCTL_UQUAD(_hw_ntb_xeon_b2b, OID_AUTO, dsd_bar5_addr32, CTLFLAG_RDTUN,
444    &xeon_b2b_dsd_addr.bar4_addr32, 0, "See dsd_bar2_addr64, but BAR5 "
445    "(split-BAR mode).");
446
/*
 * OS <-> Driver interface structures
 */
MALLOC_DEFINE(M_NTB, "ntb_hw", "ntb_hw driver memory allocations");

/* newbus device methods: probe/attach/detach only. */
static device_method_t ntb_pci_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe,     ntb_probe),
	DEVMETHOD(device_attach,    ntb_attach),
	DEVMETHOD(device_detach,    ntb_detach),
	DEVMETHOD_END
};

static driver_t ntb_pci_driver = {
	"ntb_hw",
	ntb_pci_methods,
	sizeof(struct ntb_softc),
};

static devclass_t ntb_devclass;
DRIVER_MODULE(ntb_hw, pci, ntb_pci_driver, ntb_devclass, NULL, NULL);
MODULE_VERSION(ntb_hw, 1);

/* Root of this driver's sysctl tree: hw.ntb. */
SYSCTL_NODE(_hw, OID_AUTO, ntb, CTLFLAG_RW, 0, "NTB sysctls");
471
472/*
473 * OS <-> Driver linkage functions
474 */
475static int
476ntb_probe(device_t device)
477{
478	struct ntb_hw_info *p;
479
480	p = ntb_get_device_info(pci_get_devid(device));
481	if (p == NULL)
482		return (ENXIO);
483
484	device_set_desc(device, p->desc);
485	return (0);
486}
487
/*
 * Device attach: initialize locks and timers, detect hardware parameters,
 * map BARs, perform flavor-specific device init, and wire up interrupts.
 * On any failure, ntb_detach() is invoked to unwind partial initialization.
 */
static int
ntb_attach(device_t device)
{
	struct ntb_softc *ntb;
	struct ntb_hw_info *p;
	int error;

	ntb = DEVICE2SOFTC(device);
	p = ntb_get_device_info(pci_get_devid(device));

	ntb->device = device;
	ntb->type = p->type;
	ntb->features = p->features;
	/* No B2B memory window chosen yet. */
	ntb->b2b_mw_idx = B2B_MW_DISABLED;

	/* Heartbeat timer for NTB_SOC since there is no link interrupt */
	callout_init(&ntb->heartbeat_timer, 1);
	callout_init(&ntb->lr_timer, 1);
	/* Spin mutexes so these locks are usable from interrupt context. */
	mtx_init(&ntb->db_mask_lock, "ntb hw bits", NULL, MTX_SPIN);
	mtx_init(&ntb->ctx_lock, "ntb ctx", NULL, MTX_SPIN);

	if (ntb->type == NTB_SOC)
		error = ntb_detect_soc(ntb);
	else
		error = ntb_detect_xeon(ntb);
	if (error)
		goto out;

	ntb_detect_max_mw(ntb);

	error = ntb_map_pci_bars(ntb);
	if (error)
		goto out;
	if (ntb->type == NTB_SOC)
		error = ntb_soc_init_dev(ntb);
	else
		error = ntb_xeon_init_dev(ntb);
	if (error)
		goto out;
	error = ntb_init_isr(ntb);
	if (error)
		goto out;

	pci_enable_busmaster(ntb->device);

	device_printf(ntb->device, "NTB device registered\n");

out:
	/* ntb_detach() tolerates a partially-initialized softc. */
	if (error != 0)
		ntb_detach(device);
	return (error);
}
540
/*
 * Device detach: quiesce doorbells, drain timers, tear down interrupts and
 * locks, then unmap the BARs.  Safe to call on a partially-attached device.
 */
static int
ntb_detach(device_t device)
{
	struct ntb_softc *ntb;

	ntb = DEVICE2SOFTC(device);

	/* Mask all doorbells before tearing anything down. */
	if (ntb->self_reg != NULL)
		ntb_db_set_mask(ntb, ntb->db_valid_mask);
	callout_drain(&ntb->heartbeat_timer);
	callout_drain(&ntb->lr_timer);
	if (ntb->type == NTB_XEON)
		ntb_teardown_xeon(ntb);
	ntb_teardown_interrupts(ntb);

	mtx_destroy(&ntb->db_mask_lock);
	mtx_destroy(&ntb->ctx_lock);

	/*
	 * Redetect total MWs so we unmap properly -- in case we lowered the
	 * maximum to work around Xeon errata.
	 */
	ntb_detect_max_mw(ntb);
	ntb_unmap_pci_bar(ntb);

	device_printf(ntb->device, "NTB device unregistered\n");

	return (0);
}
570
571/*
572 * Driver internal routines
573 */
/*
 * Map a memory window index to the BAR that backs it.  The index must be a
 * valid MW index, or the (non-disabled) B2B MW index.
 */
static inline enum ntb_bar
ntb_mw_to_bar(struct ntb_softc *ntb, unsigned mw)
{

	KASSERT(mw < ntb->mw_count ||
	    (mw != B2B_MW_DISABLED && mw == ntb->b2b_mw_idx),
	    ("%s: mw:%u > count:%u", __func__, mw, (unsigned)ntb->mw_count));
	KASSERT(ntb->reg->mw_bar[mw] != 0, ("invalid mw"));

	return (ntb->reg->mw_bar[mw]);
}
585
586static inline bool
587bar_is_64bit(struct ntb_softc *ntb, enum ntb_bar bar)
588{
589	/* XXX This assertion could be stronger. */
590	KASSERT(bar < NTB_MAX_BARS, ("bogus bar"));
591	return (bar < NTB_B2B_BAR_2 || !HAS_FEATURE(NTB_SPLIT_BAR));
592}
593
/*
 * Look up the secondary-side base/translate/limit register offsets for a
 * BAR.  Any of base/xlat/lmt may be NULL if the caller does not need that
 * value; an out-of-range BAR yields zeros (and trips the KASSERT).
 */
static inline void
bar_get_xlat_params(struct ntb_softc *ntb, enum ntb_bar bar, uint32_t *base,
    uint32_t *xlat, uint32_t *lmt)
{
	uint32_t basev, lmtv, xlatv;

	switch (bar) {
	case NTB_B2B_BAR_1:
		basev = ntb->xlat_reg->bar2_base;
		lmtv = ntb->xlat_reg->bar2_limit;
		xlatv = ntb->xlat_reg->bar2_xlat;
		break;
	case NTB_B2B_BAR_2:
		basev = ntb->xlat_reg->bar4_base;
		lmtv = ntb->xlat_reg->bar4_limit;
		xlatv = ntb->xlat_reg->bar4_xlat;
		break;
	case NTB_B2B_BAR_3:
		basev = ntb->xlat_reg->bar5_base;
		lmtv = ntb->xlat_reg->bar5_limit;
		xlatv = ntb->xlat_reg->bar5_xlat;
		break;
	default:
		KASSERT(bar >= NTB_B2B_BAR_1 && bar < NTB_MAX_BARS,
		    ("bad bar"));
		basev = lmtv = xlatv = 0;
		break;
	}

	if (base != NULL)
		*base = basev;
	if (xlat != NULL)
		*xlat = xlatv;
	if (lmt != NULL)
		*lmt = lmtv;
}
630
631static int
632ntb_map_pci_bars(struct ntb_softc *ntb)
633{
634	int rc;
635
636	ntb->bar_info[NTB_CONFIG_BAR].pci_resource_id = PCIR_BAR(0);
637	rc = map_mmr_bar(ntb, &ntb->bar_info[NTB_CONFIG_BAR]);
638	if (rc != 0)
639		goto out;
640
641	ntb->bar_info[NTB_B2B_BAR_1].pci_resource_id = PCIR_BAR(2);
642	rc = map_memory_window_bar(ntb, &ntb->bar_info[NTB_B2B_BAR_1]);
643	if (rc != 0)
644		goto out;
645	ntb->bar_info[NTB_B2B_BAR_1].psz_off = XEON_PBAR23SZ_OFFSET;
646	ntb->bar_info[NTB_B2B_BAR_1].ssz_off = XEON_SBAR23SZ_OFFSET;
647	ntb->bar_info[NTB_B2B_BAR_1].pbarxlat_off = XEON_PBAR2XLAT_OFFSET;
648
649	ntb->bar_info[NTB_B2B_BAR_2].pci_resource_id = PCIR_BAR(4);
650	/* XXX Are shared MW B2Bs write-combining? */
651	if (HAS_FEATURE(NTB_SDOORBELL_LOCKUP) && !HAS_FEATURE(NTB_SPLIT_BAR))
652		rc = map_mmr_bar(ntb, &ntb->bar_info[NTB_B2B_BAR_2]);
653	else
654		rc = map_memory_window_bar(ntb, &ntb->bar_info[NTB_B2B_BAR_2]);
655	ntb->bar_info[NTB_B2B_BAR_2].psz_off = XEON_PBAR4SZ_OFFSET;
656	ntb->bar_info[NTB_B2B_BAR_2].ssz_off = XEON_SBAR4SZ_OFFSET;
657	ntb->bar_info[NTB_B2B_BAR_2].pbarxlat_off = XEON_PBAR4XLAT_OFFSET;
658
659	if (!HAS_FEATURE(NTB_SPLIT_BAR))
660		goto out;
661
662	ntb->bar_info[NTB_B2B_BAR_3].pci_resource_id = PCIR_BAR(5);
663	if (HAS_FEATURE(NTB_SDOORBELL_LOCKUP))
664		rc = map_mmr_bar(ntb, &ntb->bar_info[NTB_B2B_BAR_3]);
665	else
666		rc = map_memory_window_bar(ntb, &ntb->bar_info[NTB_B2B_BAR_3]);
667	ntb->bar_info[NTB_B2B_BAR_3].psz_off = XEON_PBAR5SZ_OFFSET;
668	ntb->bar_info[NTB_B2B_BAR_3].ssz_off = XEON_SBAR5SZ_OFFSET;
669	ntb->bar_info[NTB_B2B_BAR_3].pbarxlat_off = XEON_PBAR5XLAT_OFFSET;
670
671out:
672	if (rc != 0)
673		device_printf(ntb->device,
674		    "unable to allocate pci resource\n");
675	return (rc);
676}
677
/* Log the size and physical/virtual addresses of a freshly-mapped BAR. */
static void
print_map_success(struct ntb_softc *ntb, struct ntb_pci_bar_info *bar)
{

	device_printf(ntb->device, "Bar size = %lx, v %p, p %p\n",
	    bar->size, bar->vbase, (void *)(bar->pbase));
}
685
686static int
687map_mmr_bar(struct ntb_softc *ntb, struct ntb_pci_bar_info *bar)
688{
689
690	bar->pci_resource = bus_alloc_resource_any(ntb->device, SYS_RES_MEMORY,
691	    &bar->pci_resource_id, RF_ACTIVE);
692	if (bar->pci_resource == NULL)
693		return (ENXIO);
694
695	save_bar_parameters(bar);
696	print_map_success(ntb, bar);
697	return (0);
698}
699
/*
 * Allocate and activate a BAR used as a memory window, applying the
 * Ivytown misreported-BAR-size workaround when needed, and mark the
 * mapping write-combining for throughput.
 */
static int
map_memory_window_bar(struct ntb_softc *ntb, struct ntb_pci_bar_info *bar)
{
	int rc;
	uint8_t bar_size_bits = 0;

	bar->pci_resource = bus_alloc_resource_any(ntb->device, SYS_RES_MEMORY,
	    &bar->pci_resource_id, RF_ACTIVE);

	if (bar->pci_resource == NULL)
		return (ENXIO);

	save_bar_parameters(bar);
	/*
	 * Ivytown NTB BAR sizes are misreported by the hardware due to a
	 * hardware issue. To work around this, query the size it should be
	 * configured to by the device and modify the resource to correspond to
	 * this new size. The BIOS on systems with this problem is required to
	 * provide enough address space to allow the driver to make this change
	 * safely.
	 *
	 * Ideally I could have just specified the size when I allocated the
	 * resource like:
	 *  bus_alloc_resource(ntb->device,
	 *	SYS_RES_MEMORY, &bar->pci_resource_id, 0ul, ~0ul,
	 *	1ul << bar_size_bits, RF_ACTIVE);
	 * but the PCI driver does not honor the size in this call, so we have
	 * to modify it after the fact.
	 */
	if (HAS_FEATURE(NTB_BAR_SIZE_4K)) {
		/* The size registers hold log2 of the true BAR size. */
		if (bar->pci_resource_id == PCIR_BAR(2))
			bar_size_bits = pci_read_config(ntb->device,
			    XEON_PBAR23SZ_OFFSET, 1);
		else
			bar_size_bits = pci_read_config(ntb->device,
			    XEON_PBAR45SZ_OFFSET, 1);

		rc = bus_adjust_resource(ntb->device, SYS_RES_MEMORY,
		    bar->pci_resource, bar->pbase,
		    bar->pbase + (1ul << bar_size_bits) - 1);
		if (rc != 0) {
			device_printf(ntb->device,
			    "unable to resize bar\n");
			return (rc);
		}

		/* Re-read pbase/vbase/size after the resize. */
		save_bar_parameters(bar);
	}

	/* Mark bar region as write combining to improve performance. */
	rc = pmap_change_attr((vm_offset_t)bar->vbase, bar->size,
	    VM_MEMATTR_WRITE_COMBINING);
	if (rc != 0) {
		device_printf(ntb->device,
		    "unable to mark bar as WRITE_COMBINING\n");
		return (rc);
	}
	print_map_success(ntb, bar);
	return (0);
}
760
761static void
762ntb_unmap_pci_bar(struct ntb_softc *ntb)
763{
764	struct ntb_pci_bar_info *current_bar;
765	int i;
766
767	for (i = 0; i < NTB_MAX_BARS; i++) {
768		current_bar = &ntb->bar_info[i];
769		if (current_bar->pci_resource != NULL)
770			bus_release_resource(ntb->device, SYS_RES_MEMORY,
771			    current_bar->pci_resource_id,
772			    current_bar->pci_resource);
773	}
774}
775
776static int
777ntb_setup_msix(struct ntb_softc *ntb, uint32_t num_vectors)
778{
779	uint32_t i;
780	int rc;
781
782	for (i = 0; i < num_vectors; i++) {
783		ntb->int_info[i].rid = i + 1;
784		ntb->int_info[i].res = bus_alloc_resource_any(ntb->device,
785		    SYS_RES_IRQ, &ntb->int_info[i].rid, RF_ACTIVE);
786		if (ntb->int_info[i].res == NULL) {
787			device_printf(ntb->device,
788			    "bus_alloc_resource failed\n");
789			return (ENOMEM);
790		}
791		ntb->int_info[i].tag = NULL;
792		ntb->allocated_interrupts++;
793		rc = bus_setup_intr(ntb->device, ntb->int_info[i].res,
794		    INTR_MPSAFE | INTR_TYPE_MISC, NULL, ndev_vec_isr,
795		    &ntb->msix_vec[i], &ntb->int_info[i].tag);
796		if (rc != 0) {
797			device_printf(ntb->device, "bus_setup_intr failed\n");
798			return (ENXIO);
799		}
800	}
801	return (0);
802}
803
/*
 * The Linux NTB driver drops from MSI-X to legacy INTx if a unique vector
 * cannot be allocated for each MSI-X message.  JHB seems to think remapping
 * should be okay.  This tunable should enable us to test that hypothesis
 * when someone gets their hands on some Xeon hardware.
 */
static int ntb_force_remap_mode;
SYSCTL_INT(_hw_ntb, OID_AUTO, force_remap_mode, CTLFLAG_RDTUN,
    &ntb_force_remap_mode, 0, "If enabled, force MSI-X messages to be remapped"
    " to a smaller number of ithreads, even if the desired number are "
    "available");

/*
 * In case it is NOT ok, give consumers an abort button.
 * When set, ntb_remap_msix() refuses to remap and the driver falls back
 * to legacy INTx instead.
 */
static int ntb_prefer_intx;
SYSCTL_INT(_hw_ntb, OID_AUTO, prefer_intx_to_remap, CTLFLAG_RDTUN,
    &ntb_prefer_intx, 0, "If enabled, prefer to use legacy INTx mode rather "
    "than remapping MSI-X messages over available slots (match Linux driver "
    "behavior)");
824
825/*
826 * Remap the desired number of MSI-X messages to available ithreads in a simple
827 * round-robin fashion.
828 */
829static int
830ntb_remap_msix(device_t dev, uint32_t desired, uint32_t avail)
831{
832	u_int *vectors;
833	uint32_t i;
834	int rc;
835
836	if (ntb_prefer_intx != 0)
837		return (ENXIO);
838
839	vectors = malloc(desired * sizeof(*vectors), M_NTB, M_ZERO | M_WAITOK);
840
841	for (i = 0; i < desired; i++)
842		vectors[i] = (i % avail) + 1;
843
844	rc = pci_remap_msix(dev, desired, vectors);
845	free(vectors, M_NTB);
846	return (rc);
847}
848
/*
 * Allocate and wire up interrupts: try for one MSI-X vector per doorbell,
 * optionally remapping onto fewer vectors, and fall back to a single
 * shared legacy interrupt if MSI-X cannot be used.
 */
static int
ntb_init_isr(struct ntb_softc *ntb)
{
	uint32_t desired_vectors, num_vectors;
	int rc;

	ntb->allocated_interrupts = 0;
	ntb->last_ts = ticks;

	/*
	 * Mask all doorbell interrupts.
	 */
	ntb_db_set_mask(ntb, ntb->db_valid_mask);

	num_vectors = desired_vectors = MIN(pci_msix_count(ntb->device),
	    ntb->db_count);
	if (desired_vectors >= 1) {
		rc = pci_alloc_msix(ntb->device, &num_vectors);

		/* Test hook: pretend we got one fewer vector than wanted. */
		if (ntb_force_remap_mode != 0 && rc == 0 &&
		    num_vectors == desired_vectors)
			num_vectors--;

		if (rc == 0 && num_vectors < desired_vectors) {
			rc = ntb_remap_msix(ntb->device, desired_vectors,
			    num_vectors);
			if (rc == 0)
				num_vectors = desired_vectors;
			else
				pci_release_msi(ntb->device);
		}
		if (rc != 0)
			num_vectors = 1;
	} else
		num_vectors = 1;

	/* On Xeon, too few vectors forces the single legacy-style vector. */
	if (ntb->type == NTB_XEON && num_vectors < ntb->db_vec_count) {
		ntb->db_vec_count = 1;
		ntb->db_vec_shift = ntb->db_count;
		rc = ntb_setup_legacy_interrupt(ntb);
	} else {
		ntb_create_msix_vec(ntb, num_vectors);
		rc = ntb_setup_msix(ntb, num_vectors);
	}
	if (rc != 0) {
		device_printf(ntb->device,
		    "Error allocating interrupts: %d\n", rc);
		ntb_free_msix_vec(ntb);
	}

	return (rc);
}
901
/* Set up a single shared legacy (INTx) interrupt for all doorbells. */
static int
ntb_setup_legacy_interrupt(struct ntb_softc *ntb)
{
	int rc;

	/* Legacy interrupt uses resource ID 0 and may be shared. */
	ntb->int_info[0].rid = 0;
	ntb->int_info[0].res = bus_alloc_resource_any(ntb->device, SYS_RES_IRQ,
	    &ntb->int_info[0].rid, RF_SHAREABLE|RF_ACTIVE);
	if (ntb->int_info[0].res == NULL) {
		device_printf(ntb->device, "bus_alloc_resource failed\n");
		return (ENOMEM);
	}

	ntb->int_info[0].tag = NULL;
	ntb->allocated_interrupts = 1;

	rc = bus_setup_intr(ntb->device, ntb->int_info[0].res,
	    INTR_MPSAFE | INTR_TYPE_MISC, NULL, ndev_irq_isr,
	    ntb, &ntb->int_info[0].tag);
	if (rc != 0) {
		device_printf(ntb->device, "bus_setup_intr failed\n");
		return (ENXIO);
	}

	return (0);
}
928
929static void
930ntb_teardown_interrupts(struct ntb_softc *ntb)
931{
932	struct ntb_int_info *current_int;
933	int i;
934
935	for (i = 0; i < ntb->allocated_interrupts; i++) {
936		current_int = &ntb->int_info[i];
937		if (current_int->tag != NULL)
938			bus_teardown_intr(ntb->device, current_int->res,
939			    current_int->tag);
940
941		if (current_int->res != NULL)
942			bus_release_resource(ntb->device, SYS_RES_IRQ,
943			    rman_get_rid(current_int->res), current_int->res);
944	}
945
946	ntb_free_msix_vec(ntb);
947	pci_release_msi(ntb->device);
948}
949
/*
 * Doorbell register and mask are 64-bit on SoC, 16-bit on Xeon.  Abstract it
 * out to make code clearer.  Returns the zero-extended register value.
 */
static inline uint64_t
db_ioread(struct ntb_softc *ntb, uint64_t regoff)
{

	if (ntb->type == NTB_SOC)
		return (ntb_reg_read(8, regoff));

	KASSERT(ntb->type == NTB_XEON, ("bad ntb type"));

	return (ntb_reg_read(2, regoff));
}
965
/*
 * Write a doorbell or doorbell-mask register at the flavor-appropriate
 * width.  Asserts that only valid doorbell bits are set and that the mask
 * lock is held when writing the local mask register.
 */
static inline void
db_iowrite(struct ntb_softc *ntb, uint64_t regoff, uint64_t val)
{

	KASSERT((val & ~ntb->db_valid_mask) == 0,
	    ("%s: Invalid bits 0x%jx (valid: 0x%jx)", __func__,
	     (uintmax_t)(val & ~ntb->db_valid_mask),
	     (uintmax_t)ntb->db_valid_mask));

	if (regoff == ntb->self_reg->db_mask)
		DB_MASK_ASSERT(ntb, MA_OWNED);

	if (ntb->type == NTB_SOC) {
		ntb_reg_write(8, regoff, val);
		return;
	}

	KASSERT(ntb->type == NTB_XEON, ("bad ntb type"));
	ntb_reg_write(2, regoff, (uint16_t)val);
}
986
/* Public API: mask the given doorbell bits (suppress their interrupts). */
void
ntb_db_set_mask(struct ntb_softc *ntb, uint64_t bits)
{

	DB_MASK_LOCK(ntb);
	ntb->db_mask |= bits;
	db_iowrite(ntb, ntb->self_reg->db_mask, ntb->db_mask);
	DB_MASK_UNLOCK(ntb);
}
996
/* Public API: unmask the given doorbell bits (re-enable their interrupts). */
void
ntb_db_clear_mask(struct ntb_softc *ntb, uint64_t bits)
{

	KASSERT((bits & ~ntb->db_valid_mask) == 0,
	    ("%s: Invalid bits 0x%jx (valid: 0x%jx)", __func__,
	     (uintmax_t)(bits & ~ntb->db_valid_mask),
	     (uintmax_t)ntb->db_valid_mask));

	DB_MASK_LOCK(ntb);
	ntb->db_mask &= ~bits;
	db_iowrite(ntb, ntb->self_reg->db_mask, ntb->db_mask);
	DB_MASK_UNLOCK(ntb);
}
1011
1012uint64_t
1013ntb_db_read(struct ntb_softc *ntb)
1014{
1015
1016	return (db_ioread(ntb, ntb->self_reg->db_bell));
1017}
1018
/*
 * Acknowledge the given doorbell bits by writing them back to the local
 * doorbell register.  Only bits within db_valid_mask may be passed.
 */
void
ntb_db_clear(struct ntb_softc *ntb, uint64_t bits)
{

	KASSERT((bits & ~ntb->db_valid_mask) == 0,
	    ("%s: Invalid bits 0x%jx (valid: 0x%jx)", __func__,
	     (uintmax_t)(bits & ~ntb->db_valid_mask),
	     (uintmax_t)ntb->db_valid_mask));

	/* Writing a set bit to the bell register clears that doorbell. */
	db_iowrite(ntb, ntb->self_reg->db_bell, bits);
}
1030
1031static inline uint64_t
1032ntb_vec_mask(struct ntb_softc *ntb, uint64_t db_vector)
1033{
1034	uint64_t shift, mask;
1035
1036	shift = ntb->db_vec_shift;
1037	mask = (1ull << shift) - 1;
1038	return (mask << (shift * db_vector));
1039}
1040
1041static void
1042ntb_interrupt(struct ntb_softc *ntb, uint32_t vec)
1043{
1044	uint64_t vec_mask;
1045
1046	ntb->last_ts = ticks;
1047	vec_mask = ntb_vec_mask(ntb, vec);
1048
1049	if ((vec_mask & ntb->db_link_mask) != 0) {
1050		if (ntb_poll_link(ntb))
1051			ntb_link_event(ntb);
1052	}
1053
1054	if ((vec_mask & ntb->db_valid_mask) != 0)
1055		ntb_db_event(ntb, vec);
1056}
1057
1058static void
1059ndev_vec_isr(void *arg)
1060{
1061	struct ntb_vec *nvec = arg;
1062
1063	ntb_interrupt(nvec->ntb, nvec->num);
1064}
1065
/* Legacy/MSI interrupt handler. */
static void
ndev_irq_isr(void *arg)
{
	struct ntb_softc *ntb;

	/* If we couldn't set up MSI-X, we only have the one vector. */
	ntb = arg;
	ntb_interrupt(ntb, 0);
}
1072
1073static int
1074ntb_create_msix_vec(struct ntb_softc *ntb, uint32_t num_vectors)
1075{
1076	uint32_t i;
1077
1078	ntb->msix_vec = malloc(num_vectors * sizeof(*ntb->msix_vec), M_NTB,
1079	    M_ZERO | M_WAITOK);
1080	for (i = 0; i < num_vectors; i++) {
1081		ntb->msix_vec[i].num = i;
1082		ntb->msix_vec[i].ntb = ntb;
1083	}
1084
1085	return (0);
1086}
1087
1088static void
1089ntb_free_msix_vec(struct ntb_softc *ntb)
1090{
1091
1092	if (ntb->msix_vec == NULL)
1093		return;
1094
1095	free(ntb->msix_vec, M_NTB);
1096	ntb->msix_vec = NULL;
1097}
1098
1099static struct ntb_hw_info *
1100ntb_get_device_info(uint32_t device_id)
1101{
1102	struct ntb_hw_info *ep = pci_ids;
1103
1104	while (ep->device_id) {
1105		if (ep->device_id == device_id)
1106			return (ep);
1107		++ep;
1108	}
1109	return (NULL);
1110}
1111
1112static void
1113ntb_teardown_xeon(struct ntb_softc *ntb)
1114{
1115
1116	if (ntb->reg != NULL)
1117		ntb_link_disable(ntb);
1118}
1119
1120static void
1121ntb_detect_max_mw(struct ntb_softc *ntb)
1122{
1123
1124	if (ntb->type == NTB_SOC) {
1125		ntb->mw_count = SOC_MW_COUNT;
1126		return;
1127	}
1128
1129	if (HAS_FEATURE(NTB_SPLIT_BAR))
1130		ntb->mw_count = XEON_HSX_SPLIT_MW_COUNT;
1131	else
1132		ntb->mw_count = XEON_SNB_MW_COUNT;
1133}
1134
/*
 * Decode the Xeon PPD config register: device type (upstream/downstream of
 * the NTB), split-BAR mode, and connection type.  Only B2B topology is
 * supported; returns ENXIO otherwise.
 */
static int
ntb_detect_xeon(struct ntb_softc *ntb)
{
	uint8_t ppd, conn_type;

	ppd = pci_read_config(ntb->device, NTB_PPD_OFFSET, 1);
	ntb->ppd = ppd;

	/* Bit set => upstream (USD); clear => downstream (DSD). */
	if ((ppd & XEON_PPD_DEV_TYPE) != 0)
		ntb->dev_type = NTB_DEV_USD;
	else
		ntb->dev_type = NTB_DEV_DSD;

	if ((ppd & XEON_PPD_SPLIT_BAR) != 0)
		ntb->features |= NTB_SPLIT_BAR;

	/* SB01BASE_LOCKUP errata is a superset of SDOORBELL errata */
	if (HAS_FEATURE(NTB_SB01BASE_LOCKUP))
		ntb->features |= NTB_SDOORBELL_LOCKUP;

	conn_type = ppd & XEON_PPD_CONN_TYPE;
	switch (conn_type) {
	case NTB_CONN_B2B:
		ntb->conn_type = conn_type;
		break;
	case NTB_CONN_RP:
	case NTB_CONN_TRANSPARENT:
	default:
		/* RP and transparent modes are not supported by this driver. */
		device_printf(ntb->device, "Unsupported connection type: %u\n",
		    (unsigned)conn_type);
		return (ENXIO);
	}
	return (0);
}
1169
/*
 * Decode the SoC PPD config register: device type and connection type.
 * Only B2B topology is supported; returns ENXIO otherwise.
 */
static int
ntb_detect_soc(struct ntb_softc *ntb)
{
	uint32_t ppd, conn_type;

	ppd = pci_read_config(ntb->device, NTB_PPD_OFFSET, 4);
	ntb->ppd = ppd;

	/* Note: DEV_TYPE bit polarity is inverted relative to Xeon. */
	if ((ppd & SOC_PPD_DEV_TYPE) != 0)
		ntb->dev_type = NTB_DEV_DSD;
	else
		ntb->dev_type = NTB_DEV_USD;

	conn_type = (ppd & SOC_PPD_CONN_TYPE) >> 8;
	switch (conn_type) {
	case NTB_CONN_B2B:
		ntb->conn_type = conn_type;
		break;
	default:
		device_printf(ntb->device, "Unsupported NTB configuration\n");
		return (ENXIO);
	}
	return (0);
}
1194
/*
 * Xeon-specific device initialization: set doorbell/scratchpad geometry,
 * select the register tables, apply errata workarounds, configure the B2B
 * memory window, enable the secondary side, and start link training.
 * Returns 0 on success or an errno.
 */
static int
ntb_xeon_init_dev(struct ntb_softc *ntb)
{
	int rc;

	ntb->spad_count		= XEON_SPAD_COUNT;
	ntb->db_count		= XEON_DB_COUNT;
	ntb->db_link_mask	= XEON_DB_LINK_BIT;
	ntb->db_vec_count	= XEON_DB_MSIX_VECTOR_COUNT;
	ntb->db_vec_shift	= XEON_DB_MSIX_VECTOR_SHIFT;

	if (ntb->conn_type != NTB_CONN_B2B) {
		device_printf(ntb->device, "Connection type %d not supported\n",
		    ntb->conn_type);
		return (ENXIO);
	}

	ntb->reg = &xeon_reg;
	ntb->self_reg = &xeon_pri_reg;
	ntb->peer_reg = &xeon_b2b_reg;
	ntb->xlat_reg = &xeon_sec_xlat;

	/*
	 * There is a Xeon hardware errata related to writes to SDOORBELL or
	 * B2BDOORBELL in conjunction with inbound access to NTB MMIO space,
	 * which may hang the system.  To workaround this use the second memory
	 * window to access the interrupt and scratch pad registers on the
	 * remote system.
	 */
	if (HAS_FEATURE(NTB_SDOORBELL_LOCKUP))
		/* Use the last MW for mapping remote spad */
		ntb->b2b_mw_idx = ntb->mw_count - 1;
	else if (HAS_FEATURE(NTB_B2BDOORBELL_BIT14))
		/*
		 * HW Errata on bit 14 of b2bdoorbell register.  Writes will not be
		 * mirrored to the remote system.  Shrink the number of bits by one,
		 * since bit 14 is the last bit.
		 *
		 * On REGS_THRU_MW errata mode, we don't use the b2bdoorbell register
		 * anyway.  Nor for non-B2B connection types.
		 */
		ntb->db_count = XEON_DB_COUNT - 1;

	ntb->db_valid_mask = (1ull << ntb->db_count) - 1;

	/* Address tables are swapped depending on which side we are. */
	if (ntb->dev_type == NTB_DEV_USD)
		rc = xeon_setup_b2b_mw(ntb, &xeon_b2b_dsd_addr,
		    &xeon_b2b_usd_addr);
	else
		rc = xeon_setup_b2b_mw(ntb, &xeon_b2b_usd_addr,
		    &xeon_b2b_dsd_addr);
	if (rc != 0)
		return (rc);

	/* Enable Bus Master and Memory Space on the secondary side */
	ntb_reg_write(2, XEON_PCICMD_OFFSET,
	    PCIM_CMD_MEMEN | PCIM_CMD_BUSMASTEREN);

	/* Enable link training */
	ntb_link_enable(ntb, NTB_SPEED_AUTO, NTB_WIDTH_AUTO);

	return (0);
}
1258
/*
 * SoC-specific device initialization: set geometry, select register
 * tables, configure secondary-side BARs, enable the secondary side, start
 * link training, and kick off the link-status heartbeat callout (SoC has
 * no link interrupt).
 */
static int
ntb_soc_init_dev(struct ntb_softc *ntb)
{

	KASSERT(ntb->conn_type == NTB_CONN_B2B,
	    ("Unsupported NTB configuration (%d)\n", ntb->conn_type));

	ntb->spad_count		 = SOC_SPAD_COUNT;
	ntb->db_count		 = SOC_DB_COUNT;
	ntb->db_vec_count	 = SOC_DB_MSIX_VECTOR_COUNT;
	ntb->db_vec_shift	 = SOC_DB_MSIX_VECTOR_SHIFT;
	ntb->db_valid_mask	 = (1ull << ntb->db_count) - 1;

	ntb->reg = &soc_reg;
	ntb->self_reg = &soc_pri_reg;
	ntb->peer_reg = &soc_b2b_reg;
	ntb->xlat_reg = &soc_sec_xlat;

	/*
	 * FIXME - MSI-X bug on early SOC HW, remove once internal issue is
	 * resolved.  Mask transaction layer internal parity errors.
	 */
	pci_write_config(ntb->device, 0xFC, 0x4, 4);

	configure_soc_secondary_side_bars(ntb);

	/* Enable Bus Master and Memory Space on the secondary side */
	ntb_reg_write(2, SOC_PCICMD_OFFSET,
	    PCIM_CMD_MEMEN | PCIM_CMD_BUSMASTEREN);

	/* Initiate PCI-E link training */
	ntb_link_enable(ntb, NTB_SPEED_AUTO, NTB_WIDTH_AUTO);

	/* Schedule the first heartbeat immediately. */
	callout_reset(&ntb->heartbeat_timer, 0, soc_link_hb, ntb);

	return (0);
}
1296
1297/* XXX: Linux driver doesn't seem to do any of this for SoC. */
1298static void
1299configure_soc_secondary_side_bars(struct ntb_softc *ntb)
1300{
1301
1302	if (ntb->dev_type == NTB_DEV_USD) {
1303		ntb_reg_write(8, SOC_PBAR2XLAT_OFFSET,
1304		    XEON_B2B_BAR2_DSD_ADDR64);
1305		ntb_reg_write(8, SOC_PBAR4XLAT_OFFSET,
1306		    XEON_B2B_BAR4_DSD_ADDR64);
1307		ntb_reg_write(8, SOC_MBAR23_OFFSET, XEON_B2B_BAR2_USD_ADDR64);
1308		ntb_reg_write(8, SOC_MBAR45_OFFSET, XEON_B2B_BAR4_USD_ADDR64);
1309	} else {
1310		ntb_reg_write(8, SOC_PBAR2XLAT_OFFSET,
1311		    XEON_B2B_BAR2_USD_ADDR64);
1312		ntb_reg_write(8, SOC_PBAR4XLAT_OFFSET,
1313		    XEON_B2B_BAR4_USD_ADDR64);
1314		ntb_reg_write(8, SOC_MBAR23_OFFSET, XEON_B2B_BAR2_DSD_ADDR64);
1315		ntb_reg_write(8, SOC_MBAR45_OFFSET, XEON_B2B_BAR4_DSD_ADDR64);
1316	}
1317}
1318
1319
1320/*
1321 * When working around Xeon SDOORBELL errata by remapping remote registers in a
1322 * MW, limit the B2B MW to half a MW.  By sharing a MW, half the shared MW
1323 * remains for use by a higher layer.
1324 *
1325 * Will only be used if working around SDOORBELL errata and the BIOS-configured
1326 * MW size is sufficiently large.
1327 */
/* Boolean loader tunable (hw.ntb.b2b_mw_share); read in xeon_setup_b2b_mw. */
static unsigned int ntb_b2b_mw_share;
SYSCTL_UINT(_hw_ntb, OID_AUTO, b2b_mw_share, CTLFLAG_RDTUN, &ntb_b2b_mw_share,
    0, "If enabled (non-zero), prefer to share half of the B2B peer register "
    "MW with higher level consumers.  Both sides of the NTB MUST set the same "
    "value here.");
1333
/*
 * Copy a primary-side BAR size register to the secondary side.  The BAR
 * carrying the B2B registers is special-cased: halved when shared with
 * consumers (b2b_off != 0) or disabled (size 0) otherwise.
 * NOTE(review): decrementing to halve assumes the size register is
 * log2-encoded -- confirm against the datasheet.
 */
static void
xeon_reset_sbar_size(struct ntb_softc *ntb, enum ntb_bar idx,
    enum ntb_bar regbar)
{
	struct ntb_pci_bar_info *bar;
	uint8_t bar_sz;

	/* Without split-BAR mode there is no separate BAR 5. */
	if (!HAS_FEATURE(NTB_SPLIT_BAR) && idx >= NTB_B2B_BAR_3)
		return;

	bar = &ntb->bar_info[idx];
	bar_sz = pci_read_config(ntb->device, bar->psz_off, 1);
	if (idx == regbar) {
		if (ntb->b2b_off != 0)
			bar_sz--;
		else
			bar_sz = 0;
	}
	pci_write_config(ntb->device, bar->ssz_off, bar_sz, 1);
	/* Read back to flush the write; value intentionally discarded. */
	bar_sz = pci_read_config(ntb->device, bar->ssz_off, 1);
	(void)bar_sz;
}
1356
/*
 * Set a secondary BAR's base, and set its limit equal to the base (a
 * zero-length incoming window; see the caller's comment).  For the BAR
 * carrying the B2B registers, the base is advanced by b2b_off so the
 * shared half remains available.  Register width follows the BAR width.
 */
static void
xeon_set_sbar_base_and_limit(struct ntb_softc *ntb, uint64_t bar_addr,
    enum ntb_bar idx, enum ntb_bar regbar)
{
	uint64_t reg_val;
	uint32_t base_reg, lmt_reg;

	bar_get_xlat_params(ntb, idx, &base_reg, NULL, &lmt_reg);
	if (idx == regbar)
		bar_addr += ntb->b2b_off;

	/* Each write is read back to flush it; values are discarded. */
	if (!bar_is_64bit(ntb, idx)) {
		ntb_reg_write(4, base_reg, bar_addr);
		reg_val = ntb_reg_read(4, base_reg);
		(void)reg_val;

		ntb_reg_write(4, lmt_reg, bar_addr);
		reg_val = ntb_reg_read(4, lmt_reg);
		(void)reg_val;
	} else {
		ntb_reg_write(8, base_reg, bar_addr);
		reg_val = ntb_reg_read(8, base_reg);
		(void)reg_val;

		ntb_reg_write(8, lmt_reg, bar_addr);
		reg_val = ntb_reg_read(8, lmt_reg);
		(void)reg_val;
	}
}
1386
/*
 * Program the primary-side outgoing translation (PBAR xlat) for a BAR.
 * Split-BAR halves (BAR 4/5) use 32-bit registers; otherwise 64-bit.  The
 * read-back flushes the write and is intentionally discarded.
 */
static void
xeon_set_pbar_xlat(struct ntb_softc *ntb, uint64_t base_addr, enum ntb_bar idx)
{
	struct ntb_pci_bar_info *bar;

	bar = &ntb->bar_info[idx];
	if (HAS_FEATURE(NTB_SPLIT_BAR) && idx >= NTB_B2B_BAR_2) {
		ntb_reg_write(4, bar->pbarxlat_off, base_addr);
		base_addr = ntb_reg_read(4, bar->pbarxlat_off);
	} else {
		ntb_reg_write(8, bar->pbarxlat_off, base_addr);
		base_addr = ntb_reg_read(8, bar->pbarxlat_off);
	}
	(void)base_addr;
}
1402
/*
 * Configure the B2B (back-to-back) register window and program all
 * secondary BAR bases/limits and primary translation registers from the
 * given local (addr) and peer (peer_addr) address tables.
 *
 * When the SDOORBELL_LOCKUP workaround reserves a MW (b2b_mw_idx set), the
 * chosen BAR either shares half its space with consumers (if the
 * hw.ntb.b2b_mw_share tunable is set and the BAR is big enough) or is
 * consumed entirely, reducing mw_count.  Returns 0 or EIO if the BIOS
 * configured the BAR too small.
 */
static int
xeon_setup_b2b_mw(struct ntb_softc *ntb, const struct ntb_b2b_addr *addr,
    const struct ntb_b2b_addr *peer_addr)
{
	struct ntb_pci_bar_info *b2b_bar;
	vm_size_t bar_size;
	uint64_t bar_addr;
	enum ntb_bar b2b_bar_num, i;

	if (ntb->b2b_mw_idx == B2B_MW_DISABLED) {
		/* No errata workaround: B2B registers live in BAR 0. */
		b2b_bar = NULL;
		b2b_bar_num = NTB_CONFIG_BAR;
		ntb->b2b_off = 0;
	} else {
		b2b_bar_num = ntb_mw_to_bar(ntb, ntb->b2b_mw_idx);
		KASSERT(b2b_bar_num > 0 && b2b_bar_num < NTB_MAX_BARS,
		    ("invalid b2b mw bar"));

		b2b_bar = &ntb->bar_info[b2b_bar_num];
		bar_size = b2b_bar->size;

		if (ntb_b2b_mw_share != 0 &&
		    (bar_size >> 1) >= XEON_B2B_MIN_SIZE)
			ntb->b2b_off = bar_size >> 1;
		else if (bar_size >= XEON_B2B_MIN_SIZE) {
			ntb->b2b_off = 0;
			ntb->mw_count--;
		} else {
			device_printf(ntb->device,
			    "B2B bar size is too small!\n");
			return (EIO);
		}
	}

	/*
	 * Reset the secondary bar sizes to match the primary bar sizes.
	 * (Except, disable or halve the size of the B2B secondary bar.)
	 */
	for (i = NTB_B2B_BAR_1; i < NTB_MAX_BARS; i++)
		xeon_reset_sbar_size(ntb, i, b2b_bar_num);

	/* SBAR0 base: local address of whichever BAR holds B2B registers. */
	bar_addr = 0;
	if (b2b_bar_num == NTB_CONFIG_BAR)
		bar_addr = addr->bar0_addr;
	else if (b2b_bar_num == NTB_B2B_BAR_1)
		bar_addr = addr->bar2_addr64;
	else if (b2b_bar_num == NTB_B2B_BAR_2 && !HAS_FEATURE(NTB_SPLIT_BAR))
		bar_addr = addr->bar4_addr64;
	else if (b2b_bar_num == NTB_B2B_BAR_2)
		bar_addr = addr->bar4_addr32;
	else if (b2b_bar_num == NTB_B2B_BAR_3)
		bar_addr = addr->bar5_addr32;
	else
		KASSERT(false, ("invalid bar"));

	ntb_reg_write(8, XEON_SBAR0BASE_OFFSET, bar_addr);

	/*
	 * Other SBARs are normally hit by the PBAR xlat, except for the b2b
	 * register BAR.  The B2B BAR is either disabled above or configured
	 * half-size.  It starts at PBAR xlat + offset.
	 *
	 * Also set up incoming BAR limits == base (zero length window).
	 */
	xeon_set_sbar_base_and_limit(ntb, addr->bar2_addr64, NTB_B2B_BAR_1,
	    b2b_bar_num);
	if (HAS_FEATURE(NTB_SPLIT_BAR)) {
		xeon_set_sbar_base_and_limit(ntb, addr->bar4_addr32,
		    NTB_B2B_BAR_2, b2b_bar_num);
		xeon_set_sbar_base_and_limit(ntb, addr->bar5_addr32,
		    NTB_B2B_BAR_3, b2b_bar_num);
	} else
		xeon_set_sbar_base_and_limit(ntb, addr->bar4_addr64,
		    NTB_B2B_BAR_2, b2b_bar_num);

	/* Zero incoming translation addrs */
	ntb_reg_write(8, XEON_SBAR2XLAT_OFFSET, 0);
	ntb_reg_write(8, XEON_SBAR4XLAT_OFFSET, 0);

	/* Zero outgoing translation limits (whole bar size windows) */
	ntb_reg_write(8, XEON_PBAR2LMT_OFFSET, 0);
	ntb_reg_write(8, XEON_PBAR4LMT_OFFSET, 0);

	/* Set outgoing translation offsets */
	xeon_set_pbar_xlat(ntb, peer_addr->bar2_addr64, NTB_B2B_BAR_1);
	if (HAS_FEATURE(NTB_SPLIT_BAR)) {
		xeon_set_pbar_xlat(ntb, peer_addr->bar4_addr32, NTB_B2B_BAR_2);
		xeon_set_pbar_xlat(ntb, peer_addr->bar5_addr32, NTB_B2B_BAR_3);
	} else
		xeon_set_pbar_xlat(ntb, peer_addr->bar4_addr64, NTB_B2B_BAR_2);

	/* Set the translation offset for B2B registers */
	bar_addr = 0;
	if (b2b_bar_num == NTB_CONFIG_BAR)
		bar_addr = peer_addr->bar0_addr;
	else if (b2b_bar_num == NTB_B2B_BAR_1)
		bar_addr = peer_addr->bar2_addr64;
	else if (b2b_bar_num == NTB_B2B_BAR_2 && !HAS_FEATURE(NTB_SPLIT_BAR))
		bar_addr = peer_addr->bar4_addr64;
	else if (b2b_bar_num == NTB_B2B_BAR_2)
		bar_addr = peer_addr->bar4_addr32;
	else if (b2b_bar_num == NTB_B2B_BAR_3)
		bar_addr = peer_addr->bar5_addr32;
	else
		KASSERT(false, ("invalid bar"));

	/*
	 * B2B_XLAT_OFFSET is a 64-bit register but can only be written 32 bits
	 * at a time.
	 */
	ntb_reg_write(4, XEON_B2B_XLAT_OFFSETL, bar_addr & 0xffffffff);
	ntb_reg_write(4, XEON_B2B_XLAT_OFFSETU, bar_addr >> 32);
	return (0);
}
1517
1518static inline bool
1519link_is_up(struct ntb_softc *ntb)
1520{
1521
1522	if (ntb->type == NTB_XEON) {
1523		if (ntb->conn_type == NTB_CONN_TRANSPARENT)
1524			return (true);
1525		return ((ntb->lnk_sta & NTB_LINK_STATUS_ACTIVE) != 0);
1526	}
1527
1528	KASSERT(ntb->type == NTB_SOC, ("ntb type"));
1529	return ((ntb->ntb_ctl & SOC_CNTL_LINK_DOWN) == 0);
1530}
1531
1532static inline bool
1533soc_link_is_err(struct ntb_softc *ntb)
1534{
1535	uint32_t status;
1536
1537	KASSERT(ntb->type == NTB_SOC, ("ntb type"));
1538
1539	status = ntb_reg_read(4, SOC_LTSSMSTATEJMP_OFFSET);
1540	if ((status & SOC_LTSSMSTATEJMP_FORCEDETECT) != 0)
1541		return (true);
1542
1543	status = ntb_reg_read(4, SOC_IBSTERRRCRVSTS0_OFFSET);
1544	return ((status & SOC_IBIST_ERR_OFLOW) != 0);
1545}
1546
/* SOC does not have link status interrupt, poll on that platform */
static void
soc_link_hb(void *arg)
{
	struct ntb_softc *ntb = arg;
	/*
	 * NOTE(review): despite the sbintime_t declarations, these values
	 * are in units of hardclock ticks (they are compared against the
	 * global 'ticks' and passed to callout_reset()) -- confirm before
	 * changing units.
	 */
	sbintime_t timo, poll_ts;

	timo = NTB_HB_TIMEOUT * hz;
	poll_ts = ntb->last_ts + timo;

	/*
	 * Delay polling the link status if an interrupt was received, unless
	 * the cached link status says the link is down.
	 */
	if ((sbintime_t)ticks - poll_ts < 0 && link_is_up(ntb)) {
		timo = poll_ts - ticks;
		goto out;
	}

	if (ntb_poll_link(ntb))
		ntb_link_event(ntb);

	if (!link_is_up(ntb) && soc_link_is_err(ntb)) {
		/* Link is down with error, proceed with recovery */
		callout_reset(&ntb->lr_timer, 0, recover_soc_link, ntb);
		return;
	}

out:
	callout_reset(&ntb->heartbeat_timer, timo, soc_link_hb, ntb);
}
1578
/*
 * Restart the SoC link: reset the ModPhy lanes, clear sticky error status
 * bits, then release the LTSSM so the link can retrain.  The register
 * sequence and values are hardware-prescribed.
 */
static void
soc_perform_link_restart(struct ntb_softc *ntb)
{
	uint32_t status;

	/* Driver resets the NTB ModPhy lanes - magic! */
	ntb_reg_write(1, SOC_MODPHY_PCSREG6, 0xe0);
	ntb_reg_write(1, SOC_MODPHY_PCSREG4, 0x40);
	ntb_reg_write(1, SOC_MODPHY_PCSREG4, 0x60);
	ntb_reg_write(1, SOC_MODPHY_PCSREG6, 0x60);

	/* Driver waits 100ms to allow the NTB ModPhy to settle */
	pause("ModPhy", hz / 10);

	/* Clear AER Errors, write to clear */
	status = ntb_reg_read(4, SOC_ERRCORSTS_OFFSET);
	status &= PCIM_AER_COR_REPLAY_ROLLOVER;
	ntb_reg_write(4, SOC_ERRCORSTS_OFFSET, status);

	/* Clear unexpected electrical idle event in LTSSM, write to clear */
	status = ntb_reg_read(4, SOC_LTSSMERRSTS0_OFFSET);
	status |= SOC_LTSSMERRSTS0_UNEXPECTEDEI;
	ntb_reg_write(4, SOC_LTSSMERRSTS0_OFFSET, status);

	/* Clear DeSkew Buffer error, write to clear */
	status = ntb_reg_read(4, SOC_DESKEWSTS_OFFSET);
	status |= SOC_DESKEWSTS_DBERR;
	ntb_reg_write(4, SOC_DESKEWSTS_OFFSET, status);

	/* Clear IBIST overflow error, write to clear */
	status = ntb_reg_read(4, SOC_IBSTERRRCRVSTS0_OFFSET);
	status &= SOC_IBIST_ERR_OFLOW;
	ntb_reg_write(4, SOC_IBSTERRRCRVSTS0_OFFSET, status);

	/* Releases the NTB state machine to allow the link to retrain */
	status = ntb_reg_read(4, SOC_LTSSMSTATEJMP_OFFSET);
	status &= ~SOC_LTSSMSTATEJMP_FORCEDETECT;
	ntb_reg_write(4, SOC_LTSSMSTATEJMP_OFFSET, status);
}
1617
1618/*
1619 * ntb_set_ctx() - associate a driver context with an ntb device
1620 * @ntb:        NTB device context
1621 * @ctx:        Driver context
1622 * @ctx_ops:    Driver context operations
1623 *
1624 * Associate a driver context and operations with a ntb device.  The context is
1625 * provided by the client driver, and the driver may associate a different
1626 * context with each ntb device.
1627 *
1628 * Return: Zero if the context is associated, otherwise an error number.
1629 */
1630int
1631ntb_set_ctx(struct ntb_softc *ntb, void *ctx, const struct ntb_ctx_ops *ops)
1632{
1633
1634	if (ctx == NULL || ops == NULL)
1635		return (EINVAL);
1636	if (ntb->ctx_ops != NULL)
1637		return (EINVAL);
1638
1639	CTX_LOCK(ntb);
1640	if (ntb->ctx_ops != NULL) {
1641		CTX_UNLOCK(ntb);
1642		return (EINVAL);
1643	}
1644	ntb->ntb_ctx = ctx;
1645	ntb->ctx_ops = ops;
1646	CTX_UNLOCK(ntb);
1647
1648	return (0);
1649}
1650
1651/*
1652 * It is expected that this will only be used from contexts where the ctx_lock
1653 * is not needed to protect ntb_ctx lifetime.
1654 */
1655void *
1656ntb_get_ctx(struct ntb_softc *ntb, const struct ntb_ctx_ops **ops)
1657{
1658
1659	KASSERT(ntb->ntb_ctx != NULL && ntb->ctx_ops != NULL, ("bogus"));
1660	if (ops != NULL)
1661		*ops = ntb->ctx_ops;
1662	return (ntb->ntb_ctx);
1663}
1664
1665/*
1666 * ntb_clear_ctx() - disassociate any driver context from an ntb device
1667 * @ntb:        NTB device context
1668 *
1669 * Clear any association that may exist between a driver context and the ntb
1670 * device.
1671 */
1672void
1673ntb_clear_ctx(struct ntb_softc *ntb)
1674{
1675
1676	CTX_LOCK(ntb);
1677	ntb->ntb_ctx = NULL;
1678	ntb->ctx_ops = NULL;
1679	CTX_UNLOCK(ntb);
1680}
1681
1682/*
1683 * ntb_link_event() - notify driver context of a change in link status
1684 * @ntb:        NTB device context
1685 *
1686 * Notify the driver context that the link status may have changed.  The driver
1687 * should call ntb_link_is_up() to get the current status.
1688 */
1689void
1690ntb_link_event(struct ntb_softc *ntb)
1691{
1692
1693	CTX_LOCK(ntb);
1694	if (ntb->ctx_ops != NULL && ntb->ctx_ops->link_event != NULL)
1695		ntb->ctx_ops->link_event(ntb->ntb_ctx);
1696	CTX_UNLOCK(ntb);
1697}
1698
1699/*
1700 * ntb_db_event() - notify driver context of a doorbell event
1701 * @ntb:        NTB device context
1702 * @vector:     Interrupt vector number
1703 *
1704 * Notify the driver context of a doorbell event.  If hardware supports
1705 * multiple interrupt vectors for doorbells, the vector number indicates which
1706 * vector received the interrupt.  The vector number is relative to the first
1707 * vector used for doorbells, starting at zero, and must be less than
1708 * ntb_db_vector_count().  The driver may call ntb_db_read() to check which
1709 * doorbell bits need service, and ntb_db_vector_mask() to determine which of
1710 * those bits are associated with the vector number.
1711 */
1712static void
1713ntb_db_event(struct ntb_softc *ntb, uint32_t vec)
1714{
1715
1716	CTX_LOCK(ntb);
1717	if (ntb->ctx_ops != NULL && ntb->ctx_ops->db_event != NULL)
1718		ntb->ctx_ops->db_event(ntb->ntb_ctx, vec);
1719	CTX_UNLOCK(ntb);
1720}
1721
1722/*
1723 * ntb_link_enable() - enable the link on the secondary side of the ntb
1724 * @ntb:        NTB device context
1725 * @max_speed:  The maximum link speed expressed as PCIe generation number[0]
1726 * @max_width:  The maximum link width expressed as the number of PCIe lanes[0]
1727 *
1728 * Enable the link on the secondary side of the ntb.  This can only be done
1729 * from the primary side of the ntb in primary or b2b topology.  The ntb device
1730 * should train the link to its maximum speed and width, or the requested speed
1731 * and width, whichever is smaller, if supported.
1732 *
1733 * Return: Zero on success, otherwise an error number.
1734 *
1735 * [0]: Only NTB_SPEED_AUTO and NTB_WIDTH_AUTO are valid inputs; other speed
1736 *      and width input will be ignored.
1737 */
int
ntb_link_enable(struct ntb_softc *ntb, enum ntb_speed s __unused,
    enum ntb_width w __unused)
{
	uint32_t cntl;

	if (ntb->type == NTB_SOC) {
		/* SoC: link bring-up is requested via the PPD config reg. */
		pci_write_config(ntb->device, NTB_PPD_OFFSET,
		    ntb->ppd | SOC_PPD_INIT_LINK, 4);
		return (0);
	}

	if (ntb->conn_type == NTB_CONN_TRANSPARENT) {
		/* No link training in transparent mode; just notify. */
		ntb_link_event(ntb);
		return (0);
	}

	/* Xeon: unlock config, clear link-disable, snoop both directions. */
	cntl = ntb_reg_read(4, ntb->reg->ntb_ctl);
	cntl &= ~(NTB_CNTL_LINK_DISABLE | NTB_CNTL_CFG_LOCK);
	cntl |= NTB_CNTL_P2S_BAR23_SNOOP | NTB_CNTL_S2P_BAR23_SNOOP;
	cntl |= NTB_CNTL_P2S_BAR4_SNOOP | NTB_CNTL_S2P_BAR4_SNOOP;
	if (HAS_FEATURE(NTB_SPLIT_BAR))
		cntl |= NTB_CNTL_P2S_BAR5_SNOOP | NTB_CNTL_S2P_BAR5_SNOOP;
	ntb_reg_write(4, ntb->reg->ntb_ctl, cntl);
	return (0);
}
1764
1765/*
1766 * ntb_link_disable() - disable the link on the secondary side of the ntb
1767 * @ntb:        NTB device context
1768 *
1769 * Disable the link on the secondary side of the ntb.  This can only be done
1770 * from the primary side of the ntb in primary or b2b topology.  The ntb device
1771 * should disable the link.  Returning from this call must indicate that a
1772 * barrier has passed, though with no more writes may pass in either direction
1773 * across the link, except if this call returns an error number.
1774 *
1775 * Return: Zero on success, otherwise an error number.
1776 */
int
ntb_link_disable(struct ntb_softc *ntb)
{
	uint32_t cntl;

	if (ntb->conn_type == NTB_CONN_TRANSPARENT) {
		/* Nothing to disable in transparent mode; just notify. */
		ntb_link_event(ntb);
		return (0);
	}

	/* Drop BAR snooping, set link-disable, and relock the config. */
	cntl = ntb_reg_read(4, ntb->reg->ntb_ctl);
	cntl &= ~(NTB_CNTL_P2S_BAR23_SNOOP | NTB_CNTL_S2P_BAR23_SNOOP);
	cntl &= ~(NTB_CNTL_P2S_BAR4_SNOOP | NTB_CNTL_S2P_BAR4_SNOOP);
	if (HAS_FEATURE(NTB_SPLIT_BAR))
		cntl &= ~(NTB_CNTL_P2S_BAR5_SNOOP | NTB_CNTL_S2P_BAR5_SNOOP);
	cntl |= NTB_CNTL_LINK_DISABLE | NTB_CNTL_CFG_LOCK;
	ntb_reg_write(4, ntb->reg->ntb_ctl, cntl);
	return (0);
}
1796
/*
 * Callout handler for SoC link recovery.  Restarts the link, waits a
 * randomized interval (see comment below), then either resumes the normal
 * heartbeat ("out") or schedules another recovery attempt ("retry") if the
 * link is still errored, or its speed/width changed during recovery.
 */
static void
recover_soc_link(void *arg)
{
	struct ntb_softc *ntb = arg;
	unsigned speed, width, oldspeed, oldwidth;
	uint32_t status32;

	soc_perform_link_restart(ntb);

	/*
	 * There is a potential race between the 2 NTB devices recovering at
	 * the same time.  If the times are the same, the link will not recover
	 * and the driver will be stuck in this loop forever.  Add a random
	 * interval to the recovery time to prevent this race.
	 */
	status32 = arc4random() % SOC_LINK_RECOVERY_TIME;
	pause("Link", (SOC_LINK_RECOVERY_TIME + status32) * hz / 1000);

	if (soc_link_is_err(ntb))
		goto retry;

	/* Link still down (but not errored): resume normal polling. */
	status32 = ntb_reg_read(4, ntb->reg->ntb_ctl);
	if ((status32 & SOC_CNTL_LINK_DOWN) != 0)
		goto out;

	/* Retry if speed/width changed from the cached values. */
	status32 = ntb_reg_read(4, ntb->reg->lnk_sta);
	width = NTB_LNK_STA_WIDTH(status32);
	speed = status32 & NTB_LINK_SPEED_MASK;

	oldwidth = NTB_LNK_STA_WIDTH(ntb->lnk_sta);
	oldspeed = ntb->lnk_sta & NTB_LINK_SPEED_MASK;
	if (oldwidth != width || oldspeed != speed)
		goto retry;

out:
	callout_reset(&ntb->heartbeat_timer, NTB_HB_TIMEOUT * hz, soc_link_hb,
	    ntb);
	return;

retry:
	callout_reset(&ntb->lr_timer, NTB_HB_TIMEOUT * hz, recover_soc_link,
	    ntb);
}
1840
1841/*
1842 * Polls the HW link status register(s); returns true if something has changed.
1843 */
1844static bool
1845ntb_poll_link(struct ntb_softc *ntb)
1846{
1847	uint32_t ntb_cntl;
1848	uint16_t reg_val;
1849
1850	if (ntb->type == NTB_SOC) {
1851		ntb_cntl = ntb_reg_read(4, ntb->reg->ntb_ctl);
1852		if (ntb_cntl == ntb->ntb_ctl)
1853			return (false);
1854
1855		ntb->ntb_ctl = ntb_cntl;
1856		ntb->lnk_sta = ntb_reg_read(4, ntb->reg->lnk_sta);
1857	} else {
1858		db_iowrite(ntb, ntb->self_reg->db_bell, ntb->db_link_mask);
1859
1860		reg_val = pci_read_config(ntb->device, ntb->reg->lnk_sta, 2);
1861		if (reg_val == ntb->lnk_sta)
1862			return (false);
1863
1864		ntb->lnk_sta = reg_val;
1865	}
1866	return (true);
1867}
1868
1869static inline enum ntb_speed
1870ntb_link_sta_speed(struct ntb_softc *ntb)
1871{
1872
1873	if (!link_is_up(ntb))
1874		return (NTB_SPEED_NONE);
1875	return (ntb->lnk_sta & NTB_LINK_SPEED_MASK);
1876}
1877
1878static inline enum ntb_width
1879ntb_link_sta_width(struct ntb_softc *ntb)
1880{
1881
1882	if (!link_is_up(ntb))
1883		return (NTB_WIDTH_NONE);
1884	return (NTB_LNK_STA_WIDTH(ntb->lnk_sta));
1885}
1886
1887/*
1888 * Public API to the rest of the OS
1889 */
1890
1891/**
1892 * ntb_get_max_spads() - get the total scratch regs usable
1893 * @ntb: pointer to ntb_softc instance
1894 *
1895 * This function returns the max 32bit scratchpad registers usable by the
1896 * upper layer.
1897 *
1898 * RETURNS: total number of scratch pad registers available
1899 */
1900uint8_t
1901ntb_get_max_spads(struct ntb_softc *ntb)
1902{
1903
1904	return (ntb->spad_count);
1905}
1906
1907uint8_t
1908ntb_mw_count(struct ntb_softc *ntb)
1909{
1910
1911	return (ntb->mw_count);
1912}
1913
1914/**
1915 * ntb_spad_write() - write to the secondary scratchpad register
1916 * @ntb: pointer to ntb_softc instance
1917 * @idx: index to the scratchpad register, 0 based
1918 * @val: the data value to put into the register
1919 *
1920 * This function allows writing of a 32bit value to the indexed scratchpad
1921 * register. The register resides on the secondary (external) side.
1922 *
1923 * RETURNS: An appropriate ERRNO error value on error, or zero for success.
1924 */
1925int
1926ntb_spad_write(struct ntb_softc *ntb, unsigned int idx, uint32_t val)
1927{
1928
1929	if (idx >= ntb->spad_count)
1930		return (EINVAL);
1931
1932	ntb_reg_write(4, ntb->self_reg->spad + idx * 4, val);
1933
1934	return (0);
1935}
1936
1937/**
1938 * ntb_spad_read() - read from the primary scratchpad register
1939 * @ntb: pointer to ntb_softc instance
1940 * @idx: index to scratchpad register, 0 based
1941 * @val: pointer to 32bit integer for storing the register value
1942 *
1943 * This function allows reading of the 32bit scratchpad register on
1944 * the primary (internal) side.
1945 *
1946 * RETURNS: An appropriate ERRNO error value on error, or zero for success.
1947 */
1948int
1949ntb_spad_read(struct ntb_softc *ntb, unsigned int idx, uint32_t *val)
1950{
1951
1952	if (idx >= ntb->spad_count)
1953		return (EINVAL);
1954
1955	*val = ntb_reg_read(4, ntb->self_reg->spad + idx * 4);
1956
1957	return (0);
1958}
1959
1960/**
1961 * ntb_peer_spad_write() - write to the secondary scratchpad register
1962 * @ntb: pointer to ntb_softc instance
1963 * @idx: index to the scratchpad register, 0 based
1964 * @val: the data value to put into the register
1965 *
1966 * This function allows writing of a 32bit value to the indexed scratchpad
1967 * register. The register resides on the secondary (external) side.
1968 *
1969 * RETURNS: An appropriate ERRNO error value on error, or zero for success.
1970 */
1971int
1972ntb_peer_spad_write(struct ntb_softc *ntb, unsigned int idx, uint32_t val)
1973{
1974
1975	if (idx >= ntb->spad_count)
1976		return (EINVAL);
1977
1978	if (HAS_FEATURE(NTB_SDOORBELL_LOCKUP))
1979		ntb_mw_write(4, XEON_SHADOW_SPAD_OFFSET + idx * 4, val);
1980	else
1981		ntb_reg_write(4, ntb->peer_reg->spad + idx * 4, val);
1982
1983	return (0);
1984}
1985
1986/**
1987 * ntb_peer_spad_read() - read from the primary scratchpad register
1988 * @ntb: pointer to ntb_softc instance
1989 * @idx: index to scratchpad register, 0 based
1990 * @val: pointer to 32bit integer for storing the register value
1991 *
1992 * This function allows reading of the 32bit scratchpad register on
1993 * the primary (internal) side.
1994 *
1995 * RETURNS: An appropriate ERRNO error value on error, or zero for success.
1996 */
1997int
1998ntb_peer_spad_read(struct ntb_softc *ntb, unsigned int idx, uint32_t *val)
1999{
2000
2001	if (idx >= ntb->spad_count)
2002		return (EINVAL);
2003
2004	if (HAS_FEATURE(NTB_SDOORBELL_LOCKUP))
2005		*val = ntb_mw_read(4, XEON_SHADOW_SPAD_OFFSET + idx * 4);
2006	else
2007		*val = ntb_reg_read(4, ntb->peer_reg->spad + idx * 4);
2008
2009	return (0);
2010}
2011
2012/*
2013 * ntb_mw_get_range() - get the range of a memory window
2014 * @ntb:        NTB device context
2015 * @idx:        Memory window number
2016 * @base:       OUT - the base address for mapping the memory window
2017 * @size:       OUT - the size for mapping the memory window
2018 * @align:      OUT - the base alignment for translating the memory window
2019 * @align_size: OUT - the size alignment for translating the memory window
2020 *
2021 * Get the range of a memory window.  NULL may be given for any output
2022 * parameter if the value is not needed.  The base and size may be used for
2023 * mapping the memory window, to access the peer memory.  The alignment and
2024 * size may be used for translating the memory window, for the peer to access
2025 * memory on the local system.
2026 *
2027 * Return: Zero on success, otherwise an error number.
2028 */
2029int
2030ntb_mw_get_range(struct ntb_softc *ntb, unsigned mw_idx, vm_paddr_t *base,
2031    void **vbase, size_t *size, size_t *align, size_t *align_size)
2032{
2033	struct ntb_pci_bar_info *bar;
2034	size_t bar_b2b_off;
2035
2036	if (mw_idx >= ntb_mw_count(ntb))
2037		return (EINVAL);
2038
2039	bar = &ntb->bar_info[ntb_mw_to_bar(ntb, mw_idx)];
2040	bar_b2b_off = 0;
2041	if (mw_idx == ntb->b2b_mw_idx) {
2042		KASSERT(ntb->b2b_off != 0,
2043		    ("user shouldn't get non-shared b2b mw"));
2044		bar_b2b_off = ntb->b2b_off;
2045	}
2046
2047	if (base != NULL)
2048		*base = bar->pbase + bar_b2b_off;
2049	if (vbase != NULL)
2050		*vbase = (char *)bar->vbase + bar_b2b_off;
2051	if (size != NULL)
2052		*size = bar->size - bar_b2b_off;
2053	if (align != NULL)
2054		*align = bar->size;
2055	if (align_size != NULL)
2056		*align_size = 1;
2057	return (0);
2058}
2059
2060/*
2061 * ntb_mw_set_trans() - set the translation of a memory window
2062 * @ntb:        NTB device context
2063 * @idx:        Memory window number
2064 * @addr:       The dma address local memory to expose to the peer
2065 * @size:       The size of the local memory to expose to the peer
2066 *
2067 * Set the translation of a memory window.  The peer may access local memory
2068 * through the window starting at the address, up to the size.  The address
2069 * must be aligned to the alignment specified by ntb_mw_get_range().  The size
2070 * must be aligned to the size alignment specified by ntb_mw_get_range().
2071 *
2072 * Return: Zero on success, otherwise an error number.
2073 */
int
ntb_mw_set_trans(struct ntb_softc *ntb, unsigned idx, bus_addr_t addr,
    size_t size)
{
	struct ntb_pci_bar_info *bar;
	uint64_t base, limit, reg_val;
	size_t bar_size, mw_size;
	uint32_t base_reg, xlat_reg, limit_reg;
	enum ntb_bar bar_num;

	if (idx >= ntb_mw_count(ntb))
		return (EINVAL);

	bar_num = ntb_mw_to_bar(ntb, idx);
	bar = &ntb->bar_info[bar_num];

	bar_size = bar->size;
	/* A shared B2B BAR reserves its first b2b_off bytes for B2B use. */
	if (idx == ntb->b2b_mw_idx)
		mw_size = bar_size - ntb->b2b_off;
	else
		mw_size = bar_size;

	/* Hardware requires that addr is aligned to bar size */
	if ((addr & (bar_size - 1)) != 0)
		return (EINVAL);

	if (size > mw_size)
		return (EINVAL);

	bar_get_xlat_params(ntb, bar_num, &base_reg, &xlat_reg, &limit_reg);

	/* limit == 0 means "no limit": the full window size is exposed. */
	limit = 0;
	if (bar_is_64bit(ntb, bar_num)) {
		base = ntb_reg_read(8, base_reg);

		if (limit_reg != 0 && size != mw_size)
			limit = base + size;

		/* Set and verify translation address */
		ntb_reg_write(8, xlat_reg, addr);
		reg_val = ntb_reg_read(8, xlat_reg);
		if (reg_val != addr) {
			/* Readback mismatch: undo before reporting EIO. */
			ntb_reg_write(8, xlat_reg, 0);
			return (EIO);
		}

		/*
		 * Set and verify the limit
		 *
		 * NOTE(review): limit_reg is written even when
		 * bar_get_xlat_params() left it 0 — confirm offset 0 is a
		 * safe/ignored register for BARs without a limit register.
		 */
		ntb_reg_write(8, limit_reg, limit);
		reg_val = ntb_reg_read(8, limit_reg);
		if (reg_val != limit) {
			/* Roll back both registers on verify failure. */
			ntb_reg_write(8, limit_reg, base);
			ntb_reg_write(8, xlat_reg, 0);
			return (EIO);
		}
	} else {
		/* Configure 32-bit (split) BAR MW */

		/* Address and limit must both fit in 32-bit registers. */
		if ((addr & ~UINT32_MAX) != 0)
			return (EINVAL);
		if (((addr + size) & ~UINT32_MAX) != 0)
			return (EINVAL);

		base = ntb_reg_read(4, base_reg);

		if (limit_reg != 0 && size != mw_size)
			limit = base + size;

		/* Set and verify translation address */
		ntb_reg_write(4, xlat_reg, addr);
		reg_val = ntb_reg_read(4, xlat_reg);
		if (reg_val != addr) {
			/* Readback mismatch: undo before reporting EIO. */
			ntb_reg_write(4, xlat_reg, 0);
			return (EIO);
		}

		/* Set and verify the limit */
		ntb_reg_write(4, limit_reg, limit);
		reg_val = ntb_reg_read(4, limit_reg);
		if (reg_val != limit) {
			/* Roll back both registers on verify failure. */
			ntb_reg_write(4, limit_reg, base);
			ntb_reg_write(4, xlat_reg, 0);
			return (EIO);
		}
	}
	return (0);
}
2160
2161/*
2162 * ntb_mw_clear_trans() - clear the translation of a memory window
2163 * @ntb:	NTB device context
2164 * @idx:	Memory window number
2165 *
2166 * Clear the translation of a memory window.  The peer may no longer access
2167 * local memory through the window.
2168 *
2169 * Return: Zero on success, otherwise an error number.
2170 */
int
ntb_mw_clear_trans(struct ntb_softc *ntb, unsigned mw_idx)
{
	int rc;

	/* A zero address/size translation revokes peer access. */
	rc = ntb_mw_set_trans(ntb, mw_idx, 0, 0);
	return (rc);
}
2177
2178/**
2179 * ntb_peer_db_set() - Set the doorbell on the secondary/external side
2180 * @ntb: pointer to ntb_softc instance
2181 * @bit: doorbell bits to ring
2182 *
2183 * This function allows triggering of a doorbell on the secondary/external
2184 * side that will initiate an interrupt on the remote host
2185 */
2186void
2187ntb_peer_db_set(struct ntb_softc *ntb, uint64_t bit)
2188{
2189
2190	if (HAS_FEATURE(NTB_SDOORBELL_LOCKUP)) {
2191		ntb_mw_write(2, XEON_SHADOW_PDOORBELL_OFFSET, bit);
2192		return;
2193	}
2194
2195	db_iowrite(ntb, ntb->peer_reg->db_bell, bit);
2196}
2197
2198/*
2199 * ntb_get_peer_db_addr() - Return the address of the remote doorbell register,
2200 * as well as the size of the register (via *sz_out).
2201 *
2202 * This function allows a caller using I/OAT DMA to chain the remote doorbell
2203 * ring to its memory window write.
2204 *
2205 * Note that writing the peer doorbell via a memory window will *not* generate
 * an interrupt on the remote host; that must be done separately.
2207 */
2208bus_addr_t
2209ntb_get_peer_db_addr(struct ntb_softc *ntb, vm_size_t *sz_out)
2210{
2211	struct ntb_pci_bar_info *bar;
2212	uint64_t regoff;
2213
2214	KASSERT(sz_out != NULL, ("must be non-NULL"));
2215
2216	if (!HAS_FEATURE(NTB_SDOORBELL_LOCKUP)) {
2217		bar = &ntb->bar_info[NTB_CONFIG_BAR];
2218		regoff = ntb->peer_reg->db_bell;
2219	} else {
2220		KASSERT((HAS_FEATURE(NTB_SPLIT_BAR) && ntb->mw_count == 2) ||
2221		    (!HAS_FEATURE(NTB_SPLIT_BAR) && ntb->mw_count == 1),
2222		    ("mw_count invalid after setup"));
2223		KASSERT(ntb->b2b_mw_idx != B2B_MW_DISABLED,
2224		    ("invalid b2b idx"));
2225
2226		bar = &ntb->bar_info[ntb_mw_to_bar(ntb, ntb->b2b_mw_idx)];
2227		regoff = XEON_SHADOW_PDOORBELL_OFFSET;
2228	}
2229	KASSERT(bar->pci_bus_tag != X86_BUS_SPACE_IO, ("uh oh"));
2230
2231	*sz_out = ntb->reg->db_size;
2232	/* HACK: Specific to current x86 bus implementation. */
2233	return ((uint64_t)bar->pci_bus_handle + regoff);
2234}
2235
2236/*
2237 * ntb_db_valid_mask() - get a mask of doorbell bits supported by the ntb
2238 * @ntb:	NTB device context
2239 *
2240 * Hardware may support different number or arrangement of doorbell bits.
2241 *
2242 * Return: A mask of doorbell bits supported by the ntb.
2243 */
2244uint64_t
2245ntb_db_valid_mask(struct ntb_softc *ntb)
2246{
2247
2248	return (ntb->db_valid_mask);
2249}
2250
2251/*
2252 * ntb_db_vector_mask() - get a mask of doorbell bits serviced by a vector
2253 * @ntb:	NTB device context
2254 * @vector:	Doorbell vector number
2255 *
2256 * Each interrupt vector may have a different number or arrangement of bits.
2257 *
2258 * Return: A mask of doorbell bits serviced by a vector.
2259 */
2260uint64_t
2261ntb_db_vector_mask(struct ntb_softc *ntb, uint32_t vector)
2262{
2263
2264	if (vector > ntb->db_vec_count)
2265		return (0);
2266	return (ntb->db_valid_mask & ntb_vec_mask(ntb, vector));
2267}
2268
2269/**
2270 * ntb_link_is_up() - get the current ntb link state
2271 * @ntb:        NTB device context
2272 * @speed:      OUT - The link speed expressed as PCIe generation number
2273 * @width:      OUT - The link width expressed as the number of PCIe lanes
2274 *
2275 * RETURNS: true or false based on the hardware link state
2276 */
2277bool
2278ntb_link_is_up(struct ntb_softc *ntb, enum ntb_speed *speed,
2279    enum ntb_width *width)
2280{
2281
2282	if (speed != NULL)
2283		*speed = ntb_link_sta_speed(ntb);
2284	if (width != NULL)
2285		*width = ntb_link_sta_width(ntb);
2286	return (link_is_up(ntb));
2287}
2288
2289static void
2290save_bar_parameters(struct ntb_pci_bar_info *bar)
2291{
2292
2293	bar->pci_bus_tag = rman_get_bustag(bar->pci_resource);
2294	bar->pci_bus_handle = rman_get_bushandle(bar->pci_resource);
2295	bar->pbase = rman_get_start(bar->pci_resource);
2296	bar->size = rman_get_size(bar->pci_resource);
2297	bar->vbase = rman_get_virtual(bar->pci_resource);
2298}
2299
2300device_t
2301ntb_get_device(struct ntb_softc *ntb)
2302{
2303
2304	return (ntb->device);
2305}
2306
2307/* Export HW-specific errata information. */
2308bool
2309ntb_has_feature(struct ntb_softc *ntb, uint64_t feature)
2310{
2311
2312	return (HAS_FEATURE(feature));
2313}
2314