ntb_hw.c revision 290130
1/*-
2 * Copyright (C) 2013 Intel Corporation
3 * Copyright (C) 2015 EMC Corporation
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 *    notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 *    notice, this list of conditions and the following disclaimer in the
13 *    documentation and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25 * SUCH DAMAGE.
26 */
27
28#include <sys/cdefs.h>
29__FBSDID("$FreeBSD: head/sys/dev/ntb/ntb_hw/ntb_hw.c 290130 2015-10-29 04:16:28Z cem $");
30
31#include <sys/param.h>
32#include <sys/kernel.h>
33#include <sys/systm.h>
34#include <sys/bus.h>
35#include <sys/endian.h>
36#include <sys/malloc.h>
37#include <sys/module.h>
38#include <sys/queue.h>
39#include <sys/rman.h>
40#include <sys/sbuf.h>
41#include <sys/sysctl.h>
42#include <vm/vm.h>
43#include <vm/pmap.h>
44#include <machine/bus.h>
45#include <machine/pmap.h>
46#include <machine/resource.h>
47#include <dev/pci/pcireg.h>
48#include <dev/pci/pcivar.h>
49
50#include "ntb_regs.h"
51#include "ntb_hw.h"
52
53/*
54 * The Non-Transparent Bridge (NTB) is a device on some Intel processors that
55 * allows you to connect two systems using a PCI-e link.
56 *
57 * This module contains the hardware abstraction layer for the NTB. It allows
 * you to send and receive interrupts, map the memory windows and send and
59 * receive messages in the scratch-pad registers.
60 *
61 * NOTE: Much of the code in this module is shared with Linux. Any patches may
62 * be picked up and redistributed in Linux with a dual GPL/BSD license.
63 */
64
65#define MAX_MSIX_INTERRUPTS MAX(XEON_DB_COUNT, ATOM_DB_COUNT)
66
67#define NTB_HB_TIMEOUT		1 /* second */
68#define ATOM_LINK_RECOVERY_TIME	500 /* ms */
69
70#define DEVICE2SOFTC(dev) ((struct ntb_softc *) device_get_softc(dev))
71
/* Hardware generation this driver instance is driving. */
enum ntb_device_type {
	NTB_XEON,
	NTB_ATOM
};

/* ntb_conn_type are hardware numbers, cannot change. */
enum ntb_conn_type {
	NTB_CONN_TRANSPARENT = 0,
	NTB_CONN_B2B = 1,
	NTB_CONN_RP = 2,
};

/* Which side of the link this device sits on (hardware numbers). */
enum ntb_b2b_direction {
	NTB_DEV_USD = 0,	/* upstream side */
	NTB_DEV_DSD = 1,	/* downstream side */
};

/* Logical BAR indices into ntb_softc.bar_info[]. */
enum ntb_bar {
	NTB_CONFIG_BAR = 0,	/* register BAR (PCI BAR 0) */
	NTB_B2B_BAR_1,		/* memory window (PCI BAR 2) */
	NTB_B2B_BAR_2,		/* memory window (PCI BAR 4) */
	NTB_B2B_BAR_3,		/* memory window (PCI BAR 5, split-BAR mode only) */
	NTB_MAX_BARS
};
96
97/* Device features and workarounds */
98#define HAS_FEATURE(feature)	\
99	((ntb->features & (feature)) != 0)
100
/* Static description of one supported device ID (see pci_ids[]). */
struct ntb_hw_info {
	uint32_t		device_id;
	const char		*desc;
	enum ntb_device_type	type;
	uint32_t		features;	/* NTB_* feature/workaround flags */
};

/* Runtime state for one mapped PCI BAR. */
struct ntb_pci_bar_info {
	bus_space_tag_t		pci_bus_tag;
	bus_space_handle_t	pci_bus_handle;
	int			pci_resource_id;
	struct resource		*pci_resource;
	vm_paddr_t		pbase;		/* physical base address */
	void			*vbase;		/* kernel virtual mapping */
	u_long			size;

	/* Configuration register offsets */
	uint32_t		psz_off;	/* XEON_PBARxxSZ register offset */
	uint32_t		ssz_off;	/* XEON_SBARxxSZ register offset */
	uint32_t		pbarxlat_off;	/* XEON_PBARxXLAT register offset */
};

/* One allocated interrupt (an MSI-X vector or the shared legacy INTx). */
struct ntb_int_info {
	struct resource	*res;
	int		rid;
	void		*tag;
};

/* Per-MSI-X-vector ISR argument: softc back-pointer plus vector number. */
struct ntb_vec {
	struct ntb_softc	*ntb;
	uint32_t		num;
};
133
/* Core control/status register offsets for one device family. */
struct ntb_reg {
	uint32_t	ntb_ctl;
	uint32_t	lnk_sta;
	uint8_t		db_size;		/* doorbell register width, bytes */
	unsigned	mw_bar[NTB_MAX_BARS];	/* memory window -> enum ntb_bar */
};

/* Doorbell and scratchpad register offsets (self or peer view). */
struct ntb_alt_reg {
	uint32_t	db_bell;
	uint32_t	db_mask;
	uint32_t	spad;
};

/* Secondary-side BAR base / translate / limit register offsets. */
struct ntb_xlat_reg {
	uint32_t	bar0_base;
	uint32_t	bar2_base;
	uint32_t	bar4_base;
	uint32_t	bar5_base;

	uint32_t	bar2_xlat;
	uint32_t	bar4_xlat;
	uint32_t	bar5_xlat;

	uint32_t	bar2_limit;
	uint32_t	bar4_limit;
	uint32_t	bar5_limit;
};

/* Bus addresses used for the B2B windows (see the hw.ntb.xeon_b2b sysctls). */
struct ntb_b2b_addr {
	uint64_t	bar0_addr;
	uint64_t	bar2_addr64;
	uint64_t	bar4_addr64;
	uint64_t	bar4_addr32;	/* split-BAR mode */
	uint64_t	bar5_addr32;	/* split-BAR mode */
};
169
/* Per-device driver state. */
struct ntb_softc {
	device_t		device;
	enum ntb_device_type	type;		/* NTB_XEON or NTB_ATOM */
	uint32_t		features;	/* NTB_* feature/workaround flags */

	struct ntb_pci_bar_info	bar_info[NTB_MAX_BARS];
	struct ntb_int_info	int_info[MAX_MSIX_INTERRUPTS];
	uint32_t		allocated_interrupts;	/* valid int_info[] slots */

	struct callout		heartbeat_timer;	/* Atom link heartbeat */
	struct callout		lr_timer;		/* Atom link recovery */

	void			*ntb_ctx;	/* consumer context cookie */
	const struct ntb_ctx_ops *ctx_ops;	/* consumer callbacks */
	struct ntb_vec		*msix_vec;	/* per-vector ISR arguments */
#define CTX_LOCK(sc)		mtx_lock_spin(&(sc)->ctx_lock)
#define CTX_UNLOCK(sc)		mtx_unlock_spin(&(sc)->ctx_lock)
#define CTX_ASSERT(sc,f)	mtx_assert(&(sc)->ctx_lock, (f))
	struct mtx		ctx_lock;

	/* PPD register value; presumably cached by ntb_detect_* -- confirm. */
	uint32_t		ppd;
	enum ntb_conn_type	conn_type;
	enum ntb_b2b_direction	dev_type;

	/* Offset of peer bar0 in B2B BAR */
	uint64_t			b2b_off;
	/* Memory window used to access peer bar0 */
#define B2B_MW_DISABLED			UINT8_MAX
	uint8_t				b2b_mw_idx;

	uint8_t				mw_count;	/* memory windows */
	uint8_t				spad_count;	/* scratchpad registers */
	uint8_t				db_count;	/* doorbell bits */
	uint8_t				db_vec_count;	/* doorbell interrupt vectors */
	uint8_t				db_vec_shift;	/* db bits per vector (ntb_vec_mask) */

	/* Protects local db_mask. */
#define DB_MASK_LOCK(sc)	mtx_lock_spin(&(sc)->db_mask_lock)
#define DB_MASK_UNLOCK(sc)	mtx_unlock_spin(&(sc)->db_mask_lock)
#define DB_MASK_ASSERT(sc,f)	mtx_assert(&(sc)->db_mask_lock, (f))
	struct mtx			db_mask_lock;

	uint32_t			ntb_ctl;	/* cached control register */
	uint32_t			lnk_sta;	/* cached link status */

	uint64_t			db_valid_mask;	/* all usable doorbell bits */
	uint64_t			db_link_mask;	/* link-event doorbell bits */
	uint64_t			db_mask;	/* currently masked bits (cached) */

	int				last_ts;	/* ticks @ last irq */

	const struct ntb_reg		*reg;
	const struct ntb_alt_reg	*self_reg;	/* our own db/spad registers */
	const struct ntb_alt_reg	*peer_reg;	/* peer's db/spad registers */
	const struct ntb_xlat_reg	*xlat_reg;
};
226
#ifdef __i386__
/*
 * i386 bus_space lacks 8-byte accessors; synthesize them from two 4-byte
 * accesses: low dword at 'offset', high dword at 'offset + 4'.
 */
static __inline uint64_t
bus_space_read_8(bus_space_tag_t tag, bus_space_handle_t handle,
    bus_size_t offset)
{

	return (bus_space_read_4(tag, handle, offset) |
	    ((uint64_t)bus_space_read_4(tag, handle, offset + 4)) << 32);
}

static __inline void
bus_space_write_8(bus_space_tag_t tag, bus_space_handle_t handle,
    bus_size_t offset, uint64_t val)
{

	bus_space_write_4(tag, handle, offset, val);
	bus_space_write_4(tag, handle, offset + 4, val >> 32);
}
#endif
246
/*
 * Register access helpers.  SIZE is the access width in bytes (1/2/4/8).
 * ntb_reg_* access the local register BAR; ntb_mw_* access registers through
 * the B2B memory window (b2b_mw_idx must be valid).  All of these expect a
 * local variable named 'ntb' to be in scope.
 */
#define ntb_bar_read(SIZE, bar, offset) \
	    bus_space_read_ ## SIZE (ntb->bar_info[(bar)].pci_bus_tag, \
	    ntb->bar_info[(bar)].pci_bus_handle, (offset))
#define ntb_bar_write(SIZE, bar, offset, val) \
	    bus_space_write_ ## SIZE (ntb->bar_info[(bar)].pci_bus_tag, \
	    ntb->bar_info[(bar)].pci_bus_handle, (offset), (val))
#define ntb_reg_read(SIZE, offset) ntb_bar_read(SIZE, NTB_CONFIG_BAR, offset)
#define ntb_reg_write(SIZE, offset, val) \
	    ntb_bar_write(SIZE, NTB_CONFIG_BAR, offset, val)
#define ntb_mw_read(SIZE, offset) \
	    ntb_bar_read(SIZE, ntb_mw_to_bar(ntb, ntb->b2b_mw_idx), offset)
#define ntb_mw_write(SIZE, offset, val) \
	    ntb_bar_write(SIZE, ntb_mw_to_bar(ntb, ntb->b2b_mw_idx), \
		offset, val)
261
262static int ntb_probe(device_t device);
263static int ntb_attach(device_t device);
264static int ntb_detach(device_t device);
265static inline enum ntb_bar ntb_mw_to_bar(struct ntb_softc *, unsigned mw);
266static inline bool bar_is_64bit(struct ntb_softc *, enum ntb_bar);
267static inline void bar_get_xlat_params(struct ntb_softc *, enum ntb_bar,
268    uint32_t *base, uint32_t *xlat, uint32_t *lmt);
269static int ntb_map_pci_bars(struct ntb_softc *ntb);
270static void print_map_success(struct ntb_softc *, struct ntb_pci_bar_info *,
271    const char *);
272static int map_mmr_bar(struct ntb_softc *ntb, struct ntb_pci_bar_info *bar);
273static int map_memory_window_bar(struct ntb_softc *ntb,
274    struct ntb_pci_bar_info *bar);
275static void ntb_unmap_pci_bar(struct ntb_softc *ntb);
276static int ntb_remap_msix(device_t, uint32_t desired, uint32_t avail);
277static int ntb_init_isr(struct ntb_softc *ntb);
278static int ntb_setup_legacy_interrupt(struct ntb_softc *ntb);
279static int ntb_setup_msix(struct ntb_softc *ntb, uint32_t num_vectors);
280static void ntb_teardown_interrupts(struct ntb_softc *ntb);
281static inline uint64_t ntb_vec_mask(struct ntb_softc *, uint64_t db_vector);
282static void ntb_interrupt(struct ntb_softc *, uint32_t vec);
283static void ndev_vec_isr(void *arg);
284static void ndev_irq_isr(void *arg);
285static inline uint64_t db_ioread(struct ntb_softc *, uint64_t regoff);
286static inline void db_iowrite(struct ntb_softc *, uint64_t regoff, uint64_t val);
287static int ntb_create_msix_vec(struct ntb_softc *ntb, uint32_t num_vectors);
288static void ntb_free_msix_vec(struct ntb_softc *ntb);
289static struct ntb_hw_info *ntb_get_device_info(uint32_t device_id);
290static void ntb_detect_max_mw(struct ntb_softc *ntb);
291static int ntb_detect_xeon(struct ntb_softc *ntb);
292static int ntb_detect_atom(struct ntb_softc *ntb);
293static int ntb_xeon_init_dev(struct ntb_softc *ntb);
294static int ntb_atom_init_dev(struct ntb_softc *ntb);
295static void ntb_teardown_xeon(struct ntb_softc *ntb);
296static void configure_atom_secondary_side_bars(struct ntb_softc *ntb);
297static void xeon_reset_sbar_size(struct ntb_softc *, enum ntb_bar idx,
298    enum ntb_bar regbar);
299static void xeon_set_sbar_base_and_limit(struct ntb_softc *,
300    uint64_t base_addr, enum ntb_bar idx, enum ntb_bar regbar);
301static void xeon_set_pbar_xlat(struct ntb_softc *, uint64_t base_addr,
302    enum ntb_bar idx);
303static int xeon_setup_b2b_mw(struct ntb_softc *,
304    const struct ntb_b2b_addr *addr, const struct ntb_b2b_addr *peer_addr);
305static inline bool link_is_up(struct ntb_softc *ntb);
306static inline bool atom_link_is_err(struct ntb_softc *ntb);
307static inline enum ntb_speed ntb_link_sta_speed(struct ntb_softc *);
308static inline enum ntb_width ntb_link_sta_width(struct ntb_softc *);
309static void atom_link_hb(void *arg);
310static void ntb_db_event(struct ntb_softc *ntb, uint32_t vec);
311static void recover_atom_link(void *arg);
312static bool ntb_poll_link(struct ntb_softc *ntb);
313static void save_bar_parameters(struct ntb_pci_bar_info *bar);
314static void ntb_sysctl_init(struct ntb_softc *);
315static int sysctl_handle_features(SYSCTL_HANDLER_ARGS);
316static int sysctl_handle_link_status(SYSCTL_HANDLER_ARGS);
317static int sysctl_handle_register(SYSCTL_HANDLER_ARGS);
318
/*
 * Supported PCI device IDs and their per-device feature/workaround flags.
 * The all-zero sentinel entry terminates the table.
 */
static struct ntb_hw_info pci_ids[] = {
	/* XXX: PS/SS IDs left out until they are supported. */
	{ 0x0C4E8086, "BWD Atom Processor S1200 Non-Transparent Bridge B2B",
		NTB_ATOM, 0 },

	{ 0x37258086, "JSF Xeon C35xx/C55xx Non-Transparent Bridge B2B",
		NTB_XEON, NTB_SDOORBELL_LOCKUP | NTB_B2BDOORBELL_BIT14 },
	{ 0x3C0D8086, "SNB Xeon E5/Core i7 Non-Transparent Bridge B2B",
		NTB_XEON, NTB_SDOORBELL_LOCKUP | NTB_B2BDOORBELL_BIT14 },
	{ 0x0E0D8086, "IVT Xeon E5 V2 Non-Transparent Bridge B2B", NTB_XEON,
		NTB_SDOORBELL_LOCKUP | NTB_B2BDOORBELL_BIT14 |
		    NTB_SB01BASE_LOCKUP | NTB_BAR_SIZE_4K },
	{ 0x2F0D8086, "HSX Xeon E5 V3 Non-Transparent Bridge B2B", NTB_XEON,
		NTB_SDOORBELL_LOCKUP | NTB_B2BDOORBELL_BIT14 |
		    NTB_SB01BASE_LOCKUP },
	{ 0x6F0D8086, "BDX Xeon E5 V4 Non-Transparent Bridge B2B", NTB_XEON,
		NTB_SDOORBELL_LOCKUP | NTB_B2BDOORBELL_BIT14 |
		    NTB_SB01BASE_LOCKUP },

	{ 0x00000000, NULL, NTB_ATOM, 0 }
};
340
341static const struct ntb_reg atom_reg = {
342	.ntb_ctl = ATOM_NTBCNTL_OFFSET,
343	.lnk_sta = ATOM_LINK_STATUS_OFFSET,
344	.db_size = sizeof(uint64_t),
345	.mw_bar = { NTB_B2B_BAR_1, NTB_B2B_BAR_2 },
346};
347
348static const struct ntb_alt_reg atom_pri_reg = {
349	.db_bell = ATOM_PDOORBELL_OFFSET,
350	.db_mask = ATOM_PDBMSK_OFFSET,
351	.spad = ATOM_SPAD_OFFSET,
352};
353
354static const struct ntb_alt_reg atom_b2b_reg = {
355	.db_bell = ATOM_B2B_DOORBELL_OFFSET,
356	.spad = ATOM_B2B_SPAD_OFFSET,
357};
358
359static const struct ntb_xlat_reg atom_sec_xlat = {
360#if 0
361	/* "FIXME" says the Linux driver. */
362	.bar0_base = ATOM_SBAR0BASE_OFFSET,
363	.bar2_base = ATOM_SBAR2BASE_OFFSET,
364	.bar4_base = ATOM_SBAR4BASE_OFFSET,
365
366	.bar2_limit = ATOM_SBAR2LMT_OFFSET,
367	.bar4_limit = ATOM_SBAR4LMT_OFFSET,
368#endif
369
370	.bar2_xlat = ATOM_SBAR2XLAT_OFFSET,
371	.bar4_xlat = ATOM_SBAR4XLAT_OFFSET,
372};
373
374static const struct ntb_reg xeon_reg = {
375	.ntb_ctl = XEON_NTBCNTL_OFFSET,
376	.lnk_sta = XEON_LINK_STATUS_OFFSET,
377	.db_size = sizeof(uint16_t),
378	.mw_bar = { NTB_B2B_BAR_1, NTB_B2B_BAR_2, NTB_B2B_BAR_3 },
379};
380
381static const struct ntb_alt_reg xeon_pri_reg = {
382	.db_bell = XEON_PDOORBELL_OFFSET,
383	.db_mask = XEON_PDBMSK_OFFSET,
384	.spad = XEON_SPAD_OFFSET,
385};
386
387static const struct ntb_alt_reg xeon_b2b_reg = {
388	.db_bell = XEON_B2B_DOORBELL_OFFSET,
389	.spad = XEON_B2B_SPAD_OFFSET,
390};
391
392static const struct ntb_xlat_reg xeon_sec_xlat = {
393	.bar0_base = XEON_SBAR0BASE_OFFSET,
394	.bar2_base = XEON_SBAR2BASE_OFFSET,
395	.bar4_base = XEON_SBAR4BASE_OFFSET,
396	.bar5_base = XEON_SBAR5BASE_OFFSET,
397
398	.bar2_limit = XEON_SBAR2LMT_OFFSET,
399	.bar4_limit = XEON_SBAR4LMT_OFFSET,
400	.bar5_limit = XEON_SBAR5LMT_OFFSET,
401
402	.bar2_xlat = XEON_SBAR2XLAT_OFFSET,
403	.bar4_xlat = XEON_SBAR4XLAT_OFFSET,
404	.bar5_xlat = XEON_SBAR5XLAT_OFFSET,
405};
406
407static struct ntb_b2b_addr xeon_b2b_usd_addr = {
408	.bar0_addr = XEON_B2B_BAR0_USD_ADDR,
409	.bar2_addr64 = XEON_B2B_BAR2_USD_ADDR64,
410	.bar4_addr64 = XEON_B2B_BAR4_USD_ADDR64,
411	.bar4_addr32 = XEON_B2B_BAR4_USD_ADDR32,
412	.bar5_addr32 = XEON_B2B_BAR5_USD_ADDR32,
413};
414
415static struct ntb_b2b_addr xeon_b2b_dsd_addr = {
416	.bar0_addr = XEON_B2B_BAR0_DSD_ADDR,
417	.bar2_addr64 = XEON_B2B_BAR2_DSD_ADDR64,
418	.bar4_addr64 = XEON_B2B_BAR4_DSD_ADDR64,
419	.bar4_addr32 = XEON_B2B_BAR4_DSD_ADDR32,
420	.bar5_addr32 = XEON_B2B_BAR5_DSD_ADDR32,
421};
422
423SYSCTL_NODE(_hw_ntb, OID_AUTO, xeon_b2b, CTLFLAG_RW, 0,
424    "B2B MW segment overrides -- MUST be the same on both sides");
425
426SYSCTL_UQUAD(_hw_ntb_xeon_b2b, OID_AUTO, usd_bar2_addr64, CTLFLAG_RDTUN,
427    &xeon_b2b_usd_addr.bar2_addr64, 0, "If using B2B topology on Xeon "
428    "hardware, use this 64-bit address on the bus between the NTB devices for "
429    "the window at BAR2, on the upstream side of the link.  MUST be the same "
430    "address on both sides.");
431SYSCTL_UQUAD(_hw_ntb_xeon_b2b, OID_AUTO, usd_bar4_addr64, CTLFLAG_RDTUN,
432    &xeon_b2b_usd_addr.bar4_addr64, 0, "See usd_bar2_addr64, but BAR4.");
433SYSCTL_UQUAD(_hw_ntb_xeon_b2b, OID_AUTO, usd_bar4_addr32, CTLFLAG_RDTUN,
434    &xeon_b2b_usd_addr.bar4_addr32, 0, "See usd_bar2_addr64, but BAR4 "
435    "(split-BAR mode).");
436SYSCTL_UQUAD(_hw_ntb_xeon_b2b, OID_AUTO, usd_bar5_addr32, CTLFLAG_RDTUN,
437    &xeon_b2b_usd_addr.bar5_addr32, 0, "See usd_bar2_addr64, but BAR5 "
438    "(split-BAR mode).");
439
440SYSCTL_UQUAD(_hw_ntb_xeon_b2b, OID_AUTO, dsd_bar2_addr64, CTLFLAG_RDTUN,
441    &xeon_b2b_dsd_addr.bar2_addr64, 0, "If using B2B topology on Xeon "
442    "hardware, use this 64-bit address on the bus between the NTB devices for "
443    "the window at BAR2, on the downstream side of the link.  MUST be the same"
444    " address on both sides.");
445SYSCTL_UQUAD(_hw_ntb_xeon_b2b, OID_AUTO, dsd_bar4_addr64, CTLFLAG_RDTUN,
446    &xeon_b2b_dsd_addr.bar4_addr64, 0, "See dsd_bar2_addr64, but BAR4.");
447SYSCTL_UQUAD(_hw_ntb_xeon_b2b, OID_AUTO, dsd_bar4_addr32, CTLFLAG_RDTUN,
448    &xeon_b2b_dsd_addr.bar4_addr32, 0, "See dsd_bar2_addr64, but BAR4 "
449    "(split-BAR mode).");
450SYSCTL_UQUAD(_hw_ntb_xeon_b2b, OID_AUTO, dsd_bar5_addr32, CTLFLAG_RDTUN,
451    &xeon_b2b_dsd_addr.bar5_addr32, 0, "See dsd_bar2_addr64, but BAR5 "
452    "(split-BAR mode).");
453
454/*
455 * OS <-> Driver interface structures
456 */
457MALLOC_DEFINE(M_NTB, "ntb_hw", "ntb_hw driver memory allocations");
458
459static device_method_t ntb_pci_methods[] = {
460	/* Device interface */
461	DEVMETHOD(device_probe,     ntb_probe),
462	DEVMETHOD(device_attach,    ntb_attach),
463	DEVMETHOD(device_detach,    ntb_detach),
464	DEVMETHOD_END
465};
466
467static driver_t ntb_pci_driver = {
468	"ntb_hw",
469	ntb_pci_methods,
470	sizeof(struct ntb_softc),
471};
472
473static devclass_t ntb_devclass;
474DRIVER_MODULE(ntb_hw, pci, ntb_pci_driver, ntb_devclass, NULL, NULL);
475MODULE_VERSION(ntb_hw, 1);
476
477SYSCTL_NODE(_hw, OID_AUTO, ntb, CTLFLAG_RW, 0, "NTB sysctls");
478
479/*
480 * OS <-> Driver linkage functions
481 */
482static int
483ntb_probe(device_t device)
484{
485	struct ntb_hw_info *p;
486
487	p = ntb_get_device_info(pci_get_devid(device));
488	if (p == NULL)
489		return (ENXIO);
490
491	device_set_desc(device, p->desc);
492	return (0);
493}
494
/*
 * Attach: identify the device, detect its topology, map its BARs, run
 * family-specific initialization, and hook up interrupts.  On any failure
 * the partially-constructed state is rolled back via ntb_detach().
 */
static int
ntb_attach(device_t device)
{
	struct ntb_softc *ntb;
	struct ntb_hw_info *p;
	int error;

	ntb = DEVICE2SOFTC(device);
	/* Cannot be NULL here: probe already matched this device ID. */
	p = ntb_get_device_info(pci_get_devid(device));

	ntb->device = device;
	ntb->type = p->type;
	ntb->features = p->features;
	/* No memory window claimed for peer-register access yet. */
	ntb->b2b_mw_idx = B2B_MW_DISABLED;

	/* Heartbeat timer for NTB_ATOM since there is no link interrupt */
	callout_init(&ntb->heartbeat_timer, 1);
	callout_init(&ntb->lr_timer, 1);
	/* Spin mutexes: used with the *_LOCK spin macros defined above. */
	mtx_init(&ntb->db_mask_lock, "ntb hw bits", NULL, MTX_SPIN);
	mtx_init(&ntb->ctx_lock, "ntb ctx", NULL, MTX_SPIN);

	/* Read topology/feature configuration from the hardware. */
	if (ntb->type == NTB_ATOM)
		error = ntb_detect_atom(ntb);
	else
		error = ntb_detect_xeon(ntb);
	if (error)
		goto out;

	ntb_detect_max_mw(ntb);

	error = ntb_map_pci_bars(ntb);
	if (error)
		goto out;
	if (ntb->type == NTB_ATOM)
		error = ntb_atom_init_dev(ntb);
	else
		error = ntb_xeon_init_dev(ntb);
	if (error)
		goto out;
	error = ntb_init_isr(ntb);
	if (error)
		goto out;
	ntb_sysctl_init(ntb);

	pci_enable_busmaster(ntb->device);

out:
	/* ntb_detach() tolerates partially-initialized state. */
	if (error != 0)
		ntb_detach(device);
	return (error);
}
546
/*
 * Detach: quiesce doorbells, stop timers, tear down interrupts, and release
 * all mapped BARs.  Also used by ntb_attach() for error rollback, so every
 * step must tolerate state that was never fully initialized.
 */
static int
ntb_detach(device_t device)
{
	struct ntb_softc *ntb;

	ntb = DEVICE2SOFTC(device);

	/* self_reg is only set once device init ran; skip masking otherwise. */
	if (ntb->self_reg != NULL)
		ntb_db_set_mask(ntb, ntb->db_valid_mask);
	callout_drain(&ntb->heartbeat_timer);
	callout_drain(&ntb->lr_timer);
	if (ntb->type == NTB_XEON)
		ntb_teardown_xeon(ntb);
	ntb_teardown_interrupts(ntb);

	mtx_destroy(&ntb->db_mask_lock);
	mtx_destroy(&ntb->ctx_lock);

	/*
	 * Redetect total MWs so we unmap properly -- in case we lowered the
	 * maximum to work around Xeon errata.
	 */
	ntb_detect_max_mw(ntb);
	ntb_unmap_pci_bar(ntb);

	return (0);
}
574
575/*
576 * Driver internal routines
577 */
/*
 * Translate a memory window index to the enum ntb_bar backing it.  An index
 * equal to b2b_mw_idx is accepted even when it is outside mw_count (that
 * window is the one stolen for B2B peer-register access).
 */
static inline enum ntb_bar
ntb_mw_to_bar(struct ntb_softc *ntb, unsigned mw)
{

	KASSERT(mw < ntb->mw_count ||
	    (mw != B2B_MW_DISABLED && mw == ntb->b2b_mw_idx),
	    ("%s: mw:%u > count:%u", __func__, mw, (unsigned)ntb->mw_count));
	/* mw_bar[] slot 0 is NTB_CONFIG_BAR(0), so 0 marks an unused slot. */
	KASSERT(ntb->reg->mw_bar[mw] != 0, ("invalid mw"));

	return (ntb->reg->mw_bar[mw]);
}
589
590static inline bool
591bar_is_64bit(struct ntb_softc *ntb, enum ntb_bar bar)
592{
593	/* XXX This assertion could be stronger. */
594	KASSERT(bar < NTB_MAX_BARS, ("bogus bar"));
595	return (bar < NTB_B2B_BAR_2 || !HAS_FEATURE(NTB_SPLIT_BAR));
596}
597
598static inline void
599bar_get_xlat_params(struct ntb_softc *ntb, enum ntb_bar bar, uint32_t *base,
600    uint32_t *xlat, uint32_t *lmt)
601{
602	uint32_t basev, lmtv, xlatv;
603
604	switch (bar) {
605	case NTB_B2B_BAR_1:
606		basev = ntb->xlat_reg->bar2_base;
607		lmtv = ntb->xlat_reg->bar2_limit;
608		xlatv = ntb->xlat_reg->bar2_xlat;
609		break;
610	case NTB_B2B_BAR_2:
611		basev = ntb->xlat_reg->bar4_base;
612		lmtv = ntb->xlat_reg->bar4_limit;
613		xlatv = ntb->xlat_reg->bar4_xlat;
614		break;
615	case NTB_B2B_BAR_3:
616		basev = ntb->xlat_reg->bar5_base;
617		lmtv = ntb->xlat_reg->bar5_limit;
618		xlatv = ntb->xlat_reg->bar5_xlat;
619		break;
620	default:
621		KASSERT(bar >= NTB_B2B_BAR_1 && bar < NTB_MAX_BARS,
622		    ("bad bar"));
623		basev = lmtv = xlatv = 0;
624		break;
625	}
626
627	if (base != NULL)
628		*base = basev;
629	if (xlat != NULL)
630		*xlat = xlatv;
631	if (lmt != NULL)
632		*lmt = lmtv;
633}
634
635static int
636ntb_map_pci_bars(struct ntb_softc *ntb)
637{
638	int rc;
639
640	ntb->bar_info[NTB_CONFIG_BAR].pci_resource_id = PCIR_BAR(0);
641	rc = map_mmr_bar(ntb, &ntb->bar_info[NTB_CONFIG_BAR]);
642	if (rc != 0)
643		goto out;
644
645	ntb->bar_info[NTB_B2B_BAR_1].pci_resource_id = PCIR_BAR(2);
646	rc = map_memory_window_bar(ntb, &ntb->bar_info[NTB_B2B_BAR_1]);
647	if (rc != 0)
648		goto out;
649	ntb->bar_info[NTB_B2B_BAR_1].psz_off = XEON_PBAR23SZ_OFFSET;
650	ntb->bar_info[NTB_B2B_BAR_1].ssz_off = XEON_SBAR23SZ_OFFSET;
651	ntb->bar_info[NTB_B2B_BAR_1].pbarxlat_off = XEON_PBAR2XLAT_OFFSET;
652
653	ntb->bar_info[NTB_B2B_BAR_2].pci_resource_id = PCIR_BAR(4);
654	/* XXX Are shared MW B2Bs write-combining? */
655	if (HAS_FEATURE(NTB_SDOORBELL_LOCKUP) && !HAS_FEATURE(NTB_SPLIT_BAR))
656		rc = map_mmr_bar(ntb, &ntb->bar_info[NTB_B2B_BAR_2]);
657	else
658		rc = map_memory_window_bar(ntb, &ntb->bar_info[NTB_B2B_BAR_2]);
659	ntb->bar_info[NTB_B2B_BAR_2].psz_off = XEON_PBAR4SZ_OFFSET;
660	ntb->bar_info[NTB_B2B_BAR_2].ssz_off = XEON_SBAR4SZ_OFFSET;
661	ntb->bar_info[NTB_B2B_BAR_2].pbarxlat_off = XEON_PBAR4XLAT_OFFSET;
662
663	if (!HAS_FEATURE(NTB_SPLIT_BAR))
664		goto out;
665
666	ntb->bar_info[NTB_B2B_BAR_3].pci_resource_id = PCIR_BAR(5);
667	if (HAS_FEATURE(NTB_SDOORBELL_LOCKUP))
668		rc = map_mmr_bar(ntb, &ntb->bar_info[NTB_B2B_BAR_3]);
669	else
670		rc = map_memory_window_bar(ntb, &ntb->bar_info[NTB_B2B_BAR_3]);
671	ntb->bar_info[NTB_B2B_BAR_3].psz_off = XEON_PBAR5SZ_OFFSET;
672	ntb->bar_info[NTB_B2B_BAR_3].ssz_off = XEON_SBAR5SZ_OFFSET;
673	ntb->bar_info[NTB_B2B_BAR_3].pbarxlat_off = XEON_PBAR5XLAT_OFFSET;
674
675out:
676	if (rc != 0)
677		device_printf(ntb->device,
678		    "unable to allocate pci resource\n");
679	return (rc);
680}
681
682static void
683print_map_success(struct ntb_softc *ntb, struct ntb_pci_bar_info *bar,
684    const char *kind)
685{
686
687	device_printf(ntb->device,
688	    "Mapped BAR%d v:[%p-%p] p:[%p-%p] (0x%jx bytes) (%s)\n",
689	    PCI_RID2BAR(bar->pci_resource_id), bar->vbase,
690	    (char *)bar->vbase + bar->size - 1,
691	    (void *)bar->pbase, (void *)(bar->pbase + bar->size - 1),
692	    (uintmax_t)bar->size, kind);
693}
694
695static int
696map_mmr_bar(struct ntb_softc *ntb, struct ntb_pci_bar_info *bar)
697{
698
699	bar->pci_resource = bus_alloc_resource_any(ntb->device, SYS_RES_MEMORY,
700	    &bar->pci_resource_id, RF_ACTIVE);
701	if (bar->pci_resource == NULL)
702		return (ENXIO);
703
704	save_bar_parameters(bar);
705	print_map_success(ntb, bar, "mmr");
706	return (0);
707}
708
/*
 * Map a memory-window BAR, applying the Ivytown BAR-size workaround when
 * needed and attempting (currently disabled) write-combining marking.
 * Returns 0 unless resource allocation or resizing fails.
 */
static int
map_memory_window_bar(struct ntb_softc *ntb, struct ntb_pci_bar_info *bar)
{
	int rc;
	uint8_t bar_size_bits = 0;

	bar->pci_resource = bus_alloc_resource_any(ntb->device, SYS_RES_MEMORY,
	    &bar->pci_resource_id, RF_ACTIVE);

	if (bar->pci_resource == NULL)
		return (ENXIO);

	save_bar_parameters(bar);
	/*
	 * Ivytown NTB BAR sizes are misreported by the hardware due to a
	 * hardware issue. To work around this, query the size it should be
	 * configured to by the device and modify the resource to correspond to
	 * this new size. The BIOS on systems with this problem is required to
	 * provide enough address space to allow the driver to make this change
	 * safely.
	 *
	 * Ideally I could have just specified the size when I allocated the
	 * resource like:
	 *  bus_alloc_resource(ntb->device,
	 *	SYS_RES_MEMORY, &bar->pci_resource_id, 0ul, ~0ul,
	 *	1ul << bar_size_bits, RF_ACTIVE);
	 * but the PCI driver does not honor the size in this call, so we have
	 * to modify it after the fact.
	 */
	if (HAS_FEATURE(NTB_BAR_SIZE_4K)) {
		/* The PBARxxSZ config register holds log2 of the true size. */
		if (bar->pci_resource_id == PCIR_BAR(2))
			bar_size_bits = pci_read_config(ntb->device,
			    XEON_PBAR23SZ_OFFSET, 1);
		else
			bar_size_bits = pci_read_config(ntb->device,
			    XEON_PBAR45SZ_OFFSET, 1);

		rc = bus_adjust_resource(ntb->device, SYS_RES_MEMORY,
		    bar->pci_resource, bar->pbase,
		    bar->pbase + (1ul << bar_size_bits) - 1);
		if (rc != 0) {
			device_printf(ntb->device,
			    "unable to resize bar\n");
			return (rc);
		}

		/* Re-read pbase/vbase/size after the resize. */
		save_bar_parameters(bar);
	}

#if 0	/* XXX: amd64 pmap_change_attr() assumes region lies in DMAP. */
	/* Mark bar region as write combining to improve performance. */
	rc = pmap_change_attr((vm_offset_t)bar->vbase, bar->size,
	    VM_MEMATTR_WRITE_COMBINING);
#else
	rc = EINVAL;
#endif
	print_map_success(ntb, bar, "mw");
	/*
	 * While the pmap_change_attr() call above is compiled out, rc is
	 * unconditionally EINVAL here, so the "Unable to mark" branch below
	 * is always the one taken.
	 */
	if (rc == 0)
		device_printf(ntb->device,
		    "Marked BAR%d v:[%p-%p] p:[%p-%p] as "
		    "WRITE_COMBINING.\n",
		    PCI_RID2BAR(bar->pci_resource_id), bar->vbase,
		    (char *)bar->vbase + bar->size - 1,
		    (void *)bar->pbase, (void *)(bar->pbase + bar->size - 1));
	else
		device_printf(ntb->device,
		    "Unable to mark BAR%d v:[%p-%p] p:[%p-%p] as "
		    "WRITE_COMBINING: %d\n",
		    PCI_RID2BAR(bar->pci_resource_id), bar->vbase,
		    (char *)bar->vbase + bar->size - 1,
		    (void *)bar->pbase, (void *)(bar->pbase + bar->size - 1),
		    rc);
		/* Proceed anyway */
	return (0);
}
784
785static void
786ntb_unmap_pci_bar(struct ntb_softc *ntb)
787{
788	struct ntb_pci_bar_info *current_bar;
789	int i;
790
791	for (i = 0; i < NTB_MAX_BARS; i++) {
792		current_bar = &ntb->bar_info[i];
793		if (current_bar->pci_resource != NULL)
794			bus_release_resource(ntb->device, SYS_RES_MEMORY,
795			    current_bar->pci_resource_id,
796			    current_bar->pci_resource);
797	}
798}
799
800static int
801ntb_setup_msix(struct ntb_softc *ntb, uint32_t num_vectors)
802{
803	uint32_t i;
804	int rc;
805
806	for (i = 0; i < num_vectors; i++) {
807		ntb->int_info[i].rid = i + 1;
808		ntb->int_info[i].res = bus_alloc_resource_any(ntb->device,
809		    SYS_RES_IRQ, &ntb->int_info[i].rid, RF_ACTIVE);
810		if (ntb->int_info[i].res == NULL) {
811			device_printf(ntb->device,
812			    "bus_alloc_resource failed\n");
813			return (ENOMEM);
814		}
815		ntb->int_info[i].tag = NULL;
816		ntb->allocated_interrupts++;
817		rc = bus_setup_intr(ntb->device, ntb->int_info[i].res,
818		    INTR_MPSAFE | INTR_TYPE_MISC, NULL, ndev_vec_isr,
819		    &ntb->msix_vec[i], &ntb->int_info[i].tag);
820		if (rc != 0) {
821			device_printf(ntb->device, "bus_setup_intr failed\n");
822			return (ENXIO);
823		}
824	}
825	return (0);
826}
827
828/*
829 * The Linux NTB driver drops from MSI-X to legacy INTx if a unique vector
830 * cannot be allocated for each MSI-X message.  JHB seems to think remapping
831 * should be okay.  This tunable should enable us to test that hypothesis
832 * when someone gets their hands on some Xeon hardware.
833 */
834static int ntb_force_remap_mode;
835SYSCTL_INT(_hw_ntb, OID_AUTO, force_remap_mode, CTLFLAG_RDTUN,
836    &ntb_force_remap_mode, 0, "If enabled, force MSI-X messages to be remapped"
837    " to a smaller number of ithreads, even if the desired number are "
838    "available");
839
840/*
841 * In case it is NOT ok, give consumers an abort button.
842 */
843static int ntb_prefer_intx;
844SYSCTL_INT(_hw_ntb, OID_AUTO, prefer_intx_to_remap, CTLFLAG_RDTUN,
845    &ntb_prefer_intx, 0, "If enabled, prefer to use legacy INTx mode rather "
846    "than remapping MSI-X messages over available slots (match Linux driver "
847    "behavior)");
848
849/*
850 * Remap the desired number of MSI-X messages to available ithreads in a simple
851 * round-robin fashion.
852 */
853static int
854ntb_remap_msix(device_t dev, uint32_t desired, uint32_t avail)
855{
856	u_int *vectors;
857	uint32_t i;
858	int rc;
859
860	if (ntb_prefer_intx != 0)
861		return (ENXIO);
862
863	vectors = malloc(desired * sizeof(*vectors), M_NTB, M_ZERO | M_WAITOK);
864
865	for (i = 0; i < desired; i++)
866		vectors[i] = (i % avail) + 1;
867
868	rc = pci_remap_msix(dev, desired, vectors);
869	free(vectors, M_NTB);
870	return (rc);
871}
872
/*
 * Set up interrupt delivery: ideally one MSI-X vector per doorbell bit,
 * falling back to remapped MSI-X (round-robin over fewer vectors) and
 * finally to a single legacy INTx on Xeon.
 */
static int
ntb_init_isr(struct ntb_softc *ntb)
{
	uint32_t desired_vectors, num_vectors;
	int rc;

	ntb->allocated_interrupts = 0;
	ntb->last_ts = ticks;

	/*
	 * Mask all doorbell interrupts.
	 */
	ntb_db_set_mask(ntb, ntb->db_valid_mask);

	/* Want one vector per doorbell, bounded by what the device offers. */
	num_vectors = desired_vectors = MIN(pci_msix_count(ntb->device),
	    ntb->db_count);
	if (desired_vectors >= 1) {
		rc = pci_alloc_msix(ntb->device, &num_vectors);

		/* Test hook: pretend one fewer vector was granted. */
		if (ntb_force_remap_mode != 0 && rc == 0 &&
		    num_vectors == desired_vectors)
			num_vectors--;

		/* Short allocation: try remapping messages over the vectors. */
		if (rc == 0 && num_vectors < desired_vectors) {
			rc = ntb_remap_msix(ntb->device, desired_vectors,
			    num_vectors);
			if (rc == 0)
				num_vectors = desired_vectors;
			else
				pci_release_msi(ntb->device);
		}
		if (rc != 0)
			num_vectors = 1;
	} else
		num_vectors = 1;

	if (ntb->type == NTB_XEON && num_vectors < ntb->db_vec_count) {
		/*
		 * Too few vectors for per-vector dispatch: collapse to one
		 * vector covering all doorbell bits and use legacy INTx.
		 */
		ntb->db_vec_count = 1;
		ntb->db_vec_shift = ntb->db_count;
		rc = ntb_setup_legacy_interrupt(ntb);
	} else {
		ntb_create_msix_vec(ntb, num_vectors);
		rc = ntb_setup_msix(ntb, num_vectors);
	}
	if (rc != 0) {
		device_printf(ntb->device,
		    "Error allocating interrupts: %d\n", rc);
		ntb_free_msix_vec(ntb);
	}

	return (rc);
}
925
926static int
927ntb_setup_legacy_interrupt(struct ntb_softc *ntb)
928{
929	int rc;
930
931	ntb->int_info[0].rid = 0;
932	ntb->int_info[0].res = bus_alloc_resource_any(ntb->device, SYS_RES_IRQ,
933	    &ntb->int_info[0].rid, RF_SHAREABLE|RF_ACTIVE);
934	if (ntb->int_info[0].res == NULL) {
935		device_printf(ntb->device, "bus_alloc_resource failed\n");
936		return (ENOMEM);
937	}
938
939	ntb->int_info[0].tag = NULL;
940	ntb->allocated_interrupts = 1;
941
942	rc = bus_setup_intr(ntb->device, ntb->int_info[0].res,
943	    INTR_MPSAFE | INTR_TYPE_MISC, NULL, ndev_irq_isr,
944	    ntb, &ntb->int_info[0].tag);
945	if (rc != 0) {
946		device_printf(ntb->device, "bus_setup_intr failed\n");
947		return (ENXIO);
948	}
949
950	return (0);
951}
952
953static void
954ntb_teardown_interrupts(struct ntb_softc *ntb)
955{
956	struct ntb_int_info *current_int;
957	int i;
958
959	for (i = 0; i < ntb->allocated_interrupts; i++) {
960		current_int = &ntb->int_info[i];
961		if (current_int->tag != NULL)
962			bus_teardown_intr(ntb->device, current_int->res,
963			    current_int->tag);
964
965		if (current_int->res != NULL)
966			bus_release_resource(ntb->device, SYS_RES_IRQ,
967			    rman_get_rid(current_int->res), current_int->res);
968	}
969
970	ntb_free_msix_vec(ntb);
971	pci_release_msi(ntb->device);
972}
973
974/*
975 * Doorbell register and mask are 64-bit on Atom, 16-bit on Xeon.  Abstract it
976 * out to make code clearer.
977 */
/* Read a doorbell-width register: 8 bytes on Atom, 2 bytes on Xeon. */
static inline uint64_t
db_ioread(struct ntb_softc *ntb, uint64_t regoff)
{

	if (ntb->type == NTB_ATOM)
		return (ntb_reg_read(8, regoff));

	KASSERT(ntb->type == NTB_XEON, ("bad ntb type"));

	return (ntb_reg_read(2, regoff));
}

/*
 * Write a doorbell-width register.  Asserts that the value carries no bits
 * outside db_valid_mask, and that db_mask_lock is held when the target is
 * the local doorbell mask register.
 */
static inline void
db_iowrite(struct ntb_softc *ntb, uint64_t regoff, uint64_t val)
{

	KASSERT((val & ~ntb->db_valid_mask) == 0,
	    ("%s: Invalid bits 0x%jx (valid: 0x%jx)", __func__,
	     (uintmax_t)(val & ~ntb->db_valid_mask),
	     (uintmax_t)ntb->db_valid_mask));

	if (regoff == ntb->self_reg->db_mask)
		DB_MASK_ASSERT(ntb, MA_OWNED);

	if (ntb->type == NTB_ATOM) {
		ntb_reg_write(8, regoff, val);
		return;
	}

	KASSERT(ntb->type == NTB_XEON, ("bad ntb type"));
	ntb_reg_write(2, regoff, (uint16_t)val);
}
1010
/*
 * Mask (disable) the given doorbell interrupt bits.  The cached mask is
 * updated under the doorbell mask lock and flushed to the hardware mask
 * register.
 */
void
ntb_db_set_mask(struct ntb_softc *ntb, uint64_t bits)
{

	DB_MASK_LOCK(ntb);
	ntb->db_mask |= bits;
	db_iowrite(ntb, ntb->self_reg->db_mask, ntb->db_mask);
	DB_MASK_UNLOCK(ntb);
}
1020
/*
 * Unmask (re-enable) the given doorbell interrupt bits.  Asserts the
 * caller only passes bits within the valid doorbell range; updates the
 * cached mask under the lock and writes it out.
 */
void
ntb_db_clear_mask(struct ntb_softc *ntb, uint64_t bits)
{

	KASSERT((bits & ~ntb->db_valid_mask) == 0,
	    ("%s: Invalid bits 0x%jx (valid: 0x%jx)", __func__,
	     (uintmax_t)(bits & ~ntb->db_valid_mask),
	     (uintmax_t)ntb->db_valid_mask));

	DB_MASK_LOCK(ntb);
	ntb->db_mask &= ~bits;
	db_iowrite(ntb, ntb->self_reg->db_mask, ntb->db_mask);
	DB_MASK_UNLOCK(ntb);
}
1035
/* Read and return the local doorbell register contents. */
uint64_t
ntb_db_read(struct ntb_softc *ntb)
{

	return (db_ioread(ntb, ntb->self_reg->db_bell));
}
1042
/*
 * Acknowledge doorbell events by writing the given bits back to the
 * doorbell register to clear them.  Bits outside the valid doorbell
 * range are rejected by assertion.
 */
void
ntb_db_clear(struct ntb_softc *ntb, uint64_t bits)
{

	KASSERT((bits & ~ntb->db_valid_mask) == 0,
	    ("%s: Invalid bits 0x%jx (valid: 0x%jx)", __func__,
	     (uintmax_t)(bits & ~ntb->db_valid_mask),
	     (uintmax_t)ntb->db_valid_mask));

	db_iowrite(ntb, ntb->self_reg->db_bell, bits);
}
1054
/*
 * Compute the doorbell bits serviced by interrupt vector 'db_vector':
 * a contiguous group of db_vec_shift bits starting at bit
 * (db_vec_shift * db_vector).
 *
 * NOTE(review): the last vector does not absorb any leftover high bits
 * when db_count is not a multiple of the per-vector group size —
 * confirm this matches the hardware's doorbell-to-vector routing.
 */
static inline uint64_t
ntb_vec_mask(struct ntb_softc *ntb, uint64_t db_vector)
{
	uint64_t shift, mask;

	shift = ntb->db_vec_shift;
	mask = (1ull << shift) - 1;
	return (mask << (shift * db_vector));
}
1064
/*
 * Common interrupt bottom half for all vector types.  Records the
 * interrupt time (used by the Atom heartbeat to defer polling), polls
 * the link if this vector carries the link-status doorbell bit, and
 * dispatches doorbell bits to the client context.
 */
static void
ntb_interrupt(struct ntb_softc *ntb, uint32_t vec)
{
	uint64_t vec_mask;

	ntb->last_ts = ticks;
	vec_mask = ntb_vec_mask(ntb, vec);

	if ((vec_mask & ntb->db_link_mask) != 0) {
		if (ntb_poll_link(ntb))
			ntb_link_event(ntb);
	}

	if ((vec_mask & ntb->db_valid_mask) != 0)
		ntb_db_event(ntb, vec);
}
1081
1082static void
1083ndev_vec_isr(void *arg)
1084{
1085	struct ntb_vec *nvec = arg;
1086
1087	ntb_interrupt(nvec->ntb, nvec->num);
1088}
1089
/* Legacy INTx / single-MSI interrupt handler. */
static void
ndev_irq_isr(void *arg)
{
	/* If we couldn't set up MSI-X, we only have the one vector. */
	ntb_interrupt(arg, 0);
}
1096
/*
 * Allocate and initialize the per-vector context array used by the
 * MSI-X handlers.  M_WAITOK guarantees the allocation succeeds, so this
 * always returns 0.
 */
static int
ntb_create_msix_vec(struct ntb_softc *ntb, uint32_t num_vectors)
{
	uint32_t i;

	ntb->msix_vec = malloc(num_vectors * sizeof(*ntb->msix_vec), M_NTB,
	    M_ZERO | M_WAITOK);
	for (i = 0; i < num_vectors; i++) {
		ntb->msix_vec[i].num = i;
		ntb->msix_vec[i].ntb = ntb;
	}

	return (0);
}
1111
1112static void
1113ntb_free_msix_vec(struct ntb_softc *ntb)
1114{
1115
1116	if (ntb->msix_vec == NULL)
1117		return;
1118
1119	free(ntb->msix_vec, M_NTB);
1120	ntb->msix_vec = NULL;
1121}
1122
1123static struct ntb_hw_info *
1124ntb_get_device_info(uint32_t device_id)
1125{
1126	struct ntb_hw_info *ep = pci_ids;
1127
1128	while (ep->device_id) {
1129		if (ep->device_id == device_id)
1130			return (ep);
1131		++ep;
1132	}
1133	return (NULL);
1134}
1135
/*
 * Xeon teardown: take the link down if the register map was set up
 * (ntb->reg is NULL when initialization never got that far).
 */
static void
ntb_teardown_xeon(struct ntb_softc *ntb)
{

	if (ntb->reg != NULL)
		ntb_link_disable(ntb);
}
1143
1144static void
1145ntb_detect_max_mw(struct ntb_softc *ntb)
1146{
1147
1148	if (ntb->type == NTB_ATOM) {
1149		ntb->mw_count = ATOM_MW_COUNT;
1150		return;
1151	}
1152
1153	if (HAS_FEATURE(NTB_SPLIT_BAR))
1154		ntb->mw_count = XEON_HSX_SPLIT_MW_COUNT;
1155	else
1156		ntb->mw_count = XEON_SNB_MW_COUNT;
1157}
1158
/*
 * Decode the Xeon PPD register: USD/DSD role, split-BAR capability, and
 * connection type.  Only the B2B topology is supported; RP and
 * transparent configurations are rejected with ENXIO.
 */
static int
ntb_detect_xeon(struct ntb_softc *ntb)
{
	uint8_t ppd, conn_type;

	ppd = pci_read_config(ntb->device, NTB_PPD_OFFSET, 1);
	ntb->ppd = ppd;

	if ((ppd & XEON_PPD_DEV_TYPE) != 0)
		ntb->dev_type = NTB_DEV_USD;
	else
		ntb->dev_type = NTB_DEV_DSD;

	if ((ppd & XEON_PPD_SPLIT_BAR) != 0)
		ntb->features |= NTB_SPLIT_BAR;

	/* SB01BASE_LOCKUP errata is a superset of SDOORBELL errata */
	if (HAS_FEATURE(NTB_SB01BASE_LOCKUP))
		ntb->features |= NTB_SDOORBELL_LOCKUP;

	conn_type = ppd & XEON_PPD_CONN_TYPE;
	switch (conn_type) {
	case NTB_CONN_B2B:
		ntb->conn_type = conn_type;
		break;
	case NTB_CONN_RP:
	case NTB_CONN_TRANSPARENT:
	default:
		device_printf(ntb->device, "Unsupported connection type: %u\n",
		    (unsigned)conn_type);
		return (ENXIO);
	}
	return (0);
}
1193
/*
 * Decode the Atom PPD register: USD/DSD role and connection type.
 * Note the Atom role bit has the opposite sense from Xeon (set means
 * DSD here).  Only B2B is supported.
 */
static int
ntb_detect_atom(struct ntb_softc *ntb)
{
	uint32_t ppd, conn_type;

	ppd = pci_read_config(ntb->device, NTB_PPD_OFFSET, 4);
	ntb->ppd = ppd;

	if ((ppd & ATOM_PPD_DEV_TYPE) != 0)
		ntb->dev_type = NTB_DEV_DSD;
	else
		ntb->dev_type = NTB_DEV_USD;

	/* Connection type is encoded in bits above the low byte. */
	conn_type = (ppd & ATOM_PPD_CONN_TYPE) >> 8;
	switch (conn_type) {
	case NTB_CONN_B2B:
		ntb->conn_type = conn_type;
		break;
	default:
		device_printf(ntb->device, "Unsupported NTB configuration\n");
		return (ENXIO);
	}
	return (0);
}
1218
/*
 * Xeon-specific device initialization: set doorbell/scratchpad geometry,
 * select the register map tables, apply doorbell errata workarounds,
 * program the B2B memory window, enable the secondary side, and start
 * link training.  Returns zero or an errno.
 */
static int
ntb_xeon_init_dev(struct ntb_softc *ntb)
{
	int rc;

	ntb->spad_count		= XEON_SPAD_COUNT;
	ntb->db_count		= XEON_DB_COUNT;
	ntb->db_link_mask	= XEON_DB_LINK_BIT;
	ntb->db_vec_count	= XEON_DB_MSIX_VECTOR_COUNT;
	ntb->db_vec_shift	= XEON_DB_MSIX_VECTOR_SHIFT;

	if (ntb->conn_type != NTB_CONN_B2B) {
		device_printf(ntb->device, "Connection type %d not supported\n",
		    ntb->conn_type);
		return (ENXIO);
	}

	ntb->reg = &xeon_reg;
	ntb->self_reg = &xeon_pri_reg;
	ntb->peer_reg = &xeon_b2b_reg;
	ntb->xlat_reg = &xeon_sec_xlat;

	/*
	 * There is a Xeon hardware errata related to writes to SDOORBELL or
	 * B2BDOORBELL in conjunction with inbound access to NTB MMIO space,
	 * which may hang the system.  To workaround this use the second memory
	 * window to access the interrupt and scratch pad registers on the
	 * remote system.
	 */
	if (HAS_FEATURE(NTB_SDOORBELL_LOCKUP))
		/* Use the last MW for mapping remote spad */
		ntb->b2b_mw_idx = ntb->mw_count - 1;
	else if (HAS_FEATURE(NTB_B2BDOORBELL_BIT14))
		/*
		 * HW Errata on bit 14 of b2bdoorbell register.  Writes will not be
		 * mirrored to the remote system.  Shrink the number of bits by one,
		 * since bit 14 is the last bit.
		 *
		 * On REGS_THRU_MW errata mode, we don't use the b2bdoorbell register
		 * anyway.  Nor for non-B2B connection types.
		 */
		ntb->db_count = XEON_DB_COUNT - 1;

	ntb->db_valid_mask = (1ull << ntb->db_count) - 1;

	/* Local and peer BAR address sets are swapped by USD/DSD role. */
	if (ntb->dev_type == NTB_DEV_USD)
		rc = xeon_setup_b2b_mw(ntb, &xeon_b2b_dsd_addr,
		    &xeon_b2b_usd_addr);
	else
		rc = xeon_setup_b2b_mw(ntb, &xeon_b2b_usd_addr,
		    &xeon_b2b_dsd_addr);
	if (rc != 0)
		return (rc);

	/* Enable Bus Master and Memory Space on the secondary side */
	ntb_reg_write(2, XEON_PCICMD_OFFSET,
	    PCIM_CMD_MEMEN | PCIM_CMD_BUSMASTEREN);

	/* Enable link training */
	ntb_link_enable(ntb, NTB_SPEED_AUTO, NTB_WIDTH_AUTO);

	return (0);
}
1282
/*
 * Atom-specific device initialization: set geometry and register map
 * tables, apply early-hardware workarounds, program the secondary-side
 * BARs, enable the secondary side, start link training, and kick off
 * the link-status heartbeat callout (Atom has no link interrupt).
 * Always returns 0.
 */
static int
ntb_atom_init_dev(struct ntb_softc *ntb)
{

	KASSERT(ntb->conn_type == NTB_CONN_B2B,
	    ("Unsupported NTB configuration (%d)\n", ntb->conn_type));

	ntb->spad_count		 = ATOM_SPAD_COUNT;
	ntb->db_count		 = ATOM_DB_COUNT;
	ntb->db_vec_count	 = ATOM_DB_MSIX_VECTOR_COUNT;
	ntb->db_vec_shift	 = ATOM_DB_MSIX_VECTOR_SHIFT;
	ntb->db_valid_mask	 = (1ull << ntb->db_count) - 1;

	ntb->reg = &atom_reg;
	ntb->self_reg = &atom_pri_reg;
	ntb->peer_reg = &atom_b2b_reg;
	ntb->xlat_reg = &atom_sec_xlat;

	/*
	 * FIXME - MSI-X bug on early Atom HW, remove once internal issue is
	 * resolved.  Mask transaction layer internal parity errors.
	 */
	pci_write_config(ntb->device, 0xFC, 0x4, 4);

	configure_atom_secondary_side_bars(ntb);

	/* Enable Bus Master and Memory Space on the secondary side */
	ntb_reg_write(2, ATOM_PCICMD_OFFSET,
	    PCIM_CMD_MEMEN | PCIM_CMD_BUSMASTEREN);

	/* Initiate PCI-E link training */
	ntb_link_enable(ntb, NTB_SPEED_AUTO, NTB_WIDTH_AUTO);

	callout_reset(&ntb->heartbeat_timer, 0, atom_link_hb, ntb);

	return (0);
}
1320
/* XXX: Linux driver doesn't seem to do any of this for Atom. */
/*
 * Program the primary translation (PBAR2/4 XLAT) and secondary MBAR
 * registers with fixed 64-bit B2B addresses, choosing the USD or DSD
 * address set according to our role (our xlat points at the peer's
 * addresses, our MBARs hold our own).
 */
static void
configure_atom_secondary_side_bars(struct ntb_softc *ntb)
{

	if (ntb->dev_type == NTB_DEV_USD) {
		ntb_reg_write(8, ATOM_PBAR2XLAT_OFFSET,
		    XEON_B2B_BAR2_DSD_ADDR64);
		ntb_reg_write(8, ATOM_PBAR4XLAT_OFFSET,
		    XEON_B2B_BAR4_DSD_ADDR64);
		ntb_reg_write(8, ATOM_MBAR23_OFFSET, XEON_B2B_BAR2_USD_ADDR64);
		ntb_reg_write(8, ATOM_MBAR45_OFFSET, XEON_B2B_BAR4_USD_ADDR64);
	} else {
		ntb_reg_write(8, ATOM_PBAR2XLAT_OFFSET,
		    XEON_B2B_BAR2_USD_ADDR64);
		ntb_reg_write(8, ATOM_PBAR4XLAT_OFFSET,
		    XEON_B2B_BAR4_USD_ADDR64);
		ntb_reg_write(8, ATOM_MBAR23_OFFSET, XEON_B2B_BAR2_DSD_ADDR64);
		ntb_reg_write(8, ATOM_MBAR45_OFFSET, XEON_B2B_BAR4_DSD_ADDR64);
	}
}
1342
1343
1344/*
1345 * When working around Xeon SDOORBELL errata by remapping remote registers in a
1346 * MW, limit the B2B MW to half a MW.  By sharing a MW, half the shared MW
1347 * remains for use by a higher layer.
1348 *
1349 * Will only be used if working around SDOORBELL errata and the BIOS-configured
1350 * MW size is sufficiently large.
1351 */
1352static unsigned int ntb_b2b_mw_share;
1353SYSCTL_UINT(_hw_ntb, OID_AUTO, b2b_mw_share, CTLFLAG_RDTUN, &ntb_b2b_mw_share,
1354    0, "If enabled (non-zero), prefer to share half of the B2B peer register "
1355    "MW with higher level consumers.  Both sides of the NTB MUST set the same "
1356    "value here.");
1357
/*
 * Copy a primary BAR's size register to the matching secondary BAR size
 * register.  The BAR carrying the B2B register window is halved (size
 * encoding decremented when sharing) or disabled (size 0).  BARs 4/5
 * only exist with the split-BAR feature.
 */
static void
xeon_reset_sbar_size(struct ntb_softc *ntb, enum ntb_bar idx,
    enum ntb_bar regbar)
{
	struct ntb_pci_bar_info *bar;
	uint8_t bar_sz;

	if (!HAS_FEATURE(NTB_SPLIT_BAR) && idx >= NTB_B2B_BAR_3)
		return;

	bar = &ntb->bar_info[idx];
	bar_sz = pci_read_config(ntb->device, bar->psz_off, 1);
	if (idx == regbar) {
		/* Decrementing the size exponent halves the BAR. */
		if (ntb->b2b_off != 0)
			bar_sz--;
		else
			bar_sz = 0;
	}
	pci_write_config(ntb->device, bar->ssz_off, bar_sz, 1);
	/* Read back and discard — NOTE(review): appears to be a write flush. */
	bar_sz = pci_read_config(ntb->device, bar->ssz_off, 1);
	(void)bar_sz;
}
1380
/*
 * Program a secondary BAR's base and limit registers with the same
 * address, creating a zero-length (closed) incoming window.  For the
 * B2B register BAR, the address is advanced by b2b_off.  Register width
 * follows the BAR's 32/64-bit configuration; each write is read back
 * and discarded (NOTE(review): appears to be a write flush).
 */
static void
xeon_set_sbar_base_and_limit(struct ntb_softc *ntb, uint64_t bar_addr,
    enum ntb_bar idx, enum ntb_bar regbar)
{
	uint64_t reg_val;
	uint32_t base_reg, lmt_reg;

	bar_get_xlat_params(ntb, idx, &base_reg, NULL, &lmt_reg);
	if (idx == regbar)
		bar_addr += ntb->b2b_off;

	if (!bar_is_64bit(ntb, idx)) {
		ntb_reg_write(4, base_reg, bar_addr);
		reg_val = ntb_reg_read(4, base_reg);
		(void)reg_val;

		ntb_reg_write(4, lmt_reg, bar_addr);
		reg_val = ntb_reg_read(4, lmt_reg);
		(void)reg_val;
	} else {
		ntb_reg_write(8, base_reg, bar_addr);
		reg_val = ntb_reg_read(8, base_reg);
		(void)reg_val;

		ntb_reg_write(8, lmt_reg, bar_addr);
		reg_val = ntb_reg_read(8, lmt_reg);
		(void)reg_val;
	}
}
1410
/*
 * Program the primary-side outgoing translation (PBAR xlat) register
 * for the given BAR.  With the split-BAR feature, BARs 2+ use 32-bit
 * xlat registers; otherwise the xlat register is 64-bit.  The read-back
 * is discarded (NOTE(review): appears to be a write flush).
 */
static void
xeon_set_pbar_xlat(struct ntb_softc *ntb, uint64_t base_addr, enum ntb_bar idx)
{
	struct ntb_pci_bar_info *bar;

	bar = &ntb->bar_info[idx];
	if (HAS_FEATURE(NTB_SPLIT_BAR) && idx >= NTB_B2B_BAR_2) {
		ntb_reg_write(4, bar->pbarxlat_off, base_addr);
		base_addr = ntb_reg_read(4, bar->pbarxlat_off);
	} else {
		ntb_reg_write(8, bar->pbarxlat_off, base_addr);
		base_addr = ntb_reg_read(8, bar->pbarxlat_off);
	}
	(void)base_addr;
}
1426
/*
 * Configure the Xeon back-to-back (B2B) memory windows and translation
 * registers.
 *
 * @addr:      BAR addresses to program into our secondary-side BARs.
 * @peer_addr: the peer's BAR addresses, used for outgoing translation.
 *
 * When the SDOORBELL_LOCKUP workaround reserves a MW for peer register
 * access (b2b_mw_idx set), that BAR is either removed from the MW pool
 * or — with the hw.ntb.b2b_mw_share tunable — halved, with the upper
 * half (ntb->b2b_off) holding the B2B register region.
 *
 * Returns zero on success, or EIO if the chosen B2B BAR is too small.
 */
static int
xeon_setup_b2b_mw(struct ntb_softc *ntb, const struct ntb_b2b_addr *addr,
    const struct ntb_b2b_addr *peer_addr)
{
	struct ntb_pci_bar_info *b2b_bar;
	vm_size_t bar_size;
	uint64_t bar_addr;
	enum ntb_bar b2b_bar_num, i;

	if (ntb->b2b_mw_idx == B2B_MW_DISABLED) {
		b2b_bar = NULL;
		b2b_bar_num = NTB_CONFIG_BAR;
		ntb->b2b_off = 0;
	} else {
		b2b_bar_num = ntb_mw_to_bar(ntb, ntb->b2b_mw_idx);
		KASSERT(b2b_bar_num > 0 && b2b_bar_num < NTB_MAX_BARS,
		    ("invalid b2b mw bar"));

		b2b_bar = &ntb->bar_info[b2b_bar_num];
		bar_size = b2b_bar->size;

		/* Share half of the BAR, or consume it entirely. */
		if (ntb_b2b_mw_share != 0 &&
		    (bar_size >> 1) >= XEON_B2B_MIN_SIZE)
			ntb->b2b_off = bar_size >> 1;
		else if (bar_size >= XEON_B2B_MIN_SIZE) {
			ntb->b2b_off = 0;
			ntb->mw_count--;
		} else {
			device_printf(ntb->device,
			    "B2B bar size is too small!\n");
			return (EIO);
		}
	}

	/*
	 * Reset the secondary bar sizes to match the primary bar sizes.
	 * (Except, disable or halve the size of the B2B secondary bar.)
	 */
	for (i = NTB_B2B_BAR_1; i < NTB_MAX_BARS; i++)
		xeon_reset_sbar_size(ntb, i, b2b_bar_num);

	/* Pick the local address matching the BAR chosen for B2B. */
	bar_addr = 0;
	if (b2b_bar_num == NTB_CONFIG_BAR)
		bar_addr = addr->bar0_addr;
	else if (b2b_bar_num == NTB_B2B_BAR_1)
		bar_addr = addr->bar2_addr64;
	else if (b2b_bar_num == NTB_B2B_BAR_2 && !HAS_FEATURE(NTB_SPLIT_BAR))
		bar_addr = addr->bar4_addr64;
	else if (b2b_bar_num == NTB_B2B_BAR_2)
		bar_addr = addr->bar4_addr32;
	else if (b2b_bar_num == NTB_B2B_BAR_3)
		bar_addr = addr->bar5_addr32;
	else
		KASSERT(false, ("invalid bar"));

	ntb_reg_write(8, XEON_SBAR0BASE_OFFSET, bar_addr);

	/*
	 * Other SBARs are normally hit by the PBAR xlat, except for the b2b
	 * register BAR.  The B2B BAR is either disabled above or configured
	 * half-size.  It starts at PBAR xlat + offset.
	 *
	 * Also set up incoming BAR limits == base (zero length window).
	 */
	xeon_set_sbar_base_and_limit(ntb, addr->bar2_addr64, NTB_B2B_BAR_1,
	    b2b_bar_num);
	if (HAS_FEATURE(NTB_SPLIT_BAR)) {
		xeon_set_sbar_base_and_limit(ntb, addr->bar4_addr32,
		    NTB_B2B_BAR_2, b2b_bar_num);
		xeon_set_sbar_base_and_limit(ntb, addr->bar5_addr32,
		    NTB_B2B_BAR_3, b2b_bar_num);
	} else
		xeon_set_sbar_base_and_limit(ntb, addr->bar4_addr64,
		    NTB_B2B_BAR_2, b2b_bar_num);

	/* Zero incoming translation addrs */
	ntb_reg_write(8, XEON_SBAR2XLAT_OFFSET, 0);
	ntb_reg_write(8, XEON_SBAR4XLAT_OFFSET, 0);

	/* Zero outgoing translation limits (whole bar size windows) */
	ntb_reg_write(8, XEON_PBAR2LMT_OFFSET, 0);
	ntb_reg_write(8, XEON_PBAR4LMT_OFFSET, 0);

	/* Set outgoing translation offsets */
	xeon_set_pbar_xlat(ntb, peer_addr->bar2_addr64, NTB_B2B_BAR_1);
	if (HAS_FEATURE(NTB_SPLIT_BAR)) {
		xeon_set_pbar_xlat(ntb, peer_addr->bar4_addr32, NTB_B2B_BAR_2);
		xeon_set_pbar_xlat(ntb, peer_addr->bar5_addr32, NTB_B2B_BAR_3);
	} else
		xeon_set_pbar_xlat(ntb, peer_addr->bar4_addr64, NTB_B2B_BAR_2);

	/* Set the translation offset for B2B registers */
	bar_addr = 0;
	if (b2b_bar_num == NTB_CONFIG_BAR)
		bar_addr = peer_addr->bar0_addr;
	else if (b2b_bar_num == NTB_B2B_BAR_1)
		bar_addr = peer_addr->bar2_addr64;
	else if (b2b_bar_num == NTB_B2B_BAR_2 && !HAS_FEATURE(NTB_SPLIT_BAR))
		bar_addr = peer_addr->bar4_addr64;
	else if (b2b_bar_num == NTB_B2B_BAR_2)
		bar_addr = peer_addr->bar4_addr32;
	else if (b2b_bar_num == NTB_B2B_BAR_3)
		bar_addr = peer_addr->bar5_addr32;
	else
		KASSERT(false, ("invalid bar"));

	/*
	 * B2B_XLAT_OFFSET is a 64-bit register but can only be written 32 bits
	 * at a time.
	 */
	ntb_reg_write(4, XEON_B2B_XLAT_OFFSETL, bar_addr & 0xffffffff);
	ntb_reg_write(4, XEON_B2B_XLAT_OFFSETU, bar_addr >> 32);
	return (0);
}
1541
1542static inline bool
1543link_is_up(struct ntb_softc *ntb)
1544{
1545
1546	if (ntb->type == NTB_XEON) {
1547		if (ntb->conn_type == NTB_CONN_TRANSPARENT)
1548			return (true);
1549		return ((ntb->lnk_sta & NTB_LINK_STATUS_ACTIVE) != 0);
1550	}
1551
1552	KASSERT(ntb->type == NTB_ATOM, ("ntb type"));
1553	return ((ntb->ntb_ctl & ATOM_CNTL_LINK_DOWN) == 0);
1554}
1555
1556static inline bool
1557atom_link_is_err(struct ntb_softc *ntb)
1558{
1559	uint32_t status;
1560
1561	KASSERT(ntb->type == NTB_ATOM, ("ntb type"));
1562
1563	status = ntb_reg_read(4, ATOM_LTSSMSTATEJMP_OFFSET);
1564	if ((status & ATOM_LTSSMSTATEJMP_FORCEDETECT) != 0)
1565		return (true);
1566
1567	status = ntb_reg_read(4, ATOM_IBSTERRRCRVSTS0_OFFSET);
1568	return ((status & ATOM_IBIST_ERR_OFLOW) != 0);
1569}
1570
/* Atom does not have link status interrupt, poll on that platform */
/*
 * Heartbeat callout: periodically poll the link and, on a link-down-
 * with-error condition, hand off to the recovery callout.
 *
 * NOTE(review): timo and poll_ts are declared sbintime_t but all the
 * arithmetic here is in units of 'ticks' (as callout_reset expects);
 * the comparison is consistent, but the type is misleading — confirm.
 */
static void
atom_link_hb(void *arg)
{
	struct ntb_softc *ntb = arg;
	sbintime_t timo, poll_ts;

	timo = NTB_HB_TIMEOUT * hz;
	poll_ts = ntb->last_ts + timo;

	/*
	 * Delay polling the link status if an interrupt was received, unless
	 * the cached link status says the link is down.
	 */
	if ((sbintime_t)ticks - poll_ts < 0 && link_is_up(ntb)) {
		timo = poll_ts - ticks;
		goto out;
	}

	if (ntb_poll_link(ntb))
		ntb_link_event(ntb);

	if (!link_is_up(ntb) && atom_link_is_err(ntb)) {
		/* Link is down with error, proceed with recovery */
		callout_reset(&ntb->lr_timer, 0, recover_atom_link, ntb);
		return;
	}

out:
	callout_reset(&ntb->heartbeat_timer, timo, atom_link_hb, ntb);
}
1602
/*
 * Reset the Atom ModPhy lanes and clear all latched link error state so
 * the LTSSM can retrain the link.  Register values come from vendor
 * guidance (see inline comments); error registers are write-to-clear.
 */
static void
atom_perform_link_restart(struct ntb_softc *ntb)
{
	uint32_t status;

	/* Driver resets the NTB ModPhy lanes - magic! */
	ntb_reg_write(1, ATOM_MODPHY_PCSREG6, 0xe0);
	ntb_reg_write(1, ATOM_MODPHY_PCSREG4, 0x40);
	ntb_reg_write(1, ATOM_MODPHY_PCSREG4, 0x60);
	ntb_reg_write(1, ATOM_MODPHY_PCSREG6, 0x60);

	/* Driver waits 100ms to allow the NTB ModPhy to settle */
	pause("ModPhy", hz / 10);

	/* Clear AER Errors, write to clear */
	status = ntb_reg_read(4, ATOM_ERRCORSTS_OFFSET);
	status &= PCIM_AER_COR_REPLAY_ROLLOVER;
	ntb_reg_write(4, ATOM_ERRCORSTS_OFFSET, status);

	/* Clear unexpected electrical idle event in LTSSM, write to clear */
	status = ntb_reg_read(4, ATOM_LTSSMERRSTS0_OFFSET);
	status |= ATOM_LTSSMERRSTS0_UNEXPECTEDEI;
	ntb_reg_write(4, ATOM_LTSSMERRSTS0_OFFSET, status);

	/* Clear DeSkew Buffer error, write to clear */
	status = ntb_reg_read(4, ATOM_DESKEWSTS_OFFSET);
	status |= ATOM_DESKEWSTS_DBERR;
	ntb_reg_write(4, ATOM_DESKEWSTS_OFFSET, status);

	/* Clear IBIST error overflow, write to clear */
	status = ntb_reg_read(4, ATOM_IBSTERRRCRVSTS0_OFFSET);
	status &= ATOM_IBIST_ERR_OFLOW;
	ntb_reg_write(4, ATOM_IBSTERRRCRVSTS0_OFFSET, status);

	/* Releases the NTB state machine to allow the link to retrain */
	status = ntb_reg_read(4, ATOM_LTSSMSTATEJMP_OFFSET);
	status &= ~ATOM_LTSSMSTATEJMP_FORCEDETECT;
	ntb_reg_write(4, ATOM_LTSSMSTATEJMP_OFFSET, status);
}
1641
1642/*
1643 * ntb_set_ctx() - associate a driver context with an ntb device
1644 * @ntb:        NTB device context
1645 * @ctx:        Driver context
1646 * @ctx_ops:    Driver context operations
1647 *
1648 * Associate a driver context and operations with a ntb device.  The context is
1649 * provided by the client driver, and the driver may associate a different
1650 * context with each ntb device.
1651 *
1652 * Return: Zero if the context is associated, otherwise an error number.
1653 */
int
ntb_set_ctx(struct ntb_softc *ntb, void *ctx, const struct ntb_ctx_ops *ops)
{

	if (ctx == NULL || ops == NULL)
		return (EINVAL);
	/* Cheap unlocked rejection; re-checked under the lock below. */
	if (ntb->ctx_ops != NULL)
		return (EINVAL);

	CTX_LOCK(ntb);
	if (ntb->ctx_ops != NULL) {
		CTX_UNLOCK(ntb);
		return (EINVAL);
	}
	ntb->ntb_ctx = ctx;
	ntb->ctx_ops = ops;
	CTX_UNLOCK(ntb);

	return (0);
}
1674
1675/*
1676 * It is expected that this will only be used from contexts where the ctx_lock
1677 * is not needed to protect ntb_ctx lifetime.
1678 */
1679void *
1680ntb_get_ctx(struct ntb_softc *ntb, const struct ntb_ctx_ops **ops)
1681{
1682
1683	KASSERT(ntb->ntb_ctx != NULL && ntb->ctx_ops != NULL, ("bogus"));
1684	if (ops != NULL)
1685		*ops = ntb->ctx_ops;
1686	return (ntb->ntb_ctx);
1687}
1688
1689/*
1690 * ntb_clear_ctx() - disassociate any driver context from an ntb device
1691 * @ntb:        NTB device context
1692 *
1693 * Clear any association that may exist between a driver context and the ntb
1694 * device.
1695 */
void
ntb_clear_ctx(struct ntb_softc *ntb)
{

	/* Serialize with in-flight event delivery via the context lock. */
	CTX_LOCK(ntb);
	ntb->ntb_ctx = NULL;
	ntb->ctx_ops = NULL;
	CTX_UNLOCK(ntb);
}
1705
1706/*
1707 * ntb_link_event() - notify driver context of a change in link status
1708 * @ntb:        NTB device context
1709 *
1710 * Notify the driver context that the link status may have changed.  The driver
1711 * should call ntb_link_is_up() to get the current status.
1712 */
void
ntb_link_event(struct ntb_softc *ntb)
{

	/* Deliver the callback under the context lock, if one is set. */
	CTX_LOCK(ntb);
	if (ntb->ctx_ops != NULL && ntb->ctx_ops->link_event != NULL)
		ntb->ctx_ops->link_event(ntb->ntb_ctx);
	CTX_UNLOCK(ntb);
}
1722
1723/*
1724 * ntb_db_event() - notify driver context of a doorbell event
1725 * @ntb:        NTB device context
1726 * @vector:     Interrupt vector number
1727 *
1728 * Notify the driver context of a doorbell event.  If hardware supports
1729 * multiple interrupt vectors for doorbells, the vector number indicates which
1730 * vector received the interrupt.  The vector number is relative to the first
1731 * vector used for doorbells, starting at zero, and must be less than
1732 * ntb_db_vector_count().  The driver may call ntb_db_read() to check which
1733 * doorbell bits need service, and ntb_db_vector_mask() to determine which of
1734 * those bits are associated with the vector number.
1735 */
static void
ntb_db_event(struct ntb_softc *ntb, uint32_t vec)
{

	/* Deliver the callback under the context lock, if one is set. */
	CTX_LOCK(ntb);
	if (ntb->ctx_ops != NULL && ntb->ctx_ops->db_event != NULL)
		ntb->ctx_ops->db_event(ntb->ntb_ctx, vec);
	CTX_UNLOCK(ntb);
}
1745
1746/*
1747 * ntb_link_enable() - enable the link on the secondary side of the ntb
1748 * @ntb:        NTB device context
1749 * @max_speed:  The maximum link speed expressed as PCIe generation number[0]
1750 * @max_width:  The maximum link width expressed as the number of PCIe lanes[0]
1751 *
1752 * Enable the link on the secondary side of the ntb.  This can only be done
1753 * from the primary side of the ntb in primary or b2b topology.  The ntb device
1754 * should train the link to its maximum speed and width, or the requested speed
1755 * and width, whichever is smaller, if supported.
1756 *
1757 * Return: Zero on success, otherwise an error number.
1758 *
1759 * [0]: Only NTB_SPEED_AUTO and NTB_WIDTH_AUTO are valid inputs; other speed
1760 *      and width input will be ignored.
1761 */
int
ntb_link_enable(struct ntb_softc *ntb, enum ntb_speed s __unused,
    enum ntb_width w __unused)
{
	uint32_t cntl;

	/* Atom: setting the INIT_LINK bit in PPD starts link training. */
	if (ntb->type == NTB_ATOM) {
		pci_write_config(ntb->device, NTB_PPD_OFFSET,
		    ntb->ppd | ATOM_PPD_INIT_LINK, 4);
		return (0);
	}

	/* Transparent: nothing to enable; just report the current state. */
	if (ntb->conn_type == NTB_CONN_TRANSPARENT) {
		ntb_link_event(ntb);
		return (0);
	}

	/* Xeon: unlock config, clear link-disable, enable BAR snooping. */
	cntl = ntb_reg_read(4, ntb->reg->ntb_ctl);
	cntl &= ~(NTB_CNTL_LINK_DISABLE | NTB_CNTL_CFG_LOCK);
	cntl |= NTB_CNTL_P2S_BAR23_SNOOP | NTB_CNTL_S2P_BAR23_SNOOP;
	cntl |= NTB_CNTL_P2S_BAR4_SNOOP | NTB_CNTL_S2P_BAR4_SNOOP;
	if (HAS_FEATURE(NTB_SPLIT_BAR))
		cntl |= NTB_CNTL_P2S_BAR5_SNOOP | NTB_CNTL_S2P_BAR5_SNOOP;
	ntb_reg_write(4, ntb->reg->ntb_ctl, cntl);
	return (0);
}
1788
1789/*
1790 * ntb_link_disable() - disable the link on the secondary side of the ntb
1791 * @ntb:        NTB device context
1792 *
1793 * Disable the link on the secondary side of the ntb.  This can only be done
1794 * from the primary side of the ntb in primary or b2b topology.  The ntb device
1795 * should disable the link.  Returning from this call must indicate that a
1796 * barrier has passed, though with no more writes may pass in either direction
1797 * across the link, except if this call returns an error number.
1798 *
1799 * Return: Zero on success, otherwise an error number.
1800 */
int
ntb_link_disable(struct ntb_softc *ntb)
{
	uint32_t cntl;

	/* Transparent: nothing to disable; just report the current state. */
	if (ntb->conn_type == NTB_CONN_TRANSPARENT) {
		ntb_link_event(ntb);
		return (0);
	}

	/* Disable BAR snooping, set link-disable, and lock configuration. */
	cntl = ntb_reg_read(4, ntb->reg->ntb_ctl);
	cntl &= ~(NTB_CNTL_P2S_BAR23_SNOOP | NTB_CNTL_S2P_BAR23_SNOOP);
	cntl &= ~(NTB_CNTL_P2S_BAR4_SNOOP | NTB_CNTL_S2P_BAR4_SNOOP);
	if (HAS_FEATURE(NTB_SPLIT_BAR))
		cntl &= ~(NTB_CNTL_P2S_BAR5_SNOOP | NTB_CNTL_S2P_BAR5_SNOOP);
	cntl |= NTB_CNTL_LINK_DISABLE | NTB_CNTL_CFG_LOCK;
	ntb_reg_write(4, ntb->reg->ntb_ctl, cntl);
	return (0);
}
1820
/*
 * Link-recovery callout (Atom).  Restart the link, wait a randomized
 * interval to avoid a lock-step recovery race with the peer, then
 * resume the heartbeat if the link is stable or re-arm recovery
 * otherwise.
 */
static void
recover_atom_link(void *arg)
{
	struct ntb_softc *ntb = arg;
	unsigned speed, width, oldspeed, oldwidth;
	uint32_t status32;

	atom_perform_link_restart(ntb);

	/*
	 * There is a potential race between the 2 NTB devices recovering at
	 * the same time.  If the times are the same, the link will not recover
	 * and the driver will be stuck in this loop forever.  Add a random
	 * interval to the recovery time to prevent this race.
	 */
	status32 = arc4random() % ATOM_LINK_RECOVERY_TIME;
	pause("Link", (ATOM_LINK_RECOVERY_TIME + status32) * hz / 1000);

	if (atom_link_is_err(ntb))
		goto retry;

	/* Link still down: go back to the heartbeat and wait. */
	status32 = ntb_reg_read(4, ntb->reg->ntb_ctl);
	if ((status32 & ATOM_CNTL_LINK_DOWN) != 0)
		goto out;

	/* Retry unless the negotiated speed/width match the cached values. */
	status32 = ntb_reg_read(4, ntb->reg->lnk_sta);
	width = NTB_LNK_STA_WIDTH(status32);
	speed = status32 & NTB_LINK_SPEED_MASK;

	oldwidth = NTB_LNK_STA_WIDTH(ntb->lnk_sta);
	oldspeed = ntb->lnk_sta & NTB_LINK_SPEED_MASK;
	if (oldwidth != width || oldspeed != speed)
		goto retry;

out:
	callout_reset(&ntb->heartbeat_timer, NTB_HB_TIMEOUT * hz, atom_link_hb,
	    ntb);
	return;

retry:
	callout_reset(&ntb->lr_timer, NTB_HB_TIMEOUT * hz, recover_atom_link,
	    ntb);
}
1864
1865/*
1866 * Polls the HW link status register(s); returns true if something has changed.
1867 */
static bool
ntb_poll_link(struct ntb_softc *ntb)
{
	uint32_t ntb_cntl;
	uint16_t reg_val;

	if (ntb->type == NTB_ATOM) {
		/* Compare against the cached control register value. */
		ntb_cntl = ntb_reg_read(4, ntb->reg->ntb_ctl);
		if (ntb_cntl == ntb->ntb_ctl)
			return (false);

		ntb->ntb_ctl = ntb_cntl;
		ntb->lnk_sta = ntb_reg_read(4, ntb->reg->lnk_sta);
	} else {
		/* Ack the link-status doorbell bit before re-reading. */
		db_iowrite(ntb, ntb->self_reg->db_bell, ntb->db_link_mask);

		/* Xeon link status lives in PCI config space. */
		reg_val = pci_read_config(ntb->device, ntb->reg->lnk_sta, 2);
		if (reg_val == ntb->lnk_sta)
			return (false);

		ntb->lnk_sta = reg_val;
	}
	return (true);
}
1892
1893static inline enum ntb_speed
1894ntb_link_sta_speed(struct ntb_softc *ntb)
1895{
1896
1897	if (!link_is_up(ntb))
1898		return (NTB_SPEED_NONE);
1899	return (ntb->lnk_sta & NTB_LINK_SPEED_MASK);
1900}
1901
1902static inline enum ntb_width
1903ntb_link_sta_width(struct ntb_softc *ntb)
1904{
1905
1906	if (!link_is_up(ntb))
1907		return (NTB_WIDTH_NONE);
1908	return (NTB_LNK_STA_WIDTH(ntb->lnk_sta));
1909}
1910
SYSCTL_NODE(_hw_ntb, OID_AUTO, debug_info, CTLFLAG_RW, 0,
    "Driver state, statistics, and HW registers");

/*
 * Encoding of the sysctl handler argument (arg2) used for raw register
 * dumps below: the low bits carry the register offset, while the top
 * bits select the access width, whether to use the doorbell-safe read
 * path, and whether the register lives in PCI config space.
 */
#define NTB_REGSZ_MASK	(3ul << 30)
#define NTB_REG_64	(1ul << 30)
#define NTB_REG_32	(2ul << 30)
#define NTB_REG_16	(3ul << 30)
#define NTB_REG_8	(0ul << 30)

#define NTB_DB_READ	(1ul << 29)
#define NTB_PCI_REG	(1ul << 28)
#define NTB_REGFLAGS_MASK	(NTB_REGSZ_MASK | NTB_DB_READ | NTB_PCI_REG)
1923
1924static void
1925ntb_sysctl_init(struct ntb_softc *ntb)
1926{
1927	struct sysctl_oid_list *tree_par, *regpar, *statpar, *errpar;
1928	struct sysctl_ctx_list *ctx;
1929	struct sysctl_oid *tree, *tmptree;
1930
1931	ctx = device_get_sysctl_ctx(ntb->device);
1932
1933	tree = SYSCTL_ADD_NODE(ctx,
1934	    SYSCTL_CHILDREN(device_get_sysctl_tree(ntb->device)), OID_AUTO,
1935	    "debug_info", CTLFLAG_RD, NULL,
1936	    "Driver state, statistics, and HW registers");
1937	tree_par = SYSCTL_CHILDREN(tree);
1938
1939	SYSCTL_ADD_UINT(ctx, tree_par, OID_AUTO, "conn_type", CTLFLAG_RD,
1940	    &ntb->conn_type, 0, "0 - Transparent; 1 - B2B; 2 - Root Port");
1941	SYSCTL_ADD_UINT(ctx, tree_par, OID_AUTO, "dev_type", CTLFLAG_RD,
1942	    &ntb->dev_type, 0, "0 - USD; 1 - DSD");
1943
1944	if (ntb->b2b_mw_idx != B2B_MW_DISABLED) {
1945		SYSCTL_ADD_U8(ctx, tree_par, OID_AUTO, "b2b_idx", CTLFLAG_RD,
1946		    &ntb->b2b_mw_idx, 0,
1947		    "Index of the MW used for B2B remote register access");
1948		SYSCTL_ADD_UQUAD(ctx, tree_par, OID_AUTO, "b2b_off",
1949		    CTLFLAG_RD, &ntb->b2b_off,
1950		    "If non-zero, offset of B2B register region in shared MW");
1951	}
1952
1953	SYSCTL_ADD_PROC(ctx, tree_par, OID_AUTO, "features",
1954	    CTLFLAG_RD | CTLTYPE_STRING, ntb, 0, sysctl_handle_features, "A",
1955	    "Features/errata of this NTB device");
1956
1957	SYSCTL_ADD_UINT(ctx, tree_par, OID_AUTO, "ntb_ctl", CTLFLAG_RD,
1958	    &ntb->ntb_ctl, 0, "NTB CTL register (cached)");
1959	SYSCTL_ADD_UINT(ctx, tree_par, OID_AUTO, "lnk_sta", CTLFLAG_RD,
1960	    &ntb->lnk_sta, 0, "LNK STA register (cached)");
1961
1962	SYSCTL_ADD_PROC(ctx, tree_par, OID_AUTO, "link_status",
1963	    CTLFLAG_RD | CTLTYPE_STRING, ntb, 0, sysctl_handle_link_status,
1964	    "A", "Link status");
1965
1966	SYSCTL_ADD_U8(ctx, tree_par, OID_AUTO, "mw_count", CTLFLAG_RD,
1967	    &ntb->mw_count, 0, "MW count (excl. non-shared B2B register BAR)");
1968	SYSCTL_ADD_U8(ctx, tree_par, OID_AUTO, "spad_count", CTLFLAG_RD,
1969	    &ntb->spad_count, 0, "Scratchpad count");
1970	SYSCTL_ADD_U8(ctx, tree_par, OID_AUTO, "db_count", CTLFLAG_RD,
1971	    &ntb->db_count, 0, "Doorbell count");
1972	SYSCTL_ADD_U8(ctx, tree_par, OID_AUTO, "db_vec_count", CTLFLAG_RD,
1973	    &ntb->db_vec_count, 0, "Doorbell vector count");
1974	SYSCTL_ADD_U8(ctx, tree_par, OID_AUTO, "db_vec_shift", CTLFLAG_RD,
1975	    &ntb->db_vec_shift, 0, "Doorbell vector shift");
1976
1977	SYSCTL_ADD_UQUAD(ctx, tree_par, OID_AUTO, "db_valid_mask", CTLFLAG_RD,
1978	    &ntb->db_valid_mask, "Doorbell valid mask");
1979	SYSCTL_ADD_UQUAD(ctx, tree_par, OID_AUTO, "db_link_mask", CTLFLAG_RD,
1980	    &ntb->db_link_mask, "Doorbell link mask");
1981	SYSCTL_ADD_UQUAD(ctx, tree_par, OID_AUTO, "db_mask", CTLFLAG_RD,
1982	    &ntb->db_mask, "Doorbell mask (cached)");
1983
1984	tmptree = SYSCTL_ADD_NODE(ctx, tree_par, OID_AUTO, "registers",
1985	    CTLFLAG_RD, NULL, "Raw HW registers (big-endian)");
1986	regpar = SYSCTL_CHILDREN(tmptree);
1987
1988	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "db_mask",
1989	    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
1990	    NTB_REG_64 | NTB_DB_READ | ntb->self_reg->db_mask,
1991	    sysctl_handle_register, "QU", "Doorbell mask register");
1992	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "db_bell",
1993	    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
1994	    NTB_REG_64 | NTB_DB_READ | ntb->self_reg->db_bell,
1995	    sysctl_handle_register, "QU", "Doorbell register");
1996
1997	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "incoming_xlat23",
1998	    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
1999	    NTB_REG_64 | ntb->xlat_reg->bar2_xlat,
2000	    sysctl_handle_register, "QU", "Incoming XLAT23 register");
2001	if (HAS_FEATURE(NTB_SPLIT_BAR)) {
2002		SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "incoming_xlat4",
2003		    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
2004		    NTB_REG_32 | ntb->xlat_reg->bar4_xlat,
2005		    sysctl_handle_register, "IU", "Incoming XLAT4 register");
2006		SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "incoming_xlat5",
2007		    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
2008		    NTB_REG_32 | ntb->xlat_reg->bar5_xlat,
2009		    sysctl_handle_register, "IU", "Incoming XLAT5 register");
2010	} else {
2011		SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "incoming_xlat45",
2012		    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
2013		    NTB_REG_64 | ntb->xlat_reg->bar4_xlat,
2014		    sysctl_handle_register, "QU", "Incoming XLAT45 register");
2015	}
2016
2017	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "incoming_lmt23",
2018	    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
2019	    NTB_REG_64 | ntb->xlat_reg->bar2_limit,
2020	    sysctl_handle_register, "QU", "Incoming LMT23 register");
2021	if (HAS_FEATURE(NTB_SPLIT_BAR)) {
2022		SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "incoming_lmt4",
2023		    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
2024		    NTB_REG_32 | ntb->xlat_reg->bar4_limit,
2025		    sysctl_handle_register, "IU", "Incoming LMT4 register");
2026		SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "incoming_lmt5",
2027		    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
2028		    NTB_REG_32 | ntb->xlat_reg->bar5_limit,
2029		    sysctl_handle_register, "IU", "Incoming LMT5 register");
2030	} else {
2031		SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "incoming_lmt45",
2032		    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
2033		    NTB_REG_64 | ntb->xlat_reg->bar4_limit,
2034		    sysctl_handle_register, "QU", "Incoming LMT45 register");
2035	}
2036
2037	if (ntb->type == NTB_ATOM)
2038		return;
2039
2040	tmptree = SYSCTL_ADD_NODE(ctx, regpar, OID_AUTO, "xeon_stats",
2041	    CTLFLAG_RD, NULL, "Xeon HW statistics");
2042	statpar = SYSCTL_CHILDREN(tmptree);
2043	SYSCTL_ADD_PROC(ctx, statpar, OID_AUTO, "upstream_mem_miss",
2044	    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
2045	    NTB_REG_16 | XEON_USMEMMISS_OFFSET,
2046	    sysctl_handle_register, "SU", "Upstream Memory Miss");
2047
2048	tmptree = SYSCTL_ADD_NODE(ctx, regpar, OID_AUTO, "xeon_hw_err",
2049	    CTLFLAG_RD, NULL, "Xeon HW errors");
2050	errpar = SYSCTL_CHILDREN(tmptree);
2051
2052	SYSCTL_ADD_PROC(ctx, errpar, OID_AUTO, "devsts",
2053	    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
2054	    NTB_REG_16 | NTB_PCI_REG | XEON_DEVSTS_OFFSET,
2055	    sysctl_handle_register, "SU", "DEVSTS");
2056	SYSCTL_ADD_PROC(ctx, errpar, OID_AUTO, "lnksts",
2057	    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
2058	    NTB_REG_16 | NTB_PCI_REG | XEON_LINK_STATUS_OFFSET,
2059	    sysctl_handle_register, "SU", "LNKSTS");
2060	SYSCTL_ADD_PROC(ctx, errpar, OID_AUTO, "uncerrsts",
2061	    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
2062	    NTB_REG_32 | NTB_PCI_REG | XEON_UNCERRSTS_OFFSET,
2063	    sysctl_handle_register, "IU", "UNCERRSTS");
2064	SYSCTL_ADD_PROC(ctx, errpar, OID_AUTO, "corerrsts",
2065	    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
2066	    NTB_REG_32 | NTB_PCI_REG | XEON_CORERRSTS_OFFSET,
2067	    sysctl_handle_register, "IU", "CORERRSTS");
2068
2069	if (ntb->conn_type != NTB_CONN_B2B)
2070		return;
2071
2072	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "outgoing_xlat23",
2073	    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
2074	    NTB_REG_64 | ntb->bar_info[NTB_B2B_BAR_1].pbarxlat_off,
2075	    sysctl_handle_register, "QU", "Outgoing XLAT23 register");
2076	if (HAS_FEATURE(NTB_SPLIT_BAR)) {
2077		SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "outgoing_xlat4",
2078		    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
2079		    NTB_REG_32 | ntb->bar_info[NTB_B2B_BAR_2].pbarxlat_off,
2080		    sysctl_handle_register, "IU", "Outgoing XLAT4 register");
2081		SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "outgoing_xlat5",
2082		    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
2083		    NTB_REG_32 | ntb->bar_info[NTB_B2B_BAR_3].pbarxlat_off,
2084		    sysctl_handle_register, "IU", "Outgoing XLAT5 register");
2085	} else {
2086		SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "outgoing_xlat45",
2087		    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
2088		    NTB_REG_64 | ntb->bar_info[NTB_B2B_BAR_2].pbarxlat_off,
2089		    sysctl_handle_register, "QU", "Outgoing XLAT45 register");
2090	}
2091
2092	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "outgoing_lmt23",
2093	    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
2094	    NTB_REG_64 | XEON_PBAR2LMT_OFFSET,
2095	    sysctl_handle_register, "QU", "Outgoing LMT23 register");
2096	if (HAS_FEATURE(NTB_SPLIT_BAR)) {
2097		SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "outgoing_lmt4",
2098		    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
2099		    NTB_REG_32 | XEON_PBAR4LMT_OFFSET,
2100		    sysctl_handle_register, "IU", "Outgoing LMT4 register");
2101		SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "outgoing_lmt5",
2102		    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
2103		    NTB_REG_32 | XEON_PBAR5LMT_OFFSET,
2104		    sysctl_handle_register, "IU", "Outgoing LMT5 register");
2105	} else {
2106		SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "outgoing_lmt45",
2107		    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
2108		    NTB_REG_64 | XEON_PBAR4LMT_OFFSET,
2109		    sysctl_handle_register, "QU", "Outgoing LMT45 register");
2110	}
2111
2112	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "sbar01_base",
2113	    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
2114	    NTB_REG_64 | ntb->xlat_reg->bar0_base,
2115	    sysctl_handle_register, "QU", "Secondary BAR01 base register");
2116	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "sbar23_base",
2117	    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
2118	    NTB_REG_64 | ntb->xlat_reg->bar2_base,
2119	    sysctl_handle_register, "QU", "Secondary BAR23 base register");
2120	if (HAS_FEATURE(NTB_SPLIT_BAR)) {
2121		SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "sbar4_base",
2122		    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
2123		    NTB_REG_32 | ntb->xlat_reg->bar4_base,
2124		    sysctl_handle_register, "IU",
2125		    "Secondary BAR4 base register");
2126		SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "sbar5_base",
2127		    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
2128		    NTB_REG_32 | ntb->xlat_reg->bar5_base,
2129		    sysctl_handle_register, "IU",
2130		    "Secondary BAR5 base register");
2131	} else {
2132		SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "sbar45_base",
2133		    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
2134		    NTB_REG_64 | ntb->xlat_reg->bar4_base,
2135		    sysctl_handle_register, "QU",
2136		    "Secondary BAR45 base register");
2137	}
2138}
2139
2140static int
2141sysctl_handle_features(SYSCTL_HANDLER_ARGS)
2142{
2143	struct ntb_softc *ntb;
2144	struct sbuf sb;
2145	int error;
2146
2147	error = 0;
2148	ntb = arg1;
2149
2150	sbuf_new_for_sysctl(&sb, NULL, 256, req);
2151
2152	sbuf_printf(&sb, "%b", ntb->features, NTB_FEATURES_STR);
2153	error = sbuf_finish(&sb);
2154	sbuf_delete(&sb);
2155
2156	if (error || !req->newptr)
2157		return (error);
2158	return (EINVAL);
2159}
2160
2161static int
2162sysctl_handle_link_status(SYSCTL_HANDLER_ARGS)
2163{
2164	struct ntb_softc *ntb;
2165	struct sbuf sb;
2166	enum ntb_speed speed;
2167	enum ntb_width width;
2168	int error;
2169
2170	error = 0;
2171	ntb = arg1;
2172
2173	sbuf_new_for_sysctl(&sb, NULL, 32, req);
2174
2175	if (ntb_link_is_up(ntb, &speed, &width))
2176		sbuf_printf(&sb, "up / PCIe Gen %u / Width x%u",
2177		    (unsigned)speed, (unsigned)width);
2178	else
2179		sbuf_printf(&sb, "down");
2180
2181	error = sbuf_finish(&sb);
2182	sbuf_delete(&sb);
2183
2184	if (error || !req->newptr)
2185		return (error);
2186	return (EINVAL);
2187}
2188
2189static int
2190sysctl_handle_register(SYSCTL_HANDLER_ARGS)
2191{
2192	struct ntb_softc *ntb;
2193	const void *outp;
2194	uintptr_t sz;
2195	uint64_t umv;
2196	char be[sizeof(umv)];
2197	size_t outsz;
2198	uint32_t reg;
2199	bool db, pci;
2200	int error;
2201
2202	ntb = arg1;
2203	reg = arg2 & ~NTB_REGFLAGS_MASK;
2204	sz = arg2 & NTB_REGSZ_MASK;
2205	db = (arg2 & NTB_DB_READ) != 0;
2206	pci = (arg2 & NTB_PCI_REG) != 0;
2207
2208	KASSERT(!(db && pci), ("bogus"));
2209
2210	if (db) {
2211		KASSERT(sz == NTB_REG_64, ("bogus"));
2212		umv = db_ioread(ntb, reg);
2213		outsz = sizeof(uint64_t);
2214	} else {
2215		switch (sz) {
2216		case NTB_REG_64:
2217			if (pci)
2218				umv = pci_read_config(ntb->device, reg, 8);
2219			else
2220				umv = ntb_reg_read(8, reg);
2221			outsz = sizeof(uint64_t);
2222			break;
2223		case NTB_REG_32:
2224			if (pci)
2225				umv = pci_read_config(ntb->device, reg, 4);
2226			else
2227				umv = ntb_reg_read(4, reg);
2228			outsz = sizeof(uint32_t);
2229			break;
2230		case NTB_REG_16:
2231			if (pci)
2232				umv = pci_read_config(ntb->device, reg, 2);
2233			else
2234				umv = ntb_reg_read(2, reg);
2235			outsz = sizeof(uint16_t);
2236			break;
2237		case NTB_REG_8:
2238			if (pci)
2239				umv = pci_read_config(ntb->device, reg, 1);
2240			else
2241				umv = ntb_reg_read(1, reg);
2242			outsz = sizeof(uint8_t);
2243			break;
2244		default:
2245			panic("bogus");
2246			break;
2247		}
2248	}
2249
2250	/* Encode bigendian so that sysctl -x is legible. */
2251	be64enc(be, umv);
2252	outp = ((char *)be) + sizeof(umv) - outsz;
2253
2254	error = SYSCTL_OUT(req, outp, outsz);
2255	if (error || !req->newptr)
2256		return (error);
2257	return (EINVAL);
2258}
2259
2260/*
2261 * Public API to the rest of the OS
2262 */
2263
2264/**
2265 * ntb_get_max_spads() - get the total scratch regs usable
2266 * @ntb: pointer to ntb_softc instance
2267 *
2268 * This function returns the max 32bit scratchpad registers usable by the
2269 * upper layer.
2270 *
2271 * RETURNS: total number of scratch pad registers available
2272 */
2273uint8_t
2274ntb_get_max_spads(struct ntb_softc *ntb)
2275{
2276
2277	return (ntb->spad_count);
2278}
2279
2280uint8_t
2281ntb_mw_count(struct ntb_softc *ntb)
2282{
2283
2284	return (ntb->mw_count);
2285}
2286
/**
 * ntb_spad_write() - write to the local scratchpad register
 * @ntb: pointer to ntb_softc instance
 * @idx: index to the scratchpad register, 0 based
 * @val: the data value to put into the register
 *
 * This function allows writing of a 32bit value to the indexed scratchpad
 * register.  The register resides on the primary (local) side; use
 * ntb_peer_spad_write() to write the peer's scratchpad.  (The previous
 * comment claiming "secondary (external) side" was a copy-paste error:
 * the code below writes through ntb->self_reg.)
 *
 * RETURNS: An appropriate ERRNO error value on error, or zero for success.
 */
int
ntb_spad_write(struct ntb_softc *ntb, unsigned int idx, uint32_t val)
{

	if (idx >= ntb->spad_count)
		return (EINVAL);

	ntb_reg_write(4, ntb->self_reg->spad + idx * 4, val);

	return (0);
}
2309
2310/**
2311 * ntb_spad_read() - read from the primary scratchpad register
2312 * @ntb: pointer to ntb_softc instance
2313 * @idx: index to scratchpad register, 0 based
2314 * @val: pointer to 32bit integer for storing the register value
2315 *
2316 * This function allows reading of the 32bit scratchpad register on
2317 * the primary (internal) side.
2318 *
2319 * RETURNS: An appropriate ERRNO error value on error, or zero for success.
2320 */
2321int
2322ntb_spad_read(struct ntb_softc *ntb, unsigned int idx, uint32_t *val)
2323{
2324
2325	if (idx >= ntb->spad_count)
2326		return (EINVAL);
2327
2328	*val = ntb_reg_read(4, ntb->self_reg->spad + idx * 4);
2329
2330	return (0);
2331}
2332
2333/**
2334 * ntb_peer_spad_write() - write to the secondary scratchpad register
2335 * @ntb: pointer to ntb_softc instance
2336 * @idx: index to the scratchpad register, 0 based
2337 * @val: the data value to put into the register
2338 *
2339 * This function allows writing of a 32bit value to the indexed scratchpad
2340 * register. The register resides on the secondary (external) side.
2341 *
2342 * RETURNS: An appropriate ERRNO error value on error, or zero for success.
2343 */
2344int
2345ntb_peer_spad_write(struct ntb_softc *ntb, unsigned int idx, uint32_t val)
2346{
2347
2348	if (idx >= ntb->spad_count)
2349		return (EINVAL);
2350
2351	if (HAS_FEATURE(NTB_SDOORBELL_LOCKUP))
2352		ntb_mw_write(4, XEON_SHADOW_SPAD_OFFSET + idx * 4, val);
2353	else
2354		ntb_reg_write(4, ntb->peer_reg->spad + idx * 4, val);
2355
2356	return (0);
2357}
2358
/**
 * ntb_peer_spad_read() - read from the peer scratchpad register
 * @ntb: pointer to ntb_softc instance
 * @idx: index to scratchpad register, 0 based
 * @val: pointer to 32bit integer for storing the register value
 *
 * This function allows reading of the 32bit scratchpad register on the
 * secondary (peer) side.  On hardware with the SDOORBELL_LOCKUP errata the
 * register is reached through the B2B shadow window instead of directly.
 * (The previous comment claiming "primary (internal) side" was a
 * copy-paste error: the code below reads through ntb->peer_reg.)
 *
 * RETURNS: An appropriate ERRNO error value on error, or zero for success.
 */
int
ntb_peer_spad_read(struct ntb_softc *ntb, unsigned int idx, uint32_t *val)
{

	if (idx >= ntb->spad_count)
		return (EINVAL);

	if (HAS_FEATURE(NTB_SDOORBELL_LOCKUP))
		*val = ntb_mw_read(4, XEON_SHADOW_SPAD_OFFSET + idx * 4);
	else
		*val = ntb_reg_read(4, ntb->peer_reg->spad + idx * 4);

	return (0);
}
2384
2385/*
2386 * ntb_mw_get_range() - get the range of a memory window
2387 * @ntb:        NTB device context
2388 * @idx:        Memory window number
2389 * @base:       OUT - the base address for mapping the memory window
2390 * @size:       OUT - the size for mapping the memory window
2391 * @align:      OUT - the base alignment for translating the memory window
2392 * @align_size: OUT - the size alignment for translating the memory window
2393 *
2394 * Get the range of a memory window.  NULL may be given for any output
2395 * parameter if the value is not needed.  The base and size may be used for
2396 * mapping the memory window, to access the peer memory.  The alignment and
2397 * size may be used for translating the memory window, for the peer to access
2398 * memory on the local system.
2399 *
2400 * Return: Zero on success, otherwise an error number.
2401 */
2402int
2403ntb_mw_get_range(struct ntb_softc *ntb, unsigned mw_idx, vm_paddr_t *base,
2404    void **vbase, size_t *size, size_t *align, size_t *align_size)
2405{
2406	struct ntb_pci_bar_info *bar;
2407	size_t bar_b2b_off;
2408
2409	if (mw_idx >= ntb_mw_count(ntb))
2410		return (EINVAL);
2411
2412	bar = &ntb->bar_info[ntb_mw_to_bar(ntb, mw_idx)];
2413	bar_b2b_off = 0;
2414	if (mw_idx == ntb->b2b_mw_idx) {
2415		KASSERT(ntb->b2b_off != 0,
2416		    ("user shouldn't get non-shared b2b mw"));
2417		bar_b2b_off = ntb->b2b_off;
2418	}
2419
2420	if (base != NULL)
2421		*base = bar->pbase + bar_b2b_off;
2422	if (vbase != NULL)
2423		*vbase = (char *)bar->vbase + bar_b2b_off;
2424	if (size != NULL)
2425		*size = bar->size - bar_b2b_off;
2426	if (align != NULL)
2427		*align = bar->size;
2428	if (align_size != NULL)
2429		*align_size = 1;
2430	return (0);
2431}
2432
/*
 * ntb_mw_set_trans() - set the translation of a memory window
 * @ntb:        NTB device context
 * @idx:        Memory window number
 * @addr:       The dma address local memory to expose to the peer
 * @size:       The size of the local memory to expose to the peer
 *
 * Set the translation of a memory window.  The peer may access local memory
 * through the window starting at the address, up to the size.  The address
 * must be aligned to the alignment specified by ntb_mw_get_range().  The size
 * must be aligned to the size alignment specified by ntb_mw_get_range().
 *
 * Passing addr == 0 and size == 0 clears the translation (see
 * ntb_mw_clear_trans()): the XLAT register is written 0 and the limit is
 * written 0 (disabled).
 *
 * Return: Zero on success, otherwise an error number.
 */
int
ntb_mw_set_trans(struct ntb_softc *ntb, unsigned idx, bus_addr_t addr,
    size_t size)
{
	struct ntb_pci_bar_info *bar;
	uint64_t base, limit, reg_val;
	size_t bar_size, mw_size;
	uint32_t base_reg, xlat_reg, limit_reg;
	enum ntb_bar bar_num;

	if (idx >= ntb_mw_count(ntb))
		return (EINVAL);

	bar_num = ntb_mw_to_bar(ntb, idx);
	bar = &ntb->bar_info[bar_num];

	/*
	 * If the B2B registers share this BAR, only the portion past
	 * b2b_off is usable as a memory window.
	 */
	bar_size = bar->size;
	if (idx == ntb->b2b_mw_idx)
		mw_size = bar_size - ntb->b2b_off;
	else
		mw_size = bar_size;

	/* Hardware requires that addr is aligned to bar size */
	if ((addr & (bar_size - 1)) != 0)
		return (EINVAL);

	if (size > mw_size)
		return (EINVAL);

	bar_get_xlat_params(ntb, bar_num, &base_reg, &xlat_reg, &limit_reg);

	/*
	 * limit == 0 disables the limit; it is only programmed when the
	 * caller exposes less than the full window.
	 * NOTE(review): when limit_reg == 0 the limit writes below still
	 * execute with a zero register offset — presumably limit_reg is
	 * never 0 for user-visible windows; confirm against
	 * bar_get_xlat_params().
	 */
	limit = 0;
	if (bar_is_64bit(ntb, bar_num)) {
		base = ntb_reg_read(8, base_reg);

		if (limit_reg != 0 && size != mw_size)
			limit = base + size;

		/* Set and verify translation address */
		ntb_reg_write(8, xlat_reg, addr);
		reg_val = ntb_reg_read(8, xlat_reg);
		if (reg_val != addr) {
			/* Read-back mismatch: undo and report I/O error. */
			ntb_reg_write(8, xlat_reg, 0);
			return (EIO);
		}

		/* Set and verify the limit */
		ntb_reg_write(8, limit_reg, limit);
		reg_val = ntb_reg_read(8, limit_reg);
		if (reg_val != limit) {
			/* Restore the limit to base and clear the XLAT. */
			ntb_reg_write(8, limit_reg, base);
			ntb_reg_write(8, xlat_reg, 0);
			return (EIO);
		}
	} else {
		/* Configure 32-bit (split) BAR MW */

		/* Address and end of region must fit in 32 bits. */
		if ((addr & ~UINT32_MAX) != 0)
			return (EINVAL);
		if (((addr + size) & ~UINT32_MAX) != 0)
			return (EINVAL);

		base = ntb_reg_read(4, base_reg);

		if (limit_reg != 0 && size != mw_size)
			limit = base + size;

		/* Set and verify translation address */
		ntb_reg_write(4, xlat_reg, addr);
		reg_val = ntb_reg_read(4, xlat_reg);
		if (reg_val != addr) {
			ntb_reg_write(4, xlat_reg, 0);
			return (EIO);
		}

		/* Set and verify the limit */
		ntb_reg_write(4, limit_reg, limit);
		reg_val = ntb_reg_read(4, limit_reg);
		if (reg_val != limit) {
			ntb_reg_write(4, limit_reg, base);
			ntb_reg_write(4, xlat_reg, 0);
			return (EIO);
		}
	}
	return (0);
}
2533
/*
 * ntb_mw_clear_trans() - clear the translation of a memory window
 * @ntb:	NTB device context
 * @idx:	Memory window number
 *
 * Clear the translation of a memory window.  The peer may no longer access
 * local memory through the window.
 *
 * Implemented as a translation to address 0 with size 0, which
 * ntb_mw_set_trans() programs as a zero XLAT and a disabled limit.
 *
 * Return: Zero on success, otherwise an error number.
 */
int
ntb_mw_clear_trans(struct ntb_softc *ntb, unsigned mw_idx)
{

	return (ntb_mw_set_trans(ntb, mw_idx, 0, 0));
}
2550
2551/**
2552 * ntb_peer_db_set() - Set the doorbell on the secondary/external side
2553 * @ntb: pointer to ntb_softc instance
2554 * @bit: doorbell bits to ring
2555 *
2556 * This function allows triggering of a doorbell on the secondary/external
2557 * side that will initiate an interrupt on the remote host
2558 */
2559void
2560ntb_peer_db_set(struct ntb_softc *ntb, uint64_t bit)
2561{
2562
2563	if (HAS_FEATURE(NTB_SDOORBELL_LOCKUP)) {
2564		ntb_mw_write(2, XEON_SHADOW_PDOORBELL_OFFSET, bit);
2565		return;
2566	}
2567
2568	db_iowrite(ntb, ntb->peer_reg->db_bell, bit);
2569}
2570
/*
 * ntb_get_peer_db_addr() - Return the address of the remote doorbell register,
 * as well as the size of the register (via *sz_out).
 *
 * This function allows a caller using I/OAT DMA to chain the remote doorbell
 * ring to its memory window write.
 *
 * Note that writing the peer doorbell via a memory window will *not* generate
 * an interrupt on the remote host; that must be done separately.
 */
bus_addr_t
ntb_get_peer_db_addr(struct ntb_softc *ntb, vm_size_t *sz_out)
{
	struct ntb_pci_bar_info *bar;
	uint64_t regoff;

	KASSERT(sz_out != NULL, ("must be non-NULL"));

	if (!HAS_FEATURE(NTB_SDOORBELL_LOCKUP)) {
		/* Normal case: the doorbell lives in the config BAR. */
		bar = &ntb->bar_info[NTB_CONFIG_BAR];
		regoff = ntb->peer_reg->db_bell;
	} else {
		KASSERT((HAS_FEATURE(NTB_SPLIT_BAR) && ntb->mw_count == 2) ||
		    (!HAS_FEATURE(NTB_SPLIT_BAR) && ntb->mw_count == 1),
		    ("mw_count invalid after setup"));
		KASSERT(ntb->b2b_mw_idx != B2B_MW_DISABLED,
		    ("invalid b2b idx"));

		/* Errata: use the shadow doorbell in the B2B window. */
		bar = &ntb->bar_info[ntb_mw_to_bar(ntb, ntb->b2b_mw_idx)];
		regoff = XEON_SHADOW_PDOORBELL_OFFSET;
	}
	KASSERT(bar->pci_bus_tag != X86_BUS_SPACE_IO, ("uh oh"));

	*sz_out = ntb->reg->db_size;
	/* HACK: Specific to current x86 bus implementation. */
	return ((uint64_t)bar->pci_bus_handle + regoff);
}
2608
2609/*
2610 * ntb_db_valid_mask() - get a mask of doorbell bits supported by the ntb
2611 * @ntb:	NTB device context
2612 *
2613 * Hardware may support different number or arrangement of doorbell bits.
2614 *
2615 * Return: A mask of doorbell bits supported by the ntb.
2616 */
2617uint64_t
2618ntb_db_valid_mask(struct ntb_softc *ntb)
2619{
2620
2621	return (ntb->db_valid_mask);
2622}
2623
/*
 * ntb_db_vector_mask() - get a mask of doorbell bits serviced by a vector
 * @ntb:	NTB device context
 * @vector:	Doorbell vector number
 *
 * Each interrupt vector may have a different number or arrangement of bits.
 *
 * NOTE(review): the bounds check admits vector == db_vec_count; for that
 * value ntb_vec_mask()'s bits presumably fall outside db_valid_mask so the
 * AND below yields 0 anyway — confirm whether ">=" was intended.
 *
 * Return: A mask of doorbell bits serviced by a vector.
 */
uint64_t
ntb_db_vector_mask(struct ntb_softc *ntb, uint32_t vector)
{

	if (vector > ntb->db_vec_count)
		return (0);
	return (ntb->db_valid_mask & ntb_vec_mask(ntb, vector));
}
2641
2642/**
2643 * ntb_link_is_up() - get the current ntb link state
2644 * @ntb:        NTB device context
2645 * @speed:      OUT - The link speed expressed as PCIe generation number
2646 * @width:      OUT - The link width expressed as the number of PCIe lanes
2647 *
2648 * RETURNS: true or false based on the hardware link state
2649 */
2650bool
2651ntb_link_is_up(struct ntb_softc *ntb, enum ntb_speed *speed,
2652    enum ntb_width *width)
2653{
2654
2655	if (speed != NULL)
2656		*speed = ntb_link_sta_speed(ntb);
2657	if (width != NULL)
2658		*width = ntb_link_sta_width(ntb);
2659	return (link_is_up(ntb));
2660}
2661
2662static void
2663save_bar_parameters(struct ntb_pci_bar_info *bar)
2664{
2665
2666	bar->pci_bus_tag = rman_get_bustag(bar->pci_resource);
2667	bar->pci_bus_handle = rman_get_bushandle(bar->pci_resource);
2668	bar->pbase = rman_get_start(bar->pci_resource);
2669	bar->size = rman_get_size(bar->pci_resource);
2670	bar->vbase = rman_get_virtual(bar->pci_resource);
2671}
2672
2673device_t
2674ntb_get_device(struct ntb_softc *ntb)
2675{
2676
2677	return (ntb->device);
2678}
2679
2680/* Export HW-specific errata information. */
2681bool
2682ntb_has_feature(struct ntb_softc *ntb, uint32_t feature)
2683{
2684
2685	return (HAS_FEATURE(feature));
2686}
2687