ntb_hw_intel.c revision 290681
1144513Simp/*-
2144611Simp * Copyright (C) 2013 Intel Corporation
3144611Simp * Copyright (C) 2015 EMC Corporation
4144611Simp * All rights reserved.
5144611Simp *
6144611Simp * Redistribution and use in source and binary forms, with or without
7144611Simp * modification, are permitted provided that the following conditions
8144611Simp * are met:
9144611Simp * 1. Redistributions of source code must retain the above copyright
10144611Simp *    notice, this list of conditions and the following disclaimer.
11144611Simp * 2. Redistributions in binary form must reproduce the above copyright
12144611Simp *    notice, this list of conditions and the following disclaimer in the
13144611Simp *    documentation and/or other materials provided with the distribution.
14144611Simp *
15144611Simp * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16144611Simp * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17144611Simp * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18144611Simp * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19144611Simp * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20144611Simp * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21144611Simp * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22144611Simp * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23144611Simp * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24144611Simp * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25144611Simp * SUCH DAMAGE.
26144513Simp */
27144513Simp
28144611Simp#include <sys/cdefs.h>
29144611Simp__FBSDID("$FreeBSD: head/sys/dev/ntb/ntb_hw/ntb_hw.c 290681 2015-11-11 18:55:16Z cem $");
30144611Simp
31144611Simp#include <sys/param.h>
32144611Simp#include <sys/kernel.h>
33144611Simp#include <sys/systm.h>
34144611Simp#include <sys/bus.h>
35215140Sjkim#include <sys/endian.h>
36144611Simp#include <sys/malloc.h>
37144611Simp#include <sys/module.h>
38144611Simp#include <sys/queue.h>
39144611Simp#include <sys/rman.h>
40144611Simp#include <sys/sbuf.h>
41144611Simp#include <sys/sysctl.h>
42144611Simp#include <vm/vm.h>
43144611Simp#include <vm/pmap.h>
44144611Simp#include <machine/bus.h>
45144611Simp#include <machine/pmap.h>
46144611Simp#include <machine/resource.h>
47144611Simp#include <dev/pci/pcireg.h>
48144611Simp#include <dev/pci/pcivar.h>
49144611Simp
50144611Simp#include "ntb_regs.h"
51144611Simp#include "ntb_hw.h"
52144611Simp
53144611Simp/*
54144611Simp * The Non-Transparent Bridge (NTB) is a device on some Intel processors that
55144611Simp * allows you to connect two systems using a PCI-e link.
56 *
57 * This module contains the hardware abstraction layer for the NTB. It allows
 * you to send and receive interrupts, map the memory windows and send and
59 * receive messages in the scratch-pad registers.
60 *
61 * NOTE: Much of the code in this module is shared with Linux. Any patches may
62 * be picked up and redistributed in Linux with a dual GPL/BSD license.
63 */
64
/*
 * Upper bound on interrupt slots: the larger of the two families' doorbell
 * counts.  Used to size ntb_softc.int_info[].
 */
#define MAX_MSIX_INTERRUPTS MAX(XEON_DB_COUNT, ATOM_DB_COUNT)

#define NTB_HB_TIMEOUT		1 /* second */
#define ATOM_LINK_RECOVERY_TIME	500 /* ms */

/* Fetch the driver's softc for a device_t. */
#define DEVICE2SOFTC(dev) ((struct ntb_softc *) device_get_softc(dev))
71
/* Hardware family; attach uses it to pick the Atom or Xeon detect/init path. */
enum ntb_device_type {
	NTB_XEON,
	NTB_ATOM
};
76
/*
 * Connection type of the bridge.
 * ntb_conn_type are hardware numbers, cannot change.
 */
enum ntb_conn_type {
	NTB_CONN_TRANSPARENT = 0,
	NTB_CONN_B2B = 1,
	NTB_CONN_RP = 2,
};
83
/*
 * Side of a B2B link this device sits on.  The xeon_b2b sysctl descriptions
 * refer to "usd" as the upstream side and "dsd" as the downstream side.
 */
enum ntb_b2b_direction {
	NTB_DEV_USD = 0,
	NTB_DEV_DSD = 1,
};
88
/*
 * Indices into ntb_softc.bar_info[].  NTB_CONFIG_BAR is mapped from PCI BAR0
 * and the B2B bars from PCI BARs 2, 4 and 5 (see ntb_map_pci_bars()).
 * NTB_MAX_BARS is the array size, not a valid index.
 */
enum ntb_bar {
	NTB_CONFIG_BAR = 0,
	NTB_B2B_BAR_1,
	NTB_B2B_BAR_2,
	NTB_B2B_BAR_3,
	NTB_MAX_BARS
};
96
/*
 * Device features and workarounds.
 *
 * Tests a feature bit in the softc.  The macro is unhygienic: it expects a
 * variable named `ntb' (struct ntb_softc *) to be in scope at the use site.
 */
#define HAS_FEATURE(feature)	\
	((ntb->features & (feature)) != 0)
100
/* One entry of the supported-device table (pci_ids[]). */
struct ntb_hw_info {
	uint32_t		device_id;	/* compared against pci_get_devid() */
	const char		*desc;		/* passed to device_set_desc() */
	enum ntb_device_type	type;		/* copied to softc at attach */
	uint32_t		features;	/* feature/workaround bits, see HAS_FEATURE */
};
107
/*
 * State for one mapped PCI BAR.  The tag/handle pair is what the
 * ntb_bar_read/ntb_bar_write macros hand to bus_space(9); the base/size
 * fields are presumably filled in by save_bar_parameters() (not shown here).
 */
struct ntb_pci_bar_info {
	bus_space_tag_t		pci_bus_tag;
	bus_space_handle_t	pci_bus_handle;
	int			pci_resource_id;	/* rid, e.g. PCIR_BAR(2) */
	struct resource		*pci_resource;	/* NULL when not allocated */
	vm_paddr_t		pbase;		/* physical base */
	caddr_t			vbase;		/* kernel virtual base */
	vm_size_t		size;

	/* Configuration register offsets */
	uint32_t		psz_off;
	uint32_t		ssz_off;
	uint32_t		pbarxlat_off;
};
122
/* One allocated interrupt: IRQ resource, its rid, and the handler cookie. */
struct ntb_int_info {
	struct resource	*res;	/* from bus_alloc_resource_any(SYS_RES_IRQ) */
	int		rid;
	void		*tag;	/* cookie from bus_setup_intr(); NULL if not set up */
};
128
/*
 * Per-vector argument: ntb_setup_msix() passes &msix_vec[i] as the handler
 * argument for MSI-X vector i.
 */
struct ntb_vec {
	struct ntb_softc	*ntb;
	uint32_t		num;
};
133
/*
 * Per-family register description (see atom_reg and xeon_reg): offsets of the
 * control and link-status registers, the doorbell register width in bytes,
 * and the memory-window-index to enum ntb_bar mapping used by ntb_mw_to_bar().
 */
struct ntb_reg {
	uint32_t	ntb_ctl;
	uint32_t	lnk_sta;
	uint8_t		db_size;
	unsigned	mw_bar[NTB_MAX_BARS];
};
140
/*
 * Register offsets for one view of the device (softc keeps a self_reg and a
 * peer_reg): doorbell, doorbell mask, and scratchpad.
 */
struct ntb_alt_reg {
	uint32_t	db_bell;
	uint32_t	db_mask;
	uint32_t	spad;
};
146
/*
 * Offsets of the BAR base, translation and limit registers.
 * bar_get_xlat_params() maps NTB_B2B_BAR_1/2/3 onto the bar2/bar4/bar5
 * members respectively.
 */
struct ntb_xlat_reg {
	uint32_t	bar0_base;
	uint32_t	bar2_base;
	uint32_t	bar4_base;
	uint32_t	bar5_base;

	uint32_t	bar2_xlat;
	uint32_t	bar4_xlat;
	uint32_t	bar5_xlat;

	uint32_t	bar2_limit;
	uint32_t	bar4_limit;
	uint32_t	bar5_limit;
};
161
/*
 * Bus addresses used for the B2B topology on Xeon.  Per the sysctl
 * descriptions, the addr32 members apply in split-BAR mode.
 */
struct ntb_b2b_addr {
	uint64_t	bar0_addr;
	uint64_t	bar2_addr64;
	uint64_t	bar4_addr64;
	uint64_t	bar4_addr32;
	uint64_t	bar5_addr32;
};
169
/*
 * Per-device driver state.  Allocated by newbus (size given in
 * ntb_pci_driver) and retrieved with DEVICE2SOFTC().
 */
struct ntb_softc {
	device_t		device;
	enum ntb_device_type	type;
	uint32_t		features;

	struct ntb_pci_bar_info	bar_info[NTB_MAX_BARS];
	struct ntb_int_info	int_info[MAX_MSIX_INTERRUPTS];
	/* Number of int_info[] slots with an allocated IRQ resource. */
	uint32_t		allocated_interrupts;

	struct callout		heartbeat_timer;
	struct callout		lr_timer;

	void			*ntb_ctx;
	const struct ntb_ctx_ops *ctx_ops;
	struct ntb_vec		*msix_vec;
#define CTX_LOCK(sc)		mtx_lock_spin(&(sc)->ctx_lock)
#define CTX_UNLOCK(sc)		mtx_unlock_spin(&(sc)->ctx_lock)
#define CTX_ASSERT(sc,f)	mtx_assert(&(sc)->ctx_lock, (f))
	struct mtx		ctx_lock;

	uint32_t		ppd;
	enum ntb_conn_type	conn_type;
	enum ntb_b2b_direction	dev_type;

	/* Offset of peer bar0 in B2B BAR */
	uint64_t			b2b_off;
	/* Memory window used to access peer bar0 */
#define B2B_MW_DISABLED			UINT8_MAX
	uint8_t				b2b_mw_idx;

	uint8_t				mw_count;
	uint8_t				spad_count;
	uint8_t				db_count;
	uint8_t				db_vec_count;
	uint8_t				db_vec_shift;

	/* Protects local db_mask. */
#define DB_MASK_LOCK(sc)	mtx_lock_spin(&(sc)->db_mask_lock)
#define DB_MASK_UNLOCK(sc)	mtx_unlock_spin(&(sc)->db_mask_lock)
#define DB_MASK_ASSERT(sc,f)	mtx_assert(&(sc)->db_mask_lock, (f))
	struct mtx			db_mask_lock;

	uint32_t			ntb_ctl;
	uint32_t			lnk_sta;

	uint64_t			db_valid_mask;
	uint64_t			db_link_mask;
	/* Software copy of the doorbell mask register; see ntb_db_set_mask(). */
	uint64_t			db_mask;

	int				last_ts;	/* ticks @ last irq */

	/* Register tables; self_reg stays NULL until device init picks one. */
	const struct ntb_reg		*reg;
	const struct ntb_alt_reg	*self_reg;
	const struct ntb_alt_reg	*peer_reg;
	const struct ntb_xlat_reg	*xlat_reg;
};
226
#ifdef __i386__
/*
 * i386 fallbacks for 64-bit bus_space accessors, built from two 32-bit
 * accesses (so the 64-bit access is NOT atomic).
 *
 * The low dword (offset) is always accessed before the high dword
 * (offset + 4).  The read is split into separate statements so that the
 * order of the two device reads is well-defined rather than left to the
 * compiler's choice of operand evaluation order.
 */
static __inline uint64_t
bus_space_read_8(bus_space_tag_t tag, bus_space_handle_t handle,
    bus_size_t offset)
{
	uint64_t val;

	val = bus_space_read_4(tag, handle, offset);
	val |= (uint64_t)bus_space_read_4(tag, handle, offset + 4) << 32;
	return (val);
}

static __inline void
bus_space_write_8(bus_space_tag_t tag, bus_space_handle_t handle,
    bus_size_t offset, uint64_t val)
{

	/* Low half first, then high half; each write truncates to 32 bits. */
	bus_space_write_4(tag, handle, offset, (uint32_t)val);
	bus_space_write_4(tag, handle, offset + 4, (uint32_t)(val >> 32));
}
#endif
246
/*
 * Register accessors.  SIZE is the access width in bytes and is pasted onto
 * bus_space_read_ / bus_space_write_ (e.g. ntb_reg_read(2, off)).
 *
 * All of these are unhygienic: they expect a `struct ntb_softc *ntb' in
 * scope at the use site.
 *
 *  ntb_bar_*  - access an arbitrary bar_info[] entry
 *  ntb_reg_*  - access NTB_CONFIG_BAR
 *  ntb_mw_*   - access the BAR backing memory window ntb->b2b_mw_idx
 */
#define ntb_bar_read(SIZE, bar, offset) \
	    bus_space_read_ ## SIZE (ntb->bar_info[(bar)].pci_bus_tag, \
	    ntb->bar_info[(bar)].pci_bus_handle, (offset))
#define ntb_bar_write(SIZE, bar, offset, val) \
	    bus_space_write_ ## SIZE (ntb->bar_info[(bar)].pci_bus_tag, \
	    ntb->bar_info[(bar)].pci_bus_handle, (offset), (val))
#define ntb_reg_read(SIZE, offset) ntb_bar_read(SIZE, NTB_CONFIG_BAR, offset)
#define ntb_reg_write(SIZE, offset, val) \
	    ntb_bar_write(SIZE, NTB_CONFIG_BAR, offset, val)
#define ntb_mw_read(SIZE, offset) \
	    ntb_bar_read(SIZE, ntb_mw_to_bar(ntb, ntb->b2b_mw_idx), offset)
#define ntb_mw_write(SIZE, offset, val) \
	    ntb_bar_write(SIZE, ntb_mw_to_bar(ntb, ntb->b2b_mw_idx), \
		offset, val)
261
262static int ntb_probe(device_t device);
263static int ntb_attach(device_t device);
264static int ntb_detach(device_t device);
265static inline enum ntb_bar ntb_mw_to_bar(struct ntb_softc *, unsigned mw);
266static inline bool bar_is_64bit(struct ntb_softc *, enum ntb_bar);
267static inline void bar_get_xlat_params(struct ntb_softc *, enum ntb_bar,
268    uint32_t *base, uint32_t *xlat, uint32_t *lmt);
269static int ntb_map_pci_bars(struct ntb_softc *ntb);
270static void print_map_success(struct ntb_softc *, struct ntb_pci_bar_info *,
271    const char *);
272static int map_mmr_bar(struct ntb_softc *ntb, struct ntb_pci_bar_info *bar);
273static int map_memory_window_bar(struct ntb_softc *ntb,
274    struct ntb_pci_bar_info *bar);
275static void ntb_unmap_pci_bar(struct ntb_softc *ntb);
276static int ntb_remap_msix(device_t, uint32_t desired, uint32_t avail);
277static int ntb_init_isr(struct ntb_softc *ntb);
278static int ntb_setup_legacy_interrupt(struct ntb_softc *ntb);
279static int ntb_setup_msix(struct ntb_softc *ntb, uint32_t num_vectors);
280static void ntb_teardown_interrupts(struct ntb_softc *ntb);
281static inline uint64_t ntb_vec_mask(struct ntb_softc *, uint64_t db_vector);
282static void ntb_interrupt(struct ntb_softc *, uint32_t vec);
283static void ndev_vec_isr(void *arg);
284static void ndev_irq_isr(void *arg);
285static inline uint64_t db_ioread(struct ntb_softc *, uint64_t regoff);
286static inline void db_iowrite(struct ntb_softc *, uint64_t regoff, uint64_t);
287static inline void db_iowrite_raw(struct ntb_softc *, uint64_t regoff, uint64_t);
288static int ntb_create_msix_vec(struct ntb_softc *ntb, uint32_t num_vectors);
289static void ntb_free_msix_vec(struct ntb_softc *ntb);
290static struct ntb_hw_info *ntb_get_device_info(uint32_t device_id);
291static void ntb_detect_max_mw(struct ntb_softc *ntb);
292static int ntb_detect_xeon(struct ntb_softc *ntb);
293static int ntb_detect_atom(struct ntb_softc *ntb);
294static int ntb_xeon_init_dev(struct ntb_softc *ntb);
295static int ntb_atom_init_dev(struct ntb_softc *ntb);
296static void ntb_teardown_xeon(struct ntb_softc *ntb);
297static void configure_atom_secondary_side_bars(struct ntb_softc *ntb);
298static void xeon_reset_sbar_size(struct ntb_softc *, enum ntb_bar idx,
299    enum ntb_bar regbar);
300static void xeon_set_sbar_base_and_limit(struct ntb_softc *,
301    uint64_t base_addr, enum ntb_bar idx, enum ntb_bar regbar);
302static void xeon_set_pbar_xlat(struct ntb_softc *, uint64_t base_addr,
303    enum ntb_bar idx);
304static int xeon_setup_b2b_mw(struct ntb_softc *,
305    const struct ntb_b2b_addr *addr, const struct ntb_b2b_addr *peer_addr);
306static inline bool link_is_up(struct ntb_softc *ntb);
307static inline bool atom_link_is_err(struct ntb_softc *ntb);
308static inline enum ntb_speed ntb_link_sta_speed(struct ntb_softc *);
309static inline enum ntb_width ntb_link_sta_width(struct ntb_softc *);
310static void atom_link_hb(void *arg);
311static void ntb_db_event(struct ntb_softc *ntb, uint32_t vec);
312static void recover_atom_link(void *arg);
313static bool ntb_poll_link(struct ntb_softc *ntb);
314static void save_bar_parameters(struct ntb_pci_bar_info *bar);
315static void ntb_sysctl_init(struct ntb_softc *);
316static int sysctl_handle_features(SYSCTL_HANDLER_ARGS);
317static int sysctl_handle_link_status(SYSCTL_HANDLER_ARGS);
318static int sysctl_handle_register(SYSCTL_HANDLER_ARGS);
319
/*
 * Supported devices.  device_id is matched against pci_get_devid(); each
 * entry also carries the family and the feature/workaround bits copied into
 * the softc at attach.  The final all-zero entry (NULL desc) presumably
 * terminates the table for ntb_get_device_info() -- that function is not
 * visible here.
 */
static struct ntb_hw_info pci_ids[] = {
	/* XXX: PS/SS IDs left out until they are supported. */
	{ 0x0C4E8086, "BWD Atom Processor S1200 Non-Transparent Bridge B2B",
		NTB_ATOM, 0 },

	{ 0x37258086, "JSF Xeon C35xx/C55xx Non-Transparent Bridge B2B",
		NTB_XEON, NTB_SDOORBELL_LOCKUP | NTB_B2BDOORBELL_BIT14 },
	{ 0x3C0D8086, "SNB Xeon E5/Core i7 Non-Transparent Bridge B2B",
		NTB_XEON, NTB_SDOORBELL_LOCKUP | NTB_B2BDOORBELL_BIT14 },
	{ 0x0E0D8086, "IVT Xeon E5 V2 Non-Transparent Bridge B2B", NTB_XEON,
		NTB_SDOORBELL_LOCKUP | NTB_B2BDOORBELL_BIT14 |
		    NTB_SB01BASE_LOCKUP | NTB_BAR_SIZE_4K },
	{ 0x2F0D8086, "HSX Xeon E5 V3 Non-Transparent Bridge B2B", NTB_XEON,
		NTB_SDOORBELL_LOCKUP | NTB_B2BDOORBELL_BIT14 |
		    NTB_SB01BASE_LOCKUP },
	{ 0x6F0D8086, "BDX Xeon E5 V4 Non-Transparent Bridge B2B", NTB_XEON,
		NTB_SDOORBELL_LOCKUP | NTB_B2BDOORBELL_BIT14 |
		    NTB_SB01BASE_LOCKUP },

	{ 0x00000000, NULL, NTB_ATOM, 0 }
};
341
/*
 * Atom register tables.  Doorbells are 64 bits wide and two memory windows
 * are listed (backed by NTB_B2B_BAR_1 and NTB_B2B_BAR_2).
 */
static const struct ntb_reg atom_reg = {
	.ntb_ctl = ATOM_NTBCNTL_OFFSET,
	.lnk_sta = ATOM_LINK_STATUS_OFFSET,
	.db_size = sizeof(uint64_t),
	.mw_bar = { NTB_B2B_BAR_1, NTB_B2B_BAR_2 },
};

static const struct ntb_alt_reg atom_pri_reg = {
	.db_bell = ATOM_PDOORBELL_OFFSET,
	.db_mask = ATOM_PDBMSK_OFFSET,
	.spad = ATOM_SPAD_OFFSET,
};

/* No .db_mask here: it is left zero-initialized. */
static const struct ntb_alt_reg atom_b2b_reg = {
	.db_bell = ATOM_B2B_DOORBELL_OFFSET,
	.spad = ATOM_B2B_SPAD_OFFSET,
};

/* Only the xlat offsets are populated; base/limit are compiled out below. */
static const struct ntb_xlat_reg atom_sec_xlat = {
#if 0
	/* "FIXME" says the Linux driver. */
	.bar0_base = ATOM_SBAR0BASE_OFFSET,
	.bar2_base = ATOM_SBAR2BASE_OFFSET,
	.bar4_base = ATOM_SBAR4BASE_OFFSET,

	.bar2_limit = ATOM_SBAR2LMT_OFFSET,
	.bar4_limit = ATOM_SBAR4LMT_OFFSET,
#endif

	.bar2_xlat = ATOM_SBAR2XLAT_OFFSET,
	.bar4_xlat = ATOM_SBAR4XLAT_OFFSET,
};
374
/*
 * Xeon register tables.  Doorbells are 16 bits wide and up to three memory
 * windows are listed (NTB_B2B_BAR_1..3).
 */
static const struct ntb_reg xeon_reg = {
	.ntb_ctl = XEON_NTBCNTL_OFFSET,
	.lnk_sta = XEON_LINK_STATUS_OFFSET,
	.db_size = sizeof(uint16_t),
	.mw_bar = { NTB_B2B_BAR_1, NTB_B2B_BAR_2, NTB_B2B_BAR_3 },
};

static const struct ntb_alt_reg xeon_pri_reg = {
	.db_bell = XEON_PDOORBELL_OFFSET,
	.db_mask = XEON_PDBMSK_OFFSET,
	.spad = XEON_SPAD_OFFSET,
};

/* No .db_mask here: it is left zero-initialized. */
static const struct ntb_alt_reg xeon_b2b_reg = {
	.db_bell = XEON_B2B_DOORBELL_OFFSET,
	.spad = XEON_B2B_SPAD_OFFSET,
};

static const struct ntb_xlat_reg xeon_sec_xlat = {
	.bar0_base = XEON_SBAR0BASE_OFFSET,
	.bar2_base = XEON_SBAR2BASE_OFFSET,
	.bar4_base = XEON_SBAR4BASE_OFFSET,
	.bar5_base = XEON_SBAR5BASE_OFFSET,

	.bar2_limit = XEON_SBAR2LMT_OFFSET,
	.bar4_limit = XEON_SBAR4LMT_OFFSET,
	.bar5_limit = XEON_SBAR5LMT_OFFSET,

	.bar2_xlat = XEON_SBAR2XLAT_OFFSET,
	.bar4_xlat = XEON_SBAR4XLAT_OFFSET,
	.bar5_xlat = XEON_SBAR5XLAT_OFFSET,
};
407
/*
 * Default Xeon B2B bus addresses for the upstream (usd) and downstream (dsd)
 * sides.  Not const: the hw.ntb.xeon_b2b.* tunables point at these members.
 */
static struct ntb_b2b_addr xeon_b2b_usd_addr = {
	.bar0_addr = XEON_B2B_BAR0_USD_ADDR,
	.bar2_addr64 = XEON_B2B_BAR2_USD_ADDR64,
	.bar4_addr64 = XEON_B2B_BAR4_USD_ADDR64,
	.bar4_addr32 = XEON_B2B_BAR4_USD_ADDR32,
	.bar5_addr32 = XEON_B2B_BAR5_USD_ADDR32,
};

static struct ntb_b2b_addr xeon_b2b_dsd_addr = {
	.bar0_addr = XEON_B2B_BAR0_DSD_ADDR,
	.bar2_addr64 = XEON_B2B_BAR2_DSD_ADDR64,
	.bar4_addr64 = XEON_B2B_BAR4_DSD_ADDR64,
	.bar4_addr32 = XEON_B2B_BAR4_DSD_ADDR32,
	.bar5_addr32 = XEON_B2B_BAR5_DSD_ADDR32,
};
423
/*
 * hw.ntb.xeon_b2b.*: tunables (CTLFLAG_RDTUN) backed by the members of
 * xeon_b2b_usd_addr and xeon_b2b_dsd_addr.  bar0_addr has no tunable.
 */
SYSCTL_NODE(_hw_ntb, OID_AUTO, xeon_b2b, CTLFLAG_RW, 0,
    "B2B MW segment overrides -- MUST be the same on both sides");

SYSCTL_UQUAD(_hw_ntb_xeon_b2b, OID_AUTO, usd_bar2_addr64, CTLFLAG_RDTUN,
    &xeon_b2b_usd_addr.bar2_addr64, 0, "If using B2B topology on Xeon "
    "hardware, use this 64-bit address on the bus between the NTB devices for "
    "the window at BAR2, on the upstream side of the link.  MUST be the same "
    "address on both sides.");
SYSCTL_UQUAD(_hw_ntb_xeon_b2b, OID_AUTO, usd_bar4_addr64, CTLFLAG_RDTUN,
    &xeon_b2b_usd_addr.bar4_addr64, 0, "See usd_bar2_addr64, but BAR4.");
SYSCTL_UQUAD(_hw_ntb_xeon_b2b, OID_AUTO, usd_bar4_addr32, CTLFLAG_RDTUN,
    &xeon_b2b_usd_addr.bar4_addr32, 0, "See usd_bar2_addr64, but BAR4 "
    "(split-BAR mode).");
SYSCTL_UQUAD(_hw_ntb_xeon_b2b, OID_AUTO, usd_bar5_addr32, CTLFLAG_RDTUN,
    &xeon_b2b_usd_addr.bar5_addr32, 0, "See usd_bar2_addr64, but BAR5 "
    "(split-BAR mode).");

SYSCTL_UQUAD(_hw_ntb_xeon_b2b, OID_AUTO, dsd_bar2_addr64, CTLFLAG_RDTUN,
    &xeon_b2b_dsd_addr.bar2_addr64, 0, "If using B2B topology on Xeon "
    "hardware, use this 64-bit address on the bus between the NTB devices for "
    "the window at BAR2, on the downstream side of the link.  MUST be the same"
    " address on both sides.");
SYSCTL_UQUAD(_hw_ntb_xeon_b2b, OID_AUTO, dsd_bar4_addr64, CTLFLAG_RDTUN,
    &xeon_b2b_dsd_addr.bar4_addr64, 0, "See dsd_bar2_addr64, but BAR4.");
SYSCTL_UQUAD(_hw_ntb_xeon_b2b, OID_AUTO, dsd_bar4_addr32, CTLFLAG_RDTUN,
    &xeon_b2b_dsd_addr.bar4_addr32, 0, "See dsd_bar2_addr64, but BAR4 "
    "(split-BAR mode).");
SYSCTL_UQUAD(_hw_ntb_xeon_b2b, OID_AUTO, dsd_bar5_addr32, CTLFLAG_RDTUN,
    &xeon_b2b_dsd_addr.bar5_addr32, 0, "See dsd_bar2_addr64, but BAR5 "
    "(split-BAR mode).");
454
455/*
456 * OS <-> Driver interface structures
457 */
/* malloc(9) type for this driver's allocations. */
MALLOC_DEFINE(M_NTB, "ntb_hw", "ntb_hw driver memory allocations");

/* newbus entry points: probe, attach, detach. */
static device_method_t ntb_pci_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe,     ntb_probe),
	DEVMETHOD(device_attach,    ntb_attach),
	DEVMETHOD(device_detach,    ntb_detach),
	DEVMETHOD_END
};

/* Driver "ntb_hw"; the softc size tells newbus how much state to allocate. */
static driver_t ntb_pci_driver = {
	"ntb_hw",
	ntb_pci_methods,
	sizeof(struct ntb_softc),
};

static devclass_t ntb_devclass;
/* Registers the driver on the pci bus. */
DRIVER_MODULE(ntb_hw, pci, ntb_pci_driver, ntb_devclass, NULL, NULL);
MODULE_VERSION(ntb_hw, 1);

/* Root of the hw.ntb sysctl tree. */
SYSCTL_NODE(_hw, OID_AUTO, ntb, CTLFLAG_RW, 0, "NTB sysctls");
479
480/*
481 * OS <-> Driver linkage functions
482 */
483static int
484ntb_probe(device_t device)
485{
486	struct ntb_hw_info *p;
487
488	p = ntb_get_device_info(pci_get_devid(device));
489	if (p == NULL)
490		return (ENXIO);
491
492	device_set_desc(device, p->desc);
493	return (0);
494}
495
496static int
497ntb_attach(device_t device)
498{
499	struct ntb_softc *ntb;
500	struct ntb_hw_info *p;
501	int error;
502
503	ntb = DEVICE2SOFTC(device);
504	p = ntb_get_device_info(pci_get_devid(device));
505
506	ntb->device = device;
507	ntb->type = p->type;
508	ntb->features = p->features;
509	ntb->b2b_mw_idx = B2B_MW_DISABLED;
510
511	/* Heartbeat timer for NTB_ATOM since there is no link interrupt */
512	callout_init(&ntb->heartbeat_timer, 1);
513	callout_init(&ntb->lr_timer, 1);
514	mtx_init(&ntb->db_mask_lock, "ntb hw bits", NULL, MTX_SPIN);
515	mtx_init(&ntb->ctx_lock, "ntb ctx", NULL, MTX_SPIN);
516
517	if (ntb->type == NTB_ATOM)
518		error = ntb_detect_atom(ntb);
519	else
520		error = ntb_detect_xeon(ntb);
521	if (error)
522		goto out;
523
524	ntb_detect_max_mw(ntb);
525
526	error = ntb_map_pci_bars(ntb);
527	if (error)
528		goto out;
529	if (ntb->type == NTB_ATOM)
530		error = ntb_atom_init_dev(ntb);
531	else
532		error = ntb_xeon_init_dev(ntb);
533	if (error)
534		goto out;
535	error = ntb_init_isr(ntb);
536	if (error)
537		goto out;
538	ntb_sysctl_init(ntb);
539
540	pci_enable_busmaster(ntb->device);
541
542out:
543	if (error != 0)
544		ntb_detach(device);
545	return (error);
546}
547
548static int
549ntb_detach(device_t device)
550{
551	struct ntb_softc *ntb;
552
553	ntb = DEVICE2SOFTC(device);
554
555	if (ntb->self_reg != NULL)
556		ntb_db_set_mask(ntb, ntb->db_valid_mask);
557	callout_drain(&ntb->heartbeat_timer);
558	callout_drain(&ntb->lr_timer);
559	if (ntb->type == NTB_XEON)
560		ntb_teardown_xeon(ntb);
561	ntb_teardown_interrupts(ntb);
562
563	mtx_destroy(&ntb->db_mask_lock);
564	mtx_destroy(&ntb->ctx_lock);
565
566	/*
567	 * Redetect total MWs so we unmap properly -- in case we lowered the
568	 * maximum to work around Xeon errata.
569	 */
570	ntb_detect_max_mw(ntb);
571	ntb_unmap_pci_bar(ntb);
572
573	return (0);
574}
575
576/*
577 * Driver internal routines
578 */
579static inline enum ntb_bar
580ntb_mw_to_bar(struct ntb_softc *ntb, unsigned mw)
581{
582
583	KASSERT(mw < ntb->mw_count ||
584	    (mw != B2B_MW_DISABLED && mw == ntb->b2b_mw_idx),
585	    ("%s: mw:%u > count:%u", __func__, mw, (unsigned)ntb->mw_count));
586	KASSERT(ntb->reg->mw_bar[mw] != 0, ("invalid mw"));
587
588	return (ntb->reg->mw_bar[mw]);
589}
590
591static inline bool
592bar_is_64bit(struct ntb_softc *ntb, enum ntb_bar bar)
593{
594	/* XXX This assertion could be stronger. */
595	KASSERT(bar < NTB_MAX_BARS, ("bogus bar"));
596	return (bar < NTB_B2B_BAR_2 || !HAS_FEATURE(NTB_SPLIT_BAR));
597}
598
599static inline void
600bar_get_xlat_params(struct ntb_softc *ntb, enum ntb_bar bar, uint32_t *base,
601    uint32_t *xlat, uint32_t *lmt)
602{
603	uint32_t basev, lmtv, xlatv;
604
605	switch (bar) {
606	case NTB_B2B_BAR_1:
607		basev = ntb->xlat_reg->bar2_base;
608		lmtv = ntb->xlat_reg->bar2_limit;
609		xlatv = ntb->xlat_reg->bar2_xlat;
610		break;
611	case NTB_B2B_BAR_2:
612		basev = ntb->xlat_reg->bar4_base;
613		lmtv = ntb->xlat_reg->bar4_limit;
614		xlatv = ntb->xlat_reg->bar4_xlat;
615		break;
616	case NTB_B2B_BAR_3:
617		basev = ntb->xlat_reg->bar5_base;
618		lmtv = ntb->xlat_reg->bar5_limit;
619		xlatv = ntb->xlat_reg->bar5_xlat;
620		break;
621	default:
622		KASSERT(bar >= NTB_B2B_BAR_1 && bar < NTB_MAX_BARS,
623		    ("bad bar"));
624		basev = lmtv = xlatv = 0;
625		break;
626	}
627
628	if (base != NULL)
629		*base = basev;
630	if (xlat != NULL)
631		*xlat = xlatv;
632	if (lmt != NULL)
633		*lmt = lmtv;
634}
635
636static int
637ntb_map_pci_bars(struct ntb_softc *ntb)
638{
639	int rc;
640
641	ntb->bar_info[NTB_CONFIG_BAR].pci_resource_id = PCIR_BAR(0);
642	rc = map_mmr_bar(ntb, &ntb->bar_info[NTB_CONFIG_BAR]);
643	if (rc != 0)
644		goto out;
645
646	ntb->bar_info[NTB_B2B_BAR_1].pci_resource_id = PCIR_BAR(2);
647	rc = map_memory_window_bar(ntb, &ntb->bar_info[NTB_B2B_BAR_1]);
648	if (rc != 0)
649		goto out;
650	ntb->bar_info[NTB_B2B_BAR_1].psz_off = XEON_PBAR23SZ_OFFSET;
651	ntb->bar_info[NTB_B2B_BAR_1].ssz_off = XEON_SBAR23SZ_OFFSET;
652	ntb->bar_info[NTB_B2B_BAR_1].pbarxlat_off = XEON_PBAR2XLAT_OFFSET;
653
654	ntb->bar_info[NTB_B2B_BAR_2].pci_resource_id = PCIR_BAR(4);
655	/* XXX Are shared MW B2Bs write-combining? */
656	if (HAS_FEATURE(NTB_SDOORBELL_LOCKUP) && !HAS_FEATURE(NTB_SPLIT_BAR))
657		rc = map_mmr_bar(ntb, &ntb->bar_info[NTB_B2B_BAR_2]);
658	else
659		rc = map_memory_window_bar(ntb, &ntb->bar_info[NTB_B2B_BAR_2]);
660	ntb->bar_info[NTB_B2B_BAR_2].psz_off = XEON_PBAR4SZ_OFFSET;
661	ntb->bar_info[NTB_B2B_BAR_2].ssz_off = XEON_SBAR4SZ_OFFSET;
662	ntb->bar_info[NTB_B2B_BAR_2].pbarxlat_off = XEON_PBAR4XLAT_OFFSET;
663
664	if (!HAS_FEATURE(NTB_SPLIT_BAR))
665		goto out;
666
667	ntb->bar_info[NTB_B2B_BAR_3].pci_resource_id = PCIR_BAR(5);
668	if (HAS_FEATURE(NTB_SDOORBELL_LOCKUP))
669		rc = map_mmr_bar(ntb, &ntb->bar_info[NTB_B2B_BAR_3]);
670	else
671		rc = map_memory_window_bar(ntb, &ntb->bar_info[NTB_B2B_BAR_3]);
672	ntb->bar_info[NTB_B2B_BAR_3].psz_off = XEON_PBAR5SZ_OFFSET;
673	ntb->bar_info[NTB_B2B_BAR_3].ssz_off = XEON_SBAR5SZ_OFFSET;
674	ntb->bar_info[NTB_B2B_BAR_3].pbarxlat_off = XEON_PBAR5XLAT_OFFSET;
675
676out:
677	if (rc != 0)
678		device_printf(ntb->device,
679		    "unable to allocate pci resource\n");
680	return (rc);
681}
682
683static void
684print_map_success(struct ntb_softc *ntb, struct ntb_pci_bar_info *bar,
685    const char *kind)
686{
687
688	device_printf(ntb->device,
689	    "Mapped BAR%d v:[%p-%p] p:[%p-%p] (0x%jx bytes) (%s)\n",
690	    PCI_RID2BAR(bar->pci_resource_id), bar->vbase,
691	    (char *)bar->vbase + bar->size - 1,
692	    (void *)bar->pbase, (void *)(bar->pbase + bar->size - 1),
693	    (uintmax_t)bar->size, kind);
694}
695
696static int
697map_mmr_bar(struct ntb_softc *ntb, struct ntb_pci_bar_info *bar)
698{
699
700	bar->pci_resource = bus_alloc_resource_any(ntb->device, SYS_RES_MEMORY,
701	    &bar->pci_resource_id, RF_ACTIVE);
702	if (bar->pci_resource == NULL)
703		return (ENXIO);
704
705	save_bar_parameters(bar);
706	print_map_success(ntb, bar, "mmr");
707	return (0);
708}
709
710static int
711map_memory_window_bar(struct ntb_softc *ntb, struct ntb_pci_bar_info *bar)
712{
713	int rc;
714	uint8_t bar_size_bits = 0;
715
716	bar->pci_resource = bus_alloc_resource_any(ntb->device, SYS_RES_MEMORY,
717	    &bar->pci_resource_id, RF_ACTIVE);
718
719	if (bar->pci_resource == NULL)
720		return (ENXIO);
721
722	save_bar_parameters(bar);
723	/*
724	 * Ivytown NTB BAR sizes are misreported by the hardware due to a
725	 * hardware issue. To work around this, query the size it should be
726	 * configured to by the device and modify the resource to correspond to
727	 * this new size. The BIOS on systems with this problem is required to
728	 * provide enough address space to allow the driver to make this change
729	 * safely.
730	 *
731	 * Ideally I could have just specified the size when I allocated the
732	 * resource like:
733	 *  bus_alloc_resource(ntb->device,
734	 *	SYS_RES_MEMORY, &bar->pci_resource_id, 0ul, ~0ul,
735	 *	1ul << bar_size_bits, RF_ACTIVE);
736	 * but the PCI driver does not honor the size in this call, so we have
737	 * to modify it after the fact.
738	 */
739	if (HAS_FEATURE(NTB_BAR_SIZE_4K)) {
740		if (bar->pci_resource_id == PCIR_BAR(2))
741			bar_size_bits = pci_read_config(ntb->device,
742			    XEON_PBAR23SZ_OFFSET, 1);
743		else
744			bar_size_bits = pci_read_config(ntb->device,
745			    XEON_PBAR45SZ_OFFSET, 1);
746
747		rc = bus_adjust_resource(ntb->device, SYS_RES_MEMORY,
748		    bar->pci_resource, bar->pbase,
749		    bar->pbase + (1ul << bar_size_bits) - 1);
750		if (rc != 0) {
751			device_printf(ntb->device,
752			    "unable to resize bar\n");
753			return (rc);
754		}
755
756		save_bar_parameters(bar);
757	}
758
759	/* Mark bar region as write combining to improve performance. */
760	rc = pmap_change_attr((vm_offset_t)bar->vbase, bar->size,
761	    VM_MEMATTR_WRITE_COMBINING);
762	print_map_success(ntb, bar, "mw");
763	if (rc == 0)
764		device_printf(ntb->device,
765		    "Marked BAR%d v:[%p-%p] p:[%p-%p] as "
766		    "WRITE_COMBINING.\n",
767		    PCI_RID2BAR(bar->pci_resource_id), bar->vbase,
768		    (char *)bar->vbase + bar->size - 1,
769		    (void *)bar->pbase, (void *)(bar->pbase + bar->size - 1));
770	else
771		device_printf(ntb->device,
772		    "Unable to mark BAR%d v:[%p-%p] p:[%p-%p] as "
773		    "WRITE_COMBINING: %d\n",
774		    PCI_RID2BAR(bar->pci_resource_id), bar->vbase,
775		    (char *)bar->vbase + bar->size - 1,
776		    (void *)bar->pbase, (void *)(bar->pbase + bar->size - 1),
777		    rc);
778		/* Proceed anyway */
779	return (0);
780}
781
782static void
783ntb_unmap_pci_bar(struct ntb_softc *ntb)
784{
785	struct ntb_pci_bar_info *current_bar;
786	int i;
787
788	for (i = 0; i < NTB_MAX_BARS; i++) {
789		current_bar = &ntb->bar_info[i];
790		if (current_bar->pci_resource != NULL)
791			bus_release_resource(ntb->device, SYS_RES_MEMORY,
792			    current_bar->pci_resource_id,
793			    current_bar->pci_resource);
794	}
795}
796
797static int
798ntb_setup_msix(struct ntb_softc *ntb, uint32_t num_vectors)
799{
800	uint32_t i;
801	int rc;
802
803	for (i = 0; i < num_vectors; i++) {
804		ntb->int_info[i].rid = i + 1;
805		ntb->int_info[i].res = bus_alloc_resource_any(ntb->device,
806		    SYS_RES_IRQ, &ntb->int_info[i].rid, RF_ACTIVE);
807		if (ntb->int_info[i].res == NULL) {
808			device_printf(ntb->device,
809			    "bus_alloc_resource failed\n");
810			return (ENOMEM);
811		}
812		ntb->int_info[i].tag = NULL;
813		ntb->allocated_interrupts++;
814		rc = bus_setup_intr(ntb->device, ntb->int_info[i].res,
815		    INTR_MPSAFE | INTR_TYPE_MISC, NULL, ndev_vec_isr,
816		    &ntb->msix_vec[i], &ntb->int_info[i].tag);
817		if (rc != 0) {
818			device_printf(ntb->device, "bus_setup_intr failed\n");
819			return (ENXIO);
820		}
821	}
822	return (0);
823}
824
/*
 * The Linux NTB driver drops from MSI-X to legacy INTx if a unique vector
 * cannot be allocated for each MSI-X message.  JHB seems to think remapping
 * should be okay.  This tunable should enable us to test that hypothesis
 * when someone gets their hands on some Xeon hardware.
 *
 * When nonzero, ntb_init_isr() pretends one fewer vector was allocated than
 * desired, which forces the remap path.
 */
static int ntb_force_remap_mode;
SYSCTL_INT(_hw_ntb, OID_AUTO, force_remap_mode, CTLFLAG_RDTUN,
    &ntb_force_remap_mode, 0, "If enabled, force MSI-X messages to be remapped"
    " to a smaller number of ithreads, even if the desired number are "
    "available");

/*
 * In case it is NOT ok, give consumers an abort button.
 *
 * When nonzero, ntb_remap_msix() refuses to remap and returns ENXIO.
 */
static int ntb_prefer_intx;
SYSCTL_INT(_hw_ntb, OID_AUTO, prefer_intx_to_remap, CTLFLAG_RDTUN,
    &ntb_prefer_intx, 0, "If enabled, prefer to use legacy INTx mode rather "
    "than remapping MSI-X messages over available slots (match Linux driver "
    "behavior)");
845
846/*
847 * Remap the desired number of MSI-X messages to available ithreads in a simple
848 * round-robin fashion.
849 */
850static int
851ntb_remap_msix(device_t dev, uint32_t desired, uint32_t avail)
852{
853	u_int *vectors;
854	uint32_t i;
855	int rc;
856
857	if (ntb_prefer_intx != 0)
858		return (ENXIO);
859
860	vectors = malloc(desired * sizeof(*vectors), M_NTB, M_ZERO | M_WAITOK);
861
862	for (i = 0; i < desired; i++)
863		vectors[i] = (i % avail) + 1;
864
865	rc = pci_remap_msix(dev, desired, vectors);
866	free(vectors, M_NTB);
867	return (rc);
868}
869
870static int
871ntb_init_isr(struct ntb_softc *ntb)
872{
873	uint32_t desired_vectors, num_vectors;
874	int rc;
875
876	ntb->allocated_interrupts = 0;
877	ntb->last_ts = ticks;
878
879	/*
880	 * Mask all doorbell interrupts.
881	 */
882	ntb_db_set_mask(ntb, ntb->db_valid_mask);
883
884	num_vectors = desired_vectors = MIN(pci_msix_count(ntb->device),
885	    ntb->db_count);
886	if (desired_vectors >= 1) {
887		rc = pci_alloc_msix(ntb->device, &num_vectors);
888
889		if (ntb_force_remap_mode != 0 && rc == 0 &&
890		    num_vectors == desired_vectors)
891			num_vectors--;
892
893		if (rc == 0 && num_vectors < desired_vectors) {
894			rc = ntb_remap_msix(ntb->device, desired_vectors,
895			    num_vectors);
896			if (rc == 0)
897				num_vectors = desired_vectors;
898			else
899				pci_release_msi(ntb->device);
900		}
901		if (rc != 0)
902			num_vectors = 1;
903	} else
904		num_vectors = 1;
905
906	if (ntb->type == NTB_XEON && num_vectors < ntb->db_vec_count) {
907		ntb->db_vec_count = 1;
908		ntb->db_vec_shift = XEON_DB_TOTAL_SHIFT;
909		rc = ntb_setup_legacy_interrupt(ntb);
910	} else {
911		ntb_create_msix_vec(ntb, num_vectors);
912		rc = ntb_setup_msix(ntb, num_vectors);
913	}
914	if (rc != 0) {
915		device_printf(ntb->device,
916		    "Error allocating interrupts: %d\n", rc);
917		ntb_free_msix_vec(ntb);
918	}
919
920	return (rc);
921}
922
923static int
924ntb_setup_legacy_interrupt(struct ntb_softc *ntb)
925{
926	int rc;
927
928	ntb->int_info[0].rid = 0;
929	ntb->int_info[0].res = bus_alloc_resource_any(ntb->device, SYS_RES_IRQ,
930	    &ntb->int_info[0].rid, RF_SHAREABLE|RF_ACTIVE);
931	if (ntb->int_info[0].res == NULL) {
932		device_printf(ntb->device, "bus_alloc_resource failed\n");
933		return (ENOMEM);
934	}
935
936	ntb->int_info[0].tag = NULL;
937	ntb->allocated_interrupts = 1;
938
939	rc = bus_setup_intr(ntb->device, ntb->int_info[0].res,
940	    INTR_MPSAFE | INTR_TYPE_MISC, NULL, ndev_irq_isr,
941	    ntb, &ntb->int_info[0].tag);
942	if (rc != 0) {
943		device_printf(ntb->device, "bus_setup_intr failed\n");
944		return (ENXIO);
945	}
946
947	return (0);
948}
949
950static void
951ntb_teardown_interrupts(struct ntb_softc *ntb)
952{
953	struct ntb_int_info *current_int;
954	int i;
955
956	for (i = 0; i < ntb->allocated_interrupts; i++) {
957		current_int = &ntb->int_info[i];
958		if (current_int->tag != NULL)
959			bus_teardown_intr(ntb->device, current_int->res,
960			    current_int->tag);
961
962		if (current_int->res != NULL)
963			bus_release_resource(ntb->device, SYS_RES_IRQ,
964			    rman_get_rid(current_int->res), current_int->res);
965	}
966
967	ntb_free_msix_vec(ntb);
968	pci_release_msi(ntb->device);
969}
970
971/*
972 * Doorbell register and mask are 64-bit on Atom, 16-bit on Xeon.  Abstract it
973 * out to make code clearer.
974 */
975static inline uint64_t
976db_ioread(struct ntb_softc *ntb, uint64_t regoff)
977{
978
979	if (ntb->type == NTB_ATOM)
980		return (ntb_reg_read(8, regoff));
981
982	KASSERT(ntb->type == NTB_XEON, ("bad ntb type"));
983
984	return (ntb_reg_read(2, regoff));
985}
986
987static inline void
988db_iowrite(struct ntb_softc *ntb, uint64_t regoff, uint64_t val)
989{
990
991	KASSERT((val & ~ntb->db_valid_mask) == 0,
992	    ("%s: Invalid bits 0x%jx (valid: 0x%jx)", __func__,
993	     (uintmax_t)(val & ~ntb->db_valid_mask),
994	     (uintmax_t)ntb->db_valid_mask));
995
996	if (regoff == ntb->self_reg->db_mask)
997		DB_MASK_ASSERT(ntb, MA_OWNED);
998	db_iowrite_raw(ntb, regoff, val);
999}
1000
1001static inline void
1002db_iowrite_raw(struct ntb_softc *ntb, uint64_t regoff, uint64_t val)
1003{
1004
1005	if (ntb->type == NTB_ATOM) {
1006		ntb_reg_write(8, regoff, val);
1007		return;
1008	}
1009
1010	KASSERT(ntb->type == NTB_XEON, ("bad ntb type"));
1011	ntb_reg_write(2, regoff, (uint16_t)val);
1012}
1013
1014void
1015ntb_db_set_mask(struct ntb_softc *ntb, uint64_t bits)
1016{
1017
1018	DB_MASK_LOCK(ntb);
1019	ntb->db_mask |= bits;
1020	db_iowrite(ntb, ntb->self_reg->db_mask, ntb->db_mask);
1021	DB_MASK_UNLOCK(ntb);
1022}
1023
1024void
1025ntb_db_clear_mask(struct ntb_softc *ntb, uint64_t bits)
1026{
1027
1028	KASSERT((bits & ~ntb->db_valid_mask) == 0,
1029	    ("%s: Invalid bits 0x%jx (valid: 0x%jx)", __func__,
1030	     (uintmax_t)(bits & ~ntb->db_valid_mask),
1031	     (uintmax_t)ntb->db_valid_mask));
1032
1033	DB_MASK_LOCK(ntb);
1034	ntb->db_mask &= ~bits;
1035	db_iowrite(ntb, ntb->self_reg->db_mask, ntb->db_mask);
1036	DB_MASK_UNLOCK(ntb);
1037}
1038
1039uint64_t
1040ntb_db_read(struct ntb_softc *ntb)
1041{
1042
1043	return (db_ioread(ntb, ntb->self_reg->db_bell));
1044}
1045
1046void
1047ntb_db_clear(struct ntb_softc *ntb, uint64_t bits)
1048{
1049
1050	KASSERT((bits & ~ntb->db_valid_mask) == 0,
1051	    ("%s: Invalid bits 0x%jx (valid: 0x%jx)", __func__,
1052	     (uintmax_t)(bits & ~ntb->db_valid_mask),
1053	     (uintmax_t)ntb->db_valid_mask));
1054
1055	db_iowrite(ntb, ntb->self_reg->db_bell, bits);
1056}
1057
1058static inline uint64_t
1059ntb_vec_mask(struct ntb_softc *ntb, uint64_t db_vector)
1060{
1061	uint64_t shift, mask;
1062
1063	shift = ntb->db_vec_shift;
1064	mask = (1ull << shift) - 1;
1065	return (mask << (shift * db_vector));
1066}
1067
1068static void
1069ntb_interrupt(struct ntb_softc *ntb, uint32_t vec)
1070{
1071	uint64_t vec_mask;
1072
1073	ntb->last_ts = ticks;
1074	vec_mask = ntb_vec_mask(ntb, vec);
1075
1076	if ((vec_mask & ntb->db_link_mask) != 0) {
1077		if (ntb_poll_link(ntb))
1078			ntb_link_event(ntb);
1079	}
1080
1081	if ((vec_mask & ntb->db_valid_mask) != 0)
1082		ntb_db_event(ntb, vec);
1083}
1084
1085static void
1086ndev_vec_isr(void *arg)
1087{
1088	struct ntb_vec *nvec = arg;
1089
1090	ntb_interrupt(nvec->ntb, nvec->num);
1091}
1092
/*
 * Single-interrupt handler.  'arg' is the softc.  When MSI-X could not be
 * set up there is only one vector, so everything is reported as vector 0.
 */
static void
ndev_irq_isr(void *arg)
{
	struct ntb_softc *ntb;

	ntb = arg;
	ntb_interrupt(ntb, 0);
}
1099
1100static int
1101ntb_create_msix_vec(struct ntb_softc *ntb, uint32_t num_vectors)
1102{
1103	uint32_t i;
1104
1105	ntb->msix_vec = malloc(num_vectors * sizeof(*ntb->msix_vec), M_NTB,
1106	    M_ZERO | M_WAITOK);
1107	for (i = 0; i < num_vectors; i++) {
1108		ntb->msix_vec[i].num = i;
1109		ntb->msix_vec[i].ntb = ntb;
1110	}
1111
1112	return (0);
1113}
1114
1115static void
1116ntb_free_msix_vec(struct ntb_softc *ntb)
1117{
1118
1119	if (ntb->msix_vec == NULL)
1120		return;
1121
1122	free(ntb->msix_vec, M_NTB);
1123	ntb->msix_vec = NULL;
1124}
1125
1126static struct ntb_hw_info *
1127ntb_get_device_info(uint32_t device_id)
1128{
1129	struct ntb_hw_info *ep = pci_ids;
1130
1131	while (ep->device_id) {
1132		if (ep->device_id == device_id)
1133			return (ep);
1134		++ep;
1135	}
1136	return (NULL);
1137}
1138
1139static void
1140ntb_teardown_xeon(struct ntb_softc *ntb)
1141{
1142
1143	if (ntb->reg != NULL)
1144		ntb_link_disable(ntb);
1145}
1146
1147static void
1148ntb_detect_max_mw(struct ntb_softc *ntb)
1149{
1150
1151	if (ntb->type == NTB_ATOM) {
1152		ntb->mw_count = ATOM_MW_COUNT;
1153		return;
1154	}
1155
1156	if (HAS_FEATURE(NTB_SPLIT_BAR))
1157		ntb->mw_count = XEON_HSX_SPLIT_MW_COUNT;
1158	else
1159		ntb->mw_count = XEON_SNB_MW_COUNT;
1160}
1161
1162static int
1163ntb_detect_xeon(struct ntb_softc *ntb)
1164{
1165	uint8_t ppd, conn_type;
1166
1167	ppd = pci_read_config(ntb->device, NTB_PPD_OFFSET, 1);
1168	ntb->ppd = ppd;
1169
1170	if ((ppd & XEON_PPD_DEV_TYPE) != 0)
1171		ntb->dev_type = NTB_DEV_DSD;
1172	else
1173		ntb->dev_type = NTB_DEV_USD;
1174
1175	if ((ppd & XEON_PPD_SPLIT_BAR) != 0)
1176		ntb->features |= NTB_SPLIT_BAR;
1177
1178	/* SB01BASE_LOCKUP errata is a superset of SDOORBELL errata */
1179	if (HAS_FEATURE(NTB_SB01BASE_LOCKUP))
1180		ntb->features |= NTB_SDOORBELL_LOCKUP;
1181
1182	conn_type = ppd & XEON_PPD_CONN_TYPE;
1183	switch (conn_type) {
1184	case NTB_CONN_B2B:
1185		ntb->conn_type = conn_type;
1186		break;
1187	case NTB_CONN_RP:
1188	case NTB_CONN_TRANSPARENT:
1189	default:
1190		device_printf(ntb->device, "Unsupported connection type: %u\n",
1191		    (unsigned)conn_type);
1192		return (ENXIO);
1193	}
1194	return (0);
1195}
1196
1197static int
1198ntb_detect_atom(struct ntb_softc *ntb)
1199{
1200	uint32_t ppd, conn_type;
1201
1202	ppd = pci_read_config(ntb->device, NTB_PPD_OFFSET, 4);
1203	ntb->ppd = ppd;
1204
1205	if ((ppd & ATOM_PPD_DEV_TYPE) != 0)
1206		ntb->dev_type = NTB_DEV_DSD;
1207	else
1208		ntb->dev_type = NTB_DEV_USD;
1209
1210	conn_type = (ppd & ATOM_PPD_CONN_TYPE) >> 8;
1211	switch (conn_type) {
1212	case NTB_CONN_B2B:
1213		ntb->conn_type = conn_type;
1214		break;
1215	default:
1216		device_printf(ntb->device, "Unsupported NTB configuration\n");
1217		return (ENXIO);
1218	}
1219	return (0);
1220}
1221
1222static int
1223ntb_xeon_init_dev(struct ntb_softc *ntb)
1224{
1225	int rc;
1226
1227	ntb->spad_count		= XEON_SPAD_COUNT;
1228	ntb->db_count		= XEON_DB_COUNT;
1229	ntb->db_link_mask	= XEON_DB_LINK_BIT;
1230	ntb->db_vec_count	= XEON_DB_MSIX_VECTOR_COUNT;
1231	ntb->db_vec_shift	= XEON_DB_MSIX_VECTOR_SHIFT;
1232
1233	if (ntb->conn_type != NTB_CONN_B2B) {
1234		device_printf(ntb->device, "Connection type %d not supported\n",
1235		    ntb->conn_type);
1236		return (ENXIO);
1237	}
1238
1239	ntb->reg = &xeon_reg;
1240	ntb->self_reg = &xeon_pri_reg;
1241	ntb->peer_reg = &xeon_b2b_reg;
1242	ntb->xlat_reg = &xeon_sec_xlat;
1243
1244	/*
1245	 * There is a Xeon hardware errata related to writes to SDOORBELL or
1246	 * B2BDOORBELL in conjunction with inbound access to NTB MMIO space,
1247	 * which may hang the system.  To workaround this use the second memory
1248	 * window to access the interrupt and scratch pad registers on the
1249	 * remote system.
1250	 */
1251	if (HAS_FEATURE(NTB_SDOORBELL_LOCKUP))
1252		/* Use the last MW for mapping remote spad */
1253		ntb->b2b_mw_idx = ntb->mw_count - 1;
1254	else if (HAS_FEATURE(NTB_B2BDOORBELL_BIT14))
1255		/*
1256		 * HW Errata on bit 14 of b2bdoorbell register.  Writes will not be
1257		 * mirrored to the remote system.  Shrink the number of bits by one,
1258		 * since bit 14 is the last bit.
1259		 *
1260		 * On REGS_THRU_MW errata mode, we don't use the b2bdoorbell register
1261		 * anyway.  Nor for non-B2B connection types.
1262		 */
1263		ntb->db_count = XEON_DB_COUNT - 1;
1264
1265	ntb->db_valid_mask = (1ull << ntb->db_count) - 1;
1266
1267	if (ntb->dev_type == NTB_DEV_USD)
1268		rc = xeon_setup_b2b_mw(ntb, &xeon_b2b_dsd_addr,
1269		    &xeon_b2b_usd_addr);
1270	else
1271		rc = xeon_setup_b2b_mw(ntb, &xeon_b2b_usd_addr,
1272		    &xeon_b2b_dsd_addr);
1273	if (rc != 0)
1274		return (rc);
1275
1276	/* Enable Bus Master and Memory Space on the secondary side */
1277	ntb_reg_write(2, XEON_PCICMD_OFFSET,
1278	    PCIM_CMD_MEMEN | PCIM_CMD_BUSMASTEREN);
1279
1280	/* Enable link training */
1281	ntb_link_enable(ntb, NTB_SPEED_AUTO, NTB_WIDTH_AUTO);
1282
1283	return (0);
1284}
1285
1286static int
1287ntb_atom_init_dev(struct ntb_softc *ntb)
1288{
1289
1290	KASSERT(ntb->conn_type == NTB_CONN_B2B,
1291	    ("Unsupported NTB configuration (%d)\n", ntb->conn_type));
1292
1293	ntb->spad_count		 = ATOM_SPAD_COUNT;
1294	ntb->db_count		 = ATOM_DB_COUNT;
1295	ntb->db_vec_count	 = ATOM_DB_MSIX_VECTOR_COUNT;
1296	ntb->db_vec_shift	 = ATOM_DB_MSIX_VECTOR_SHIFT;
1297	ntb->db_valid_mask	 = (1ull << ntb->db_count) - 1;
1298
1299	ntb->reg = &atom_reg;
1300	ntb->self_reg = &atom_pri_reg;
1301	ntb->peer_reg = &atom_b2b_reg;
1302	ntb->xlat_reg = &atom_sec_xlat;
1303
1304	/*
1305	 * FIXME - MSI-X bug on early Atom HW, remove once internal issue is
1306	 * resolved.  Mask transaction layer internal parity errors.
1307	 */
1308	pci_write_config(ntb->device, 0xFC, 0x4, 4);
1309
1310	configure_atom_secondary_side_bars(ntb);
1311
1312	/* Enable Bus Master and Memory Space on the secondary side */
1313	ntb_reg_write(2, ATOM_PCICMD_OFFSET,
1314	    PCIM_CMD_MEMEN | PCIM_CMD_BUSMASTEREN);
1315
1316	/* Initiate PCI-E link training */
1317	ntb_link_enable(ntb, NTB_SPEED_AUTO, NTB_WIDTH_AUTO);
1318
1319	callout_reset(&ntb->heartbeat_timer, 0, atom_link_hb, ntb);
1320
1321	return (0);
1322}
1323
1324/* XXX: Linux driver doesn't seem to do any of this for Atom. */
1325static void
1326configure_atom_secondary_side_bars(struct ntb_softc *ntb)
1327{
1328
1329	if (ntb->dev_type == NTB_DEV_USD) {
1330		ntb_reg_write(8, ATOM_PBAR2XLAT_OFFSET,
1331		    XEON_B2B_BAR2_DSD_ADDR64);
1332		ntb_reg_write(8, ATOM_PBAR4XLAT_OFFSET,
1333		    XEON_B2B_BAR4_DSD_ADDR64);
1334		ntb_reg_write(8, ATOM_MBAR23_OFFSET, XEON_B2B_BAR2_USD_ADDR64);
1335		ntb_reg_write(8, ATOM_MBAR45_OFFSET, XEON_B2B_BAR4_USD_ADDR64);
1336	} else {
1337		ntb_reg_write(8, ATOM_PBAR2XLAT_OFFSET,
1338		    XEON_B2B_BAR2_USD_ADDR64);
1339		ntb_reg_write(8, ATOM_PBAR4XLAT_OFFSET,
1340		    XEON_B2B_BAR4_USD_ADDR64);
1341		ntb_reg_write(8, ATOM_MBAR23_OFFSET, XEON_B2B_BAR2_DSD_ADDR64);
1342		ntb_reg_write(8, ATOM_MBAR45_OFFSET, XEON_B2B_BAR4_DSD_ADDR64);
1343	}
1344}
1345
1346
1347/*
1348 * When working around Xeon SDOORBELL errata by remapping remote registers in a
1349 * MW, limit the B2B MW to half a MW.  By sharing a MW, half the shared MW
1350 * remains for use by a higher layer.
1351 *
1352 * Will only be used if working around SDOORBELL errata and the BIOS-configured
1353 * MW size is sufficiently large.
1354 */
1355static unsigned int ntb_b2b_mw_share;
1356SYSCTL_UINT(_hw_ntb, OID_AUTO, b2b_mw_share, CTLFLAG_RDTUN, &ntb_b2b_mw_share,
1357    0, "If enabled (non-zero), prefer to share half of the B2B peer register "
1358    "MW with higher level consumers.  Both sides of the NTB MUST set the same "
1359    "value here.");
1360
1361static void
1362xeon_reset_sbar_size(struct ntb_softc *ntb, enum ntb_bar idx,
1363    enum ntb_bar regbar)
1364{
1365	struct ntb_pci_bar_info *bar;
1366	uint8_t bar_sz;
1367
1368	if (!HAS_FEATURE(NTB_SPLIT_BAR) && idx >= NTB_B2B_BAR_3)
1369		return;
1370
1371	bar = &ntb->bar_info[idx];
1372	bar_sz = pci_read_config(ntb->device, bar->psz_off, 1);
1373	if (idx == regbar) {
1374		if (ntb->b2b_off != 0)
1375			bar_sz--;
1376		else
1377			bar_sz = 0;
1378	}
1379	pci_write_config(ntb->device, bar->ssz_off, bar_sz, 1);
1380	bar_sz = pci_read_config(ntb->device, bar->ssz_off, 1);
1381	(void)bar_sz;
1382}
1383
1384static void
1385xeon_set_sbar_base_and_limit(struct ntb_softc *ntb, uint64_t bar_addr,
1386    enum ntb_bar idx, enum ntb_bar regbar)
1387{
1388	uint64_t reg_val;
1389	uint32_t base_reg, lmt_reg;
1390
1391	bar_get_xlat_params(ntb, idx, &base_reg, NULL, &lmt_reg);
1392	if (idx == regbar)
1393		bar_addr += ntb->b2b_off;
1394
1395	if (!bar_is_64bit(ntb, idx)) {
1396		ntb_reg_write(4, base_reg, bar_addr);
1397		reg_val = ntb_reg_read(4, base_reg);
1398		(void)reg_val;
1399
1400		ntb_reg_write(4, lmt_reg, bar_addr);
1401		reg_val = ntb_reg_read(4, lmt_reg);
1402		(void)reg_val;
1403	} else {
1404		ntb_reg_write(8, base_reg, bar_addr);
1405		reg_val = ntb_reg_read(8, base_reg);
1406		(void)reg_val;
1407
1408		ntb_reg_write(8, lmt_reg, bar_addr);
1409		reg_val = ntb_reg_read(8, lmt_reg);
1410		(void)reg_val;
1411	}
1412}
1413
1414static void
1415xeon_set_pbar_xlat(struct ntb_softc *ntb, uint64_t base_addr, enum ntb_bar idx)
1416{
1417	struct ntb_pci_bar_info *bar;
1418
1419	bar = &ntb->bar_info[idx];
1420	if (HAS_FEATURE(NTB_SPLIT_BAR) && idx >= NTB_B2B_BAR_2) {
1421		ntb_reg_write(4, bar->pbarxlat_off, base_addr);
1422		base_addr = ntb_reg_read(4, bar->pbarxlat_off);
1423	} else {
1424		ntb_reg_write(8, bar->pbarxlat_off, base_addr);
1425		base_addr = ntb_reg_read(8, bar->pbarxlat_off);
1426	}
1427	(void)base_addr;
1428}
1429
1430static int
1431xeon_setup_b2b_mw(struct ntb_softc *ntb, const struct ntb_b2b_addr *addr,
1432    const struct ntb_b2b_addr *peer_addr)
1433{
1434	struct ntb_pci_bar_info *b2b_bar;
1435	vm_size_t bar_size;
1436	uint64_t bar_addr;
1437	enum ntb_bar b2b_bar_num, i;
1438
1439	if (ntb->b2b_mw_idx == B2B_MW_DISABLED) {
1440		b2b_bar = NULL;
1441		b2b_bar_num = NTB_CONFIG_BAR;
1442		ntb->b2b_off = 0;
1443	} else {
1444		b2b_bar_num = ntb_mw_to_bar(ntb, ntb->b2b_mw_idx);
1445		KASSERT(b2b_bar_num > 0 && b2b_bar_num < NTB_MAX_BARS,
1446		    ("invalid b2b mw bar"));
1447
1448		b2b_bar = &ntb->bar_info[b2b_bar_num];
1449		bar_size = b2b_bar->size;
1450
1451		if (ntb_b2b_mw_share != 0 &&
1452		    (bar_size >> 1) >= XEON_B2B_MIN_SIZE)
1453			ntb->b2b_off = bar_size >> 1;
1454		else if (bar_size >= XEON_B2B_MIN_SIZE) {
1455			ntb->b2b_off = 0;
1456			ntb->mw_count--;
1457		} else {
1458			device_printf(ntb->device,
1459			    "B2B bar size is too small!\n");
1460			return (EIO);
1461		}
1462	}
1463
1464	/*
1465	 * Reset the secondary bar sizes to match the primary bar sizes.
1466	 * (Except, disable or halve the size of the B2B secondary bar.)
1467	 */
1468	for (i = NTB_B2B_BAR_1; i < NTB_MAX_BARS; i++)
1469		xeon_reset_sbar_size(ntb, i, b2b_bar_num);
1470
1471	bar_addr = 0;
1472	if (b2b_bar_num == NTB_CONFIG_BAR)
1473		bar_addr = addr->bar0_addr;
1474	else if (b2b_bar_num == NTB_B2B_BAR_1)
1475		bar_addr = addr->bar2_addr64;
1476	else if (b2b_bar_num == NTB_B2B_BAR_2 && !HAS_FEATURE(NTB_SPLIT_BAR))
1477		bar_addr = addr->bar4_addr64;
1478	else if (b2b_bar_num == NTB_B2B_BAR_2)
1479		bar_addr = addr->bar4_addr32;
1480	else if (b2b_bar_num == NTB_B2B_BAR_3)
1481		bar_addr = addr->bar5_addr32;
1482	else
1483		KASSERT(false, ("invalid bar"));
1484
1485	ntb_reg_write(8, XEON_SBAR0BASE_OFFSET, bar_addr);
1486
1487	/*
1488	 * Other SBARs are normally hit by the PBAR xlat, except for the b2b
1489	 * register BAR.  The B2B BAR is either disabled above or configured
1490	 * half-size.  It starts at PBAR xlat + offset.
1491	 *
1492	 * Also set up incoming BAR limits == base (zero length window).
1493	 */
1494	xeon_set_sbar_base_and_limit(ntb, addr->bar2_addr64, NTB_B2B_BAR_1,
1495	    b2b_bar_num);
1496	if (HAS_FEATURE(NTB_SPLIT_BAR)) {
1497		xeon_set_sbar_base_and_limit(ntb, addr->bar4_addr32,
1498		    NTB_B2B_BAR_2, b2b_bar_num);
1499		xeon_set_sbar_base_and_limit(ntb, addr->bar5_addr32,
1500		    NTB_B2B_BAR_3, b2b_bar_num);
1501	} else
1502		xeon_set_sbar_base_and_limit(ntb, addr->bar4_addr64,
1503		    NTB_B2B_BAR_2, b2b_bar_num);
1504
1505	/* Zero incoming translation addrs */
1506	ntb_reg_write(8, XEON_SBAR2XLAT_OFFSET, 0);
1507	ntb_reg_write(8, XEON_SBAR4XLAT_OFFSET, 0);
1508
1509	/* Zero outgoing translation limits (whole bar size windows) */
1510	ntb_reg_write(8, XEON_PBAR2LMT_OFFSET, 0);
1511	ntb_reg_write(8, XEON_PBAR4LMT_OFFSET, 0);
1512
1513	/* Set outgoing translation offsets */
1514	xeon_set_pbar_xlat(ntb, peer_addr->bar2_addr64, NTB_B2B_BAR_1);
1515	if (HAS_FEATURE(NTB_SPLIT_BAR)) {
1516		xeon_set_pbar_xlat(ntb, peer_addr->bar4_addr32, NTB_B2B_BAR_2);
1517		xeon_set_pbar_xlat(ntb, peer_addr->bar5_addr32, NTB_B2B_BAR_3);
1518	} else
1519		xeon_set_pbar_xlat(ntb, peer_addr->bar4_addr64, NTB_B2B_BAR_2);
1520
1521	/* Set the translation offset for B2B registers */
1522	bar_addr = 0;
1523	if (b2b_bar_num == NTB_CONFIG_BAR)
1524		bar_addr = peer_addr->bar0_addr;
1525	else if (b2b_bar_num == NTB_B2B_BAR_1)
1526		bar_addr = peer_addr->bar2_addr64;
1527	else if (b2b_bar_num == NTB_B2B_BAR_2 && !HAS_FEATURE(NTB_SPLIT_BAR))
1528		bar_addr = peer_addr->bar4_addr64;
1529	else if (b2b_bar_num == NTB_B2B_BAR_2)
1530		bar_addr = peer_addr->bar4_addr32;
1531	else if (b2b_bar_num == NTB_B2B_BAR_3)
1532		bar_addr = peer_addr->bar5_addr32;
1533	else
1534		KASSERT(false, ("invalid bar"));
1535
1536	/*
1537	 * B2B_XLAT_OFFSET is a 64-bit register but can only be written 32 bits
1538	 * at a time.
1539	 */
1540	ntb_reg_write(4, XEON_B2B_XLAT_OFFSETL, bar_addr & 0xffffffff);
1541	ntb_reg_write(4, XEON_B2B_XLAT_OFFSETU, bar_addr >> 32);
1542	return (0);
1543}
1544
1545static inline bool
1546link_is_up(struct ntb_softc *ntb)
1547{
1548
1549	if (ntb->type == NTB_XEON) {
1550		if (ntb->conn_type == NTB_CONN_TRANSPARENT)
1551			return (true);
1552		return ((ntb->lnk_sta & NTB_LINK_STATUS_ACTIVE) != 0);
1553	}
1554
1555	KASSERT(ntb->type == NTB_ATOM, ("ntb type"));
1556	return ((ntb->ntb_ctl & ATOM_CNTL_LINK_DOWN) == 0);
1557}
1558
1559static inline bool
1560atom_link_is_err(struct ntb_softc *ntb)
1561{
1562	uint32_t status;
1563
1564	KASSERT(ntb->type == NTB_ATOM, ("ntb type"));
1565
1566	status = ntb_reg_read(4, ATOM_LTSSMSTATEJMP_OFFSET);
1567	if ((status & ATOM_LTSSMSTATEJMP_FORCEDETECT) != 0)
1568		return (true);
1569
1570	status = ntb_reg_read(4, ATOM_IBSTERRRCRVSTS0_OFFSET);
1571	return ((status & ATOM_IBIST_ERR_OFLOW) != 0);
1572}
1573
1574/* Atom does not have link status interrupt, poll on that platform */
1575static void
1576atom_link_hb(void *arg)
1577{
1578	struct ntb_softc *ntb = arg;
1579	sbintime_t timo, poll_ts;
1580
1581	timo = NTB_HB_TIMEOUT * hz;
1582	poll_ts = ntb->last_ts + timo;
1583
1584	/*
1585	 * Delay polling the link status if an interrupt was received, unless
1586	 * the cached link status says the link is down.
1587	 */
1588	if ((sbintime_t)ticks - poll_ts < 0 && link_is_up(ntb)) {
1589		timo = poll_ts - ticks;
1590		goto out;
1591	}
1592
1593	if (ntb_poll_link(ntb))
1594		ntb_link_event(ntb);
1595
1596	if (!link_is_up(ntb) && atom_link_is_err(ntb)) {
1597		/* Link is down with error, proceed with recovery */
1598		callout_reset(&ntb->lr_timer, 0, recover_atom_link, ntb);
1599		return;
1600	}
1601
1602out:
1603	callout_reset(&ntb->heartbeat_timer, timo, atom_link_hb, ntb);
1604}
1605
1606static void
1607atom_perform_link_restart(struct ntb_softc *ntb)
1608{
1609	uint32_t status;
1610
1611	/* Driver resets the NTB ModPhy lanes - magic! */
1612	ntb_reg_write(1, ATOM_MODPHY_PCSREG6, 0xe0);
1613	ntb_reg_write(1, ATOM_MODPHY_PCSREG4, 0x40);
1614	ntb_reg_write(1, ATOM_MODPHY_PCSREG4, 0x60);
1615	ntb_reg_write(1, ATOM_MODPHY_PCSREG6, 0x60);
1616
1617	/* Driver waits 100ms to allow the NTB ModPhy to settle */
1618	pause("ModPhy", hz / 10);
1619
1620	/* Clear AER Errors, write to clear */
1621	status = ntb_reg_read(4, ATOM_ERRCORSTS_OFFSET);
1622	status &= PCIM_AER_COR_REPLAY_ROLLOVER;
1623	ntb_reg_write(4, ATOM_ERRCORSTS_OFFSET, status);
1624
1625	/* Clear unexpected electrical idle event in LTSSM, write to clear */
1626	status = ntb_reg_read(4, ATOM_LTSSMERRSTS0_OFFSET);
1627	status |= ATOM_LTSSMERRSTS0_UNEXPECTEDEI;
1628	ntb_reg_write(4, ATOM_LTSSMERRSTS0_OFFSET, status);
1629
1630	/* Clear DeSkew Buffer error, write to clear */
1631	status = ntb_reg_read(4, ATOM_DESKEWSTS_OFFSET);
1632	status |= ATOM_DESKEWSTS_DBERR;
1633	ntb_reg_write(4, ATOM_DESKEWSTS_OFFSET, status);
1634
1635	status = ntb_reg_read(4, ATOM_IBSTERRRCRVSTS0_OFFSET);
1636	status &= ATOM_IBIST_ERR_OFLOW;
1637	ntb_reg_write(4, ATOM_IBSTERRRCRVSTS0_OFFSET, status);
1638
1639	/* Releases the NTB state machine to allow the link to retrain */
1640	status = ntb_reg_read(4, ATOM_LTSSMSTATEJMP_OFFSET);
1641	status &= ~ATOM_LTSSMSTATEJMP_FORCEDETECT;
1642	ntb_reg_write(4, ATOM_LTSSMSTATEJMP_OFFSET, status);
1643}
1644
1645/*
1646 * ntb_set_ctx() - associate a driver context with an ntb device
1647 * @ntb:        NTB device context
1648 * @ctx:        Driver context
1649 * @ctx_ops:    Driver context operations
1650 *
1651 * Associate a driver context and operations with a ntb device.  The context is
1652 * provided by the client driver, and the driver may associate a different
1653 * context with each ntb device.
1654 *
1655 * Return: Zero if the context is associated, otherwise an error number.
1656 */
1657int
1658ntb_set_ctx(struct ntb_softc *ntb, void *ctx, const struct ntb_ctx_ops *ops)
1659{
1660
1661	if (ctx == NULL || ops == NULL)
1662		return (EINVAL);
1663	if (ntb->ctx_ops != NULL)
1664		return (EINVAL);
1665
1666	CTX_LOCK(ntb);
1667	if (ntb->ctx_ops != NULL) {
1668		CTX_UNLOCK(ntb);
1669		return (EINVAL);
1670	}
1671	ntb->ntb_ctx = ctx;
1672	ntb->ctx_ops = ops;
1673	CTX_UNLOCK(ntb);
1674
1675	return (0);
1676}
1677
1678/*
1679 * It is expected that this will only be used from contexts where the ctx_lock
1680 * is not needed to protect ntb_ctx lifetime.
1681 */
1682void *
1683ntb_get_ctx(struct ntb_softc *ntb, const struct ntb_ctx_ops **ops)
1684{
1685
1686	KASSERT(ntb->ntb_ctx != NULL && ntb->ctx_ops != NULL, ("bogus"));
1687	if (ops != NULL)
1688		*ops = ntb->ctx_ops;
1689	return (ntb->ntb_ctx);
1690}
1691
1692/*
1693 * ntb_clear_ctx() - disassociate any driver context from an ntb device
1694 * @ntb:        NTB device context
1695 *
1696 * Clear any association that may exist between a driver context and the ntb
1697 * device.
1698 */
1699void
1700ntb_clear_ctx(struct ntb_softc *ntb)
1701{
1702
1703	CTX_LOCK(ntb);
1704	ntb->ntb_ctx = NULL;
1705	ntb->ctx_ops = NULL;
1706	CTX_UNLOCK(ntb);
1707}
1708
1709/*
1710 * ntb_link_event() - notify driver context of a change in link status
1711 * @ntb:        NTB device context
1712 *
1713 * Notify the driver context that the link status may have changed.  The driver
1714 * should call ntb_link_is_up() to get the current status.
1715 */
1716void
1717ntb_link_event(struct ntb_softc *ntb)
1718{
1719
1720	CTX_LOCK(ntb);
1721	if (ntb->ctx_ops != NULL && ntb->ctx_ops->link_event != NULL)
1722		ntb->ctx_ops->link_event(ntb->ntb_ctx);
1723	CTX_UNLOCK(ntb);
1724}
1725
1726/*
1727 * ntb_db_event() - notify driver context of a doorbell event
1728 * @ntb:        NTB device context
1729 * @vector:     Interrupt vector number
1730 *
1731 * Notify the driver context of a doorbell event.  If hardware supports
1732 * multiple interrupt vectors for doorbells, the vector number indicates which
1733 * vector received the interrupt.  The vector number is relative to the first
1734 * vector used for doorbells, starting at zero, and must be less than
1735 * ntb_db_vector_count().  The driver may call ntb_db_read() to check which
1736 * doorbell bits need service, and ntb_db_vector_mask() to determine which of
1737 * those bits are associated with the vector number.
1738 */
1739static void
1740ntb_db_event(struct ntb_softc *ntb, uint32_t vec)
1741{
1742
1743	CTX_LOCK(ntb);
1744	if (ntb->ctx_ops != NULL && ntb->ctx_ops->db_event != NULL)
1745		ntb->ctx_ops->db_event(ntb->ntb_ctx, vec);
1746	CTX_UNLOCK(ntb);
1747}
1748
1749/*
1750 * ntb_link_enable() - enable the link on the secondary side of the ntb
1751 * @ntb:        NTB device context
1752 * @max_speed:  The maximum link speed expressed as PCIe generation number[0]
1753 * @max_width:  The maximum link width expressed as the number of PCIe lanes[0]
1754 *
1755 * Enable the link on the secondary side of the ntb.  This can only be done
1756 * from the primary side of the ntb in primary or b2b topology.  The ntb device
1757 * should train the link to its maximum speed and width, or the requested speed
1758 * and width, whichever is smaller, if supported.
1759 *
1760 * Return: Zero on success, otherwise an error number.
1761 *
1762 * [0]: Only NTB_SPEED_AUTO and NTB_WIDTH_AUTO are valid inputs; other speed
1763 *      and width input will be ignored.
1764 */
1765int
1766ntb_link_enable(struct ntb_softc *ntb, enum ntb_speed s __unused,
1767    enum ntb_width w __unused)
1768{
1769	uint32_t cntl;
1770
1771	if (ntb->type == NTB_ATOM) {
1772		pci_write_config(ntb->device, NTB_PPD_OFFSET,
1773		    ntb->ppd | ATOM_PPD_INIT_LINK, 4);
1774		return (0);
1775	}
1776
1777	if (ntb->conn_type == NTB_CONN_TRANSPARENT) {
1778		ntb_link_event(ntb);
1779		return (0);
1780	}
1781
1782	cntl = ntb_reg_read(4, ntb->reg->ntb_ctl);
1783	cntl &= ~(NTB_CNTL_LINK_DISABLE | NTB_CNTL_CFG_LOCK);
1784	cntl |= NTB_CNTL_P2S_BAR23_SNOOP | NTB_CNTL_S2P_BAR23_SNOOP;
1785	cntl |= NTB_CNTL_P2S_BAR4_SNOOP | NTB_CNTL_S2P_BAR4_SNOOP;
1786	if (HAS_FEATURE(NTB_SPLIT_BAR))
1787		cntl |= NTB_CNTL_P2S_BAR5_SNOOP | NTB_CNTL_S2P_BAR5_SNOOP;
1788	ntb_reg_write(4, ntb->reg->ntb_ctl, cntl);
1789	return (0);
1790}
1791
1792/*
1793 * ntb_link_disable() - disable the link on the secondary side of the ntb
1794 * @ntb:        NTB device context
1795 *
1796 * Disable the link on the secondary side of the ntb.  This can only be done
1797 * from the primary side of the ntb in primary or b2b topology.  The ntb device
1798 * should disable the link.  Returning from this call must indicate that a
1799 * barrier has passed, though with no more writes may pass in either direction
1800 * across the link, except if this call returns an error number.
1801 *
1802 * Return: Zero on success, otherwise an error number.
1803 */
1804int
1805ntb_link_disable(struct ntb_softc *ntb)
1806{
1807	uint32_t cntl;
1808
1809	if (ntb->conn_type == NTB_CONN_TRANSPARENT) {
1810		ntb_link_event(ntb);
1811		return (0);
1812	}
1813
1814	cntl = ntb_reg_read(4, ntb->reg->ntb_ctl);
1815	cntl &= ~(NTB_CNTL_P2S_BAR23_SNOOP | NTB_CNTL_S2P_BAR23_SNOOP);
1816	cntl &= ~(NTB_CNTL_P2S_BAR4_SNOOP | NTB_CNTL_S2P_BAR4_SNOOP);
1817	if (HAS_FEATURE(NTB_SPLIT_BAR))
1818		cntl &= ~(NTB_CNTL_P2S_BAR5_SNOOP | NTB_CNTL_S2P_BAR5_SNOOP);
1819	cntl |= NTB_CNTL_LINK_DISABLE | NTB_CNTL_CFG_LOCK;
1820	ntb_reg_write(4, ntb->reg->ntb_ctl, cntl);
1821	return (0);
1822}
1823
1824static void
1825recover_atom_link(void *arg)
1826{
1827	struct ntb_softc *ntb = arg;
1828	unsigned speed, width, oldspeed, oldwidth;
1829	uint32_t status32;
1830
1831	atom_perform_link_restart(ntb);
1832
1833	/*
1834	 * There is a potential race between the 2 NTB devices recovering at
1835	 * the same time.  If the times are the same, the link will not recover
1836	 * and the driver will be stuck in this loop forever.  Add a random
1837	 * interval to the recovery time to prevent this race.
1838	 */
1839	status32 = arc4random() % ATOM_LINK_RECOVERY_TIME;
1840	pause("Link", (ATOM_LINK_RECOVERY_TIME + status32) * hz / 1000);
1841
1842	if (atom_link_is_err(ntb))
1843		goto retry;
1844
1845	status32 = ntb_reg_read(4, ntb->reg->ntb_ctl);
1846	if ((status32 & ATOM_CNTL_LINK_DOWN) != 0)
1847		goto out;
1848
1849	status32 = ntb_reg_read(4, ntb->reg->lnk_sta);
1850	width = NTB_LNK_STA_WIDTH(status32);
1851	speed = status32 & NTB_LINK_SPEED_MASK;
1852
1853	oldwidth = NTB_LNK_STA_WIDTH(ntb->lnk_sta);
1854	oldspeed = ntb->lnk_sta & NTB_LINK_SPEED_MASK;
1855	if (oldwidth != width || oldspeed != speed)
1856		goto retry;
1857
1858out:
1859	callout_reset(&ntb->heartbeat_timer, NTB_HB_TIMEOUT * hz, atom_link_hb,
1860	    ntb);
1861	return;
1862
1863retry:
1864	callout_reset(&ntb->lr_timer, NTB_HB_TIMEOUT * hz, recover_atom_link,
1865	    ntb);
1866}
1867
1868/*
1869 * Polls the HW link status register(s); returns true if something has changed.
1870 */
1871static bool
1872ntb_poll_link(struct ntb_softc *ntb)
1873{
1874	uint32_t ntb_cntl;
1875	uint16_t reg_val;
1876
1877	if (ntb->type == NTB_ATOM) {
1878		ntb_cntl = ntb_reg_read(4, ntb->reg->ntb_ctl);
1879		if (ntb_cntl == ntb->ntb_ctl)
1880			return (false);
1881
1882		ntb->ntb_ctl = ntb_cntl;
1883		ntb->lnk_sta = ntb_reg_read(4, ntb->reg->lnk_sta);
1884	} else {
1885		db_iowrite_raw(ntb, ntb->self_reg->db_bell, ntb->db_link_mask);
1886
1887		reg_val = pci_read_config(ntb->device, ntb->reg->lnk_sta, 2);
1888		if (reg_val == ntb->lnk_sta)
1889			return (false);
1890
1891		ntb->lnk_sta = reg_val;
1892	}
1893	return (true);
1894}
1895
1896static inline enum ntb_speed
1897ntb_link_sta_speed(struct ntb_softc *ntb)
1898{
1899
1900	if (!link_is_up(ntb))
1901		return (NTB_SPEED_NONE);
1902	return (ntb->lnk_sta & NTB_LINK_SPEED_MASK);
1903}
1904
1905static inline enum ntb_width
1906ntb_link_sta_width(struct ntb_softc *ntb)
1907{
1908
1909	if (!link_is_up(ntb))
1910		return (NTB_WIDTH_NONE);
1911	return (NTB_LNK_STA_WIDTH(ntb->lnk_sta));
1912}
1913
1914SYSCTL_NODE(_hw_ntb, OID_AUTO, debug_info, CTLFLAG_RW, 0,
1915    "Driver state, statistics, and HW registers");
1916
1917#define NTB_REGSZ_MASK	(3ul << 30)
1918#define NTB_REG_64	(1ul << 30)
1919#define NTB_REG_32	(2ul << 30)
1920#define NTB_REG_16	(3ul << 30)
1921#define NTB_REG_8	(0ul << 30)
1922
1923#define NTB_DB_READ	(1ul << 29)
1924#define NTB_PCI_REG	(1ul << 28)
1925#define NTB_REGFLAGS_MASK	(NTB_REGSZ_MASK | NTB_DB_READ | NTB_PCI_REG)
1926
1927static void
1928ntb_sysctl_init(struct ntb_softc *ntb)
1929{
1930	struct sysctl_oid_list *tree_par, *regpar, *statpar, *errpar;
1931	struct sysctl_ctx_list *ctx;
1932	struct sysctl_oid *tree, *tmptree;
1933
1934	ctx = device_get_sysctl_ctx(ntb->device);
1935
1936	tree = SYSCTL_ADD_NODE(ctx,
1937	    SYSCTL_CHILDREN(device_get_sysctl_tree(ntb->device)), OID_AUTO,
1938	    "debug_info", CTLFLAG_RD, NULL,
1939	    "Driver state, statistics, and HW registers");
1940	tree_par = SYSCTL_CHILDREN(tree);
1941
1942	SYSCTL_ADD_UINT(ctx, tree_par, OID_AUTO, "conn_type", CTLFLAG_RD,
1943	    &ntb->conn_type, 0, "0 - Transparent; 1 - B2B; 2 - Root Port");
1944	SYSCTL_ADD_UINT(ctx, tree_par, OID_AUTO, "dev_type", CTLFLAG_RD,
1945	    &ntb->dev_type, 0, "0 - USD; 1 - DSD");
1946
1947	if (ntb->b2b_mw_idx != B2B_MW_DISABLED) {
1948		SYSCTL_ADD_U8(ctx, tree_par, OID_AUTO, "b2b_idx", CTLFLAG_RD,
1949		    &ntb->b2b_mw_idx, 0,
1950		    "Index of the MW used for B2B remote register access");
1951		SYSCTL_ADD_UQUAD(ctx, tree_par, OID_AUTO, "b2b_off",
1952		    CTLFLAG_RD, &ntb->b2b_off,
1953		    "If non-zero, offset of B2B register region in shared MW");
1954	}
1955
1956	SYSCTL_ADD_PROC(ctx, tree_par, OID_AUTO, "features",
1957	    CTLFLAG_RD | CTLTYPE_STRING, ntb, 0, sysctl_handle_features, "A",
1958	    "Features/errata of this NTB device");
1959
1960	SYSCTL_ADD_UINT(ctx, tree_par, OID_AUTO, "ntb_ctl", CTLFLAG_RD,
1961	    &ntb->ntb_ctl, 0, "NTB CTL register (cached)");
1962	SYSCTL_ADD_UINT(ctx, tree_par, OID_AUTO, "lnk_sta", CTLFLAG_RD,
1963	    &ntb->lnk_sta, 0, "LNK STA register (cached)");
1964
1965	SYSCTL_ADD_PROC(ctx, tree_par, OID_AUTO, "link_status",
1966	    CTLFLAG_RD | CTLTYPE_STRING, ntb, 0, sysctl_handle_link_status,
1967	    "A", "Link status");
1968
1969	SYSCTL_ADD_U8(ctx, tree_par, OID_AUTO, "mw_count", CTLFLAG_RD,
1970	    &ntb->mw_count, 0, "MW count (excl. non-shared B2B register BAR)");
1971	SYSCTL_ADD_U8(ctx, tree_par, OID_AUTO, "spad_count", CTLFLAG_RD,
1972	    &ntb->spad_count, 0, "Scratchpad count");
1973	SYSCTL_ADD_U8(ctx, tree_par, OID_AUTO, "db_count", CTLFLAG_RD,
1974	    &ntb->db_count, 0, "Doorbell count");
1975	SYSCTL_ADD_U8(ctx, tree_par, OID_AUTO, "db_vec_count", CTLFLAG_RD,
1976	    &ntb->db_vec_count, 0, "Doorbell vector count");
1977	SYSCTL_ADD_U8(ctx, tree_par, OID_AUTO, "db_vec_shift", CTLFLAG_RD,
1978	    &ntb->db_vec_shift, 0, "Doorbell vector shift");
1979
1980	SYSCTL_ADD_UQUAD(ctx, tree_par, OID_AUTO, "db_valid_mask", CTLFLAG_RD,
1981	    &ntb->db_valid_mask, "Doorbell valid mask");
1982	SYSCTL_ADD_UQUAD(ctx, tree_par, OID_AUTO, "db_link_mask", CTLFLAG_RD,
1983	    &ntb->db_link_mask, "Doorbell link mask");
1984	SYSCTL_ADD_UQUAD(ctx, tree_par, OID_AUTO, "db_mask", CTLFLAG_RD,
1985	    &ntb->db_mask, "Doorbell mask (cached)");
1986
1987	tmptree = SYSCTL_ADD_NODE(ctx, tree_par, OID_AUTO, "registers",
1988	    CTLFLAG_RD, NULL, "Raw HW registers (big-endian)");
1989	regpar = SYSCTL_CHILDREN(tmptree);
1990
1991	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "db_mask",
1992	    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
1993	    NTB_REG_64 | NTB_DB_READ | ntb->self_reg->db_mask,
1994	    sysctl_handle_register, "QU", "Doorbell mask register");
1995	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "db_bell",
1996	    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
1997	    NTB_REG_64 | NTB_DB_READ | ntb->self_reg->db_bell,
1998	    sysctl_handle_register, "QU", "Doorbell register");
1999
2000	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "incoming_xlat23",
2001	    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
2002	    NTB_REG_64 | ntb->xlat_reg->bar2_xlat,
2003	    sysctl_handle_register, "QU", "Incoming XLAT23 register");
2004	if (HAS_FEATURE(NTB_SPLIT_BAR)) {
2005		SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "incoming_xlat4",
2006		    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
2007		    NTB_REG_32 | ntb->xlat_reg->bar4_xlat,
2008		    sysctl_handle_register, "IU", "Incoming XLAT4 register");
2009		SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "incoming_xlat5",
2010		    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
2011		    NTB_REG_32 | ntb->xlat_reg->bar5_xlat,
2012		    sysctl_handle_register, "IU", "Incoming XLAT5 register");
2013	} else {
2014		SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "incoming_xlat45",
2015		    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
2016		    NTB_REG_64 | ntb->xlat_reg->bar4_xlat,
2017		    sysctl_handle_register, "QU", "Incoming XLAT45 register");
2018	}
2019
2020	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "incoming_lmt23",
2021	    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
2022	    NTB_REG_64 | ntb->xlat_reg->bar2_limit,
2023	    sysctl_handle_register, "QU", "Incoming LMT23 register");
2024	if (HAS_FEATURE(NTB_SPLIT_BAR)) {
2025		SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "incoming_lmt4",
2026		    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
2027		    NTB_REG_32 | ntb->xlat_reg->bar4_limit,
2028		    sysctl_handle_register, "IU", "Incoming LMT4 register");
2029		SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "incoming_lmt5",
2030		    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
2031		    NTB_REG_32 | ntb->xlat_reg->bar5_limit,
2032		    sysctl_handle_register, "IU", "Incoming LMT5 register");
2033	} else {
2034		SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "incoming_lmt45",
2035		    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
2036		    NTB_REG_64 | ntb->xlat_reg->bar4_limit,
2037		    sysctl_handle_register, "QU", "Incoming LMT45 register");
2038	}
2039
2040	if (ntb->type == NTB_ATOM)
2041		return;
2042
2043	tmptree = SYSCTL_ADD_NODE(ctx, regpar, OID_AUTO, "xeon_stats",
2044	    CTLFLAG_RD, NULL, "Xeon HW statistics");
2045	statpar = SYSCTL_CHILDREN(tmptree);
2046	SYSCTL_ADD_PROC(ctx, statpar, OID_AUTO, "upstream_mem_miss",
2047	    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
2048	    NTB_REG_16 | XEON_USMEMMISS_OFFSET,
2049	    sysctl_handle_register, "SU", "Upstream Memory Miss");
2050
2051	tmptree = SYSCTL_ADD_NODE(ctx, regpar, OID_AUTO, "xeon_hw_err",
2052	    CTLFLAG_RD, NULL, "Xeon HW errors");
2053	errpar = SYSCTL_CHILDREN(tmptree);
2054
2055	SYSCTL_ADD_PROC(ctx, errpar, OID_AUTO, "devsts",
2056	    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
2057	    NTB_REG_16 | NTB_PCI_REG | XEON_DEVSTS_OFFSET,
2058	    sysctl_handle_register, "SU", "DEVSTS");
2059	SYSCTL_ADD_PROC(ctx, errpar, OID_AUTO, "lnksts",
2060	    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
2061	    NTB_REG_16 | NTB_PCI_REG | XEON_LINK_STATUS_OFFSET,
2062	    sysctl_handle_register, "SU", "LNKSTS");
2063	SYSCTL_ADD_PROC(ctx, errpar, OID_AUTO, "uncerrsts",
2064	    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
2065	    NTB_REG_32 | NTB_PCI_REG | XEON_UNCERRSTS_OFFSET,
2066	    sysctl_handle_register, "IU", "UNCERRSTS");
2067	SYSCTL_ADD_PROC(ctx, errpar, OID_AUTO, "corerrsts",
2068	    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
2069	    NTB_REG_32 | NTB_PCI_REG | XEON_CORERRSTS_OFFSET,
2070	    sysctl_handle_register, "IU", "CORERRSTS");
2071
2072	if (ntb->conn_type != NTB_CONN_B2B)
2073		return;
2074
2075	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "outgoing_xlat23",
2076	    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
2077	    NTB_REG_64 | ntb->bar_info[NTB_B2B_BAR_1].pbarxlat_off,
2078	    sysctl_handle_register, "QU", "Outgoing XLAT23 register");
2079	if (HAS_FEATURE(NTB_SPLIT_BAR)) {
2080		SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "outgoing_xlat4",
2081		    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
2082		    NTB_REG_32 | ntb->bar_info[NTB_B2B_BAR_2].pbarxlat_off,
2083		    sysctl_handle_register, "IU", "Outgoing XLAT4 register");
2084		SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "outgoing_xlat5",
2085		    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
2086		    NTB_REG_32 | ntb->bar_info[NTB_B2B_BAR_3].pbarxlat_off,
2087		    sysctl_handle_register, "IU", "Outgoing XLAT5 register");
2088	} else {
2089		SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "outgoing_xlat45",
2090		    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
2091		    NTB_REG_64 | ntb->bar_info[NTB_B2B_BAR_2].pbarxlat_off,
2092		    sysctl_handle_register, "QU", "Outgoing XLAT45 register");
2093	}
2094
2095	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "outgoing_lmt23",
2096	    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
2097	    NTB_REG_64 | XEON_PBAR2LMT_OFFSET,
2098	    sysctl_handle_register, "QU", "Outgoing LMT23 register");
2099	if (HAS_FEATURE(NTB_SPLIT_BAR)) {
2100		SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "outgoing_lmt4",
2101		    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
2102		    NTB_REG_32 | XEON_PBAR4LMT_OFFSET,
2103		    sysctl_handle_register, "IU", "Outgoing LMT4 register");
2104		SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "outgoing_lmt5",
2105		    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
2106		    NTB_REG_32 | XEON_PBAR5LMT_OFFSET,
2107		    sysctl_handle_register, "IU", "Outgoing LMT5 register");
2108	} else {
2109		SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "outgoing_lmt45",
2110		    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
2111		    NTB_REG_64 | XEON_PBAR4LMT_OFFSET,
2112		    sysctl_handle_register, "QU", "Outgoing LMT45 register");
2113	}
2114
2115	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "sbar01_base",
2116	    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
2117	    NTB_REG_64 | ntb->xlat_reg->bar0_base,
2118	    sysctl_handle_register, "QU", "Secondary BAR01 base register");
2119	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "sbar23_base",
2120	    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
2121	    NTB_REG_64 | ntb->xlat_reg->bar2_base,
2122	    sysctl_handle_register, "QU", "Secondary BAR23 base register");
2123	if (HAS_FEATURE(NTB_SPLIT_BAR)) {
2124		SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "sbar4_base",
2125		    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
2126		    NTB_REG_32 | ntb->xlat_reg->bar4_base,
2127		    sysctl_handle_register, "IU",
2128		    "Secondary BAR4 base register");
2129		SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "sbar5_base",
2130		    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
2131		    NTB_REG_32 | ntb->xlat_reg->bar5_base,
2132		    sysctl_handle_register, "IU",
2133		    "Secondary BAR5 base register");
2134	} else {
2135		SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "sbar45_base",
2136		    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
2137		    NTB_REG_64 | ntb->xlat_reg->bar4_base,
2138		    sysctl_handle_register, "QU",
2139		    "Secondary BAR45 base register");
2140	}
2141}
2142
2143static int
2144sysctl_handle_features(SYSCTL_HANDLER_ARGS)
2145{
2146	struct ntb_softc *ntb;
2147	struct sbuf sb;
2148	int error;
2149
2150	error = 0;
2151	ntb = arg1;
2152
2153	sbuf_new_for_sysctl(&sb, NULL, 256, req);
2154
2155	sbuf_printf(&sb, "%b", ntb->features, NTB_FEATURES_STR);
2156	error = sbuf_finish(&sb);
2157	sbuf_delete(&sb);
2158
2159	if (error || !req->newptr)
2160		return (error);
2161	return (EINVAL);
2162}
2163
2164static int
2165sysctl_handle_link_status(SYSCTL_HANDLER_ARGS)
2166{
2167	struct ntb_softc *ntb;
2168	struct sbuf sb;
2169	enum ntb_speed speed;
2170	enum ntb_width width;
2171	int error;
2172
2173	error = 0;
2174	ntb = arg1;
2175
2176	sbuf_new_for_sysctl(&sb, NULL, 32, req);
2177
2178	if (ntb_link_is_up(ntb, &speed, &width))
2179		sbuf_printf(&sb, "up / PCIe Gen %u / Width x%u",
2180		    (unsigned)speed, (unsigned)width);
2181	else
2182		sbuf_printf(&sb, "down");
2183
2184	error = sbuf_finish(&sb);
2185	sbuf_delete(&sb);
2186
2187	if (error || !req->newptr)
2188		return (error);
2189	return (EINVAL);
2190}
2191
2192static int
2193sysctl_handle_register(SYSCTL_HANDLER_ARGS)
2194{
2195	struct ntb_softc *ntb;
2196	const void *outp;
2197	uintptr_t sz;
2198	uint64_t umv;
2199	char be[sizeof(umv)];
2200	size_t outsz;
2201	uint32_t reg;
2202	bool db, pci;
2203	int error;
2204
2205	ntb = arg1;
2206	reg = arg2 & ~NTB_REGFLAGS_MASK;
2207	sz = arg2 & NTB_REGSZ_MASK;
2208	db = (arg2 & NTB_DB_READ) != 0;
2209	pci = (arg2 & NTB_PCI_REG) != 0;
2210
2211	KASSERT(!(db && pci), ("bogus"));
2212
2213	if (db) {
2214		KASSERT(sz == NTB_REG_64, ("bogus"));
2215		umv = db_ioread(ntb, reg);
2216		outsz = sizeof(uint64_t);
2217	} else {
2218		switch (sz) {
2219		case NTB_REG_64:
2220			if (pci)
2221				umv = pci_read_config(ntb->device, reg, 8);
2222			else
2223				umv = ntb_reg_read(8, reg);
2224			outsz = sizeof(uint64_t);
2225			break;
2226		case NTB_REG_32:
2227			if (pci)
2228				umv = pci_read_config(ntb->device, reg, 4);
2229			else
2230				umv = ntb_reg_read(4, reg);
2231			outsz = sizeof(uint32_t);
2232			break;
2233		case NTB_REG_16:
2234			if (pci)
2235				umv = pci_read_config(ntb->device, reg, 2);
2236			else
2237				umv = ntb_reg_read(2, reg);
2238			outsz = sizeof(uint16_t);
2239			break;
2240		case NTB_REG_8:
2241			if (pci)
2242				umv = pci_read_config(ntb->device, reg, 1);
2243			else
2244				umv = ntb_reg_read(1, reg);
2245			outsz = sizeof(uint8_t);
2246			break;
2247		default:
2248			panic("bogus");
2249			break;
2250		}
2251	}
2252
2253	/* Encode bigendian so that sysctl -x is legible. */
2254	be64enc(be, umv);
2255	outp = ((char *)be) + sizeof(umv) - outsz;
2256
2257	error = SYSCTL_OUT(req, outp, outsz);
2258	if (error || !req->newptr)
2259		return (error);
2260	return (EINVAL);
2261}
2262
2263/*
2264 * Public API to the rest of the OS
2265 */
2266
2267/**
2268 * ntb_get_max_spads() - get the total scratch regs usable
2269 * @ntb: pointer to ntb_softc instance
2270 *
2271 * This function returns the max 32bit scratchpad registers usable by the
2272 * upper layer.
2273 *
2274 * RETURNS: total number of scratch pad registers available
2275 */
2276uint8_t
2277ntb_get_max_spads(struct ntb_softc *ntb)
2278{
2279
2280	return (ntb->spad_count);
2281}
2282
2283uint8_t
2284ntb_mw_count(struct ntb_softc *ntb)
2285{
2286
2287	return (ntb->mw_count);
2288}
2289
2290/**
2291 * ntb_spad_write() - write to the secondary scratchpad register
2292 * @ntb: pointer to ntb_softc instance
2293 * @idx: index to the scratchpad register, 0 based
2294 * @val: the data value to put into the register
2295 *
2296 * This function allows writing of a 32bit value to the indexed scratchpad
2297 * register. The register resides on the secondary (external) side.
2298 *
2299 * RETURNS: An appropriate ERRNO error value on error, or zero for success.
2300 */
2301int
2302ntb_spad_write(struct ntb_softc *ntb, unsigned int idx, uint32_t val)
2303{
2304
2305	if (idx >= ntb->spad_count)
2306		return (EINVAL);
2307
2308	ntb_reg_write(4, ntb->self_reg->spad + idx * 4, val);
2309
2310	return (0);
2311}
2312
2313/**
2314 * ntb_spad_read() - read from the primary scratchpad register
2315 * @ntb: pointer to ntb_softc instance
2316 * @idx: index to scratchpad register, 0 based
2317 * @val: pointer to 32bit integer for storing the register value
2318 *
2319 * This function allows reading of the 32bit scratchpad register on
2320 * the primary (internal) side.
2321 *
2322 * RETURNS: An appropriate ERRNO error value on error, or zero for success.
2323 */
2324int
2325ntb_spad_read(struct ntb_softc *ntb, unsigned int idx, uint32_t *val)
2326{
2327
2328	if (idx >= ntb->spad_count)
2329		return (EINVAL);
2330
2331	*val = ntb_reg_read(4, ntb->self_reg->spad + idx * 4);
2332
2333	return (0);
2334}
2335
2336/**
2337 * ntb_peer_spad_write() - write to the secondary scratchpad register
2338 * @ntb: pointer to ntb_softc instance
2339 * @idx: index to the scratchpad register, 0 based
2340 * @val: the data value to put into the register
2341 *
2342 * This function allows writing of a 32bit value to the indexed scratchpad
2343 * register. The register resides on the secondary (external) side.
2344 *
2345 * RETURNS: An appropriate ERRNO error value on error, or zero for success.
2346 */
2347int
2348ntb_peer_spad_write(struct ntb_softc *ntb, unsigned int idx, uint32_t val)
2349{
2350
2351	if (idx >= ntb->spad_count)
2352		return (EINVAL);
2353
2354	if (HAS_FEATURE(NTB_SDOORBELL_LOCKUP))
2355		ntb_mw_write(4, XEON_SHADOW_SPAD_OFFSET + idx * 4, val);
2356	else
2357		ntb_reg_write(4, ntb->peer_reg->spad + idx * 4, val);
2358
2359	return (0);
2360}
2361
2362/**
2363 * ntb_peer_spad_read() - read from the primary scratchpad register
2364 * @ntb: pointer to ntb_softc instance
2365 * @idx: index to scratchpad register, 0 based
2366 * @val: pointer to 32bit integer for storing the register value
2367 *
2368 * This function allows reading of the 32bit scratchpad register on
2369 * the primary (internal) side.
2370 *
2371 * RETURNS: An appropriate ERRNO error value on error, or zero for success.
2372 */
2373int
2374ntb_peer_spad_read(struct ntb_softc *ntb, unsigned int idx, uint32_t *val)
2375{
2376
2377	if (idx >= ntb->spad_count)
2378		return (EINVAL);
2379
2380	if (HAS_FEATURE(NTB_SDOORBELL_LOCKUP))
2381		*val = ntb_mw_read(4, XEON_SHADOW_SPAD_OFFSET + idx * 4);
2382	else
2383		*val = ntb_reg_read(4, ntb->peer_reg->spad + idx * 4);
2384
2385	return (0);
2386}
2387
2388/*
2389 * ntb_mw_get_range() - get the range of a memory window
2390 * @ntb:        NTB device context
2391 * @idx:        Memory window number
2392 * @base:       OUT - the base address for mapping the memory window
2393 * @size:       OUT - the size for mapping the memory window
2394 * @align:      OUT - the base alignment for translating the memory window
2395 * @align_size: OUT - the size alignment for translating the memory window
2396 *
2397 * Get the range of a memory window.  NULL may be given for any output
2398 * parameter if the value is not needed.  The base and size may be used for
2399 * mapping the memory window, to access the peer memory.  The alignment and
2400 * size may be used for translating the memory window, for the peer to access
2401 * memory on the local system.
2402 *
2403 * Return: Zero on success, otherwise an error number.
2404 */
2405int
2406ntb_mw_get_range(struct ntb_softc *ntb, unsigned mw_idx, vm_paddr_t *base,
2407    caddr_t *vbase, size_t *size, size_t *align, size_t *align_size)
2408{
2409	struct ntb_pci_bar_info *bar;
2410	size_t bar_b2b_off;
2411
2412	if (mw_idx >= ntb_mw_count(ntb))
2413		return (EINVAL);
2414
2415	bar = &ntb->bar_info[ntb_mw_to_bar(ntb, mw_idx)];
2416	bar_b2b_off = 0;
2417	if (mw_idx == ntb->b2b_mw_idx) {
2418		KASSERT(ntb->b2b_off != 0,
2419		    ("user shouldn't get non-shared b2b mw"));
2420		bar_b2b_off = ntb->b2b_off;
2421	}
2422
2423	if (base != NULL)
2424		*base = bar->pbase + bar_b2b_off;
2425	if (vbase != NULL)
2426		*vbase = bar->vbase + bar_b2b_off;
2427	if (size != NULL)
2428		*size = bar->size - bar_b2b_off;
2429	if (align != NULL)
2430		*align = bar->size;
2431	if (align_size != NULL)
2432		*align_size = 1;
2433	return (0);
2434}
2435
2436/*
2437 * ntb_mw_set_trans() - set the translation of a memory window
2438 * @ntb:        NTB device context
2439 * @idx:        Memory window number
2440 * @addr:       The dma address local memory to expose to the peer
2441 * @size:       The size of the local memory to expose to the peer
2442 *
2443 * Set the translation of a memory window.  The peer may access local memory
2444 * through the window starting at the address, up to the size.  The address
2445 * must be aligned to the alignment specified by ntb_mw_get_range().  The size
2446 * must be aligned to the size alignment specified by ntb_mw_get_range().
2447 *
2448 * Return: Zero on success, otherwise an error number.
2449 */
2450int
2451ntb_mw_set_trans(struct ntb_softc *ntb, unsigned idx, bus_addr_t addr,
2452    size_t size)
2453{
2454	struct ntb_pci_bar_info *bar;
2455	uint64_t base, limit, reg_val;
2456	size_t bar_size, mw_size;
2457	uint32_t base_reg, xlat_reg, limit_reg;
2458	enum ntb_bar bar_num;
2459
2460	if (idx >= ntb_mw_count(ntb))
2461		return (EINVAL);
2462
2463	bar_num = ntb_mw_to_bar(ntb, idx);
2464	bar = &ntb->bar_info[bar_num];
2465
2466	bar_size = bar->size;
2467	if (idx == ntb->b2b_mw_idx)
2468		mw_size = bar_size - ntb->b2b_off;
2469	else
2470		mw_size = bar_size;
2471
2472	/* Hardware requires that addr is aligned to bar size */
2473	if ((addr & (bar_size - 1)) != 0)
2474		return (EINVAL);
2475
2476	if (size > mw_size)
2477		return (EINVAL);
2478
2479	bar_get_xlat_params(ntb, bar_num, &base_reg, &xlat_reg, &limit_reg);
2480
2481	limit = 0;
2482	if (bar_is_64bit(ntb, bar_num)) {
2483		base = ntb_reg_read(8, base_reg);
2484
2485		if (limit_reg != 0 && size != mw_size)
2486			limit = base + size;
2487
2488		/* Set and verify translation address */
2489		ntb_reg_write(8, xlat_reg, addr);
2490		reg_val = ntb_reg_read(8, xlat_reg);
2491		if (reg_val != addr) {
2492			ntb_reg_write(8, xlat_reg, 0);
2493			return (EIO);
2494		}
2495
2496		/* Set and verify the limit */
2497		ntb_reg_write(8, limit_reg, limit);
2498		reg_val = ntb_reg_read(8, limit_reg);
2499		if (reg_val != limit) {
2500			ntb_reg_write(8, limit_reg, base);
2501			ntb_reg_write(8, xlat_reg, 0);
2502			return (EIO);
2503		}
2504	} else {
2505		/* Configure 32-bit (split) BAR MW */
2506
2507		if ((addr & ~UINT32_MAX) != 0)
2508			return (EINVAL);
2509		if (((addr + size) & ~UINT32_MAX) != 0)
2510			return (EINVAL);
2511
2512		base = ntb_reg_read(4, base_reg);
2513
2514		if (limit_reg != 0 && size != mw_size)
2515			limit = base + size;
2516
2517		/* Set and verify translation address */
2518		ntb_reg_write(4, xlat_reg, addr);
2519		reg_val = ntb_reg_read(4, xlat_reg);
2520		if (reg_val != addr) {
2521			ntb_reg_write(4, xlat_reg, 0);
2522			return (EIO);
2523		}
2524
2525		/* Set and verify the limit */
2526		ntb_reg_write(4, limit_reg, limit);
2527		reg_val = ntb_reg_read(4, limit_reg);
2528		if (reg_val != limit) {
2529			ntb_reg_write(4, limit_reg, base);
2530			ntb_reg_write(4, xlat_reg, 0);
2531			return (EIO);
2532		}
2533	}
2534	return (0);
2535}
2536
/*
 * ntb_mw_clear_trans() - clear the translation of a memory window
 * @ntb:	NTB device context
 * @mw_idx:	Memory window number
 *
 * Clear the translation of a memory window.  The peer may no longer access
 * local memory through the window.  Implemented as ntb_mw_set_trans() with
 * a zero address and a zero size.
 *
 * Return: Zero on success, otherwise an error number.
 */
int
ntb_mw_clear_trans(struct ntb_softc *ntb, unsigned mw_idx)
{
	int rc;

	rc = ntb_mw_set_trans(ntb, mw_idx, 0, 0);
	return (rc);
}
2553
2554/**
2555 * ntb_peer_db_set() - Set the doorbell on the secondary/external side
2556 * @ntb: pointer to ntb_softc instance
2557 * @bit: doorbell bits to ring
2558 *
2559 * This function allows triggering of a doorbell on the secondary/external
2560 * side that will initiate an interrupt on the remote host
2561 */
2562void
2563ntb_peer_db_set(struct ntb_softc *ntb, uint64_t bit)
2564{
2565
2566	if (HAS_FEATURE(NTB_SDOORBELL_LOCKUP)) {
2567		ntb_mw_write(2, XEON_SHADOW_PDOORBELL_OFFSET, bit);
2568		return;
2569	}
2570
2571	db_iowrite(ntb, ntb->peer_reg->db_bell, bit);
2572}
2573
2574/*
2575 * ntb_get_peer_db_addr() - Return the address of the remote doorbell register,
2576 * as well as the size of the register (via *sz_out).
2577 *
2578 * This function allows a caller using I/OAT DMA to chain the remote doorbell
2579 * ring to its memory window write.
2580 *
2581 * Note that writing the peer doorbell via a memory window will *not* generate
2582 * an interrupt on the remote host; that must be done seperately.
2583 */
2584bus_addr_t
2585ntb_get_peer_db_addr(struct ntb_softc *ntb, vm_size_t *sz_out)
2586{
2587	struct ntb_pci_bar_info *bar;
2588	uint64_t regoff;
2589
2590	KASSERT(sz_out != NULL, ("must be non-NULL"));
2591
2592	if (!HAS_FEATURE(NTB_SDOORBELL_LOCKUP)) {
2593		bar = &ntb->bar_info[NTB_CONFIG_BAR];
2594		regoff = ntb->peer_reg->db_bell;
2595	} else {
2596		KASSERT((HAS_FEATURE(NTB_SPLIT_BAR) && ntb->mw_count == 2) ||
2597		    (!HAS_FEATURE(NTB_SPLIT_BAR) && ntb->mw_count == 1),
2598		    ("mw_count invalid after setup"));
2599		KASSERT(ntb->b2b_mw_idx != B2B_MW_DISABLED,
2600		    ("invalid b2b idx"));
2601
2602		bar = &ntb->bar_info[ntb_mw_to_bar(ntb, ntb->b2b_mw_idx)];
2603		regoff = XEON_SHADOW_PDOORBELL_OFFSET;
2604	}
2605	KASSERT(bar->pci_bus_tag != X86_BUS_SPACE_IO, ("uh oh"));
2606
2607	*sz_out = ntb->reg->db_size;
2608	/* HACK: Specific to current x86 bus implementation. */
2609	return ((uint64_t)bar->pci_bus_handle + regoff);
2610}
2611
2612/*
2613 * ntb_db_valid_mask() - get a mask of doorbell bits supported by the ntb
2614 * @ntb:	NTB device context
2615 *
2616 * Hardware may support different number or arrangement of doorbell bits.
2617 *
2618 * Return: A mask of doorbell bits supported by the ntb.
2619 */
2620uint64_t
2621ntb_db_valid_mask(struct ntb_softc *ntb)
2622{
2623
2624	return (ntb->db_valid_mask);
2625}
2626
2627/*
2628 * ntb_db_vector_mask() - get a mask of doorbell bits serviced by a vector
2629 * @ntb:	NTB device context
2630 * @vector:	Doorbell vector number
2631 *
2632 * Each interrupt vector may have a different number or arrangement of bits.
2633 *
2634 * Return: A mask of doorbell bits serviced by a vector.
2635 */
2636uint64_t
2637ntb_db_vector_mask(struct ntb_softc *ntb, uint32_t vector)
2638{
2639
2640	if (vector > ntb->db_vec_count)
2641		return (0);
2642	return (ntb->db_valid_mask & ntb_vec_mask(ntb, vector));
2643}
2644
2645/**
2646 * ntb_link_is_up() - get the current ntb link state
2647 * @ntb:        NTB device context
2648 * @speed:      OUT - The link speed expressed as PCIe generation number
2649 * @width:      OUT - The link width expressed as the number of PCIe lanes
2650 *
2651 * RETURNS: true or false based on the hardware link state
2652 */
2653bool
2654ntb_link_is_up(struct ntb_softc *ntb, enum ntb_speed *speed,
2655    enum ntb_width *width)
2656{
2657
2658	if (speed != NULL)
2659		*speed = ntb_link_sta_speed(ntb);
2660	if (width != NULL)
2661		*width = ntb_link_sta_width(ntb);
2662	return (link_is_up(ntb));
2663}
2664
2665static void
2666save_bar_parameters(struct ntb_pci_bar_info *bar)
2667{
2668
2669	bar->pci_bus_tag = rman_get_bustag(bar->pci_resource);
2670	bar->pci_bus_handle = rman_get_bushandle(bar->pci_resource);
2671	bar->pbase = rman_get_start(bar->pci_resource);
2672	bar->size = rman_get_size(bar->pci_resource);
2673	bar->vbase = rman_get_virtual(bar->pci_resource);
2674}
2675
2676device_t
2677ntb_get_device(struct ntb_softc *ntb)
2678{
2679
2680	return (ntb->device);
2681}
2682
/*
 * Export HW-specific errata information: evaluates HAS_FEATURE() for the
 * given feature value against this device.
 */
bool
ntb_has_feature(struct ntb_softc *ntb, uint32_t feature)
{
	bool present;

	present = HAS_FEATURE(feature);
	return (present);
}
2690