ntb_hw_intel.c revision 289774
1/*-
2 * Copyright (C) 2013 Intel Corporation
3 * Copyright (C) 2015 EMC Corporation
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 *    notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 *    notice, this list of conditions and the following disclaimer in the
13 *    documentation and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25 * SUCH DAMAGE.
26 */
27
28#include <sys/cdefs.h>
29__FBSDID("$FreeBSD: head/sys/dev/ntb/ntb_hw/ntb_hw.c 289774 2015-10-22 23:03:15Z cem $");
30
31#include <sys/param.h>
32#include <sys/kernel.h>
33#include <sys/systm.h>
34#include <sys/bus.h>
35#include <sys/endian.h>
36#include <sys/malloc.h>
37#include <sys/module.h>
38#include <sys/queue.h>
39#include <sys/rman.h>
40#include <sys/sbuf.h>
41#include <sys/sysctl.h>
42#include <vm/vm.h>
43#include <vm/pmap.h>
44#include <machine/bus.h>
45#include <machine/pmap.h>
46#include <machine/resource.h>
47#include <dev/pci/pcireg.h>
48#include <dev/pci/pcivar.h>
49
50#include "ntb_regs.h"
51#include "ntb_hw.h"
52
/*
 * The Non-Transparent Bridge (NTB) is a device on some Intel processors that
 * allows you to connect two systems using a PCI-e link.
 *
 * This module contains the hardware abstraction layer for the NTB. It allows
 * you to send and receive interrupts, map the memory windows, and send and
 * receive messages in the scratch-pad registers.
 *
 * NOTE: Much of the code in this module is shared with Linux. Any patches may
 * be picked up and redistributed in Linux with a dual GPL/BSD license.
 */
64
/* Size of the per-device interrupt table: the larger of the two DB counts. */
#define MAX_MSIX_INTERRUPTS	MAX(XEON_DB_COUNT, ATOM_DB_COUNT)

#define NTB_HB_TIMEOUT		1	/* second */
#define ATOM_LINK_RECOVERY_TIME	500	/* ms */

/* Typed accessor for the softc that newbus stores for a device. */
#define DEVICE2SOFTC(dev)	((struct ntb_softc *) device_get_softc(dev))
71
/* Hardware family; selects the Xeon or the Atom code paths. */
enum ntb_device_type {
	NTB_XEON,
	NTB_ATOM
};
76
/* The ntb_conn_type values are hardware numbers and cannot change. */
enum ntb_conn_type {
	NTB_CONN_TRANSPARENT = 0,
	NTB_CONN_B2B = 1,
	NTB_CONN_RP = 2,
};
83
/* Which end of a back-to-back link this device is (USD or DSD). */
enum ntb_b2b_direction {
	NTB_DEV_USD = 0,
	NTB_DEV_DSD = 1,
};
88
/*
 * Driver-side BAR indices, used to subscript ntb_softc.bar_info[].
 * NTB_MAX_BARS is the array size, not a real BAR.
 */
enum ntb_bar {
	NTB_CONFIG_BAR = 0,
	NTB_B2B_BAR_1,
	NTB_B2B_BAR_2,
	NTB_B2B_BAR_3,
	NTB_MAX_BARS
};
96
/*
 * Device features and workarounds.
 *
 * Note: expands against a variable literally named `ntb', which must be in
 * scope wherever the macro is used.
 */
#define HAS_FEATURE(feature)	((ntb->features & (feature)) != 0)
100
101struct ntb_hw_info {
102	uint32_t		device_id;
103	const char		*desc;
104	enum ntb_device_type	type;
105	uint32_t		features;
106};
107
108struct ntb_pci_bar_info {
109	bus_space_tag_t		pci_bus_tag;
110	bus_space_handle_t	pci_bus_handle;
111	int			pci_resource_id;
112	struct resource		*pci_resource;
113	vm_paddr_t		pbase;
114	void			*vbase;
115	u_long			size;
116
117	/* Configuration register offsets */
118	uint32_t		psz_off;
119	uint32_t		ssz_off;
120	uint32_t		pbarxlat_off;
121};
122
/* State for one allocated interrupt (an MSI-X message or the legacy IRQ). */
struct ntb_int_info {
	struct resource	*res;	/* SYS_RES_IRQ resource */
	int		rid;	/* resource id used to allocate res */
	void		*tag;	/* cookie filled in by bus_setup_intr() */
};
128
/* Argument handed to the per-vector MSI-X handler (ndev_vec_isr). */
struct ntb_vec {
	struct ntb_softc	*ntb;
	uint32_t		num;
};
133
134struct ntb_reg {
135	uint32_t	ntb_ctl;
136	uint32_t	lnk_sta;
137	uint8_t		db_size;
138	unsigned	mw_bar[NTB_MAX_BARS];
139};
140
/* Offsets of one doorbell / doorbell-mask / scratchpad register set. */
struct ntb_alt_reg {
	uint32_t	db_bell;
	uint32_t	db_mask;
	uint32_t	spad;
};
146
/*
 * Offsets of the BAR base, translation and limit registers.  A field that a
 * given table does not initialize is left as zero.
 */
struct ntb_xlat_reg {
	uint32_t	bar0_base;
	uint32_t	bar2_base;
	uint32_t	bar4_base;
	uint32_t	bar5_base;

	uint32_t	bar2_xlat;
	uint32_t	bar4_xlat;
	uint32_t	bar5_xlat;

	uint32_t	bar2_limit;
	uint32_t	bar4_limit;
	uint32_t	bar5_limit;
};
161
/*
 * Addresses used for the B2B topology, one per BAR.  BAR4 has both a 64-bit
 * entry and a 32-bit (split-BAR mode) entry.
 */
struct ntb_b2b_addr {
	uint64_t	bar0_addr;
	uint64_t	bar2_addr64;
	uint64_t	bar4_addr64;
	uint64_t	bar4_addr32;
	uint64_t	bar5_addr32;
};
169
170struct ntb_softc {
171	device_t		device;
172	enum ntb_device_type	type;
173	uint32_t		features;
174
175	struct ntb_pci_bar_info	bar_info[NTB_MAX_BARS];
176	struct ntb_int_info	int_info[MAX_MSIX_INTERRUPTS];
177	uint32_t		allocated_interrupts;
178
179	struct callout		heartbeat_timer;
180	struct callout		lr_timer;
181
182	void			*ntb_ctx;
183	const struct ntb_ctx_ops *ctx_ops;
184	struct ntb_vec		*msix_vec;
185#define CTX_LOCK(sc)		mtx_lock_spin(&(sc)->ctx_lock)
186#define CTX_UNLOCK(sc)		mtx_unlock_spin(&(sc)->ctx_lock)
187#define CTX_ASSERT(sc,f)	mtx_assert(&(sc)->ctx_lock, (f))
188	struct mtx		ctx_lock;
189
190	uint32_t		ppd;
191	enum ntb_conn_type	conn_type;
192	enum ntb_b2b_direction	dev_type;
193
194	/* Offset of peer bar0 in B2B BAR */
195	uint64_t			b2b_off;
196	/* Memory window used to access peer bar0 */
197#define B2B_MW_DISABLED			UINT8_MAX
198	uint8_t				b2b_mw_idx;
199
200	uint8_t				mw_count;
201	uint8_t				spad_count;
202	uint8_t				db_count;
203	uint8_t				db_vec_count;
204	uint8_t				db_vec_shift;
205
206	/* Protects local db_mask. */
207#define DB_MASK_LOCK(sc)	mtx_lock_spin(&(sc)->db_mask_lock)
208#define DB_MASK_UNLOCK(sc)	mtx_unlock_spin(&(sc)->db_mask_lock)
209#define DB_MASK_ASSERT(sc,f)	mtx_assert(&(sc)->db_mask_lock, (f))
210	struct mtx			db_mask_lock;
211
212	uint32_t			ntb_ctl;
213	uint32_t			lnk_sta;
214
215	uint64_t			db_valid_mask;
216	uint64_t			db_link_mask;
217	uint64_t			db_mask;
218
219	int				last_ts;	/* ticks @ last irq */
220
221	const struct ntb_reg		*reg;
222	const struct ntb_alt_reg	*self_reg;
223	const struct ntb_alt_reg	*peer_reg;
224	const struct ntb_xlat_reg	*xlat_reg;
225};
226
#ifdef __i386__
/*
 * Synthesize the 8-byte bus_space accessors on i386 from two 4-byte
 * accesses.  Each half is its own statement so that the low dword is
 * always accessed before the high dword; when both reads are operands of a
 * single `|' expression, C leaves their relative order unspecified, which
 * is undesirable for device registers.
 */
static __inline uint64_t
bus_space_read_8(bus_space_tag_t tag, bus_space_handle_t handle,
    bus_size_t offset)
{
	uint64_t lo, hi;

	lo = bus_space_read_4(tag, handle, offset);
	hi = bus_space_read_4(tag, handle, offset + 4);
	return (lo | (hi << 32));
}

/* Write `val' as two dwords: low half at `offset', high half at +4. */
static __inline void
bus_space_write_8(bus_space_tag_t tag, bus_space_handle_t handle,
    bus_size_t offset, uint64_t val)
{

	bus_space_write_4(tag, handle, offset, (uint32_t)val);
	bus_space_write_4(tag, handle, offset + 4, (uint32_t)(val >> 32));
}
#endif
246
/*
 * Register access helpers.  SIZE is the access width in bytes and is pasted
 * onto bus_space_read_ / bus_space_write_.  All of these expand against a
 * variable literally named `ntb' in the calling scope.
 *
 *   ntb_bar_*  -- access an arbitrary BAR (enum ntb_bar index)
 *   ntb_reg_*  -- access the configuration BAR
 *   ntb_mw_*   -- access the BAR behind memory window ntb->b2b_mw_idx
 */
#define ntb_bar_read(SIZE, bar, offset) \
	    bus_space_read_ ## SIZE (ntb->bar_info[(bar)].pci_bus_tag, \
	    ntb->bar_info[(bar)].pci_bus_handle, (offset))
#define ntb_bar_write(SIZE, bar, offset, val) \
	    bus_space_write_ ## SIZE (ntb->bar_info[(bar)].pci_bus_tag, \
	    ntb->bar_info[(bar)].pci_bus_handle, (offset), (val))

#define ntb_reg_read(SIZE, offset) ntb_bar_read(SIZE, NTB_CONFIG_BAR, offset)
#define ntb_reg_write(SIZE, offset, val) \
	    ntb_bar_write(SIZE, NTB_CONFIG_BAR, offset, val)

#define ntb_mw_read(SIZE, offset) \
	    ntb_bar_read(SIZE, ntb_mw_to_bar(ntb, ntb->b2b_mw_idx), offset)
#define ntb_mw_write(SIZE, offset, val) \
	    ntb_bar_write(SIZE, ntb_mw_to_bar(ntb, ntb->b2b_mw_idx), \
		offset, val)
261
262static int ntb_probe(device_t device);
263static int ntb_attach(device_t device);
264static int ntb_detach(device_t device);
265static inline enum ntb_bar ntb_mw_to_bar(struct ntb_softc *, unsigned mw);
266static inline bool bar_is_64bit(struct ntb_softc *, enum ntb_bar);
267static inline void bar_get_xlat_params(struct ntb_softc *, enum ntb_bar,
268    uint32_t *base, uint32_t *xlat, uint32_t *lmt);
269static int ntb_map_pci_bars(struct ntb_softc *ntb);
270static void print_map_success(struct ntb_softc *, struct ntb_pci_bar_info *,
271    const char *);
272static int map_mmr_bar(struct ntb_softc *ntb, struct ntb_pci_bar_info *bar);
273static int map_memory_window_bar(struct ntb_softc *ntb,
274    struct ntb_pci_bar_info *bar);
275static void ntb_unmap_pci_bar(struct ntb_softc *ntb);
276static int ntb_remap_msix(device_t, uint32_t desired, uint32_t avail);
277static int ntb_init_isr(struct ntb_softc *ntb);
278static int ntb_setup_legacy_interrupt(struct ntb_softc *ntb);
279static int ntb_setup_msix(struct ntb_softc *ntb, uint32_t num_vectors);
280static void ntb_teardown_interrupts(struct ntb_softc *ntb);
281static inline uint64_t ntb_vec_mask(struct ntb_softc *, uint64_t db_vector);
282static void ntb_interrupt(struct ntb_softc *, uint32_t vec);
283static void ndev_vec_isr(void *arg);
284static void ndev_irq_isr(void *arg);
285static inline uint64_t db_ioread(struct ntb_softc *, uint64_t regoff);
286static inline void db_iowrite(struct ntb_softc *, uint64_t regoff, uint64_t val);
287static int ntb_create_msix_vec(struct ntb_softc *ntb, uint32_t num_vectors);
288static void ntb_free_msix_vec(struct ntb_softc *ntb);
289static struct ntb_hw_info *ntb_get_device_info(uint32_t device_id);
290static void ntb_detect_max_mw(struct ntb_softc *ntb);
291static int ntb_detect_xeon(struct ntb_softc *ntb);
292static int ntb_detect_atom(struct ntb_softc *ntb);
293static int ntb_xeon_init_dev(struct ntb_softc *ntb);
294static int ntb_atom_init_dev(struct ntb_softc *ntb);
295static void ntb_teardown_xeon(struct ntb_softc *ntb);
296static void configure_atom_secondary_side_bars(struct ntb_softc *ntb);
297static void xeon_reset_sbar_size(struct ntb_softc *, enum ntb_bar idx,
298    enum ntb_bar regbar);
299static void xeon_set_sbar_base_and_limit(struct ntb_softc *,
300    uint64_t base_addr, enum ntb_bar idx, enum ntb_bar regbar);
301static void xeon_set_pbar_xlat(struct ntb_softc *, uint64_t base_addr,
302    enum ntb_bar idx);
303static int xeon_setup_b2b_mw(struct ntb_softc *,
304    const struct ntb_b2b_addr *addr, const struct ntb_b2b_addr *peer_addr);
305static inline bool link_is_up(struct ntb_softc *ntb);
306static inline bool atom_link_is_err(struct ntb_softc *ntb);
307static inline enum ntb_speed ntb_link_sta_speed(struct ntb_softc *);
308static inline enum ntb_width ntb_link_sta_width(struct ntb_softc *);
309static void atom_link_hb(void *arg);
310static void ntb_db_event(struct ntb_softc *ntb, uint32_t vec);
311static void recover_atom_link(void *arg);
312static bool ntb_poll_link(struct ntb_softc *ntb);
313static void save_bar_parameters(struct ntb_pci_bar_info *bar);
314static void ntb_sysctl_init(struct ntb_softc *);
315static int sysctl_handle_features(SYSCTL_HANDLER_ARGS);
316static int sysctl_handle_link_status(SYSCTL_HANDLER_ARGS);
317static int sysctl_handle_register(SYSCTL_HANDLER_ARGS);
318
319static struct ntb_hw_info pci_ids[] = {
320	/* XXX: PS/SS IDs left out until they are supported. */
321	{ 0x0C4E8086, "BWD Atom Processor S1200 Non-Transparent Bridge B2B",
322		NTB_ATOM, 0 },
323
324	{ 0x37258086, "JSF Xeon C35xx/C55xx Non-Transparent Bridge B2B",
325		NTB_XEON, NTB_SDOORBELL_LOCKUP | NTB_B2BDOORBELL_BIT14 },
326	{ 0x3C0D8086, "SNB Xeon E5/Core i7 Non-Transparent Bridge B2B",
327		NTB_XEON, NTB_SDOORBELL_LOCKUP | NTB_B2BDOORBELL_BIT14 },
328	{ 0x0E0D8086, "IVT Xeon E5 V2 Non-Transparent Bridge B2B", NTB_XEON,
329		NTB_SDOORBELL_LOCKUP | NTB_B2BDOORBELL_BIT14 |
330		    NTB_SB01BASE_LOCKUP | NTB_BAR_SIZE_4K },
331	{ 0x2F0D8086, "HSX Xeon E5 V3 Non-Transparent Bridge B2B", NTB_XEON,
332		NTB_SDOORBELL_LOCKUP | NTB_B2BDOORBELL_BIT14 |
333		    NTB_SB01BASE_LOCKUP },
334	{ 0x6F0D8086, "BDX Xeon E5 V4 Non-Transparent Bridge B2B", NTB_XEON,
335		NTB_SDOORBELL_LOCKUP | NTB_B2BDOORBELL_BIT14 |
336		    NTB_SB01BASE_LOCKUP },
337
338	{ 0x00000000, NULL, NTB_ATOM, 0 }
339};
340
341static const struct ntb_reg atom_reg = {
342	.ntb_ctl = ATOM_NTBCNTL_OFFSET,
343	.lnk_sta = ATOM_LINK_STATUS_OFFSET,
344	.db_size = sizeof(uint64_t),
345	.mw_bar = { NTB_B2B_BAR_1, NTB_B2B_BAR_2 },
346};
347
348static const struct ntb_alt_reg atom_pri_reg = {
349	.db_bell = ATOM_PDOORBELL_OFFSET,
350	.db_mask = ATOM_PDBMSK_OFFSET,
351	.spad = ATOM_SPAD_OFFSET,
352};
353
354static const struct ntb_alt_reg atom_b2b_reg = {
355	.db_bell = ATOM_B2B_DOORBELL_OFFSET,
356	.spad = ATOM_B2B_SPAD_OFFSET,
357};
358
359static const struct ntb_xlat_reg atom_sec_xlat = {
360#if 0
361	/* "FIXME" says the Linux driver. */
362	.bar0_base = ATOM_SBAR0BASE_OFFSET,
363	.bar2_base = ATOM_SBAR2BASE_OFFSET,
364	.bar4_base = ATOM_SBAR4BASE_OFFSET,
365
366	.bar2_limit = ATOM_SBAR2LMT_OFFSET,
367	.bar4_limit = ATOM_SBAR4LMT_OFFSET,
368#endif
369
370	.bar2_xlat = ATOM_SBAR2XLAT_OFFSET,
371	.bar4_xlat = ATOM_SBAR4XLAT_OFFSET,
372};
373
374static const struct ntb_reg xeon_reg = {
375	.ntb_ctl = XEON_NTBCNTL_OFFSET,
376	.lnk_sta = XEON_LINK_STATUS_OFFSET,
377	.db_size = sizeof(uint16_t),
378	.mw_bar = { NTB_B2B_BAR_1, NTB_B2B_BAR_2, NTB_B2B_BAR_3 },
379};
380
381static const struct ntb_alt_reg xeon_pri_reg = {
382	.db_bell = XEON_PDOORBELL_OFFSET,
383	.db_mask = XEON_PDBMSK_OFFSET,
384	.spad = XEON_SPAD_OFFSET,
385};
386
387static const struct ntb_alt_reg xeon_b2b_reg = {
388	.db_bell = XEON_B2B_DOORBELL_OFFSET,
389	.spad = XEON_B2B_SPAD_OFFSET,
390};
391
392static const struct ntb_xlat_reg xeon_sec_xlat = {
393	.bar0_base = XEON_SBAR0BASE_OFFSET,
394	.bar2_base = XEON_SBAR2BASE_OFFSET,
395	.bar4_base = XEON_SBAR4BASE_OFFSET,
396	.bar5_base = XEON_SBAR5BASE_OFFSET,
397
398	.bar2_limit = XEON_SBAR2LMT_OFFSET,
399	.bar4_limit = XEON_SBAR4LMT_OFFSET,
400	.bar5_limit = XEON_SBAR5LMT_OFFSET,
401
402	.bar2_xlat = XEON_SBAR2XLAT_OFFSET,
403	.bar4_xlat = XEON_SBAR4XLAT_OFFSET,
404	.bar5_xlat = XEON_SBAR5XLAT_OFFSET,
405};
406
407static struct ntb_b2b_addr xeon_b2b_usd_addr = {
408	.bar0_addr = XEON_B2B_BAR0_USD_ADDR,
409	.bar2_addr64 = XEON_B2B_BAR2_USD_ADDR64,
410	.bar4_addr64 = XEON_B2B_BAR4_USD_ADDR64,
411	.bar4_addr32 = XEON_B2B_BAR4_USD_ADDR32,
412	.bar5_addr32 = XEON_B2B_BAR5_USD_ADDR32,
413};
414
415static struct ntb_b2b_addr xeon_b2b_dsd_addr = {
416	.bar0_addr = XEON_B2B_BAR0_DSD_ADDR,
417	.bar2_addr64 = XEON_B2B_BAR2_DSD_ADDR64,
418	.bar4_addr64 = XEON_B2B_BAR4_DSD_ADDR64,
419	.bar4_addr32 = XEON_B2B_BAR4_DSD_ADDR32,
420	.bar5_addr32 = XEON_B2B_BAR5_DSD_ADDR32,
421};
422
423SYSCTL_NODE(_hw_ntb, OID_AUTO, xeon_b2b, CTLFLAG_RW, 0,
424    "B2B MW segment overrides -- MUST be the same on both sides");
425
426SYSCTL_UQUAD(_hw_ntb_xeon_b2b, OID_AUTO, usd_bar2_addr64, CTLFLAG_RDTUN,
427    &xeon_b2b_usd_addr.bar2_addr64, 0, "If using B2B topology on Xeon "
428    "hardware, use this 64-bit address on the bus between the NTB devices for "
429    "the window at BAR2, on the upstream side of the link.  MUST be the same "
430    "address on both sides.");
431SYSCTL_UQUAD(_hw_ntb_xeon_b2b, OID_AUTO, usd_bar4_addr64, CTLFLAG_RDTUN,
432    &xeon_b2b_usd_addr.bar4_addr64, 0, "See usd_bar2_addr64, but BAR4.");
433SYSCTL_UQUAD(_hw_ntb_xeon_b2b, OID_AUTO, usd_bar4_addr32, CTLFLAG_RDTUN,
434    &xeon_b2b_usd_addr.bar4_addr32, 0, "See usd_bar2_addr64, but BAR4 "
435    "(split-BAR mode).");
436SYSCTL_UQUAD(_hw_ntb_xeon_b2b, OID_AUTO, usd_bar5_addr32, CTLFLAG_RDTUN,
437    &xeon_b2b_usd_addr.bar5_addr32, 0, "See usd_bar2_addr64, but BAR5 "
438    "(split-BAR mode).");
439
440SYSCTL_UQUAD(_hw_ntb_xeon_b2b, OID_AUTO, dsd_bar2_addr64, CTLFLAG_RDTUN,
441    &xeon_b2b_dsd_addr.bar2_addr64, 0, "If using B2B topology on Xeon "
442    "hardware, use this 64-bit address on the bus between the NTB devices for "
443    "the window at BAR2, on the downstream side of the link.  MUST be the same"
444    " address on both sides.");
445SYSCTL_UQUAD(_hw_ntb_xeon_b2b, OID_AUTO, dsd_bar4_addr64, CTLFLAG_RDTUN,
446    &xeon_b2b_dsd_addr.bar4_addr64, 0, "See dsd_bar2_addr64, but BAR4.");
447SYSCTL_UQUAD(_hw_ntb_xeon_b2b, OID_AUTO, dsd_bar4_addr32, CTLFLAG_RDTUN,
448    &xeon_b2b_dsd_addr.bar4_addr32, 0, "See dsd_bar2_addr64, but BAR4 "
449    "(split-BAR mode).");
450SYSCTL_UQUAD(_hw_ntb_xeon_b2b, OID_AUTO, dsd_bar5_addr32, CTLFLAG_RDTUN,
451    &xeon_b2b_dsd_addr.bar5_addr32, 0, "See dsd_bar2_addr64, but BAR5 "
452    "(split-BAR mode).");
453
454/*
455 * OS <-> Driver interface structures
456 */
457MALLOC_DEFINE(M_NTB, "ntb_hw", "ntb_hw driver memory allocations");
458
459static device_method_t ntb_pci_methods[] = {
460	/* Device interface */
461	DEVMETHOD(device_probe,     ntb_probe),
462	DEVMETHOD(device_attach,    ntb_attach),
463	DEVMETHOD(device_detach,    ntb_detach),
464	DEVMETHOD_END
465};
466
467static driver_t ntb_pci_driver = {
468	"ntb_hw",
469	ntb_pci_methods,
470	sizeof(struct ntb_softc),
471};
472
473static devclass_t ntb_devclass;
474DRIVER_MODULE(ntb_hw, pci, ntb_pci_driver, ntb_devclass, NULL, NULL);
475MODULE_VERSION(ntb_hw, 1);
476
477SYSCTL_NODE(_hw, OID_AUTO, ntb, CTLFLAG_RW, 0, "NTB sysctls");
478
479/*
480 * OS <-> Driver linkage functions
481 */
482static int
483ntb_probe(device_t device)
484{
485	struct ntb_hw_info *p;
486
487	p = ntb_get_device_info(pci_get_devid(device));
488	if (p == NULL)
489		return (ENXIO);
490
491	device_set_desc(device, p->desc);
492	return (0);
493}
494
495static int
496ntb_attach(device_t device)
497{
498	struct ntb_softc *ntb;
499	struct ntb_hw_info *p;
500	int error;
501
502	ntb = DEVICE2SOFTC(device);
503	p = ntb_get_device_info(pci_get_devid(device));
504
505	ntb->device = device;
506	ntb->type = p->type;
507	ntb->features = p->features;
508	ntb->b2b_mw_idx = B2B_MW_DISABLED;
509
510	/* Heartbeat timer for NTB_ATOM since there is no link interrupt */
511	callout_init(&ntb->heartbeat_timer, 1);
512	callout_init(&ntb->lr_timer, 1);
513	mtx_init(&ntb->db_mask_lock, "ntb hw bits", NULL, MTX_SPIN);
514	mtx_init(&ntb->ctx_lock, "ntb ctx", NULL, MTX_SPIN);
515
516	if (ntb->type == NTB_ATOM)
517		error = ntb_detect_atom(ntb);
518	else
519		error = ntb_detect_xeon(ntb);
520	if (error)
521		goto out;
522
523	ntb_detect_max_mw(ntb);
524
525	error = ntb_map_pci_bars(ntb);
526	if (error)
527		goto out;
528	if (ntb->type == NTB_ATOM)
529		error = ntb_atom_init_dev(ntb);
530	else
531		error = ntb_xeon_init_dev(ntb);
532	if (error)
533		goto out;
534	error = ntb_init_isr(ntb);
535	if (error)
536		goto out;
537	ntb_sysctl_init(ntb);
538
539	pci_enable_busmaster(ntb->device);
540
541out:
542	if (error != 0)
543		ntb_detach(device);
544	return (error);
545}
546
547static int
548ntb_detach(device_t device)
549{
550	struct ntb_softc *ntb;
551
552	ntb = DEVICE2SOFTC(device);
553
554	if (ntb->self_reg != NULL)
555		ntb_db_set_mask(ntb, ntb->db_valid_mask);
556	callout_drain(&ntb->heartbeat_timer);
557	callout_drain(&ntb->lr_timer);
558	if (ntb->type == NTB_XEON)
559		ntb_teardown_xeon(ntb);
560	ntb_teardown_interrupts(ntb);
561
562	mtx_destroy(&ntb->db_mask_lock);
563	mtx_destroy(&ntb->ctx_lock);
564
565	/*
566	 * Redetect total MWs so we unmap properly -- in case we lowered the
567	 * maximum to work around Xeon errata.
568	 */
569	ntb_detect_max_mw(ntb);
570	ntb_unmap_pci_bar(ntb);
571
572	return (0);
573}
574
575/*
576 * Driver internal routines
577 */
578static inline enum ntb_bar
579ntb_mw_to_bar(struct ntb_softc *ntb, unsigned mw)
580{
581
582	KASSERT(mw < ntb->mw_count ||
583	    (mw != B2B_MW_DISABLED && mw == ntb->b2b_mw_idx),
584	    ("%s: mw:%u > count:%u", __func__, mw, (unsigned)ntb->mw_count));
585	KASSERT(ntb->reg->mw_bar[mw] != 0, ("invalid mw"));
586
587	return (ntb->reg->mw_bar[mw]);
588}
589
590static inline bool
591bar_is_64bit(struct ntb_softc *ntb, enum ntb_bar bar)
592{
593	/* XXX This assertion could be stronger. */
594	KASSERT(bar < NTB_MAX_BARS, ("bogus bar"));
595	return (bar < NTB_B2B_BAR_2 || !HAS_FEATURE(NTB_SPLIT_BAR));
596}
597
598static inline void
599bar_get_xlat_params(struct ntb_softc *ntb, enum ntb_bar bar, uint32_t *base,
600    uint32_t *xlat, uint32_t *lmt)
601{
602	uint32_t basev, lmtv, xlatv;
603
604	switch (bar) {
605	case NTB_B2B_BAR_1:
606		basev = ntb->xlat_reg->bar2_base;
607		lmtv = ntb->xlat_reg->bar2_limit;
608		xlatv = ntb->xlat_reg->bar2_xlat;
609		break;
610	case NTB_B2B_BAR_2:
611		basev = ntb->xlat_reg->bar4_base;
612		lmtv = ntb->xlat_reg->bar4_limit;
613		xlatv = ntb->xlat_reg->bar4_xlat;
614		break;
615	case NTB_B2B_BAR_3:
616		basev = ntb->xlat_reg->bar5_base;
617		lmtv = ntb->xlat_reg->bar5_limit;
618		xlatv = ntb->xlat_reg->bar5_xlat;
619		break;
620	default:
621		KASSERT(bar >= NTB_B2B_BAR_1 && bar < NTB_MAX_BARS,
622		    ("bad bar"));
623		basev = lmtv = xlatv = 0;
624		break;
625	}
626
627	if (base != NULL)
628		*base = basev;
629	if (xlat != NULL)
630		*xlat = xlatv;
631	if (lmt != NULL)
632		*lmt = lmtv;
633}
634
635static int
636ntb_map_pci_bars(struct ntb_softc *ntb)
637{
638	int rc;
639
640	ntb->bar_info[NTB_CONFIG_BAR].pci_resource_id = PCIR_BAR(0);
641	rc = map_mmr_bar(ntb, &ntb->bar_info[NTB_CONFIG_BAR]);
642	if (rc != 0)
643		goto out;
644
645	ntb->bar_info[NTB_B2B_BAR_1].pci_resource_id = PCIR_BAR(2);
646	rc = map_memory_window_bar(ntb, &ntb->bar_info[NTB_B2B_BAR_1]);
647	if (rc != 0)
648		goto out;
649	ntb->bar_info[NTB_B2B_BAR_1].psz_off = XEON_PBAR23SZ_OFFSET;
650	ntb->bar_info[NTB_B2B_BAR_1].ssz_off = XEON_SBAR23SZ_OFFSET;
651	ntb->bar_info[NTB_B2B_BAR_1].pbarxlat_off = XEON_PBAR2XLAT_OFFSET;
652
653	ntb->bar_info[NTB_B2B_BAR_2].pci_resource_id = PCIR_BAR(4);
654	/* XXX Are shared MW B2Bs write-combining? */
655	if (HAS_FEATURE(NTB_SDOORBELL_LOCKUP) && !HAS_FEATURE(NTB_SPLIT_BAR))
656		rc = map_mmr_bar(ntb, &ntb->bar_info[NTB_B2B_BAR_2]);
657	else
658		rc = map_memory_window_bar(ntb, &ntb->bar_info[NTB_B2B_BAR_2]);
659	ntb->bar_info[NTB_B2B_BAR_2].psz_off = XEON_PBAR4SZ_OFFSET;
660	ntb->bar_info[NTB_B2B_BAR_2].ssz_off = XEON_SBAR4SZ_OFFSET;
661	ntb->bar_info[NTB_B2B_BAR_2].pbarxlat_off = XEON_PBAR4XLAT_OFFSET;
662
663	if (!HAS_FEATURE(NTB_SPLIT_BAR))
664		goto out;
665
666	ntb->bar_info[NTB_B2B_BAR_3].pci_resource_id = PCIR_BAR(5);
667	if (HAS_FEATURE(NTB_SDOORBELL_LOCKUP))
668		rc = map_mmr_bar(ntb, &ntb->bar_info[NTB_B2B_BAR_3]);
669	else
670		rc = map_memory_window_bar(ntb, &ntb->bar_info[NTB_B2B_BAR_3]);
671	ntb->bar_info[NTB_B2B_BAR_3].psz_off = XEON_PBAR5SZ_OFFSET;
672	ntb->bar_info[NTB_B2B_BAR_3].ssz_off = XEON_SBAR5SZ_OFFSET;
673	ntb->bar_info[NTB_B2B_BAR_3].pbarxlat_off = XEON_PBAR5XLAT_OFFSET;
674
675out:
676	if (rc != 0)
677		device_printf(ntb->device,
678		    "unable to allocate pci resource\n");
679	return (rc);
680}
681
682static void
683print_map_success(struct ntb_softc *ntb, struct ntb_pci_bar_info *bar,
684    const char *kind)
685{
686
687	device_printf(ntb->device,
688	    "Mapped BAR%d v:[%p-%p] p:[%p-%p] (0x%jx bytes) (%s)\n",
689	    PCI_RID2BAR(bar->pci_resource_id), bar->vbase,
690	    (char *)bar->vbase + bar->size - 1,
691	    (void *)bar->pbase, (void *)(bar->pbase + bar->size - 1),
692	    (uintmax_t)bar->size, kind);
693}
694
695static int
696map_mmr_bar(struct ntb_softc *ntb, struct ntb_pci_bar_info *bar)
697{
698
699	bar->pci_resource = bus_alloc_resource_any(ntb->device, SYS_RES_MEMORY,
700	    &bar->pci_resource_id, RF_ACTIVE);
701	if (bar->pci_resource == NULL)
702		return (ENXIO);
703
704	save_bar_parameters(bar);
705	print_map_success(ntb, bar, "mmr");
706	return (0);
707}
708
709static int
710map_memory_window_bar(struct ntb_softc *ntb, struct ntb_pci_bar_info *bar)
711{
712	int rc;
713	uint8_t bar_size_bits = 0;
714
715	bar->pci_resource = bus_alloc_resource_any(ntb->device, SYS_RES_MEMORY,
716	    &bar->pci_resource_id, RF_ACTIVE);
717
718	if (bar->pci_resource == NULL)
719		return (ENXIO);
720
721	save_bar_parameters(bar);
722	/*
723	 * Ivytown NTB BAR sizes are misreported by the hardware due to a
724	 * hardware issue. To work around this, query the size it should be
725	 * configured to by the device and modify the resource to correspond to
726	 * this new size. The BIOS on systems with this problem is required to
727	 * provide enough address space to allow the driver to make this change
728	 * safely.
729	 *
730	 * Ideally I could have just specified the size when I allocated the
731	 * resource like:
732	 *  bus_alloc_resource(ntb->device,
733	 *	SYS_RES_MEMORY, &bar->pci_resource_id, 0ul, ~0ul,
734	 *	1ul << bar_size_bits, RF_ACTIVE);
735	 * but the PCI driver does not honor the size in this call, so we have
736	 * to modify it after the fact.
737	 */
738	if (HAS_FEATURE(NTB_BAR_SIZE_4K)) {
739		if (bar->pci_resource_id == PCIR_BAR(2))
740			bar_size_bits = pci_read_config(ntb->device,
741			    XEON_PBAR23SZ_OFFSET, 1);
742		else
743			bar_size_bits = pci_read_config(ntb->device,
744			    XEON_PBAR45SZ_OFFSET, 1);
745
746		rc = bus_adjust_resource(ntb->device, SYS_RES_MEMORY,
747		    bar->pci_resource, bar->pbase,
748		    bar->pbase + (1ul << bar_size_bits) - 1);
749		if (rc != 0) {
750			device_printf(ntb->device,
751			    "unable to resize bar\n");
752			return (rc);
753		}
754
755		save_bar_parameters(bar);
756	}
757
758	/* Mark bar region as write combining to improve performance. */
759	rc = pmap_change_attr((vm_offset_t)bar->vbase, bar->size,
760	    VM_MEMATTR_WRITE_COMBINING);
761	print_map_success(ntb, bar, "mw");
762	if (rc == 0)
763		device_printf(ntb->device,
764		    "Marked BAR%d v:[%p-%p] p:[%p-%p] as "
765		    "WRITE_COMBINING.\n",
766		    PCI_RID2BAR(bar->pci_resource_id), bar->vbase,
767		    (char *)bar->vbase + bar->size - 1,
768		    (void *)bar->pbase, (void *)(bar->pbase + bar->size - 1));
769	else
770		device_printf(ntb->device,
771		    "Unable to mark BAR%d v:[%p-%p] p:[%p-%p] as "
772		    "WRITE_COMBINING: %d\n",
773		    PCI_RID2BAR(bar->pci_resource_id), bar->vbase,
774		    (char *)bar->vbase + bar->size - 1,
775		    (void *)bar->pbase, (void *)(bar->pbase + bar->size - 1),
776		    rc);
777		/* Proceed anyway */
778	return (0);
779}
780
781static void
782ntb_unmap_pci_bar(struct ntb_softc *ntb)
783{
784	struct ntb_pci_bar_info *current_bar;
785	int i;
786
787	for (i = 0; i < NTB_MAX_BARS; i++) {
788		current_bar = &ntb->bar_info[i];
789		if (current_bar->pci_resource != NULL)
790			bus_release_resource(ntb->device, SYS_RES_MEMORY,
791			    current_bar->pci_resource_id,
792			    current_bar->pci_resource);
793	}
794}
795
796static int
797ntb_setup_msix(struct ntb_softc *ntb, uint32_t num_vectors)
798{
799	uint32_t i;
800	int rc;
801
802	for (i = 0; i < num_vectors; i++) {
803		ntb->int_info[i].rid = i + 1;
804		ntb->int_info[i].res = bus_alloc_resource_any(ntb->device,
805		    SYS_RES_IRQ, &ntb->int_info[i].rid, RF_ACTIVE);
806		if (ntb->int_info[i].res == NULL) {
807			device_printf(ntb->device,
808			    "bus_alloc_resource failed\n");
809			return (ENOMEM);
810		}
811		ntb->int_info[i].tag = NULL;
812		ntb->allocated_interrupts++;
813		rc = bus_setup_intr(ntb->device, ntb->int_info[i].res,
814		    INTR_MPSAFE | INTR_TYPE_MISC, NULL, ndev_vec_isr,
815		    &ntb->msix_vec[i], &ntb->int_info[i].tag);
816		if (rc != 0) {
817			device_printf(ntb->device, "bus_setup_intr failed\n");
818			return (ENXIO);
819		}
820	}
821	return (0);
822}
823
824/*
825 * The Linux NTB driver drops from MSI-X to legacy INTx if a unique vector
826 * cannot be allocated for each MSI-X message.  JHB seems to think remapping
827 * should be okay.  This tunable should enable us to test that hypothesis
828 * when someone gets their hands on some Xeon hardware.
829 */
830static int ntb_force_remap_mode;
831SYSCTL_INT(_hw_ntb, OID_AUTO, force_remap_mode, CTLFLAG_RDTUN,
832    &ntb_force_remap_mode, 0, "If enabled, force MSI-X messages to be remapped"
833    " to a smaller number of ithreads, even if the desired number are "
834    "available");
835
836/*
837 * In case it is NOT ok, give consumers an abort button.
838 */
839static int ntb_prefer_intx;
840SYSCTL_INT(_hw_ntb, OID_AUTO, prefer_intx_to_remap, CTLFLAG_RDTUN,
841    &ntb_prefer_intx, 0, "If enabled, prefer to use legacy INTx mode rather "
842    "than remapping MSI-X messages over available slots (match Linux driver "
843    "behavior)");
844
845/*
846 * Remap the desired number of MSI-X messages to available ithreads in a simple
847 * round-robin fashion.
848 */
849static int
850ntb_remap_msix(device_t dev, uint32_t desired, uint32_t avail)
851{
852	u_int *vectors;
853	uint32_t i;
854	int rc;
855
856	if (ntb_prefer_intx != 0)
857		return (ENXIO);
858
859	vectors = malloc(desired * sizeof(*vectors), M_NTB, M_ZERO | M_WAITOK);
860
861	for (i = 0; i < desired; i++)
862		vectors[i] = (i % avail) + 1;
863
864	rc = pci_remap_msix(dev, desired, vectors);
865	free(vectors, M_NTB);
866	return (rc);
867}
868
869static int
870ntb_init_isr(struct ntb_softc *ntb)
871{
872	uint32_t desired_vectors, num_vectors;
873	int rc;
874
875	ntb->allocated_interrupts = 0;
876	ntb->last_ts = ticks;
877
878	/*
879	 * Mask all doorbell interrupts.
880	 */
881	ntb_db_set_mask(ntb, ntb->db_valid_mask);
882
883	num_vectors = desired_vectors = MIN(pci_msix_count(ntb->device),
884	    ntb->db_count);
885	if (desired_vectors >= 1) {
886		rc = pci_alloc_msix(ntb->device, &num_vectors);
887
888		if (ntb_force_remap_mode != 0 && rc == 0 &&
889		    num_vectors == desired_vectors)
890			num_vectors--;
891
892		if (rc == 0 && num_vectors < desired_vectors) {
893			rc = ntb_remap_msix(ntb->device, desired_vectors,
894			    num_vectors);
895			if (rc == 0)
896				num_vectors = desired_vectors;
897			else
898				pci_release_msi(ntb->device);
899		}
900		if (rc != 0)
901			num_vectors = 1;
902	} else
903		num_vectors = 1;
904
905	if (ntb->type == NTB_XEON && num_vectors < ntb->db_vec_count) {
906		ntb->db_vec_count = 1;
907		ntb->db_vec_shift = ntb->db_count;
908		rc = ntb_setup_legacy_interrupt(ntb);
909	} else {
910		ntb_create_msix_vec(ntb, num_vectors);
911		rc = ntb_setup_msix(ntb, num_vectors);
912	}
913	if (rc != 0) {
914		device_printf(ntb->device,
915		    "Error allocating interrupts: %d\n", rc);
916		ntb_free_msix_vec(ntb);
917	}
918
919	return (rc);
920}
921
922static int
923ntb_setup_legacy_interrupt(struct ntb_softc *ntb)
924{
925	int rc;
926
927	ntb->int_info[0].rid = 0;
928	ntb->int_info[0].res = bus_alloc_resource_any(ntb->device, SYS_RES_IRQ,
929	    &ntb->int_info[0].rid, RF_SHAREABLE|RF_ACTIVE);
930	if (ntb->int_info[0].res == NULL) {
931		device_printf(ntb->device, "bus_alloc_resource failed\n");
932		return (ENOMEM);
933	}
934
935	ntb->int_info[0].tag = NULL;
936	ntb->allocated_interrupts = 1;
937
938	rc = bus_setup_intr(ntb->device, ntb->int_info[0].res,
939	    INTR_MPSAFE | INTR_TYPE_MISC, NULL, ndev_irq_isr,
940	    ntb, &ntb->int_info[0].tag);
941	if (rc != 0) {
942		device_printf(ntb->device, "bus_setup_intr failed\n");
943		return (ENXIO);
944	}
945
946	return (0);
947}
948
949static void
950ntb_teardown_interrupts(struct ntb_softc *ntb)
951{
952	struct ntb_int_info *current_int;
953	int i;
954
955	for (i = 0; i < ntb->allocated_interrupts; i++) {
956		current_int = &ntb->int_info[i];
957		if (current_int->tag != NULL)
958			bus_teardown_intr(ntb->device, current_int->res,
959			    current_int->tag);
960
961		if (current_int->res != NULL)
962			bus_release_resource(ntb->device, SYS_RES_IRQ,
963			    rman_get_rid(current_int->res), current_int->res);
964	}
965
966	ntb_free_msix_vec(ntb);
967	pci_release_msi(ntb->device);
968}
969
970/*
971 * Doorbell register and mask are 64-bit on Atom, 16-bit on Xeon.  Abstract it
972 * out to make code clearer.
973 */
974static inline uint64_t
975db_ioread(struct ntb_softc *ntb, uint64_t regoff)
976{
977
978	if (ntb->type == NTB_ATOM)
979		return (ntb_reg_read(8, regoff));
980
981	KASSERT(ntb->type == NTB_XEON, ("bad ntb type"));
982
983	return (ntb_reg_read(2, regoff));
984}
985
986static inline void
987db_iowrite(struct ntb_softc *ntb, uint64_t regoff, uint64_t val)
988{
989
990	KASSERT((val & ~ntb->db_valid_mask) == 0,
991	    ("%s: Invalid bits 0x%jx (valid: 0x%jx)", __func__,
992	     (uintmax_t)(val & ~ntb->db_valid_mask),
993	     (uintmax_t)ntb->db_valid_mask));
994
995	if (regoff == ntb->self_reg->db_mask)
996		DB_MASK_ASSERT(ntb, MA_OWNED);
997
998	if (ntb->type == NTB_ATOM) {
999		ntb_reg_write(8, regoff, val);
1000		return;
1001	}
1002
1003	KASSERT(ntb->type == NTB_XEON, ("bad ntb type"));
1004	ntb_reg_write(2, regoff, (uint16_t)val);
1005}
1006
1007void
1008ntb_db_set_mask(struct ntb_softc *ntb, uint64_t bits)
1009{
1010
1011	DB_MASK_LOCK(ntb);
1012	ntb->db_mask |= bits;
1013	db_iowrite(ntb, ntb->self_reg->db_mask, ntb->db_mask);
1014	DB_MASK_UNLOCK(ntb);
1015}
1016
1017void
1018ntb_db_clear_mask(struct ntb_softc *ntb, uint64_t bits)
1019{
1020
1021	KASSERT((bits & ~ntb->db_valid_mask) == 0,
1022	    ("%s: Invalid bits 0x%jx (valid: 0x%jx)", __func__,
1023	     (uintmax_t)(bits & ~ntb->db_valid_mask),
1024	     (uintmax_t)ntb->db_valid_mask));
1025
1026	DB_MASK_LOCK(ntb);
1027	ntb->db_mask &= ~bits;
1028	db_iowrite(ntb, ntb->self_reg->db_mask, ntb->db_mask);
1029	DB_MASK_UNLOCK(ntb);
1030}
1031
1032uint64_t
1033ntb_db_read(struct ntb_softc *ntb)
1034{
1035
1036	return (db_ioread(ntb, ntb->self_reg->db_bell));
1037}
1038
1039void
1040ntb_db_clear(struct ntb_softc *ntb, uint64_t bits)
1041{
1042
1043	KASSERT((bits & ~ntb->db_valid_mask) == 0,
1044	    ("%s: Invalid bits 0x%jx (valid: 0x%jx)", __func__,
1045	     (uintmax_t)(bits & ~ntb->db_valid_mask),
1046	     (uintmax_t)ntb->db_valid_mask));
1047
1048	db_iowrite(ntb, ntb->self_reg->db_bell, bits);
1049}
1050
1051static inline uint64_t
1052ntb_vec_mask(struct ntb_softc *ntb, uint64_t db_vector)
1053{
1054	uint64_t shift, mask;
1055
1056	shift = ntb->db_vec_shift;
1057	mask = (1ull << shift) - 1;
1058	return (mask << (shift * db_vector));
1059}
1060
1061static void
1062ntb_interrupt(struct ntb_softc *ntb, uint32_t vec)
1063{
1064	uint64_t vec_mask;
1065
1066	ntb->last_ts = ticks;
1067	vec_mask = ntb_vec_mask(ntb, vec);
1068
1069	if ((vec_mask & ntb->db_link_mask) != 0) {
1070		if (ntb_poll_link(ntb))
1071			ntb_link_event(ntb);
1072	}
1073
1074	if ((vec_mask & ntb->db_valid_mask) != 0)
1075		ntb_db_event(ntb, vec);
1076}
1077
1078static void
1079ndev_vec_isr(void *arg)
1080{
1081	struct ntb_vec *nvec = arg;
1082
1083	ntb_interrupt(nvec->ntb, nvec->num);
1084}
1085
/*
 * Interrupt handler used when MSI-X could not be set up.  'arg' is the softc;
 * with only one vector available everything is reported as vector 0.
 */
static void
ndev_irq_isr(void *arg)
{
	struct ntb_softc *ntb;

	ntb = arg;
	ntb_interrupt(ntb, 0);
}
1092
1093static int
1094ntb_create_msix_vec(struct ntb_softc *ntb, uint32_t num_vectors)
1095{
1096	uint32_t i;
1097
1098	ntb->msix_vec = malloc(num_vectors * sizeof(*ntb->msix_vec), M_NTB,
1099	    M_ZERO | M_WAITOK);
1100	for (i = 0; i < num_vectors; i++) {
1101		ntb->msix_vec[i].num = i;
1102		ntb->msix_vec[i].ntb = ntb;
1103	}
1104
1105	return (0);
1106}
1107
1108static void
1109ntb_free_msix_vec(struct ntb_softc *ntb)
1110{
1111
1112	if (ntb->msix_vec == NULL)
1113		return;
1114
1115	free(ntb->msix_vec, M_NTB);
1116	ntb->msix_vec = NULL;
1117}
1118
1119static struct ntb_hw_info *
1120ntb_get_device_info(uint32_t device_id)
1121{
1122	struct ntb_hw_info *ep = pci_ids;
1123
1124	while (ep->device_id) {
1125		if (ep->device_id == device_id)
1126			return (ep);
1127		++ep;
1128	}
1129	return (NULL);
1130}
1131
1132static void
1133ntb_teardown_xeon(struct ntb_softc *ntb)
1134{
1135
1136	if (ntb->reg != NULL)
1137		ntb_link_disable(ntb);
1138}
1139
1140static void
1141ntb_detect_max_mw(struct ntb_softc *ntb)
1142{
1143
1144	if (ntb->type == NTB_ATOM) {
1145		ntb->mw_count = ATOM_MW_COUNT;
1146		return;
1147	}
1148
1149	if (HAS_FEATURE(NTB_SPLIT_BAR))
1150		ntb->mw_count = XEON_HSX_SPLIT_MW_COUNT;
1151	else
1152		ntb->mw_count = XEON_SNB_MW_COUNT;
1153}
1154
1155static int
1156ntb_detect_xeon(struct ntb_softc *ntb)
1157{
1158	uint8_t ppd, conn_type;
1159
1160	ppd = pci_read_config(ntb->device, NTB_PPD_OFFSET, 1);
1161	ntb->ppd = ppd;
1162
1163	if ((ppd & XEON_PPD_DEV_TYPE) != 0)
1164		ntb->dev_type = NTB_DEV_USD;
1165	else
1166		ntb->dev_type = NTB_DEV_DSD;
1167
1168	if ((ppd & XEON_PPD_SPLIT_BAR) != 0)
1169		ntb->features |= NTB_SPLIT_BAR;
1170
1171	/* SB01BASE_LOCKUP errata is a superset of SDOORBELL errata */
1172	if (HAS_FEATURE(NTB_SB01BASE_LOCKUP))
1173		ntb->features |= NTB_SDOORBELL_LOCKUP;
1174
1175	conn_type = ppd & XEON_PPD_CONN_TYPE;
1176	switch (conn_type) {
1177	case NTB_CONN_B2B:
1178		ntb->conn_type = conn_type;
1179		break;
1180	case NTB_CONN_RP:
1181	case NTB_CONN_TRANSPARENT:
1182	default:
1183		device_printf(ntb->device, "Unsupported connection type: %u\n",
1184		    (unsigned)conn_type);
1185		return (ENXIO);
1186	}
1187	return (0);
1188}
1189
1190static int
1191ntb_detect_atom(struct ntb_softc *ntb)
1192{
1193	uint32_t ppd, conn_type;
1194
1195	ppd = pci_read_config(ntb->device, NTB_PPD_OFFSET, 4);
1196	ntb->ppd = ppd;
1197
1198	if ((ppd & ATOM_PPD_DEV_TYPE) != 0)
1199		ntb->dev_type = NTB_DEV_DSD;
1200	else
1201		ntb->dev_type = NTB_DEV_USD;
1202
1203	conn_type = (ppd & ATOM_PPD_CONN_TYPE) >> 8;
1204	switch (conn_type) {
1205	case NTB_CONN_B2B:
1206		ntb->conn_type = conn_type;
1207		break;
1208	default:
1209		device_printf(ntb->device, "Unsupported NTB configuration\n");
1210		return (ENXIO);
1211	}
1212	return (0);
1213}
1214
1215static int
1216ntb_xeon_init_dev(struct ntb_softc *ntb)
1217{
1218	int rc;
1219
1220	ntb->spad_count		= XEON_SPAD_COUNT;
1221	ntb->db_count		= XEON_DB_COUNT;
1222	ntb->db_link_mask	= XEON_DB_LINK_BIT;
1223	ntb->db_vec_count	= XEON_DB_MSIX_VECTOR_COUNT;
1224	ntb->db_vec_shift	= XEON_DB_MSIX_VECTOR_SHIFT;
1225
1226	if (ntb->conn_type != NTB_CONN_B2B) {
1227		device_printf(ntb->device, "Connection type %d not supported\n",
1228		    ntb->conn_type);
1229		return (ENXIO);
1230	}
1231
1232	ntb->reg = &xeon_reg;
1233	ntb->self_reg = &xeon_pri_reg;
1234	ntb->peer_reg = &xeon_b2b_reg;
1235	ntb->xlat_reg = &xeon_sec_xlat;
1236
1237	/*
1238	 * There is a Xeon hardware errata related to writes to SDOORBELL or
1239	 * B2BDOORBELL in conjunction with inbound access to NTB MMIO space,
1240	 * which may hang the system.  To workaround this use the second memory
1241	 * window to access the interrupt and scratch pad registers on the
1242	 * remote system.
1243	 */
1244	if (HAS_FEATURE(NTB_SDOORBELL_LOCKUP))
1245		/* Use the last MW for mapping remote spad */
1246		ntb->b2b_mw_idx = ntb->mw_count - 1;
1247	else if (HAS_FEATURE(NTB_B2BDOORBELL_BIT14))
1248		/*
1249		 * HW Errata on bit 14 of b2bdoorbell register.  Writes will not be
1250		 * mirrored to the remote system.  Shrink the number of bits by one,
1251		 * since bit 14 is the last bit.
1252		 *
1253		 * On REGS_THRU_MW errata mode, we don't use the b2bdoorbell register
1254		 * anyway.  Nor for non-B2B connection types.
1255		 */
1256		ntb->db_count = XEON_DB_COUNT - 1;
1257
1258	ntb->db_valid_mask = (1ull << ntb->db_count) - 1;
1259
1260	if (ntb->dev_type == NTB_DEV_USD)
1261		rc = xeon_setup_b2b_mw(ntb, &xeon_b2b_dsd_addr,
1262		    &xeon_b2b_usd_addr);
1263	else
1264		rc = xeon_setup_b2b_mw(ntb, &xeon_b2b_usd_addr,
1265		    &xeon_b2b_dsd_addr);
1266	if (rc != 0)
1267		return (rc);
1268
1269	/* Enable Bus Master and Memory Space on the secondary side */
1270	ntb_reg_write(2, XEON_PCICMD_OFFSET,
1271	    PCIM_CMD_MEMEN | PCIM_CMD_BUSMASTEREN);
1272
1273	/* Enable link training */
1274	ntb_link_enable(ntb, NTB_SPEED_AUTO, NTB_WIDTH_AUTO);
1275
1276	return (0);
1277}
1278
1279static int
1280ntb_atom_init_dev(struct ntb_softc *ntb)
1281{
1282
1283	KASSERT(ntb->conn_type == NTB_CONN_B2B,
1284	    ("Unsupported NTB configuration (%d)\n", ntb->conn_type));
1285
1286	ntb->spad_count		 = ATOM_SPAD_COUNT;
1287	ntb->db_count		 = ATOM_DB_COUNT;
1288	ntb->db_vec_count	 = ATOM_DB_MSIX_VECTOR_COUNT;
1289	ntb->db_vec_shift	 = ATOM_DB_MSIX_VECTOR_SHIFT;
1290	ntb->db_valid_mask	 = (1ull << ntb->db_count) - 1;
1291
1292	ntb->reg = &atom_reg;
1293	ntb->self_reg = &atom_pri_reg;
1294	ntb->peer_reg = &atom_b2b_reg;
1295	ntb->xlat_reg = &atom_sec_xlat;
1296
1297	/*
1298	 * FIXME - MSI-X bug on early Atom HW, remove once internal issue is
1299	 * resolved.  Mask transaction layer internal parity errors.
1300	 */
1301	pci_write_config(ntb->device, 0xFC, 0x4, 4);
1302
1303	configure_atom_secondary_side_bars(ntb);
1304
1305	/* Enable Bus Master and Memory Space on the secondary side */
1306	ntb_reg_write(2, ATOM_PCICMD_OFFSET,
1307	    PCIM_CMD_MEMEN | PCIM_CMD_BUSMASTEREN);
1308
1309	/* Initiate PCI-E link training */
1310	ntb_link_enable(ntb, NTB_SPEED_AUTO, NTB_WIDTH_AUTO);
1311
1312	callout_reset(&ntb->heartbeat_timer, 0, atom_link_hb, ntb);
1313
1314	return (0);
1315}
1316
1317/* XXX: Linux driver doesn't seem to do any of this for Atom. */
1318static void
1319configure_atom_secondary_side_bars(struct ntb_softc *ntb)
1320{
1321
1322	if (ntb->dev_type == NTB_DEV_USD) {
1323		ntb_reg_write(8, ATOM_PBAR2XLAT_OFFSET,
1324		    XEON_B2B_BAR2_DSD_ADDR64);
1325		ntb_reg_write(8, ATOM_PBAR4XLAT_OFFSET,
1326		    XEON_B2B_BAR4_DSD_ADDR64);
1327		ntb_reg_write(8, ATOM_MBAR23_OFFSET, XEON_B2B_BAR2_USD_ADDR64);
1328		ntb_reg_write(8, ATOM_MBAR45_OFFSET, XEON_B2B_BAR4_USD_ADDR64);
1329	} else {
1330		ntb_reg_write(8, ATOM_PBAR2XLAT_OFFSET,
1331		    XEON_B2B_BAR2_USD_ADDR64);
1332		ntb_reg_write(8, ATOM_PBAR4XLAT_OFFSET,
1333		    XEON_B2B_BAR4_USD_ADDR64);
1334		ntb_reg_write(8, ATOM_MBAR23_OFFSET, XEON_B2B_BAR2_DSD_ADDR64);
1335		ntb_reg_write(8, ATOM_MBAR45_OFFSET, XEON_B2B_BAR4_DSD_ADDR64);
1336	}
1337}
1338
1339
/*
 * When working around Xeon SDOORBELL errata by remapping remote registers in a
 * MW, limit the B2B MW to half a MW.  By sharing a MW, half the shared MW
 * remains for use by a higher layer.
 *
 * Will only be used if working around SDOORBELL errata and the BIOS-configured
 * MW size is sufficiently large.
 *
 * Consulted by xeon_setup_b2b_mw(): when non-zero and half of the B2B BAR is
 * at least XEON_B2B_MIN_SIZE, the B2B registers are placed at an offset of
 * half the BAR size; otherwise the whole BAR is taken for the registers.
 */
static unsigned int ntb_b2b_mw_share;
SYSCTL_UINT(_hw_ntb, OID_AUTO, b2b_mw_share, CTLFLAG_RDTUN, &ntb_b2b_mw_share,
    0, "If enabled (non-zero), prefer to share half of the B2B peer register "
    "MW with higher level consumers.  Both sides of the NTB MUST set the same "
    "value here.");
1353
1354static void
1355xeon_reset_sbar_size(struct ntb_softc *ntb, enum ntb_bar idx,
1356    enum ntb_bar regbar)
1357{
1358	struct ntb_pci_bar_info *bar;
1359	uint8_t bar_sz;
1360
1361	if (!HAS_FEATURE(NTB_SPLIT_BAR) && idx >= NTB_B2B_BAR_3)
1362		return;
1363
1364	bar = &ntb->bar_info[idx];
1365	bar_sz = pci_read_config(ntb->device, bar->psz_off, 1);
1366	if (idx == regbar) {
1367		if (ntb->b2b_off != 0)
1368			bar_sz--;
1369		else
1370			bar_sz = 0;
1371	}
1372	pci_write_config(ntb->device, bar->ssz_off, bar_sz, 1);
1373	bar_sz = pci_read_config(ntb->device, bar->ssz_off, 1);
1374	(void)bar_sz;
1375}
1376
1377static void
1378xeon_set_sbar_base_and_limit(struct ntb_softc *ntb, uint64_t bar_addr,
1379    enum ntb_bar idx, enum ntb_bar regbar)
1380{
1381	uint64_t reg_val;
1382	uint32_t base_reg, lmt_reg;
1383
1384	bar_get_xlat_params(ntb, idx, &base_reg, NULL, &lmt_reg);
1385	if (idx == regbar)
1386		bar_addr += ntb->b2b_off;
1387
1388	if (!bar_is_64bit(ntb, idx)) {
1389		ntb_reg_write(4, base_reg, bar_addr);
1390		reg_val = ntb_reg_read(4, base_reg);
1391		(void)reg_val;
1392
1393		ntb_reg_write(4, lmt_reg, bar_addr);
1394		reg_val = ntb_reg_read(4, lmt_reg);
1395		(void)reg_val;
1396	} else {
1397		ntb_reg_write(8, base_reg, bar_addr);
1398		reg_val = ntb_reg_read(8, base_reg);
1399		(void)reg_val;
1400
1401		ntb_reg_write(8, lmt_reg, bar_addr);
1402		reg_val = ntb_reg_read(8, lmt_reg);
1403		(void)reg_val;
1404	}
1405}
1406
1407static void
1408xeon_set_pbar_xlat(struct ntb_softc *ntb, uint64_t base_addr, enum ntb_bar idx)
1409{
1410	struct ntb_pci_bar_info *bar;
1411
1412	bar = &ntb->bar_info[idx];
1413	if (HAS_FEATURE(NTB_SPLIT_BAR) && idx >= NTB_B2B_BAR_2) {
1414		ntb_reg_write(4, bar->pbarxlat_off, base_addr);
1415		base_addr = ntb_reg_read(4, bar->pbarxlat_off);
1416	} else {
1417		ntb_reg_write(8, bar->pbarxlat_off, base_addr);
1418		base_addr = ntb_reg_read(8, bar->pbarxlat_off);
1419	}
1420	(void)base_addr;
1421}
1422
1423static int
1424xeon_setup_b2b_mw(struct ntb_softc *ntb, const struct ntb_b2b_addr *addr,
1425    const struct ntb_b2b_addr *peer_addr)
1426{
1427	struct ntb_pci_bar_info *b2b_bar;
1428	vm_size_t bar_size;
1429	uint64_t bar_addr;
1430	enum ntb_bar b2b_bar_num, i;
1431
1432	if (ntb->b2b_mw_idx == B2B_MW_DISABLED) {
1433		b2b_bar = NULL;
1434		b2b_bar_num = NTB_CONFIG_BAR;
1435		ntb->b2b_off = 0;
1436	} else {
1437		b2b_bar_num = ntb_mw_to_bar(ntb, ntb->b2b_mw_idx);
1438		KASSERT(b2b_bar_num > 0 && b2b_bar_num < NTB_MAX_BARS,
1439		    ("invalid b2b mw bar"));
1440
1441		b2b_bar = &ntb->bar_info[b2b_bar_num];
1442		bar_size = b2b_bar->size;
1443
1444		if (ntb_b2b_mw_share != 0 &&
1445		    (bar_size >> 1) >= XEON_B2B_MIN_SIZE)
1446			ntb->b2b_off = bar_size >> 1;
1447		else if (bar_size >= XEON_B2B_MIN_SIZE) {
1448			ntb->b2b_off = 0;
1449			ntb->mw_count--;
1450		} else {
1451			device_printf(ntb->device,
1452			    "B2B bar size is too small!\n");
1453			return (EIO);
1454		}
1455	}
1456
1457	/*
1458	 * Reset the secondary bar sizes to match the primary bar sizes.
1459	 * (Except, disable or halve the size of the B2B secondary bar.)
1460	 */
1461	for (i = NTB_B2B_BAR_1; i < NTB_MAX_BARS; i++)
1462		xeon_reset_sbar_size(ntb, i, b2b_bar_num);
1463
1464	bar_addr = 0;
1465	if (b2b_bar_num == NTB_CONFIG_BAR)
1466		bar_addr = addr->bar0_addr;
1467	else if (b2b_bar_num == NTB_B2B_BAR_1)
1468		bar_addr = addr->bar2_addr64;
1469	else if (b2b_bar_num == NTB_B2B_BAR_2 && !HAS_FEATURE(NTB_SPLIT_BAR))
1470		bar_addr = addr->bar4_addr64;
1471	else if (b2b_bar_num == NTB_B2B_BAR_2)
1472		bar_addr = addr->bar4_addr32;
1473	else if (b2b_bar_num == NTB_B2B_BAR_3)
1474		bar_addr = addr->bar5_addr32;
1475	else
1476		KASSERT(false, ("invalid bar"));
1477
1478	ntb_reg_write(8, XEON_SBAR0BASE_OFFSET, bar_addr);
1479
1480	/*
1481	 * Other SBARs are normally hit by the PBAR xlat, except for the b2b
1482	 * register BAR.  The B2B BAR is either disabled above or configured
1483	 * half-size.  It starts at PBAR xlat + offset.
1484	 *
1485	 * Also set up incoming BAR limits == base (zero length window).
1486	 */
1487	xeon_set_sbar_base_and_limit(ntb, addr->bar2_addr64, NTB_B2B_BAR_1,
1488	    b2b_bar_num);
1489	if (HAS_FEATURE(NTB_SPLIT_BAR)) {
1490		xeon_set_sbar_base_and_limit(ntb, addr->bar4_addr32,
1491		    NTB_B2B_BAR_2, b2b_bar_num);
1492		xeon_set_sbar_base_and_limit(ntb, addr->bar5_addr32,
1493		    NTB_B2B_BAR_3, b2b_bar_num);
1494	} else
1495		xeon_set_sbar_base_and_limit(ntb, addr->bar4_addr64,
1496		    NTB_B2B_BAR_2, b2b_bar_num);
1497
1498	/* Zero incoming translation addrs */
1499	ntb_reg_write(8, XEON_SBAR2XLAT_OFFSET, 0);
1500	ntb_reg_write(8, XEON_SBAR4XLAT_OFFSET, 0);
1501
1502	/* Zero outgoing translation limits (whole bar size windows) */
1503	ntb_reg_write(8, XEON_PBAR2LMT_OFFSET, 0);
1504	ntb_reg_write(8, XEON_PBAR4LMT_OFFSET, 0);
1505
1506	/* Set outgoing translation offsets */
1507	xeon_set_pbar_xlat(ntb, peer_addr->bar2_addr64, NTB_B2B_BAR_1);
1508	if (HAS_FEATURE(NTB_SPLIT_BAR)) {
1509		xeon_set_pbar_xlat(ntb, peer_addr->bar4_addr32, NTB_B2B_BAR_2);
1510		xeon_set_pbar_xlat(ntb, peer_addr->bar5_addr32, NTB_B2B_BAR_3);
1511	} else
1512		xeon_set_pbar_xlat(ntb, peer_addr->bar4_addr64, NTB_B2B_BAR_2);
1513
1514	/* Set the translation offset for B2B registers */
1515	bar_addr = 0;
1516	if (b2b_bar_num == NTB_CONFIG_BAR)
1517		bar_addr = peer_addr->bar0_addr;
1518	else if (b2b_bar_num == NTB_B2B_BAR_1)
1519		bar_addr = peer_addr->bar2_addr64;
1520	else if (b2b_bar_num == NTB_B2B_BAR_2 && !HAS_FEATURE(NTB_SPLIT_BAR))
1521		bar_addr = peer_addr->bar4_addr64;
1522	else if (b2b_bar_num == NTB_B2B_BAR_2)
1523		bar_addr = peer_addr->bar4_addr32;
1524	else if (b2b_bar_num == NTB_B2B_BAR_3)
1525		bar_addr = peer_addr->bar5_addr32;
1526	else
1527		KASSERT(false, ("invalid bar"));
1528
1529	/*
1530	 * B2B_XLAT_OFFSET is a 64-bit register but can only be written 32 bits
1531	 * at a time.
1532	 */
1533	ntb_reg_write(4, XEON_B2B_XLAT_OFFSETL, bar_addr & 0xffffffff);
1534	ntb_reg_write(4, XEON_B2B_XLAT_OFFSETU, bar_addr >> 32);
1535	return (0);
1536}
1537
1538static inline bool
1539link_is_up(struct ntb_softc *ntb)
1540{
1541
1542	if (ntb->type == NTB_XEON) {
1543		if (ntb->conn_type == NTB_CONN_TRANSPARENT)
1544			return (true);
1545		return ((ntb->lnk_sta & NTB_LINK_STATUS_ACTIVE) != 0);
1546	}
1547
1548	KASSERT(ntb->type == NTB_ATOM, ("ntb type"));
1549	return ((ntb->ntb_ctl & ATOM_CNTL_LINK_DOWN) == 0);
1550}
1551
1552static inline bool
1553atom_link_is_err(struct ntb_softc *ntb)
1554{
1555	uint32_t status;
1556
1557	KASSERT(ntb->type == NTB_ATOM, ("ntb type"));
1558
1559	status = ntb_reg_read(4, ATOM_LTSSMSTATEJMP_OFFSET);
1560	if ((status & ATOM_LTSSMSTATEJMP_FORCEDETECT) != 0)
1561		return (true);
1562
1563	status = ntb_reg_read(4, ATOM_IBSTERRRCRVSTS0_OFFSET);
1564	return ((status & ATOM_IBIST_ERR_OFLOW) != 0);
1565}
1566
1567/* Atom does not have link status interrupt, poll on that platform */
1568static void
1569atom_link_hb(void *arg)
1570{
1571	struct ntb_softc *ntb = arg;
1572	sbintime_t timo, poll_ts;
1573
1574	timo = NTB_HB_TIMEOUT * hz;
1575	poll_ts = ntb->last_ts + timo;
1576
1577	/*
1578	 * Delay polling the link status if an interrupt was received, unless
1579	 * the cached link status says the link is down.
1580	 */
1581	if ((sbintime_t)ticks - poll_ts < 0 && link_is_up(ntb)) {
1582		timo = poll_ts - ticks;
1583		goto out;
1584	}
1585
1586	if (ntb_poll_link(ntb))
1587		ntb_link_event(ntb);
1588
1589	if (!link_is_up(ntb) && atom_link_is_err(ntb)) {
1590		/* Link is down with error, proceed with recovery */
1591		callout_reset(&ntb->lr_timer, 0, recover_atom_link, ntb);
1592		return;
1593	}
1594
1595out:
1596	callout_reset(&ntb->heartbeat_timer, timo, atom_link_hb, ntb);
1597}
1598
1599static void
1600atom_perform_link_restart(struct ntb_softc *ntb)
1601{
1602	uint32_t status;
1603
1604	/* Driver resets the NTB ModPhy lanes - magic! */
1605	ntb_reg_write(1, ATOM_MODPHY_PCSREG6, 0xe0);
1606	ntb_reg_write(1, ATOM_MODPHY_PCSREG4, 0x40);
1607	ntb_reg_write(1, ATOM_MODPHY_PCSREG4, 0x60);
1608	ntb_reg_write(1, ATOM_MODPHY_PCSREG6, 0x60);
1609
1610	/* Driver waits 100ms to allow the NTB ModPhy to settle */
1611	pause("ModPhy", hz / 10);
1612
1613	/* Clear AER Errors, write to clear */
1614	status = ntb_reg_read(4, ATOM_ERRCORSTS_OFFSET);
1615	status &= PCIM_AER_COR_REPLAY_ROLLOVER;
1616	ntb_reg_write(4, ATOM_ERRCORSTS_OFFSET, status);
1617
1618	/* Clear unexpected electrical idle event in LTSSM, write to clear */
1619	status = ntb_reg_read(4, ATOM_LTSSMERRSTS0_OFFSET);
1620	status |= ATOM_LTSSMERRSTS0_UNEXPECTEDEI;
1621	ntb_reg_write(4, ATOM_LTSSMERRSTS0_OFFSET, status);
1622
1623	/* Clear DeSkew Buffer error, write to clear */
1624	status = ntb_reg_read(4, ATOM_DESKEWSTS_OFFSET);
1625	status |= ATOM_DESKEWSTS_DBERR;
1626	ntb_reg_write(4, ATOM_DESKEWSTS_OFFSET, status);
1627
1628	status = ntb_reg_read(4, ATOM_IBSTERRRCRVSTS0_OFFSET);
1629	status &= ATOM_IBIST_ERR_OFLOW;
1630	ntb_reg_write(4, ATOM_IBSTERRRCRVSTS0_OFFSET, status);
1631
1632	/* Releases the NTB state machine to allow the link to retrain */
1633	status = ntb_reg_read(4, ATOM_LTSSMSTATEJMP_OFFSET);
1634	status &= ~ATOM_LTSSMSTATEJMP_FORCEDETECT;
1635	ntb_reg_write(4, ATOM_LTSSMSTATEJMP_OFFSET, status);
1636}
1637
1638/*
1639 * ntb_set_ctx() - associate a driver context with an ntb device
1640 * @ntb:        NTB device context
1641 * @ctx:        Driver context
1642 * @ctx_ops:    Driver context operations
1643 *
1644 * Associate a driver context and operations with a ntb device.  The context is
1645 * provided by the client driver, and the driver may associate a different
1646 * context with each ntb device.
1647 *
1648 * Return: Zero if the context is associated, otherwise an error number.
1649 */
1650int
1651ntb_set_ctx(struct ntb_softc *ntb, void *ctx, const struct ntb_ctx_ops *ops)
1652{
1653
1654	if (ctx == NULL || ops == NULL)
1655		return (EINVAL);
1656	if (ntb->ctx_ops != NULL)
1657		return (EINVAL);
1658
1659	CTX_LOCK(ntb);
1660	if (ntb->ctx_ops != NULL) {
1661		CTX_UNLOCK(ntb);
1662		return (EINVAL);
1663	}
1664	ntb->ntb_ctx = ctx;
1665	ntb->ctx_ops = ops;
1666	CTX_UNLOCK(ntb);
1667
1668	return (0);
1669}
1670
1671/*
1672 * It is expected that this will only be used from contexts where the ctx_lock
1673 * is not needed to protect ntb_ctx lifetime.
1674 */
1675void *
1676ntb_get_ctx(struct ntb_softc *ntb, const struct ntb_ctx_ops **ops)
1677{
1678
1679	KASSERT(ntb->ntb_ctx != NULL && ntb->ctx_ops != NULL, ("bogus"));
1680	if (ops != NULL)
1681		*ops = ntb->ctx_ops;
1682	return (ntb->ntb_ctx);
1683}
1684
1685/*
1686 * ntb_clear_ctx() - disassociate any driver context from an ntb device
1687 * @ntb:        NTB device context
1688 *
1689 * Clear any association that may exist between a driver context and the ntb
1690 * device.
1691 */
1692void
1693ntb_clear_ctx(struct ntb_softc *ntb)
1694{
1695
1696	CTX_LOCK(ntb);
1697	ntb->ntb_ctx = NULL;
1698	ntb->ctx_ops = NULL;
1699	CTX_UNLOCK(ntb);
1700}
1701
1702/*
1703 * ntb_link_event() - notify driver context of a change in link status
1704 * @ntb:        NTB device context
1705 *
1706 * Notify the driver context that the link status may have changed.  The driver
1707 * should call ntb_link_is_up() to get the current status.
1708 */
1709void
1710ntb_link_event(struct ntb_softc *ntb)
1711{
1712
1713	CTX_LOCK(ntb);
1714	if (ntb->ctx_ops != NULL && ntb->ctx_ops->link_event != NULL)
1715		ntb->ctx_ops->link_event(ntb->ntb_ctx);
1716	CTX_UNLOCK(ntb);
1717}
1718
1719/*
1720 * ntb_db_event() - notify driver context of a doorbell event
1721 * @ntb:        NTB device context
1722 * @vector:     Interrupt vector number
1723 *
1724 * Notify the driver context of a doorbell event.  If hardware supports
1725 * multiple interrupt vectors for doorbells, the vector number indicates which
1726 * vector received the interrupt.  The vector number is relative to the first
1727 * vector used for doorbells, starting at zero, and must be less than
1728 * ntb_db_vector_count().  The driver may call ntb_db_read() to check which
1729 * doorbell bits need service, and ntb_db_vector_mask() to determine which of
1730 * those bits are associated with the vector number.
1731 */
1732static void
1733ntb_db_event(struct ntb_softc *ntb, uint32_t vec)
1734{
1735
1736	CTX_LOCK(ntb);
1737	if (ntb->ctx_ops != NULL && ntb->ctx_ops->db_event != NULL)
1738		ntb->ctx_ops->db_event(ntb->ntb_ctx, vec);
1739	CTX_UNLOCK(ntb);
1740}
1741
1742/*
1743 * ntb_link_enable() - enable the link on the secondary side of the ntb
1744 * @ntb:        NTB device context
1745 * @max_speed:  The maximum link speed expressed as PCIe generation number[0]
1746 * @max_width:  The maximum link width expressed as the number of PCIe lanes[0]
1747 *
1748 * Enable the link on the secondary side of the ntb.  This can only be done
1749 * from the primary side of the ntb in primary or b2b topology.  The ntb device
1750 * should train the link to its maximum speed and width, or the requested speed
1751 * and width, whichever is smaller, if supported.
1752 *
1753 * Return: Zero on success, otherwise an error number.
1754 *
1755 * [0]: Only NTB_SPEED_AUTO and NTB_WIDTH_AUTO are valid inputs; other speed
1756 *      and width input will be ignored.
1757 */
1758int
1759ntb_link_enable(struct ntb_softc *ntb, enum ntb_speed s __unused,
1760    enum ntb_width w __unused)
1761{
1762	uint32_t cntl;
1763
1764	if (ntb->type == NTB_ATOM) {
1765		pci_write_config(ntb->device, NTB_PPD_OFFSET,
1766		    ntb->ppd | ATOM_PPD_INIT_LINK, 4);
1767		return (0);
1768	}
1769
1770	if (ntb->conn_type == NTB_CONN_TRANSPARENT) {
1771		ntb_link_event(ntb);
1772		return (0);
1773	}
1774
1775	cntl = ntb_reg_read(4, ntb->reg->ntb_ctl);
1776	cntl &= ~(NTB_CNTL_LINK_DISABLE | NTB_CNTL_CFG_LOCK);
1777	cntl |= NTB_CNTL_P2S_BAR23_SNOOP | NTB_CNTL_S2P_BAR23_SNOOP;
1778	cntl |= NTB_CNTL_P2S_BAR4_SNOOP | NTB_CNTL_S2P_BAR4_SNOOP;
1779	if (HAS_FEATURE(NTB_SPLIT_BAR))
1780		cntl |= NTB_CNTL_P2S_BAR5_SNOOP | NTB_CNTL_S2P_BAR5_SNOOP;
1781	ntb_reg_write(4, ntb->reg->ntb_ctl, cntl);
1782	return (0);
1783}
1784
1785/*
1786 * ntb_link_disable() - disable the link on the secondary side of the ntb
1787 * @ntb:        NTB device context
1788 *
1789 * Disable the link on the secondary side of the ntb.  This can only be done
1790 * from the primary side of the ntb in primary or b2b topology.  The ntb device
1791 * should disable the link.  Returning from this call must indicate that a
1792 * barrier has passed, though with no more writes may pass in either direction
1793 * across the link, except if this call returns an error number.
1794 *
1795 * Return: Zero on success, otherwise an error number.
1796 */
1797int
1798ntb_link_disable(struct ntb_softc *ntb)
1799{
1800	uint32_t cntl;
1801
1802	if (ntb->conn_type == NTB_CONN_TRANSPARENT) {
1803		ntb_link_event(ntb);
1804		return (0);
1805	}
1806
1807	cntl = ntb_reg_read(4, ntb->reg->ntb_ctl);
1808	cntl &= ~(NTB_CNTL_P2S_BAR23_SNOOP | NTB_CNTL_S2P_BAR23_SNOOP);
1809	cntl &= ~(NTB_CNTL_P2S_BAR4_SNOOP | NTB_CNTL_S2P_BAR4_SNOOP);
1810	if (HAS_FEATURE(NTB_SPLIT_BAR))
1811		cntl &= ~(NTB_CNTL_P2S_BAR5_SNOOP | NTB_CNTL_S2P_BAR5_SNOOP);
1812	cntl |= NTB_CNTL_LINK_DISABLE | NTB_CNTL_CFG_LOCK;
1813	ntb_reg_write(4, ntb->reg->ntb_ctl, cntl);
1814	return (0);
1815}
1816
1817static void
1818recover_atom_link(void *arg)
1819{
1820	struct ntb_softc *ntb = arg;
1821	unsigned speed, width, oldspeed, oldwidth;
1822	uint32_t status32;
1823
1824	atom_perform_link_restart(ntb);
1825
1826	/*
1827	 * There is a potential race between the 2 NTB devices recovering at
1828	 * the same time.  If the times are the same, the link will not recover
1829	 * and the driver will be stuck in this loop forever.  Add a random
1830	 * interval to the recovery time to prevent this race.
1831	 */
1832	status32 = arc4random() % ATOM_LINK_RECOVERY_TIME;
1833	pause("Link", (ATOM_LINK_RECOVERY_TIME + status32) * hz / 1000);
1834
1835	if (atom_link_is_err(ntb))
1836		goto retry;
1837
1838	status32 = ntb_reg_read(4, ntb->reg->ntb_ctl);
1839	if ((status32 & ATOM_CNTL_LINK_DOWN) != 0)
1840		goto out;
1841
1842	status32 = ntb_reg_read(4, ntb->reg->lnk_sta);
1843	width = NTB_LNK_STA_WIDTH(status32);
1844	speed = status32 & NTB_LINK_SPEED_MASK;
1845
1846	oldwidth = NTB_LNK_STA_WIDTH(ntb->lnk_sta);
1847	oldspeed = ntb->lnk_sta & NTB_LINK_SPEED_MASK;
1848	if (oldwidth != width || oldspeed != speed)
1849		goto retry;
1850
1851out:
1852	callout_reset(&ntb->heartbeat_timer, NTB_HB_TIMEOUT * hz, atom_link_hb,
1853	    ntb);
1854	return;
1855
1856retry:
1857	callout_reset(&ntb->lr_timer, NTB_HB_TIMEOUT * hz, recover_atom_link,
1858	    ntb);
1859}
1860
1861/*
1862 * Polls the HW link status register(s); returns true if something has changed.
1863 */
1864static bool
1865ntb_poll_link(struct ntb_softc *ntb)
1866{
1867	uint32_t ntb_cntl;
1868	uint16_t reg_val;
1869
1870	if (ntb->type == NTB_ATOM) {
1871		ntb_cntl = ntb_reg_read(4, ntb->reg->ntb_ctl);
1872		if (ntb_cntl == ntb->ntb_ctl)
1873			return (false);
1874
1875		ntb->ntb_ctl = ntb_cntl;
1876		ntb->lnk_sta = ntb_reg_read(4, ntb->reg->lnk_sta);
1877	} else {
1878		db_iowrite(ntb, ntb->self_reg->db_bell, ntb->db_link_mask);
1879
1880		reg_val = pci_read_config(ntb->device, ntb->reg->lnk_sta, 2);
1881		if (reg_val == ntb->lnk_sta)
1882			return (false);
1883
1884		ntb->lnk_sta = reg_val;
1885	}
1886	return (true);
1887}
1888
1889static inline enum ntb_speed
1890ntb_link_sta_speed(struct ntb_softc *ntb)
1891{
1892
1893	if (!link_is_up(ntb))
1894		return (NTB_SPEED_NONE);
1895	return (ntb->lnk_sta & NTB_LINK_SPEED_MASK);
1896}
1897
1898static inline enum ntb_width
1899ntb_link_sta_width(struct ntb_softc *ntb)
1900{
1901
1902	if (!link_is_up(ntb))
1903		return (NTB_WIDTH_NONE);
1904	return (NTB_LNK_STA_WIDTH(ntb->lnk_sta));
1905}
1906
SYSCTL_NODE(_hw_ntb, OID_AUTO, debug_info, CTLFLAG_RW, 0,
    "Driver state, statistics, and HW registers");

/*
 * Flag bits that ntb_sysctl_init() ORs together with a register offset to
 * form the arg2 value of the OIDs served by sysctl_handle_register.
 *
 * The top two bits (31:30) encode the register size; going by the names,
 * 64, 32, 16 or 8 bits.
 */
#define NTB_REGSZ_MASK	(3ul << 30)
#define NTB_REG_64	(1ul << 30)
#define NTB_REG_32	(2ul << 30)
#define NTB_REG_16	(3ul << 30)
#define NTB_REG_8	(0ul << 30)

/*
 * NOTE(review): NTB_DB_READ is set for the doorbell and doorbell mask OIDs,
 * so presumably it selects the doorbell accessor; NTB_PCI_REG presumably
 * marks a PCI config space register.  Confirm both in
 * sysctl_handle_register().
 */
#define NTB_DB_READ	(1ul << 29)
#define NTB_PCI_REG	(1ul << 28)
/* Union of all flag bits above. */
#define NTB_REGFLAGS_MASK	(NTB_REGSZ_MASK | NTB_DB_READ | NTB_PCI_REG)
1919
1920static void
1921ntb_sysctl_init(struct ntb_softc *ntb)
1922{
1923	struct sysctl_oid_list *tree_par, *regpar, *statpar, *errpar;
1924	struct sysctl_ctx_list *ctx;
1925	struct sysctl_oid *tree, *tmptree;
1926
1927	ctx = device_get_sysctl_ctx(ntb->device);
1928
1929	tree = SYSCTL_ADD_NODE(ctx,
1930	    SYSCTL_CHILDREN(device_get_sysctl_tree(ntb->device)), OID_AUTO,
1931	    "debug_info", CTLFLAG_RD, NULL,
1932	    "Driver state, statistics, and HW registers");
1933	tree_par = SYSCTL_CHILDREN(tree);
1934
1935	SYSCTL_ADD_UINT(ctx, tree_par, OID_AUTO, "conn_type", CTLFLAG_RD,
1936	    &ntb->conn_type, 0, "0 - Transparent; 1 - B2B; 2 - Root Port");
1937	SYSCTL_ADD_UINT(ctx, tree_par, OID_AUTO, "dev_type", CTLFLAG_RD,
1938	    &ntb->dev_type, 0, "0 - USD; 1 - DSD");
1939
1940	if (ntb->b2b_mw_idx != B2B_MW_DISABLED) {
1941		SYSCTL_ADD_U8(ctx, tree_par, OID_AUTO, "b2b_idx", CTLFLAG_RD,
1942		    &ntb->b2b_mw_idx, 0,
1943		    "Index of the MW used for B2B remote register access");
1944		SYSCTL_ADD_UQUAD(ctx, tree_par, OID_AUTO, "b2b_off",
1945		    CTLFLAG_RD, &ntb->b2b_off,
1946		    "If non-zero, offset of B2B register region in shared MW");
1947	}
1948
1949	SYSCTL_ADD_PROC(ctx, tree_par, OID_AUTO, "features",
1950	    CTLFLAG_RD | CTLTYPE_STRING, ntb, 0, sysctl_handle_features, "A",
1951	    "Features/errata of this NTB device");
1952
1953	SYSCTL_ADD_UINT(ctx, tree_par, OID_AUTO, "ntb_ctl", CTLFLAG_RD,
1954	    &ntb->ntb_ctl, 0, "NTB CTL register (cached)");
1955	SYSCTL_ADD_UINT(ctx, tree_par, OID_AUTO, "lnk_sta", CTLFLAG_RD,
1956	    &ntb->lnk_sta, 0, "LNK STA register (cached)");
1957
1958	SYSCTL_ADD_PROC(ctx, tree_par, OID_AUTO, "link_status",
1959	    CTLFLAG_RD | CTLTYPE_STRING, ntb, 0, sysctl_handle_link_status,
1960	    "A", "Link status");
1961
1962	SYSCTL_ADD_U8(ctx, tree_par, OID_AUTO, "mw_count", CTLFLAG_RD,
1963	    &ntb->mw_count, 0, "MW count (excl. non-shared B2B register BAR)");
1964	SYSCTL_ADD_U8(ctx, tree_par, OID_AUTO, "spad_count", CTLFLAG_RD,
1965	    &ntb->spad_count, 0, "Scratchpad count");
1966	SYSCTL_ADD_U8(ctx, tree_par, OID_AUTO, "db_count", CTLFLAG_RD,
1967	    &ntb->db_count, 0, "Doorbell count");
1968	SYSCTL_ADD_U8(ctx, tree_par, OID_AUTO, "db_vec_count", CTLFLAG_RD,
1969	    &ntb->db_vec_count, 0, "Doorbell vector count");
1970	SYSCTL_ADD_U8(ctx, tree_par, OID_AUTO, "db_vec_shift", CTLFLAG_RD,
1971	    &ntb->db_vec_shift, 0, "Doorbell vector shift");
1972
1973	SYSCTL_ADD_UQUAD(ctx, tree_par, OID_AUTO, "db_valid_mask", CTLFLAG_RD,
1974	    &ntb->db_valid_mask, "Doorbell valid mask");
1975	SYSCTL_ADD_UQUAD(ctx, tree_par, OID_AUTO, "db_link_mask", CTLFLAG_RD,
1976	    &ntb->db_link_mask, "Doorbell link mask");
1977	SYSCTL_ADD_UQUAD(ctx, tree_par, OID_AUTO, "db_mask", CTLFLAG_RD,
1978	    &ntb->db_mask, "Doorbell mask (cached)");
1979
1980	tmptree = SYSCTL_ADD_NODE(ctx, tree_par, OID_AUTO, "registers",
1981	    CTLFLAG_RD, NULL, "Raw HW registers (big-endian)");
1982	regpar = SYSCTL_CHILDREN(tmptree);
1983
1984	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "db_mask",
1985	    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
1986	    NTB_REG_64 | NTB_DB_READ | ntb->self_reg->db_mask,
1987	    sysctl_handle_register, "QU", "Doorbell mask register");
1988	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "db_bell",
1989	    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
1990	    NTB_REG_64 | NTB_DB_READ | ntb->self_reg->db_bell,
1991	    sysctl_handle_register, "QU", "Doorbell register");
1992
1993	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "incoming_xlat23",
1994	    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
1995	    NTB_REG_64 | ntb->xlat_reg->bar2_xlat,
1996	    sysctl_handle_register, "QU", "Incoming XLAT23 register");
1997	if (HAS_FEATURE(NTB_SPLIT_BAR)) {
1998		SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "incoming_xlat4",
1999		    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
2000		    NTB_REG_32 | ntb->xlat_reg->bar4_xlat,
2001		    sysctl_handle_register, "IU", "Incoming XLAT4 register");
2002		SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "incoming_xlat5",
2003		    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
2004		    NTB_REG_32 | ntb->xlat_reg->bar5_xlat,
2005		    sysctl_handle_register, "IU", "Incoming XLAT5 register");
2006	} else {
2007		SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "incoming_xlat45",
2008		    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
2009		    NTB_REG_64 | ntb->xlat_reg->bar4_xlat,
2010		    sysctl_handle_register, "QU", "Incoming XLAT45 register");
2011	}
2012
2013	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "incoming_lmt23",
2014	    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
2015	    NTB_REG_64 | ntb->xlat_reg->bar2_limit,
2016	    sysctl_handle_register, "QU", "Incoming LMT23 register");
2017	if (HAS_FEATURE(NTB_SPLIT_BAR)) {
2018		SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "incoming_lmt4",
2019		    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
2020		    NTB_REG_32 | ntb->xlat_reg->bar4_limit,
2021		    sysctl_handle_register, "IU", "Incoming LMT4 register");
2022		SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "incoming_lmt5",
2023		    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
2024		    NTB_REG_32 | ntb->xlat_reg->bar5_limit,
2025		    sysctl_handle_register, "IU", "Incoming LMT5 register");
2026	} else {
2027		SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "incoming_lmt45",
2028		    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
2029		    NTB_REG_64 | ntb->xlat_reg->bar4_limit,
2030		    sysctl_handle_register, "QU", "Incoming LMT45 register");
2031	}
2032
2033	if (ntb->type == NTB_ATOM)
2034		return;
2035
2036	tmptree = SYSCTL_ADD_NODE(ctx, regpar, OID_AUTO, "xeon_stats",
2037	    CTLFLAG_RD, NULL, "Xeon HW statistics");
2038	statpar = SYSCTL_CHILDREN(tmptree);
2039	SYSCTL_ADD_PROC(ctx, statpar, OID_AUTO, "upstream_mem_miss",
2040	    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
2041	    NTB_REG_16 | XEON_USMEMMISS_OFFSET,
2042	    sysctl_handle_register, "SU", "Upstream Memory Miss");
2043
2044	tmptree = SYSCTL_ADD_NODE(ctx, regpar, OID_AUTO, "xeon_hw_err",
2045	    CTLFLAG_RD, NULL, "Xeon HW errors");
2046	errpar = SYSCTL_CHILDREN(tmptree);
2047
2048	SYSCTL_ADD_PROC(ctx, errpar, OID_AUTO, "devsts",
2049	    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
2050	    NTB_REG_16 | NTB_PCI_REG | XEON_DEVSTS_OFFSET,
2051	    sysctl_handle_register, "SU", "DEVSTS");
2052	SYSCTL_ADD_PROC(ctx, errpar, OID_AUTO, "lnksts",
2053	    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
2054	    NTB_REG_16 | NTB_PCI_REG | XEON_LINK_STATUS_OFFSET,
2055	    sysctl_handle_register, "SU", "LNKSTS");
2056	SYSCTL_ADD_PROC(ctx, errpar, OID_AUTO, "uncerrsts",
2057	    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
2058	    NTB_REG_32 | NTB_PCI_REG | XEON_UNCERRSTS_OFFSET,
2059	    sysctl_handle_register, "IU", "UNCERRSTS");
2060	SYSCTL_ADD_PROC(ctx, errpar, OID_AUTO, "corerrsts",
2061	    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
2062	    NTB_REG_32 | NTB_PCI_REG | XEON_CORERRSTS_OFFSET,
2063	    sysctl_handle_register, "IU", "CORERRSTS");
2064
2065	if (ntb->conn_type != NTB_CONN_B2B)
2066		return;
2067
2068	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "outgoing_xlat23",
2069	    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
2070	    NTB_REG_64 | ntb->bar_info[NTB_B2B_BAR_1].pbarxlat_off,
2071	    sysctl_handle_register, "QU", "Outgoing XLAT23 register");
2072	if (HAS_FEATURE(NTB_SPLIT_BAR)) {
2073		SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "outgoing_xlat4",
2074		    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
2075		    NTB_REG_32 | ntb->bar_info[NTB_B2B_BAR_2].pbarxlat_off,
2076		    sysctl_handle_register, "IU", "Outgoing XLAT4 register");
2077		SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "outgoing_xlat5",
2078		    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
2079		    NTB_REG_32 | ntb->bar_info[NTB_B2B_BAR_3].pbarxlat_off,
2080		    sysctl_handle_register, "IU", "Outgoing XLAT5 register");
2081	} else {
2082		SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "outgoing_xlat45",
2083		    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
2084		    NTB_REG_64 | ntb->bar_info[NTB_B2B_BAR_2].pbarxlat_off,
2085		    sysctl_handle_register, "QU", "Outgoing XLAT45 register");
2086	}
2087
2088	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "outgoing_lmt23",
2089	    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
2090	    NTB_REG_64 | XEON_PBAR2LMT_OFFSET,
2091	    sysctl_handle_register, "QU", "Outgoing LMT23 register");
2092	if (HAS_FEATURE(NTB_SPLIT_BAR)) {
2093		SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "outgoing_lmt4",
2094		    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
2095		    NTB_REG_32 | XEON_PBAR4LMT_OFFSET,
2096		    sysctl_handle_register, "IU", "Outgoing LMT4 register");
2097		SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "outgoing_lmt5",
2098		    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
2099		    NTB_REG_32 | XEON_PBAR5LMT_OFFSET,
2100		    sysctl_handle_register, "IU", "Outgoing LMT5 register");
2101	} else {
2102		SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "outgoing_lmt45",
2103		    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
2104		    NTB_REG_64 | XEON_PBAR4LMT_OFFSET,
2105		    sysctl_handle_register, "QU", "Outgoing LMT45 register");
2106	}
2107
2108	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "sbar01_base",
2109	    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
2110	    NTB_REG_64 | ntb->xlat_reg->bar0_base,
2111	    sysctl_handle_register, "QU", "Secondary BAR01 base register");
2112	SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "sbar23_base",
2113	    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
2114	    NTB_REG_64 | ntb->xlat_reg->bar2_base,
2115	    sysctl_handle_register, "QU", "Secondary BAR23 base register");
2116	if (HAS_FEATURE(NTB_SPLIT_BAR)) {
2117		SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "sbar4_base",
2118		    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
2119		    NTB_REG_32 | ntb->xlat_reg->bar4_base,
2120		    sysctl_handle_register, "IU",
2121		    "Secondary BAR4 base register");
2122		SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "sbar5_base",
2123		    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
2124		    NTB_REG_32 | ntb->xlat_reg->bar5_base,
2125		    sysctl_handle_register, "IU",
2126		    "Secondary BAR5 base register");
2127	} else {
2128		SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "sbar45_base",
2129		    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
2130		    NTB_REG_64 | ntb->xlat_reg->bar4_base,
2131		    sysctl_handle_register, "QU",
2132		    "Secondary BAR45 base register");
2133	}
2134}
2135
2136static int
2137sysctl_handle_features(SYSCTL_HANDLER_ARGS)
2138{
2139	struct ntb_softc *ntb;
2140	struct sbuf sb;
2141	int error;
2142
2143	error = 0;
2144	ntb = arg1;
2145
2146	sbuf_new_for_sysctl(&sb, NULL, 256, req);
2147
2148	sbuf_printf(&sb, "%b", ntb->features, NTB_FEATURES_STR);
2149	error = sbuf_finish(&sb);
2150	sbuf_delete(&sb);
2151
2152	if (error || !req->newptr)
2153		return (error);
2154	return (EINVAL);
2155}
2156
2157static int
2158sysctl_handle_link_status(SYSCTL_HANDLER_ARGS)
2159{
2160	struct ntb_softc *ntb;
2161	struct sbuf sb;
2162	enum ntb_speed speed;
2163	enum ntb_width width;
2164	int error;
2165
2166	error = 0;
2167	ntb = arg1;
2168
2169	sbuf_new_for_sysctl(&sb, NULL, 32, req);
2170
2171	if (ntb_link_is_up(ntb, &speed, &width))
2172		sbuf_printf(&sb, "up / PCIe Gen %u / Width x%u",
2173		    (unsigned)speed, (unsigned)width);
2174	else
2175		sbuf_printf(&sb, "down");
2176
2177	error = sbuf_finish(&sb);
2178	sbuf_delete(&sb);
2179
2180	if (error || !req->newptr)
2181		return (error);
2182	return (EINVAL);
2183}
2184
2185static int
2186sysctl_handle_register(SYSCTL_HANDLER_ARGS)
2187{
2188	struct ntb_softc *ntb;
2189	const void *outp;
2190	uintptr_t sz;
2191	uint64_t umv;
2192	char be[sizeof(umv)];
2193	size_t outsz;
2194	uint32_t reg;
2195	bool db, pci;
2196	int error;
2197
2198	ntb = arg1;
2199	reg = arg2 & ~NTB_REGFLAGS_MASK;
2200	sz = arg2 & NTB_REGSZ_MASK;
2201	db = (arg2 & NTB_DB_READ) != 0;
2202	pci = (arg2 & NTB_PCI_REG) != 0;
2203
2204	KASSERT(!(db && pci), ("bogus"));
2205
2206	if (db) {
2207		KASSERT(sz == NTB_REG_64, ("bogus"));
2208		umv = db_ioread(ntb, reg);
2209		outsz = sizeof(uint64_t);
2210	} else {
2211		switch (sz) {
2212		case NTB_REG_64:
2213			if (pci)
2214				umv = pci_read_config(ntb->device, reg, 8);
2215			else
2216				umv = ntb_reg_read(8, reg);
2217			outsz = sizeof(uint64_t);
2218			break;
2219		case NTB_REG_32:
2220			if (pci)
2221				umv = pci_read_config(ntb->device, reg, 4);
2222			else
2223				umv = ntb_reg_read(4, reg);
2224			outsz = sizeof(uint32_t);
2225			break;
2226		case NTB_REG_16:
2227			if (pci)
2228				umv = pci_read_config(ntb->device, reg, 2);
2229			else
2230				umv = ntb_reg_read(2, reg);
2231			outsz = sizeof(uint16_t);
2232			break;
2233		case NTB_REG_8:
2234			if (pci)
2235				umv = pci_read_config(ntb->device, reg, 1);
2236			else
2237				umv = ntb_reg_read(1, reg);
2238			outsz = sizeof(uint8_t);
2239			break;
2240		default:
2241			panic("bogus");
2242			break;
2243		}
2244	}
2245
2246	/* Encode bigendian so that sysctl -x is legible. */
2247	be64enc(be, umv);
2248	outp = ((char *)be) + sizeof(umv) - outsz;
2249
2250	error = SYSCTL_OUT(req, outp, outsz);
2251	if (error || !req->newptr)
2252		return (error);
2253	return (EINVAL);
2254}
2255
2256/*
2257 * Public API to the rest of the OS
2258 */
2259
2260/**
2261 * ntb_get_max_spads() - get the total scratch regs usable
2262 * @ntb: pointer to ntb_softc instance
2263 *
2264 * This function returns the max 32bit scratchpad registers usable by the
2265 * upper layer.
2266 *
2267 * RETURNS: total number of scratch pad registers available
2268 */
2269uint8_t
2270ntb_get_max_spads(struct ntb_softc *ntb)
2271{
2272
2273	return (ntb->spad_count);
2274}
2275
2276uint8_t
2277ntb_mw_count(struct ntb_softc *ntb)
2278{
2279
2280	return (ntb->mw_count);
2281}
2282
2283/**
2284 * ntb_spad_write() - write to the secondary scratchpad register
2285 * @ntb: pointer to ntb_softc instance
2286 * @idx: index to the scratchpad register, 0 based
2287 * @val: the data value to put into the register
2288 *
2289 * This function allows writing of a 32bit value to the indexed scratchpad
2290 * register. The register resides on the secondary (external) side.
2291 *
2292 * RETURNS: An appropriate ERRNO error value on error, or zero for success.
2293 */
2294int
2295ntb_spad_write(struct ntb_softc *ntb, unsigned int idx, uint32_t val)
2296{
2297
2298	if (idx >= ntb->spad_count)
2299		return (EINVAL);
2300
2301	ntb_reg_write(4, ntb->self_reg->spad + idx * 4, val);
2302
2303	return (0);
2304}
2305
2306/**
2307 * ntb_spad_read() - read from the primary scratchpad register
2308 * @ntb: pointer to ntb_softc instance
2309 * @idx: index to scratchpad register, 0 based
2310 * @val: pointer to 32bit integer for storing the register value
2311 *
2312 * This function allows reading of the 32bit scratchpad register on
2313 * the primary (internal) side.
2314 *
2315 * RETURNS: An appropriate ERRNO error value on error, or zero for success.
2316 */
2317int
2318ntb_spad_read(struct ntb_softc *ntb, unsigned int idx, uint32_t *val)
2319{
2320
2321	if (idx >= ntb->spad_count)
2322		return (EINVAL);
2323
2324	*val = ntb_reg_read(4, ntb->self_reg->spad + idx * 4);
2325
2326	return (0);
2327}
2328
2329/**
2330 * ntb_peer_spad_write() - write to the secondary scratchpad register
2331 * @ntb: pointer to ntb_softc instance
2332 * @idx: index to the scratchpad register, 0 based
2333 * @val: the data value to put into the register
2334 *
2335 * This function allows writing of a 32bit value to the indexed scratchpad
2336 * register. The register resides on the secondary (external) side.
2337 *
2338 * RETURNS: An appropriate ERRNO error value on error, or zero for success.
2339 */
2340int
2341ntb_peer_spad_write(struct ntb_softc *ntb, unsigned int idx, uint32_t val)
2342{
2343
2344	if (idx >= ntb->spad_count)
2345		return (EINVAL);
2346
2347	if (HAS_FEATURE(NTB_SDOORBELL_LOCKUP))
2348		ntb_mw_write(4, XEON_SHADOW_SPAD_OFFSET + idx * 4, val);
2349	else
2350		ntb_reg_write(4, ntb->peer_reg->spad + idx * 4, val);
2351
2352	return (0);
2353}
2354
2355/**
2356 * ntb_peer_spad_read() - read from the primary scratchpad register
2357 * @ntb: pointer to ntb_softc instance
2358 * @idx: index to scratchpad register, 0 based
2359 * @val: pointer to 32bit integer for storing the register value
2360 *
2361 * This function allows reading of the 32bit scratchpad register on
2362 * the primary (internal) side.
2363 *
2364 * RETURNS: An appropriate ERRNO error value on error, or zero for success.
2365 */
2366int
2367ntb_peer_spad_read(struct ntb_softc *ntb, unsigned int idx, uint32_t *val)
2368{
2369
2370	if (idx >= ntb->spad_count)
2371		return (EINVAL);
2372
2373	if (HAS_FEATURE(NTB_SDOORBELL_LOCKUP))
2374		*val = ntb_mw_read(4, XEON_SHADOW_SPAD_OFFSET + idx * 4);
2375	else
2376		*val = ntb_reg_read(4, ntb->peer_reg->spad + idx * 4);
2377
2378	return (0);
2379}
2380
2381/*
2382 * ntb_mw_get_range() - get the range of a memory window
2383 * @ntb:        NTB device context
2384 * @idx:        Memory window number
2385 * @base:       OUT - the base address for mapping the memory window
2386 * @size:       OUT - the size for mapping the memory window
2387 * @align:      OUT - the base alignment for translating the memory window
2388 * @align_size: OUT - the size alignment for translating the memory window
2389 *
2390 * Get the range of a memory window.  NULL may be given for any output
2391 * parameter if the value is not needed.  The base and size may be used for
2392 * mapping the memory window, to access the peer memory.  The alignment and
2393 * size may be used for translating the memory window, for the peer to access
2394 * memory on the local system.
2395 *
2396 * Return: Zero on success, otherwise an error number.
2397 */
2398int
2399ntb_mw_get_range(struct ntb_softc *ntb, unsigned mw_idx, vm_paddr_t *base,
2400    void **vbase, size_t *size, size_t *align, size_t *align_size)
2401{
2402	struct ntb_pci_bar_info *bar;
2403	size_t bar_b2b_off;
2404
2405	if (mw_idx >= ntb_mw_count(ntb))
2406		return (EINVAL);
2407
2408	bar = &ntb->bar_info[ntb_mw_to_bar(ntb, mw_idx)];
2409	bar_b2b_off = 0;
2410	if (mw_idx == ntb->b2b_mw_idx) {
2411		KASSERT(ntb->b2b_off != 0,
2412		    ("user shouldn't get non-shared b2b mw"));
2413		bar_b2b_off = ntb->b2b_off;
2414	}
2415
2416	if (base != NULL)
2417		*base = bar->pbase + bar_b2b_off;
2418	if (vbase != NULL)
2419		*vbase = (char *)bar->vbase + bar_b2b_off;
2420	if (size != NULL)
2421		*size = bar->size - bar_b2b_off;
2422	if (align != NULL)
2423		*align = bar->size;
2424	if (align_size != NULL)
2425		*align_size = 1;
2426	return (0);
2427}
2428
2429/*
2430 * ntb_mw_set_trans() - set the translation of a memory window
2431 * @ntb:        NTB device context
2432 * @idx:        Memory window number
2433 * @addr:       The dma address local memory to expose to the peer
2434 * @size:       The size of the local memory to expose to the peer
2435 *
2436 * Set the translation of a memory window.  The peer may access local memory
2437 * through the window starting at the address, up to the size.  The address
2438 * must be aligned to the alignment specified by ntb_mw_get_range().  The size
2439 * must be aligned to the size alignment specified by ntb_mw_get_range().
2440 *
2441 * Return: Zero on success, otherwise an error number.
2442 */
2443int
2444ntb_mw_set_trans(struct ntb_softc *ntb, unsigned idx, bus_addr_t addr,
2445    size_t size)
2446{
2447	struct ntb_pci_bar_info *bar;
2448	uint64_t base, limit, reg_val;
2449	size_t bar_size, mw_size;
2450	uint32_t base_reg, xlat_reg, limit_reg;
2451	enum ntb_bar bar_num;
2452
2453	if (idx >= ntb_mw_count(ntb))
2454		return (EINVAL);
2455
2456	bar_num = ntb_mw_to_bar(ntb, idx);
2457	bar = &ntb->bar_info[bar_num];
2458
2459	bar_size = bar->size;
2460	if (idx == ntb->b2b_mw_idx)
2461		mw_size = bar_size - ntb->b2b_off;
2462	else
2463		mw_size = bar_size;
2464
2465	/* Hardware requires that addr is aligned to bar size */
2466	if ((addr & (bar_size - 1)) != 0)
2467		return (EINVAL);
2468
2469	if (size > mw_size)
2470		return (EINVAL);
2471
2472	bar_get_xlat_params(ntb, bar_num, &base_reg, &xlat_reg, &limit_reg);
2473
2474	limit = 0;
2475	if (bar_is_64bit(ntb, bar_num)) {
2476		base = ntb_reg_read(8, base_reg);
2477
2478		if (limit_reg != 0 && size != mw_size)
2479			limit = base + size;
2480
2481		/* Set and verify translation address */
2482		ntb_reg_write(8, xlat_reg, addr);
2483		reg_val = ntb_reg_read(8, xlat_reg);
2484		if (reg_val != addr) {
2485			ntb_reg_write(8, xlat_reg, 0);
2486			return (EIO);
2487		}
2488
2489		/* Set and verify the limit */
2490		ntb_reg_write(8, limit_reg, limit);
2491		reg_val = ntb_reg_read(8, limit_reg);
2492		if (reg_val != limit) {
2493			ntb_reg_write(8, limit_reg, base);
2494			ntb_reg_write(8, xlat_reg, 0);
2495			return (EIO);
2496		}
2497	} else {
2498		/* Configure 32-bit (split) BAR MW */
2499
2500		if ((addr & ~UINT32_MAX) != 0)
2501			return (EINVAL);
2502		if (((addr + size) & ~UINT32_MAX) != 0)
2503			return (EINVAL);
2504
2505		base = ntb_reg_read(4, base_reg);
2506
2507		if (limit_reg != 0 && size != mw_size)
2508			limit = base + size;
2509
2510		/* Set and verify translation address */
2511		ntb_reg_write(4, xlat_reg, addr);
2512		reg_val = ntb_reg_read(4, xlat_reg);
2513		if (reg_val != addr) {
2514			ntb_reg_write(4, xlat_reg, 0);
2515			return (EIO);
2516		}
2517
2518		/* Set and verify the limit */
2519		ntb_reg_write(4, limit_reg, limit);
2520		reg_val = ntb_reg_read(4, limit_reg);
2521		if (reg_val != limit) {
2522			ntb_reg_write(4, limit_reg, base);
2523			ntb_reg_write(4, xlat_reg, 0);
2524			return (EIO);
2525		}
2526	}
2527	return (0);
2528}
2529
/*
 * ntb_mw_clear_trans() - clear the translation of a memory window
 * @ntb:	NTB device context
 * @mw_idx:	Memory window number
 *
 * Clear the translation of a memory window by programming it, via
 * ntb_mw_set_trans(), with a zero address and zero size.  The peer may no
 * longer access local memory through the window.
 *
 * Return: Zero on success, otherwise the error number from
 * ntb_mw_set_trans().
 */
int
ntb_mw_clear_trans(struct ntb_softc *ntb, unsigned mw_idx)
{
	int error;

	error = ntb_mw_set_trans(ntb, mw_idx, 0, 0);
	return (error);
}
2546
2547/**
2548 * ntb_peer_db_set() - Set the doorbell on the secondary/external side
2549 * @ntb: pointer to ntb_softc instance
2550 * @bit: doorbell bits to ring
2551 *
2552 * This function allows triggering of a doorbell on the secondary/external
2553 * side that will initiate an interrupt on the remote host
2554 */
2555void
2556ntb_peer_db_set(struct ntb_softc *ntb, uint64_t bit)
2557{
2558
2559	if (HAS_FEATURE(NTB_SDOORBELL_LOCKUP)) {
2560		ntb_mw_write(2, XEON_SHADOW_PDOORBELL_OFFSET, bit);
2561		return;
2562	}
2563
2564	db_iowrite(ntb, ntb->peer_reg->db_bell, bit);
2565}
2566
2567/*
2568 * ntb_get_peer_db_addr() - Return the address of the remote doorbell register,
2569 * as well as the size of the register (via *sz_out).
2570 *
2571 * This function allows a caller using I/OAT DMA to chain the remote doorbell
2572 * ring to its memory window write.
2573 *
2574 * Note that writing the peer doorbell via a memory window will *not* generate
2575 * an interrupt on the remote host; that must be done seperately.
2576 */
2577bus_addr_t
2578ntb_get_peer_db_addr(struct ntb_softc *ntb, vm_size_t *sz_out)
2579{
2580	struct ntb_pci_bar_info *bar;
2581	uint64_t regoff;
2582
2583	KASSERT(sz_out != NULL, ("must be non-NULL"));
2584
2585	if (!HAS_FEATURE(NTB_SDOORBELL_LOCKUP)) {
2586		bar = &ntb->bar_info[NTB_CONFIG_BAR];
2587		regoff = ntb->peer_reg->db_bell;
2588	} else {
2589		KASSERT((HAS_FEATURE(NTB_SPLIT_BAR) && ntb->mw_count == 2) ||
2590		    (!HAS_FEATURE(NTB_SPLIT_BAR) && ntb->mw_count == 1),
2591		    ("mw_count invalid after setup"));
2592		KASSERT(ntb->b2b_mw_idx != B2B_MW_DISABLED,
2593		    ("invalid b2b idx"));
2594
2595		bar = &ntb->bar_info[ntb_mw_to_bar(ntb, ntb->b2b_mw_idx)];
2596		regoff = XEON_SHADOW_PDOORBELL_OFFSET;
2597	}
2598	KASSERT(bar->pci_bus_tag != X86_BUS_SPACE_IO, ("uh oh"));
2599
2600	*sz_out = ntb->reg->db_size;
2601	/* HACK: Specific to current x86 bus implementation. */
2602	return ((uint64_t)bar->pci_bus_handle + regoff);
2603}
2604
2605/*
2606 * ntb_db_valid_mask() - get a mask of doorbell bits supported by the ntb
2607 * @ntb:	NTB device context
2608 *
2609 * Hardware may support different number or arrangement of doorbell bits.
2610 *
2611 * Return: A mask of doorbell bits supported by the ntb.
2612 */
2613uint64_t
2614ntb_db_valid_mask(struct ntb_softc *ntb)
2615{
2616
2617	return (ntb->db_valid_mask);
2618}
2619
2620/*
2621 * ntb_db_vector_mask() - get a mask of doorbell bits serviced by a vector
2622 * @ntb:	NTB device context
2623 * @vector:	Doorbell vector number
2624 *
2625 * Each interrupt vector may have a different number or arrangement of bits.
2626 *
2627 * Return: A mask of doorbell bits serviced by a vector.
2628 */
2629uint64_t
2630ntb_db_vector_mask(struct ntb_softc *ntb, uint32_t vector)
2631{
2632
2633	if (vector > ntb->db_vec_count)
2634		return (0);
2635	return (ntb->db_valid_mask & ntb_vec_mask(ntb, vector));
2636}
2637
2638/**
2639 * ntb_link_is_up() - get the current ntb link state
2640 * @ntb:        NTB device context
2641 * @speed:      OUT - The link speed expressed as PCIe generation number
2642 * @width:      OUT - The link width expressed as the number of PCIe lanes
2643 *
2644 * RETURNS: true or false based on the hardware link state
2645 */
2646bool
2647ntb_link_is_up(struct ntb_softc *ntb, enum ntb_speed *speed,
2648    enum ntb_width *width)
2649{
2650
2651	if (speed != NULL)
2652		*speed = ntb_link_sta_speed(ntb);
2653	if (width != NULL)
2654		*width = ntb_link_sta_width(ntb);
2655	return (link_is_up(ntb));
2656}
2657
2658static void
2659save_bar_parameters(struct ntb_pci_bar_info *bar)
2660{
2661
2662	bar->pci_bus_tag = rman_get_bustag(bar->pci_resource);
2663	bar->pci_bus_handle = rman_get_bushandle(bar->pci_resource);
2664	bar->pbase = rman_get_start(bar->pci_resource);
2665	bar->size = rman_get_size(bar->pci_resource);
2666	bar->vbase = rman_get_virtual(bar->pci_resource);
2667}
2668
2669device_t
2670ntb_get_device(struct ntb_softc *ntb)
2671{
2672
2673	return (ntb->device);
2674}
2675
/*
 * Export HW-specific errata information.
 *
 * Evaluates the file's HAS_FEATURE() test for the given feature bit(s) on
 * this softc and returns the result.
 */
bool
ntb_has_feature(struct ntb_softc *ntb, uint32_t feature)
{
	bool present;

	present = HAS_FEATURE(feature);
	return (present);
}
2683