ntb_hw_intel.c revision 289543
1/*-
2 * Copyright (C) 2013 Intel Corporation
3 * Copyright (C) 2015 EMC Corporation
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 *    notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 *    notice, this list of conditions and the following disclaimer in the
13 *    documentation and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25 * SUCH DAMAGE.
26 */
27
28#include <sys/cdefs.h>
29__FBSDID("$FreeBSD: head/sys/dev/ntb/ntb_hw/ntb_hw.c 289543 2015-10-18 20:20:29Z cem $");
30
31#include <sys/param.h>
32#include <sys/kernel.h>
33#include <sys/systm.h>
34#include <sys/bus.h>
35#include <sys/malloc.h>
36#include <sys/module.h>
37#include <sys/queue.h>
38#include <sys/rman.h>
39#include <sys/sysctl.h>
40#include <vm/vm.h>
41#include <vm/pmap.h>
42#include <machine/bus.h>
43#include <machine/pmap.h>
44#include <machine/resource.h>
45#include <dev/pci/pcireg.h>
46#include <dev/pci/pcivar.h>
47
48#include "ntb_regs.h"
49#include "ntb_hw.h"
50
51/*
52 * The Non-Transparent Bridge (NTB) is a device on some Intel processors that
53 * allows you to connect two systems using a PCI-e link.
54 *
55 * This module contains the hardware abstraction layer for the NTB. It allows
 * you to send and receive interrupts, map the memory windows and send and
57 * receive messages in the scratch-pad registers.
58 *
59 * NOTE: Much of the code in this module is shared with Linux. Any patches may
60 * be picked up and redistributed in Linux with a dual GPL/BSD license.
61 */
62
/*
 * Upper bound on MSI-X vectors this driver will ever allocate: sized to the
 * larger doorbell count of the two supported device types.
 */
#define MAX_MSIX_INTERRUPTS MAX(XEON_DB_COUNT, SOC_DB_COUNT)

#define NTB_HB_TIMEOUT		1 /* second */
#define SOC_LINK_RECOVERY_TIME	500 /* ms */

#define DEVICE2SOFTC(dev) ((struct ntb_softc *) device_get_softc(dev))

/* The two NTB hardware flavors this driver supports. */
enum ntb_device_type {
	NTB_XEON,
	NTB_SOC
};

/* Indices into ntb_softc.bar_info[]. */
enum ntb_bar {
	NTB_CONFIG_BAR = 0,
	NTB_B2B_BAR_1,
	NTB_B2B_BAR_2,
	NTB_B2B_BAR_3,
	NTB_MAX_BARS
};

/*
 * Device features and workarounds.
 * NB: expects a local variable named 'ntb' (struct ntb_softc *) in scope.
 */
#define HAS_FEATURE(feature)	\
	((ntb->features & (feature)) != 0)
86
/* Static per-device-ID description; looked up from pci_ids[] at probe. */
struct ntb_hw_info {
	uint32_t		device_id;
	const char		*desc;
	enum ntb_device_type	type;
	uint32_t		features;
};

/* State for one mapped PCI BAR. */
struct ntb_pci_bar_info {
	bus_space_tag_t		pci_bus_tag;
	bus_space_handle_t	pci_bus_handle;
	int			pci_resource_id;
	struct resource		*pci_resource;
	vm_paddr_t		pbase;		/* physical base address */
	void			*vbase;		/* kernel virtual mapping */
	u_long			size;

	/* Configuration register offsets */
	uint32_t		psz_off;	/* primary BAR size */
	uint32_t		ssz_off;	/* secondary BAR size */
	uint32_t		sbarbase_off;	/* secondary BAR base */
	uint32_t		sbarlmt_off;	/* secondary BAR limit */
	uint32_t		pbarxlat_off;	/* primary BAR translation */
};

/* One allocated IRQ resource and its bus_setup_intr() cookie. */
struct ntb_int_info {
	struct resource	*res;
	int		rid;
	void		*tag;
};

/* Per-doorbell-vector consumer callback state. */
struct ntb_db_cb {
	ntb_db_callback		callback;
	unsigned int		db_num;
	void			*data;
	struct ntb_softc	*ntb;
	struct callout		irq_work;
	/* Set on the Xeon link-event slot; no consumer callback allowed. */
	bool			reserved;
};
125
/* Per-chip offsets of the core control/status registers. */
struct ntb_reg {
	uint32_t	ntb_ctl;
	uint32_t	lnk_sta;
	uint8_t		db_size;		/* doorbell width in bytes */
	unsigned	mw_bar[NTB_MAX_BARS];	/* MW index -> BAR index */
};

/* Offsets of the registers used to signal/reach the peer (B2B) side. */
struct ntb_alt_reg {
	uint32_t	db_bell;
	uint32_t	db_mask;
	uint32_t	spad;
};

/* Offsets of the secondary-side base/translation/limit registers. */
struct ntb_xlat_reg {
	uint64_t	bar0_base;
	uint64_t	bar2_xlat;
	uint64_t	bar2_limit;
};

/* Fixed bus addresses used to program the Xeon B2B peer windows. */
struct ntb_b2b_addr {
	uint64_t	bar0_addr;
	uint64_t	bar2_addr64;
	uint64_t	bar4_addr64;
	uint64_t	bar4_addr32;
	uint64_t	bar5_addr32;
};
152
/*
 * Per-device driver state.  Fields marked (h) are protected by db_mask_lock
 * (see HW_LOCK/HW_UNLOCK below).
 */
struct ntb_softc {
	device_t		device;
	enum ntb_device_type	type;
	uint64_t		features;

	struct ntb_pci_bar_info	bar_info[NTB_MAX_BARS];
	struct ntb_int_info	int_info[MAX_MSIX_INTERRUPTS];
	uint32_t		allocated_interrupts;

	struct callout		heartbeat_timer;
	/* presumably the SOC link-recovery timer (recover_soc_link) — verify */
	struct callout		lr_timer;

	void			*ntb_transport;
	ntb_event_callback	event_cb;
	struct ntb_db_cb	*db_cb;		/* one per doorbell vector */
	uint8_t			max_cbs;

	/* Register offsets that differ between Xeon and SoC layouts. */
	struct {
		uint32_t ldb;		/* local doorbell */
		uint32_t ldb_mask;	/* local doorbell mask (h) */
		uint32_t bar4_xlat;
		uint32_t bar5_xlat;
		uint32_t spad_local;
		uint32_t spci_cmd;
	} reg_ofs;
	uint32_t ppd;
	uint8_t conn_type;
	uint8_t dev_type;
	uint8_t link_status;
	uint8_t link_width;
	uint8_t link_speed;

	/* Offset of peer bar0 in B2B BAR */
	uint64_t			b2b_off;
	/* Memory window used to access peer bar0 */
#define B2B_MW_DISABLED			UINT8_MAX
	uint8_t				b2b_mw_idx;

	uint8_t				mw_count;
	uint8_t				spad_count;
	uint8_t				db_count;
	uint8_t				db_vec_count;
	uint8_t				db_vec_shift;

	/* Protects local DB mask and (h). */
#define HW_LOCK(sc)	mtx_lock_spin(&(sc)->db_mask_lock)
#define HW_UNLOCK(sc)	mtx_unlock_spin(&(sc)->db_mask_lock)
#define HW_ASSERT(sc,f)	mtx_assert(&(sc)->db_mask_lock, (f))
	struct mtx			db_mask_lock;

	uint32_t			ntb_ctl;	/* (h) - SOC only */
	uint32_t			lnk_sta;	/* (h) - SOC only */

	uint64_t			db_valid_mask;
	uint64_t			db_link_mask;
	uint64_t			db_mask;	/* (h) */

	int				last_ts;	/* ticks @ last irq */

	/* Chip-specific register/layout descriptions (constant tables). */
	const struct ntb_reg		*reg;
	const struct ntb_alt_reg	*self_reg;
	const struct ntb_alt_reg	*peer_reg;
	const struct ntb_xlat_reg	*xlat_reg;
};
217
#ifdef __i386__
/*
 * i386 bus_space lacks 64-bit accessors; synthesize them from two 32-bit
 * accesses (low dword at 'offset', high dword at 'offset + 4').  Note the
 * two halves are not performed atomically.
 */
static __inline uint64_t
bus_space_read_8(bus_space_tag_t tag, bus_space_handle_t handle,
    bus_size_t offset)
{

	return (bus_space_read_4(tag, handle, offset) |
	    ((uint64_t)bus_space_read_4(tag, handle, offset + 4)) << 32);
}

static __inline void
bus_space_write_8(bus_space_tag_t tag, bus_space_handle_t handle,
    bus_size_t offset, uint64_t val)
{

	bus_space_write_4(tag, handle, offset, val);
	bus_space_write_4(tag, handle, offset + 4, val >> 32);
}
#endif
237
/*
 * Register access convenience macros.  All expect a local variable named
 * 'ntb' (struct ntb_softc *) in scope.  ntb_reg_* access the config BAR;
 * ntb_mw_* access the BAR backing the B2B memory window.
 */
#define ntb_bar_read(SIZE, bar, offset) \
	    bus_space_read_ ## SIZE (ntb->bar_info[(bar)].pci_bus_tag, \
	    ntb->bar_info[(bar)].pci_bus_handle, (offset))
#define ntb_bar_write(SIZE, bar, offset, val) \
	    bus_space_write_ ## SIZE (ntb->bar_info[(bar)].pci_bus_tag, \
	    ntb->bar_info[(bar)].pci_bus_handle, (offset), (val))
#define ntb_reg_read(SIZE, offset) ntb_bar_read(SIZE, NTB_CONFIG_BAR, offset)
#define ntb_reg_write(SIZE, offset, val) \
	    ntb_bar_write(SIZE, NTB_CONFIG_BAR, offset, val)
#define ntb_mw_read(SIZE, offset) \
	    ntb_bar_read(SIZE, ntb_mw_to_bar(ntb, ntb->b2b_mw_idx), offset)
#define ntb_mw_write(SIZE, offset, val) \
	    ntb_bar_write(SIZE, ntb_mw_to_bar(ntb, ntb->b2b_mw_idx), \
		offset, val)
252
253static int ntb_probe(device_t device);
254static int ntb_attach(device_t device);
255static int ntb_detach(device_t device);
256static inline enum ntb_bar ntb_mw_to_bar(struct ntb_softc *, unsigned mw);
257static int ntb_map_pci_bars(struct ntb_softc *ntb);
258static void print_map_success(struct ntb_softc *, struct ntb_pci_bar_info *);
259static int map_mmr_bar(struct ntb_softc *ntb, struct ntb_pci_bar_info *bar);
260static int map_memory_window_bar(struct ntb_softc *ntb,
261    struct ntb_pci_bar_info *bar);
262static void ntb_unmap_pci_bar(struct ntb_softc *ntb);
263static int ntb_remap_msix(device_t, uint32_t desired, uint32_t avail);
264static int ntb_init_isr(struct ntb_softc *ntb);
265static int ntb_setup_legacy_interrupt(struct ntb_softc *ntb);
266static int ntb_setup_msix(struct ntb_softc *ntb, uint32_t num_vectors);
267static void ntb_teardown_interrupts(struct ntb_softc *ntb);
268static inline uint64_t ntb_vec_mask(struct ntb_softc *, uint64_t db_vector);
269static void handle_irq(void *arg);
270static void ntb_handle_legacy_interrupt(void *arg);
271static void ntb_irq_work(void *arg);
272static inline uint64_t ntb_db_read(struct ntb_softc *, uint64_t regoff);
273static inline void ntb_db_write(struct ntb_softc *, uint64_t regoff, uint64_t val);
274static inline void mask_ldb_interrupt(struct ntb_softc *ntb, unsigned int idx);
275static inline void unmask_ldb_interrupt(struct ntb_softc *ntb, unsigned int idx);
276static inline void ntb_db_set_mask(struct ntb_softc *, uint64_t bits);
277static inline void ntb_db_clear_mask(struct ntb_softc *, uint64_t bits);
278static int ntb_create_callbacks(struct ntb_softc *ntb, uint32_t num_vectors);
279static void ntb_free_callbacks(struct ntb_softc *ntb);
280static struct ntb_hw_info *ntb_get_device_info(uint32_t device_id);
281static void ntb_detect_max_mw(struct ntb_softc *ntb);
282static int ntb_detect_xeon(struct ntb_softc *ntb);
283static int ntb_detect_soc(struct ntb_softc *ntb);
284static int ntb_xeon_init_dev(struct ntb_softc *ntb);
285static int ntb_soc_init_dev(struct ntb_softc *ntb);
286static void ntb_teardown_xeon(struct ntb_softc *ntb);
287static void configure_soc_secondary_side_bars(struct ntb_softc *ntb);
288static void xeon_reset_sbar_size(struct ntb_softc *, enum ntb_bar idx,
289    enum ntb_bar regbar);
290static void xeon_set_sbar_base_and_limit(struct ntb_softc *,
291    uint64_t base_addr, enum ntb_bar idx, enum ntb_bar regbar);
292static void xeon_set_pbar_xlat(struct ntb_softc *, uint64_t base_addr,
293    enum ntb_bar idx);
294static int xeon_setup_b2b_mw(struct ntb_softc *,
295    const struct ntb_b2b_addr *addr, const struct ntb_b2b_addr *peer_addr);
296static void soc_link_hb(void *arg);
297static void ntb_handle_link_event(struct ntb_softc *ntb, int link_state);
298static void ntb_link_disable(struct ntb_softc *ntb);
299static void ntb_link_enable(struct ntb_softc *ntb);
300static void recover_soc_link(void *arg);
301static int ntb_poll_link(struct ntb_softc *ntb);
302static void save_bar_parameters(struct ntb_pci_bar_info *bar);
303
/* Supported PCI device IDs; terminated by a zero device_id sentinel. */
static struct ntb_hw_info pci_ids[] = {
	{ 0x0C4E8086, "Atom Processor S1200 NTB Primary B2B", NTB_SOC, 0 },

	/* XXX: PS/SS IDs left out until they are supported. */
	{ 0x37258086, "JSF Xeon C35xx/C55xx Non-Transparent Bridge B2B",
		NTB_XEON, NTB_SDOORBELL_LOCKUP | NTB_B2BDOORBELL_BIT14 },
	{ 0x3C0D8086, "SNB Xeon E5/Core i7 Non-Transparent Bridge B2B",
		NTB_XEON, NTB_SDOORBELL_LOCKUP | NTB_B2BDOORBELL_BIT14 },
	{ 0x0E0D8086, "IVT Xeon E5 V2 Non-Transparent Bridge B2B", NTB_XEON,
		NTB_SDOORBELL_LOCKUP | NTB_B2BDOORBELL_BIT14 |
		    NTB_SB01BASE_LOCKUP | NTB_BAR_SIZE_4K },
	{ 0x2F0D8086, "HSX Xeon E5 V3 Non-Transparent Bridge B2B", NTB_XEON,
		NTB_SDOORBELL_LOCKUP | NTB_B2BDOORBELL_BIT14 |
		    NTB_SB01BASE_LOCKUP },
	{ 0x6F0D8086, "BDX Xeon E5 V4 Non-Transparent Bridge B2B", NTB_XEON,
		NTB_SDOORBELL_LOCKUP | NTB_B2BDOORBELL_BIT14 |
		    NTB_SB01BASE_LOCKUP },

	{ 0x00000000, NULL, NTB_SOC, 0 }
};
324
/* SoC register layout (64-bit doorbell registers). */
static const struct ntb_reg soc_reg = {
	.ntb_ctl = SOC_NTBCNTL_OFFSET,
	.lnk_sta = SOC_LINK_STATUS_OFFSET,
	.db_size = sizeof(uint64_t),
	.mw_bar = { NTB_B2B_BAR_1, NTB_B2B_BAR_2 },
};

static const struct ntb_alt_reg soc_b2b_reg = {
	.db_bell = SOC_B2B_DOORBELL_OFFSET,
	.spad = SOC_B2B_SPAD_OFFSET,
};

static const struct ntb_xlat_reg soc_sec_xlat = {
#if 0
	/* "FIXME" says the Linux driver. */
	.bar0_base = SOC_SBAR0BASE_OFFSET,
	.bar2_limit = SOC_SBAR2LMT_OFFSET,
#endif
	.bar2_xlat = SOC_SBAR2XLAT_OFFSET,
};

/* Xeon register layout (16-bit doorbell registers). */
static const struct ntb_reg xeon_reg = {
	.ntb_ctl = XEON_NTBCNTL_OFFSET,
	.lnk_sta = XEON_LINK_STATUS_OFFSET,
	.db_size = sizeof(uint16_t),
	.mw_bar = { NTB_B2B_BAR_1, NTB_B2B_BAR_2, NTB_B2B_BAR_3 },
};

static const struct ntb_alt_reg xeon_b2b_reg = {
	.db_bell = XEON_B2B_DOORBELL_OFFSET,
	.spad = XEON_B2B_SPAD_OFFSET,
};

static const struct ntb_xlat_reg xeon_sec_xlat = {
	.bar0_base = XEON_SBAR0BASE_OFFSET,
	.bar2_limit = XEON_SBAR2LMT_OFFSET,
	.bar2_xlat = XEON_SBAR2XLAT_OFFSET,
};

/* Fixed B2B window addresses for the upstream (USD) side. */
static const struct ntb_b2b_addr xeon_b2b_usd_addr = {
	.bar0_addr = XEON_B2B_BAR0_USD_ADDR,
	.bar2_addr64 = XEON_B2B_BAR2_USD_ADDR64,
	.bar4_addr64 = XEON_B2B_BAR4_USD_ADDR64,
	.bar4_addr32 = XEON_B2B_BAR4_USD_ADDR32,
	.bar5_addr32 = XEON_B2B_BAR5_USD_ADDR32,
};

/* Fixed B2B window addresses for the downstream (DSD) side. */
static const struct ntb_b2b_addr xeon_b2b_dsd_addr = {
	.bar0_addr = XEON_B2B_BAR0_DSD_ADDR,
	.bar2_addr64 = XEON_B2B_BAR2_DSD_ADDR64,
	.bar4_addr64 = XEON_B2B_BAR4_DSD_ADDR64,
	.bar4_addr32 = XEON_B2B_BAR4_DSD_ADDR32,
	.bar5_addr32 = XEON_B2B_BAR5_DSD_ADDR32,
};
379
380/*
381 * OS <-> Driver interface structures
382 */
MALLOC_DEFINE(M_NTB, "ntb_hw", "ntb_hw driver memory allocations");

/* newbus device method table. */
static device_method_t ntb_pci_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe,     ntb_probe),
	DEVMETHOD(device_attach,    ntb_attach),
	DEVMETHOD(device_detach,    ntb_detach),
	DEVMETHOD_END
};

static driver_t ntb_pci_driver = {
	"ntb_hw",
	ntb_pci_methods,
	sizeof(struct ntb_softc),
};

static devclass_t ntb_devclass;
DRIVER_MODULE(ntb_hw, pci, ntb_pci_driver, ntb_devclass, NULL, NULL);
MODULE_VERSION(ntb_hw, 1);

/* Root of this driver's sysctl tree: hw.ntb.* */
SYSCTL_NODE(_hw, OID_AUTO, ntb, CTLFLAG_RW, 0, "NTB sysctls");
404
405/*
406 * OS <-> Driver linkage functions
407 */
408static int
409ntb_probe(device_t device)
410{
411	struct ntb_hw_info *p;
412
413	p = ntb_get_device_info(pci_get_devid(device));
414	if (p == NULL)
415		return (ENXIO);
416
417	device_set_desc(device, p->desc);
418	return (0);
419}
420
/*
 * Attach: detect the device flavor, map BARs, initialize the hardware and
 * wire up interrupts.  On any failure, ntb_detach() is invoked to unwind
 * whatever was set up so far.
 */
static int
ntb_attach(device_t device)
{
	struct ntb_softc *ntb;
	struct ntb_hw_info *p;
	int error;

	ntb = DEVICE2SOFTC(device);
	/*
	 * NOTE(review): p is not NULL-checked; probe already matched this
	 * devid against pci_ids, so the lookup is assumed to succeed here.
	 */
	p = ntb_get_device_info(pci_get_devid(device));

	ntb->device = device;
	ntb->type = p->type;
	ntb->features = p->features;
	ntb->b2b_mw_idx = B2B_MW_DISABLED;

	/* Heartbeat timer for NTB_SOC since there is no link interrupt */
	callout_init(&ntb->heartbeat_timer, 1);
	callout_init(&ntb->lr_timer, 1);
	mtx_init(&ntb->db_mask_lock, "ntb hw bits", NULL, MTX_SPIN);

	if (ntb->type == NTB_SOC)
		error = ntb_detect_soc(ntb);
	else
		error = ntb_detect_xeon(ntb);
	if (error)
		goto out;

	/* Must follow detect: MW count depends on the NTB_SPLIT_BAR flag. */
	ntb_detect_max_mw(ntb);

	error = ntb_map_pci_bars(ntb);
	if (error)
		goto out;
	if (ntb->type == NTB_SOC)
		error = ntb_soc_init_dev(ntb);
	else
		error = ntb_xeon_init_dev(ntb);
	if (error)
		goto out;
	error = ntb_init_isr(ntb);
	if (error)
		goto out;

	pci_enable_busmaster(ntb->device);

out:
	if (error != 0)
		ntb_detach(device);
	return (error);
}
470
/*
 * Detach: quiesce the hardware and release all resources.  Also called by
 * ntb_attach() on its error path, so it may run on a partially-initialized
 * softc.
 */
static int
ntb_detach(device_t device)
{
	struct ntb_softc *ntb;

	ntb = DEVICE2SOFTC(device);

	/* Mask all doorbell interrupts before tearing anything down. */
	ntb_db_set_mask(ntb, ntb->db_valid_mask);
	callout_drain(&ntb->heartbeat_timer);
	callout_drain(&ntb->lr_timer);
	if (ntb->type == NTB_XEON)
		ntb_teardown_xeon(ntb);
	ntb_teardown_interrupts(ntb);

	mtx_destroy(&ntb->db_mask_lock);

	/*
	 * Redetect total MWs so we unmap properly -- in case we lowered the
	 * maximum to work around Xeon errata.
	 */
	ntb_detect_max_mw(ntb);
	ntb_unmap_pci_bar(ntb);

	return (0);
}
496
497/*
498 * Driver internal routines
499 */
/*
 * Translate a memory-window index into the BAR that backs it.  The special
 * b2b_mw_idx (when enabled) is permitted to lie outside [0, mw_count).
 */
static inline enum ntb_bar
ntb_mw_to_bar(struct ntb_softc *ntb, unsigned mw)
{

	KASSERT(mw < ntb->mw_count ||
	    (mw != B2B_MW_DISABLED && mw == ntb->b2b_mw_idx),
	    ("%s: mw:%u > count:%u", __func__, mw, (unsigned)ntb->mw_count));

	return (ntb->reg->mw_bar[mw]);
}
510
511static int
512ntb_map_pci_bars(struct ntb_softc *ntb)
513{
514	int rc;
515
516	ntb->bar_info[NTB_CONFIG_BAR].pci_resource_id = PCIR_BAR(0);
517	rc = map_mmr_bar(ntb, &ntb->bar_info[NTB_CONFIG_BAR]);
518	if (rc != 0)
519		goto out;
520
521	ntb->bar_info[NTB_B2B_BAR_1].pci_resource_id = PCIR_BAR(2);
522	rc = map_memory_window_bar(ntb, &ntb->bar_info[NTB_B2B_BAR_1]);
523	if (rc != 0)
524		goto out;
525	ntb->bar_info[NTB_B2B_BAR_1].psz_off = XEON_PBAR23SZ_OFFSET;
526	ntb->bar_info[NTB_B2B_BAR_1].ssz_off = XEON_SBAR23SZ_OFFSET;
527	ntb->bar_info[NTB_B2B_BAR_1].sbarbase_off = XEON_SBAR2BASE_OFFSET;
528	ntb->bar_info[NTB_B2B_BAR_1].sbarlmt_off = XEON_SBAR2LMT_OFFSET;
529	ntb->bar_info[NTB_B2B_BAR_1].pbarxlat_off = XEON_PBAR2XLAT_OFFSET;
530
531	ntb->bar_info[NTB_B2B_BAR_2].pci_resource_id = PCIR_BAR(4);
532	/* XXX Are shared MW B2Bs write-combining? */
533	if (HAS_FEATURE(NTB_SDOORBELL_LOCKUP) && !HAS_FEATURE(NTB_SPLIT_BAR))
534		rc = map_mmr_bar(ntb, &ntb->bar_info[NTB_B2B_BAR_2]);
535	else
536		rc = map_memory_window_bar(ntb, &ntb->bar_info[NTB_B2B_BAR_2]);
537	ntb->bar_info[NTB_B2B_BAR_2].psz_off = XEON_PBAR4SZ_OFFSET;
538	ntb->bar_info[NTB_B2B_BAR_2].ssz_off = XEON_SBAR4SZ_OFFSET;
539	ntb->bar_info[NTB_B2B_BAR_2].sbarbase_off = XEON_SBAR4BASE_OFFSET;
540	ntb->bar_info[NTB_B2B_BAR_2].sbarlmt_off = XEON_SBAR4LMT_OFFSET;
541	ntb->bar_info[NTB_B2B_BAR_2].pbarxlat_off = XEON_PBAR4XLAT_OFFSET;
542
543	if (!HAS_FEATURE(NTB_SPLIT_BAR))
544		goto out;
545
546	ntb->bar_info[NTB_B2B_BAR_3].pci_resource_id = PCIR_BAR(5);
547	if (HAS_FEATURE(NTB_SDOORBELL_LOCKUP))
548		rc = map_mmr_bar(ntb, &ntb->bar_info[NTB_B2B_BAR_3]);
549	else
550		rc = map_memory_window_bar(ntb, &ntb->bar_info[NTB_B2B_BAR_3]);
551	ntb->bar_info[NTB_B2B_BAR_3].psz_off = XEON_PBAR5SZ_OFFSET;
552	ntb->bar_info[NTB_B2B_BAR_3].ssz_off = XEON_SBAR5SZ_OFFSET;
553	ntb->bar_info[NTB_B2B_BAR_3].sbarbase_off = XEON_SBAR5BASE_OFFSET;
554	ntb->bar_info[NTB_B2B_BAR_3].sbarlmt_off = XEON_SBAR5LMT_OFFSET;
555	ntb->bar_info[NTB_B2B_BAR_3].pbarxlat_off = XEON_PBAR5XLAT_OFFSET;
556
557out:
558	if (rc != 0)
559		device_printf(ntb->device,
560		    "unable to allocate pci resource\n");
561	return (rc);
562}
563
/* Log the size and virtual/physical addresses of a newly mapped BAR. */
static void
print_map_success(struct ntb_softc *ntb, struct ntb_pci_bar_info *bar)
{

	device_printf(ntb->device, "Bar size = %lx, v %p, p %p\n",
	    bar->size, bar->vbase, (void *)(bar->pbase));
}
571
572static int
573map_mmr_bar(struct ntb_softc *ntb, struct ntb_pci_bar_info *bar)
574{
575
576	bar->pci_resource = bus_alloc_resource_any(ntb->device, SYS_RES_MEMORY,
577	    &bar->pci_resource_id, RF_ACTIVE);
578	if (bar->pci_resource == NULL)
579		return (ENXIO);
580
581	save_bar_parameters(bar);
582	print_map_success(ntb, bar);
583	return (0);
584}
585
/*
 * Map a BAR used as a memory window: allocate the resource, work around the
 * Ivytown misreported-size erratum if present, and mark the region
 * write-combining for throughput.
 */
static int
map_memory_window_bar(struct ntb_softc *ntb, struct ntb_pci_bar_info *bar)
{
	int rc;
	uint8_t bar_size_bits = 0;

	bar->pci_resource = bus_alloc_resource_any(ntb->device, SYS_RES_MEMORY,
	    &bar->pci_resource_id, RF_ACTIVE);

	if (bar->pci_resource == NULL)
		return (ENXIO);

	save_bar_parameters(bar);
	/*
	 * Ivytown NTB BAR sizes are misreported by the hardware due to a
	 * hardware issue. To work around this, query the size it should be
	 * configured to by the device and modify the resource to correspond to
	 * this new size. The BIOS on systems with this problem is required to
	 * provide enough address space to allow the driver to make this change
	 * safely.
	 *
	 * Ideally I could have just specified the size when I allocated the
	 * resource like:
	 *  bus_alloc_resource(ntb->device,
	 *	SYS_RES_MEMORY, &bar->pci_resource_id, 0ul, ~0ul,
	 *	1ul << bar_size_bits, RF_ACTIVE);
	 * but the PCI driver does not honor the size in this call, so we have
	 * to modify it after the fact.
	 */
	if (HAS_FEATURE(NTB_BAR_SIZE_4K)) {
		/* The PBAR*SZ registers hold log2 of the true BAR size. */
		if (bar->pci_resource_id == PCIR_BAR(2))
			bar_size_bits = pci_read_config(ntb->device,
			    XEON_PBAR23SZ_OFFSET, 1);
		else
			bar_size_bits = pci_read_config(ntb->device,
			    XEON_PBAR45SZ_OFFSET, 1);

		rc = bus_adjust_resource(ntb->device, SYS_RES_MEMORY,
		    bar->pci_resource, bar->pbase,
		    bar->pbase + (1ul << bar_size_bits) - 1);
		if (rc != 0) {
			device_printf(ntb->device,
			    "unable to resize bar\n");
			return (rc);
		}

		/* Re-read pbase/vbase/size after the resize. */
		save_bar_parameters(bar);
	}

	/* Mark bar region as write combining to improve performance. */
	rc = pmap_change_attr((vm_offset_t)bar->vbase, bar->size,
	    VM_MEMATTR_WRITE_COMBINING);
	if (rc != 0) {
		device_printf(ntb->device,
		    "unable to mark bar as WRITE_COMBINING\n");
		return (rc);
	}
	print_map_success(ntb, bar);
	return (0);
}
646
647static void
648ntb_unmap_pci_bar(struct ntb_softc *ntb)
649{
650	struct ntb_pci_bar_info *current_bar;
651	int i;
652
653	for (i = 0; i < NTB_MAX_BARS; i++) {
654		current_bar = &ntb->bar_info[i];
655		if (current_bar->pci_resource != NULL)
656			bus_release_resource(ntb->device, SYS_RES_MEMORY,
657			    current_bar->pci_resource_id,
658			    current_bar->pci_resource);
659	}
660}
661
/*
 * Allocate and wire up one MSI-X interrupt per vector.  On failure,
 * partially-allocated resources are left for ntb_teardown_interrupts() to
 * release; allocated_interrupts records how far we got.
 */
static int
ntb_setup_msix(struct ntb_softc *ntb, uint32_t num_vectors)
{
	uint32_t i;
	int rc;

	for (i = 0; i < num_vectors; i++) {
		/* MSI-X IRQ resource IDs are 1-based. */
		ntb->int_info[i].rid = i + 1;
		ntb->int_info[i].res = bus_alloc_resource_any(ntb->device,
		    SYS_RES_IRQ, &ntb->int_info[i].rid, RF_ACTIVE);
		if (ntb->int_info[i].res == NULL) {
			device_printf(ntb->device,
			    "bus_alloc_resource failed\n");
			return (ENOMEM);
		}
		ntb->int_info[i].tag = NULL;
		ntb->allocated_interrupts++;
		/* Each vector dispatches directly to its db_cb slot. */
		rc = bus_setup_intr(ntb->device, ntb->int_info[i].res,
		    INTR_MPSAFE | INTR_TYPE_MISC, NULL, handle_irq,
		    &ntb->db_cb[i], &ntb->int_info[i].tag);
		if (rc != 0) {
			device_printf(ntb->device, "bus_setup_intr failed\n");
			return (ENXIO);
		}
	}
	return (0);
}
689
/*
 * The Linux NTB driver drops from MSI-X to legacy INTx if a unique vector
 * cannot be allocated for each MSI-X message.  JHB seems to think remapping
 * should be okay.  This tunable should enable us to test that hypothesis
 * when someone gets their hands on some Xeon hardware.
 *
 * (Tunable: hw.ntb.force_remap_mode; consumed in ntb_init_isr().)
 */
static int ntb_force_remap_mode;
SYSCTL_INT(_hw_ntb, OID_AUTO, force_remap_mode, CTLFLAG_RDTUN,
    &ntb_force_remap_mode, 0, "If enabled, force MSI-X messages to be remapped"
    " to a smaller number of ithreads, even if the desired number are "
    "available");

/*
 * In case it is NOT ok, give consumers an abort button.
 *
 * (Tunable: hw.ntb.prefer_intx_to_remap; consumed in ntb_remap_msix().)
 */
static int ntb_prefer_intx;
SYSCTL_INT(_hw_ntb, OID_AUTO, prefer_intx_to_remap, CTLFLAG_RDTUN,
    &ntb_prefer_intx, 0, "If enabled, prefer to use legacy INTx mode rather "
    "than remapping MSI-X messages over available slots (match Linux driver "
    "behavior)");
710
711/*
712 * Remap the desired number of MSI-X messages to available ithreads in a simple
713 * round-robin fashion.
714 */
715static int
716ntb_remap_msix(device_t dev, uint32_t desired, uint32_t avail)
717{
718	u_int *vectors;
719	uint32_t i;
720	int rc;
721
722	if (ntb_prefer_intx != 0)
723		return (ENXIO);
724
725	vectors = malloc(desired * sizeof(*vectors), M_NTB, M_ZERO | M_WAITOK);
726
727	for (i = 0; i < desired; i++)
728		vectors[i] = (i % avail) + 1;
729
730	rc = pci_remap_msix(dev, desired, vectors);
731	free(vectors, M_NTB);
732	return (rc);
733}
734
/*
 * Set up interrupt delivery: try one MSI-X vector per doorbell (optionally
 * remapping onto fewer ithreads), falling back to a single legacy INTx
 * handler when that fails.  Also masks doorbells until consumers register
 * callbacks.
 */
static int
ntb_init_isr(struct ntb_softc *ntb)
{
	uint32_t desired_vectors, num_vectors;
	uint64_t mask;
	int rc;

	ntb->allocated_interrupts = 0;
	ntb->last_ts = ticks;

	/*
	 * On SOC, disable all interrupts.  On XEON, disable all but Link
	 * Interrupt.  The rest will be unmasked as callbacks are registered.
	 */
	mask = ntb->db_valid_mask;
	if (ntb->type == NTB_XEON)
		mask &= ~ntb->db_link_mask;
	ntb_db_set_mask(ntb, mask);

	num_vectors = desired_vectors = MIN(pci_msix_count(ntb->device),
	    ntb->db_count);
	if (desired_vectors >= 1) {
		rc = pci_alloc_msix(ntb->device, &num_vectors);

		/* Debug knob: pretend one fewer vector was granted. */
		if (ntb_force_remap_mode != 0 && rc == 0 &&
		    num_vectors == desired_vectors)
			num_vectors--;

		if (rc == 0 && num_vectors < desired_vectors) {
			rc = ntb_remap_msix(ntb->device, desired_vectors,
			    num_vectors);
			if (rc == 0)
				num_vectors = desired_vectors;
			else
				pci_release_msi(ntb->device);
		}
		if (rc != 0)
			num_vectors = 1;
	} else
		num_vectors = 1;

	if (ntb->type == NTB_XEON && num_vectors < ntb->db_vec_count) {
		/*
		 * If allocating MSI-X interrupts failed and we're forced to
		 * use legacy INTx anyway, the only limit on individual
		 * callbacks is the number of doorbell bits.
		 */
		ntb->db_vec_count = 1;
		ntb->db_vec_shift = ntb->db_count;
		ntb_create_callbacks(ntb, ntb->db_count);
		rc = ntb_setup_legacy_interrupt(ntb);
	} else {
		ntb_create_callbacks(ntb, num_vectors);
		rc = ntb_setup_msix(ntb, num_vectors);
		if (rc == 0 && ntb->type == NTB_XEON) {
			/*
			 * Prevent consumers from registering callbacks on
			 * the link event irq slot, from which they will
			 * never be called back.
			 */
			ntb->db_cb[num_vectors - 1].reserved = true;
			ntb->max_cbs--;
		}
	}
	if (rc != 0) {
		device_printf(ntb->device,
		    "Error allocating interrupts: %d\n", rc);
		ntb_free_callbacks(ntb);
	}

	return (rc);
}
806
/*
 * Fall back to a single shared legacy INTx interrupt; all doorbell bits are
 * demultiplexed in software by ntb_handle_legacy_interrupt().
 */
static int
ntb_setup_legacy_interrupt(struct ntb_softc *ntb)
{
	int rc;

	/* Legacy INTx uses resource ID 0 and may be shared. */
	ntb->int_info[0].rid = 0;
	ntb->int_info[0].res = bus_alloc_resource_any(ntb->device, SYS_RES_IRQ,
	    &ntb->int_info[0].rid, RF_SHAREABLE|RF_ACTIVE);
	if (ntb->int_info[0].res == NULL) {
		device_printf(ntb->device, "bus_alloc_resource failed\n");
		return (ENOMEM);
	}

	ntb->int_info[0].tag = NULL;
	ntb->allocated_interrupts = 1;

	rc = bus_setup_intr(ntb->device, ntb->int_info[0].res,
	    INTR_MPSAFE | INTR_TYPE_MISC, NULL, ntb_handle_legacy_interrupt,
	    ntb, &ntb->int_info[0].tag);
	if (rc != 0) {
		device_printf(ntb->device, "bus_setup_intr failed\n");
		return (ENXIO);
	}

	return (0);
}
833
834static void
835ntb_teardown_interrupts(struct ntb_softc *ntb)
836{
837	struct ntb_int_info *current_int;
838	int i;
839
840	for (i = 0; i < ntb->allocated_interrupts; i++) {
841		current_int = &ntb->int_info[i];
842		if (current_int->tag != NULL)
843			bus_teardown_intr(ntb->device, current_int->res,
844			    current_int->tag);
845
846		if (current_int->res != NULL)
847			bus_release_resource(ntb->device, SYS_RES_IRQ,
848			    rman_get_rid(current_int->res), current_int->res);
849	}
850
851	ntb_free_callbacks(ntb);
852	pci_release_msi(ntb->device);
853}
854
855/*
856 * Doorbell register and mask are 64-bit on SoC, 16-bit on Xeon.  Abstract it
857 * out to make code clearer.
858 */
859static inline uint64_t
860ntb_db_read(struct ntb_softc *ntb, uint64_t regoff)
861{
862
863	if (ntb->type == NTB_SOC)
864		return (ntb_reg_read(8, regoff));
865
866	KASSERT(ntb->type == NTB_XEON, ("bad ntb type"));
867
868	return (ntb_reg_read(2, regoff));
869}
870
/*
 * Width-abstracted doorbell register write (see ntb_db_read).  Callers must
 * hold db_mask_lock when writing the local doorbell mask register.
 */
static inline void
ntb_db_write(struct ntb_softc *ntb, uint64_t regoff, uint64_t val)
{

	KASSERT((val & ~ntb->db_valid_mask) == 0,
	    ("%s: Invalid bits 0x%jx (valid: 0x%jx)", __func__,
	     (uintmax_t)(val & ~ntb->db_valid_mask),
	     (uintmax_t)ntb->db_valid_mask));

	/* Local mask register writes are serialized by db_mask_lock. */
	if (regoff == ntb->reg_ofs.ldb_mask)
		HW_ASSERT(ntb, MA_OWNED);

	if (ntb->type == NTB_SOC) {
		ntb_reg_write(8, regoff, val);
		return;
	}

	KASSERT(ntb->type == NTB_XEON, ("bad ntb type"));
	ntb_reg_write(2, regoff, (uint16_t)val);
}
891
/*
 * Mask (disable) the given local doorbell bits; the cached db_mask and the
 * hardware register are updated together under db_mask_lock.
 * NOTE(review): unlike ntb_db_clear_mask(), this does not KASSERT that
 * 'bits' lies within db_valid_mask.
 */
static inline void
ntb_db_set_mask(struct ntb_softc *ntb, uint64_t bits)
{

	HW_LOCK(ntb);
	ntb->db_mask |= bits;
	ntb_db_write(ntb, ntb->reg_ofs.ldb_mask, ntb->db_mask);
	HW_UNLOCK(ntb);
}
901
/*
 * Unmask (enable) the given local doorbell bits; the cached db_mask and the
 * hardware register are updated together under db_mask_lock.
 */
static inline void
ntb_db_clear_mask(struct ntb_softc *ntb, uint64_t bits)
{

	KASSERT((bits & ~ntb->db_valid_mask) == 0,
	    ("%s: Invalid bits 0x%jx (valid: 0x%jx)", __func__,
	     (uintmax_t)(bits & ~ntb->db_valid_mask),
	     (uintmax_t)ntb->db_valid_mask));

	HW_LOCK(ntb);
	ntb->db_mask &= ~bits;
	ntb_db_write(ntb, ntb->reg_ofs.ldb_mask, ntb->db_mask);
	HW_UNLOCK(ntb);
}
916
917static inline void
918mask_ldb_interrupt(struct ntb_softc *ntb, unsigned int idx)
919{
920	uint64_t mask;
921
922	mask = 1ull << (idx * ntb->db_vec_shift);
923	ntb_db_set_mask(ntb, mask);
924}
925
926static inline void
927unmask_ldb_interrupt(struct ntb_softc *ntb, unsigned int idx)
928{
929	uint64_t mask;
930
931	mask = 1ull << (idx * ntb->db_vec_shift);
932	ntb_db_clear_mask(ntb, mask);
933}
934
935static inline uint64_t
936ntb_vec_mask(struct ntb_softc *ntb, uint64_t db_vector)
937{
938	uint64_t shift, mask;
939
940	shift = ntb->db_vec_shift;
941	mask = (1ull << shift) - 1;
942	return (mask << (shift * db_vector));
943}
944
/*
 * Interrupt handler for one doorbell vector (also invoked per-bit from the
 * legacy INTx handler).  Polls link state when the vector covers the link
 * bit, acknowledges the doorbell, and defers any consumer callback to a
 * callout.
 */
static void
handle_irq(void *arg)
{
	struct ntb_db_cb *db_cb = arg;
	struct ntb_softc *ntb = db_cb->ntb;
	uint64_t vec_mask;
	int rc;

	ntb->last_ts = ticks;
	vec_mask = ntb_vec_mask(ntb, db_cb->db_num);

	if ((vec_mask & ntb->db_link_mask) != 0) {
		rc = ntb_poll_link(ntb);
		if (rc != 0)
			device_printf(ntb->device,
			    "Error determining link status\n");
	}

	if (db_cb->callback != NULL) {
		KASSERT(!db_cb->reserved, ("user callback on link event cb"));
		/*
		 * Mask this vector while the deferred callback is pending;
		 * presumably re-enabled after ntb_irq_work runs (not shown
		 * in this file section).
		 */
		mask_ldb_interrupt(ntb, db_cb->db_num);
	}

	/* Acknowledge (clear) this vector's doorbell bits. */
	ntb_db_write(ntb, ntb->reg_ofs.ldb, vec_mask);

	if (db_cb->callback != NULL)
		callout_reset(&db_cb->irq_work, 0, ntb_irq_work, db_cb);
}
973
974static void
975ntb_handle_legacy_interrupt(void *arg)
976{
977	struct ntb_softc *ntb = arg;
978	unsigned int i;
979	uint64_t ldb;
980
981	ldb = ntb_db_read(ntb, ntb->reg_ofs.ldb);
982	while (ldb != 0) {
983		i = ffs(ldb);
984		ldb &= ldb - 1;
985		handle_irq(&ntb->db_cb[i]);
986	}
987}
988
989static int
990ntb_create_callbacks(struct ntb_softc *ntb, uint32_t num_vectors)
991{
992	uint32_t i;
993
994	ntb->max_cbs = num_vectors;
995	ntb->db_cb = malloc(num_vectors * sizeof(*ntb->db_cb), M_NTB,
996	    M_ZERO | M_WAITOK);
997	for (i = 0; i < num_vectors; i++) {
998		ntb->db_cb[i].db_num = i;
999		ntb->db_cb[i].ntb = ntb;
1000	}
1001
1002	return (0);
1003}
1004
1005static void
1006ntb_free_callbacks(struct ntb_softc *ntb)
1007{
1008	uint8_t i;
1009
1010	if (ntb->db_cb == NULL)
1011		return;
1012
1013	for (i = 0; i < ntb->max_cbs; i++)
1014		ntb_unregister_db_callback(ntb, i);
1015
1016	free(ntb->db_cb, M_NTB);
1017	ntb->db_cb = NULL;
1018	ntb->max_cbs = 0;
1019}
1020
1021static struct ntb_hw_info *
1022ntb_get_device_info(uint32_t device_id)
1023{
1024	struct ntb_hw_info *ep = pci_ids;
1025
1026	while (ep->device_id) {
1027		if (ep->device_id == device_id)
1028			return (ep);
1029		++ep;
1030	}
1031	return (NULL);
1032}
1033
/* Xeon-specific detach step: take the NTB link down. */
static void
ntb_teardown_xeon(struct ntb_softc *ntb)
{

	ntb_link_disable(ntb);
}
1040
1041static void
1042ntb_detect_max_mw(struct ntb_softc *ntb)
1043{
1044
1045	if (ntb->type == NTB_SOC) {
1046		ntb->mw_count = SOC_MW_COUNT;
1047		return;
1048	}
1049
1050	if (HAS_FEATURE(NTB_SPLIT_BAR))
1051		ntb->mw_count = XEON_HSX_SPLIT_MW_COUNT;
1052	else
1053		ntb->mw_count = XEON_SNB_MW_COUNT;
1054}
1055
/*
 * Read the Xeon PPD register to determine device role (USD/DSD), split-BAR
 * mode, and connection type.  Only B2B connections are supported.
 */
static int
ntb_detect_xeon(struct ntb_softc *ntb)
{
	uint8_t ppd, conn_type;

	ppd = pci_read_config(ntb->device, NTB_PPD_OFFSET, 1);
	ntb->ppd = ppd;

	if ((ppd & XEON_PPD_DEV_TYPE) != 0)
		ntb->dev_type = NTB_DEV_USD;
	else
		ntb->dev_type = NTB_DEV_DSD;

	if ((ppd & XEON_PPD_SPLIT_BAR) != 0)
		ntb->features |= NTB_SPLIT_BAR;

	/* SB01BASE_LOCKUP errata is a superset of SDOORBELL errata */
	if (HAS_FEATURE(NTB_SB01BASE_LOCKUP))
		ntb->features |= NTB_SDOORBELL_LOCKUP;

	conn_type = ppd & XEON_PPD_CONN_TYPE;
	switch (conn_type) {
	case NTB_CONN_B2B:
		ntb->conn_type = conn_type;
		break;
	case NTB_CONN_RP:
	case NTB_CONN_TRANSPARENT:
	default:
		/* RP and Transparent modes are not implemented. */
		device_printf(ntb->device, "Unsupported connection type: %u\n",
		    (unsigned)conn_type);
		return (ENXIO);
	}
	return (0);
}
1090
1091static int
1092ntb_detect_soc(struct ntb_softc *ntb)
1093{
1094	uint32_t ppd, conn_type;
1095
1096	ppd = pci_read_config(ntb->device, NTB_PPD_OFFSET, 4);
1097	ntb->ppd = ppd;
1098
1099	if ((ppd & SOC_PPD_DEV_TYPE) != 0)
1100		ntb->dev_type = NTB_DEV_DSD;
1101	else
1102		ntb->dev_type = NTB_DEV_USD;
1103
1104	conn_type = (ppd & SOC_PPD_CONN_TYPE) >> 8;
1105	switch (conn_type) {
1106	case NTB_CONN_B2B:
1107		ntb->conn_type = conn_type;
1108		break;
1109	default:
1110		device_printf(ntb->device, "Unsupported NTB configuration\n");
1111		return (ENXIO);
1112	}
1113	return (0);
1114}
1115
/*
 * Initialize a Xeon-type NTB device: program register offsets and doorbell
 * geometry, apply errata workarounds, set up the B2B memory window, and
 * start link training.  Returns ENXIO for unsupported connection types.
 */
static int
ntb_xeon_init_dev(struct ntb_softc *ntb)
{
	int rc;

	/* Offsets of the local doorbell/scratchpad/translation registers. */
	ntb->reg_ofs.ldb	= XEON_PDOORBELL_OFFSET;
	ntb->reg_ofs.ldb_mask	= XEON_PDBMSK_OFFSET;
	ntb->reg_ofs.spad_local	= XEON_SPAD_OFFSET;
	ntb->reg_ofs.bar4_xlat	= XEON_SBAR4XLAT_OFFSET;
	if (HAS_FEATURE(NTB_SPLIT_BAR))
		ntb->reg_ofs.bar5_xlat = XEON_SBAR5XLAT_OFFSET;
	ntb->reg_ofs.spci_cmd	= XEON_PCICMD_OFFSET;

	ntb->spad_count		= XEON_SPAD_COUNT;
	ntb->db_count		= XEON_DB_COUNT;
	ntb->db_link_mask	= XEON_DB_LINK_BIT;
	ntb->db_vec_count	= XEON_DB_MSIX_VECTOR_COUNT;
	ntb->db_vec_shift	= XEON_DB_MSIX_VECTOR_SHIFT;

	/* Only back-to-back topology is supported by this init path. */
	if (ntb->conn_type != NTB_CONN_B2B) {
		device_printf(ntb->device, "Connection type %d not supported\n",
		    ntb->conn_type);
		return (ENXIO);
	}

	ntb->reg = &xeon_reg;
	ntb->peer_reg = &xeon_b2b_reg;
	ntb->xlat_reg = &xeon_sec_xlat;

	/*
	 * There is a Xeon hardware errata related to writes to SDOORBELL or
	 * B2BDOORBELL in conjunction with inbound access to NTB MMIO space,
	 * which may hang the system.  To workaround this use the second memory
	 * window to access the interrupt and scratch pad registers on the
	 * remote system.
	 */
	if (HAS_FEATURE(NTB_SDOORBELL_LOCKUP))
		/* Use the last MW for mapping remote spad */
		ntb->b2b_mw_idx = ntb->mw_count - 1;
	else if (HAS_FEATURE(NTB_B2BDOORBELL_BIT14))
		/*
		 * HW Errata on bit 14 of b2bdoorbell register.  Writes will not be
		 * mirrored to the remote system.  Shrink the number of bits by one,
		 * since bit 14 is the last bit.
		 *
		 * On REGS_THRU_MW errata mode, we don't use the b2bdoorbell register
		 * anyway.  Nor for non-B2B connection types.
		 */
		ntb->db_count = XEON_DB_COUNT - 1;

	ntb->db_valid_mask = (1ull << ntb->db_count) - 1;

	/* USD vs. DSD decides which address set is local and which is peer. */
	if (ntb->dev_type == NTB_DEV_USD)
		rc = xeon_setup_b2b_mw(ntb, &xeon_b2b_dsd_addr,
		    &xeon_b2b_usd_addr);
	else
		rc = xeon_setup_b2b_mw(ntb, &xeon_b2b_usd_addr,
		    &xeon_b2b_dsd_addr);
	if (rc != 0)
		return (rc);

	/* Enable Bus Master and Memory Space on the secondary side */
	ntb_reg_write(2, ntb->reg_ofs.spci_cmd,
	    PCIM_CMD_MEMEN | PCIM_CMD_BUSMASTEREN);

	/* Enable link training */
	ntb_link_enable(ntb);

	return (0);
}
1186
/*
 * Initialize a SOC-type (Atom) NTB device.  Only B2B is supported; the
 * caller is expected to have validated conn_type already (KASSERT below).
 */
static int
ntb_soc_init_dev(struct ntb_softc *ntb)
{

	KASSERT(ntb->conn_type == NTB_CONN_B2B,
	    ("Unsupported NTB configuration (%d)\n", ntb->conn_type));

	/* Offsets of the local doorbell/scratchpad registers. */
	ntb->reg_ofs.ldb	 = SOC_PDOORBELL_OFFSET;
	ntb->reg_ofs.ldb_mask	 = SOC_PDBMSK_OFFSET;
	ntb->reg_ofs.bar4_xlat	 = SOC_SBAR4XLAT_OFFSET;
	ntb->reg_ofs.spad_local	 = SOC_SPAD_OFFSET;
	ntb->reg_ofs.spci_cmd	 = SOC_PCICMD_OFFSET;

	ntb->spad_count		 = SOC_SPAD_COUNT;
	ntb->db_count		 = SOC_DB_COUNT;
	ntb->db_vec_count	 = SOC_DB_MSIX_VECTOR_COUNT;
	ntb->db_vec_shift	 = SOC_DB_MSIX_VECTOR_SHIFT;
	ntb->db_valid_mask	 = (1ull << ntb->db_count) - 1;

	ntb->reg = &soc_reg;
	ntb->peer_reg = &soc_b2b_reg;
	ntb->xlat_reg = &soc_sec_xlat;

	/*
	 * FIXME - MSI-X bug on early SOC HW, remove once internal issue is
	 * resolved.  Mask transaction layer internal parity errors.
	 */
	pci_write_config(ntb->device, 0xFC, 0x4, 4);

	configure_soc_secondary_side_bars(ntb);

	/* Enable Bus Master and Memory Space on the secondary side */
	ntb_reg_write(2, ntb->reg_ofs.spci_cmd,
	    PCIM_CMD_MEMEN | PCIM_CMD_BUSMASTEREN);

	/* Initiate PCI-E link training */
	ntb_link_enable(ntb);

	/* SOC has no link-status interrupt; start the polling heartbeat. */
	callout_reset(&ntb->heartbeat_timer, 0, soc_link_hb, ntb);

	return (0);
}
1229
1230/* XXX: Linux driver doesn't seem to do any of this for SoC. */
1231static void
1232configure_soc_secondary_side_bars(struct ntb_softc *ntb)
1233{
1234
1235	if (ntb->dev_type == NTB_DEV_USD) {
1236		ntb_reg_write(8, SOC_PBAR2XLAT_OFFSET,
1237		    XEON_B2B_BAR2_DSD_ADDR64);
1238		ntb_reg_write(8, SOC_PBAR4XLAT_OFFSET,
1239		    XEON_B2B_BAR4_DSD_ADDR64);
1240		ntb_reg_write(8, SOC_MBAR23_OFFSET, XEON_B2B_BAR2_USD_ADDR64);
1241		ntb_reg_write(8, SOC_MBAR45_OFFSET, XEON_B2B_BAR4_USD_ADDR64);
1242	} else {
1243		ntb_reg_write(8, SOC_PBAR2XLAT_OFFSET,
1244		    XEON_B2B_BAR2_USD_ADDR64);
1245		ntb_reg_write(8, SOC_PBAR4XLAT_OFFSET,
1246		    XEON_B2B_BAR4_USD_ADDR64);
1247		ntb_reg_write(8, SOC_MBAR23_OFFSET, XEON_B2B_BAR2_DSD_ADDR64);
1248		ntb_reg_write(8, SOC_MBAR45_OFFSET, XEON_B2B_BAR4_DSD_ADDR64);
1249	}
1250}
1251
1252
1253/*
1254 * When working around Xeon SDOORBELL errata by remapping remote registers in a
1255 * MW, limit the B2B MW to half a MW.  By sharing a MW, half the shared MW
1256 * remains for use by a higher layer.
1257 *
1258 * Will only be used if working around SDOORBELL errata and the BIOS-configured
1259 * MW size is sufficiently large.
1260 */
1261static unsigned int ntb_b2b_mw_share;
1262SYSCTL_UINT(_hw_ntb, OID_AUTO, b2b_mw_share, CTLFLAG_RDTUN, &ntb_b2b_mw_share,
1263    0, "If enabled (non-zero), prefer to share half of the B2B peer register "
1264    "MW with higher level consumers.  Both sides of the NTB MUST set the same "
1265    "value here.");
1266
1267static void
1268xeon_reset_sbar_size(struct ntb_softc *ntb, enum ntb_bar idx,
1269    enum ntb_bar regbar)
1270{
1271	struct ntb_pci_bar_info *bar;
1272	uint8_t bar_sz;
1273
1274	if (!HAS_FEATURE(NTB_SPLIT_BAR) && idx >= NTB_B2B_BAR_3)
1275		return;
1276
1277	bar = &ntb->bar_info[idx];
1278	bar_sz = pci_read_config(ntb->device, bar->psz_off, 1);
1279	if (idx == regbar) {
1280		if (ntb->b2b_off != 0)
1281			bar_sz--;
1282		else
1283			bar_sz = 0;
1284	}
1285	pci_write_config(ntb->device, bar->ssz_off, bar_sz, 1);
1286	bar_sz = pci_read_config(ntb->device, bar->ssz_off, 1);
1287	(void)bar_sz;
1288}
1289
/*
 * Program a secondary BAR's base and limit registers.  Limit is set equal
 * to base, which yields a zero-length inbound window (see the caller,
 * xeon_setup_b2b_mw()).  The BAR carrying the B2B registers has its base
 * advanced by b2b_off.  In split-BAR mode, BARs 2+ are 32-bit registers.
 */
static void
xeon_set_sbar_base_and_limit(struct ntb_softc *ntb, uint64_t base_addr,
    enum ntb_bar idx, enum ntb_bar regbar)
{
	struct ntb_pci_bar_info *bar;
	vm_paddr_t bar_addr;

	bar = &ntb->bar_info[idx];
	bar_addr = base_addr + ((idx == regbar) ? ntb->b2b_off : 0);

	if (HAS_FEATURE(NTB_SPLIT_BAR) && idx >= NTB_B2B_BAR_2) {
		ntb_reg_write(4, bar->sbarbase_off, bar_addr);
		ntb_reg_write(4, bar->sbarlmt_off, bar_addr);
		/* Read back; presumably flushes posted writes -- TODO confirm. */
		bar_addr = ntb_reg_read(4, bar->sbarbase_off);
		(void)bar_addr;
		bar_addr = ntb_reg_read(4, bar->sbarlmt_off);
	} else {
		ntb_reg_write(8, bar->sbarbase_off, bar_addr);
		ntb_reg_write(8, bar->sbarlmt_off, bar_addr);
		/* Read back; presumably flushes posted writes -- TODO confirm. */
		bar_addr = ntb_reg_read(8, bar->sbarbase_off);
		(void)bar_addr;
		bar_addr = ntb_reg_read(8, bar->sbarlmt_off);
	}
	(void)bar_addr;
}
1315
1316static void
1317xeon_set_pbar_xlat(struct ntb_softc *ntb, uint64_t base_addr, enum ntb_bar idx)
1318{
1319	struct ntb_pci_bar_info *bar;
1320
1321	bar = &ntb->bar_info[idx];
1322	if (HAS_FEATURE(NTB_SPLIT_BAR) && idx >= NTB_B2B_BAR_2) {
1323		ntb_reg_write(4, bar->pbarxlat_off, base_addr);
1324		base_addr = ntb_reg_read(4, bar->pbarxlat_off);
1325	} else {
1326		ntb_reg_write(8, bar->pbarxlat_off, base_addr);
1327		base_addr = ntb_reg_read(8, bar->pbarxlat_off);
1328	}
1329	(void)base_addr;
1330}
1331
/*
 * Configure the Xeon B2B memory window and secondary BARs.  "addr" holds
 * the local-side address set, "peer_addr" the peer's.  When a B2B register
 * MW is in use (b2b_mw_idx != B2B_MW_DISABLED), the chosen BAR is either
 * dedicated entirely to B2B registers or shared half-and-half with
 * consumers, depending on the hw.ntb.b2b_mw_share tunable and BAR size.
 * Returns EIO if the BIOS-configured B2B BAR is too small.
 */
static int
xeon_setup_b2b_mw(struct ntb_softc *ntb, const struct ntb_b2b_addr *addr,
    const struct ntb_b2b_addr *peer_addr)
{
	struct ntb_pci_bar_info *b2b_bar;
	vm_size_t bar_size;
	uint64_t bar_addr;
	enum ntb_bar b2b_bar_num, i;

	if (ntb->b2b_mw_idx == B2B_MW_DISABLED) {
		/* No register MW needed; B2B registers live in BAR 0. */
		b2b_bar = NULL;
		b2b_bar_num = NTB_CONFIG_BAR;
		ntb->b2b_off = 0;
	} else {
		b2b_bar_num = ntb_mw_to_bar(ntb, ntb->b2b_mw_idx);
		KASSERT(b2b_bar_num > 0 && b2b_bar_num < NTB_MAX_BARS,
		    ("invalid b2b mw bar"));

		b2b_bar = &ntb->bar_info[b2b_bar_num];
		bar_size = b2b_bar->size;

		/* Share half the BAR if tunable set and it is big enough. */
		if (ntb_b2b_mw_share != 0 &&
		    (bar_size >> 1) >= XEON_B2B_MIN_SIZE)
			ntb->b2b_off = bar_size >> 1;
		else if (bar_size >= XEON_B2B_MIN_SIZE) {
			/* Dedicate the whole BAR; one fewer MW for consumers. */
			ntb->b2b_off = 0;
			ntb->mw_count--;
		} else {
			device_printf(ntb->device,
			    "B2B bar size is too small!\n");
			return (EIO);
		}
	}

	/*
	 * Reset the secondary bar sizes to match the primary bar sizes.
	 * (Except, disable or halve the size of the B2B secondary bar.)
	 */
	for (i = NTB_B2B_BAR_1; i < NTB_MAX_BARS; i++)
		xeon_reset_sbar_size(ntb, i, b2b_bar_num);

	/* Select the local address matching the BAR that holds B2B regs. */
	bar_addr = 0;
	if (b2b_bar_num == NTB_CONFIG_BAR)
		bar_addr = addr->bar0_addr;
	else if (b2b_bar_num == NTB_B2B_BAR_1)
		bar_addr = addr->bar2_addr64;
	else if (b2b_bar_num == NTB_B2B_BAR_2 && !HAS_FEATURE(NTB_SPLIT_BAR))
		bar_addr = addr->bar4_addr64;
	else if (b2b_bar_num == NTB_B2B_BAR_2)
		bar_addr = addr->bar4_addr32;
	else if (b2b_bar_num == NTB_B2B_BAR_3)
		bar_addr = addr->bar5_addr32;
	else
		KASSERT(false, ("invalid bar"));

	ntb_reg_write(8, XEON_SBAR0BASE_OFFSET, bar_addr);

	/*
	 * Other SBARs are normally hit by the PBAR xlat, except for the b2b
	 * register BAR.  The B2B BAR is either disabled above or configured
	 * half-size.  It starts at PBAR xlat + offset.
	 *
	 * Also set up incoming BAR limits == base (zero length window).
	 */
	xeon_set_sbar_base_and_limit(ntb, addr->bar2_addr64, NTB_B2B_BAR_1,
	    b2b_bar_num);
	if (HAS_FEATURE(NTB_SPLIT_BAR)) {
		xeon_set_sbar_base_and_limit(ntb, addr->bar4_addr32,
		    NTB_B2B_BAR_2, b2b_bar_num);
		xeon_set_sbar_base_and_limit(ntb, addr->bar5_addr32,
		    NTB_B2B_BAR_3, b2b_bar_num);
	} else
		xeon_set_sbar_base_and_limit(ntb, addr->bar4_addr64,
		    NTB_B2B_BAR_2, b2b_bar_num);

	/* Zero incoming translation addrs */
	ntb_reg_write(8, XEON_SBAR2XLAT_OFFSET, 0);
	ntb_reg_write(8, XEON_SBAR4XLAT_OFFSET, 0);

	/* Zero outgoing translation limits (whole bar size windows) */
	ntb_reg_write(8, XEON_PBAR2LMT_OFFSET, 0);
	ntb_reg_write(8, XEON_PBAR4LMT_OFFSET, 0);

	/* Set outgoing translation offsets */
	xeon_set_pbar_xlat(ntb, peer_addr->bar2_addr64, NTB_B2B_BAR_1);
	if (HAS_FEATURE(NTB_SPLIT_BAR)) {
		xeon_set_pbar_xlat(ntb, peer_addr->bar4_addr32, NTB_B2B_BAR_2);
		xeon_set_pbar_xlat(ntb, peer_addr->bar5_addr32, NTB_B2B_BAR_3);
	} else
		xeon_set_pbar_xlat(ntb, peer_addr->bar4_addr64, NTB_B2B_BAR_2);

	/* Set the translation offset for B2B registers */
	bar_addr = 0;
	if (b2b_bar_num == NTB_CONFIG_BAR)
		bar_addr = peer_addr->bar0_addr;
	else if (b2b_bar_num == NTB_B2B_BAR_1)
		bar_addr = peer_addr->bar2_addr64;
	else if (b2b_bar_num == NTB_B2B_BAR_2 && !HAS_FEATURE(NTB_SPLIT_BAR))
		bar_addr = peer_addr->bar4_addr64;
	else if (b2b_bar_num == NTB_B2B_BAR_2)
		bar_addr = peer_addr->bar4_addr32;
	else if (b2b_bar_num == NTB_B2B_BAR_3)
		bar_addr = peer_addr->bar5_addr32;
	else
		KASSERT(false, ("invalid bar"));

	/*
	 * B2B_XLAT_OFFSET is a 64-bit register but can only be written 32 bits
	 * at a time.
	 */
	ntb_reg_write(4, XEON_B2B_XLAT_OFFSETL, bar_addr & 0xffffffff);
	ntb_reg_write(4, XEON_B2B_XLAT_OFFSETU, bar_addr >> 32);
	return (0);
}
1446
/* SOC does not have link status interrupt, poll on that platform */
static void
soc_link_hb(void *arg)
{
	struct ntb_softc *ntb = arg;
	uint32_t status32;
	int rc;

	/*
	 * Delay polling the link status if an interrupt was received, unless
	 * the cached link status says the link is down.
	 */
	if ((long)ticks - ((long)ntb->last_ts + NTB_HB_TIMEOUT * hz) < 0 &&
	    (ntb->ntb_ctl & SOC_CNTL_LINK_DOWN) == 0)
		goto out;


	rc = ntb_poll_link(ntb);
	if (rc != 0)
		device_printf(ntb->device,
		    "Error determining link status\n");

	/* Check to see if a link error is the cause of the link down */
	if (ntb->link_status == NTB_LINK_DOWN) {
		status32 = ntb_reg_read(4, SOC_LTSSMSTATEJMP_OFFSET);
		if ((status32 & SOC_LTSSMSTATEJMP_FORCEDETECT) != 0) {
			/* Hand off to the recovery callout; do NOT re-arm
			 * the heartbeat -- recover_soc_link re-arms it. */
			callout_reset(&ntb->lr_timer, 0, recover_soc_link,
			    ntb);
			return;
		}
	}

out:
	/* Re-arm the heartbeat for the next poll interval. */
	callout_reset(&ntb->heartbeat_timer, NTB_HB_TIMEOUT * hz, soc_link_hb,
	    ntb);
}
1483
/*
 * Restart the SOC link: reset the ModPhy lanes, clear the sticky error
 * status registers, and release the LTSSM so the link can retrain.  The
 * register values are vendor "magic" (see comments below).
 */
static void
soc_perform_link_restart(struct ntb_softc *ntb)
{
	uint32_t status;

	/* Driver resets the NTB ModPhy lanes - magic! */
	ntb_reg_write(1, SOC_MODPHY_PCSREG6, 0xe0);
	ntb_reg_write(1, SOC_MODPHY_PCSREG4, 0x40);
	ntb_reg_write(1, SOC_MODPHY_PCSREG4, 0x60);
	ntb_reg_write(1, SOC_MODPHY_PCSREG6, 0x60);

	/* Driver waits 100ms to allow the NTB ModPhy to settle */
	pause("ModPhy", hz / 10);

	/* Clear AER Errors, write to clear */
	status = ntb_reg_read(4, SOC_ERRCORSTS_OFFSET);
	status &= PCIM_AER_COR_REPLAY_ROLLOVER;
	ntb_reg_write(4, SOC_ERRCORSTS_OFFSET, status);

	/* Clear unexpected electrical idle event in LTSSM, write to clear */
	status = ntb_reg_read(4, SOC_LTSSMERRSTS0_OFFSET);
	status |= SOC_LTSSMERRSTS0_UNEXPECTEDEI;
	ntb_reg_write(4, SOC_LTSSMERRSTS0_OFFSET, status);

	/* Clear DeSkew Buffer error, write to clear */
	status = ntb_reg_read(4, SOC_DESKEWSTS_OFFSET);
	status |= SOC_DESKEWSTS_DBERR;
	ntb_reg_write(4, SOC_DESKEWSTS_OFFSET, status);

	/* Clear IBIST overflow error, write to clear. */
	status = ntb_reg_read(4, SOC_IBSTERRRCRVSTS0_OFFSET);
	status &= SOC_IBIST_ERR_OFLOW;
	ntb_reg_write(4, SOC_IBSTERRRCRVSTS0_OFFSET, status);

	/* Releases the NTB state machine to allow the link to retrain */
	status = ntb_reg_read(4, SOC_LTSSMSTATEJMP_OFFSET);
	status &= ~SOC_LTSSMSTATEJMP_FORCEDETECT;
	ntb_reg_write(4, SOC_LTSSMSTATEJMP_OFFSET, status);
}
1522
/*
 * Record a link state transition, cache the negotiated width/speed on
 * link-up, and notify the registered transport event callback.  No-op if
 * the state is unchanged.
 */
static void
ntb_handle_link_event(struct ntb_softc *ntb, int link_state)
{
	enum ntb_hw_event event;
	uint16_t status;

	if (ntb->link_status == link_state)
		return;

	if (link_state == NTB_LINK_UP) {
		device_printf(ntb->device, "Link Up\n");
		ntb->link_status = NTB_LINK_UP;
		event = NTB_EVENT_HW_LINK_UP;

		/* SOC and transparent mode read link status via MMIO;
		 * Xeon B2B reads it from PCI config space. */
		if (ntb->type == NTB_SOC ||
		    ntb->conn_type == NTB_CONN_TRANSPARENT)
			status = ntb_reg_read(2, ntb->reg->lnk_sta);
		else
			status = pci_read_config(ntb->device,
			    XEON_LINK_STATUS_OFFSET, 2);
		ntb->link_width = (status & NTB_LINK_WIDTH_MASK) >> 4;
		ntb->link_speed = (status & NTB_LINK_SPEED_MASK);
		device_printf(ntb->device, "Link Width %d, Link Speed %d\n",
		    ntb->link_width, ntb->link_speed);
		/* (Re)arm the SOC heartbeat poll. */
		callout_reset(&ntb->heartbeat_timer, NTB_HB_TIMEOUT * hz,
		    soc_link_hb, ntb);
	} else {
		device_printf(ntb->device, "Link Down\n");
		ntb->link_status = NTB_LINK_DOWN;
		event = NTB_EVENT_HW_LINK_DOWN;
		/* Do not modify link width/speed, we need it in link recovery */
	}

	/* notify the upper layer if we have an event change */
	if (ntb->event_cb != NULL)
		ntb->event_cb(ntb->ntb_transport, event);
}
1560
1561static void
1562ntb_link_enable(struct ntb_softc *ntb)
1563{
1564	uint32_t cntl;
1565
1566	if (ntb->type == NTB_SOC) {
1567		pci_write_config(ntb->device, NTB_PPD_OFFSET,
1568		    ntb->ppd | SOC_PPD_INIT_LINK, 4);
1569		return;
1570	}
1571
1572	if (ntb->conn_type == NTB_CONN_TRANSPARENT) {
1573		ntb_handle_link_event(ntb, NTB_LINK_UP);
1574		return;
1575	}
1576
1577	cntl = ntb_reg_read(4, ntb->reg->ntb_ctl);
1578	cntl &= ~(NTB_CNTL_LINK_DISABLE | NTB_CNTL_CFG_LOCK);
1579	cntl |= NTB_CNTL_P2S_BAR23_SNOOP | NTB_CNTL_S2P_BAR23_SNOOP;
1580	cntl |= NTB_CNTL_P2S_BAR4_SNOOP | NTB_CNTL_S2P_BAR4_SNOOP;
1581	if (HAS_FEATURE(NTB_SPLIT_BAR))
1582		cntl |= NTB_CNTL_P2S_BAR5_SNOOP | NTB_CNTL_S2P_BAR5_SNOOP;
1583	ntb_reg_write(4, ntb->reg->ntb_ctl, cntl);
1584}
1585
1586static void
1587ntb_link_disable(struct ntb_softc *ntb)
1588{
1589	uint32_t cntl;
1590
1591	if (ntb->conn_type == NTB_CONN_TRANSPARENT) {
1592		ntb_handle_link_event(ntb, NTB_LINK_DOWN);
1593		return;
1594	}
1595
1596	cntl = ntb_reg_read(4, ntb->reg->ntb_ctl);
1597	cntl &= ~(NTB_CNTL_P2S_BAR23_SNOOP | NTB_CNTL_S2P_BAR23_SNOOP);
1598	cntl &= ~(NTB_CNTL_P2S_BAR4_SNOOP | NTB_CNTL_S2P_BAR4_SNOOP);
1599	if (HAS_FEATURE(NTB_SPLIT_BAR))
1600		cntl &= ~(NTB_CNTL_P2S_BAR5_SNOOP | NTB_CNTL_S2P_BAR5_SNOOP);
1601	cntl |= NTB_CNTL_LINK_DISABLE | NTB_CNTL_CFG_LOCK;
1602	ntb_reg_write(4, ntb->reg->ntb_ctl, cntl);
1603}
1604
/*
 * Callout handler that attempts to recover a hung SOC link.  Performs the
 * ModPhy restart sequence and, after a randomized delay, re-checks the
 * error and link state: on success the normal heartbeat resumes; on
 * failure this callout re-arms itself and tries again.
 */
static void
recover_soc_link(void *arg)
{
	struct ntb_softc *ntb = arg;
	uint8_t speed, width;
	uint32_t status32;

	soc_perform_link_restart(ntb);

	/*
	 * There is a potential race between the 2 NTB devices recovering at
	 * the same time.  If the times are the same, the link will not recover
	 * and the driver will be stuck in this loop forever.  Add a random
	 * interval to the recovery time to prevent this race.
	 */
	status32 = arc4random() % SOC_LINK_RECOVERY_TIME;
	pause("Link", (SOC_LINK_RECOVERY_TIME + status32) * hz / 1000);

	/* Still forced into detect state: recovery did not take. */
	status32 = ntb_reg_read(4, SOC_LTSSMSTATEJMP_OFFSET);
	if ((status32 & SOC_LTSSMSTATEJMP_FORCEDETECT) != 0)
		goto retry;

	/* IBIST overflow error still pending: retry. */
	status32 = ntb_reg_read(4, SOC_IBSTERRRCRVSTS0_OFFSET);
	if ((status32 & SOC_IBIST_ERR_OFLOW) != 0)
		goto retry;

	/* Link reported down: fall back to the heartbeat poll. */
	status32 = ntb_reg_read(4, ntb->reg->ntb_ctl);
	if ((status32 & SOC_CNTL_LINK_DOWN) != 0)
		goto out;

	/*
	 * Link is up; verify it retrained at the previously cached
	 * width/speed (preserved by ntb_handle_link_event on link-down).
	 */
	status32 = ntb_reg_read(4, ntb->reg->lnk_sta);
	width = (status32 & NTB_LINK_WIDTH_MASK) >> 4;
	speed = (status32 & NTB_LINK_SPEED_MASK);
	if (ntb->link_width != width || ntb->link_speed != speed)
		goto retry;

out:
	callout_reset(&ntb->heartbeat_timer, NTB_HB_TIMEOUT * hz, soc_link_hb,
	    ntb);
	return;

retry:
	callout_reset(&ntb->lr_timer, NTB_HB_TIMEOUT * hz, recover_soc_link,
	    ntb);
}
1650
/*
 * Poll the hardware link state and dispatch a link event if it changed.
 * Caches ntb_ctl (SOC) or lnk_sta (Xeon) so that an unchanged reading
 * returns early without generating duplicate events.  Always returns 0.
 */
static int
ntb_poll_link(struct ntb_softc *ntb)
{
	int link_state;
	uint32_t ntb_cntl;
	uint16_t status;

	if (ntb->type == NTB_SOC) {
		/* Read and update the cached registers under the HW lock. */
		HW_LOCK(ntb);
		ntb_cntl = ntb_reg_read(4, ntb->reg->ntb_ctl);
		if (ntb_cntl == ntb->ntb_ctl) {
			HW_UNLOCK(ntb);
			return (0);
		}
		ntb->ntb_ctl = ntb_cntl;
		ntb->lnk_sta = ntb_reg_read(4, ntb->reg->lnk_sta);
		HW_UNLOCK(ntb);

		if ((ntb_cntl & SOC_CNTL_LINK_DOWN) != 0)
			link_state = NTB_LINK_DOWN;
		else
			link_state = NTB_LINK_UP;
	} else {
		/* Xeon: link status lives in PCI config space. */
		status = pci_read_config(ntb->device, ntb->reg->lnk_sta, 2);
		if (status == ntb->lnk_sta)
			return (0);
		ntb->lnk_sta = status;

		if ((status & NTB_LINK_STATUS_ACTIVE) != 0)
			link_state = NTB_LINK_UP;
		else
			link_state = NTB_LINK_DOWN;
	}

	ntb_handle_link_event(ntb, link_state);
	return (0);
}
1688
1689static void
1690ntb_irq_work(void *arg)
1691{
1692	struct ntb_db_cb *db_cb = arg;
1693	struct ntb_softc *ntb;
1694	int rc;
1695
1696	rc = db_cb->callback(db_cb->data, db_cb->db_num);
1697	/* Poll if forward progress was made. */
1698	if (rc != 0) {
1699		callout_reset(&db_cb->irq_work, 0, ntb_irq_work, db_cb);
1700		return;
1701	}
1702
1703	/* Unmask interrupt if no progress was made. */
1704	ntb = db_cb->ntb;
1705	unmask_ldb_interrupt(ntb, db_cb->db_num);
1706}
1707
1708/*
1709 * Public API to the rest of the OS
1710 */
1711
1712/**
1713 * ntb_register_event_callback() - register event callback
1714 * @ntb: pointer to ntb_softc instance
1715 * @func: callback function to register
1716 *
1717 * This function registers a callback for any HW driver events such as link
1718 * up/down, power management notices and etc.
1719 *
1720 * RETURNS: An appropriate ERRNO error value on error, or zero for success.
1721 */
1722int
1723ntb_register_event_callback(struct ntb_softc *ntb, ntb_event_callback func)
1724{
1725
1726	if (ntb->event_cb != NULL)
1727		return (EINVAL);
1728
1729	ntb->event_cb = func;
1730
1731	return (0);
1732}
1733
1734/**
1735 * ntb_unregister_event_callback() - unregisters the event callback
1736 * @ntb: pointer to ntb_softc instance
1737 *
1738 * This function unregisters the existing callback from transport
1739 */
1740void
1741ntb_unregister_event_callback(struct ntb_softc *ntb)
1742{
1743
1744	ntb->event_cb = NULL;
1745}
1746
1747/**
1748 * ntb_register_db_callback() - register a callback for doorbell interrupt
1749 * @ntb: pointer to ntb_softc instance
1750 * @idx: doorbell index to register callback, zero based
1751 * @data: pointer to be returned to caller with every callback
1752 * @func: callback function to register
1753 *
1754 * This function registers a callback function for the doorbell interrupt
1755 * on the primary side. The function will unmask the doorbell as well to
1756 * allow interrupt.
1757 *
1758 * RETURNS: An appropriate ERRNO error value on error, or zero for success.
1759 */
1760int
1761ntb_register_db_callback(struct ntb_softc *ntb, unsigned int idx, void *data,
1762    ntb_db_callback func)
1763{
1764	struct ntb_db_cb *db_cb = &ntb->db_cb[idx];
1765
1766	if (idx >= ntb->max_cbs || db_cb->callback != NULL || db_cb->reserved) {
1767		device_printf(ntb->device, "Invalid Index.\n");
1768		return (EINVAL);
1769	}
1770
1771	db_cb->callback = func;
1772	db_cb->data = data;
1773	callout_init(&db_cb->irq_work, 1);
1774
1775	unmask_ldb_interrupt(ntb, idx);
1776
1777	return (0);
1778}
1779
1780/**
1781 * ntb_unregister_db_callback() - unregister a callback for doorbell interrupt
1782 * @ntb: pointer to ntb_softc instance
1783 * @idx: doorbell index to register callback, zero based
1784 *
1785 * This function unregisters a callback function for the doorbell interrupt
1786 * on the primary side. The function will also mask the said doorbell.
1787 */
1788void
1789ntb_unregister_db_callback(struct ntb_softc *ntb, unsigned int idx)
1790{
1791
1792	if (idx >= ntb->max_cbs || ntb->db_cb[idx].callback == NULL)
1793		return;
1794
1795	mask_ldb_interrupt(ntb, idx);
1796
1797	callout_drain(&ntb->db_cb[idx].irq_work);
1798	ntb->db_cb[idx].callback = NULL;
1799}
1800
1801/**
1802 * ntb_find_transport() - find the transport pointer
1803 * @transport: pointer to pci device
1804 *
1805 * Given the pci device pointer, return the transport pointer passed in when
1806 * the transport attached when it was inited.
1807 *
1808 * RETURNS: pointer to transport.
1809 */
1810void *
1811ntb_find_transport(struct ntb_softc *ntb)
1812{
1813
1814	return (ntb->ntb_transport);
1815}
1816
1817/**
1818 * ntb_register_transport() - Register NTB transport with NTB HW driver
1819 * @transport: transport identifier
1820 *
1821 * This function allows a transport to reserve the hardware driver for
1822 * NTB usage.
1823 *
1824 * RETURNS: pointer to ntb_softc, NULL on error.
1825 */
1826struct ntb_softc *
1827ntb_register_transport(struct ntb_softc *ntb, void *transport)
1828{
1829
1830	/*
1831	 * TODO: when we have more than one transport, we will need to rewrite
1832	 * this to prevent race conditions
1833	 */
1834	if (ntb->ntb_transport != NULL)
1835		return (NULL);
1836
1837	ntb->ntb_transport = transport;
1838	return (ntb);
1839}
1840
1841/**
1842 * ntb_unregister_transport() - Unregister the transport with the NTB HW driver
1843 * @ntb - ntb_softc of the transport to be freed
1844 *
1845 * This function unregisters the transport from the HW driver and performs any
1846 * necessary cleanups.
1847 */
1848void
1849ntb_unregister_transport(struct ntb_softc *ntb)
1850{
1851	uint8_t i;
1852
1853	if (ntb->ntb_transport == NULL)
1854		return;
1855
1856	for (i = 0; i < ntb->max_cbs; i++)
1857		ntb_unregister_db_callback(ntb, i);
1858
1859	ntb_unregister_event_callback(ntb);
1860	ntb->ntb_transport = NULL;
1861}
1862
1863/**
1864 * ntb_get_max_spads() - get the total scratch regs usable
1865 * @ntb: pointer to ntb_softc instance
1866 *
1867 * This function returns the max 32bit scratchpad registers usable by the
1868 * upper layer.
1869 *
1870 * RETURNS: total number of scratch pad registers available
1871 */
1872uint8_t
1873ntb_get_max_spads(struct ntb_softc *ntb)
1874{
1875
1876	return (ntb->spad_count);
1877}
1878
/* Return the number of doorbell callback slots available to consumers. */
uint8_t
ntb_get_max_cbs(struct ntb_softc *ntb)
{

	return (ntb->max_cbs);
}
1885
/* Return the number of memory windows available to consumers. */
uint8_t
ntb_mw_count(struct ntb_softc *ntb)
{

	return (ntb->mw_count);
}
1892
1893/**
1894 * ntb_write_local_spad() - write to the secondary scratchpad register
1895 * @ntb: pointer to ntb_softc instance
1896 * @idx: index to the scratchpad register, 0 based
1897 * @val: the data value to put into the register
1898 *
1899 * This function allows writing of a 32bit value to the indexed scratchpad
1900 * register. The register resides on the secondary (external) side.
1901 *
1902 * RETURNS: An appropriate ERRNO error value on error, or zero for success.
1903 */
1904int
1905ntb_write_local_spad(struct ntb_softc *ntb, unsigned int idx, uint32_t val)
1906{
1907
1908	if (idx >= ntb->spad_count)
1909		return (EINVAL);
1910
1911	ntb_reg_write(4, ntb->reg_ofs.spad_local + idx * 4, val);
1912
1913	return (0);
1914}
1915
1916/**
1917 * ntb_read_local_spad() - read from the primary scratchpad register
1918 * @ntb: pointer to ntb_softc instance
1919 * @idx: index to scratchpad register, 0 based
1920 * @val: pointer to 32bit integer for storing the register value
1921 *
1922 * This function allows reading of the 32bit scratchpad register on
1923 * the primary (internal) side.
1924 *
1925 * RETURNS: An appropriate ERRNO error value on error, or zero for success.
1926 */
1927int
1928ntb_read_local_spad(struct ntb_softc *ntb, unsigned int idx, uint32_t *val)
1929{
1930
1931	if (idx >= ntb->spad_count)
1932		return (EINVAL);
1933
1934	*val = ntb_reg_read(4, ntb->reg_ofs.spad_local + idx * 4);
1935
1936	return (0);
1937}
1938
1939/**
1940 * ntb_write_remote_spad() - write to the secondary scratchpad register
1941 * @ntb: pointer to ntb_softc instance
1942 * @idx: index to the scratchpad register, 0 based
1943 * @val: the data value to put into the register
1944 *
1945 * This function allows writing of a 32bit value to the indexed scratchpad
1946 * register. The register resides on the secondary (external) side.
1947 *
1948 * RETURNS: An appropriate ERRNO error value on error, or zero for success.
1949 */
1950int
1951ntb_write_remote_spad(struct ntb_softc *ntb, unsigned int idx, uint32_t val)
1952{
1953
1954	if (idx >= ntb->spad_count)
1955		return (EINVAL);
1956
1957	if (HAS_FEATURE(NTB_SDOORBELL_LOCKUP))
1958		ntb_mw_write(4, XEON_SHADOW_SPAD_OFFSET + idx * 4, val);
1959	else
1960		ntb_reg_write(4, ntb->peer_reg->spad + idx * 4, val);
1961
1962	return (0);
1963}
1964
1965/**
1966 * ntb_read_remote_spad() - read from the primary scratchpad register
1967 * @ntb: pointer to ntb_softc instance
1968 * @idx: index to scratchpad register, 0 based
1969 * @val: pointer to 32bit integer for storing the register value
1970 *
1971 * This function allows reading of the 32bit scratchpad register on
1972 * the primary (internal) side.
1973 *
1974 * RETURNS: An appropriate ERRNO error value on error, or zero for success.
1975 */
1976int
1977ntb_read_remote_spad(struct ntb_softc *ntb, unsigned int idx, uint32_t *val)
1978{
1979
1980	if (idx >= ntb->spad_count)
1981		return (EINVAL);
1982
1983	if (HAS_FEATURE(NTB_SDOORBELL_LOCKUP))
1984		*val = ntb_mw_read(4, XEON_SHADOW_SPAD_OFFSET + idx * 4);
1985	else
1986		*val = ntb_reg_read(4, ntb->peer_reg->spad + idx * 4);
1987
1988	return (0);
1989}
1990
1991/**
1992 * ntb_get_mw_vbase() - get virtual addr for the NTB memory window
1993 * @ntb: pointer to ntb_softc instance
1994 * @mw: memory window number
1995 *
1996 * This function provides the base virtual address of the memory window
1997 * specified.
1998 *
1999 * RETURNS: pointer to virtual address, or NULL on error.
2000 */
2001void *
2002ntb_get_mw_vbase(struct ntb_softc *ntb, unsigned int mw)
2003{
2004
2005	if (mw >= ntb_mw_count(ntb))
2006		return (NULL);
2007
2008	return (ntb->bar_info[ntb_mw_to_bar(ntb, mw)].vbase);
2009}
2010
2011bus_addr_t
2012ntb_get_mw_pbase(struct ntb_softc *ntb, unsigned int mw)
2013{
2014
2015	if (mw >= ntb_mw_count(ntb))
2016		return (0);
2017
2018	return (ntb->bar_info[ntb_mw_to_bar(ntb, mw)].pbase);
2019}
2020
2021/**
2022 * ntb_get_mw_size() - return size of NTB memory window
2023 * @ntb: pointer to ntb_softc instance
2024 * @mw: memory window number
2025 *
2026 * This function provides the physical size of the memory window specified
2027 *
2028 * RETURNS: the size of the memory window or zero on error
2029 */
2030u_long
2031ntb_get_mw_size(struct ntb_softc *ntb, unsigned int mw)
2032{
2033
2034	if (mw >= ntb_mw_count(ntb))
2035		return (0);
2036
2037	return (ntb->bar_info[ntb_mw_to_bar(ntb, mw)].size);
2038}
2039
2040/**
2041 * ntb_set_mw_addr - set the memory window address
2042 * @ntb: pointer to ntb_softc instance
2043 * @mw: memory window number
2044 * @addr: base address for data
2045 *
2046 * This function sets the base physical address of the memory window.  This
2047 * memory address is where data from the remote system will be transfered into
2048 * or out of depending on how the transport is configured.
2049 */
2050void
2051ntb_set_mw_addr(struct ntb_softc *ntb, unsigned int mw, uint64_t addr)
2052{
2053
2054	if (mw >= ntb_mw_count(ntb))
2055		return;
2056
2057	switch (ntb_mw_to_bar(ntb, mw)) {
2058	case NTB_B2B_BAR_1:
2059		ntb_reg_write(8, ntb->xlat_reg->bar2_xlat, addr);
2060		break;
2061	case NTB_B2B_BAR_2:
2062		if (HAS_FEATURE(NTB_SPLIT_BAR))
2063			ntb_reg_write(4, ntb->reg_ofs.bar4_xlat, addr);
2064		else
2065			ntb_reg_write(8, ntb->reg_ofs.bar4_xlat, addr);
2066		break;
2067	case NTB_B2B_BAR_3:
2068		ntb_reg_write(4, ntb->reg_ofs.bar5_xlat, addr);
2069		break;
2070	default:
2071		KASSERT(false, ("invalid BAR"));
2072		break;
2073	}
2074}
2075
2076/**
2077 * ntb_ring_doorbell() - Set the doorbell on the secondary/external side
2078 * @ntb: pointer to ntb_softc instance
2079 * @db: doorbell to ring
2080 *
2081 * This function allows triggering of a doorbell on the secondary/external
2082 * side that will initiate an interrupt on the remote host
2083 */
2084void
2085ntb_ring_doorbell(struct ntb_softc *ntb, unsigned int db)
2086{
2087	uint64_t bit;
2088
2089	if (ntb->type == NTB_SOC)
2090		bit = 1 << db;
2091	else
2092		bit = ((1 << ntb->db_vec_shift) - 1) <<
2093		    (db * ntb->db_vec_shift);
2094
2095	if (HAS_FEATURE(NTB_SDOORBELL_LOCKUP)) {
2096		ntb_mw_write(2, XEON_SHADOW_PDOORBELL_OFFSET, bit);
2097		return;
2098	}
2099
2100	ntb_db_write(ntb, ntb->peer_reg->db_bell, bit);
2101}
2102
2103/*
2104 * ntb_get_peer_db_addr() - Return the address of the remote doorbell register,
2105 * as well as the size of the register (via *sz_out).
2106 *
2107 * This function allows a caller using I/OAT DMA to chain the remote doorbell
2108 * ring to its memory window write.
2109 *
2110 * Note that writing the peer doorbell via a memory window will *not* generate
2111 * an interrupt on the remote host; that must be done seperately.
2112 */
2113bus_addr_t
2114ntb_get_peer_db_addr(struct ntb_softc *ntb, vm_size_t *sz_out)
2115{
2116	struct ntb_pci_bar_info *bar;
2117	uint64_t regoff;
2118
2119	KASSERT(sz_out != NULL, ("must be non-NULL"));
2120
2121	if (!HAS_FEATURE(NTB_SDOORBELL_LOCKUP)) {
2122		bar = &ntb->bar_info[NTB_CONFIG_BAR];
2123		regoff = ntb->peer_reg->db_bell;
2124	} else {
2125		KASSERT((HAS_FEATURE(NTB_SPLIT_BAR) && ntb->mw_count == 2) ||
2126		    (!HAS_FEATURE(NTB_SPLIT_BAR) && ntb->mw_count == 1),
2127		    ("mw_count invalid after setup"));
2128		KASSERT(ntb->b2b_mw_idx != B2B_MW_DISABLED,
2129		    ("invalid b2b idx"));
2130
2131		bar = &ntb->bar_info[ntb_mw_to_bar(ntb, ntb->b2b_mw_idx)];
2132		regoff = XEON_SHADOW_PDOORBELL_OFFSET;
2133	}
2134	KASSERT(bar->pci_bus_tag != X86_BUS_SPACE_IO, ("uh oh"));
2135
2136	*sz_out = ntb->reg->db_size;
2137	/* HACK: Specific to current x86 bus implementation. */
2138	return ((uint64_t)bar->pci_bus_handle + regoff);
2139}
2140
2141/**
2142 * ntb_query_link_status() - return the hardware link status
2143 * @ndev: pointer to ntb_device instance
2144 *
2145 * Returns true if the hardware is connected to the remote system
2146 *
2147 * RETURNS: true or false based on the hardware link state
2148 */
2149bool
2150ntb_query_link_status(struct ntb_softc *ntb)
2151{
2152
2153	return (ntb->link_status == NTB_LINK_UP);
2154}
2155
2156static void
2157save_bar_parameters(struct ntb_pci_bar_info *bar)
2158{
2159
2160	bar->pci_bus_tag = rman_get_bustag(bar->pci_resource);
2161	bar->pci_bus_handle = rman_get_bushandle(bar->pci_resource);
2162	bar->pbase = rman_get_start(bar->pci_resource);
2163	bar->size = rman_get_size(bar->pci_resource);
2164	bar->vbase = rman_get_virtual(bar->pci_resource);
2165}
2166
2167device_t
2168ntb_get_device(struct ntb_softc *ntb)
2169{
2170
2171	return (ntb->device);
2172}
2173
2174/* Export HW-specific errata information. */
2175bool
2176ntb_has_feature(struct ntb_softc *ntb, uint64_t feature)
2177{
2178
2179	return (HAS_FEATURE(feature));
2180}
2181