ntb_hw_intel.c revision 289344
1/*-
2 * Copyright (C) 2013 Intel Corporation
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24 * SUCH DAMAGE.
25 */
26
27#include <sys/cdefs.h>
28__FBSDID("$FreeBSD: head/sys/dev/ntb/ntb_hw/ntb_hw.c 289344 2015-10-14 23:47:23Z cem $");
29
30#include <sys/param.h>
31#include <sys/kernel.h>
32#include <sys/systm.h>
33#include <sys/bus.h>
34#include <sys/malloc.h>
35#include <sys/module.h>
36#include <sys/queue.h>
37#include <sys/rman.h>
38#include <sys/sysctl.h>
39#include <vm/vm.h>
40#include <vm/pmap.h>
41#include <machine/bus.h>
42#include <machine/pmap.h>
43#include <machine/resource.h>
44#include <dev/pci/pcireg.h>
45#include <dev/pci/pcivar.h>
46
47#include "ntb_regs.h"
48#include "ntb_hw.h"
49
50/*
51 * The Non-Transparent Bridge (NTB) is a device on some Intel processors that
52 * allows you to connect two systems using a PCI-e link.
53 *
54 * This module contains the hardware abstraction layer for the NTB. It allows
55 * you to send and receive interrupts, map the memory windows and send and
56 * receive messages in the scratch-pad registers.
57 *
58 * NOTE: Much of the code in this module is shared with Linux. Any patches may
59 * be picked up and redistributed in Linux with a dual GPL/BSD license.
60 */
61
/* Slots in ntb_softc.bar_info[]. */
#define NTB_CONFIG_BAR	0
#define NTB_B2B_BAR_1	1
#define NTB_B2B_BAR_2	2
#define NTB_MAX_BARS	3
/* Translate a memory window number into its bar_info[] slot. */
#define NTB_MW_TO_BAR(mw) ((mw) + 1)

/* Upper bound used to size ntb_softc.int_info[] for either device type. */
#define MAX_MSIX_INTERRUPTS MAX(XEON_MAX_DB_BITS, SOC_MAX_DB_BITS)

#define NTB_HB_TIMEOUT	1 /* second */
#define SOC_LINK_RECOVERY_TIME	500

/* Fetch the driver softc attached to a newbus device. */
#define DEVICE2SOFTC(dev) ((struct ntb_softc *) device_get_softc(dev))
74
/*
 * Hardware family of an NTB device.  Each pci_ids[] entry carries one of
 * these, and ntb_attach() copies it into the softc to select the Xeon or
 * SOC setup path.
 */
enum ntb_device_type {
	NTB_XEON,
	NTB_SOC
};
79
/*
 * Device features and workarounds.
 *
 * Evaluates to non-zero when any of the given feature bits is set.  The
 * macro reads ntb->features, so a softc pointer named "ntb" must be in
 * scope at the point of use.
 */
#define HAS_FEATURE(feature)	\
	((ntb->features & (feature)) != 0)
83
84struct ntb_hw_info {
85	uint32_t		device_id;
86	const char		*desc;
87	enum ntb_device_type	type;
88	uint64_t		features;
89};
90
91struct ntb_pci_bar_info {
92	bus_space_tag_t		pci_bus_tag;
93	bus_space_handle_t	pci_bus_handle;
94	int			pci_resource_id;
95	struct resource		*pci_resource;
96	vm_paddr_t		pbase;
97	void			*vbase;
98	u_long			size;
99};
100
/* Per-interrupt state: the IRQ resource, its rid and the handler cookie. */
struct ntb_int_info {
	struct resource	*res;	/* from bus_alloc_resource_any(SYS_RES_IRQ) */
	int		rid;
	void		*tag;	/* cookie from bus_setup_intr(); NULL if unset */
};
106
107struct ntb_db_cb {
108	ntb_db_callback		callback;
109	unsigned int		db_num;
110	void			*data;
111	struct ntb_softc	*ntb;
112	struct callout		irq_work;
113	bool			reserved;
114};
115
116struct ntb_softc {
117	device_t		device;
118	enum ntb_device_type	type;
119	uint64_t		features;
120
121	struct ntb_pci_bar_info	bar_info[NTB_MAX_BARS];
122	struct ntb_int_info	int_info[MAX_MSIX_INTERRUPTS];
123	uint32_t		allocated_interrupts;
124
125	struct callout		heartbeat_timer;
126	struct callout		lr_timer;
127
128	void			*ntb_transport;
129	ntb_event_callback	event_cb;
130	struct ntb_db_cb 	*db_cb;
131
132	struct {
133		uint8_t max_spads;
134		uint8_t max_db_bits;
135		uint8_t msix_cnt;
136	} limits;
137	struct {
138		uint32_t ldb;
139		uint32_t ldb_mask;
140		uint32_t rdb;
141		uint32_t bar2_xlat;
142		uint32_t bar4_xlat;
143		uint32_t spad_remote;
144		uint32_t spad_local;
145		uint32_t lnk_cntl;
146		uint32_t lnk_stat;
147		uint32_t spci_cmd;
148	} reg_ofs;
149	uint8_t conn_type;
150	uint8_t dev_type;
151	uint8_t bits_per_vector;
152	uint8_t link_status;
153	uint8_t link_width;
154	uint8_t link_speed;
155};
156
#ifdef __i386__
/*
 * 64-bit register read built from two 32-bit accesses.
 *
 * The halves are read in separate statements so the access order is always
 * low dword first, then high dword; as operands of a single '|' expression
 * the order of the two device reads would be unspecified.
 */
static __inline uint64_t
bus_space_read_8(bus_space_tag_t tag, bus_space_handle_t handle,
    bus_size_t offset)
{
	uint64_t lo, hi;

	lo = bus_space_read_4(tag, handle, offset);
	hi = bus_space_read_4(tag, handle, offset + 4);
	return (lo | (hi << 32));
}

/*
 * 64-bit register write built from two 32-bit accesses: the low dword is
 * written at 'offset', then the high dword at 'offset + 4'.
 */
static __inline void
bus_space_write_8(bus_space_tag_t tag, bus_space_handle_t handle,
    bus_size_t offset, uint64_t val)
{

	bus_space_write_4(tag, handle, offset, (uint32_t)val);
	bus_space_write_4(tag, handle, offset + 4, (uint32_t)(val >> 32));
}
#endif
176
/*
 * Register accessors.  SIZE is the access width in bytes and is pasted onto
 * bus_space_read_/bus_space_write_.  All of these expand to expressions that
 * use a softc pointer named "ntb" from the caller's scope.
 *
 * ntb_reg_* target the config BAR; ntb_mw_* target NTB_B2B_BAR_2.
 */
#define ntb_bar_read(SIZE, bar, offset) \
	    bus_space_read_ ## SIZE (ntb->bar_info[(bar)].pci_bus_tag, \
	    ntb->bar_info[(bar)].pci_bus_handle, (offset))
#define ntb_bar_write(SIZE, bar, offset, val) \
	    bus_space_write_ ## SIZE (ntb->bar_info[(bar)].pci_bus_tag, \
	    ntb->bar_info[(bar)].pci_bus_handle, (offset), (val))
#define ntb_reg_read(SIZE, offset) ntb_bar_read(SIZE, NTB_CONFIG_BAR, offset)
#define ntb_reg_write(SIZE, offset, val) \
	    ntb_bar_write(SIZE, NTB_CONFIG_BAR, offset, val)
#define ntb_mw_read(SIZE, offset) ntb_bar_read(SIZE, NTB_B2B_BAR_2, offset)
#define ntb_mw_write(SIZE, offset, val) \
	    ntb_bar_write(SIZE, NTB_B2B_BAR_2, offset, val)
189
/*
 * Signature of a BAR mapping routine (map_mmr_bar, map_memory_window_bar);
 * map_pci_bar() invokes one of these and reports the outcome.
 */
typedef int (*bar_map_strategy)(struct ntb_softc *ntb,
    struct ntb_pci_bar_info *bar);
192
193static int ntb_probe(device_t device);
194static int ntb_attach(device_t device);
195static int ntb_detach(device_t device);
196static int ntb_map_pci_bars(struct ntb_softc *ntb);
197static int map_pci_bar(struct ntb_softc *ntb, bar_map_strategy strategy,
198    struct ntb_pci_bar_info *bar);
199static int map_mmr_bar(struct ntb_softc *ntb, struct ntb_pci_bar_info *bar);
200static int map_memory_window_bar(struct ntb_softc *ntb,
201    struct ntb_pci_bar_info *bar);
202static void ntb_unmap_pci_bar(struct ntb_softc *ntb);
203static int ntb_remap_msix(device_t, uint32_t desired, uint32_t avail);
204static int ntb_setup_interrupts(struct ntb_softc *ntb);
205static int ntb_setup_legacy_interrupt(struct ntb_softc *ntb);
206static int ntb_setup_xeon_msix(struct ntb_softc *ntb, uint32_t num_vectors);
207static int ntb_setup_soc_msix(struct ntb_softc *ntb, uint32_t num_vectors);
208static void ntb_teardown_interrupts(struct ntb_softc *ntb);
209static void handle_soc_irq(void *arg);
210static void handle_xeon_irq(void *arg);
211static void handle_xeon_event_irq(void *arg);
212static void ntb_handle_legacy_interrupt(void *arg);
213static void ntb_irq_work(void *arg);
214static void mask_ldb_interrupt(struct ntb_softc *ntb, unsigned int idx);
215static void unmask_ldb_interrupt(struct ntb_softc *ntb, unsigned int idx);
216static int ntb_create_callbacks(struct ntb_softc *ntb, uint32_t num_vectors);
217static void ntb_free_callbacks(struct ntb_softc *ntb);
218static struct ntb_hw_info *ntb_get_device_info(uint32_t device_id);
219static int ntb_setup_xeon(struct ntb_softc *ntb);
220static int ntb_setup_soc(struct ntb_softc *ntb);
221static void ntb_teardown_xeon(struct ntb_softc *ntb);
222static void configure_soc_secondary_side_bars(struct ntb_softc *ntb);
223static void configure_xeon_secondary_side_bars(struct ntb_softc *ntb);
224static void ntb_handle_heartbeat(void *arg);
225static void ntb_handle_link_event(struct ntb_softc *ntb, int link_state);
226static void ntb_hw_link_down(struct ntb_softc *ntb);
227static void ntb_hw_link_up(struct ntb_softc *ntb);
228static void recover_soc_link(void *arg);
229static int ntb_check_link_status(struct ntb_softc *ntb);
230static void save_bar_parameters(struct ntb_pci_bar_info *bar);
231
232static struct ntb_hw_info pci_ids[] = {
233	{ 0x0C4E8086, "Atom Processor S1200 NTB Primary B2B", NTB_SOC, 0 },
234
235	/* XXX: PS/SS IDs left out until they are supported. */
236	{ 0x37258086, "JSF Xeon C35xx/C55xx Non-Transparent Bridge B2B",
237		NTB_XEON, NTB_REGS_THRU_MW | NTB_B2BDOORBELL_BIT14 },
238	{ 0x3C0D8086, "SNB Xeon E5/Core i7 Non-Transparent Bridge B2B",
239		NTB_XEON, NTB_REGS_THRU_MW | NTB_B2BDOORBELL_BIT14 },
240	{ 0x0E0D8086, "IVT Xeon E5 V2 Non-Transparent Bridge B2B", NTB_XEON,
241		NTB_REGS_THRU_MW | NTB_B2BDOORBELL_BIT14 | NTB_SB01BASE_LOCKUP
242		    | NTB_BAR_SIZE_4K },
243	{ 0x2F0D8086, "HSX Xeon E5 V3 Non-Transparent Bridge B2B", NTB_XEON,
244		NTB_REGS_THRU_MW | NTB_B2BDOORBELL_BIT14 | NTB_SB01BASE_LOCKUP
245	},
246	{ 0x6F0D8086, "BDX Xeon E5 V4 Non-Transparent Bridge B2B", NTB_XEON,
247		NTB_REGS_THRU_MW | NTB_B2BDOORBELL_BIT14 | NTB_SB01BASE_LOCKUP
248	},
249
250	{ 0x00000000, NULL, NTB_SOC, 0 }
251};
252
253/*
254 * OS <-> Driver interface structures
255 */
256MALLOC_DEFINE(M_NTB, "ntb_hw", "ntb_hw driver memory allocations");
257
258static device_method_t ntb_pci_methods[] = {
259	/* Device interface */
260	DEVMETHOD(device_probe,     ntb_probe),
261	DEVMETHOD(device_attach,    ntb_attach),
262	DEVMETHOD(device_detach,    ntb_detach),
263	DEVMETHOD_END
264};
265
266static driver_t ntb_pci_driver = {
267	"ntb_hw",
268	ntb_pci_methods,
269	sizeof(struct ntb_softc),
270};
271
272static devclass_t ntb_devclass;
273DRIVER_MODULE(ntb_hw, pci, ntb_pci_driver, ntb_devclass, NULL, NULL);
274MODULE_VERSION(ntb_hw, 1);
275
276SYSCTL_NODE(_hw, OID_AUTO, ntb, CTLFLAG_RW, 0, "NTB sysctls");
277
278/*
279 * OS <-> Driver linkage functions
280 */
281static int
282ntb_probe(device_t device)
283{
284	struct ntb_hw_info *p;
285
286	p = ntb_get_device_info(pci_get_devid(device));
287	if (p == NULL)
288		return (ENXIO);
289
290	device_set_desc(device, p->desc);
291	return (0);
292}
293
294static int
295ntb_attach(device_t device)
296{
297	struct ntb_softc *ntb;
298	struct ntb_hw_info *p;
299	int error;
300
301	ntb = DEVICE2SOFTC(device);
302	p = ntb_get_device_info(pci_get_devid(device));
303
304	ntb->device = device;
305	ntb->type = p->type;
306	ntb->features = p->features;
307
308	/* Heartbeat timer for NTB_SOC since there is no link interrupt */
309	callout_init(&ntb->heartbeat_timer, 1);
310	callout_init(&ntb->lr_timer, 1);
311
312	error = ntb_map_pci_bars(ntb);
313	if (error)
314		goto out;
315	if (ntb->type == NTB_SOC)
316		error = ntb_setup_soc(ntb);
317	else
318		error = ntb_setup_xeon(ntb);
319	if (error)
320		goto out;
321	error = ntb_setup_interrupts(ntb);
322	if (error)
323		goto out;
324
325	pci_enable_busmaster(ntb->device);
326
327out:
328	if (error != 0)
329		ntb_detach(device);
330	return (error);
331}
332
333static int
334ntb_detach(device_t device)
335{
336	struct ntb_softc *ntb;
337
338	ntb = DEVICE2SOFTC(device);
339	callout_drain(&ntb->heartbeat_timer);
340	callout_drain(&ntb->lr_timer);
341	if (ntb->type == NTB_XEON)
342		ntb_teardown_xeon(ntb);
343	ntb_teardown_interrupts(ntb);
344	ntb_unmap_pci_bar(ntb);
345
346	return (0);
347}
348
349static int
350ntb_map_pci_bars(struct ntb_softc *ntb)
351{
352	int rc;
353
354	ntb->bar_info[NTB_CONFIG_BAR].pci_resource_id = PCIR_BAR(0);
355	rc = map_pci_bar(ntb, map_mmr_bar, &ntb->bar_info[NTB_CONFIG_BAR]);
356	if (rc != 0)
357		return (rc);
358
359	ntb->bar_info[NTB_B2B_BAR_1].pci_resource_id = PCIR_BAR(2);
360	rc = map_pci_bar(ntb, map_memory_window_bar,
361	    &ntb->bar_info[NTB_B2B_BAR_1]);
362	if (rc != 0)
363		return (rc);
364
365	ntb->bar_info[NTB_B2B_BAR_2].pci_resource_id = PCIR_BAR(4);
366	if (HAS_FEATURE(NTB_REGS_THRU_MW))
367		rc = map_pci_bar(ntb, map_mmr_bar,
368		    &ntb->bar_info[NTB_B2B_BAR_2]);
369	else
370		rc = map_pci_bar(ntb, map_memory_window_bar,
371		    &ntb->bar_info[NTB_B2B_BAR_2]);
372	return (rc);
373}
374
375static int
376map_pci_bar(struct ntb_softc *ntb, bar_map_strategy strategy,
377    struct ntb_pci_bar_info *bar)
378{
379	int rc;
380
381	rc = strategy(ntb, bar);
382	if (rc != 0)
383		device_printf(ntb->device,
384		    "unable to allocate pci resource\n");
385	else
386		device_printf(ntb->device,
387		    "Bar size = %lx, v %p, p %p\n",
388		    bar->size, bar->vbase, (void *)(bar->pbase));
389	return (rc);
390}
391
392static int
393map_mmr_bar(struct ntb_softc *ntb, struct ntb_pci_bar_info *bar)
394{
395
396	bar->pci_resource = bus_alloc_resource_any(ntb->device, SYS_RES_MEMORY,
397	    &bar->pci_resource_id, RF_ACTIVE);
398	if (bar->pci_resource == NULL)
399		return (ENXIO);
400
401	save_bar_parameters(bar);
402	return (0);
403}
404
405static int
406map_memory_window_bar(struct ntb_softc *ntb, struct ntb_pci_bar_info *bar)
407{
408	int rc;
409	uint8_t bar_size_bits = 0;
410
411	bar->pci_resource = bus_alloc_resource_any(ntb->device, SYS_RES_MEMORY,
412	    &bar->pci_resource_id, RF_ACTIVE);
413
414	if (bar->pci_resource == NULL)
415		return (ENXIO);
416
417	save_bar_parameters(bar);
418	/*
419	 * Ivytown NTB BAR sizes are misreported by the hardware due to a
420	 * hardware issue. To work around this, query the size it should be
421	 * configured to by the device and modify the resource to correspond to
422	 * this new size. The BIOS on systems with this problem is required to
423	 * provide enough address space to allow the driver to make this change
424	 * safely.
425	 *
426	 * Ideally I could have just specified the size when I allocated the
427	 * resource like:
428	 *  bus_alloc_resource(ntb->device,
429	 *	SYS_RES_MEMORY, &bar->pci_resource_id, 0ul, ~0ul,
430	 *	1ul << bar_size_bits, RF_ACTIVE);
431	 * but the PCI driver does not honor the size in this call, so we have
432	 * to modify it after the fact.
433	 */
434	if (HAS_FEATURE(NTB_BAR_SIZE_4K)) {
435		if (bar->pci_resource_id == PCIR_BAR(2))
436			bar_size_bits = pci_read_config(ntb->device,
437			    XEON_PBAR23SZ_OFFSET, 1);
438		else
439			bar_size_bits = pci_read_config(ntb->device,
440			    XEON_PBAR45SZ_OFFSET, 1);
441
442		rc = bus_adjust_resource(ntb->device, SYS_RES_MEMORY,
443		    bar->pci_resource, bar->pbase,
444		    bar->pbase + (1ul << bar_size_bits) - 1);
445		if (rc != 0) {
446			device_printf(ntb->device,
447			    "unable to resize bar\n");
448			return (rc);
449		}
450
451		save_bar_parameters(bar);
452	}
453
454	/* Mark bar region as write combining to improve performance. */
455	rc = pmap_change_attr((vm_offset_t)bar->vbase, bar->size,
456	    VM_MEMATTR_WRITE_COMBINING);
457	if (rc != 0) {
458		device_printf(ntb->device,
459		    "unable to mark bar as WRITE_COMBINING\n");
460		return (rc);
461	}
462	return (0);
463}
464
465static void
466ntb_unmap_pci_bar(struct ntb_softc *ntb)
467{
468	struct ntb_pci_bar_info *current_bar;
469	int i;
470
471	for (i = 0; i< NTB_MAX_BARS; i++) {
472		current_bar = &ntb->bar_info[i];
473		if (current_bar->pci_resource != NULL)
474			bus_release_resource(ntb->device, SYS_RES_MEMORY,
475			    current_bar->pci_resource_id,
476			    current_bar->pci_resource);
477	}
478}
479
480static int
481ntb_setup_xeon_msix(struct ntb_softc *ntb, uint32_t num_vectors)
482{
483	void (*interrupt_handler)(void *);
484	void *int_arg;
485	uint32_t i;
486	int rc;
487
488	if (num_vectors < 4)
489		return (ENOSPC);
490
491	for (i = 0; i < num_vectors; i++) {
492		ntb->int_info[i].rid = i + 1;
493		ntb->int_info[i].res = bus_alloc_resource_any(ntb->device,
494		    SYS_RES_IRQ, &ntb->int_info[i].rid, RF_ACTIVE);
495		if (ntb->int_info[i].res == NULL) {
496			device_printf(ntb->device,
497			    "bus_alloc_resource failed\n");
498			return (ENOMEM);
499		}
500		ntb->int_info[i].tag = NULL;
501		ntb->allocated_interrupts++;
502		if (i == num_vectors - 1) {
503			interrupt_handler = handle_xeon_event_irq;
504			int_arg = ntb;
505		} else {
506			interrupt_handler = handle_xeon_irq;
507			int_arg = &ntb->db_cb[i];
508		}
509		rc = bus_setup_intr(ntb->device, ntb->int_info[i].res,
510		    INTR_MPSAFE | INTR_TYPE_MISC, NULL, interrupt_handler,
511		    int_arg, &ntb->int_info[i].tag);
512		if (rc != 0) {
513			device_printf(ntb->device,
514			    "bus_setup_intr failed\n");
515			return (ENXIO);
516		}
517	}
518
519	/*
520	 * Prevent consumers from registering callbacks on the link event irq
521	 * slot, from which they will never be called back.
522	 */
523	ntb->db_cb[num_vectors - 1].reserved = true;
524	return (0);
525}
526
527static int
528ntb_setup_soc_msix(struct ntb_softc *ntb, uint32_t num_vectors)
529{
530	uint32_t i;
531	int rc;
532
533	for (i = 0; i < num_vectors; i++) {
534		ntb->int_info[i].rid = i + 1;
535		ntb->int_info[i].res = bus_alloc_resource_any(ntb->device,
536		    SYS_RES_IRQ, &ntb->int_info[i].rid, RF_ACTIVE);
537		if (ntb->int_info[i].res == NULL) {
538			device_printf(ntb->device,
539			    "bus_alloc_resource failed\n");
540			return (ENOMEM);
541		}
542		ntb->int_info[i].tag = NULL;
543		ntb->allocated_interrupts++;
544		rc = bus_setup_intr(ntb->device, ntb->int_info[i].res,
545		    INTR_MPSAFE | INTR_TYPE_MISC, NULL, handle_soc_irq,
546		    &ntb->db_cb[i], &ntb->int_info[i].tag);
547		if (rc != 0) {
548			device_printf(ntb->device, "bus_setup_intr failed\n");
549			return (ENXIO);
550		}
551	}
552	return (0);
553}
554
555/*
556 * The Linux NTB driver drops from MSI-X to legacy INTx if a unique vector
557 * cannot be allocated for each MSI-X message.  JHB seems to think remapping
558 * should be okay.  This tunable should enable us to test that hypothesis
559 * when someone gets their hands on some Xeon hardware.
560 */
561static int ntb_force_remap_mode;
562SYSCTL_INT(_hw_ntb, OID_AUTO, force_remap_mode, CTLFLAG_RDTUN,
563    &ntb_force_remap_mode, 0, "If enabled, force MSI-X messages to be remapped"
564    " to a smaller number of ithreads, even if the desired number are "
565    "available");
566
567/*
568 * In case it is NOT ok, give consumers an abort button.
569 */
570static int ntb_prefer_intx;
571SYSCTL_INT(_hw_ntb, OID_AUTO, prefer_intx_to_remap, CTLFLAG_RDTUN,
572    &ntb_prefer_intx, 0, "If enabled, prefer to use legacy INTx mode rather "
573    "than remapping MSI-X messages over available slots (match Linux driver "
574    "behavior)");
575
576/*
577 * Remap the desired number of MSI-X messages to available ithreads in a simple
578 * round-robin fashion.
579 */
580static int
581ntb_remap_msix(device_t dev, uint32_t desired, uint32_t avail)
582{
583	u_int *vectors;
584	uint32_t i;
585	int rc;
586
587	if (ntb_prefer_intx != 0)
588		return (ENXIO);
589
590	vectors = malloc(desired * sizeof(*vectors), M_NTB, M_ZERO | M_WAITOK);
591
592	for (i = 0; i < desired; i++)
593		vectors[i] = (i % avail) + 1;
594
595	rc = pci_remap_msix(dev, desired, vectors);
596	free(vectors, M_NTB);
597	return (rc);
598}
599
600static int
601ntb_setup_interrupts(struct ntb_softc *ntb)
602{
603	uint32_t desired_vectors, num_vectors;
604	int rc;
605
606	ntb->allocated_interrupts = 0;
607	/*
608	 * On SOC, disable all interrupts.  On XEON, disable all but Link
609	 * Interrupt.  The rest will be unmasked as callbacks are registered.
610	 */
611	if (ntb->type == NTB_SOC)
612		ntb_reg_write(8, ntb->reg_ofs.ldb_mask, ~0);
613	else
614		ntb_reg_write(2, ntb->reg_ofs.ldb_mask,
615		    (uint16_t) ~(1 << XEON_LINK_DB));
616
617	num_vectors = desired_vectors = MIN(pci_msix_count(ntb->device),
618	    ntb->limits.max_db_bits);
619	if (desired_vectors >= 1) {
620		rc = pci_alloc_msix(ntb->device, &num_vectors);
621
622		if (ntb_force_remap_mode != 0 && rc == 0 &&
623		    num_vectors == desired_vectors)
624			num_vectors--;
625
626		if (rc == 0 && num_vectors < desired_vectors) {
627			rc = ntb_remap_msix(ntb->device, desired_vectors,
628			    num_vectors);
629			if (rc == 0)
630				num_vectors = desired_vectors;
631			else
632				pci_release_msi(ntb->device);
633		}
634		if (rc != 0)
635			num_vectors = 1;
636	} else
637		num_vectors = 1;
638
639	ntb_create_callbacks(ntb, num_vectors);
640
641	if (ntb->type == NTB_XEON)
642		rc = ntb_setup_xeon_msix(ntb, num_vectors);
643	else
644		rc = ntb_setup_soc_msix(ntb, num_vectors);
645	if (rc != 0)
646		device_printf(ntb->device,
647		    "Error allocating MSI-X interrupts: %d\n", rc);
648
649	if (ntb->type == NTB_XEON && rc == ENOSPC)
650		rc = ntb_setup_legacy_interrupt(ntb);
651
652	return (rc);
653}
654
655static int
656ntb_setup_legacy_interrupt(struct ntb_softc *ntb)
657{
658	int rc;
659
660	ntb->int_info[0].rid = 0;
661	ntb->int_info[0].res = bus_alloc_resource_any(ntb->device, SYS_RES_IRQ,
662	    &ntb->int_info[0].rid, RF_SHAREABLE|RF_ACTIVE);
663	if (ntb->int_info[0].res == NULL) {
664		device_printf(ntb->device, "bus_alloc_resource failed\n");
665		return (ENOMEM);
666	}
667
668	ntb->int_info[0].tag = NULL;
669	ntb->allocated_interrupts = 1;
670
671	rc = bus_setup_intr(ntb->device, ntb->int_info[0].res,
672	    INTR_MPSAFE | INTR_TYPE_MISC, NULL, ntb_handle_legacy_interrupt,
673	    ntb, &ntb->int_info[0].tag);
674	if (rc != 0) {
675		device_printf(ntb->device, "bus_setup_intr failed\n");
676		return (ENXIO);
677	}
678
679	return (0);
680}
681
682static void
683ntb_teardown_interrupts(struct ntb_softc *ntb)
684{
685	struct ntb_int_info *current_int;
686	int i;
687
688	for (i = 0; i < ntb->allocated_interrupts; i++) {
689		current_int = &ntb->int_info[i];
690		if (current_int->tag != NULL)
691			bus_teardown_intr(ntb->device, current_int->res,
692			    current_int->tag);
693
694		if (current_int->res != NULL)
695			bus_release_resource(ntb->device, SYS_RES_IRQ,
696			    rman_get_rid(current_int->res), current_int->res);
697	}
698
699	ntb_free_callbacks(ntb);
700	pci_release_msi(ntb->device);
701}
702
703static void
704mask_ldb_interrupt(struct ntb_softc *ntb, unsigned int idx)
705{
706	unsigned long mask;
707
708	mask = ntb_reg_read(2, ntb->reg_ofs.ldb_mask);
709	mask |= 1 << (idx * ntb->bits_per_vector);
710	ntb_reg_write(2, ntb->reg_ofs.ldb_mask, mask);
711}
712
713static void
714unmask_ldb_interrupt(struct ntb_softc *ntb, unsigned int idx)
715{
716	unsigned long mask;
717
718	mask = ntb_reg_read(2, ntb->reg_ofs.ldb_mask);
719	mask &= ~(1 << (idx * ntb->bits_per_vector));
720	ntb_reg_write(2, ntb->reg_ofs.ldb_mask, mask);
721}
722
723static void
724handle_soc_irq(void *arg)
725{
726	struct ntb_db_cb *db_cb = arg;
727	struct ntb_softc *ntb = db_cb->ntb;
728
729	ntb_reg_write(8, ntb->reg_ofs.ldb, (uint64_t) 1 << db_cb->db_num);
730
731	if (db_cb->callback != NULL) {
732		mask_ldb_interrupt(ntb, db_cb->db_num);
733		callout_reset(&db_cb->irq_work, 0, ntb_irq_work, db_cb);
734	}
735}
736
737static void
738handle_xeon_irq(void *arg)
739{
740	struct ntb_db_cb *db_cb = arg;
741	struct ntb_softc *ntb = db_cb->ntb;
742
743	/*
744	 * On Xeon, there are 16 bits in the interrupt register
745	 * but only 4 vectors.  So, 5 bits are assigned to the first 3
746	 * vectors, with the 4th having a single bit for link
747	 * interrupts.
748	 */
749	ntb_reg_write(2, ntb->reg_ofs.ldb,
750	    ((1 << ntb->bits_per_vector) - 1) <<
751	    (db_cb->db_num * ntb->bits_per_vector));
752
753	if (db_cb->callback != NULL) {
754		mask_ldb_interrupt(ntb, db_cb->db_num);
755		callout_reset(&db_cb->irq_work, 0, ntb_irq_work, db_cb);
756	}
757}
758
759/* Since we do not have a HW doorbell in SOC, this is only used in JF/JT */
760static void
761handle_xeon_event_irq(void *arg)
762{
763	struct ntb_softc *ntb = arg;
764	int rc;
765
766	rc = ntb_check_link_status(ntb);
767	if (rc != 0)
768		device_printf(ntb->device, "Error determining link status\n");
769
770	/* bit 15 is always the link bit */
771	ntb_reg_write(2, ntb->reg_ofs.ldb, 1 << XEON_LINK_DB);
772}
773
774static void
775ntb_handle_legacy_interrupt(void *arg)
776{
777	struct ntb_softc *ntb = arg;
778	unsigned int i = 0;
779	uint64_t ldb64;
780	uint16_t ldb16;
781
782	if (ntb->type == NTB_SOC) {
783		ldb64 = ntb_reg_read(8, ntb->reg_ofs.ldb);
784
785		while (ldb64) {
786			i = ffs(ldb64);
787			ldb64 &= ldb64 - 1;
788			handle_soc_irq(&ntb->db_cb[i]);
789		}
790	} else {
791		ldb16 = ntb_reg_read(2, ntb->reg_ofs.ldb);
792
793		if ((ldb16 & XEON_DB_HW_LINK) != 0) {
794			handle_xeon_event_irq(ntb);
795			ldb16 &= ~XEON_DB_HW_LINK;
796		}
797
798		while (ldb16 != 0) {
799			i = ffs(ldb16);
800			ldb16 &= ldb16 - 1;
801			handle_xeon_irq(&ntb->db_cb[i]);
802		}
803	}
804
805}
806
807static int
808ntb_create_callbacks(struct ntb_softc *ntb, uint32_t num_vectors)
809{
810	uint32_t i;
811
812	ntb->db_cb = malloc(num_vectors * sizeof(*ntb->db_cb), M_NTB,
813	    M_ZERO | M_WAITOK);
814	for (i = 0; i < num_vectors; i++) {
815		ntb->db_cb[i].db_num = i;
816		ntb->db_cb[i].ntb = ntb;
817	}
818
819	return (0);
820}
821
822static void
823ntb_free_callbacks(struct ntb_softc *ntb)
824{
825	uint8_t i;
826
827	for (i = 0; i < ntb->limits.max_db_bits; i++)
828		ntb_unregister_db_callback(ntb, i);
829
830	free(ntb->db_cb, M_NTB);
831}
832
833static struct ntb_hw_info *
834ntb_get_device_info(uint32_t device_id)
835{
836	struct ntb_hw_info *ep = pci_ids;
837
838	while (ep->device_id) {
839		if (ep->device_id == device_id)
840			return (ep);
841		++ep;
842	}
843	return (NULL);
844}
845
/* Xeon-specific detach step: take the hardware link down. */
static void
ntb_teardown_xeon(struct ntb_softc *ntb)
{

	ntb_hw_link_down(ntb);
}
852
853static int
854ntb_setup_xeon(struct ntb_softc *ntb)
855{
856	uint8_t val, connection_type;
857
858	val = pci_read_config(ntb->device, NTB_PPD_OFFSET, 1);
859
860	connection_type = val & XEON_PPD_CONN_TYPE;
861
862	if ((val & XEON_PPD_DEV_TYPE) != 0)
863		ntb->dev_type = NTB_DEV_USD;
864	else
865		ntb->dev_type = NTB_DEV_DSD;
866
867	ntb->reg_ofs.ldb	= XEON_PDOORBELL_OFFSET;
868	ntb->reg_ofs.ldb_mask	= XEON_PDBMSK_OFFSET;
869	ntb->reg_ofs.spad_local	= XEON_SPAD_OFFSET;
870	ntb->reg_ofs.bar2_xlat	= XEON_SBAR2XLAT_OFFSET;
871	ntb->reg_ofs.bar4_xlat	= XEON_SBAR4XLAT_OFFSET;
872
873	switch (connection_type) {
874	case NTB_CONN_B2B:
875		ntb->conn_type = NTB_CONN_B2B;
876
877		/*
878		 * reg_ofs.rdb and reg_ofs.spad_remote are effectively ignored
879		 * with the NTB_REGS_THRU_MW errata mode enabled.  (See
880		 * ntb_ring_doorbell() and ntb_read/write_remote_spad().)
881		 */
882		ntb->reg_ofs.rdb	 = XEON_B2B_DOORBELL_OFFSET;
883		ntb->reg_ofs.spad_remote = XEON_B2B_SPAD_OFFSET;
884
885		ntb->limits.max_spads	 = XEON_MAX_SPADS;
886		break;
887
888	case NTB_CONN_RP:
889		/*
890		 * Every Xeon today needs NTB_REGS_THRU_MW, so punt on RP for
891		 * now.
892		 */
893		KASSERT(HAS_FEATURE(NTB_REGS_THRU_MW),
894		    ("Xeon without MW errata unimplemented"));
895		device_printf(ntb->device,
896		    "NTB-RP disabled to due hardware errata.\n");
897		return (ENXIO);
898
899	case NTB_CONN_TRANSPARENT:
900	default:
901		device_printf(ntb->device, "Connection type %d not supported\n",
902		    connection_type);
903		return (ENXIO);
904	}
905
906	/*
907	 * There is a Xeon hardware errata related to writes to SDOORBELL or
908	 * B2BDOORBELL in conjunction with inbound access to NTB MMIO space,
909	 * which may hang the system.  To workaround this use the second memory
910	 * window to access the interrupt and scratch pad registers on the
911	 * remote system.
912	 *
913	 * There is another HW errata on the limit registers -- they can only
914	 * be written when the base register is (?)4GB aligned and < 32-bit.
915	 * This should already be the case based on the driver defaults, but
916	 * write the limit registers first just in case.
917	 */
918	if (HAS_FEATURE(NTB_REGS_THRU_MW))
919		/*
920		 * Set the Limit register to 4k, the minimum size, to prevent
921		 * an illegal access.
922		 */
923		ntb_reg_write(8, XEON_PBAR4LMT_OFFSET,
924		    ntb_get_mw_size(ntb, 1) + 0x1000);
925	else
926		/*
927		 * Disable the limit register, just in case it is set to
928		 * something silly.
929		 */
930		ntb_reg_write(8, XEON_PBAR4LMT_OFFSET, 0);
931
932
933	ntb->reg_ofs.lnk_cntl	 = XEON_NTBCNTL_OFFSET;
934	ntb->reg_ofs.lnk_stat	 = XEON_LINK_STATUS_OFFSET;
935	ntb->reg_ofs.spci_cmd	 = XEON_PCICMD_OFFSET;
936
937	ntb->limits.max_db_bits	 = XEON_MAX_DB_BITS;
938	ntb->limits.msix_cnt	 = XEON_MSIX_CNT;
939	ntb->bits_per_vector	 = XEON_DB_BITS_PER_VEC;
940
941	/*
942	 * HW Errata on bit 14 of b2bdoorbell register.  Writes will not be
943	 * mirrored to the remote system.  Shrink the number of bits by one,
944	 * since bit 14 is the last bit.
945	 *
946	 * On REGS_THRU_MW errata mode, we don't use the b2bdoorbell register
947	 * anyway.  Nor for non-B2B connection types.
948	 */
949	if (HAS_FEATURE(NTB_B2BDOORBELL_BIT14) &&
950	    !HAS_FEATURE(NTB_REGS_THRU_MW) &&
951	    connection_type == NTB_CONN_B2B)
952		ntb->limits.max_db_bits = XEON_MAX_DB_BITS - 1;
953
954	configure_xeon_secondary_side_bars(ntb);
955
956	/* Enable Bus Master and Memory Space on the secondary side */
957	if (ntb->conn_type == NTB_CONN_B2B)
958		ntb_reg_write(2, ntb->reg_ofs.spci_cmd,
959		    PCIM_CMD_MEMEN | PCIM_CMD_BUSMASTEREN);
960
961	/* Enable link training */
962	ntb_hw_link_up(ntb);
963
964	return (0);
965}
966
967static int
968ntb_setup_soc(struct ntb_softc *ntb)
969{
970	uint32_t val, connection_type;
971
972	val = pci_read_config(ntb->device, NTB_PPD_OFFSET, 4);
973
974	connection_type = (val & SOC_PPD_CONN_TYPE) >> 8;
975	switch (connection_type) {
976	case NTB_CONN_B2B:
977		ntb->conn_type = NTB_CONN_B2B;
978		break;
979	default:
980		device_printf(ntb->device,
981		    "Unsupported NTB configuration (%d)\n", connection_type);
982		return (ENXIO);
983	}
984
985	if ((val & SOC_PPD_DEV_TYPE) != 0)
986		ntb->dev_type = NTB_DEV_DSD;
987	else
988		ntb->dev_type = NTB_DEV_USD;
989
990	/* Initiate PCI-E link training */
991	pci_write_config(ntb->device, NTB_PPD_OFFSET, val | SOC_PPD_INIT_LINK,
992	    4);
993
994	ntb->reg_ofs.ldb	 = SOC_PDOORBELL_OFFSET;
995	ntb->reg_ofs.ldb_mask	 = SOC_PDBMSK_OFFSET;
996	ntb->reg_ofs.rdb	 = SOC_B2B_DOORBELL_OFFSET;
997	ntb->reg_ofs.bar2_xlat	 = SOC_SBAR2XLAT_OFFSET;
998	ntb->reg_ofs.bar4_xlat	 = SOC_SBAR4XLAT_OFFSET;
999	ntb->reg_ofs.lnk_cntl	 = SOC_NTBCNTL_OFFSET;
1000	ntb->reg_ofs.lnk_stat	 = SOC_LINK_STATUS_OFFSET;
1001	ntb->reg_ofs.spad_local	 = SOC_SPAD_OFFSET;
1002	ntb->reg_ofs.spad_remote = SOC_B2B_SPAD_OFFSET;
1003	ntb->reg_ofs.spci_cmd	 = SOC_PCICMD_OFFSET;
1004
1005	ntb->limits.max_spads	 = SOC_MAX_SPADS;
1006	ntb->limits.max_db_bits	 = SOC_MAX_DB_BITS;
1007	ntb->limits.msix_cnt	 = SOC_MSIX_CNT;
1008	ntb->bits_per_vector	 = SOC_DB_BITS_PER_VEC;
1009
1010	/*
1011	 * FIXME - MSI-X bug on early SOC HW, remove once internal issue is
1012	 * resolved.  Mask transaction layer internal parity errors.
1013	 */
1014	pci_write_config(ntb->device, 0xFC, 0x4, 4);
1015
1016	configure_soc_secondary_side_bars(ntb);
1017
1018	/* Enable Bus Master and Memory Space on the secondary side */
1019	ntb_reg_write(2, ntb->reg_ofs.spci_cmd,
1020	    PCIM_CMD_MEMEN | PCIM_CMD_BUSMASTEREN);
1021
1022	callout_reset(&ntb->heartbeat_timer, 0, ntb_handle_heartbeat, ntb);
1023
1024	return (0);
1025}
1026
1027static void
1028configure_soc_secondary_side_bars(struct ntb_softc *ntb)
1029{
1030
1031	if (ntb->dev_type == NTB_DEV_USD) {
1032		ntb_reg_write(8, SOC_PBAR2XLAT_OFFSET, PBAR2XLAT_USD_ADDR);
1033		ntb_reg_write(8, SOC_PBAR4XLAT_OFFSET, PBAR4XLAT_USD_ADDR);
1034		ntb_reg_write(8, SOC_MBAR23_OFFSET, MBAR23_USD_ADDR);
1035		ntb_reg_write(8, SOC_MBAR45_OFFSET, MBAR45_USD_ADDR);
1036	} else {
1037		ntb_reg_write(8, SOC_PBAR2XLAT_OFFSET, PBAR2XLAT_DSD_ADDR);
1038		ntb_reg_write(8, SOC_PBAR4XLAT_OFFSET, PBAR4XLAT_DSD_ADDR);
1039		ntb_reg_write(8, SOC_MBAR23_OFFSET, MBAR23_DSD_ADDR);
1040		ntb_reg_write(8, SOC_MBAR45_OFFSET, MBAR45_DSD_ADDR);
1041	}
1042}
1043
1044static void
1045configure_xeon_secondary_side_bars(struct ntb_softc *ntb)
1046{
1047
1048	if (ntb->dev_type == NTB_DEV_USD) {
1049		ntb_reg_write(8, XEON_PBAR2XLAT_OFFSET, PBAR2XLAT_USD_ADDR);
1050		if (HAS_FEATURE(NTB_REGS_THRU_MW))
1051			ntb_reg_write(8, XEON_PBAR4XLAT_OFFSET,
1052			    MBAR01_DSD_ADDR);
1053		else {
1054			ntb_reg_write(8, XEON_PBAR4XLAT_OFFSET,
1055			    PBAR4XLAT_USD_ADDR);
1056			/*
1057			 * B2B_XLAT_OFFSET is a 64-bit register but can only be
1058			 * written 32 bits at a time.
1059			 */
1060			ntb_reg_write(4, XEON_B2B_XLAT_OFFSETL,
1061			    MBAR01_DSD_ADDR & 0xffffffff);
1062			ntb_reg_write(4, XEON_B2B_XLAT_OFFSETU,
1063			    MBAR01_DSD_ADDR >> 32);
1064		}
1065		ntb_reg_write(8, XEON_SBAR0BASE_OFFSET, MBAR01_USD_ADDR);
1066		ntb_reg_write(8, XEON_SBAR2BASE_OFFSET, MBAR23_USD_ADDR);
1067		ntb_reg_write(8, XEON_SBAR4BASE_OFFSET, MBAR45_USD_ADDR);
1068	} else {
1069		ntb_reg_write(8, XEON_PBAR2XLAT_OFFSET, PBAR2XLAT_DSD_ADDR);
1070		if (HAS_FEATURE(NTB_REGS_THRU_MW))
1071			ntb_reg_write(8, XEON_PBAR4XLAT_OFFSET,
1072			    MBAR01_USD_ADDR);
1073		else {
1074			ntb_reg_write(8, XEON_PBAR4XLAT_OFFSET,
1075			    PBAR4XLAT_DSD_ADDR);
1076			/*
1077			 * B2B_XLAT_OFFSET is a 64-bit register but can only be
1078			 * written 32 bits at a time.
1079			 */
1080			ntb_reg_write(4, XEON_B2B_XLAT_OFFSETL,
1081			    MBAR01_USD_ADDR & 0xffffffff);
1082			ntb_reg_write(4, XEON_B2B_XLAT_OFFSETU,
1083			    MBAR01_USD_ADDR >> 32);
1084		}
1085		ntb_reg_write(8, XEON_SBAR0BASE_OFFSET, MBAR01_DSD_ADDR);
1086		ntb_reg_write(8, XEON_SBAR2BASE_OFFSET, MBAR23_DSD_ADDR);
1087		ntb_reg_write(8, XEON_SBAR4BASE_OFFSET, MBAR45_DSD_ADDR);
1088	}
1089}
1090
1091/* SOC does not have link status interrupt, poll on that platform */
1092static void
1093ntb_handle_heartbeat(void *arg)
1094{
1095	struct ntb_softc *ntb = arg;
1096	uint32_t status32;
1097	int rc;
1098
1099	rc = ntb_check_link_status(ntb);
1100	if (rc != 0)
1101		device_printf(ntb->device,
1102		    "Error determining link status\n");
1103
1104	/* Check to see if a link error is the cause of the link down */
1105	if (ntb->link_status == NTB_LINK_DOWN) {
1106		status32 = ntb_reg_read(4, SOC_LTSSMSTATEJMP_OFFSET);
1107		if ((status32 & SOC_LTSSMSTATEJMP_FORCEDETECT) != 0) {
1108			callout_reset(&ntb->lr_timer, 0, recover_soc_link,
1109			    ntb);
1110			return;
1111		}
1112	}
1113
1114	callout_reset(&ntb->heartbeat_timer, NTB_HB_TIMEOUT * hz,
1115	    ntb_handle_heartbeat, ntb);
1116}
1117
1118static void
1119soc_perform_link_restart(struct ntb_softc *ntb)
1120{
1121	uint32_t status;
1122
1123	/* Driver resets the NTB ModPhy lanes - magic! */
1124	ntb_reg_write(1, SOC_MODPHY_PCSREG6, 0xe0);
1125	ntb_reg_write(1, SOC_MODPHY_PCSREG4, 0x40);
1126	ntb_reg_write(1, SOC_MODPHY_PCSREG4, 0x60);
1127	ntb_reg_write(1, SOC_MODPHY_PCSREG6, 0x60);
1128
1129	/* Driver waits 100ms to allow the NTB ModPhy to settle */
1130	pause("ModPhy", hz / 10);
1131
1132	/* Clear AER Errors, write to clear */
1133	status = ntb_reg_read(4, SOC_ERRCORSTS_OFFSET);
1134	status &= PCIM_AER_COR_REPLAY_ROLLOVER;
1135	ntb_reg_write(4, SOC_ERRCORSTS_OFFSET, status);
1136
1137	/* Clear unexpected electrical idle event in LTSSM, write to clear */
1138	status = ntb_reg_read(4, SOC_LTSSMERRSTS0_OFFSET);
1139	status |= SOC_LTSSMERRSTS0_UNEXPECTEDEI;
1140	ntb_reg_write(4, SOC_LTSSMERRSTS0_OFFSET, status);
1141
1142	/* Clear DeSkew Buffer error, write to clear */
1143	status = ntb_reg_read(4, SOC_DESKEWSTS_OFFSET);
1144	status |= SOC_DESKEWSTS_DBERR;
1145	ntb_reg_write(4, SOC_DESKEWSTS_OFFSET, status);
1146
1147	status = ntb_reg_read(4, SOC_IBSTERRRCRVSTS0_OFFSET);
1148	status &= SOC_IBIST_ERR_OFLOW;
1149	ntb_reg_write(4, SOC_IBSTERRRCRVSTS0_OFFSET, status);
1150
1151	/* Releases the NTB state machine to allow the link to retrain */
1152	status = ntb_reg_read(4, SOC_LTSSMSTATEJMP_OFFSET);
1153	status &= ~SOC_LTSSMSTATEJMP_FORCEDETECT;
1154	ntb_reg_write(4, SOC_LTSSMSTATEJMP_OFFSET, status);
1155}
1156
1157static void
1158ntb_handle_link_event(struct ntb_softc *ntb, int link_state)
1159{
1160	enum ntb_hw_event event;
1161	uint16_t status;
1162
1163	if (ntb->link_status == link_state)
1164		return;
1165
1166	if (link_state == NTB_LINK_UP) {
1167		device_printf(ntb->device, "Link Up\n");
1168		ntb->link_status = NTB_LINK_UP;
1169		event = NTB_EVENT_HW_LINK_UP;
1170
1171		if (ntb->type == NTB_SOC ||
1172		    ntb->conn_type == NTB_CONN_TRANSPARENT)
1173			status = ntb_reg_read(2, ntb->reg_ofs.lnk_stat);
1174		else
1175			status = pci_read_config(ntb->device,
1176			    XEON_LINK_STATUS_OFFSET, 2);
1177		ntb->link_width = (status & NTB_LINK_WIDTH_MASK) >> 4;
1178		ntb->link_speed = (status & NTB_LINK_SPEED_MASK);
1179		device_printf(ntb->device, "Link Width %d, Link Speed %d\n",
1180		    ntb->link_width, ntb->link_speed);
1181		callout_reset(&ntb->heartbeat_timer, NTB_HB_TIMEOUT * hz,
1182		    ntb_handle_heartbeat, ntb);
1183	} else {
1184		device_printf(ntb->device, "Link Down\n");
1185		ntb->link_status = NTB_LINK_DOWN;
1186		event = NTB_EVENT_HW_LINK_DOWN;
1187		/* Do not modify link width/speed, we need it in link recovery */
1188	}
1189
1190	/* notify the upper layer if we have an event change */
1191	if (ntb->event_cb != NULL)
1192		ntb->event_cb(ntb->ntb_transport, event);
1193}
1194
1195static void
1196ntb_hw_link_up(struct ntb_softc *ntb)
1197{
1198	uint32_t cntl;
1199
1200	if (ntb->conn_type == NTB_CONN_TRANSPARENT) {
1201		ntb_handle_link_event(ntb, NTB_LINK_UP);
1202		return;
1203	}
1204
1205	cntl = ntb_reg_read(4, ntb->reg_ofs.lnk_cntl);
1206	cntl &= ~(NTB_CNTL_LINK_DISABLE | NTB_CNTL_CFG_LOCK);
1207	cntl |= NTB_CNTL_P2S_BAR23_SNOOP | NTB_CNTL_S2P_BAR23_SNOOP;
1208	cntl |= NTB_CNTL_P2S_BAR45_SNOOP | NTB_CNTL_S2P_BAR45_SNOOP;
1209	ntb_reg_write(4, ntb->reg_ofs.lnk_cntl, cntl);
1210}
1211
1212static void
1213ntb_hw_link_down(struct ntb_softc *ntb)
1214{
1215	uint32_t cntl;
1216
1217	if (ntb->conn_type == NTB_CONN_TRANSPARENT) {
1218		ntb_handle_link_event(ntb, NTB_LINK_DOWN);
1219		return;
1220	}
1221
1222	cntl = ntb_reg_read(4, ntb->reg_ofs.lnk_cntl);
1223	cntl &= ~(NTB_CNTL_P2S_BAR23_SNOOP | NTB_CNTL_S2P_BAR23_SNOOP);
1224	cntl &= ~(NTB_CNTL_P2S_BAR45_SNOOP | NTB_CNTL_S2P_BAR45_SNOOP);
1225	cntl |= NTB_CNTL_LINK_DISABLE | NTB_CNTL_CFG_LOCK;
1226	ntb_reg_write(4, ntb->reg_ofs.lnk_cntl, cntl);
1227}
1228
1229static void
1230recover_soc_link(void *arg)
1231{
1232	struct ntb_softc *ntb = arg;
1233	uint8_t speed, width;
1234	uint32_t status32;
1235	uint16_t status16;
1236
1237	soc_perform_link_restart(ntb);
1238
1239	/*
1240	 * There is a potential race between the 2 NTB devices recovering at
1241	 * the same time.  If the times are the same, the link will not recover
1242	 * and the driver will be stuck in this loop forever.  Add a random
1243	 * interval to the recovery time to prevent this race.
1244	 */
1245	status32 = arc4random() % SOC_LINK_RECOVERY_TIME;
1246	pause("Link", (SOC_LINK_RECOVERY_TIME + status32) * hz / 1000);
1247
1248	status32 = ntb_reg_read(4, SOC_LTSSMSTATEJMP_OFFSET);
1249	if ((status32 & SOC_LTSSMSTATEJMP_FORCEDETECT) != 0)
1250		goto retry;
1251
1252	status32 = ntb_reg_read(4, SOC_IBSTERRRCRVSTS0_OFFSET);
1253	if ((status32 & SOC_IBIST_ERR_OFLOW) != 0)
1254		goto retry;
1255
1256	status32 = ntb_reg_read(4, ntb->reg_ofs.lnk_cntl);
1257	if ((status32 & SOC_CNTL_LINK_DOWN) != 0)
1258		goto out;
1259
1260	status16 = ntb_reg_read(2, ntb->reg_ofs.lnk_stat);
1261	width = (status16 & NTB_LINK_WIDTH_MASK) >> 4;
1262	speed = (status16 & NTB_LINK_SPEED_MASK);
1263	if (ntb->link_width != width || ntb->link_speed != speed)
1264		goto retry;
1265
1266out:
1267	callout_reset(&ntb->heartbeat_timer, NTB_HB_TIMEOUT * hz,
1268	    ntb_handle_heartbeat, ntb);
1269	return;
1270
1271retry:
1272	callout_reset(&ntb->lr_timer, NTB_HB_TIMEOUT * hz, recover_soc_link,
1273	    ntb);
1274}
1275
1276static int
1277ntb_check_link_status(struct ntb_softc *ntb)
1278{
1279	int link_state;
1280	uint32_t ntb_cntl;
1281	uint16_t status;
1282
1283	if (ntb->type == NTB_SOC) {
1284		ntb_cntl = ntb_reg_read(4, ntb->reg_ofs.lnk_cntl);
1285		if ((ntb_cntl & SOC_CNTL_LINK_DOWN) != 0)
1286			link_state = NTB_LINK_DOWN;
1287		else
1288			link_state = NTB_LINK_UP;
1289	} else {
1290		status = pci_read_config(ntb->device, XEON_LINK_STATUS_OFFSET,
1291		    2);
1292
1293		if ((status & NTB_LINK_STATUS_ACTIVE) != 0)
1294			link_state = NTB_LINK_UP;
1295		else
1296			link_state = NTB_LINK_DOWN;
1297	}
1298
1299	ntb_handle_link_event(ntb, link_state);
1300
1301	return (0);
1302}
1303
1304/**
1305 * ntb_register_event_callback() - register event callback
1306 * @ntb: pointer to ntb_softc instance
1307 * @func: callback function to register
1308 *
1309 * This function registers a callback for any HW driver events such as link
1310 * up/down, power management notices and etc.
1311 *
1312 * RETURNS: An appropriate ERRNO error value on error, or zero for success.
1313 */
1314int
1315ntb_register_event_callback(struct ntb_softc *ntb, ntb_event_callback func)
1316{
1317
1318	if (ntb->event_cb != NULL)
1319		return (EINVAL);
1320
1321	ntb->event_cb = func;
1322
1323	return (0);
1324}
1325
1326/**
1327 * ntb_unregister_event_callback() - unregisters the event callback
1328 * @ntb: pointer to ntb_softc instance
1329 *
1330 * This function unregisters the existing callback from transport
1331 */
1332void
1333ntb_unregister_event_callback(struct ntb_softc *ntb)
1334{
1335
1336	ntb->event_cb = NULL;
1337}
1338
1339static void
1340ntb_irq_work(void *arg)
1341{
1342	struct ntb_db_cb *db_cb = arg;
1343	struct ntb_softc *ntb;
1344	int rc;
1345
1346	rc = db_cb->callback(db_cb->data, db_cb->db_num);
1347	/* Poll if forward progress was made. */
1348	if (rc != 0) {
1349		callout_reset(&db_cb->irq_work, 0, ntb_irq_work, db_cb);
1350		return;
1351	}
1352
1353	/* Unmask interrupt if no progress was made. */
1354	ntb = db_cb->ntb;
1355	unmask_ldb_interrupt(ntb, db_cb->db_num);
1356}
1357
1358/**
1359 * ntb_register_db_callback() - register a callback for doorbell interrupt
1360 * @ntb: pointer to ntb_softc instance
1361 * @idx: doorbell index to register callback, zero based
1362 * @data: pointer to be returned to caller with every callback
1363 * @func: callback function to register
1364 *
1365 * This function registers a callback function for the doorbell interrupt
1366 * on the primary side. The function will unmask the doorbell as well to
1367 * allow interrupt.
1368 *
1369 * RETURNS: An appropriate ERRNO error value on error, or zero for success.
1370 */
1371int
1372ntb_register_db_callback(struct ntb_softc *ntb, unsigned int idx, void *data,
1373    ntb_db_callback func)
1374{
1375	struct ntb_db_cb *db_cb = &ntb->db_cb[idx];
1376
1377	if (idx >= ntb->allocated_interrupts || db_cb->callback ||
1378	    db_cb->reserved) {
1379		device_printf(ntb->device, "Invalid Index.\n");
1380		return (EINVAL);
1381	}
1382
1383	db_cb->callback = func;
1384	db_cb->data = data;
1385	callout_init(&db_cb->irq_work, 1);
1386
1387	unmask_ldb_interrupt(ntb, idx);
1388
1389	return (0);
1390}
1391
1392/**
1393 * ntb_unregister_db_callback() - unregister a callback for doorbell interrupt
1394 * @ntb: pointer to ntb_softc instance
1395 * @idx: doorbell index to register callback, zero based
1396 *
1397 * This function unregisters a callback function for the doorbell interrupt
1398 * on the primary side. The function will also mask the said doorbell.
1399 */
1400void
1401ntb_unregister_db_callback(struct ntb_softc *ntb, unsigned int idx)
1402{
1403
1404	if (idx >= ntb->allocated_interrupts || !ntb->db_cb[idx].callback)
1405		return;
1406
1407	mask_ldb_interrupt(ntb, idx);
1408
1409	callout_drain(&ntb->db_cb[idx].irq_work);
1410	ntb->db_cb[idx].callback = NULL;
1411}
1412
1413/**
1414 * ntb_find_transport() - find the transport pointer
1415 * @transport: pointer to pci device
1416 *
1417 * Given the pci device pointer, return the transport pointer passed in when
1418 * the transport attached when it was inited.
1419 *
1420 * RETURNS: pointer to transport.
1421 */
1422void *
1423ntb_find_transport(struct ntb_softc *ntb)
1424{
1425
1426	return (ntb->ntb_transport);
1427}
1428
1429/**
1430 * ntb_register_transport() - Register NTB transport with NTB HW driver
1431 * @transport: transport identifier
1432 *
1433 * This function allows a transport to reserve the hardware driver for
1434 * NTB usage.
1435 *
1436 * RETURNS: pointer to ntb_softc, NULL on error.
1437 */
1438struct ntb_softc *
1439ntb_register_transport(struct ntb_softc *ntb, void *transport)
1440{
1441
1442	/*
1443	 * TODO: when we have more than one transport, we will need to rewrite
1444	 * this to prevent race conditions
1445	 */
1446	if (ntb->ntb_transport != NULL)
1447		return (NULL);
1448
1449	ntb->ntb_transport = transport;
1450	return (ntb);
1451}
1452
1453/**
1454 * ntb_unregister_transport() - Unregister the transport with the NTB HW driver
1455 * @ntb - ntb_softc of the transport to be freed
1456 *
1457 * This function unregisters the transport from the HW driver and performs any
1458 * necessary cleanups.
1459 */
1460void
1461ntb_unregister_transport(struct ntb_softc *ntb)
1462{
1463	int i;
1464
1465	if (ntb->ntb_transport == NULL)
1466		return;
1467
1468	for (i = 0; i < ntb->allocated_interrupts; i++)
1469		ntb_unregister_db_callback(ntb, i);
1470
1471	ntb_unregister_event_callback(ntb);
1472	ntb->ntb_transport = NULL;
1473}
1474
1475/**
1476 * ntb_get_max_spads() - get the total scratch regs usable
1477 * @ntb: pointer to ntb_softc instance
1478 *
1479 * This function returns the max 32bit scratchpad registers usable by the
1480 * upper layer.
1481 *
1482 * RETURNS: total number of scratch pad registers available
1483 */
1484uint8_t
1485ntb_get_max_spads(struct ntb_softc *ntb)
1486{
1487
1488	return (ntb->limits.max_spads);
1489}
1490
1491/**
1492 * ntb_write_local_spad() - write to the secondary scratchpad register
1493 * @ntb: pointer to ntb_softc instance
1494 * @idx: index to the scratchpad register, 0 based
1495 * @val: the data value to put into the register
1496 *
1497 * This function allows writing of a 32bit value to the indexed scratchpad
1498 * register. The register resides on the secondary (external) side.
1499 *
1500 * RETURNS: An appropriate ERRNO error value on error, or zero for success.
1501 */
1502int
1503ntb_write_local_spad(struct ntb_softc *ntb, unsigned int idx, uint32_t val)
1504{
1505
1506	if (idx >= ntb->limits.max_spads)
1507		return (EINVAL);
1508
1509	ntb_reg_write(4, ntb->reg_ofs.spad_local + idx * 4, val);
1510
1511	return (0);
1512}
1513
1514/**
1515 * ntb_read_local_spad() - read from the primary scratchpad register
1516 * @ntb: pointer to ntb_softc instance
1517 * @idx: index to scratchpad register, 0 based
1518 * @val: pointer to 32bit integer for storing the register value
1519 *
1520 * This function allows reading of the 32bit scratchpad register on
1521 * the primary (internal) side.
1522 *
1523 * RETURNS: An appropriate ERRNO error value on error, or zero for success.
1524 */
1525int
1526ntb_read_local_spad(struct ntb_softc *ntb, unsigned int idx, uint32_t *val)
1527{
1528
1529	if (idx >= ntb->limits.max_spads)
1530		return (EINVAL);
1531
1532	*val = ntb_reg_read(4, ntb->reg_ofs.spad_local + idx * 4);
1533
1534	return (0);
1535}
1536
1537/**
1538 * ntb_write_remote_spad() - write to the secondary scratchpad register
1539 * @ntb: pointer to ntb_softc instance
1540 * @idx: index to the scratchpad register, 0 based
1541 * @val: the data value to put into the register
1542 *
1543 * This function allows writing of a 32bit value to the indexed scratchpad
1544 * register. The register resides on the secondary (external) side.
1545 *
1546 * RETURNS: An appropriate ERRNO error value on error, or zero for success.
1547 */
1548int
1549ntb_write_remote_spad(struct ntb_softc *ntb, unsigned int idx, uint32_t val)
1550{
1551
1552	if (idx >= ntb->limits.max_spads)
1553		return (EINVAL);
1554
1555	if (HAS_FEATURE(NTB_REGS_THRU_MW))
1556		ntb_mw_write(4, XEON_SHADOW_SPAD_OFFSET + idx * 4, val);
1557	else
1558		ntb_reg_write(4, ntb->reg_ofs.spad_remote + idx * 4, val);
1559
1560	return (0);
1561}
1562
1563/**
1564 * ntb_read_remote_spad() - read from the primary scratchpad register
1565 * @ntb: pointer to ntb_softc instance
1566 * @idx: index to scratchpad register, 0 based
1567 * @val: pointer to 32bit integer for storing the register value
1568 *
1569 * This function allows reading of the 32bit scratchpad register on
1570 * the primary (internal) side.
1571 *
1572 * RETURNS: An appropriate ERRNO error value on error, or zero for success.
1573 */
1574int
1575ntb_read_remote_spad(struct ntb_softc *ntb, unsigned int idx, uint32_t *val)
1576{
1577
1578	if (idx >= ntb->limits.max_spads)
1579		return (EINVAL);
1580
1581	if (HAS_FEATURE(NTB_REGS_THRU_MW))
1582		*val = ntb_mw_read(4, XEON_SHADOW_SPAD_OFFSET + idx * 4);
1583	else
1584		*val = ntb_reg_read(4, ntb->reg_ofs.spad_remote + idx * 4);
1585
1586	return (0);
1587}
1588
1589/**
1590 * ntb_get_mw_vbase() - get virtual addr for the NTB memory window
1591 * @ntb: pointer to ntb_softc instance
1592 * @mw: memory window number
1593 *
1594 * This function provides the base virtual address of the memory window
1595 * specified.
1596 *
1597 * RETURNS: pointer to virtual address, or NULL on error.
1598 */
1599void *
1600ntb_get_mw_vbase(struct ntb_softc *ntb, unsigned int mw)
1601{
1602
1603	if (mw >= NTB_NUM_MW)
1604		return (NULL);
1605
1606	return (ntb->bar_info[NTB_MW_TO_BAR(mw)].vbase);
1607}
1608
1609vm_paddr_t
1610ntb_get_mw_pbase(struct ntb_softc *ntb, unsigned int mw)
1611{
1612
1613	if (mw >= NTB_NUM_MW)
1614		return (0);
1615
1616	return (ntb->bar_info[NTB_MW_TO_BAR(mw)].pbase);
1617}
1618
1619/**
1620 * ntb_get_mw_size() - return size of NTB memory window
1621 * @ntb: pointer to ntb_softc instance
1622 * @mw: memory window number
1623 *
1624 * This function provides the physical size of the memory window specified
1625 *
1626 * RETURNS: the size of the memory window or zero on error
1627 */
1628u_long
1629ntb_get_mw_size(struct ntb_softc *ntb, unsigned int mw)
1630{
1631
1632	if (mw >= NTB_NUM_MW)
1633		return (0);
1634
1635	return (ntb->bar_info[NTB_MW_TO_BAR(mw)].size);
1636}
1637
1638/**
1639 * ntb_set_mw_addr - set the memory window address
1640 * @ntb: pointer to ntb_softc instance
1641 * @mw: memory window number
1642 * @addr: base address for data
1643 *
1644 * This function sets the base physical address of the memory window.  This
1645 * memory address is where data from the remote system will be transfered into
1646 * or out of depending on how the transport is configured.
1647 */
1648void
1649ntb_set_mw_addr(struct ntb_softc *ntb, unsigned int mw, uint64_t addr)
1650{
1651
1652	if (mw >= NTB_NUM_MW)
1653		return;
1654
1655	switch (NTB_MW_TO_BAR(mw)) {
1656	case NTB_B2B_BAR_1:
1657		ntb_reg_write(8, ntb->reg_ofs.bar2_xlat, addr);
1658		break;
1659	case NTB_B2B_BAR_2:
1660		ntb_reg_write(8, ntb->reg_ofs.bar4_xlat, addr);
1661		break;
1662	}
1663}
1664
1665/**
1666 * ntb_ring_doorbell() - Set the doorbell on the secondary/external side
1667 * @ntb: pointer to ntb_softc instance
1668 * @db: doorbell to ring
1669 *
1670 * This function allows triggering of a doorbell on the secondary/external
1671 * side that will initiate an interrupt on the remote host
1672 *
1673 * RETURNS: An appropriate ERRNO error value on error, or zero for success.
1674 */
1675void
1676ntb_ring_doorbell(struct ntb_softc *ntb, unsigned int db)
1677{
1678
1679	if (ntb->type == NTB_SOC)
1680		ntb_reg_write(8, ntb->reg_ofs.rdb, (uint64_t) 1 << db);
1681	else {
1682		if (HAS_FEATURE(NTB_REGS_THRU_MW))
1683			ntb_mw_write(2, XEON_SHADOW_PDOORBELL_OFFSET,
1684			    ((1 << ntb->bits_per_vector) - 1) <<
1685			    (db * ntb->bits_per_vector));
1686		else
1687			ntb_reg_write(2, ntb->reg_ofs.rdb,
1688			    ((1 << ntb->bits_per_vector) - 1) <<
1689			    (db * ntb->bits_per_vector));
1690	}
1691}
1692
1693/**
1694 * ntb_query_link_status() - return the hardware link status
1695 * @ndev: pointer to ntb_device instance
1696 *
1697 * Returns true if the hardware is connected to the remote system
1698 *
1699 * RETURNS: true or false based on the hardware link state
1700 */
1701bool
1702ntb_query_link_status(struct ntb_softc *ntb)
1703{
1704
1705	return (ntb->link_status == NTB_LINK_UP);
1706}
1707
1708static void
1709save_bar_parameters(struct ntb_pci_bar_info *bar)
1710{
1711
1712	bar->pci_bus_tag = rman_get_bustag(bar->pci_resource);
1713	bar->pci_bus_handle = rman_get_bushandle(bar->pci_resource);
1714	bar->pbase = rman_get_start(bar->pci_resource);
1715	bar->size = rman_get_size(bar->pci_resource);
1716	bar->vbase = rman_get_virtual(bar->pci_resource);
1717}
1718
1719device_t
1720ntb_get_device(struct ntb_softc *ntb)
1721{
1722
1723	return (ntb->device);
1724}
1725
/**
 * ntb_has_feature() - export HW-specific errata information
 * @ntb: pointer to ntb_softc instance
 * @feature: feature flag(s) to test
 *
 * RETURNS: the result of HAS_FEATURE() for @feature on this device.
 */
bool
ntb_has_feature(struct ntb_softc *ntb, uint64_t feature)
{
	bool present;

	/* HAS_FEATURE() picks up the local 'ntb' implicitly. */
	present = HAS_FEATURE(feature);
	return (present);
}
1733