ntb_hw_intel.c revision 289348
1/*-
2 * Copyright (C) 2013 Intel Corporation
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24 * SUCH DAMAGE.
25 */
26
27#include <sys/cdefs.h>
28__FBSDID("$FreeBSD: head/sys/dev/ntb/ntb_hw/ntb_hw.c 289348 2015-10-14 23:48:16Z cem $");
29
30#include <sys/param.h>
31#include <sys/kernel.h>
32#include <sys/systm.h>
33#include <sys/bus.h>
34#include <sys/malloc.h>
35#include <sys/module.h>
36#include <sys/queue.h>
37#include <sys/rman.h>
38#include <sys/sysctl.h>
39#include <vm/vm.h>
40#include <vm/pmap.h>
41#include <machine/bus.h>
42#include <machine/pmap.h>
43#include <machine/resource.h>
44#include <dev/pci/pcireg.h>
45#include <dev/pci/pcivar.h>
46
47#include "ntb_regs.h"
48#include "ntb_hw.h"
49
50/*
51 * The Non-Transparent Bridge (NTB) is a device on some Intel processors that
52 * allows you to connect two systems using a PCI-e link.
53 *
54 * This module contains the hardware abstraction layer for the NTB. It allows
 * you to send and receive interrupts, map the memory windows and send and
56 * receive messages in the scratch-pad registers.
57 *
58 * NOTE: Much of the code in this module is shared with Linux. Any patches may
59 * be picked up and redistributed in Linux with a dual GPL/BSD license.
60 */
61
/* Indices into ntb_softc.bar_info[]; ntb_map_pci_bars() fills all three. */
#define NTB_CONFIG_BAR	0
#define NTB_B2B_BAR_1	1
#define NTB_B2B_BAR_2	2
#define NTB_MAX_BARS	3
/* Memory window N is kept in bar_info[N + 1]; slot 0 is the config BAR. */
#define NTB_MW_TO_BAR(mw) ((mw) + 1)

/* Sizes ntb_softc.int_info[] for whichever family has more doorbell bits. */
#define MAX_MSIX_INTERRUPTS MAX(XEON_MAX_DB_BITS, SOC_MAX_DB_BITS)

#define NTB_HB_TIMEOUT	1 /* second */
#define SOC_LINK_RECOVERY_TIME	500

/* Fetch this driver's softc from a newbus device handle. */
#define DEVICE2SOFTC(dev) ((struct ntb_softc *) device_get_softc(dev))
74
/*
 * Hardware family of the attached device.  Copied from the pci_ids table
 * into the softc at attach time and used to choose between the Xeon and
 * SoC code paths.
 */
enum ntb_device_type {
	NTB_XEON,
	NTB_SOC
};
79
/*
 * Device features and workarounds.
 *
 * Tests the softc's feature bitmask.  The macro references a variable named
 * `ntb' (a struct ntb_softc *) that must be in scope at the call site.
 */
#define HAS_FEATURE(feature)	\
	((ntb->features & (feature)) != 0)
83
/* One row of the supported-hardware table (pci_ids). */
struct ntb_hw_info {
	uint32_t		device_id;	/* matched against pci_get_devid() */
	const char		*desc;		/* passed to device_set_desc() */
	enum ntb_device_type	type;		/* copied to ntb_softc.type */
	uint64_t		features;	/* NTB_* flags, see HAS_FEATURE() */
};
90
/*
 * Per-BAR mapping state.  pci_resource_id and pci_resource are set by the
 * map_*_bar() routines; the remaining fields are presumably filled in by
 * save_bar_parameters() (not shown here -- confirm).
 */
struct ntb_pci_bar_info {
	bus_space_tag_t		pci_bus_tag;	/* used by ntb_bar_read/write */
	bus_space_handle_t	pci_bus_handle;	/* used by ntb_bar_read/write */
	int			pci_resource_id; /* PCIR_BAR(n) rid */
	struct resource		*pci_resource;	/* NULL while unmapped */
	vm_paddr_t		pbase;		/* physical base address */
	void			*vbase;		/* kernel virtual base address */
	u_long			size;		/* mapped length */
};
100
/* State for one allocated interrupt (an MSI-X vector or the legacy INTx). */
struct ntb_int_info {
	struct resource	*res;	/* SYS_RES_IRQ resource */
	int		rid;	/* resource id: i + 1 for MSI-X, 0 for INTx */
	void		*tag;	/* cookie from bus_setup_intr(); NULL if unset */
};
106
/*
 * Per-doorbell callback slot.  The interrupt handlers receive a pointer to
 * one of these and, when a callback is registered, schedule irq_work.
 */
struct ntb_db_cb {
	ntb_db_callback		callback;	/* NULL when nothing registered */
	unsigned int		db_num;		/* index of this slot in db_cb[] */
	void			*data;		/* presumably the callback's argument */
	struct ntb_softc	*ntb;		/* back pointer to the owning softc */
	struct callout		irq_work;	/* runs ntb_irq_work() */
	bool			reserved;	/* slot is used for the link event irq */
};
115
/* Per-device driver state, allocated by newbus (see ntb_pci_driver). */
struct ntb_softc {
	device_t		device;		/* set in ntb_attach() */
	enum ntb_device_type	type;		/* from pci_ids */
	uint64_t		features;	/* from pci_ids; see HAS_FEATURE() */

	struct ntb_pci_bar_info	bar_info[NTB_MAX_BARS];
	struct ntb_int_info	int_info[MAX_MSIX_INTERRUPTS];
	/* Number of int_info[] entries that ntb_teardown_interrupts() walks. */
	uint32_t		allocated_interrupts;

	/* Started by ntb_setup_soc(); the SoC has no link interrupt. */
	struct callout		heartbeat_timer;
	/* Presumably the SoC link-recovery timer (recover_soc_link) -- confirm. */
	struct callout		lr_timer;

	void			*ntb_transport;
	ntb_event_callback	event_cb;
	/* Array allocated by ntb_create_callbacks(), one entry per vector. */
	struct ntb_db_cb 	*db_cb;

	/* Hardware limits, filled in by ntb_setup_xeon()/ntb_setup_soc(). */
	struct {
		uint8_t max_spads;
		uint8_t max_db_bits;
		uint8_t msix_cnt;
	} limits;
	/* Register offsets, filled in by ntb_setup_xeon()/ntb_setup_soc(). */
	struct {
		uint32_t ldb;		/* local doorbell */
		uint32_t ldb_mask;	/* local doorbell mask */
		uint32_t rdb;		/* remote (B2B) doorbell */
		uint32_t bar2_xlat;
		uint32_t bar4_xlat;
		uint32_t spad_remote;
		uint32_t spad_local;
		uint32_t lnk_cntl;
		uint32_t lnk_stat;
		uint32_t spci_cmd;	/* secondary-side PCI command register */
	} reg_ofs;
	uint32_t ppd;			/* raw value read at NTB_PPD_OFFSET */
	uint8_t conn_type;		/* NTB_CONN_*; only B2B passes detection */
	uint8_t dev_type;		/* NTB_DEV_USD or NTB_DEV_DSD */
	uint8_t bits_per_vector;	/* doorbell bits served by one vector */
	uint8_t link_status;
	uint8_t link_width;
	uint8_t link_speed;
};
157
#ifdef __i386__
/*
 * 64-bit bus space accessors for i386, built from two 32-bit accesses.
 * The access is therefore not atomic with respect to the device.
 */

/*
 * Read a 64-bit value as two 32-bit reads: the low dword at `offset' first,
 * then the high dword at `offset + 4'.  The reads are separate statements so
 * that their order is fixed rather than left to the compiler.
 */
static __inline uint64_t
bus_space_read_8(bus_space_tag_t tag, bus_space_handle_t handle,
    bus_size_t offset)
{
	uint64_t lo, hi;

	lo = bus_space_read_4(tag, handle, offset);
	hi = bus_space_read_4(tag, handle, offset + 4);
	return (lo | (hi << 32));
}

/*
 * Write a 64-bit value as two 32-bit writes: low dword at `offset', then
 * high dword at `offset + 4'.
 */
static __inline void
bus_space_write_8(bus_space_tag_t tag, bus_space_handle_t handle,
    bus_size_t offset, uint64_t val)
{

	bus_space_write_4(tag, handle, offset, (uint32_t)val);
	bus_space_write_4(tag, handle, offset + 4, (uint32_t)(val >> 32));
}
#endif
177
/*
 * Register access helpers.  SIZE is the access width in bytes and is pasted
 * onto bus_space_read_/bus_space_write_ (1, 2, 4 or 8).  All of these
 * reference a variable named `ntb' (a struct ntb_softc *) that must be in
 * scope at the call site.
 *
 *   ntb_bar_*  access an arbitrary bar_info[] slot
 *   ntb_reg_*  access the config BAR (NTB_CONFIG_BAR)
 *   ntb_mw_*   access the second B2B BAR (NTB_B2B_BAR_2)
 */
#define ntb_bar_read(SIZE, bar, offset) \
	    bus_space_read_ ## SIZE (ntb->bar_info[(bar)].pci_bus_tag, \
	    ntb->bar_info[(bar)].pci_bus_handle, (offset))
#define ntb_bar_write(SIZE, bar, offset, val) \
	    bus_space_write_ ## SIZE (ntb->bar_info[(bar)].pci_bus_tag, \
	    ntb->bar_info[(bar)].pci_bus_handle, (offset), (val))
#define ntb_reg_read(SIZE, offset) ntb_bar_read(SIZE, NTB_CONFIG_BAR, offset)
#define ntb_reg_write(SIZE, offset, val) \
	    ntb_bar_write(SIZE, NTB_CONFIG_BAR, offset, val)
#define ntb_mw_read(SIZE, offset) ntb_bar_read(SIZE, NTB_B2B_BAR_2, offset)
#define ntb_mw_write(SIZE, offset, val) \
	    ntb_bar_write(SIZE, NTB_B2B_BAR_2, offset, val)
190
/*
 * Routine that allocates and maps one BAR; returns 0 or an errno value.
 * map_pci_bar() takes one of these (map_mmr_bar or map_memory_window_bar).
 */
typedef int (*bar_map_strategy)(struct ntb_softc *ntb,
    struct ntb_pci_bar_info *bar);
193
/* newbus entry points */
static int ntb_probe(device_t device);
static int ntb_attach(device_t device);
static int ntb_detach(device_t device);
/* BAR mapping */
static int ntb_map_pci_bars(struct ntb_softc *ntb);
static int map_pci_bar(struct ntb_softc *ntb, bar_map_strategy strategy,
    struct ntb_pci_bar_info *bar);
static int map_mmr_bar(struct ntb_softc *ntb, struct ntb_pci_bar_info *bar);
static int map_memory_window_bar(struct ntb_softc *ntb,
    struct ntb_pci_bar_info *bar);
static void ntb_unmap_pci_bar(struct ntb_softc *ntb);
/* Interrupt setup and teardown */
static int ntb_remap_msix(device_t, uint32_t desired, uint32_t avail);
static int ntb_setup_interrupts(struct ntb_softc *ntb);
static int ntb_setup_legacy_interrupt(struct ntb_softc *ntb);
static int ntb_setup_xeon_msix(struct ntb_softc *ntb, uint32_t num_vectors);
static int ntb_setup_soc_msix(struct ntb_softc *ntb, uint32_t num_vectors);
static void ntb_teardown_interrupts(struct ntb_softc *ntb);
/* Interrupt handlers */
static void handle_soc_irq(void *arg);
static void handle_xeon_irq(void *arg);
static void handle_xeon_event_irq(void *arg);
static void ntb_handle_legacy_interrupt(void *arg);
static void ntb_irq_work(void *arg);
/* Doorbell register access and masking */
static uint64_t db_ioread(struct ntb_softc *, uint32_t regoff);
static void db_iowrite(struct ntb_softc *, uint32_t regoff, uint64_t val);
static void mask_ldb_interrupt(struct ntb_softc *ntb, unsigned int idx);
static void unmask_ldb_interrupt(struct ntb_softc *ntb, unsigned int idx);
/* Doorbell callback slots */
static int ntb_create_callbacks(struct ntb_softc *ntb, uint32_t num_vectors);
static void ntb_free_callbacks(struct ntb_softc *ntb);
/* Device identification and per-family setup */
static struct ntb_hw_info *ntb_get_device_info(uint32_t device_id);
static int ntb_detect_xeon(struct ntb_softc *ntb);
static int ntb_detect_soc(struct ntb_softc *ntb);
static int ntb_setup_xeon(struct ntb_softc *ntb);
static int ntb_setup_soc(struct ntb_softc *ntb);
static void ntb_teardown_xeon(struct ntb_softc *ntb);
static void configure_soc_secondary_side_bars(struct ntb_softc *ntb);
static void configure_xeon_secondary_side_bars(struct ntb_softc *ntb);
/* Link management */
static void ntb_handle_heartbeat(void *arg);
static void ntb_handle_link_event(struct ntb_softc *ntb, int link_state);
static void ntb_hw_link_down(struct ntb_softc *ntb);
static void ntb_hw_link_up(struct ntb_softc *ntb);
static void recover_soc_link(void *arg);
static int ntb_check_link_status(struct ntb_softc *ntb);
static void save_bar_parameters(struct ntb_pci_bar_info *bar);
236
/*
 * Supported hardware.  ntb_get_device_info() scans this table and stops at
 * the first entry whose device_id is zero, so the all-zero row must stay
 * last.  Each row is { device_id, description, family, feature flags }.
 */
static struct ntb_hw_info pci_ids[] = {
	{ 0x0C4E8086, "Atom Processor S1200 NTB Primary B2B", NTB_SOC, 0 },

	/* XXX: PS/SS IDs left out until they are supported. */
	{ 0x37258086, "JSF Xeon C35xx/C55xx Non-Transparent Bridge B2B",
		NTB_XEON, NTB_REGS_THRU_MW | NTB_B2BDOORBELL_BIT14 },
	{ 0x3C0D8086, "SNB Xeon E5/Core i7 Non-Transparent Bridge B2B",
		NTB_XEON, NTB_REGS_THRU_MW | NTB_B2BDOORBELL_BIT14 },
	{ 0x0E0D8086, "IVT Xeon E5 V2 Non-Transparent Bridge B2B", NTB_XEON,
		NTB_REGS_THRU_MW | NTB_B2BDOORBELL_BIT14 | NTB_SB01BASE_LOCKUP
		    | NTB_BAR_SIZE_4K },
	{ 0x2F0D8086, "HSX Xeon E5 V3 Non-Transparent Bridge B2B", NTB_XEON,
		NTB_REGS_THRU_MW | NTB_B2BDOORBELL_BIT14 | NTB_SB01BASE_LOCKUP
	},
	{ 0x6F0D8086, "BDX Xeon E5 V4 Non-Transparent Bridge B2B", NTB_XEON,
		NTB_REGS_THRU_MW | NTB_B2BDOORBELL_BIT14 | NTB_SB01BASE_LOCKUP
	},

	/* Terminator: device_id == 0 ends the scan. */
	{ 0x00000000, NULL, NTB_SOC, 0 }
};
257
258/*
259 * OS <-> Driver interface structures
260 */
/* malloc(9) type used for every allocation made by this driver. */
MALLOC_DEFINE(M_NTB, "ntb_hw", "ntb_hw driver memory allocations");

/* newbus method table: probe, attach and detach only. */
static device_method_t ntb_pci_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe,     ntb_probe),
	DEVMETHOD(device_attach,    ntb_attach),
	DEVMETHOD(device_detach,    ntb_detach),
	DEVMETHOD_END
};

/* Driver declaration; the softc size tells newbus how much to allocate. */
static driver_t ntb_pci_driver = {
	"ntb_hw",
	ntb_pci_methods,
	sizeof(struct ntb_softc),
};

/* Register the driver on the pci bus as module "ntb_hw", version 1. */
static devclass_t ntb_devclass;
DRIVER_MODULE(ntb_hw, pci, ntb_pci_driver, ntb_devclass, NULL, NULL);
MODULE_VERSION(ntb_hw, 1);

/* Root of the hw.ntb sysctl tree; the tunables below hang off this node. */
SYSCTL_NODE(_hw, OID_AUTO, ntb, CTLFLAG_RW, 0, "NTB sysctls");
282
283/*
284 * OS <-> Driver linkage functions
285 */
286static int
287ntb_probe(device_t device)
288{
289	struct ntb_hw_info *p;
290
291	p = ntb_get_device_info(pci_get_devid(device));
292	if (p == NULL)
293		return (ENXIO);
294
295	device_set_desc(device, p->desc);
296	return (0);
297}
298
299static int
300ntb_attach(device_t device)
301{
302	struct ntb_softc *ntb;
303	struct ntb_hw_info *p;
304	int error;
305
306	ntb = DEVICE2SOFTC(device);
307	p = ntb_get_device_info(pci_get_devid(device));
308
309	ntb->device = device;
310	ntb->type = p->type;
311	ntb->features = p->features;
312
313	/* Heartbeat timer for NTB_SOC since there is no link interrupt */
314	callout_init(&ntb->heartbeat_timer, 1);
315	callout_init(&ntb->lr_timer, 1);
316
317	if (ntb->type == NTB_SOC)
318		error = ntb_detect_soc(ntb);
319	else
320		error = ntb_detect_xeon(ntb);
321	if (error)
322		goto out;
323
324	error = ntb_map_pci_bars(ntb);
325	if (error)
326		goto out;
327	if (ntb->type == NTB_SOC)
328		error = ntb_setup_soc(ntb);
329	else
330		error = ntb_setup_xeon(ntb);
331	if (error)
332		goto out;
333	error = ntb_setup_interrupts(ntb);
334	if (error)
335		goto out;
336
337	pci_enable_busmaster(ntb->device);
338
339out:
340	if (error != 0)
341		ntb_detach(device);
342	return (error);
343}
344
345static int
346ntb_detach(device_t device)
347{
348	struct ntb_softc *ntb;
349
350	ntb = DEVICE2SOFTC(device);
351	callout_drain(&ntb->heartbeat_timer);
352	callout_drain(&ntb->lr_timer);
353	if (ntb->type == NTB_XEON)
354		ntb_teardown_xeon(ntb);
355	ntb_teardown_interrupts(ntb);
356	ntb_unmap_pci_bar(ntb);
357
358	return (0);
359}
360
361static int
362ntb_map_pci_bars(struct ntb_softc *ntb)
363{
364	int rc;
365
366	ntb->bar_info[NTB_CONFIG_BAR].pci_resource_id = PCIR_BAR(0);
367	rc = map_pci_bar(ntb, map_mmr_bar, &ntb->bar_info[NTB_CONFIG_BAR]);
368	if (rc != 0)
369		return (rc);
370
371	ntb->bar_info[NTB_B2B_BAR_1].pci_resource_id = PCIR_BAR(2);
372	rc = map_pci_bar(ntb, map_memory_window_bar,
373	    &ntb->bar_info[NTB_B2B_BAR_1]);
374	if (rc != 0)
375		return (rc);
376
377	ntb->bar_info[NTB_B2B_BAR_2].pci_resource_id = PCIR_BAR(4);
378	if (HAS_FEATURE(NTB_REGS_THRU_MW))
379		rc = map_pci_bar(ntb, map_mmr_bar,
380		    &ntb->bar_info[NTB_B2B_BAR_2]);
381	else
382		rc = map_pci_bar(ntb, map_memory_window_bar,
383		    &ntb->bar_info[NTB_B2B_BAR_2]);
384	return (rc);
385}
386
387static int
388map_pci_bar(struct ntb_softc *ntb, bar_map_strategy strategy,
389    struct ntb_pci_bar_info *bar)
390{
391	int rc;
392
393	rc = strategy(ntb, bar);
394	if (rc != 0)
395		device_printf(ntb->device,
396		    "unable to allocate pci resource\n");
397	else
398		device_printf(ntb->device,
399		    "Bar size = %lx, v %p, p %p\n",
400		    bar->size, bar->vbase, (void *)(bar->pbase));
401	return (rc);
402}
403
404static int
405map_mmr_bar(struct ntb_softc *ntb, struct ntb_pci_bar_info *bar)
406{
407
408	bar->pci_resource = bus_alloc_resource_any(ntb->device, SYS_RES_MEMORY,
409	    &bar->pci_resource_id, RF_ACTIVE);
410	if (bar->pci_resource == NULL)
411		return (ENXIO);
412
413	save_bar_parameters(bar);
414	return (0);
415}
416
417static int
418map_memory_window_bar(struct ntb_softc *ntb, struct ntb_pci_bar_info *bar)
419{
420	int rc;
421	uint8_t bar_size_bits = 0;
422
423	bar->pci_resource = bus_alloc_resource_any(ntb->device, SYS_RES_MEMORY,
424	    &bar->pci_resource_id, RF_ACTIVE);
425
426	if (bar->pci_resource == NULL)
427		return (ENXIO);
428
429	save_bar_parameters(bar);
430	/*
431	 * Ivytown NTB BAR sizes are misreported by the hardware due to a
432	 * hardware issue. To work around this, query the size it should be
433	 * configured to by the device and modify the resource to correspond to
434	 * this new size. The BIOS on systems with this problem is required to
435	 * provide enough address space to allow the driver to make this change
436	 * safely.
437	 *
438	 * Ideally I could have just specified the size when I allocated the
439	 * resource like:
440	 *  bus_alloc_resource(ntb->device,
441	 *	SYS_RES_MEMORY, &bar->pci_resource_id, 0ul, ~0ul,
442	 *	1ul << bar_size_bits, RF_ACTIVE);
443	 * but the PCI driver does not honor the size in this call, so we have
444	 * to modify it after the fact.
445	 */
446	if (HAS_FEATURE(NTB_BAR_SIZE_4K)) {
447		if (bar->pci_resource_id == PCIR_BAR(2))
448			bar_size_bits = pci_read_config(ntb->device,
449			    XEON_PBAR23SZ_OFFSET, 1);
450		else
451			bar_size_bits = pci_read_config(ntb->device,
452			    XEON_PBAR45SZ_OFFSET, 1);
453
454		rc = bus_adjust_resource(ntb->device, SYS_RES_MEMORY,
455		    bar->pci_resource, bar->pbase,
456		    bar->pbase + (1ul << bar_size_bits) - 1);
457		if (rc != 0) {
458			device_printf(ntb->device,
459			    "unable to resize bar\n");
460			return (rc);
461		}
462
463		save_bar_parameters(bar);
464	}
465
466	/* Mark bar region as write combining to improve performance. */
467	rc = pmap_change_attr((vm_offset_t)bar->vbase, bar->size,
468	    VM_MEMATTR_WRITE_COMBINING);
469	if (rc != 0) {
470		device_printf(ntb->device,
471		    "unable to mark bar as WRITE_COMBINING\n");
472		return (rc);
473	}
474	return (0);
475}
476
477static void
478ntb_unmap_pci_bar(struct ntb_softc *ntb)
479{
480	struct ntb_pci_bar_info *current_bar;
481	int i;
482
483	for (i = 0; i< NTB_MAX_BARS; i++) {
484		current_bar = &ntb->bar_info[i];
485		if (current_bar->pci_resource != NULL)
486			bus_release_resource(ntb->device, SYS_RES_MEMORY,
487			    current_bar->pci_resource_id,
488			    current_bar->pci_resource);
489	}
490}
491
492static int
493ntb_setup_xeon_msix(struct ntb_softc *ntb, uint32_t num_vectors)
494{
495	void (*interrupt_handler)(void *);
496	void *int_arg;
497	uint32_t i;
498	int rc;
499
500	if (num_vectors < 4)
501		return (ENOSPC);
502
503	for (i = 0; i < num_vectors; i++) {
504		ntb->int_info[i].rid = i + 1;
505		ntb->int_info[i].res = bus_alloc_resource_any(ntb->device,
506		    SYS_RES_IRQ, &ntb->int_info[i].rid, RF_ACTIVE);
507		if (ntb->int_info[i].res == NULL) {
508			device_printf(ntb->device,
509			    "bus_alloc_resource failed\n");
510			return (ENOMEM);
511		}
512		ntb->int_info[i].tag = NULL;
513		ntb->allocated_interrupts++;
514		if (i == num_vectors - 1) {
515			interrupt_handler = handle_xeon_event_irq;
516			int_arg = ntb;
517		} else {
518			interrupt_handler = handle_xeon_irq;
519			int_arg = &ntb->db_cb[i];
520		}
521		rc = bus_setup_intr(ntb->device, ntb->int_info[i].res,
522		    INTR_MPSAFE | INTR_TYPE_MISC, NULL, interrupt_handler,
523		    int_arg, &ntb->int_info[i].tag);
524		if (rc != 0) {
525			device_printf(ntb->device,
526			    "bus_setup_intr failed\n");
527			return (ENXIO);
528		}
529	}
530
531	/*
532	 * Prevent consumers from registering callbacks on the link event irq
533	 * slot, from which they will never be called back.
534	 */
535	ntb->db_cb[num_vectors - 1].reserved = true;
536	return (0);
537}
538
539static int
540ntb_setup_soc_msix(struct ntb_softc *ntb, uint32_t num_vectors)
541{
542	uint32_t i;
543	int rc;
544
545	for (i = 0; i < num_vectors; i++) {
546		ntb->int_info[i].rid = i + 1;
547		ntb->int_info[i].res = bus_alloc_resource_any(ntb->device,
548		    SYS_RES_IRQ, &ntb->int_info[i].rid, RF_ACTIVE);
549		if (ntb->int_info[i].res == NULL) {
550			device_printf(ntb->device,
551			    "bus_alloc_resource failed\n");
552			return (ENOMEM);
553		}
554		ntb->int_info[i].tag = NULL;
555		ntb->allocated_interrupts++;
556		rc = bus_setup_intr(ntb->device, ntb->int_info[i].res,
557		    INTR_MPSAFE | INTR_TYPE_MISC, NULL, handle_soc_irq,
558		    &ntb->db_cb[i], &ntb->int_info[i].tag);
559		if (rc != 0) {
560			device_printf(ntb->device, "bus_setup_intr failed\n");
561			return (ENXIO);
562		}
563	}
564	return (0);
565}
566
/*
 * The Linux NTB driver drops from MSI-X to legacy INTx if a unique vector
 * cannot be allocated for each MSI-X message.  JHB seems to think remapping
 * should be okay.  This tunable should enable us to test that hypothesis
 * when someone gets their hands on some Xeon hardware.
 *
 * When non-zero, ntb_setup_interrupts() pretends one fewer vector was
 * granted than requested, which forces the remap path to run.
 */
static int ntb_force_remap_mode;
SYSCTL_INT(_hw_ntb, OID_AUTO, force_remap_mode, CTLFLAG_RDTUN,
    &ntb_force_remap_mode, 0, "If enabled, force MSI-X messages to be remapped"
    " to a smaller number of ithreads, even if the desired number are "
    "available");
578
/*
 * In case it is NOT ok, give consumers an abort button.
 *
 * When non-zero, ntb_remap_msix() refuses to remap and returns ENXIO.
 */
static int ntb_prefer_intx;
SYSCTL_INT(_hw_ntb, OID_AUTO, prefer_intx_to_remap, CTLFLAG_RDTUN,
    &ntb_prefer_intx, 0, "If enabled, prefer to use legacy INTx mode rather "
    "than remapping MSI-X messages over available slots (match Linux driver "
    "behavior)");
587
588/*
589 * Remap the desired number of MSI-X messages to available ithreads in a simple
590 * round-robin fashion.
591 */
592static int
593ntb_remap_msix(device_t dev, uint32_t desired, uint32_t avail)
594{
595	u_int *vectors;
596	uint32_t i;
597	int rc;
598
599	if (ntb_prefer_intx != 0)
600		return (ENXIO);
601
602	vectors = malloc(desired * sizeof(*vectors), M_NTB, M_ZERO | M_WAITOK);
603
604	for (i = 0; i < desired; i++)
605		vectors[i] = (i % avail) + 1;
606
607	rc = pci_remap_msix(dev, desired, vectors);
608	free(vectors, M_NTB);
609	return (rc);
610}
611
612static int
613ntb_setup_interrupts(struct ntb_softc *ntb)
614{
615	uint32_t desired_vectors, num_vectors;
616	uint64_t mask;
617	int rc;
618
619	ntb->allocated_interrupts = 0;
620
621	/*
622	 * On SOC, disable all interrupts.  On XEON, disable all but Link
623	 * Interrupt.  The rest will be unmasked as callbacks are registered.
624	 */
625	mask = 0;
626	if (ntb->type == NTB_XEON)
627		mask = (1 << XEON_LINK_DB);
628	db_iowrite(ntb, ntb->reg_ofs.ldb_mask, ~mask);
629
630	num_vectors = desired_vectors = MIN(pci_msix_count(ntb->device),
631	    ntb->limits.max_db_bits);
632	if (desired_vectors >= 1) {
633		rc = pci_alloc_msix(ntb->device, &num_vectors);
634
635		if (ntb_force_remap_mode != 0 && rc == 0 &&
636		    num_vectors == desired_vectors)
637			num_vectors--;
638
639		if (rc == 0 && num_vectors < desired_vectors) {
640			rc = ntb_remap_msix(ntb->device, desired_vectors,
641			    num_vectors);
642			if (rc == 0)
643				num_vectors = desired_vectors;
644			else
645				pci_release_msi(ntb->device);
646		}
647		if (rc != 0)
648			num_vectors = 1;
649	} else
650		num_vectors = 1;
651
652	ntb_create_callbacks(ntb, num_vectors);
653
654	if (ntb->type == NTB_XEON)
655		rc = ntb_setup_xeon_msix(ntb, num_vectors);
656	else
657		rc = ntb_setup_soc_msix(ntb, num_vectors);
658	if (rc != 0)
659		device_printf(ntb->device,
660		    "Error allocating MSI-X interrupts: %d\n", rc);
661
662	if (ntb->type == NTB_XEON && rc == ENOSPC)
663		rc = ntb_setup_legacy_interrupt(ntb);
664
665	return (rc);
666}
667
668static int
669ntb_setup_legacy_interrupt(struct ntb_softc *ntb)
670{
671	int rc;
672
673	ntb->int_info[0].rid = 0;
674	ntb->int_info[0].res = bus_alloc_resource_any(ntb->device, SYS_RES_IRQ,
675	    &ntb->int_info[0].rid, RF_SHAREABLE|RF_ACTIVE);
676	if (ntb->int_info[0].res == NULL) {
677		device_printf(ntb->device, "bus_alloc_resource failed\n");
678		return (ENOMEM);
679	}
680
681	ntb->int_info[0].tag = NULL;
682	ntb->allocated_interrupts = 1;
683
684	rc = bus_setup_intr(ntb->device, ntb->int_info[0].res,
685	    INTR_MPSAFE | INTR_TYPE_MISC, NULL, ntb_handle_legacy_interrupt,
686	    ntb, &ntb->int_info[0].tag);
687	if (rc != 0) {
688		device_printf(ntb->device, "bus_setup_intr failed\n");
689		return (ENXIO);
690	}
691
692	return (0);
693}
694
695static void
696ntb_teardown_interrupts(struct ntb_softc *ntb)
697{
698	struct ntb_int_info *current_int;
699	int i;
700
701	for (i = 0; i < ntb->allocated_interrupts; i++) {
702		current_int = &ntb->int_info[i];
703		if (current_int->tag != NULL)
704			bus_teardown_intr(ntb->device, current_int->res,
705			    current_int->tag);
706
707		if (current_int->res != NULL)
708			bus_release_resource(ntb->device, SYS_RES_IRQ,
709			    rman_get_rid(current_int->res), current_int->res);
710	}
711
712	ntb_free_callbacks(ntb);
713	pci_release_msi(ntb->device);
714}
715
716/*
717 * Doorbell register and mask are 64-bit on SoC, 16-bit on Xeon.  Abstract it
718 * out to make code clearer.
719 */
720static uint64_t
721db_ioread(struct ntb_softc *ntb, uint32_t regoff)
722{
723
724	if (ntb->type == NTB_SOC)
725		return (ntb_reg_read(8, regoff));
726
727	KASSERT(ntb->type == NTB_XEON, ("bad ntb type"));
728
729	return (ntb_reg_read(2, regoff));
730}
731
732static void
733db_iowrite(struct ntb_softc *ntb, uint32_t regoff, uint64_t val)
734{
735
736	if (ntb->type == NTB_SOC) {
737		ntb_reg_write(8, regoff, val);
738		return;
739	}
740
741	KASSERT(ntb->type == NTB_XEON, ("bad ntb type"));
742	ntb_reg_write(2, regoff, (uint16_t)val);
743}
744
745static void
746mask_ldb_interrupt(struct ntb_softc *ntb, unsigned int idx)
747{
748	uint64_t mask;
749
750	mask = db_ioread(ntb, ntb->reg_ofs.ldb_mask);
751	mask |= 1 << (idx * ntb->bits_per_vector);
752	db_iowrite(ntb, ntb->reg_ofs.ldb_mask, mask);
753}
754
755static void
756unmask_ldb_interrupt(struct ntb_softc *ntb, unsigned int idx)
757{
758	uint64_t mask;
759
760	mask = db_ioread(ntb, ntb->reg_ofs.ldb_mask);
761	mask &= ~(1 << (idx * ntb->bits_per_vector));
762	db_iowrite(ntb, ntb->reg_ofs.ldb_mask, mask);
763}
764
765static void
766handle_soc_irq(void *arg)
767{
768	struct ntb_db_cb *db_cb = arg;
769	struct ntb_softc *ntb = db_cb->ntb;
770
771	db_iowrite(ntb, ntb->reg_ofs.ldb, (uint64_t) 1 << db_cb->db_num);
772
773	if (db_cb->callback != NULL) {
774		mask_ldb_interrupt(ntb, db_cb->db_num);
775		callout_reset(&db_cb->irq_work, 0, ntb_irq_work, db_cb);
776	}
777}
778
779static void
780handle_xeon_irq(void *arg)
781{
782	struct ntb_db_cb *db_cb = arg;
783	struct ntb_softc *ntb = db_cb->ntb;
784
785	/*
786	 * On Xeon, there are 16 bits in the interrupt register
787	 * but only 4 vectors.  So, 5 bits are assigned to the first 3
788	 * vectors, with the 4th having a single bit for link
789	 * interrupts.
790	 */
791	db_iowrite(ntb, ntb->reg_ofs.ldb,
792	    ((1 << ntb->bits_per_vector) - 1) <<
793	    (db_cb->db_num * ntb->bits_per_vector));
794
795	if (db_cb->callback != NULL) {
796		mask_ldb_interrupt(ntb, db_cb->db_num);
797		callout_reset(&db_cb->irq_work, 0, ntb_irq_work, db_cb);
798	}
799}
800
801/* Since we do not have a HW doorbell in SOC, this is only used in JF/JT */
802static void
803handle_xeon_event_irq(void *arg)
804{
805	struct ntb_softc *ntb = arg;
806	int rc;
807
808	rc = ntb_check_link_status(ntb);
809	if (rc != 0)
810		device_printf(ntb->device, "Error determining link status\n");
811
812	/* bit 15 is always the link bit */
813	db_iowrite(ntb, ntb->reg_ofs.ldb, 1 << XEON_LINK_DB);
814}
815
816static void
817ntb_handle_legacy_interrupt(void *arg)
818{
819	struct ntb_softc *ntb = arg;
820	unsigned int i;
821	uint64_t ldb;
822
823	ldb = db_ioread(ntb, ntb->reg_ofs.ldb);
824
825	if (ntb->type == NTB_XEON && (ldb & XEON_DB_HW_LINK) != 0) {
826		handle_xeon_event_irq(ntb);
827		ldb &= ~XEON_DB_HW_LINK;
828	}
829
830	while (ldb != 0) {
831		i = ffs(ldb);
832		ldb &= ldb - 1;
833		if (ntb->type == NTB_SOC)
834			handle_soc_irq(&ntb->db_cb[i]);
835		else
836			handle_xeon_irq(&ntb->db_cb[i]);
837	}
838}
839
840static int
841ntb_create_callbacks(struct ntb_softc *ntb, uint32_t num_vectors)
842{
843	uint32_t i;
844
845	ntb->db_cb = malloc(num_vectors * sizeof(*ntb->db_cb), M_NTB,
846	    M_ZERO | M_WAITOK);
847	for (i = 0; i < num_vectors; i++) {
848		ntb->db_cb[i].db_num = i;
849		ntb->db_cb[i].ntb = ntb;
850	}
851
852	return (0);
853}
854
855static void
856ntb_free_callbacks(struct ntb_softc *ntb)
857{
858	uint8_t i;
859
860	for (i = 0; i < ntb->limits.max_db_bits; i++)
861		ntb_unregister_db_callback(ntb, i);
862
863	free(ntb->db_cb, M_NTB);
864}
865
866static struct ntb_hw_info *
867ntb_get_device_info(uint32_t device_id)
868{
869	struct ntb_hw_info *ep = pci_ids;
870
871	while (ep->device_id) {
872		if (ep->device_id == device_id)
873			return (ep);
874		++ep;
875	}
876	return (NULL);
877}
878
879static void
880ntb_teardown_xeon(struct ntb_softc *ntb)
881{
882
883	ntb_hw_link_down(ntb);
884}
885
886static int
887ntb_detect_xeon(struct ntb_softc *ntb)
888{
889	uint8_t ppd, conn_type;
890
891	ppd = pci_read_config(ntb->device, NTB_PPD_OFFSET, 1);
892	ntb->ppd = ppd;
893
894	if ((ppd & XEON_PPD_DEV_TYPE) != 0)
895		ntb->dev_type = NTB_DEV_USD;
896	else
897		ntb->dev_type = NTB_DEV_DSD;
898
899	conn_type = ppd & XEON_PPD_CONN_TYPE;
900	switch (conn_type) {
901	case NTB_CONN_B2B:
902		ntb->conn_type = conn_type;
903		break;
904	case NTB_CONN_RP:
905	case NTB_CONN_TRANSPARENT:
906	default:
907		device_printf(ntb->device, "Unsupported connection type: %u\n",
908		    (unsigned)conn_type);
909		return (ENXIO);
910	}
911	return (0);
912}
913
914static int
915ntb_detect_soc(struct ntb_softc *ntb)
916{
917	uint32_t ppd, conn_type;
918
919	ppd = pci_read_config(ntb->device, NTB_PPD_OFFSET, 4);
920	ntb->ppd = ppd;
921
922	if ((ppd & SOC_PPD_DEV_TYPE) != 0)
923		ntb->dev_type = NTB_DEV_DSD;
924	else
925		ntb->dev_type = NTB_DEV_USD;
926
927	conn_type = (ppd & SOC_PPD_CONN_TYPE) >> 8;
928	switch (conn_type) {
929	case NTB_CONN_B2B:
930		ntb->conn_type = conn_type;
931		break;
932	default:
933		device_printf(ntb->device, "Unsupported NTB configuration\n");
934		return (ENXIO);
935	}
936	return (0);
937}
938
939static int
940ntb_setup_xeon(struct ntb_softc *ntb)
941{
942
943	ntb->reg_ofs.ldb	= XEON_PDOORBELL_OFFSET;
944	ntb->reg_ofs.ldb_mask	= XEON_PDBMSK_OFFSET;
945	ntb->reg_ofs.spad_local	= XEON_SPAD_OFFSET;
946	ntb->reg_ofs.bar2_xlat	= XEON_SBAR2XLAT_OFFSET;
947	ntb->reg_ofs.bar4_xlat	= XEON_SBAR4XLAT_OFFSET;
948
949	switch (ntb->conn_type) {
950	case NTB_CONN_B2B:
951		/*
952		 * reg_ofs.rdb and reg_ofs.spad_remote are effectively ignored
953		 * with the NTB_REGS_THRU_MW errata mode enabled.  (See
954		 * ntb_ring_doorbell() and ntb_read/write_remote_spad().)
955		 */
956		ntb->reg_ofs.rdb	 = XEON_B2B_DOORBELL_OFFSET;
957		ntb->reg_ofs.spad_remote = XEON_B2B_SPAD_OFFSET;
958
959		ntb->limits.max_spads	 = XEON_MAX_SPADS;
960		break;
961
962	case NTB_CONN_RP:
963		/*
964		 * Every Xeon today needs NTB_REGS_THRU_MW, so punt on RP for
965		 * now.
966		 */
967		KASSERT(HAS_FEATURE(NTB_REGS_THRU_MW),
968		    ("Xeon without MW errata unimplemented"));
969		device_printf(ntb->device,
970		    "NTB-RP disabled to due hardware errata.\n");
971		return (ENXIO);
972
973	case NTB_CONN_TRANSPARENT:
974	default:
975		device_printf(ntb->device, "Connection type %d not supported\n",
976		    ntb->conn_type);
977		return (ENXIO);
978	}
979
980	/*
981	 * There is a Xeon hardware errata related to writes to SDOORBELL or
982	 * B2BDOORBELL in conjunction with inbound access to NTB MMIO space,
983	 * which may hang the system.  To workaround this use the second memory
984	 * window to access the interrupt and scratch pad registers on the
985	 * remote system.
986	 *
987	 * There is another HW errata on the limit registers -- they can only
988	 * be written when the base register is (?)4GB aligned and < 32-bit.
989	 * This should already be the case based on the driver defaults, but
990	 * write the limit registers first just in case.
991	 */
992	if (HAS_FEATURE(NTB_REGS_THRU_MW))
993		/*
994		 * Set the Limit register to 4k, the minimum size, to prevent
995		 * an illegal access.
996		 */
997		ntb_reg_write(8, XEON_PBAR4LMT_OFFSET,
998		    ntb_get_mw_size(ntb, 1) + 0x1000);
999	else
1000		/*
1001		 * Disable the limit register, just in case it is set to
1002		 * something silly.
1003		 */
1004		ntb_reg_write(8, XEON_PBAR4LMT_OFFSET, 0);
1005
1006
1007	ntb->reg_ofs.lnk_cntl	 = XEON_NTBCNTL_OFFSET;
1008	ntb->reg_ofs.lnk_stat	 = XEON_LINK_STATUS_OFFSET;
1009	ntb->reg_ofs.spci_cmd	 = XEON_PCICMD_OFFSET;
1010
1011	ntb->limits.max_db_bits	 = XEON_MAX_DB_BITS;
1012	ntb->limits.msix_cnt	 = XEON_MSIX_CNT;
1013	ntb->bits_per_vector	 = XEON_DB_BITS_PER_VEC;
1014
1015	/*
1016	 * HW Errata on bit 14 of b2bdoorbell register.  Writes will not be
1017	 * mirrored to the remote system.  Shrink the number of bits by one,
1018	 * since bit 14 is the last bit.
1019	 *
1020	 * On REGS_THRU_MW errata mode, we don't use the b2bdoorbell register
1021	 * anyway.  Nor for non-B2B connection types.
1022	 */
1023	if (HAS_FEATURE(NTB_B2BDOORBELL_BIT14) &&
1024	    !HAS_FEATURE(NTB_REGS_THRU_MW) &&
1025	    ntb->conn_type == NTB_CONN_B2B)
1026		ntb->limits.max_db_bits = XEON_MAX_DB_BITS - 1;
1027
1028	configure_xeon_secondary_side_bars(ntb);
1029
1030	/* Enable Bus Master and Memory Space on the secondary side */
1031	if (ntb->conn_type == NTB_CONN_B2B)
1032		ntb_reg_write(2, ntb->reg_ofs.spci_cmd,
1033		    PCIM_CMD_MEMEN | PCIM_CMD_BUSMASTEREN);
1034
1035	/* Enable link training */
1036	ntb_hw_link_up(ntb);
1037
1038	return (0);
1039}
1040
1041static int
1042ntb_setup_soc(struct ntb_softc *ntb)
1043{
1044
1045	KASSERT(ntb->conn_type == NTB_CONN_B2B,
1046	    ("Unsupported NTB configuration (%d)\n", ntb->conn_type));
1047
1048	/* Initiate PCI-E link training */
1049	pci_write_config(ntb->device, NTB_PPD_OFFSET,
1050	    ntb->ppd | SOC_PPD_INIT_LINK, 4);
1051
1052	ntb->reg_ofs.ldb	 = SOC_PDOORBELL_OFFSET;
1053	ntb->reg_ofs.ldb_mask	 = SOC_PDBMSK_OFFSET;
1054	ntb->reg_ofs.rdb	 = SOC_B2B_DOORBELL_OFFSET;
1055	ntb->reg_ofs.bar2_xlat	 = SOC_SBAR2XLAT_OFFSET;
1056	ntb->reg_ofs.bar4_xlat	 = SOC_SBAR4XLAT_OFFSET;
1057	ntb->reg_ofs.lnk_cntl	 = SOC_NTBCNTL_OFFSET;
1058	ntb->reg_ofs.lnk_stat	 = SOC_LINK_STATUS_OFFSET;
1059	ntb->reg_ofs.spad_local	 = SOC_SPAD_OFFSET;
1060	ntb->reg_ofs.spad_remote = SOC_B2B_SPAD_OFFSET;
1061	ntb->reg_ofs.spci_cmd	 = SOC_PCICMD_OFFSET;
1062
1063	ntb->limits.max_spads	 = SOC_MAX_SPADS;
1064	ntb->limits.max_db_bits	 = SOC_MAX_DB_BITS;
1065	ntb->limits.msix_cnt	 = SOC_MSIX_CNT;
1066	ntb->bits_per_vector	 = SOC_DB_BITS_PER_VEC;
1067
1068	/*
1069	 * FIXME - MSI-X bug on early SOC HW, remove once internal issue is
1070	 * resolved.  Mask transaction layer internal parity errors.
1071	 */
1072	pci_write_config(ntb->device, 0xFC, 0x4, 4);
1073
1074	configure_soc_secondary_side_bars(ntb);
1075
1076	/* Enable Bus Master and Memory Space on the secondary side */
1077	ntb_reg_write(2, ntb->reg_ofs.spci_cmd,
1078	    PCIM_CMD_MEMEN | PCIM_CMD_BUSMASTEREN);
1079
1080	callout_reset(&ntb->heartbeat_timer, 0, ntb_handle_heartbeat, ntb);
1081
1082	return (0);
1083}
1084
1085static void
1086configure_soc_secondary_side_bars(struct ntb_softc *ntb)
1087{
1088
1089	if (ntb->dev_type == NTB_DEV_USD) {
1090		ntb_reg_write(8, SOC_PBAR2XLAT_OFFSET, PBAR2XLAT_USD_ADDR);
1091		ntb_reg_write(8, SOC_PBAR4XLAT_OFFSET, PBAR4XLAT_USD_ADDR);
1092		ntb_reg_write(8, SOC_MBAR23_OFFSET, MBAR23_USD_ADDR);
1093		ntb_reg_write(8, SOC_MBAR45_OFFSET, MBAR45_USD_ADDR);
1094	} else {
1095		ntb_reg_write(8, SOC_PBAR2XLAT_OFFSET, PBAR2XLAT_DSD_ADDR);
1096		ntb_reg_write(8, SOC_PBAR4XLAT_OFFSET, PBAR4XLAT_DSD_ADDR);
1097		ntb_reg_write(8, SOC_MBAR23_OFFSET, MBAR23_DSD_ADDR);
1098		ntb_reg_write(8, SOC_MBAR45_OFFSET, MBAR45_DSD_ADDR);
1099	}
1100}
1101
1102static void
1103configure_xeon_secondary_side_bars(struct ntb_softc *ntb)
1104{
1105
1106	if (ntb->dev_type == NTB_DEV_USD) {
1107		ntb_reg_write(8, XEON_PBAR2XLAT_OFFSET, PBAR2XLAT_USD_ADDR);
1108		if (HAS_FEATURE(NTB_REGS_THRU_MW))
1109			ntb_reg_write(8, XEON_PBAR4XLAT_OFFSET,
1110			    MBAR01_DSD_ADDR);
1111		else {
1112			ntb_reg_write(8, XEON_PBAR4XLAT_OFFSET,
1113			    PBAR4XLAT_USD_ADDR);
1114			/*
1115			 * B2B_XLAT_OFFSET is a 64-bit register but can only be
1116			 * written 32 bits at a time.
1117			 */
1118			ntb_reg_write(4, XEON_B2B_XLAT_OFFSETL,
1119			    MBAR01_DSD_ADDR & 0xffffffff);
1120			ntb_reg_write(4, XEON_B2B_XLAT_OFFSETU,
1121			    MBAR01_DSD_ADDR >> 32);
1122		}
1123		ntb_reg_write(8, XEON_SBAR0BASE_OFFSET, MBAR01_USD_ADDR);
1124		ntb_reg_write(8, XEON_SBAR2BASE_OFFSET, MBAR23_USD_ADDR);
1125		ntb_reg_write(8, XEON_SBAR4BASE_OFFSET, MBAR45_USD_ADDR);
1126	} else {
1127		ntb_reg_write(8, XEON_PBAR2XLAT_OFFSET, PBAR2XLAT_DSD_ADDR);
1128		if (HAS_FEATURE(NTB_REGS_THRU_MW))
1129			ntb_reg_write(8, XEON_PBAR4XLAT_OFFSET,
1130			    MBAR01_USD_ADDR);
1131		else {
1132			ntb_reg_write(8, XEON_PBAR4XLAT_OFFSET,
1133			    PBAR4XLAT_DSD_ADDR);
1134			/*
1135			 * B2B_XLAT_OFFSET is a 64-bit register but can only be
1136			 * written 32 bits at a time.
1137			 */
1138			ntb_reg_write(4, XEON_B2B_XLAT_OFFSETL,
1139			    MBAR01_USD_ADDR & 0xffffffff);
1140			ntb_reg_write(4, XEON_B2B_XLAT_OFFSETU,
1141			    MBAR01_USD_ADDR >> 32);
1142		}
1143		ntb_reg_write(8, XEON_SBAR0BASE_OFFSET, MBAR01_DSD_ADDR);
1144		ntb_reg_write(8, XEON_SBAR2BASE_OFFSET, MBAR23_DSD_ADDR);
1145		ntb_reg_write(8, XEON_SBAR4BASE_OFFSET, MBAR45_DSD_ADDR);
1146	}
1147}
1148
1149/* SOC does not have link status interrupt, poll on that platform */
1150static void
1151ntb_handle_heartbeat(void *arg)
1152{
1153	struct ntb_softc *ntb = arg;
1154	uint32_t status32;
1155	int rc;
1156
1157	rc = ntb_check_link_status(ntb);
1158	if (rc != 0)
1159		device_printf(ntb->device,
1160		    "Error determining link status\n");
1161
1162	/* Check to see if a link error is the cause of the link down */
1163	if (ntb->link_status == NTB_LINK_DOWN) {
1164		status32 = ntb_reg_read(4, SOC_LTSSMSTATEJMP_OFFSET);
1165		if ((status32 & SOC_LTSSMSTATEJMP_FORCEDETECT) != 0) {
1166			callout_reset(&ntb->lr_timer, 0, recover_soc_link,
1167			    ntb);
1168			return;
1169		}
1170	}
1171
1172	callout_reset(&ntb->heartbeat_timer, NTB_HB_TIMEOUT * hz,
1173	    ntb_handle_heartbeat, ntb);
1174}
1175
1176static void
1177soc_perform_link_restart(struct ntb_softc *ntb)
1178{
1179	uint32_t status;
1180
1181	/* Driver resets the NTB ModPhy lanes - magic! */
1182	ntb_reg_write(1, SOC_MODPHY_PCSREG6, 0xe0);
1183	ntb_reg_write(1, SOC_MODPHY_PCSREG4, 0x40);
1184	ntb_reg_write(1, SOC_MODPHY_PCSREG4, 0x60);
1185	ntb_reg_write(1, SOC_MODPHY_PCSREG6, 0x60);
1186
1187	/* Driver waits 100ms to allow the NTB ModPhy to settle */
1188	pause("ModPhy", hz / 10);
1189
1190	/* Clear AER Errors, write to clear */
1191	status = ntb_reg_read(4, SOC_ERRCORSTS_OFFSET);
1192	status &= PCIM_AER_COR_REPLAY_ROLLOVER;
1193	ntb_reg_write(4, SOC_ERRCORSTS_OFFSET, status);
1194
1195	/* Clear unexpected electrical idle event in LTSSM, write to clear */
1196	status = ntb_reg_read(4, SOC_LTSSMERRSTS0_OFFSET);
1197	status |= SOC_LTSSMERRSTS0_UNEXPECTEDEI;
1198	ntb_reg_write(4, SOC_LTSSMERRSTS0_OFFSET, status);
1199
1200	/* Clear DeSkew Buffer error, write to clear */
1201	status = ntb_reg_read(4, SOC_DESKEWSTS_OFFSET);
1202	status |= SOC_DESKEWSTS_DBERR;
1203	ntb_reg_write(4, SOC_DESKEWSTS_OFFSET, status);
1204
1205	status = ntb_reg_read(4, SOC_IBSTERRRCRVSTS0_OFFSET);
1206	status &= SOC_IBIST_ERR_OFLOW;
1207	ntb_reg_write(4, SOC_IBSTERRRCRVSTS0_OFFSET, status);
1208
1209	/* Releases the NTB state machine to allow the link to retrain */
1210	status = ntb_reg_read(4, SOC_LTSSMSTATEJMP_OFFSET);
1211	status &= ~SOC_LTSSMSTATEJMP_FORCEDETECT;
1212	ntb_reg_write(4, SOC_LTSSMSTATEJMP_OFFSET, status);
1213}
1214
1215static void
1216ntb_handle_link_event(struct ntb_softc *ntb, int link_state)
1217{
1218	enum ntb_hw_event event;
1219	uint16_t status;
1220
1221	if (ntb->link_status == link_state)
1222		return;
1223
1224	if (link_state == NTB_LINK_UP) {
1225		device_printf(ntb->device, "Link Up\n");
1226		ntb->link_status = NTB_LINK_UP;
1227		event = NTB_EVENT_HW_LINK_UP;
1228
1229		if (ntb->type == NTB_SOC ||
1230		    ntb->conn_type == NTB_CONN_TRANSPARENT)
1231			status = ntb_reg_read(2, ntb->reg_ofs.lnk_stat);
1232		else
1233			status = pci_read_config(ntb->device,
1234			    XEON_LINK_STATUS_OFFSET, 2);
1235		ntb->link_width = (status & NTB_LINK_WIDTH_MASK) >> 4;
1236		ntb->link_speed = (status & NTB_LINK_SPEED_MASK);
1237		device_printf(ntb->device, "Link Width %d, Link Speed %d\n",
1238		    ntb->link_width, ntb->link_speed);
1239		callout_reset(&ntb->heartbeat_timer, NTB_HB_TIMEOUT * hz,
1240		    ntb_handle_heartbeat, ntb);
1241	} else {
1242		device_printf(ntb->device, "Link Down\n");
1243		ntb->link_status = NTB_LINK_DOWN;
1244		event = NTB_EVENT_HW_LINK_DOWN;
1245		/* Do not modify link width/speed, we need it in link recovery */
1246	}
1247
1248	/* notify the upper layer if we have an event change */
1249	if (ntb->event_cb != NULL)
1250		ntb->event_cb(ntb->ntb_transport, event);
1251}
1252
1253static void
1254ntb_hw_link_up(struct ntb_softc *ntb)
1255{
1256	uint32_t cntl;
1257
1258	if (ntb->conn_type == NTB_CONN_TRANSPARENT) {
1259		ntb_handle_link_event(ntb, NTB_LINK_UP);
1260		return;
1261	}
1262
1263	cntl = ntb_reg_read(4, ntb->reg_ofs.lnk_cntl);
1264	cntl &= ~(NTB_CNTL_LINK_DISABLE | NTB_CNTL_CFG_LOCK);
1265	cntl |= NTB_CNTL_P2S_BAR23_SNOOP | NTB_CNTL_S2P_BAR23_SNOOP;
1266	cntl |= NTB_CNTL_P2S_BAR45_SNOOP | NTB_CNTL_S2P_BAR45_SNOOP;
1267	ntb_reg_write(4, ntb->reg_ofs.lnk_cntl, cntl);
1268}
1269
1270static void
1271ntb_hw_link_down(struct ntb_softc *ntb)
1272{
1273	uint32_t cntl;
1274
1275	if (ntb->conn_type == NTB_CONN_TRANSPARENT) {
1276		ntb_handle_link_event(ntb, NTB_LINK_DOWN);
1277		return;
1278	}
1279
1280	cntl = ntb_reg_read(4, ntb->reg_ofs.lnk_cntl);
1281	cntl &= ~(NTB_CNTL_P2S_BAR23_SNOOP | NTB_CNTL_S2P_BAR23_SNOOP);
1282	cntl &= ~(NTB_CNTL_P2S_BAR45_SNOOP | NTB_CNTL_S2P_BAR45_SNOOP);
1283	cntl |= NTB_CNTL_LINK_DISABLE | NTB_CNTL_CFG_LOCK;
1284	ntb_reg_write(4, ntb->reg_ofs.lnk_cntl, cntl);
1285}
1286
1287static void
1288recover_soc_link(void *arg)
1289{
1290	struct ntb_softc *ntb = arg;
1291	uint8_t speed, width;
1292	uint32_t status32;
1293	uint16_t status16;
1294
1295	soc_perform_link_restart(ntb);
1296
1297	/*
1298	 * There is a potential race between the 2 NTB devices recovering at
1299	 * the same time.  If the times are the same, the link will not recover
1300	 * and the driver will be stuck in this loop forever.  Add a random
1301	 * interval to the recovery time to prevent this race.
1302	 */
1303	status32 = arc4random() % SOC_LINK_RECOVERY_TIME;
1304	pause("Link", (SOC_LINK_RECOVERY_TIME + status32) * hz / 1000);
1305
1306	status32 = ntb_reg_read(4, SOC_LTSSMSTATEJMP_OFFSET);
1307	if ((status32 & SOC_LTSSMSTATEJMP_FORCEDETECT) != 0)
1308		goto retry;
1309
1310	status32 = ntb_reg_read(4, SOC_IBSTERRRCRVSTS0_OFFSET);
1311	if ((status32 & SOC_IBIST_ERR_OFLOW) != 0)
1312		goto retry;
1313
1314	status32 = ntb_reg_read(4, ntb->reg_ofs.lnk_cntl);
1315	if ((status32 & SOC_CNTL_LINK_DOWN) != 0)
1316		goto out;
1317
1318	status16 = ntb_reg_read(2, ntb->reg_ofs.lnk_stat);
1319	width = (status16 & NTB_LINK_WIDTH_MASK) >> 4;
1320	speed = (status16 & NTB_LINK_SPEED_MASK);
1321	if (ntb->link_width != width || ntb->link_speed != speed)
1322		goto retry;
1323
1324out:
1325	callout_reset(&ntb->heartbeat_timer, NTB_HB_TIMEOUT * hz,
1326	    ntb_handle_heartbeat, ntb);
1327	return;
1328
1329retry:
1330	callout_reset(&ntb->lr_timer, NTB_HB_TIMEOUT * hz, recover_soc_link,
1331	    ntb);
1332}
1333
1334static int
1335ntb_check_link_status(struct ntb_softc *ntb)
1336{
1337	int link_state;
1338	uint32_t ntb_cntl;
1339	uint16_t status;
1340
1341	if (ntb->type == NTB_SOC) {
1342		ntb_cntl = ntb_reg_read(4, ntb->reg_ofs.lnk_cntl);
1343		if ((ntb_cntl & SOC_CNTL_LINK_DOWN) != 0)
1344			link_state = NTB_LINK_DOWN;
1345		else
1346			link_state = NTB_LINK_UP;
1347	} else {
1348		status = pci_read_config(ntb->device, XEON_LINK_STATUS_OFFSET,
1349		    2);
1350
1351		if ((status & NTB_LINK_STATUS_ACTIVE) != 0)
1352			link_state = NTB_LINK_UP;
1353		else
1354			link_state = NTB_LINK_DOWN;
1355	}
1356
1357	ntb_handle_link_event(ntb, link_state);
1358
1359	return (0);
1360}
1361
1362/**
1363 * ntb_register_event_callback() - register event callback
1364 * @ntb: pointer to ntb_softc instance
1365 * @func: callback function to register
1366 *
1367 * This function registers a callback for any HW driver events such as link
1368 * up/down, power management notices and etc.
1369 *
1370 * RETURNS: An appropriate ERRNO error value on error, or zero for success.
1371 */
1372int
1373ntb_register_event_callback(struct ntb_softc *ntb, ntb_event_callback func)
1374{
1375
1376	if (ntb->event_cb != NULL)
1377		return (EINVAL);
1378
1379	ntb->event_cb = func;
1380
1381	return (0);
1382}
1383
1384/**
1385 * ntb_unregister_event_callback() - unregisters the event callback
1386 * @ntb: pointer to ntb_softc instance
1387 *
1388 * This function unregisters the existing callback from transport
1389 */
1390void
1391ntb_unregister_event_callback(struct ntb_softc *ntb)
1392{
1393
1394	ntb->event_cb = NULL;
1395}
1396
1397static void
1398ntb_irq_work(void *arg)
1399{
1400	struct ntb_db_cb *db_cb = arg;
1401	struct ntb_softc *ntb;
1402	int rc;
1403
1404	rc = db_cb->callback(db_cb->data, db_cb->db_num);
1405	/* Poll if forward progress was made. */
1406	if (rc != 0) {
1407		callout_reset(&db_cb->irq_work, 0, ntb_irq_work, db_cb);
1408		return;
1409	}
1410
1411	/* Unmask interrupt if no progress was made. */
1412	ntb = db_cb->ntb;
1413	unmask_ldb_interrupt(ntb, db_cb->db_num);
1414}
1415
1416/**
1417 * ntb_register_db_callback() - register a callback for doorbell interrupt
1418 * @ntb: pointer to ntb_softc instance
1419 * @idx: doorbell index to register callback, zero based
1420 * @data: pointer to be returned to caller with every callback
1421 * @func: callback function to register
1422 *
1423 * This function registers a callback function for the doorbell interrupt
1424 * on the primary side. The function will unmask the doorbell as well to
1425 * allow interrupt.
1426 *
1427 * RETURNS: An appropriate ERRNO error value on error, or zero for success.
1428 */
1429int
1430ntb_register_db_callback(struct ntb_softc *ntb, unsigned int idx, void *data,
1431    ntb_db_callback func)
1432{
1433	struct ntb_db_cb *db_cb = &ntb->db_cb[idx];
1434
1435	if (idx >= ntb->allocated_interrupts || db_cb->callback ||
1436	    db_cb->reserved) {
1437		device_printf(ntb->device, "Invalid Index.\n");
1438		return (EINVAL);
1439	}
1440
1441	db_cb->callback = func;
1442	db_cb->data = data;
1443	callout_init(&db_cb->irq_work, 1);
1444
1445	unmask_ldb_interrupt(ntb, idx);
1446
1447	return (0);
1448}
1449
1450/**
1451 * ntb_unregister_db_callback() - unregister a callback for doorbell interrupt
1452 * @ntb: pointer to ntb_softc instance
1453 * @idx: doorbell index to register callback, zero based
1454 *
1455 * This function unregisters a callback function for the doorbell interrupt
1456 * on the primary side. The function will also mask the said doorbell.
1457 */
1458void
1459ntb_unregister_db_callback(struct ntb_softc *ntb, unsigned int idx)
1460{
1461
1462	if (idx >= ntb->allocated_interrupts || !ntb->db_cb[idx].callback)
1463		return;
1464
1465	mask_ldb_interrupt(ntb, idx);
1466
1467	callout_drain(&ntb->db_cb[idx].irq_work);
1468	ntb->db_cb[idx].callback = NULL;
1469}
1470
1471/**
1472 * ntb_find_transport() - find the transport pointer
1473 * @transport: pointer to pci device
1474 *
1475 * Given the pci device pointer, return the transport pointer passed in when
1476 * the transport attached when it was inited.
1477 *
1478 * RETURNS: pointer to transport.
1479 */
1480void *
1481ntb_find_transport(struct ntb_softc *ntb)
1482{
1483
1484	return (ntb->ntb_transport);
1485}
1486
1487/**
1488 * ntb_register_transport() - Register NTB transport with NTB HW driver
1489 * @transport: transport identifier
1490 *
1491 * This function allows a transport to reserve the hardware driver for
1492 * NTB usage.
1493 *
1494 * RETURNS: pointer to ntb_softc, NULL on error.
1495 */
1496struct ntb_softc *
1497ntb_register_transport(struct ntb_softc *ntb, void *transport)
1498{
1499
1500	/*
1501	 * TODO: when we have more than one transport, we will need to rewrite
1502	 * this to prevent race conditions
1503	 */
1504	if (ntb->ntb_transport != NULL)
1505		return (NULL);
1506
1507	ntb->ntb_transport = transport;
1508	return (ntb);
1509}
1510
1511/**
1512 * ntb_unregister_transport() - Unregister the transport with the NTB HW driver
1513 * @ntb - ntb_softc of the transport to be freed
1514 *
1515 * This function unregisters the transport from the HW driver and performs any
1516 * necessary cleanups.
1517 */
1518void
1519ntb_unregister_transport(struct ntb_softc *ntb)
1520{
1521	int i;
1522
1523	if (ntb->ntb_transport == NULL)
1524		return;
1525
1526	for (i = 0; i < ntb->allocated_interrupts; i++)
1527		ntb_unregister_db_callback(ntb, i);
1528
1529	ntb_unregister_event_callback(ntb);
1530	ntb->ntb_transport = NULL;
1531}
1532
1533/**
1534 * ntb_get_max_spads() - get the total scratch regs usable
1535 * @ntb: pointer to ntb_softc instance
1536 *
1537 * This function returns the max 32bit scratchpad registers usable by the
1538 * upper layer.
1539 *
1540 * RETURNS: total number of scratch pad registers available
1541 */
1542uint8_t
1543ntb_get_max_spads(struct ntb_softc *ntb)
1544{
1545
1546	return (ntb->limits.max_spads);
1547}
1548
1549/**
1550 * ntb_write_local_spad() - write to the secondary scratchpad register
1551 * @ntb: pointer to ntb_softc instance
1552 * @idx: index to the scratchpad register, 0 based
1553 * @val: the data value to put into the register
1554 *
1555 * This function allows writing of a 32bit value to the indexed scratchpad
1556 * register. The register resides on the secondary (external) side.
1557 *
1558 * RETURNS: An appropriate ERRNO error value on error, or zero for success.
1559 */
1560int
1561ntb_write_local_spad(struct ntb_softc *ntb, unsigned int idx, uint32_t val)
1562{
1563
1564	if (idx >= ntb->limits.max_spads)
1565		return (EINVAL);
1566
1567	ntb_reg_write(4, ntb->reg_ofs.spad_local + idx * 4, val);
1568
1569	return (0);
1570}
1571
1572/**
1573 * ntb_read_local_spad() - read from the primary scratchpad register
1574 * @ntb: pointer to ntb_softc instance
1575 * @idx: index to scratchpad register, 0 based
1576 * @val: pointer to 32bit integer for storing the register value
1577 *
1578 * This function allows reading of the 32bit scratchpad register on
1579 * the primary (internal) side.
1580 *
1581 * RETURNS: An appropriate ERRNO error value on error, or zero for success.
1582 */
1583int
1584ntb_read_local_spad(struct ntb_softc *ntb, unsigned int idx, uint32_t *val)
1585{
1586
1587	if (idx >= ntb->limits.max_spads)
1588		return (EINVAL);
1589
1590	*val = ntb_reg_read(4, ntb->reg_ofs.spad_local + idx * 4);
1591
1592	return (0);
1593}
1594
1595/**
1596 * ntb_write_remote_spad() - write to the secondary scratchpad register
1597 * @ntb: pointer to ntb_softc instance
1598 * @idx: index to the scratchpad register, 0 based
1599 * @val: the data value to put into the register
1600 *
1601 * This function allows writing of a 32bit value to the indexed scratchpad
1602 * register. The register resides on the secondary (external) side.
1603 *
1604 * RETURNS: An appropriate ERRNO error value on error, or zero for success.
1605 */
1606int
1607ntb_write_remote_spad(struct ntb_softc *ntb, unsigned int idx, uint32_t val)
1608{
1609
1610	if (idx >= ntb->limits.max_spads)
1611		return (EINVAL);
1612
1613	if (HAS_FEATURE(NTB_REGS_THRU_MW))
1614		ntb_mw_write(4, XEON_SHADOW_SPAD_OFFSET + idx * 4, val);
1615	else
1616		ntb_reg_write(4, ntb->reg_ofs.spad_remote + idx * 4, val);
1617
1618	return (0);
1619}
1620
1621/**
1622 * ntb_read_remote_spad() - read from the primary scratchpad register
1623 * @ntb: pointer to ntb_softc instance
1624 * @idx: index to scratchpad register, 0 based
1625 * @val: pointer to 32bit integer for storing the register value
1626 *
1627 * This function allows reading of the 32bit scratchpad register on
1628 * the primary (internal) side.
1629 *
1630 * RETURNS: An appropriate ERRNO error value on error, or zero for success.
1631 */
1632int
1633ntb_read_remote_spad(struct ntb_softc *ntb, unsigned int idx, uint32_t *val)
1634{
1635
1636	if (idx >= ntb->limits.max_spads)
1637		return (EINVAL);
1638
1639	if (HAS_FEATURE(NTB_REGS_THRU_MW))
1640		*val = ntb_mw_read(4, XEON_SHADOW_SPAD_OFFSET + idx * 4);
1641	else
1642		*val = ntb_reg_read(4, ntb->reg_ofs.spad_remote + idx * 4);
1643
1644	return (0);
1645}
1646
1647/**
1648 * ntb_get_mw_vbase() - get virtual addr for the NTB memory window
1649 * @ntb: pointer to ntb_softc instance
1650 * @mw: memory window number
1651 *
1652 * This function provides the base virtual address of the memory window
1653 * specified.
1654 *
1655 * RETURNS: pointer to virtual address, or NULL on error.
1656 */
1657void *
1658ntb_get_mw_vbase(struct ntb_softc *ntb, unsigned int mw)
1659{
1660
1661	if (mw >= NTB_NUM_MW)
1662		return (NULL);
1663
1664	return (ntb->bar_info[NTB_MW_TO_BAR(mw)].vbase);
1665}
1666
1667vm_paddr_t
1668ntb_get_mw_pbase(struct ntb_softc *ntb, unsigned int mw)
1669{
1670
1671	if (mw >= NTB_NUM_MW)
1672		return (0);
1673
1674	return (ntb->bar_info[NTB_MW_TO_BAR(mw)].pbase);
1675}
1676
1677/**
1678 * ntb_get_mw_size() - return size of NTB memory window
1679 * @ntb: pointer to ntb_softc instance
1680 * @mw: memory window number
1681 *
1682 * This function provides the physical size of the memory window specified
1683 *
1684 * RETURNS: the size of the memory window or zero on error
1685 */
1686u_long
1687ntb_get_mw_size(struct ntb_softc *ntb, unsigned int mw)
1688{
1689
1690	if (mw >= NTB_NUM_MW)
1691		return (0);
1692
1693	return (ntb->bar_info[NTB_MW_TO_BAR(mw)].size);
1694}
1695
1696/**
1697 * ntb_set_mw_addr - set the memory window address
1698 * @ntb: pointer to ntb_softc instance
1699 * @mw: memory window number
1700 * @addr: base address for data
1701 *
1702 * This function sets the base physical address of the memory window.  This
1703 * memory address is where data from the remote system will be transfered into
1704 * or out of depending on how the transport is configured.
1705 */
1706void
1707ntb_set_mw_addr(struct ntb_softc *ntb, unsigned int mw, uint64_t addr)
1708{
1709
1710	if (mw >= NTB_NUM_MW)
1711		return;
1712
1713	switch (NTB_MW_TO_BAR(mw)) {
1714	case NTB_B2B_BAR_1:
1715		ntb_reg_write(8, ntb->reg_ofs.bar2_xlat, addr);
1716		break;
1717	case NTB_B2B_BAR_2:
1718		ntb_reg_write(8, ntb->reg_ofs.bar4_xlat, addr);
1719		break;
1720	}
1721}
1722
1723/**
1724 * ntb_ring_doorbell() - Set the doorbell on the secondary/external side
1725 * @ntb: pointer to ntb_softc instance
1726 * @db: doorbell to ring
1727 *
1728 * This function allows triggering of a doorbell on the secondary/external
1729 * side that will initiate an interrupt on the remote host
1730 */
1731void
1732ntb_ring_doorbell(struct ntb_softc *ntb, unsigned int db)
1733{
1734	uint64_t bit;
1735
1736	if (ntb->type == NTB_SOC)
1737		bit = 1 << db;
1738	else
1739		bit = ((1 << ntb->bits_per_vector) - 1) <<
1740		    (db * ntb->bits_per_vector);
1741
1742	if (HAS_FEATURE(NTB_REGS_THRU_MW)) {
1743		ntb_mw_write(2, XEON_SHADOW_PDOORBELL_OFFSET, bit);
1744		return;
1745	}
1746
1747	db_iowrite(ntb, ntb->reg_ofs.rdb, bit);
1748}
1749
1750/**
1751 * ntb_query_link_status() - return the hardware link status
1752 * @ndev: pointer to ntb_device instance
1753 *
1754 * Returns true if the hardware is connected to the remote system
1755 *
1756 * RETURNS: true or false based on the hardware link state
1757 */
1758bool
1759ntb_query_link_status(struct ntb_softc *ntb)
1760{
1761
1762	return (ntb->link_status == NTB_LINK_UP);
1763}
1764
1765static void
1766save_bar_parameters(struct ntb_pci_bar_info *bar)
1767{
1768
1769	bar->pci_bus_tag = rman_get_bustag(bar->pci_resource);
1770	bar->pci_bus_handle = rman_get_bushandle(bar->pci_resource);
1771	bar->pbase = rman_get_start(bar->pci_resource);
1772	bar->size = rman_get_size(bar->pci_resource);
1773	bar->vbase = rman_get_virtual(bar->pci_resource);
1774}
1775
1776device_t
1777ntb_get_device(struct ntb_softc *ntb)
1778{
1779
1780	return (ntb->device);
1781}
1782
/*
 * Export HW-specific errata information: report whether HAS_FEATURE()
 * evaluates true for the given feature on this device.
 */
bool
ntb_has_feature(struct ntb_softc *ntb, uint64_t feature)
{
	bool present;

	present = HAS_FEATURE(feature);
	return (present);
}
1790