ntb_hw_intel.c revision 289396
1/*-
2 * Copyright (C) 2013 Intel Corporation
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24 * SUCH DAMAGE.
25 */
26
27#include <sys/cdefs.h>
28__FBSDID("$FreeBSD: head/sys/dev/ntb/ntb_hw/ntb_hw.c 289396 2015-10-15 23:45:43Z cem $");
29
30#include <sys/param.h>
31#include <sys/kernel.h>
32#include <sys/systm.h>
33#include <sys/bus.h>
34#include <sys/malloc.h>
35#include <sys/module.h>
36#include <sys/queue.h>
37#include <sys/rman.h>
38#include <sys/sysctl.h>
39#include <vm/vm.h>
40#include <vm/pmap.h>
41#include <machine/bus.h>
42#include <machine/pmap.h>
43#include <machine/resource.h>
44#include <dev/pci/pcireg.h>
45#include <dev/pci/pcivar.h>
46
47#include "ntb_regs.h"
48#include "ntb_hw.h"
49
50/*
51 * The Non-Transparent Bridge (NTB) is a device on some Intel processors that
52 * allows you to connect two systems using a PCI-e link.
53 *
54 * This module contains the hardware abstraction layer for the NTB. It allows
 * you to send and receive interrupts, map the memory windows and send and
56 * receive messages in the scratch-pad registers.
57 *
58 * NOTE: Much of the code in this module is shared with Linux. Any patches may
59 * be picked up and redistributed in Linux with a dual GPL/BSD license.
60 */
61
/* Slots of ntb_softc.bar_info[]; slot 0 is the register (config) BAR. */
#define NTB_CONFIG_BAR		0
#define NTB_B2B_BAR_1		1
#define NTB_B2B_BAR_2		2
#define NTB_MAX_BARS		3
/* Translate a memory window number into its bar_info[] slot. */
#define NTB_MW_TO_BAR(mw)	((mw) + 1)

/* Upper bound on interrupt slots across both supported device types. */
#define MAX_MSIX_INTERRUPTS	MAX(XEON_MAX_DB_BITS, SOC_MAX_DB_BITS)

#define NTB_HB_TIMEOUT		1 /* second */
#define SOC_LINK_RECOVERY_TIME	500

/* Fetch this driver's softc from a newbus device handle. */
#define DEVICE2SOFTC(dev)	((struct ntb_softc *)device_get_softc(dev))
74
/* Hardware families this driver distinguishes between. */
enum ntb_device_type {
	NTB_XEON,
	NTB_SOC,
};
79
/*
 * Device features and workarounds.
 *
 * Non-zero when any bit of `feature' is set in ntb->features.  The macro
 * relies on a softc pointer named `ntb' being in scope where it is used.
 */
#define HAS_FEATURE(feature)	(((feature) & ntb->features) != 0)
83
84struct ntb_hw_info {
85	uint32_t		device_id;
86	const char		*desc;
87	enum ntb_device_type	type;
88	uint64_t		features;
89};
90
91struct ntb_pci_bar_info {
92	bus_space_tag_t		pci_bus_tag;
93	bus_space_handle_t	pci_bus_handle;
94	int			pci_resource_id;
95	struct resource		*pci_resource;
96	vm_paddr_t		pbase;
97	void			*vbase;
98	u_long			size;
99};
100
/* Bookkeeping for one allocated interrupt (an MSI-X vector or legacy INTx). */
struct ntb_int_info {
	/* IRQ resource returned by bus_alloc_resource_any(). */
	struct resource	*res;
	/* Resource id requested for `res'. */
	int		rid;
	/* Cookie filled in by bus_setup_intr(); NULL if no handler is set. */
	void		*tag;
};
106
107struct ntb_db_cb {
108	ntb_db_callback		callback;
109	unsigned int		db_num;
110	void			*data;
111	struct ntb_softc	*ntb;
112	struct callout		irq_work;
113	bool			reserved;
114};
115
116struct ntb_softc {
117	device_t		device;
118	enum ntb_device_type	type;
119	uint64_t		features;
120
121	struct ntb_pci_bar_info	bar_info[NTB_MAX_BARS];
122	struct ntb_int_info	int_info[MAX_MSIX_INTERRUPTS];
123	uint32_t		allocated_interrupts;
124
125	struct callout		heartbeat_timer;
126	struct callout		lr_timer;
127
128	void			*ntb_transport;
129	ntb_event_callback	event_cb;
130	struct ntb_db_cb	*db_cb;
131	uint8_t			max_cbs;
132
133	struct {
134		uint8_t max_mw;
135		uint8_t max_spads;
136		uint8_t max_db_bits;
137		uint8_t msix_cnt;
138	} limits;
139	struct {
140		uint32_t ldb;
141		uint32_t ldb_mask;
142		uint32_t rdb;
143		uint32_t bar2_xlat;
144		uint32_t bar4_xlat;
145		uint32_t spad_remote;
146		uint32_t spad_local;
147		uint32_t lnk_cntl;
148		uint32_t lnk_stat;
149		uint32_t spci_cmd;
150	} reg_ofs;
151	uint32_t ppd;
152	uint8_t conn_type;
153	uint8_t dev_type;
154	uint8_t bits_per_vector;
155	uint8_t link_status;
156	uint8_t link_width;
157	uint8_t link_speed;
158};
159
#ifdef __i386__
/*
 * 64-bit bus_space accessors for i386 builds, assembled from two 32-bit
 * accesses: the low dword lives at `offset', the high dword at `offset + 4'.
 * The two halves are separate bus accesses.
 */
static __inline uint64_t
bus_space_read_8(bus_space_tag_t tag, bus_space_handle_t handle,
    bus_size_t offset)
{
	uint64_t lo, hi;

	lo = bus_space_read_4(tag, handle, offset);
	hi = bus_space_read_4(tag, handle, offset + 4);
	return (lo | (hi << 32));
}

static __inline void
bus_space_write_8(bus_space_tag_t tag, bus_space_handle_t handle,
    bus_size_t offset, uint64_t val)
{
	uint32_t lo, hi;

	lo = (uint32_t)val;
	hi = (uint32_t)(val >> 32);
	bus_space_write_4(tag, handle, offset, lo);
	bus_space_write_4(tag, handle, offset + 4, hi);
}
#endif
179
/*
 * Register access helpers.  SIZE is the access width in bytes and is pasted
 * onto bus_space_read_/bus_space_write_.  All of them expect a softc pointer
 * named `ntb' in the calling scope.
 */
#define ntb_bar_read(SIZE, bar, offset)					\
	bus_space_read_ ## SIZE (ntb->bar_info[(bar)].pci_bus_tag,	\
	    ntb->bar_info[(bar)].pci_bus_handle, (offset))
#define ntb_bar_write(SIZE, bar, offset, val)				\
	bus_space_write_ ## SIZE (ntb->bar_info[(bar)].pci_bus_tag,	\
	    ntb->bar_info[(bar)].pci_bus_handle, (offset), (val))

/* Accessors for the config BAR. */
#define ntb_reg_read(SIZE, offset)					\
	ntb_bar_read(SIZE, NTB_CONFIG_BAR, offset)
#define ntb_reg_write(SIZE, offset, val)				\
	ntb_bar_write(SIZE, NTB_CONFIG_BAR, offset, val)

/* Accessors that go through the second B2B BAR. */
#define ntb_mw_read(SIZE, offset)					\
	ntb_bar_read(SIZE, NTB_B2B_BAR_2, offset)
#define ntb_mw_write(SIZE, offset, val)					\
	ntb_bar_write(SIZE, NTB_B2B_BAR_2, offset, val)
192
/*
 * Signature of the BAR mapping routines (map_mmr_bar() and
 * map_memory_window_bar()) that map_pci_bar() dispatches to.  Returns 0 on
 * success or an errno value.
 */
typedef int (*bar_map_strategy)(struct ntb_softc *,
    struct ntb_pci_bar_info *);
195
196static int ntb_probe(device_t device);
197static int ntb_attach(device_t device);
198static int ntb_detach(device_t device);
199static int ntb_map_pci_bars(struct ntb_softc *ntb);
200static int map_pci_bar(struct ntb_softc *ntb, bar_map_strategy strategy,
201    struct ntb_pci_bar_info *bar);
202static int map_mmr_bar(struct ntb_softc *ntb, struct ntb_pci_bar_info *bar);
203static int map_memory_window_bar(struct ntb_softc *ntb,
204    struct ntb_pci_bar_info *bar);
205static void ntb_unmap_pci_bar(struct ntb_softc *ntb);
206static int ntb_remap_msix(device_t, uint32_t desired, uint32_t avail);
207static int ntb_setup_interrupts(struct ntb_softc *ntb);
208static int ntb_setup_legacy_interrupt(struct ntb_softc *ntb);
209static int ntb_setup_xeon_msix(struct ntb_softc *ntb, uint32_t num_vectors);
210static int ntb_setup_soc_msix(struct ntb_softc *ntb, uint32_t num_vectors);
211static void ntb_teardown_interrupts(struct ntb_softc *ntb);
212static void handle_soc_irq(void *arg);
213static void handle_xeon_irq(void *arg);
214static void handle_xeon_event_irq(void *arg);
215static void ntb_handle_legacy_interrupt(void *arg);
216static void ntb_irq_work(void *arg);
217static uint64_t db_ioread(struct ntb_softc *, uint32_t regoff);
218static void db_iowrite(struct ntb_softc *, uint32_t regoff, uint64_t val);
219static void mask_ldb_interrupt(struct ntb_softc *ntb, unsigned int idx);
220static void unmask_ldb_interrupt(struct ntb_softc *ntb, unsigned int idx);
221static int ntb_create_callbacks(struct ntb_softc *ntb, uint32_t num_vectors);
222static void ntb_free_callbacks(struct ntb_softc *ntb);
223static struct ntb_hw_info *ntb_get_device_info(uint32_t device_id);
224static int ntb_detect_xeon(struct ntb_softc *ntb);
225static int ntb_detect_soc(struct ntb_softc *ntb);
226static int ntb_setup_xeon(struct ntb_softc *ntb);
227static int ntb_setup_soc(struct ntb_softc *ntb);
228static void ntb_teardown_xeon(struct ntb_softc *ntb);
229static void configure_soc_secondary_side_bars(struct ntb_softc *ntb);
230static void configure_xeon_secondary_side_bars(struct ntb_softc *ntb);
231static void ntb_handle_heartbeat(void *arg);
232static void ntb_handle_link_event(struct ntb_softc *ntb, int link_state);
233static void ntb_hw_link_down(struct ntb_softc *ntb);
234static void ntb_hw_link_up(struct ntb_softc *ntb);
235static void recover_soc_link(void *arg);
236static int ntb_check_link_status(struct ntb_softc *ntb);
237static void save_bar_parameters(struct ntb_pci_bar_info *bar);
238
239static struct ntb_hw_info pci_ids[] = {
240	{ 0x0C4E8086, "Atom Processor S1200 NTB Primary B2B", NTB_SOC, 0 },
241
242	/* XXX: PS/SS IDs left out until they are supported. */
243	{ 0x37258086, "JSF Xeon C35xx/C55xx Non-Transparent Bridge B2B",
244		NTB_XEON, NTB_REGS_THRU_MW | NTB_B2BDOORBELL_BIT14 },
245	{ 0x3C0D8086, "SNB Xeon E5/Core i7 Non-Transparent Bridge B2B",
246		NTB_XEON, NTB_REGS_THRU_MW | NTB_B2BDOORBELL_BIT14 },
247	{ 0x0E0D8086, "IVT Xeon E5 V2 Non-Transparent Bridge B2B", NTB_XEON,
248		NTB_REGS_THRU_MW | NTB_B2BDOORBELL_BIT14 | NTB_SB01BASE_LOCKUP
249		    | NTB_BAR_SIZE_4K },
250	{ 0x2F0D8086, "HSX Xeon E5 V3 Non-Transparent Bridge B2B", NTB_XEON,
251		NTB_REGS_THRU_MW | NTB_B2BDOORBELL_BIT14 | NTB_SB01BASE_LOCKUP
252	},
253	{ 0x6F0D8086, "BDX Xeon E5 V4 Non-Transparent Bridge B2B", NTB_XEON,
254		NTB_REGS_THRU_MW | NTB_B2BDOORBELL_BIT14 | NTB_SB01BASE_LOCKUP
255	},
256
257	{ 0x00000000, NULL, NTB_SOC, 0 }
258};
259
260/*
261 * OS <-> Driver interface structures
262 */
263MALLOC_DEFINE(M_NTB, "ntb_hw", "ntb_hw driver memory allocations");
264
265static device_method_t ntb_pci_methods[] = {
266	/* Device interface */
267	DEVMETHOD(device_probe,     ntb_probe),
268	DEVMETHOD(device_attach,    ntb_attach),
269	DEVMETHOD(device_detach,    ntb_detach),
270	DEVMETHOD_END
271};
272
273static driver_t ntb_pci_driver = {
274	"ntb_hw",
275	ntb_pci_methods,
276	sizeof(struct ntb_softc),
277};
278
279static devclass_t ntb_devclass;
280DRIVER_MODULE(ntb_hw, pci, ntb_pci_driver, ntb_devclass, NULL, NULL);
281MODULE_VERSION(ntb_hw, 1);
282
283SYSCTL_NODE(_hw, OID_AUTO, ntb, CTLFLAG_RW, 0, "NTB sysctls");
284
285/*
286 * OS <-> Driver linkage functions
287 */
288static int
289ntb_probe(device_t device)
290{
291	struct ntb_hw_info *p;
292
293	p = ntb_get_device_info(pci_get_devid(device));
294	if (p == NULL)
295		return (ENXIO);
296
297	device_set_desc(device, p->desc);
298	return (0);
299}
300
301static int
302ntb_attach(device_t device)
303{
304	struct ntb_softc *ntb;
305	struct ntb_hw_info *p;
306	int error;
307
308	ntb = DEVICE2SOFTC(device);
309	p = ntb_get_device_info(pci_get_devid(device));
310
311	ntb->device = device;
312	ntb->type = p->type;
313	ntb->features = p->features;
314
315	/* Heartbeat timer for NTB_SOC since there is no link interrupt */
316	callout_init(&ntb->heartbeat_timer, 1);
317	callout_init(&ntb->lr_timer, 1);
318
319	if (ntb->type == NTB_SOC)
320		error = ntb_detect_soc(ntb);
321	else
322		error = ntb_detect_xeon(ntb);
323	if (error)
324		goto out;
325
326	ntb->limits.max_mw = NTB_MAX_NUM_MW;
327
328	error = ntb_map_pci_bars(ntb);
329	if (error)
330		goto out;
331	if (ntb->type == NTB_SOC)
332		error = ntb_setup_soc(ntb);
333	else
334		error = ntb_setup_xeon(ntb);
335	if (error)
336		goto out;
337	error = ntb_setup_interrupts(ntb);
338	if (error)
339		goto out;
340
341	pci_enable_busmaster(ntb->device);
342
343out:
344	if (error != 0)
345		ntb_detach(device);
346	return (error);
347}
348
349static int
350ntb_detach(device_t device)
351{
352	struct ntb_softc *ntb;
353
354	ntb = DEVICE2SOFTC(device);
355	callout_drain(&ntb->heartbeat_timer);
356	callout_drain(&ntb->lr_timer);
357	if (ntb->type == NTB_XEON)
358		ntb_teardown_xeon(ntb);
359	ntb_teardown_interrupts(ntb);
360	ntb_unmap_pci_bar(ntb);
361
362	return (0);
363}
364
365static int
366ntb_map_pci_bars(struct ntb_softc *ntb)
367{
368	int rc;
369
370	ntb->bar_info[NTB_CONFIG_BAR].pci_resource_id = PCIR_BAR(0);
371	rc = map_pci_bar(ntb, map_mmr_bar, &ntb->bar_info[NTB_CONFIG_BAR]);
372	if (rc != 0)
373		return (rc);
374
375	ntb->bar_info[NTB_B2B_BAR_1].pci_resource_id = PCIR_BAR(2);
376	rc = map_pci_bar(ntb, map_memory_window_bar,
377	    &ntb->bar_info[NTB_B2B_BAR_1]);
378	if (rc != 0)
379		return (rc);
380
381	ntb->bar_info[NTB_B2B_BAR_2].pci_resource_id = PCIR_BAR(4);
382	if (HAS_FEATURE(NTB_REGS_THRU_MW))
383		rc = map_pci_bar(ntb, map_mmr_bar,
384		    &ntb->bar_info[NTB_B2B_BAR_2]);
385	else
386		rc = map_pci_bar(ntb, map_memory_window_bar,
387		    &ntb->bar_info[NTB_B2B_BAR_2]);
388	return (rc);
389}
390
391static int
392map_pci_bar(struct ntb_softc *ntb, bar_map_strategy strategy,
393    struct ntb_pci_bar_info *bar)
394{
395	int rc;
396
397	rc = strategy(ntb, bar);
398	if (rc != 0)
399		device_printf(ntb->device,
400		    "unable to allocate pci resource\n");
401	else
402		device_printf(ntb->device,
403		    "Bar size = %lx, v %p, p %p\n",
404		    bar->size, bar->vbase, (void *)(bar->pbase));
405	return (rc);
406}
407
408static int
409map_mmr_bar(struct ntb_softc *ntb, struct ntb_pci_bar_info *bar)
410{
411
412	bar->pci_resource = bus_alloc_resource_any(ntb->device, SYS_RES_MEMORY,
413	    &bar->pci_resource_id, RF_ACTIVE);
414	if (bar->pci_resource == NULL)
415		return (ENXIO);
416
417	save_bar_parameters(bar);
418	return (0);
419}
420
421static int
422map_memory_window_bar(struct ntb_softc *ntb, struct ntb_pci_bar_info *bar)
423{
424	int rc;
425	uint8_t bar_size_bits = 0;
426
427	bar->pci_resource = bus_alloc_resource_any(ntb->device, SYS_RES_MEMORY,
428	    &bar->pci_resource_id, RF_ACTIVE);
429
430	if (bar->pci_resource == NULL)
431		return (ENXIO);
432
433	save_bar_parameters(bar);
434	/*
435	 * Ivytown NTB BAR sizes are misreported by the hardware due to a
436	 * hardware issue. To work around this, query the size it should be
437	 * configured to by the device and modify the resource to correspond to
438	 * this new size. The BIOS on systems with this problem is required to
439	 * provide enough address space to allow the driver to make this change
440	 * safely.
441	 *
442	 * Ideally I could have just specified the size when I allocated the
443	 * resource like:
444	 *  bus_alloc_resource(ntb->device,
445	 *	SYS_RES_MEMORY, &bar->pci_resource_id, 0ul, ~0ul,
446	 *	1ul << bar_size_bits, RF_ACTIVE);
447	 * but the PCI driver does not honor the size in this call, so we have
448	 * to modify it after the fact.
449	 */
450	if (HAS_FEATURE(NTB_BAR_SIZE_4K)) {
451		if (bar->pci_resource_id == PCIR_BAR(2))
452			bar_size_bits = pci_read_config(ntb->device,
453			    XEON_PBAR23SZ_OFFSET, 1);
454		else
455			bar_size_bits = pci_read_config(ntb->device,
456			    XEON_PBAR45SZ_OFFSET, 1);
457
458		rc = bus_adjust_resource(ntb->device, SYS_RES_MEMORY,
459		    bar->pci_resource, bar->pbase,
460		    bar->pbase + (1ul << bar_size_bits) - 1);
461		if (rc != 0) {
462			device_printf(ntb->device,
463			    "unable to resize bar\n");
464			return (rc);
465		}
466
467		save_bar_parameters(bar);
468	}
469
470	/* Mark bar region as write combining to improve performance. */
471	rc = pmap_change_attr((vm_offset_t)bar->vbase, bar->size,
472	    VM_MEMATTR_WRITE_COMBINING);
473	if (rc != 0) {
474		device_printf(ntb->device,
475		    "unable to mark bar as WRITE_COMBINING\n");
476		return (rc);
477	}
478	return (0);
479}
480
481static void
482ntb_unmap_pci_bar(struct ntb_softc *ntb)
483{
484	struct ntb_pci_bar_info *current_bar;
485	int i;
486
487	for (i = 0; i< NTB_MAX_BARS; i++) {
488		current_bar = &ntb->bar_info[i];
489		if (current_bar->pci_resource != NULL)
490			bus_release_resource(ntb->device, SYS_RES_MEMORY,
491			    current_bar->pci_resource_id,
492			    current_bar->pci_resource);
493	}
494}
495
496static int
497ntb_setup_xeon_msix(struct ntb_softc *ntb, uint32_t num_vectors)
498{
499	void (*interrupt_handler)(void *);
500	void *int_arg;
501	uint32_t i;
502	int rc;
503
504	if (num_vectors < 4)
505		return (ENOSPC);
506
507	for (i = 0; i < num_vectors; i++) {
508		ntb->int_info[i].rid = i + 1;
509		ntb->int_info[i].res = bus_alloc_resource_any(ntb->device,
510		    SYS_RES_IRQ, &ntb->int_info[i].rid, RF_ACTIVE);
511		if (ntb->int_info[i].res == NULL) {
512			device_printf(ntb->device,
513			    "bus_alloc_resource failed\n");
514			return (ENOMEM);
515		}
516		ntb->int_info[i].tag = NULL;
517		ntb->allocated_interrupts++;
518		if (i == num_vectors - 1) {
519			interrupt_handler = handle_xeon_event_irq;
520			int_arg = ntb;
521		} else {
522			interrupt_handler = handle_xeon_irq;
523			int_arg = &ntb->db_cb[i];
524		}
525		rc = bus_setup_intr(ntb->device, ntb->int_info[i].res,
526		    INTR_MPSAFE | INTR_TYPE_MISC, NULL, interrupt_handler,
527		    int_arg, &ntb->int_info[i].tag);
528		if (rc != 0) {
529			device_printf(ntb->device,
530			    "bus_setup_intr failed\n");
531			return (ENXIO);
532		}
533	}
534
535	/*
536	 * Prevent consumers from registering callbacks on the link event irq
537	 * slot, from which they will never be called back.
538	 */
539	ntb->db_cb[num_vectors - 1].reserved = true;
540	ntb->max_cbs--;
541	return (0);
542}
543
544static int
545ntb_setup_soc_msix(struct ntb_softc *ntb, uint32_t num_vectors)
546{
547	uint32_t i;
548	int rc;
549
550	for (i = 0; i < num_vectors; i++) {
551		ntb->int_info[i].rid = i + 1;
552		ntb->int_info[i].res = bus_alloc_resource_any(ntb->device,
553		    SYS_RES_IRQ, &ntb->int_info[i].rid, RF_ACTIVE);
554		if (ntb->int_info[i].res == NULL) {
555			device_printf(ntb->device,
556			    "bus_alloc_resource failed\n");
557			return (ENOMEM);
558		}
559		ntb->int_info[i].tag = NULL;
560		ntb->allocated_interrupts++;
561		rc = bus_setup_intr(ntb->device, ntb->int_info[i].res,
562		    INTR_MPSAFE | INTR_TYPE_MISC, NULL, handle_soc_irq,
563		    &ntb->db_cb[i], &ntb->int_info[i].tag);
564		if (rc != 0) {
565			device_printf(ntb->device, "bus_setup_intr failed\n");
566			return (ENXIO);
567		}
568	}
569	return (0);
570}
571
572/*
573 * The Linux NTB driver drops from MSI-X to legacy INTx if a unique vector
574 * cannot be allocated for each MSI-X message.  JHB seems to think remapping
575 * should be okay.  This tunable should enable us to test that hypothesis
576 * when someone gets their hands on some Xeon hardware.
577 */
578static int ntb_force_remap_mode;
579SYSCTL_INT(_hw_ntb, OID_AUTO, force_remap_mode, CTLFLAG_RDTUN,
580    &ntb_force_remap_mode, 0, "If enabled, force MSI-X messages to be remapped"
581    " to a smaller number of ithreads, even if the desired number are "
582    "available");
583
584/*
585 * In case it is NOT ok, give consumers an abort button.
586 */
587static int ntb_prefer_intx;
588SYSCTL_INT(_hw_ntb, OID_AUTO, prefer_intx_to_remap, CTLFLAG_RDTUN,
589    &ntb_prefer_intx, 0, "If enabled, prefer to use legacy INTx mode rather "
590    "than remapping MSI-X messages over available slots (match Linux driver "
591    "behavior)");
592
593/*
594 * Remap the desired number of MSI-X messages to available ithreads in a simple
595 * round-robin fashion.
596 */
597static int
598ntb_remap_msix(device_t dev, uint32_t desired, uint32_t avail)
599{
600	u_int *vectors;
601	uint32_t i;
602	int rc;
603
604	if (ntb_prefer_intx != 0)
605		return (ENXIO);
606
607	vectors = malloc(desired * sizeof(*vectors), M_NTB, M_ZERO | M_WAITOK);
608
609	for (i = 0; i < desired; i++)
610		vectors[i] = (i % avail) + 1;
611
612	rc = pci_remap_msix(dev, desired, vectors);
613	free(vectors, M_NTB);
614	return (rc);
615}
616
617static int
618ntb_setup_interrupts(struct ntb_softc *ntb)
619{
620	uint32_t desired_vectors, num_vectors;
621	uint64_t mask;
622	int rc;
623
624	ntb->allocated_interrupts = 0;
625
626	/*
627	 * On SOC, disable all interrupts.  On XEON, disable all but Link
628	 * Interrupt.  The rest will be unmasked as callbacks are registered.
629	 */
630	mask = 0;
631	if (ntb->type == NTB_XEON)
632		mask = (1 << XEON_LINK_DB);
633	db_iowrite(ntb, ntb->reg_ofs.ldb_mask, ~mask);
634
635	num_vectors = desired_vectors = MIN(pci_msix_count(ntb->device),
636	    ntb->limits.max_db_bits);
637	if (desired_vectors >= 1) {
638		rc = pci_alloc_msix(ntb->device, &num_vectors);
639
640		if (ntb_force_remap_mode != 0 && rc == 0 &&
641		    num_vectors == desired_vectors)
642			num_vectors--;
643
644		if (rc == 0 && num_vectors < desired_vectors) {
645			rc = ntb_remap_msix(ntb->device, desired_vectors,
646			    num_vectors);
647			if (rc == 0)
648				num_vectors = desired_vectors;
649			else
650				pci_release_msi(ntb->device);
651		}
652		if (rc != 0)
653			num_vectors = 1;
654	} else
655		num_vectors = 1;
656
657	/*
658	 * If allocating MSI-X interrupts succeeds, limit callbacks to the
659	 * number of MSI-X slots available.
660	 */
661	ntb_create_callbacks(ntb, num_vectors);
662
663	if (ntb->type == NTB_XEON)
664		rc = ntb_setup_xeon_msix(ntb, num_vectors);
665	else
666		rc = ntb_setup_soc_msix(ntb, num_vectors);
667	if (rc != 0) {
668		device_printf(ntb->device,
669		    "Error allocating MSI-X interrupts: %d\n", rc);
670
671		/*
672		 * If allocating MSI-X interrupts failed and we're forced to
673		 * use legacy INTx anyway, the only limit on individual
674		 * callbacks is the number of doorbell bits.
675		 *
676		 * CEM: This seems odd to me but matches the behavior of the
677		 * Linux driver ca. September 2013
678		 */
679		ntb_free_callbacks(ntb);
680		ntb_create_callbacks(ntb, ntb->limits.max_db_bits);
681	}
682
683	if (ntb->type == NTB_XEON && rc == ENOSPC)
684		rc = ntb_setup_legacy_interrupt(ntb);
685
686	return (rc);
687}
688
689static int
690ntb_setup_legacy_interrupt(struct ntb_softc *ntb)
691{
692	int rc;
693
694	ntb->int_info[0].rid = 0;
695	ntb->int_info[0].res = bus_alloc_resource_any(ntb->device, SYS_RES_IRQ,
696	    &ntb->int_info[0].rid, RF_SHAREABLE|RF_ACTIVE);
697	if (ntb->int_info[0].res == NULL) {
698		device_printf(ntb->device, "bus_alloc_resource failed\n");
699		return (ENOMEM);
700	}
701
702	ntb->int_info[0].tag = NULL;
703	ntb->allocated_interrupts = 1;
704
705	rc = bus_setup_intr(ntb->device, ntb->int_info[0].res,
706	    INTR_MPSAFE | INTR_TYPE_MISC, NULL, ntb_handle_legacy_interrupt,
707	    ntb, &ntb->int_info[0].tag);
708	if (rc != 0) {
709		device_printf(ntb->device, "bus_setup_intr failed\n");
710		return (ENXIO);
711	}
712
713	return (0);
714}
715
716static void
717ntb_teardown_interrupts(struct ntb_softc *ntb)
718{
719	struct ntb_int_info *current_int;
720	int i;
721
722	for (i = 0; i < ntb->allocated_interrupts; i++) {
723		current_int = &ntb->int_info[i];
724		if (current_int->tag != NULL)
725			bus_teardown_intr(ntb->device, current_int->res,
726			    current_int->tag);
727
728		if (current_int->res != NULL)
729			bus_release_resource(ntb->device, SYS_RES_IRQ,
730			    rman_get_rid(current_int->res), current_int->res);
731	}
732
733	ntb_free_callbacks(ntb);
734	pci_release_msi(ntb->device);
735}
736
737/*
738 * Doorbell register and mask are 64-bit on SoC, 16-bit on Xeon.  Abstract it
739 * out to make code clearer.
740 */
741static uint64_t
742db_ioread(struct ntb_softc *ntb, uint32_t regoff)
743{
744
745	if (ntb->type == NTB_SOC)
746		return (ntb_reg_read(8, regoff));
747
748	KASSERT(ntb->type == NTB_XEON, ("bad ntb type"));
749
750	return (ntb_reg_read(2, regoff));
751}
752
753static void
754db_iowrite(struct ntb_softc *ntb, uint32_t regoff, uint64_t val)
755{
756
757	if (ntb->type == NTB_SOC) {
758		ntb_reg_write(8, regoff, val);
759		return;
760	}
761
762	KASSERT(ntb->type == NTB_XEON, ("bad ntb type"));
763	ntb_reg_write(2, regoff, (uint16_t)val);
764}
765
766static void
767mask_ldb_interrupt(struct ntb_softc *ntb, unsigned int idx)
768{
769	uint64_t mask;
770
771	mask = db_ioread(ntb, ntb->reg_ofs.ldb_mask);
772	mask |= 1 << (idx * ntb->bits_per_vector);
773	db_iowrite(ntb, ntb->reg_ofs.ldb_mask, mask);
774}
775
776static void
777unmask_ldb_interrupt(struct ntb_softc *ntb, unsigned int idx)
778{
779	uint64_t mask;
780
781	mask = db_ioread(ntb, ntb->reg_ofs.ldb_mask);
782	mask &= ~(1 << (idx * ntb->bits_per_vector));
783	db_iowrite(ntb, ntb->reg_ofs.ldb_mask, mask);
784}
785
786static void
787handle_soc_irq(void *arg)
788{
789	struct ntb_db_cb *db_cb = arg;
790	struct ntb_softc *ntb = db_cb->ntb;
791
792	db_iowrite(ntb, ntb->reg_ofs.ldb, (uint64_t) 1 << db_cb->db_num);
793
794	if (db_cb->callback != NULL) {
795		mask_ldb_interrupt(ntb, db_cb->db_num);
796		callout_reset(&db_cb->irq_work, 0, ntb_irq_work, db_cb);
797	}
798}
799
800static void
801handle_xeon_irq(void *arg)
802{
803	struct ntb_db_cb *db_cb = arg;
804	struct ntb_softc *ntb = db_cb->ntb;
805
806	/*
807	 * On Xeon, there are 16 bits in the interrupt register
808	 * but only 4 vectors.  So, 5 bits are assigned to the first 3
809	 * vectors, with the 4th having a single bit for link
810	 * interrupts.
811	 */
812	db_iowrite(ntb, ntb->reg_ofs.ldb,
813	    ((1 << ntb->bits_per_vector) - 1) <<
814	    (db_cb->db_num * ntb->bits_per_vector));
815
816	if (db_cb->callback != NULL) {
817		mask_ldb_interrupt(ntb, db_cb->db_num);
818		callout_reset(&db_cb->irq_work, 0, ntb_irq_work, db_cb);
819	}
820}
821
822/* Since we do not have a HW doorbell in SOC, this is only used in JF/JT */
823static void
824handle_xeon_event_irq(void *arg)
825{
826	struct ntb_softc *ntb = arg;
827	int rc;
828
829	rc = ntb_check_link_status(ntb);
830	if (rc != 0)
831		device_printf(ntb->device, "Error determining link status\n");
832
833	/* bit 15 is always the link bit */
834	db_iowrite(ntb, ntb->reg_ofs.ldb, 1 << XEON_LINK_DB);
835}
836
837static void
838ntb_handle_legacy_interrupt(void *arg)
839{
840	struct ntb_softc *ntb = arg;
841	unsigned int i;
842	uint64_t ldb;
843
844	ldb = db_ioread(ntb, ntb->reg_ofs.ldb);
845
846	if (ntb->type == NTB_XEON && (ldb & XEON_DB_HW_LINK) != 0) {
847		handle_xeon_event_irq(ntb);
848		ldb &= ~XEON_DB_HW_LINK;
849	}
850
851	while (ldb != 0) {
852		i = ffs(ldb);
853		ldb &= ldb - 1;
854		if (ntb->type == NTB_SOC)
855			handle_soc_irq(&ntb->db_cb[i]);
856		else
857			handle_xeon_irq(&ntb->db_cb[i]);
858	}
859}
860
861static int
862ntb_create_callbacks(struct ntb_softc *ntb, uint32_t num_vectors)
863{
864	uint32_t i;
865
866	ntb->max_cbs = num_vectors;
867	ntb->db_cb = malloc(num_vectors * sizeof(*ntb->db_cb), M_NTB,
868	    M_ZERO | M_WAITOK);
869	for (i = 0; i < num_vectors; i++) {
870		ntb->db_cb[i].db_num = i;
871		ntb->db_cb[i].ntb = ntb;
872	}
873
874	return (0);
875}
876
877static void
878ntb_free_callbacks(struct ntb_softc *ntb)
879{
880	uint8_t i;
881
882	for (i = 0; i < ntb->max_cbs; i++)
883		ntb_unregister_db_callback(ntb, i);
884
885	free(ntb->db_cb, M_NTB);
886	ntb->max_cbs = 0;
887}
888
889static struct ntb_hw_info *
890ntb_get_device_info(uint32_t device_id)
891{
892	struct ntb_hw_info *ep = pci_ids;
893
894	while (ep->device_id) {
895		if (ep->device_id == device_id)
896			return (ep);
897		++ep;
898	}
899	return (NULL);
900}
901
902static void
903ntb_teardown_xeon(struct ntb_softc *ntb)
904{
905
906	ntb_hw_link_down(ntb);
907}
908
909static int
910ntb_detect_xeon(struct ntb_softc *ntb)
911{
912	uint8_t ppd, conn_type;
913
914	ppd = pci_read_config(ntb->device, NTB_PPD_OFFSET, 1);
915	ntb->ppd = ppd;
916
917	if ((ppd & XEON_PPD_DEV_TYPE) != 0)
918		ntb->dev_type = NTB_DEV_USD;
919	else
920		ntb->dev_type = NTB_DEV_DSD;
921
922	conn_type = ppd & XEON_PPD_CONN_TYPE;
923	switch (conn_type) {
924	case NTB_CONN_B2B:
925		ntb->conn_type = conn_type;
926		break;
927	case NTB_CONN_RP:
928	case NTB_CONN_TRANSPARENT:
929	default:
930		device_printf(ntb->device, "Unsupported connection type: %u\n",
931		    (unsigned)conn_type);
932		return (ENXIO);
933	}
934	return (0);
935}
936
937static int
938ntb_detect_soc(struct ntb_softc *ntb)
939{
940	uint32_t ppd, conn_type;
941
942	ppd = pci_read_config(ntb->device, NTB_PPD_OFFSET, 4);
943	ntb->ppd = ppd;
944
945	if ((ppd & SOC_PPD_DEV_TYPE) != 0)
946		ntb->dev_type = NTB_DEV_DSD;
947	else
948		ntb->dev_type = NTB_DEV_USD;
949
950	conn_type = (ppd & SOC_PPD_CONN_TYPE) >> 8;
951	switch (conn_type) {
952	case NTB_CONN_B2B:
953		ntb->conn_type = conn_type;
954		break;
955	default:
956		device_printf(ntb->device, "Unsupported NTB configuration\n");
957		return (ENXIO);
958	}
959	return (0);
960}
961
962static int
963ntb_setup_xeon(struct ntb_softc *ntb)
964{
965
966	ntb->reg_ofs.ldb	= XEON_PDOORBELL_OFFSET;
967	ntb->reg_ofs.ldb_mask	= XEON_PDBMSK_OFFSET;
968	ntb->reg_ofs.spad_local	= XEON_SPAD_OFFSET;
969	ntb->reg_ofs.bar2_xlat	= XEON_SBAR2XLAT_OFFSET;
970	ntb->reg_ofs.bar4_xlat	= XEON_SBAR4XLAT_OFFSET;
971
972	switch (ntb->conn_type) {
973	case NTB_CONN_B2B:
974		/*
975		 * reg_ofs.rdb and reg_ofs.spad_remote are effectively ignored
976		 * with the NTB_REGS_THRU_MW errata mode enabled.  (See
977		 * ntb_ring_doorbell() and ntb_read/write_remote_spad().)
978		 */
979		ntb->reg_ofs.rdb	 = XEON_B2B_DOORBELL_OFFSET;
980		ntb->reg_ofs.spad_remote = XEON_B2B_SPAD_OFFSET;
981
982		ntb->limits.max_spads	 = XEON_MAX_SPADS;
983		break;
984
985	case NTB_CONN_RP:
986		/*
987		 * Every Xeon today needs NTB_REGS_THRU_MW, so punt on RP for
988		 * now.
989		 */
990		KASSERT(HAS_FEATURE(NTB_REGS_THRU_MW),
991		    ("Xeon without MW errata unimplemented"));
992		device_printf(ntb->device,
993		    "NTB-RP disabled to due hardware errata.\n");
994		return (ENXIO);
995
996	case NTB_CONN_TRANSPARENT:
997	default:
998		device_printf(ntb->device, "Connection type %d not supported\n",
999		    ntb->conn_type);
1000		return (ENXIO);
1001	}
1002
1003	/*
1004	 * There is a Xeon hardware errata related to writes to SDOORBELL or
1005	 * B2BDOORBELL in conjunction with inbound access to NTB MMIO space,
1006	 * which may hang the system.  To workaround this use the second memory
1007	 * window to access the interrupt and scratch pad registers on the
1008	 * remote system.
1009	 *
1010	 * There is another HW errata on the limit registers -- they can only
1011	 * be written when the base register is (?)4GB aligned and < 32-bit.
1012	 * This should already be the case based on the driver defaults, but
1013	 * write the limit registers first just in case.
1014	 */
1015	if (HAS_FEATURE(NTB_REGS_THRU_MW)) {
1016		/* Reserve the last MW for mapping remote spad */
1017		ntb->limits.max_mw--;
1018		/*
1019		 * Set the Limit register to 4k, the minimum size, to prevent
1020		 * an illegal access.
1021		 */
1022		ntb_reg_write(8, XEON_PBAR4LMT_OFFSET,
1023		    ntb_get_mw_size(ntb, 1) + 0x1000);
1024	} else
1025		/*
1026		 * Disable the limit register, just in case it is set to
1027		 * something silly.
1028		 */
1029		ntb_reg_write(8, XEON_PBAR4LMT_OFFSET, 0);
1030
1031
1032	ntb->reg_ofs.lnk_cntl	 = XEON_NTBCNTL_OFFSET;
1033	ntb->reg_ofs.lnk_stat	 = XEON_LINK_STATUS_OFFSET;
1034	ntb->reg_ofs.spci_cmd	 = XEON_PCICMD_OFFSET;
1035
1036	ntb->limits.max_db_bits	 = XEON_MAX_DB_BITS;
1037	ntb->limits.msix_cnt	 = XEON_MSIX_CNT;
1038	ntb->bits_per_vector	 = XEON_DB_BITS_PER_VEC;
1039
1040	/*
1041	 * HW Errata on bit 14 of b2bdoorbell register.  Writes will not be
1042	 * mirrored to the remote system.  Shrink the number of bits by one,
1043	 * since bit 14 is the last bit.
1044	 *
1045	 * On REGS_THRU_MW errata mode, we don't use the b2bdoorbell register
1046	 * anyway.  Nor for non-B2B connection types.
1047	 */
1048	if (HAS_FEATURE(NTB_B2BDOORBELL_BIT14) &&
1049	    !HAS_FEATURE(NTB_REGS_THRU_MW) &&
1050	    ntb->conn_type == NTB_CONN_B2B)
1051		ntb->limits.max_db_bits = XEON_MAX_DB_BITS - 1;
1052
1053	configure_xeon_secondary_side_bars(ntb);
1054
1055	/* Enable Bus Master and Memory Space on the secondary side */
1056	if (ntb->conn_type == NTB_CONN_B2B)
1057		ntb_reg_write(2, ntb->reg_ofs.spci_cmd,
1058		    PCIM_CMD_MEMEN | PCIM_CMD_BUSMASTEREN);
1059
1060	/* Enable link training */
1061	ntb_hw_link_up(ntb);
1062
1063	return (0);
1064}
1065
1066static int
1067ntb_setup_soc(struct ntb_softc *ntb)
1068{
1069
1070	KASSERT(ntb->conn_type == NTB_CONN_B2B,
1071	    ("Unsupported NTB configuration (%d)\n", ntb->conn_type));
1072
1073	/* Initiate PCI-E link training */
1074	pci_write_config(ntb->device, NTB_PPD_OFFSET,
1075	    ntb->ppd | SOC_PPD_INIT_LINK, 4);
1076
1077	ntb->reg_ofs.ldb	 = SOC_PDOORBELL_OFFSET;
1078	ntb->reg_ofs.ldb_mask	 = SOC_PDBMSK_OFFSET;
1079	ntb->reg_ofs.rdb	 = SOC_B2B_DOORBELL_OFFSET;
1080	ntb->reg_ofs.bar2_xlat	 = SOC_SBAR2XLAT_OFFSET;
1081	ntb->reg_ofs.bar4_xlat	 = SOC_SBAR4XLAT_OFFSET;
1082	ntb->reg_ofs.lnk_cntl	 = SOC_NTBCNTL_OFFSET;
1083	ntb->reg_ofs.lnk_stat	 = SOC_LINK_STATUS_OFFSET;
1084	ntb->reg_ofs.spad_local	 = SOC_SPAD_OFFSET;
1085	ntb->reg_ofs.spad_remote = SOC_B2B_SPAD_OFFSET;
1086	ntb->reg_ofs.spci_cmd	 = SOC_PCICMD_OFFSET;
1087
1088	ntb->limits.max_spads	 = SOC_MAX_SPADS;
1089	ntb->limits.max_db_bits	 = SOC_MAX_DB_BITS;
1090	ntb->limits.msix_cnt	 = SOC_MSIX_CNT;
1091	ntb->bits_per_vector	 = SOC_DB_BITS_PER_VEC;
1092
1093	/*
1094	 * FIXME - MSI-X bug on early SOC HW, remove once internal issue is
1095	 * resolved.  Mask transaction layer internal parity errors.
1096	 */
1097	pci_write_config(ntb->device, 0xFC, 0x4, 4);
1098
1099	configure_soc_secondary_side_bars(ntb);
1100
1101	/* Enable Bus Master and Memory Space on the secondary side */
1102	ntb_reg_write(2, ntb->reg_ofs.spci_cmd,
1103	    PCIM_CMD_MEMEN | PCIM_CMD_BUSMASTEREN);
1104
1105	callout_reset(&ntb->heartbeat_timer, 0, ntb_handle_heartbeat, ntb);
1106
1107	return (0);
1108}
1109
1110static void
1111configure_soc_secondary_side_bars(struct ntb_softc *ntb)
1112{
1113
1114	if (ntb->dev_type == NTB_DEV_USD) {
1115		ntb_reg_write(8, SOC_PBAR2XLAT_OFFSET, PBAR2XLAT_USD_ADDR);
1116		ntb_reg_write(8, SOC_PBAR4XLAT_OFFSET, PBAR4XLAT_USD_ADDR);
1117		ntb_reg_write(8, SOC_MBAR23_OFFSET, MBAR23_USD_ADDR);
1118		ntb_reg_write(8, SOC_MBAR45_OFFSET, MBAR45_USD_ADDR);
1119	} else {
1120		ntb_reg_write(8, SOC_PBAR2XLAT_OFFSET, PBAR2XLAT_DSD_ADDR);
1121		ntb_reg_write(8, SOC_PBAR4XLAT_OFFSET, PBAR4XLAT_DSD_ADDR);
1122		ntb_reg_write(8, SOC_MBAR23_OFFSET, MBAR23_DSD_ADDR);
1123		ntb_reg_write(8, SOC_MBAR45_OFFSET, MBAR45_DSD_ADDR);
1124	}
1125}
1126
1127static void
1128configure_xeon_secondary_side_bars(struct ntb_softc *ntb)
1129{
1130
1131	if (ntb->dev_type == NTB_DEV_USD) {
1132		ntb_reg_write(8, XEON_PBAR2XLAT_OFFSET, PBAR2XLAT_USD_ADDR);
1133		if (HAS_FEATURE(NTB_REGS_THRU_MW))
1134			ntb_reg_write(8, XEON_PBAR4XLAT_OFFSET,
1135			    MBAR01_DSD_ADDR);
1136		else {
1137			ntb_reg_write(8, XEON_PBAR4XLAT_OFFSET,
1138			    PBAR4XLAT_USD_ADDR);
1139			/*
1140			 * B2B_XLAT_OFFSET is a 64-bit register but can only be
1141			 * written 32 bits at a time.
1142			 */
1143			ntb_reg_write(4, XEON_B2B_XLAT_OFFSETL,
1144			    MBAR01_DSD_ADDR & 0xffffffff);
1145			ntb_reg_write(4, XEON_B2B_XLAT_OFFSETU,
1146			    MBAR01_DSD_ADDR >> 32);
1147		}
1148		ntb_reg_write(8, XEON_SBAR0BASE_OFFSET, MBAR01_USD_ADDR);
1149		ntb_reg_write(8, XEON_SBAR2BASE_OFFSET, MBAR23_USD_ADDR);
1150		ntb_reg_write(8, XEON_SBAR4BASE_OFFSET, MBAR45_USD_ADDR);
1151	} else {
1152		ntb_reg_write(8, XEON_PBAR2XLAT_OFFSET, PBAR2XLAT_DSD_ADDR);
1153		if (HAS_FEATURE(NTB_REGS_THRU_MW))
1154			ntb_reg_write(8, XEON_PBAR4XLAT_OFFSET,
1155			    MBAR01_USD_ADDR);
1156		else {
1157			ntb_reg_write(8, XEON_PBAR4XLAT_OFFSET,
1158			    PBAR4XLAT_DSD_ADDR);
1159			/*
1160			 * B2B_XLAT_OFFSET is a 64-bit register but can only be
1161			 * written 32 bits at a time.
1162			 */
1163			ntb_reg_write(4, XEON_B2B_XLAT_OFFSETL,
1164			    MBAR01_USD_ADDR & 0xffffffff);
1165			ntb_reg_write(4, XEON_B2B_XLAT_OFFSETU,
1166			    MBAR01_USD_ADDR >> 32);
1167		}
1168		ntb_reg_write(8, XEON_SBAR0BASE_OFFSET, MBAR01_DSD_ADDR);
1169		ntb_reg_write(8, XEON_SBAR2BASE_OFFSET, MBAR23_DSD_ADDR);
1170		ntb_reg_write(8, XEON_SBAR4BASE_OFFSET, MBAR45_DSD_ADDR);
1171	}
1172}
1173
1174/* SOC does not have link status interrupt, poll on that platform */
1175static void
1176ntb_handle_heartbeat(void *arg)
1177{
1178	struct ntb_softc *ntb = arg;
1179	uint32_t status32;
1180	int rc;
1181
1182	rc = ntb_check_link_status(ntb);
1183	if (rc != 0)
1184		device_printf(ntb->device,
1185		    "Error determining link status\n");
1186
1187	/* Check to see if a link error is the cause of the link down */
1188	if (ntb->link_status == NTB_LINK_DOWN) {
1189		status32 = ntb_reg_read(4, SOC_LTSSMSTATEJMP_OFFSET);
1190		if ((status32 & SOC_LTSSMSTATEJMP_FORCEDETECT) != 0) {
1191			callout_reset(&ntb->lr_timer, 0, recover_soc_link,
1192			    ntb);
1193			return;
1194		}
1195	}
1196
1197	callout_reset(&ntb->heartbeat_timer, NTB_HB_TIMEOUT * hz,
1198	    ntb_handle_heartbeat, ntb);
1199}
1200
1201static void
1202soc_perform_link_restart(struct ntb_softc *ntb)
1203{
1204	uint32_t status;
1205
1206	/* Driver resets the NTB ModPhy lanes - magic! */
1207	ntb_reg_write(1, SOC_MODPHY_PCSREG6, 0xe0);
1208	ntb_reg_write(1, SOC_MODPHY_PCSREG4, 0x40);
1209	ntb_reg_write(1, SOC_MODPHY_PCSREG4, 0x60);
1210	ntb_reg_write(1, SOC_MODPHY_PCSREG6, 0x60);
1211
1212	/* Driver waits 100ms to allow the NTB ModPhy to settle */
1213	pause("ModPhy", hz / 10);
1214
1215	/* Clear AER Errors, write to clear */
1216	status = ntb_reg_read(4, SOC_ERRCORSTS_OFFSET);
1217	status &= PCIM_AER_COR_REPLAY_ROLLOVER;
1218	ntb_reg_write(4, SOC_ERRCORSTS_OFFSET, status);
1219
1220	/* Clear unexpected electrical idle event in LTSSM, write to clear */
1221	status = ntb_reg_read(4, SOC_LTSSMERRSTS0_OFFSET);
1222	status |= SOC_LTSSMERRSTS0_UNEXPECTEDEI;
1223	ntb_reg_write(4, SOC_LTSSMERRSTS0_OFFSET, status);
1224
1225	/* Clear DeSkew Buffer error, write to clear */
1226	status = ntb_reg_read(4, SOC_DESKEWSTS_OFFSET);
1227	status |= SOC_DESKEWSTS_DBERR;
1228	ntb_reg_write(4, SOC_DESKEWSTS_OFFSET, status);
1229
1230	status = ntb_reg_read(4, SOC_IBSTERRRCRVSTS0_OFFSET);
1231	status &= SOC_IBIST_ERR_OFLOW;
1232	ntb_reg_write(4, SOC_IBSTERRRCRVSTS0_OFFSET, status);
1233
1234	/* Releases the NTB state machine to allow the link to retrain */
1235	status = ntb_reg_read(4, SOC_LTSSMSTATEJMP_OFFSET);
1236	status &= ~SOC_LTSSMSTATEJMP_FORCEDETECT;
1237	ntb_reg_write(4, SOC_LTSSMSTATEJMP_OFFSET, status);
1238}
1239
1240static void
1241ntb_handle_link_event(struct ntb_softc *ntb, int link_state)
1242{
1243	enum ntb_hw_event event;
1244	uint16_t status;
1245
1246	if (ntb->link_status == link_state)
1247		return;
1248
1249	if (link_state == NTB_LINK_UP) {
1250		device_printf(ntb->device, "Link Up\n");
1251		ntb->link_status = NTB_LINK_UP;
1252		event = NTB_EVENT_HW_LINK_UP;
1253
1254		if (ntb->type == NTB_SOC ||
1255		    ntb->conn_type == NTB_CONN_TRANSPARENT)
1256			status = ntb_reg_read(2, ntb->reg_ofs.lnk_stat);
1257		else
1258			status = pci_read_config(ntb->device,
1259			    XEON_LINK_STATUS_OFFSET, 2);
1260		ntb->link_width = (status & NTB_LINK_WIDTH_MASK) >> 4;
1261		ntb->link_speed = (status & NTB_LINK_SPEED_MASK);
1262		device_printf(ntb->device, "Link Width %d, Link Speed %d\n",
1263		    ntb->link_width, ntb->link_speed);
1264		callout_reset(&ntb->heartbeat_timer, NTB_HB_TIMEOUT * hz,
1265		    ntb_handle_heartbeat, ntb);
1266	} else {
1267		device_printf(ntb->device, "Link Down\n");
1268		ntb->link_status = NTB_LINK_DOWN;
1269		event = NTB_EVENT_HW_LINK_DOWN;
1270		/* Do not modify link width/speed, we need it in link recovery */
1271	}
1272
1273	/* notify the upper layer if we have an event change */
1274	if (ntb->event_cb != NULL)
1275		ntb->event_cb(ntb->ntb_transport, event);
1276}
1277
1278static void
1279ntb_hw_link_up(struct ntb_softc *ntb)
1280{
1281	uint32_t cntl;
1282
1283	if (ntb->conn_type == NTB_CONN_TRANSPARENT) {
1284		ntb_handle_link_event(ntb, NTB_LINK_UP);
1285		return;
1286	}
1287
1288	cntl = ntb_reg_read(4, ntb->reg_ofs.lnk_cntl);
1289	cntl &= ~(NTB_CNTL_LINK_DISABLE | NTB_CNTL_CFG_LOCK);
1290	cntl |= NTB_CNTL_P2S_BAR23_SNOOP | NTB_CNTL_S2P_BAR23_SNOOP;
1291	cntl |= NTB_CNTL_P2S_BAR45_SNOOP | NTB_CNTL_S2P_BAR45_SNOOP;
1292	ntb_reg_write(4, ntb->reg_ofs.lnk_cntl, cntl);
1293}
1294
1295static void
1296ntb_hw_link_down(struct ntb_softc *ntb)
1297{
1298	uint32_t cntl;
1299
1300	if (ntb->conn_type == NTB_CONN_TRANSPARENT) {
1301		ntb_handle_link_event(ntb, NTB_LINK_DOWN);
1302		return;
1303	}
1304
1305	cntl = ntb_reg_read(4, ntb->reg_ofs.lnk_cntl);
1306	cntl &= ~(NTB_CNTL_P2S_BAR23_SNOOP | NTB_CNTL_S2P_BAR23_SNOOP);
1307	cntl &= ~(NTB_CNTL_P2S_BAR45_SNOOP | NTB_CNTL_S2P_BAR45_SNOOP);
1308	cntl |= NTB_CNTL_LINK_DISABLE | NTB_CNTL_CFG_LOCK;
1309	ntb_reg_write(4, ntb->reg_ofs.lnk_cntl, cntl);
1310}
1311
1312static void
1313recover_soc_link(void *arg)
1314{
1315	struct ntb_softc *ntb = arg;
1316	uint8_t speed, width;
1317	uint32_t status32;
1318	uint16_t status16;
1319
1320	soc_perform_link_restart(ntb);
1321
1322	/*
1323	 * There is a potential race between the 2 NTB devices recovering at
1324	 * the same time.  If the times are the same, the link will not recover
1325	 * and the driver will be stuck in this loop forever.  Add a random
1326	 * interval to the recovery time to prevent this race.
1327	 */
1328	status32 = arc4random() % SOC_LINK_RECOVERY_TIME;
1329	pause("Link", (SOC_LINK_RECOVERY_TIME + status32) * hz / 1000);
1330
1331	status32 = ntb_reg_read(4, SOC_LTSSMSTATEJMP_OFFSET);
1332	if ((status32 & SOC_LTSSMSTATEJMP_FORCEDETECT) != 0)
1333		goto retry;
1334
1335	status32 = ntb_reg_read(4, SOC_IBSTERRRCRVSTS0_OFFSET);
1336	if ((status32 & SOC_IBIST_ERR_OFLOW) != 0)
1337		goto retry;
1338
1339	status32 = ntb_reg_read(4, ntb->reg_ofs.lnk_cntl);
1340	if ((status32 & SOC_CNTL_LINK_DOWN) != 0)
1341		goto out;
1342
1343	status16 = ntb_reg_read(2, ntb->reg_ofs.lnk_stat);
1344	width = (status16 & NTB_LINK_WIDTH_MASK) >> 4;
1345	speed = (status16 & NTB_LINK_SPEED_MASK);
1346	if (ntb->link_width != width || ntb->link_speed != speed)
1347		goto retry;
1348
1349out:
1350	callout_reset(&ntb->heartbeat_timer, NTB_HB_TIMEOUT * hz,
1351	    ntb_handle_heartbeat, ntb);
1352	return;
1353
1354retry:
1355	callout_reset(&ntb->lr_timer, NTB_HB_TIMEOUT * hz, recover_soc_link,
1356	    ntb);
1357}
1358
1359static int
1360ntb_check_link_status(struct ntb_softc *ntb)
1361{
1362	int link_state;
1363	uint32_t ntb_cntl;
1364	uint16_t status;
1365
1366	if (ntb->type == NTB_SOC) {
1367		ntb_cntl = ntb_reg_read(4, ntb->reg_ofs.lnk_cntl);
1368		if ((ntb_cntl & SOC_CNTL_LINK_DOWN) != 0)
1369			link_state = NTB_LINK_DOWN;
1370		else
1371			link_state = NTB_LINK_UP;
1372	} else {
1373		status = pci_read_config(ntb->device, XEON_LINK_STATUS_OFFSET,
1374		    2);
1375
1376		if ((status & NTB_LINK_STATUS_ACTIVE) != 0)
1377			link_state = NTB_LINK_UP;
1378		else
1379			link_state = NTB_LINK_DOWN;
1380	}
1381
1382	ntb_handle_link_event(ntb, link_state);
1383
1384	return (0);
1385}
1386
1387/**
1388 * ntb_register_event_callback() - register event callback
1389 * @ntb: pointer to ntb_softc instance
1390 * @func: callback function to register
1391 *
1392 * This function registers a callback for any HW driver events such as link
1393 * up/down, power management notices and etc.
1394 *
1395 * RETURNS: An appropriate ERRNO error value on error, or zero for success.
1396 */
1397int
1398ntb_register_event_callback(struct ntb_softc *ntb, ntb_event_callback func)
1399{
1400
1401	if (ntb->event_cb != NULL)
1402		return (EINVAL);
1403
1404	ntb->event_cb = func;
1405
1406	return (0);
1407}
1408
1409/**
1410 * ntb_unregister_event_callback() - unregisters the event callback
1411 * @ntb: pointer to ntb_softc instance
1412 *
1413 * This function unregisters the existing callback from transport
1414 */
1415void
1416ntb_unregister_event_callback(struct ntb_softc *ntb)
1417{
1418
1419	ntb->event_cb = NULL;
1420}
1421
1422static void
1423ntb_irq_work(void *arg)
1424{
1425	struct ntb_db_cb *db_cb = arg;
1426	struct ntb_softc *ntb;
1427	int rc;
1428
1429	rc = db_cb->callback(db_cb->data, db_cb->db_num);
1430	/* Poll if forward progress was made. */
1431	if (rc != 0) {
1432		callout_reset(&db_cb->irq_work, 0, ntb_irq_work, db_cb);
1433		return;
1434	}
1435
1436	/* Unmask interrupt if no progress was made. */
1437	ntb = db_cb->ntb;
1438	unmask_ldb_interrupt(ntb, db_cb->db_num);
1439}
1440
1441/**
1442 * ntb_register_db_callback() - register a callback for doorbell interrupt
1443 * @ntb: pointer to ntb_softc instance
1444 * @idx: doorbell index to register callback, zero based
1445 * @data: pointer to be returned to caller with every callback
1446 * @func: callback function to register
1447 *
1448 * This function registers a callback function for the doorbell interrupt
1449 * on the primary side. The function will unmask the doorbell as well to
1450 * allow interrupt.
1451 *
1452 * RETURNS: An appropriate ERRNO error value on error, or zero for success.
1453 */
1454int
1455ntb_register_db_callback(struct ntb_softc *ntb, unsigned int idx, void *data,
1456    ntb_db_callback func)
1457{
1458	struct ntb_db_cb *db_cb = &ntb->db_cb[idx];
1459
1460	if (idx >= ntb->max_cbs || db_cb->callback != NULL || db_cb->reserved) {
1461		device_printf(ntb->device, "Invalid Index.\n");
1462		return (EINVAL);
1463	}
1464
1465	db_cb->callback = func;
1466	db_cb->data = data;
1467	callout_init(&db_cb->irq_work, 1);
1468
1469	unmask_ldb_interrupt(ntb, idx);
1470
1471	return (0);
1472}
1473
1474/**
1475 * ntb_unregister_db_callback() - unregister a callback for doorbell interrupt
1476 * @ntb: pointer to ntb_softc instance
1477 * @idx: doorbell index to register callback, zero based
1478 *
1479 * This function unregisters a callback function for the doorbell interrupt
1480 * on the primary side. The function will also mask the said doorbell.
1481 */
1482void
1483ntb_unregister_db_callback(struct ntb_softc *ntb, unsigned int idx)
1484{
1485
1486	if (idx >= ntb->max_cbs || ntb->db_cb[idx].callback == NULL)
1487		return;
1488
1489	mask_ldb_interrupt(ntb, idx);
1490
1491	callout_drain(&ntb->db_cb[idx].irq_work);
1492	ntb->db_cb[idx].callback = NULL;
1493}
1494
1495/**
1496 * ntb_find_transport() - find the transport pointer
1497 * @transport: pointer to pci device
1498 *
1499 * Given the pci device pointer, return the transport pointer passed in when
1500 * the transport attached when it was inited.
1501 *
1502 * RETURNS: pointer to transport.
1503 */
1504void *
1505ntb_find_transport(struct ntb_softc *ntb)
1506{
1507
1508	return (ntb->ntb_transport);
1509}
1510
1511/**
1512 * ntb_register_transport() - Register NTB transport with NTB HW driver
1513 * @transport: transport identifier
1514 *
1515 * This function allows a transport to reserve the hardware driver for
1516 * NTB usage.
1517 *
1518 * RETURNS: pointer to ntb_softc, NULL on error.
1519 */
1520struct ntb_softc *
1521ntb_register_transport(struct ntb_softc *ntb, void *transport)
1522{
1523
1524	/*
1525	 * TODO: when we have more than one transport, we will need to rewrite
1526	 * this to prevent race conditions
1527	 */
1528	if (ntb->ntb_transport != NULL)
1529		return (NULL);
1530
1531	ntb->ntb_transport = transport;
1532	return (ntb);
1533}
1534
1535/**
1536 * ntb_unregister_transport() - Unregister the transport with the NTB HW driver
1537 * @ntb - ntb_softc of the transport to be freed
1538 *
1539 * This function unregisters the transport from the HW driver and performs any
1540 * necessary cleanups.
1541 */
1542void
1543ntb_unregister_transport(struct ntb_softc *ntb)
1544{
1545	uint8_t i;
1546
1547	if (ntb->ntb_transport == NULL)
1548		return;
1549
1550	for (i = 0; i < ntb->max_cbs; i++)
1551		ntb_unregister_db_callback(ntb, i);
1552
1553	ntb_unregister_event_callback(ntb);
1554	ntb->ntb_transport = NULL;
1555}
1556
1557/**
1558 * ntb_get_max_spads() - get the total scratch regs usable
1559 * @ntb: pointer to ntb_softc instance
1560 *
1561 * This function returns the max 32bit scratchpad registers usable by the
1562 * upper layer.
1563 *
1564 * RETURNS: total number of scratch pad registers available
1565 */
1566uint8_t
1567ntb_get_max_spads(struct ntb_softc *ntb)
1568{
1569
1570	return (ntb->limits.max_spads);
1571}
1572
1573uint8_t
1574ntb_get_max_cbs(struct ntb_softc *ntb)
1575{
1576
1577	return (ntb->max_cbs);
1578}
1579
1580uint8_t
1581ntb_get_max_mw(struct ntb_softc *ntb)
1582{
1583
1584	return (ntb->limits.max_mw);
1585}
1586
1587/**
1588 * ntb_write_local_spad() - write to the secondary scratchpad register
1589 * @ntb: pointer to ntb_softc instance
1590 * @idx: index to the scratchpad register, 0 based
1591 * @val: the data value to put into the register
1592 *
1593 * This function allows writing of a 32bit value to the indexed scratchpad
1594 * register. The register resides on the secondary (external) side.
1595 *
1596 * RETURNS: An appropriate ERRNO error value on error, or zero for success.
1597 */
1598int
1599ntb_write_local_spad(struct ntb_softc *ntb, unsigned int idx, uint32_t val)
1600{
1601
1602	if (idx >= ntb->limits.max_spads)
1603		return (EINVAL);
1604
1605	ntb_reg_write(4, ntb->reg_ofs.spad_local + idx * 4, val);
1606
1607	return (0);
1608}
1609
1610/**
1611 * ntb_read_local_spad() - read from the primary scratchpad register
1612 * @ntb: pointer to ntb_softc instance
1613 * @idx: index to scratchpad register, 0 based
1614 * @val: pointer to 32bit integer for storing the register value
1615 *
1616 * This function allows reading of the 32bit scratchpad register on
1617 * the primary (internal) side.
1618 *
1619 * RETURNS: An appropriate ERRNO error value on error, or zero for success.
1620 */
1621int
1622ntb_read_local_spad(struct ntb_softc *ntb, unsigned int idx, uint32_t *val)
1623{
1624
1625	if (idx >= ntb->limits.max_spads)
1626		return (EINVAL);
1627
1628	*val = ntb_reg_read(4, ntb->reg_ofs.spad_local + idx * 4);
1629
1630	return (0);
1631}
1632
1633/**
1634 * ntb_write_remote_spad() - write to the secondary scratchpad register
1635 * @ntb: pointer to ntb_softc instance
1636 * @idx: index to the scratchpad register, 0 based
1637 * @val: the data value to put into the register
1638 *
1639 * This function allows writing of a 32bit value to the indexed scratchpad
1640 * register. The register resides on the secondary (external) side.
1641 *
1642 * RETURNS: An appropriate ERRNO error value on error, or zero for success.
1643 */
1644int
1645ntb_write_remote_spad(struct ntb_softc *ntb, unsigned int idx, uint32_t val)
1646{
1647
1648	if (idx >= ntb->limits.max_spads)
1649		return (EINVAL);
1650
1651	if (HAS_FEATURE(NTB_REGS_THRU_MW))
1652		ntb_mw_write(4, XEON_SHADOW_SPAD_OFFSET + idx * 4, val);
1653	else
1654		ntb_reg_write(4, ntb->reg_ofs.spad_remote + idx * 4, val);
1655
1656	return (0);
1657}
1658
1659/**
1660 * ntb_read_remote_spad() - read from the primary scratchpad register
1661 * @ntb: pointer to ntb_softc instance
1662 * @idx: index to scratchpad register, 0 based
1663 * @val: pointer to 32bit integer for storing the register value
1664 *
1665 * This function allows reading of the 32bit scratchpad register on
1666 * the primary (internal) side.
1667 *
1668 * RETURNS: An appropriate ERRNO error value on error, or zero for success.
1669 */
1670int
1671ntb_read_remote_spad(struct ntb_softc *ntb, unsigned int idx, uint32_t *val)
1672{
1673
1674	if (idx >= ntb->limits.max_spads)
1675		return (EINVAL);
1676
1677	if (HAS_FEATURE(NTB_REGS_THRU_MW))
1678		*val = ntb_mw_read(4, XEON_SHADOW_SPAD_OFFSET + idx * 4);
1679	else
1680		*val = ntb_reg_read(4, ntb->reg_ofs.spad_remote + idx * 4);
1681
1682	return (0);
1683}
1684
1685/**
1686 * ntb_get_mw_vbase() - get virtual addr for the NTB memory window
1687 * @ntb: pointer to ntb_softc instance
1688 * @mw: memory window number
1689 *
1690 * This function provides the base virtual address of the memory window
1691 * specified.
1692 *
1693 * RETURNS: pointer to virtual address, or NULL on error.
1694 */
1695void *
1696ntb_get_mw_vbase(struct ntb_softc *ntb, unsigned int mw)
1697{
1698
1699	if (mw >= ntb_get_max_mw(ntb))
1700		return (NULL);
1701
1702	return (ntb->bar_info[NTB_MW_TO_BAR(mw)].vbase);
1703}
1704
1705vm_paddr_t
1706ntb_get_mw_pbase(struct ntb_softc *ntb, unsigned int mw)
1707{
1708
1709	if (mw >= ntb_get_max_mw(ntb))
1710		return (0);
1711
1712	return (ntb->bar_info[NTB_MW_TO_BAR(mw)].pbase);
1713}
1714
1715/**
1716 * ntb_get_mw_size() - return size of NTB memory window
1717 * @ntb: pointer to ntb_softc instance
1718 * @mw: memory window number
1719 *
1720 * This function provides the physical size of the memory window specified
1721 *
1722 * RETURNS: the size of the memory window or zero on error
1723 */
1724u_long
1725ntb_get_mw_size(struct ntb_softc *ntb, unsigned int mw)
1726{
1727
1728	if (mw >= ntb_get_max_mw(ntb))
1729		return (0);
1730
1731	return (ntb->bar_info[NTB_MW_TO_BAR(mw)].size);
1732}
1733
1734/**
1735 * ntb_set_mw_addr - set the memory window address
1736 * @ntb: pointer to ntb_softc instance
1737 * @mw: memory window number
1738 * @addr: base address for data
1739 *
1740 * This function sets the base physical address of the memory window.  This
1741 * memory address is where data from the remote system will be transfered into
1742 * or out of depending on how the transport is configured.
1743 */
1744void
1745ntb_set_mw_addr(struct ntb_softc *ntb, unsigned int mw, uint64_t addr)
1746{
1747
1748	if (mw >= ntb_get_max_mw(ntb))
1749		return;
1750
1751	switch (NTB_MW_TO_BAR(mw)) {
1752	case NTB_B2B_BAR_1:
1753		ntb_reg_write(8, ntb->reg_ofs.bar2_xlat, addr);
1754		break;
1755	case NTB_B2B_BAR_2:
1756		ntb_reg_write(8, ntb->reg_ofs.bar4_xlat, addr);
1757		break;
1758	}
1759}
1760
1761/**
1762 * ntb_ring_doorbell() - Set the doorbell on the secondary/external side
1763 * @ntb: pointer to ntb_softc instance
1764 * @db: doorbell to ring
1765 *
1766 * This function allows triggering of a doorbell on the secondary/external
1767 * side that will initiate an interrupt on the remote host
1768 */
1769void
1770ntb_ring_doorbell(struct ntb_softc *ntb, unsigned int db)
1771{
1772	uint64_t bit;
1773
1774	if (ntb->type == NTB_SOC)
1775		bit = 1 << db;
1776	else
1777		bit = ((1 << ntb->bits_per_vector) - 1) <<
1778		    (db * ntb->bits_per_vector);
1779
1780	if (HAS_FEATURE(NTB_REGS_THRU_MW)) {
1781		ntb_mw_write(2, XEON_SHADOW_PDOORBELL_OFFSET, bit);
1782		return;
1783	}
1784
1785	db_iowrite(ntb, ntb->reg_ofs.rdb, bit);
1786}
1787
1788/**
1789 * ntb_query_link_status() - return the hardware link status
1790 * @ndev: pointer to ntb_device instance
1791 *
1792 * Returns true if the hardware is connected to the remote system
1793 *
1794 * RETURNS: true or false based on the hardware link state
1795 */
1796bool
1797ntb_query_link_status(struct ntb_softc *ntb)
1798{
1799
1800	return (ntb->link_status == NTB_LINK_UP);
1801}
1802
1803static void
1804save_bar_parameters(struct ntb_pci_bar_info *bar)
1805{
1806
1807	bar->pci_bus_tag = rman_get_bustag(bar->pci_resource);
1808	bar->pci_bus_handle = rman_get_bushandle(bar->pci_resource);
1809	bar->pbase = rman_get_start(bar->pci_resource);
1810	bar->size = rman_get_size(bar->pci_resource);
1811	bar->vbase = rman_get_virtual(bar->pci_resource);
1812}
1813
1814device_t
1815ntb_get_device(struct ntb_softc *ntb)
1816{
1817
1818	return (ntb->device);
1819}
1820
1821/* Export HW-specific errata information. */
1822bool
1823ntb_has_feature(struct ntb_softc *ntb, uint64_t feature)
1824{
1825
1826	return (HAS_FEATURE(feature));
1827}
1828