1/*-
2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3 *
4 * Copyright (c) 2015 Bjoern A. Zeeb
5 * Copyright (c) 2020 Denis Salopek
6 *
7 * This software was developed by SRI International and the University of
8 * Cambridge Computer Laboratory under DARPA/AFRL contract FA8750-11-C-0249
9 * ("MRC2"), as part of the DARPA MRC research programme.
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 *    notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 *    notice, this list of conditions and the following disclaimer in the
18 *    documentation and/or other materials provided with the distribution.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
24 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30 * POSSIBILITY OF SUCH DAMAGE.
31 */
32
33#include <sys/cdefs.h>
34__FBSDID("$FreeBSD: stable/11/sys/dev/sume/if_sume.c 364973 2020-08-30 07:34:32Z zec $");
35
36#include <sys/param.h>
37#include <sys/bus.h>
38#include <sys/endian.h>
39#include <sys/kernel.h>
40#include <sys/limits.h>
41#include <sys/module.h>
42#include <sys/rman.h>
43#include <sys/socket.h>
44#include <sys/sockio.h>
45#include <sys/sysctl.h>
46#include <sys/taskqueue.h>
47
48#include <net/if.h>
49#include <net/if_media.h>
50#include <net/if_types.h>
51#include <net/if_var.h>
52
53#include <netinet/in.h>
54#include <netinet/if_ether.h>
55
56#include <dev/pci/pcivar.h>
57#include <dev/pci/pcireg.h>
58
59#include <machine/bus.h>
60
61#include "adapter.h"
62
63#define	PCI_VENDOR_ID_XILINX	0x10ee
64#define	PCI_DEVICE_ID_SUME	0x7028
65
66/* SUME bus driver interface */
67static int sume_probe(device_t);
68static int sume_attach(device_t);
69static int sume_detach(device_t);
70
71static device_method_t sume_methods[] = {
72	DEVMETHOD(device_probe,		sume_probe),
73	DEVMETHOD(device_attach,	sume_attach),
74	DEVMETHOD(device_detach,	sume_detach),
75	DEVMETHOD_END
76};
77
78static driver_t sume_driver = {
79	"sume",
80	sume_methods,
81	sizeof(struct sume_adapter)
82};
83
84/*
85 * The DMA engine for SUME generates interrupts for each RX/TX transaction.
86 * Depending on the channel (0 if packet transaction, 1 if register transaction)
87 * the used bits of the interrupt vector will be the lowest or the second lowest
88 * 5 bits.
89 *
90 * When receiving packets from SUME (RX):
91 * (1) SUME received a packet on one of the interfaces.
92 * (2) SUME generates an interrupt vector, bit 00001 is set (channel 0 - new RX
93 *     transaction).
94 * (3) We read the length of the incoming packet and the offset along with the
95 *     'last' flag from the SUME registers.
96 * (4) We prepare for the DMA transaction by setting the bouncebuffer on the
97 *     address buf_addr. For now, this is how it's done:
98 *     - First 3*sizeof(uint32_t) bytes are: lower and upper 32 bits of physical
99 *     address where we want the data to arrive (buf_addr[0] and buf_addr[1]),
100 *     and length of incoming data (buf_addr[2]).
101 *     - Data will start right after, at buf_addr+3*sizeof(uint32_t). The
102 *     physical address buf_hw_addr is a block of contiguous memory mapped to
103 *     buf_addr, so we can set the incoming data's physical address (buf_addr[0]
104 *     and buf_addr[1]) to buf_hw_addr+3*sizeof(uint32_t).
105 * (5) We notify SUME that the bouncebuffer is ready for the transaction by
106 *     writing the lower/upper physical address buf_hw_addr to the SUME
107 *     registers RIFFA_TX_SG_ADDR_LO_REG_OFF and RIFFA_TX_SG_ADDR_HI_REG_OFF as
108 *     well as the number of segments to the register RIFFA_TX_SG_LEN_REG_OFF.
109 * (6) SUME generates an interrupt vector, bit 00010 is set (channel 0 -
110 *     bouncebuffer received).
111 * (7) SUME generates an interrupt vector, bit 00100 is set (channel 0 -
112 *     transaction is done).
113 * (8) SUME can do both steps (6) and (7) using the same interrupt.
 * (9) We read the first 16 bytes (metadata) of the received data and note the
115 *     incoming interface so we can later forward it to the right one in the OS
116 *     (sume0, sume1, sume2 or sume3).
117 * (10) We create an mbuf and copy the data from the bouncebuffer to the mbuf
118 *     and set the mbuf rcvif to the incoming interface.
119 * (11) We forward the mbuf to the appropriate interface via ifp->if_input.
120 *
121 * When sending packets to SUME (TX):
122 * (1) The OS calls sume_if_start() function on TX.
123 * (2) We get the mbuf packet data and copy it to the
124 *     buf_addr+3*sizeof(uint32_t) + metadata 16 bytes.
125 * (3) We create the metadata based on the output interface and copy it to the
126 *     buf_addr+3*sizeof(uint32_t).
127 * (4) We write the offset/last and length of the packet to the SUME registers
128 *     RIFFA_RX_OFFLAST_REG_OFF and RIFFA_RX_LEN_REG_OFF.
129 * (5) We fill the bouncebuffer by filling the first 3*sizeof(uint32_t) bytes
130 *     with the physical address and length just as in RX step (4).
131 * (6) We notify SUME that the bouncebuffer is ready by writing to SUME
132 *     registers RIFFA_RX_SG_ADDR_LO_REG_OFF, RIFFA_RX_SG_ADDR_HI_REG_OFF and
133 *     RIFFA_RX_SG_LEN_REG_OFF just as in RX step (5).
134 * (7) SUME generates an interrupt vector, bit 01000 is set (channel 0 -
135 *     bouncebuffer is read).
136 * (8) SUME generates an interrupt vector, bit 10000 is set (channel 0 -
137 *     transaction is done).
138 * (9) SUME can do both steps (7) and (8) using the same interrupt.
139 *
140 * Internal registers
141 * Every module in the SUME hardware has its own set of internal registers
142 * (IDs, for debugging and statistic purposes, etc.). Their base addresses are
143 * defined in 'projects/reference_nic/hw/tcl/reference_nic_defines.tcl' and the
144 * offsets to different memory locations of every module are defined in their
 * corresponding folder inside the library. These registers can be RO/RW and
 * their contents are fetched or changed over one or two DMA transactions: a
 * write is done by calling sume_module_reg_write(), a read by calling
 * sume_module_reg_write() followed by sume_module_reg_read(). Check those
 * functions for more information.
150 */
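
/*
 * To illustrate the layout described above (field names follow struct
 * nf_bb_desc and struct nf_metadata from adapter.h), a bouncebuffer prepared
 * for a single data transaction looks roughly like this:
 *
 *	buf_addr                         buf_addr + 3*sizeof(uint32_t)
 *	|                                |
 *	v                                v
 *	+----------+----------+----------+------------------+---------------
 *	| lower 32 | upper 32 | length   | 16-byte metadata | packet data
 *	| bits of  | bits of  | in words | (sport, dport,   | ...
 *	| DMA addr | DMA addr |          |  plen, magic)    |
 *	+----------+----------+----------+------------------+---------------
 *	^
 *	buf_hw_addr (bus address programmed into the RIFFA SG registers)
 */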
151
152MALLOC_DECLARE(M_SUME);
153MALLOC_DEFINE(M_SUME, "sume", "NetFPGA SUME device driver");
154
155static void check_tx_queues(struct sume_adapter *);
156static void sume_fill_bb_desc(struct sume_adapter *, struct riffa_chnl_dir *,
157    uint64_t);
158
159static struct unrhdr *unr;
160
161static struct {
162	uint16_t device;
163	char *desc;
164} sume_pciids[] = {
165	{PCI_DEVICE_ID_SUME, "NetFPGA SUME reference NIC"},
166};
167
168static inline uint32_t
169read_reg(struct sume_adapter *adapter, int offset)
170{
171
172	return (bus_space_read_4(adapter->bt, adapter->bh, offset << 2));
173}
174
175static inline void
176write_reg(struct sume_adapter *adapter, int offset, uint32_t val)
177{
178
179	bus_space_write_4(adapter->bt, adapter->bh, offset << 2, val);
180}
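
/*
 * Note that the RIFFA register offsets used throughout this file are word
 * indices: read_reg() and write_reg() shift them left by two, so e.g.
 * read_reg(adapter, RIFFA_INFO_REG_OFF) issues a 4-byte read at BAR0 byte
 * offset RIFFA_INFO_REG_OFF * 4.
 */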
181
182static int
183sume_probe(device_t dev)
184{
185	int i;
186	uint16_t v = pci_get_vendor(dev);
187	uint16_t d = pci_get_device(dev);
188
189	if (v != PCI_VENDOR_ID_XILINX)
190		return (ENXIO);
191
192	for (i = 0; i < nitems(sume_pciids); i++) {
193		if (d == sume_pciids[i].device) {
194			device_set_desc(dev, sume_pciids[i].desc);
195			return (BUS_PROBE_DEFAULT);
196		}
197	}
198
199	return (ENXIO);
200}
201
202/*
203 * Building mbuf for packet received from SUME. We expect to receive 'len'
204 * bytes of data (including metadata) written from the bouncebuffer address
205 * buf_addr+3*sizeof(uint32_t). Metadata will tell us which SUME interface
 * received the packet (a single bit set in the dport field), the packet
 * length (plen), and the magic word which must be 0xcafe. When we have the
 * packet data, we create an mbuf, copy the data in with m_copyback(), set the
209 * correct interface to rcvif and return the mbuf to be later sent to the OS
210 * with if_input.
211 */
212static struct mbuf *
213sume_rx_build_mbuf(struct sume_adapter *adapter, uint32_t len)
214{
215	struct nf_priv *nf_priv;
216	struct mbuf *m;
217	struct ifnet *ifp = NULL;
218	int np;
219	uint16_t dport, plen, magic;
220	device_t dev = adapter->dev;
221	uint8_t *indata = (uint8_t *)
222	    adapter->recv[SUME_RIFFA_CHANNEL_DATA]->buf_addr +
223	    sizeof(struct nf_bb_desc);
224	struct nf_metadata *mdata = (struct nf_metadata *) indata;
225
226	/* The metadata header is 16 bytes. */
227	if (len < sizeof(struct nf_metadata)) {
228		device_printf(dev, "short frame (%d)\n", len);
229		adapter->packets_err++;
230		adapter->bytes_err += len;
231		return (NULL);
232	}
233
234	dport = le16toh(mdata->dport);
235	plen = le16toh(mdata->plen);
236	magic = le16toh(mdata->magic);
237
238	if (sizeof(struct nf_metadata) + plen > len ||
239	    magic != SUME_RIFFA_MAGIC) {
240		device_printf(dev, "corrupted packet (%zd + %d > %d || magic "
241		    "0x%04x != 0x%04x)\n", sizeof(struct nf_metadata), plen,
242		    len, magic, SUME_RIFFA_MAGIC);
243		return (NULL);
244	}
245
246	/* We got the packet from one of the even bits */
247	np = (ffs(dport & SUME_DPORT_MASK) >> 1) - 1;
	if (np < 0 || np >= SUME_NPORTS) {
249		device_printf(dev, "invalid destination port 0x%04x (%d)\n",
250		    dport, np);
251		adapter->packets_err++;
252		adapter->bytes_err += plen;
253		return (NULL);
254	}
255	ifp = adapter->ifp[np];
256	nf_priv = ifp->if_softc;
257	nf_priv->stats.rx_packets++;
258	nf_priv->stats.rx_bytes += plen;
259
260	/* If the interface is down, well, we are done. */
261	if (!(ifp->if_flags & IFF_UP)) {
262		nf_priv->stats.ifc_down_packets++;
263		nf_priv->stats.ifc_down_bytes += plen;
264		return (NULL);
265	}
266
267	if (adapter->sume_debug)
268		printf("Building mbuf with length: %d\n", plen);
269
270	m = m_getm(NULL, plen, M_NOWAIT, MT_DATA);
271	if (m == NULL) {
272		adapter->packets_err++;
273		adapter->bytes_err += plen;
274		return (NULL);
275	}
276
277	/* Copy the data in at the right offset. */
278	m_copyback(m, 0, plen, (void *) (indata + sizeof(struct nf_metadata)));
279	m->m_pkthdr.rcvif = ifp;
280
281	return (m);
282}
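
/*
 * A worked example of the port lookup above: if the metadata dport has only
 * bit 3 set (and that bit falls within SUME_DPORT_MASK), ffs() returns 4, so
 * np = (4 >> 1) - 1 = 1 and the mbuf is handed to adapter->ifp[1], typically
 * sume1.
 */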
283
284/*
285 * SUME interrupt handler for when we get a valid interrupt from the board.
286 * Theoretically, we can receive interrupt for any of the available channels,
287 * but RIFFA DMA uses only 2: 0 and 1, so we use only vect0. The vector is a 32
288 * bit number, using 5 bits for every channel, the least significant bits
289 * correspond to channel 0 and the next 5 bits correspond to channel 1. Vector
290 * bits for RX/TX are:
291 * RX
292 * bit 0 - new transaction from SUME
293 * bit 1 - SUME received our bouncebuffer address
294 * bit 2 - SUME copied the received data to our bouncebuffer, transaction done
295 * TX
296 * bit 3 - SUME received our bouncebuffer address
297 * bit 4 - SUME copied the data from our bouncebuffer, transaction done
298 *
299 * There are two finite state machines (one for TX, one for RX). We loop
 * through channels 0 and 1 to check our current state and which interrupt
301 * bit is set.
302 * TX
303 * SUME_RIFFA_CHAN_STATE_IDLE: waiting for the first TX transaction.
304 * SUME_RIFFA_CHAN_STATE_READY: we prepared (filled with data) the bouncebuffer
305 * and triggered the SUME for the TX transaction. Waiting for interrupt bit 3
306 * to go to the next state.
307 * SUME_RIFFA_CHAN_STATE_READ: waiting for interrupt bit 4 (for SUME to send
308 * our packet). Then we get the length of the sent data and go back to the
309 * IDLE state.
310 * RX
311 * SUME_RIFFA_CHAN_STATE_IDLE: waiting for the interrupt bit 0 (new RX
312 * transaction). When we get it, we prepare our bouncebuffer for reading and
313 * trigger the SUME to start the transaction. Go to the next state.
314 * SUME_RIFFA_CHAN_STATE_READY: waiting for the interrupt bit 1 (SUME got our
315 * bouncebuffer). Go to the next state.
316 * SUME_RIFFA_CHAN_STATE_READ: SUME copied data and our bouncebuffer is ready,
317 * we can build the mbuf and go back to the IDLE state.
318 */
319static void
320sume_intr_handler(void *arg)
321{
322	struct sume_adapter *adapter = arg;
323	uint32_t vect, vect0, len;
324	int ch, loops;
325	device_t dev = adapter->dev;
326	struct mbuf *m = NULL;
327	struct ifnet *ifp = NULL;
328	struct riffa_chnl_dir *send, *recv;
329
330	SUME_LOCK(adapter);
331
332	vect0 = read_reg(adapter, RIFFA_IRQ_REG0_OFF);
333	if ((vect0 & SUME_INVALID_VECT) != 0) {
334		SUME_UNLOCK(adapter);
335		return;
336	}
337
338	/*
339	 * We only have one interrupt for all channels and no way
	 * to quickly look up which channel(s) the interrupt is for.
341	 */
342	for (ch = 0; ch < SUME_RIFFA_CHANNELS; ch++) {
343		vect = vect0 >> (5 * ch);
344		send = adapter->send[ch];
345		recv = adapter->recv[ch];
346
347		loops = 0;
348		while ((vect & (SUME_MSI_TXBUF | SUME_MSI_TXDONE)) &&
349		    loops <= 5) {
350			if (adapter->sume_debug)
351				device_printf(dev, "TX ch %d state %u vect = "
352				    "0x%08x\n", ch, send->state, vect);
353			switch (send->state) {
354			case SUME_RIFFA_CHAN_STATE_IDLE:
355				break;
356			case SUME_RIFFA_CHAN_STATE_READY:
357				if (!(vect & SUME_MSI_TXBUF)) {
358					device_printf(dev, "ch %d unexpected "
359					    "interrupt in send+3 state %u: "
360					    "vect = 0x%08x\n", ch, send->state,
361					    vect);
362					send->recovery = 1;
363					break;
364				}
365				send->state = SUME_RIFFA_CHAN_STATE_READ;
366				vect &= ~SUME_MSI_TXBUF;
367				break;
368			case SUME_RIFFA_CHAN_STATE_READ:
369				if (!(vect & SUME_MSI_TXDONE)) {
370					device_printf(dev, "ch %d unexpected "
371					    "interrupt in send+4 state %u: "
372					    "vect = 0x%08x\n", ch, send->state,
373					    vect);
374					send->recovery = 1;
375					break;
376				}
377				send->state = SUME_RIFFA_CHAN_STATE_LEN;
378
379				len = read_reg(adapter, RIFFA_CHNL_REG(ch,
380				    RIFFA_RX_TNFR_LEN_REG_OFF));
381				if (ch == SUME_RIFFA_CHANNEL_DATA) {
382					send->state =
383					    SUME_RIFFA_CHAN_STATE_IDLE;
384					check_tx_queues(adapter);
385				} else if (ch == SUME_RIFFA_CHANNEL_REG)
386					wakeup(&send->event);
387				else {
388					device_printf(dev, "ch %d unexpected "
389					    "interrupt in send+4 state %u: "
390					    "vect = 0x%08x\n", ch, send->state,
391					    vect);
392					send->recovery = 1;
393				}
394				vect &= ~SUME_MSI_TXDONE;
395				break;
396			case SUME_RIFFA_CHAN_STATE_LEN:
397				break;
398			default:
399				device_printf(dev, "unknown TX state!\n");
400			}
401			loops++;
402		}
403
404		if ((vect & (SUME_MSI_TXBUF | SUME_MSI_TXDONE)) &&
405		    send->recovery)
			device_printf(dev, "ch %d ignoring vect = 0x%08x "
			    "during TX; in recovery; state = %d loops = "
			    "%d\n", ch, vect, send->state, loops);
409
410		loops = 0;
411		while ((vect & (SUME_MSI_RXQUE | SUME_MSI_RXBUF |
412		    SUME_MSI_RXDONE)) && loops < 5) {
413			if (adapter->sume_debug)
414				device_printf(dev, "RX ch %d state %u vect = "
415				    "0x%08x\n", ch, recv->state, vect);
416			switch (recv->state) {
417			case SUME_RIFFA_CHAN_STATE_IDLE:
418				if (!(vect & SUME_MSI_RXQUE)) {
419					device_printf(dev, "ch %d unexpected "
420					    "interrupt in recv+0 state %u: "
421					    "vect = 0x%08x\n", ch, recv->state,
422					    vect);
423					recv->recovery = 1;
424					break;
425				}
426				uint32_t max_ptr;
427
428				/* Clear recovery state. */
429				recv->recovery = 0;
430
431				/* Get offset and length. */
432				recv->offlast = read_reg(adapter,
433				    RIFFA_CHNL_REG(ch,
434				    RIFFA_TX_OFFLAST_REG_OFF));
435				recv->len = read_reg(adapter, RIFFA_CHNL_REG(ch,
436				    RIFFA_TX_LEN_REG_OFF));
437
438				/* Boundary checks. */
439				max_ptr = (uint32_t)((uintptr_t)recv->buf_addr
440				    + SUME_RIFFA_OFFSET(recv->offlast)
441				    + SUME_RIFFA_LEN(recv->len) - 1);
442				if (max_ptr <
443				    (uint32_t)((uintptr_t)recv->buf_addr))
444					device_printf(dev, "receive buffer "
445					    "wrap-around overflow.\n");
446				if (SUME_RIFFA_OFFSET(recv->offlast) +
447				    SUME_RIFFA_LEN(recv->len) >
448				    adapter->sg_buf_size)
449					device_printf(dev, "receive buffer too"
450					    " small.\n");
451
452				/* Fill the bouncebuf "descriptor". */
453				sume_fill_bb_desc(adapter, recv,
454				    SUME_RIFFA_LEN(recv->len));
455
456				bus_dmamap_sync(recv->ch_tag, recv->ch_map,
457				    BUS_DMASYNC_PREREAD |
458				    BUS_DMASYNC_PREWRITE);
459				write_reg(adapter, RIFFA_CHNL_REG(ch,
460				    RIFFA_TX_SG_ADDR_LO_REG_OFF),
461				    SUME_RIFFA_LO_ADDR(recv->buf_hw_addr));
462				write_reg(adapter, RIFFA_CHNL_REG(ch,
463				    RIFFA_TX_SG_ADDR_HI_REG_OFF),
464				    SUME_RIFFA_HI_ADDR(recv->buf_hw_addr));
465				write_reg(adapter, RIFFA_CHNL_REG(ch,
466				    RIFFA_TX_SG_LEN_REG_OFF),
467				    4 * recv->num_sg);
468				bus_dmamap_sync(recv->ch_tag, recv->ch_map,
469				    BUS_DMASYNC_POSTREAD |
470				    BUS_DMASYNC_POSTWRITE);
471
472				recv->state = SUME_RIFFA_CHAN_STATE_READY;
473				vect &= ~SUME_MSI_RXQUE;
474				break;
475			case SUME_RIFFA_CHAN_STATE_READY:
476				if (!(vect & SUME_MSI_RXBUF)) {
477					device_printf(dev, "ch %d unexpected "
478					    "interrupt in recv+1 state %u: "
479					    "vect = 0x%08x\n", ch, recv->state,
480					    vect);
481					recv->recovery = 1;
482					break;
483				}
484				recv->state = SUME_RIFFA_CHAN_STATE_READ;
485				vect &= ~SUME_MSI_RXBUF;
486				break;
487			case SUME_RIFFA_CHAN_STATE_READ:
488				if (!(vect & SUME_MSI_RXDONE)) {
489					device_printf(dev, "ch %d unexpected "
490					    "interrupt in recv+2 state %u: "
491					    "vect = 0x%08x\n", ch, recv->state,
492					    vect);
493					recv->recovery = 1;
494					break;
495				}
496				len = read_reg(adapter, RIFFA_CHNL_REG(ch,
497				    RIFFA_TX_TNFR_LEN_REG_OFF));
498
499				/* Remember, len and recv->len are words. */
500				if (ch == SUME_RIFFA_CHANNEL_DATA) {
501					m = sume_rx_build_mbuf(adapter,
502					    len << 2);
503					recv->state =
504					    SUME_RIFFA_CHAN_STATE_IDLE;
505				} else if (ch == SUME_RIFFA_CHANNEL_REG)
506					wakeup(&recv->event);
507				else {
508					device_printf(dev, "ch %d unexpected "
509					    "interrupt in recv+2 state %u: "
510					    "vect = 0x%08x\n", ch, recv->state,
511					    vect);
512					recv->recovery = 1;
513				}
514				vect &= ~SUME_MSI_RXDONE;
515				break;
516			case SUME_RIFFA_CHAN_STATE_LEN:
517				break;
518			default:
519				device_printf(dev, "unknown RX state!\n");
520			}
521			loops++;
522		}
523
524		if ((vect & (SUME_MSI_RXQUE | SUME_MSI_RXBUF |
525		    SUME_MSI_RXDONE)) && recv->recovery) {
			device_printf(dev, "ch %d ignoring vect = 0x%08x "
			    "during RX; in recovery; state = %d, loops = "
			    "%d\n", ch, vect, recv->state, loops);
529
530			/* Clean the unfinished transaction. */
531			if (ch == SUME_RIFFA_CHANNEL_REG &&
532			    vect & SUME_MSI_RXDONE) {
533				read_reg(adapter, RIFFA_CHNL_REG(ch,
534				    RIFFA_TX_TNFR_LEN_REG_OFF));
535				recv->recovery = 0;
536			}
537		}
538	}
539	SUME_UNLOCK(adapter);
540
541	if (m != NULL) {
542		ifp = m->m_pkthdr.rcvif;
543		(*ifp->if_input)(ifp, m);
544	}
545}
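
/*
 * An example decode, for the illustrative value vect0 = 0x00000024: bit 2 of
 * the channel 0 field is set (SUME_MSI_RXDONE, a finished data RX
 * transaction) and bit 0 of the channel 1 field is set (SUME_MSI_RXQUE, a new
 * register channel RX transaction), assuming the SUME_MSI_* flags map to bits
 * 0-4 of each 5-bit field as described above.
 */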
546
547/*
548 * As we cannot disable interrupt generation, ignore early interrupts by waiting
549 * for the adapter to go into the 'running' state.
550 */
551static int
552sume_intr_filter(void *arg)
553{
554	struct sume_adapter *adapter = arg;
555
556	if (adapter->running == 0)
557		return (FILTER_STRAY);
558
559	return (FILTER_SCHEDULE_THREAD);
560}
561
562static int
563sume_probe_riffa_pci(struct sume_adapter *adapter)
564{
565	device_t dev = adapter->dev;
566	int error, count, capmem;
567	uint32_t reg, devctl, linkctl;
568
569	pci_enable_busmaster(dev);
570
571	adapter->rid = PCIR_BAR(0);
572	adapter->bar0_addr = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
573	    &adapter->rid, RF_ACTIVE);
574	if (adapter->bar0_addr == NULL) {
575		device_printf(dev, "unable to allocate bus resource: "
576		    "BAR0 address\n");
577		return (ENXIO);
578	}
579	adapter->bt = rman_get_bustag(adapter->bar0_addr);
580	adapter->bh = rman_get_bushandle(adapter->bar0_addr);
581	adapter->bar0_len = rman_get_size(adapter->bar0_addr);
582	if (adapter->bar0_len != 1024) {
583		device_printf(dev, "BAR0 resource length %lu != 1024\n",
584		    adapter->bar0_len);
585		return (ENXIO);
586	}
587
588	count = pci_msi_count(dev);
589	error = pci_alloc_msi(dev, &count);
590	if (error) {
591		device_printf(dev, "unable to allocate bus resource: PCI "
592		    "MSI\n");
593		return (error);
594	}
595
596	adapter->irq.rid = 1; /* Should be 1, thus says pci_alloc_msi() */
597	adapter->irq.res = bus_alloc_resource_any(dev, SYS_RES_IRQ,
598	    &adapter->irq.rid, RF_SHAREABLE | RF_ACTIVE);
599	if (adapter->irq.res == NULL) {
600		device_printf(dev, "unable to allocate bus resource: IRQ "
601		    "memory\n");
602		return (ENXIO);
603	}
604
605	error = bus_setup_intr(dev, adapter->irq.res, INTR_MPSAFE |
606	    INTR_TYPE_NET, sume_intr_filter, sume_intr_handler, adapter,
607	    &adapter->irq.tag);
608	if (error) {
609		device_printf(dev, "failed to setup interrupt for rid %d, name"
610		    " %s: %d\n", adapter->irq.rid, "SUME_INTR", error);
611		return (ENXIO);
612	}
613
614	if (pci_find_cap(dev, PCIY_EXPRESS, &capmem) != 0) {
615		device_printf(dev, "PCI not PCIe capable\n");
616		return (ENXIO);
617	}
618
619	devctl = pci_read_config(dev, capmem + PCIER_DEVICE_CTL, 2);
620	pci_write_config(dev, capmem + PCIER_DEVICE_CTL, (devctl |
621	    PCIEM_CTL_EXT_TAG_FIELD), 2);
622
623	devctl = pci_read_config(dev, capmem + PCIER_DEVICE_CTL2, 2);
624	pci_write_config(dev, capmem + PCIER_DEVICE_CTL2, (devctl |
625	    PCIEM_CTL2_ID_ORDERED_REQ_EN), 2);
626
627	linkctl = pci_read_config(dev, capmem + PCIER_LINK_CTL, 2);
628	pci_write_config(dev, capmem + PCIER_LINK_CTL, (linkctl |
629	    PCIEM_LINK_CTL_RCB), 2);
630
631	reg = read_reg(adapter, RIFFA_INFO_REG_OFF);
632	adapter->num_sg = RIFFA_SG_ELEMS * ((reg >> 19) & 0xf);
633	adapter->sg_buf_size = RIFFA_SG_BUF_SIZE * ((reg >> 19) & 0xf);
634
635	error = ENODEV;
636	/* Check bus master is enabled. */
637	if (((reg >> 4) & 0x1) != 1) {
638		device_printf(dev, "bus master not enabled: %d\n",
639		    (reg >> 4) & 0x1);
640		return (error);
641	}
642	/* Check link parameters are valid. */
643	if (((reg >> 5) & 0x3f) == 0 || ((reg >> 11) & 0x3) == 0) {
644		device_printf(dev, "link parameters not valid: %d %d\n",
645		    (reg >> 5) & 0x3f, (reg >> 11) & 0x3);
646		return (error);
647	}
648	/* Check # of channels are within valid range. */
649	if ((reg & 0xf) == 0 || (reg & 0xf) > RIFFA_MAX_CHNLS) {
650		device_printf(dev, "number of channels out of range: %d\n",
651		    reg & 0xf);
652		return (error);
653	}
654	/* Check bus width. */
655	if (((reg >> 19) & 0xf) == 0 ||
656	    ((reg >> 19) & 0xf) > RIFFA_MAX_BUS_WIDTH_PARAM) {
657		device_printf(dev, "bus width out of range: %d\n",
658		    (reg >> 19) & 0xf);
659		return (error);
660	}
661
662	device_printf(dev, "[riffa] # of channels: %d\n",
663	    reg & 0xf);
664	device_printf(dev, "[riffa] bus interface width: %d\n",
665	    ((reg >> 19) & 0xf) << 5);
666	device_printf(dev, "[riffa] bus master enabled: %d\n",
667	    (reg >> 4) & 0x1);
668	device_printf(dev, "[riffa] negotiated link width: %d\n",
669	    (reg >> 5) & 0x3f);
	device_printf(dev, "[riffa] negotiated link rate: %d MT/s\n",
	    ((reg >> 11) & 0x3) * 2500);
672	device_printf(dev, "[riffa] max downstream payload: %d B\n",
673	    128 << ((reg >> 13) & 0x7));
674	device_printf(dev, "[riffa] max upstream payload: %d B\n",
675	    128 << ((reg >> 16) & 0x7));
676
677	return (0);
678}
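
/*
 * A worked example of the info register decode above, for a hypothetical
 * value reg = 0x00213112: 2 channels, bus master enabled, negotiated link
 * width 8, negotiated rate 2 * 2500 = 5000 MT/s, maximum payloads of
 * 128 << 1 = 256 B in both directions and a bus width parameter of 4, i.e. a
 * 4 * 32 = 128-bit bus interface; num_sg and sg_buf_size above are scaled by
 * that same factor of 4.
 */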
679
680/* If there is no sume_if_init, the ether_ioctl panics. */
681static void
682sume_if_init(void *sc)
683{
684}
685
686/* Write the address and length for our incoming / outgoing transaction. */
687static void
688sume_fill_bb_desc(struct sume_adapter *adapter, struct riffa_chnl_dir *p,
689    uint64_t len)
690{
691	struct nf_bb_desc *bouncebuf = (struct nf_bb_desc *) p->buf_addr;
692
693	bouncebuf->lower = (p->buf_hw_addr + sizeof(struct nf_bb_desc));
694	bouncebuf->upper = (p->buf_hw_addr + sizeof(struct nf_bb_desc)) >> 32;
695	bouncebuf->len = len >> 2;
696}
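
/*
 * For example, with a hypothetical bus address buf_hw_addr = 0x1f000000 and
 * len = 1536 bytes, this yields lower = 0x1f00000c, upper = 0x0 and
 * len = 384 words, i.e. the device is pointed at the data area just past the
 * 3*sizeof(uint32_t) descriptor itself.
 */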
697
698/* Module register locked write. */
699static int
700sume_modreg_write_locked(struct sume_adapter *adapter)
701{
702	struct riffa_chnl_dir *send = adapter->send[SUME_RIFFA_CHANNEL_REG];
703
704	/* Let the FPGA know about the transfer. */
705	write_reg(adapter, RIFFA_CHNL_REG(SUME_RIFFA_CHANNEL_REG,
706	    RIFFA_RX_OFFLAST_REG_OFF), SUME_OFFLAST);
707	write_reg(adapter, RIFFA_CHNL_REG(SUME_RIFFA_CHANNEL_REG,
708	    RIFFA_RX_LEN_REG_OFF), send->len);	/* words */
709
710	/* Fill the bouncebuf "descriptor". */
711	sume_fill_bb_desc(adapter, send, SUME_RIFFA_LEN(send->len));
712
	/* Update the state before initiating the DMA to avoid races. */
714	send->state = SUME_RIFFA_CHAN_STATE_READY;
715
716	bus_dmamap_sync(send->ch_tag, send->ch_map,
717	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
718	/* DMA. */
719	write_reg(adapter, RIFFA_CHNL_REG(SUME_RIFFA_CHANNEL_REG,
720	    RIFFA_RX_SG_ADDR_LO_REG_OFF),
721	    SUME_RIFFA_LO_ADDR(send->buf_hw_addr));
722	write_reg(adapter, RIFFA_CHNL_REG(SUME_RIFFA_CHANNEL_REG,
723	    RIFFA_RX_SG_ADDR_HI_REG_OFF),
724	    SUME_RIFFA_HI_ADDR(send->buf_hw_addr));
725	write_reg(adapter, RIFFA_CHNL_REG(SUME_RIFFA_CHANNEL_REG,
726	    RIFFA_RX_SG_LEN_REG_OFF), 4 * send->num_sg);
727	bus_dmamap_sync(send->ch_tag, send->ch_map,
728	    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
729
730	return (0);
731}
732
733/*
734 * Request a register read or write (depending on optype).
735 * If optype is set (0x1f) this will result in a register write,
736 * otherwise this will result in a register read request at the given
737 * address and the result will need to be DMAed back.
738 */
739static int
740sume_module_reg_write(struct nf_priv *nf_priv, struct sume_ifreq *sifr,
741    uint32_t optype)
742{
743	struct sume_adapter *adapter = nf_priv->adapter;
744	struct riffa_chnl_dir *send = adapter->send[SUME_RIFFA_CHANNEL_REG];
745	struct nf_regop_data *data;
746	int error;
747
748	/*
	 * 1. Make sure the channel is free; otherwise return EBUSY.
750	 * 2. Prepare the memory in the bounce buffer (which we always
751	 *    use for regs).
752	 * 3. Start the DMA process.
753	 * 4. Sleep and wait for result and return success or error.
754	 */
755	SUME_LOCK(adapter);
756
757	if (send->state != SUME_RIFFA_CHAN_STATE_IDLE) {
758		SUME_UNLOCK(adapter);
759		return (EBUSY);
760	}
761
762	data = (struct nf_regop_data *) (send->buf_addr +
763	    sizeof(struct nf_bb_desc));
764	data->addr = htole32(sifr->addr);
765	data->val = htole32(sifr->val);
	/* Tag to identify request. */
767	data->rtag = htole32(++send->rtag);
768	data->optype = htole32(optype);
769	send->len = sizeof(struct nf_regop_data) / 4; /* words */
770
771	error = sume_modreg_write_locked(adapter);
772	if (error) {
773		SUME_UNLOCK(adapter);
774		return (EFAULT);
775	}
776
777	/* Timeout after 1s. */
778	if (send->state != SUME_RIFFA_CHAN_STATE_LEN)
779		error = msleep(&send->event, &adapter->lock, 0,
780		    "Waiting recv finish", 1 * hz);
781
	/* Done if this was a write, or we were interrupted or timed out. */
783	if (optype != SUME_MR_READ || error != 0 || error == EWOULDBLOCK) {
784		send->state = SUME_RIFFA_CHAN_STATE_IDLE;
785		if (optype == SUME_MR_READ)
786			error = EWOULDBLOCK;
787		else
788			error = 0;
789	} else
790		error = 0;
791
792	/*
793	 * For read requests we will update state once we are done
794	 * having read the result to avoid any two outstanding
795	 * transactions, or we need a queue and validate tags,
796	 * which is a lot of work for a low priority, infrequent
797	 * event.
798	 */
799
800	SUME_UNLOCK(adapter);
801
802	return (error);
803}
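
/*
 * For example, a register read request DMAs an nf_regop_data payload of
 * roughly { addr = <register address>, val = 0, rtag = <next tag>,
 * optype = SUME_MR_READ } to the FPGA; the FPGA later DMAs the same structure
 * back with val filled in and the rtag echoed, which sume_module_reg_read()
 * below picks up and validates.
 */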
804
805/* Module register read. */
806static int
807sume_module_reg_read(struct nf_priv *nf_priv, struct sume_ifreq *sifr)
808{
809	struct sume_adapter *adapter = nf_priv->adapter;
810	struct riffa_chnl_dir *recv = adapter->recv[SUME_RIFFA_CHANNEL_REG];
811	struct riffa_chnl_dir *send = adapter->send[SUME_RIFFA_CHANNEL_REG];
812	struct nf_regop_data *data;
813	int error = 0;
814
815	/*
816	 * 0. Sleep waiting for result if needed (unless condition is
817	 *    true already).
818	 * 1. Read DMA results.
819	 * 2. Update state on *TX* to IDLE to allow next read to start.
820	 */
821	SUME_LOCK(adapter);
822
823	bus_dmamap_sync(recv->ch_tag, recv->ch_map,
824	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
825	/*
826	 * We only need to be woken up at the end of the transaction.
827	 * Timeout after 1s.
828	 */
829	if (recv->state != SUME_RIFFA_CHAN_STATE_READ)
830		error = msleep(&recv->event, &adapter->lock, 0,
831		    "Waiting transaction finish", 1 * hz);
832
833	if (recv->state != SUME_RIFFA_CHAN_STATE_READ || error == EWOULDBLOCK) {
834		SUME_UNLOCK(adapter);
835		device_printf(adapter->dev, "wait error: %d\n", error);
836		return (EWOULDBLOCK);
837	}
838
839	bus_dmamap_sync(recv->ch_tag, recv->ch_map,
840	    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
841
842	/*
843	 * Read reply data and validate address and tag.
844	 * Note: we do access the send side without lock but the state
845	 * machine does prevent the data from changing.
846	 */
847	data = (struct nf_regop_data *) (recv->buf_addr +
848	    sizeof(struct nf_bb_desc));
849
850	if (le32toh(data->rtag) != send->rtag)
851		device_printf(adapter->dev, "rtag error: 0x%08x 0x%08x\n",
852		    le32toh(data->rtag), send->rtag);
853
854	sifr->val = le32toh(data->val);
855	recv->state = SUME_RIFFA_CHAN_STATE_IDLE;
856
857	/* We are done. */
858	send->state = SUME_RIFFA_CHAN_STATE_IDLE;
859
860	SUME_UNLOCK(adapter);
861
862	return (0);
863}
864
865/* Read value from a module register and return it to a sume_ifreq. */
866static int
867get_modreg_value(struct nf_priv *nf_priv, struct sume_ifreq *sifr)
868{
869	int error;
870
871	error = sume_module_reg_write(nf_priv, sifr, SUME_MR_READ);
872	if (!error)
873		error = sume_module_reg_read(nf_priv, sifr);
874
875	return (error);
876}
877
878static int
879sume_if_ioctl(struct ifnet *ifp, unsigned long cmd, caddr_t data)
880{
881	struct ifreq *ifr = (struct ifreq *) data;
882	struct nf_priv *nf_priv = ifp->if_softc;
883	struct sume_ifreq sifr;
884	int error = 0;
885
886	switch (cmd) {
887	case SIOCGIFMEDIA:
888	case SIOCGIFXMEDIA:
889		error = ifmedia_ioctl(ifp, ifr, &nf_priv->media, cmd);
890		break;
891
892	case SUME_IOCTL_CMD_WRITE_REG:
893		error = copyin(ifr_data_get_ptr(ifr), &sifr, sizeof(sifr));
894		if (error) {
895			error = EINVAL;
896			break;
897		}
898		error = sume_module_reg_write(nf_priv, &sifr, SUME_MR_WRITE);
899		break;
900
901	case SUME_IOCTL_CMD_READ_REG:
902		error = copyin(ifr_data_get_ptr(ifr), &sifr, sizeof(sifr));
903		if (error) {
904			error = EINVAL;
905			break;
906		}
907
908		error = get_modreg_value(nf_priv, &sifr);
909		if (error)
910			break;
911
912		error = copyout(&sifr, ifr_data_get_ptr(ifr), sizeof(sifr));
913		if (error)
914			error = EINVAL;
915
916		break;
917
918	case SIOCSIFFLAGS:
919		/* Silence tcpdump 'promisc mode not supported' warning. */
920		if (ifp->if_flags & IFF_PROMISC)
921			break;
922
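		/* FALLTHROUGH */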
923	default:
924		error = ether_ioctl(ifp, cmd, data);
925		break;
926	}
927
928	return (error);
929}
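
/*
 * A minimal userland sketch of the register ioctls above (assuming struct
 * sume_ifreq and the SUME_IOCTL_* values are made available to the
 * application, and using a hypothetical register address):
 *
 *	struct sume_ifreq sifr = { .addr = 0x44040000, .val = 0 };
 *	struct ifreq ifr;
 *
 *	memset(&ifr, 0, sizeof(ifr));
 *	strlcpy(ifr.ifr_name, "sume0", sizeof(ifr.ifr_name));
 *	ifr.ifr_data = (caddr_t)&sifr;
 *	if (ioctl(sock, SUME_IOCTL_CMD_READ_REG, &ifr) == 0)
 *		printf("0x%08x = 0x%08x\n", sifr.addr, sifr.val);
 *
 * where 'sock' is any open AF_INET datagram socket.
 */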
930
931static int
932sume_media_change(struct ifnet *ifp)
933{
934	struct nf_priv *nf_priv = ifp->if_softc;
935	struct ifmedia *ifm = &nf_priv->media;
936
937	if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
938		return (EINVAL);
939
940	if (IFM_SUBTYPE(ifm->ifm_media) == IFM_10G_SR)
941		ifp->if_baudrate = ifmedia_baudrate(IFM_ETHER | IFM_10G_SR);
942	else
943		ifp->if_baudrate = ifmedia_baudrate(ifm->ifm_media);
944
945	return (0);
946}
947
948static void
949sume_update_link_status(struct ifnet *ifp)
950{
951	struct nf_priv *nf_priv = ifp->if_softc;
952	struct sume_adapter *adapter = nf_priv->adapter;
953	struct sume_ifreq sifr;
954	int link_status;
955
956	sifr.addr = SUME_STATUS_ADDR(nf_priv->port);
957	sifr.val = 0;
958
959	if (get_modreg_value(nf_priv, &sifr))
960		return;
961
962	link_status = SUME_LINK_STATUS(sifr.val);
963
964	if (!link_status && nf_priv->link_up) {
965		if_link_state_change(ifp, LINK_STATE_DOWN);
966		nf_priv->link_up = 0;
967		if (adapter->sume_debug)
968			device_printf(adapter->dev, "port %d link state "
969			    "changed to DOWN\n", nf_priv->unit);
970	} else if (link_status && !nf_priv->link_up) {
971		nf_priv->link_up = 1;
972		if_link_state_change(ifp, LINK_STATE_UP);
973		if (adapter->sume_debug)
974			device_printf(adapter->dev, "port %d link state "
975			    "changed to UP\n", nf_priv->unit);
976	}
977}
978
979static void
980sume_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
981{
982	struct nf_priv *nf_priv = ifp->if_softc;
983	struct ifmedia *ifm = &nf_priv->media;
984
985	if (ifm->ifm_cur->ifm_media == (IFM_ETHER | IFM_10G_SR) &&
986	    (ifp->if_flags & IFF_UP))
987		ifmr->ifm_active = IFM_ETHER | IFM_10G_SR;
988	else
989		ifmr->ifm_active = ifm->ifm_cur->ifm_media;
990
991	ifmr->ifm_status |= IFM_AVALID;
992
993	sume_update_link_status(ifp);
994
995	if (nf_priv->link_up)
996		ifmr->ifm_status |= IFM_ACTIVE;
997}
998
999/*
1000 * Packet to transmit. We take the packet data from the mbuf and copy it to the
1001 * bouncebuffer address buf_addr+3*sizeof(uint32_t)+16. The 16 bytes before the
1002 * packet data are for metadata: sport/dport (depending on our source
1003 * interface), packet length and magic 0xcafe. We tell the SUME about the
1004 * transfer, fill the first 3*sizeof(uint32_t) bytes of the bouncebuffer with
1005 * the information about the start and length of the packet and trigger the
1006 * transaction.
1007 */
1008static int
1009sume_if_start_locked(struct ifnet *ifp)
1010{
1011	struct mbuf *m;
1012	struct nf_priv *nf_priv = ifp->if_softc;
1013	struct sume_adapter *adapter = nf_priv->adapter;
1014	struct riffa_chnl_dir *send = adapter->send[SUME_RIFFA_CHANNEL_DATA];
1015	uint8_t *outbuf;
1016	struct nf_metadata *mdata;
1017	int plen = SUME_MIN_PKT_SIZE;
1018
1019	KASSERT(mtx_owned(&adapter->lock), ("SUME lock not owned"));
1020	KASSERT(send->state == SUME_RIFFA_CHAN_STATE_IDLE,
1021	    ("SUME not in IDLE state"));
1022
1023	IFQ_DEQUEUE(&ifp->if_snd, m);
1024	if (m == NULL)
1025		return (EINVAL);
1026
1027	/* Packets large enough do not need to be padded */
1028	if (m->m_pkthdr.len > SUME_MIN_PKT_SIZE)
1029		plen = m->m_pkthdr.len;
1030
1031	if (adapter->sume_debug)
1032		device_printf(adapter->dev, "sending %d bytes to %s%d\n", plen,
1033		    SUME_ETH_DEVICE_NAME, nf_priv->unit);
1034
1035	outbuf = (uint8_t *) send->buf_addr + sizeof(struct nf_bb_desc);
1036	mdata = (struct nf_metadata *) outbuf;
1037
1038	/* Clear the recovery flag. */
1039	send->recovery = 0;
1040
1041	/* Make sure we fit with the 16 bytes nf_metadata. */
1042	if (m->m_pkthdr.len + sizeof(struct nf_metadata) >
1043	    adapter->sg_buf_size) {
1044		device_printf(adapter->dev, "packet too big for bounce buffer "
1045		    "(%d)\n", m->m_pkthdr.len);
1046		m_freem(m);
1047		nf_priv->stats.tx_dropped++;
1048		return (ENOMEM);
1049	}
1050
1051	bus_dmamap_sync(send->ch_tag, send->ch_map,
1052	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1053
1054	/* Zero out the padded data */
1055	if (m->m_pkthdr.len < SUME_MIN_PKT_SIZE)
1056		bzero(outbuf + sizeof(struct nf_metadata), SUME_MIN_PKT_SIZE);
1057	/* Skip the first 16 bytes for the metadata. */
1058	m_copydata(m, 0, m->m_pkthdr.len, outbuf + sizeof(struct nf_metadata));
1059	send->len = (sizeof(struct nf_metadata) + plen + 3) / 4;
1060
1061	/* Fill in the metadata: CPU(DMA) ports are odd, MAC ports are even. */
1062	mdata->sport = htole16(1 << (nf_priv->port * 2 + 1));
1063	mdata->dport = htole16(1 << (nf_priv->port * 2));
1064	mdata->plen = htole16(plen);
1065	mdata->magic = htole16(SUME_RIFFA_MAGIC);
1066	mdata->t1 = htole32(0);
1067	mdata->t2 = htole32(0);
1068
1069	/* Let the FPGA know about the transfer. */
1070	write_reg(adapter, RIFFA_CHNL_REG(SUME_RIFFA_CHANNEL_DATA,
1071	    RIFFA_RX_OFFLAST_REG_OFF), SUME_OFFLAST);
1072	write_reg(adapter, RIFFA_CHNL_REG(SUME_RIFFA_CHANNEL_DATA,
1073	    RIFFA_RX_LEN_REG_OFF), send->len);
1074
1075	/* Fill the bouncebuf "descriptor". */
1076	sume_fill_bb_desc(adapter, send, SUME_RIFFA_LEN(send->len));
1077
	/* Update the state before initiating the DMA to avoid races. */
1079	send->state = SUME_RIFFA_CHAN_STATE_READY;
1080
1081	/* DMA. */
1082	write_reg(adapter, RIFFA_CHNL_REG(SUME_RIFFA_CHANNEL_DATA,
1083	    RIFFA_RX_SG_ADDR_LO_REG_OFF),
1084	    SUME_RIFFA_LO_ADDR(send->buf_hw_addr));
1085	write_reg(adapter, RIFFA_CHNL_REG(SUME_RIFFA_CHANNEL_DATA,
1086	    RIFFA_RX_SG_ADDR_HI_REG_OFF),
1087	    SUME_RIFFA_HI_ADDR(send->buf_hw_addr));
1088	write_reg(adapter, RIFFA_CHNL_REG(SUME_RIFFA_CHANNEL_DATA,
1089	    RIFFA_RX_SG_LEN_REG_OFF), 4 * send->num_sg);
1090
1091	bus_dmamap_sync(send->ch_tag, send->ch_map,
1092	    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
1093
1094	nf_priv->stats.tx_packets++;
1095	nf_priv->stats.tx_bytes += plen;
1096
1097	/* We can free as long as we use the bounce buffer. */
1098	m_freem(m);
1099
1100	adapter->last_ifc = nf_priv->port;
1101
1102	/* Reset watchdog counter. */
1103	adapter->wd_counter = 0;
1104
1105	return (0);
1106}
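
/*
 * For example, a 64-byte frame queued on the interface with
 * nf_priv->port == 2 goes out with sport = 1 << 5 = 0x0020,
 * dport = 1 << 4 = 0x0010, plen = 64 and magic = 0xcafe, and
 * send->len = (16 + 64 + 3) / 4 = 20 words.
 */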
1107
1108static void
1109sume_if_start(struct ifnet *ifp)
1110{
1111	struct nf_priv *nf_priv = ifp->if_softc;
1112	struct sume_adapter *adapter = nf_priv->adapter;
1113
1114	if (!adapter->running || !(ifp->if_flags & IFF_UP))
1115		return;
1116
1117	SUME_LOCK(adapter);
1118	if (adapter->send[SUME_RIFFA_CHANNEL_DATA]->state ==
1119	    SUME_RIFFA_CHAN_STATE_IDLE)
1120		sume_if_start_locked(ifp);
1121	SUME_UNLOCK(adapter);
1122}
1123
1124/*
1125 * We call this function at the end of every TX transaction to check for
1126 * remaining packets in the TX queues for every UP interface.
1127 */
1128static void
1129check_tx_queues(struct sume_adapter *adapter)
1130{
1131	int i, last_ifc;
1132
1133	KASSERT(mtx_owned(&adapter->lock), ("SUME lock not owned"));
1134
1135	last_ifc = adapter->last_ifc;
1136
1137	/* Check all interfaces */
1138	for (i = last_ifc + 1; i < last_ifc + SUME_NPORTS + 1; i++) {
1139		struct ifnet *ifp = adapter->ifp[i % SUME_NPORTS];
1140
1141		if (!(ifp->if_flags & IFF_UP))
1142			continue;
1143
1144		if (!sume_if_start_locked(ifp))
1145			break;
1146	}
1147}
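
/*
 * For example, if the last transmitting interface was adapter->ifp[2], the
 * loop above polls ifp[3], ifp[0], ifp[1] and ifp[2] in that order and stops
 * at the first interface that is UP and has a packet queued, i.e. the first
 * one for which sume_if_start_locked() returns 0.
 */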
1148
1149static int
1150sume_ifp_alloc(struct sume_adapter *adapter, uint32_t port)
1151{
1152	struct ifnet *ifp;
1153	struct nf_priv *nf_priv = malloc(sizeof(struct nf_priv), M_SUME,
1154	    M_ZERO | M_WAITOK);
1155
1156	ifp = if_alloc(IFT_ETHER);
1157	if (ifp == NULL) {
1158		device_printf(adapter->dev, "cannot allocate ifnet\n");
1159		return (ENOMEM);
1160	}
1161
1162	adapter->ifp[port] = ifp;
1163	ifp->if_softc = nf_priv;
1164
1165	nf_priv->adapter = adapter;
1166	nf_priv->unit = alloc_unr(unr);
1167	nf_priv->port = port;
1168	nf_priv->link_up = 0;
1169
1170	if_initname(ifp, SUME_ETH_DEVICE_NAME, nf_priv->unit);
1171	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
1172
1173	ifp->if_init = sume_if_init;
1174	ifp->if_start = sume_if_start;
1175	ifp->if_ioctl = sume_if_ioctl;
1176
1177	uint8_t hw_addr[ETHER_ADDR_LEN] = DEFAULT_ETHER_ADDRESS;
1178	hw_addr[ETHER_ADDR_LEN-1] = nf_priv->unit;
1179	ether_ifattach(ifp, hw_addr);
1180
1181	ifmedia_init(&nf_priv->media, IFM_IMASK, sume_media_change,
1182	    sume_media_status);
1183	ifmedia_add(&nf_priv->media, IFM_ETHER | IFM_10G_SR, 0, NULL);
1184	ifmedia_set(&nf_priv->media, IFM_ETHER | IFM_10G_SR);
1185
1186	ifp->if_drv_flags |= IFF_DRV_RUNNING;
1187
1188	return (0);
1189}
1190
1191static void
1192callback_dma(void *arg, bus_dma_segment_t *segs, int nseg, int err)
1193{
1194	if (err)
1195		return;
1196
1197	KASSERT(nseg == 1, ("%d segments returned!", nseg));
1198
1199	*(bus_addr_t *) arg = segs[0].ds_addr;
1200}
1201
1202static int
1203sume_probe_riffa_buffer(const struct sume_adapter *adapter,
1204    struct riffa_chnl_dir ***p, const char *dir)
1205{
1206	struct riffa_chnl_dir **rp;
1207	bus_addr_t hw_addr;
1208	int error, ch;
1209	device_t dev = adapter->dev;
1210
1211	error = ENOMEM;
1212	*p = malloc(SUME_RIFFA_CHANNELS * sizeof(struct riffa_chnl_dir *),
1213	    M_SUME, M_ZERO | M_WAITOK);
1214	if (*p == NULL) {
1215		device_printf(dev, "malloc(%s) failed.\n", dir);
1216		return (error);
1217	}
1218
1219	rp = *p;
1220	/* Allocate the chnl_dir structs themselves. */
1221	for (ch = 0; ch < SUME_RIFFA_CHANNELS; ch++) {
1222		/* One direction. */
1223		rp[ch] = malloc(sizeof(struct riffa_chnl_dir), M_SUME,
1224		    M_ZERO | M_WAITOK);
1225		if (rp[ch] == NULL) {
1226			device_printf(dev, "malloc(%s[%d]) riffa_chnl_dir "
1227			    "failed.\n", dir, ch);
1228			return (error);
1229		}
1230
		int err = bus_dma_tag_create(bus_get_dma_tag(dev),
		    4, 0,			/* alignment, boundary */
		    BUS_SPACE_MAXADDR,		/* lowaddr */
		    BUS_SPACE_MAXADDR,		/* highaddr */
		    NULL, NULL,			/* filter, filterarg */
		    adapter->sg_buf_size,	/* maxsize */
		    1,				/* nsegments */
		    adapter->sg_buf_size,	/* maxsegsize */
		    0,				/* flags */
		    NULL,			/* lockfunc */
		    NULL,			/* lockfuncarg */
		    &rp[ch]->ch_tag);		/* dmat */
1243
1244		if (err) {
1245			device_printf(dev, "bus_dma_tag_create(%s[%d]) "
1246			    "failed.\n", dir, ch);
1247			return (err);
1248		}
1249
1250		err = bus_dmamem_alloc(rp[ch]->ch_tag, (void **)
1251		    &rp[ch]->buf_addr, BUS_DMA_WAITOK | BUS_DMA_COHERENT |
1252		    BUS_DMA_ZERO, &rp[ch]->ch_map);
1253		if (err) {
1254			device_printf(dev, "bus_dmamem_alloc(%s[%d]) failed.\n",
1255			    dir, ch);
1256			return (err);
1257		}
1258
1259		bzero(rp[ch]->buf_addr, adapter->sg_buf_size);
1260
1261		err = bus_dmamap_load(rp[ch]->ch_tag, rp[ch]->ch_map,
1262		    rp[ch]->buf_addr, adapter->sg_buf_size, callback_dma,
1263		    &hw_addr, BUS_DMA_NOWAIT);
1264		if (err) {
1265			device_printf(dev, "bus_dmamap_load(%s[%d]) failed.\n",
1266			    dir, ch);
1267			return (err);
1268		}
1269		rp[ch]->buf_hw_addr = hw_addr;
1270		rp[ch]->num_sg = 1;
1271		rp[ch]->state = SUME_RIFFA_CHAN_STATE_IDLE;
1272
1273		rp[ch]->rtag = SUME_INIT_RTAG;
1274	}
1275
1276	return (0);
1277}
1278
1279static int
1280sume_probe_riffa_buffers(struct sume_adapter *adapter)
1281{
1282	int error;
1283
1284	error = sume_probe_riffa_buffer(adapter, &adapter->recv, "recv");
1285	if (error)
1286		return (error);
1287
1288	error = sume_probe_riffa_buffer(adapter, &adapter->send, "send");
1289
1290	return (error);
1291}
1292
1293static void
1294sume_sysctl_init(struct sume_adapter *adapter)
1295{
1296	device_t dev = adapter->dev;
1297	struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev);
1298	struct sysctl_oid *tree = device_get_sysctl_tree(dev);
1299	struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree);
1300	struct sysctl_oid *tmp_tree;
1301	char namebuf[MAX_IFC_NAME_LEN];
1302	int i;
1303
1304	tree = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "sume", CTLFLAG_RW,
1305	    0, "SUME top-level tree");
1306	if (tree == NULL) {
1307		device_printf(dev, "SYSCTL_ADD_NODE failed.\n");
1308		return;
1309	}
1310	SYSCTL_ADD_INT(ctx, child, OID_AUTO, "debug", CTLFLAG_RW,
1311	    &adapter->sume_debug, 0, "debug int leaf");
1312
1313	/* total RX error stats */
1314	SYSCTL_ADD_U64(ctx, child, OID_AUTO, "rx_epkts",
1315	    CTLFLAG_RD, &adapter->packets_err, 0, "rx errors");
1316	SYSCTL_ADD_U64(ctx, child, OID_AUTO, "rx_ebytes",
1317	    CTLFLAG_RD, &adapter->bytes_err, 0, "rx error bytes");
1318
1319	for (i = SUME_NPORTS - 1; i >= 0; i--) {
1320		struct ifnet *ifp = adapter->ifp[i];
1321		if (ifp == NULL)
1322			continue;
1323
1324		struct nf_priv *nf_priv = ifp->if_softc;
1325
1326		snprintf(namebuf, MAX_IFC_NAME_LEN, "%s%d",
1327		    SUME_ETH_DEVICE_NAME, nf_priv->unit);
1328		tmp_tree = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
1329		    CTLFLAG_RW, 0, "SUME ifc tree");
1330		if (tmp_tree == NULL) {
1331			device_printf(dev, "SYSCTL_ADD_NODE failed.\n");
1332			return;
1333		}
1334
1335		/* Packets dropped by down interface. */
1336		SYSCTL_ADD_U64(ctx, SYSCTL_CHILDREN(tmp_tree), OID_AUTO,
1337		    "ifc_down_bytes", CTLFLAG_RD,
1338		    &nf_priv->stats.ifc_down_bytes, 0, "ifc_down bytes");
1339		SYSCTL_ADD_U64(ctx, SYSCTL_CHILDREN(tmp_tree), OID_AUTO,
1340		    "ifc_down_packets", CTLFLAG_RD,
1341		    &nf_priv->stats.ifc_down_packets, 0, "ifc_down packets");
1342
1343		/* HW RX stats */
1344		SYSCTL_ADD_U64(ctx, SYSCTL_CHILDREN(tmp_tree), OID_AUTO,
1345		    "hw_rx_packets", CTLFLAG_RD, &nf_priv->stats.hw_rx_packets,
1346		    0, "hw_rx packets");
1347
1348		/* HW TX stats */
1349		SYSCTL_ADD_U64(ctx, SYSCTL_CHILDREN(tmp_tree), OID_AUTO,
1350		    "hw_tx_packets", CTLFLAG_RD, &nf_priv->stats.hw_tx_packets,
1351		    0, "hw_tx packets");
1352
1353		/* RX stats */
1354		SYSCTL_ADD_U64(ctx, SYSCTL_CHILDREN(tmp_tree), OID_AUTO,
1355		    "rx_bytes", CTLFLAG_RD, &nf_priv->stats.rx_bytes, 0,
1356		    "rx bytes");
1357		SYSCTL_ADD_U64(ctx, SYSCTL_CHILDREN(tmp_tree), OID_AUTO,
1358		    "rx_dropped", CTLFLAG_RD, &nf_priv->stats.rx_dropped, 0,
1359		    "rx dropped");
1360		SYSCTL_ADD_U64(ctx, SYSCTL_CHILDREN(tmp_tree), OID_AUTO,
1361		    "rx_packets", CTLFLAG_RD, &nf_priv->stats.rx_packets, 0,
1362		    "rx packets");
1363
1364		/* TX stats */
1365		SYSCTL_ADD_U64(ctx, SYSCTL_CHILDREN(tmp_tree), OID_AUTO,
1366		    "tx_bytes", CTLFLAG_RD, &nf_priv->stats.tx_bytes, 0,
1367		    "tx bytes");
1368		SYSCTL_ADD_U64(ctx, SYSCTL_CHILDREN(tmp_tree), OID_AUTO,
1369		    "tx_dropped", CTLFLAG_RD, &nf_priv->stats.tx_dropped, 0,
1370		    "tx dropped");
1371		SYSCTL_ADD_U64(ctx, SYSCTL_CHILDREN(tmp_tree), OID_AUTO,
1372		    "tx_packets", CTLFLAG_RD, &nf_priv->stats.tx_packets, 0,
1373		    "tx packets");
1374	}
1375}
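
/*
 * The resulting tree can be inspected with sysctl(8); for the first adapter
 * (assuming default unit numbering) something like:
 *
 *	# sysctl dev.sume.0.debug=1
 *	# sysctl dev.sume.0.sume0.rx_packets
 */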
1376
1377static void
1378sume_local_timer(void *arg)
1379{
1380	struct sume_adapter *adapter = arg;
1381
1382	if (!adapter->running)
1383		return;
1384
1385	taskqueue_enqueue(adapter->tq, &adapter->stat_task);
1386
1387	SUME_LOCK(adapter);
1388	if (adapter->send[SUME_RIFFA_CHANNEL_DATA]->state !=
1389	    SUME_RIFFA_CHAN_STATE_IDLE && ++adapter->wd_counter >= 3) {
		/* Reset the HW and TX state if stuck for 3 seconds. */
1391		device_printf(adapter->dev, "TX stuck, resetting adapter.\n");
1392		read_reg(adapter, RIFFA_INFO_REG_OFF);
1393
1394		adapter->send[SUME_RIFFA_CHANNEL_DATA]->state =
1395		    SUME_RIFFA_CHAN_STATE_IDLE;
1396		adapter->wd_counter = 0;
1397
1398		check_tx_queues(adapter);
1399	}
1400	SUME_UNLOCK(adapter);
1401
1402	callout_reset(&adapter->timer, 1 * hz, sume_local_timer, adapter);
1403}
1404
1405static void
1406sume_get_stats(void *context, int pending)
1407{
1408	struct sume_adapter *adapter = context;
1409	int i;
1410
1411	for (i = 0; i < SUME_NPORTS; i++) {
1412		struct ifnet *ifp = adapter->ifp[i];
1413
1414		if (ifp->if_flags & IFF_UP) {
1415			struct nf_priv *nf_priv = ifp->if_softc;
1416			struct sume_ifreq sifr;
1417
1418			sume_update_link_status(ifp);
1419
1420			/* Get RX counter. */
1421			sifr.addr = SUME_STAT_RX_ADDR(nf_priv->port);
1422			sifr.val = 0;
1423
1424			if (!get_modreg_value(nf_priv, &sifr))
1425				nf_priv->stats.hw_rx_packets += sifr.val;
1426
1427			/* Get TX counter. */
1428			sifr.addr = SUME_STAT_TX_ADDR(nf_priv->port);
1429			sifr.val = 0;
1430
1431			if (!get_modreg_value(nf_priv, &sifr))
1432				nf_priv->stats.hw_tx_packets += sifr.val;
1433		}
1434	}
1435}
1436
1437static int
1438sume_attach(device_t dev)
1439{
1440	struct sume_adapter *adapter = device_get_softc(dev);
1441	adapter->dev = dev;
1442	int error, i;
1443
1444	mtx_init(&adapter->lock, "Global lock", NULL, MTX_DEF);
1445
1446	adapter->running = 0;
1447
1448	/* OK finish up RIFFA. */
1449	error = sume_probe_riffa_pci(adapter);
1450	if (error != 0)
1451		goto error;
1452
1453	error = sume_probe_riffa_buffers(adapter);
1454	if (error != 0)
1455		goto error;
1456
1457	/* Now do the network interfaces. */
1458	for (i = 0; i < SUME_NPORTS; i++) {
1459		error = sume_ifp_alloc(adapter, i);
1460		if (error != 0)
1461			goto error;
1462	}
1463
	/* Register stats and sysctls. */
1465	sume_sysctl_init(adapter);
1466
1467	/* Reset the HW. */
1468	read_reg(adapter, RIFFA_INFO_REG_OFF);
1469
1470	/* Ready to go, "enable" IRQ. */
1471	adapter->running = 1;
1472
1473	callout_init(&adapter->timer, 1);
1474	TASK_INIT(&adapter->stat_task, 0, sume_get_stats, adapter);
1475
1476	adapter->tq = taskqueue_create("sume_stats", M_NOWAIT,
1477	    taskqueue_thread_enqueue, &adapter->tq);
1478	taskqueue_start_threads(&adapter->tq, 1, PI_NET, "%s stattaskq",
1479	    device_get_nameunit(adapter->dev));
1480
1481	callout_reset(&adapter->timer, 1 * hz, sume_local_timer, adapter);
1482
1483	return (0);
1484
1485error:
1486	sume_detach(dev);
1487
1488	return (error);
1489}
1490
1491static void
1492sume_remove_riffa_buffer(const struct sume_adapter *adapter,
1493    struct riffa_chnl_dir **pp)
1494{
1495	int ch;
1496
1497	for (ch = 0; ch < SUME_RIFFA_CHANNELS; ch++) {
1498		if (pp[ch] == NULL)
1499			continue;
1500
1501		if (pp[ch]->buf_hw_addr != 0) {
1502			bus_dmamem_free(pp[ch]->ch_tag, pp[ch]->buf_addr,
1503			    pp[ch]->ch_map);
1504			pp[ch]->buf_hw_addr = 0;
1505		}
1506
1507		free(pp[ch], M_SUME);
1508	}
1509}
1510
1511static void
1512sume_remove_riffa_buffers(struct sume_adapter *adapter)
1513{
1514	if (adapter->send != NULL) {
1515		sume_remove_riffa_buffer(adapter, adapter->send);
1516		free(adapter->send, M_SUME);
1517		adapter->send = NULL;
1518	}
1519	if (adapter->recv != NULL) {
1520		sume_remove_riffa_buffer(adapter, adapter->recv);
1521		free(adapter->recv, M_SUME);
1522		adapter->recv = NULL;
1523	}
1524}
1525
1526static int
1527sume_detach(device_t dev)
1528{
1529	struct sume_adapter *adapter = device_get_softc(dev);
1530	int i;
1531	struct nf_priv *nf_priv;
1532
1533	KASSERT(mtx_initialized(&adapter->lock), ("SUME mutex not "
1534	    "initialized"));
1535	adapter->running = 0;
1536
1537	/* Drain the stats callout and task queue. */
1538	callout_drain(&adapter->timer);
1539
1540	if (adapter->tq) {
1541		taskqueue_drain(adapter->tq, &adapter->stat_task);
1542		taskqueue_free(adapter->tq);
1543	}
1544
1545	for (i = 0; i < SUME_NPORTS; i++) {
1546		struct ifnet *ifp = adapter->ifp[i];
1547		if (ifp == NULL)
1548			continue;
1549
1550		ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
1551		nf_priv = ifp->if_softc;
1552
1553		if (ifp->if_flags & IFF_UP)
1554			if_down(ifp);
1555		ifmedia_removeall(&nf_priv->media);
1556		free_unr(unr, nf_priv->unit);
1557
1558		ifp->if_flags &= ~IFF_UP;
1559		ether_ifdetach(ifp);
1560		if_free(ifp);
1561
1562		free(nf_priv, M_SUME);
1563	}
1564
1565	sume_remove_riffa_buffers(adapter);
1566
1567	if (adapter->irq.tag)
1568		bus_teardown_intr(dev, adapter->irq.res, adapter->irq.tag);
1569	if (adapter->irq.res)
1570		bus_release_resource(dev, SYS_RES_IRQ, adapter->irq.rid,
1571		    adapter->irq.res);
1572
1573	pci_release_msi(dev);
1574
1575	if (adapter->bar0_addr)
1576		bus_release_resource(dev, SYS_RES_MEMORY, adapter->rid,
1577		    adapter->bar0_addr);
1578
1579	mtx_destroy(&adapter->lock);
1580
1581	return (0);
1582}
1583
1584static int
1585mod_event(module_t mod, int cmd, void *arg)
1586{
1587	switch (cmd) {
1588	case MOD_LOAD:
1589		unr = new_unrhdr(0, INT_MAX, NULL);
1590		break;
1591
1592	case MOD_UNLOAD:
1593		delete_unrhdr(unr);
1594		break;
1595	}
1596
1597	return (0);
1598}

static devclass_t sume_devclass;
1600
1601DRIVER_MODULE(sume, pci, sume_driver, sume_devclass, mod_event, 0);
1602MODULE_VERSION(sume, 1);
1603