1/*-
2 * SPDX-License-Identifier: BSD-2-Clause
3 *
4 * Copyright (c) 2015 Bjoern A. Zeeb
5 * Copyright (c) 2020 Denis Salopek
6 *
7 * This software was developed by SRI International and the University of
8 * Cambridge Computer Laboratory under DARPA/AFRL contract FA8750-11-C-0249
9 * ("MRC2"), as part of the DARPA MRC research programme.
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 *    notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 *    notice, this list of conditions and the following disclaimer in the
18 *    documentation and/or other materials provided with the distribution.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
24 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30 * POSSIBILITY OF SUCH DAMAGE.
31 */
32
33#include <sys/param.h>
34#include <sys/bus.h>
35#include <sys/endian.h>
36#include <sys/kernel.h>
37#include <sys/limits.h>
38#include <sys/module.h>
39#include <sys/rman.h>
40#include <sys/socket.h>
41#include <sys/sockio.h>
42#include <sys/sysctl.h>
43#include <sys/taskqueue.h>
44
45#include <net/if.h>
46#include <net/if_media.h>
47#include <net/if_types.h>
48#include <net/if_var.h>
49
50#include <netinet/in.h>
51#include <netinet/if_ether.h>
52
53#include <dev/pci/pcivar.h>
54#include <dev/pci/pcireg.h>
55
56#include <machine/bus.h>
57
58#include "adapter.h"
59
60#define	PCI_VENDOR_ID_XILINX	0x10ee
61#define	PCI_DEVICE_ID_SUME	0x7028
62
63/* SUME bus driver interface */
64static int sume_probe(device_t);
65static int sume_attach(device_t);
66static int sume_detach(device_t);
67
68static device_method_t sume_methods[] = {
69	DEVMETHOD(device_probe,		sume_probe),
70	DEVMETHOD(device_attach,	sume_attach),
71	DEVMETHOD(device_detach,	sume_detach),
72	DEVMETHOD_END
73};
74
75static driver_t sume_driver = {
76	"sume",
77	sume_methods,
78	sizeof(struct sume_adapter)
79};
80
/*
 * The DMA engine for SUME generates an interrupt for each RX/TX transaction.
 * Depending on the channel (0 for packet transactions, 1 for register
 * transactions) the relevant bits of the interrupt vector are the lowest five
 * bits or the next five bits.
 *
 * When receiving packets from SUME (RX):
 * (1) SUME received a packet on one of the interfaces.
 * (2) SUME generates an interrupt vector, bit 00001 is set (channel 0 - new RX
 *     transaction).
 * (3) We read the length of the incoming packet and the offset along with the
 *     'last' flag from the SUME registers.
 * (4) We prepare for the DMA transaction by setting the bouncebuffer at the
 *     address buf_addr (see the layout sketch after this comment). For now,
 *     this is how it's done:
 *     - The first 3*sizeof(uint32_t) bytes are: the lower and upper 32 bits of
 *     the physical address where we want the data to arrive (buf_addr[0] and
 *     buf_addr[1]), and the length of the incoming data (buf_addr[2]).
 *     - The data starts right after, at buf_addr+3*sizeof(uint32_t). The
 *     physical address buf_hw_addr is a block of contiguous memory mapped to
 *     buf_addr, so we can set the incoming data's physical address (buf_addr[0]
 *     and buf_addr[1]) to buf_hw_addr+3*sizeof(uint32_t).
 * (5) We notify SUME that the bouncebuffer is ready for the transaction by
 *     writing the lower/upper physical address buf_hw_addr to the SUME
 *     registers RIFFA_TX_SG_ADDR_LO_REG_OFF and RIFFA_TX_SG_ADDR_HI_REG_OFF as
 *     well as the number of segments to the register RIFFA_TX_SG_LEN_REG_OFF.
 * (6) SUME generates an interrupt vector, bit 00010 is set (channel 0 -
 *     bouncebuffer received).
 * (7) SUME generates an interrupt vector, bit 00100 is set (channel 0 -
 *     transaction is done).
 * (8) SUME can do both steps (6) and (7) using the same interrupt.
 * (9) We read the first 16 bytes (metadata) of the received data and note the
 *     incoming interface so we can later forward it to the right one in the OS
 *     (sume0, sume1, sume2 or sume3).
 * (10) We create an mbuf, copy the data from the bouncebuffer to the mbuf and
 *     set the mbuf rcvif to the incoming interface.
 * (11) We forward the mbuf to the appropriate interface via ifp->if_input.
 *
 * When sending packets to SUME (TX):
 * (1) The OS calls the sume_if_start() function on TX.
 * (2) We copy the mbuf packet data into the bouncebuffer at
 *     buf_addr+3*sizeof(uint32_t)+16, leaving room for the 16 bytes of
 *     metadata.
 * (3) We create the metadata based on the output interface and copy it to
 *     buf_addr+3*sizeof(uint32_t).
 * (4) We write the offset/last and length of the packet to the SUME registers
 *     RIFFA_RX_OFFLAST_REG_OFF and RIFFA_RX_LEN_REG_OFF.
 * (5) We fill the bouncebuffer by filling the first 3*sizeof(uint32_t) bytes
 *     with the physical address and length just as in RX step (4).
 * (6) We notify SUME that the bouncebuffer is ready by writing to SUME
 *     registers RIFFA_RX_SG_ADDR_LO_REG_OFF, RIFFA_RX_SG_ADDR_HI_REG_OFF and
 *     RIFFA_RX_SG_LEN_REG_OFF just as in RX step (5).
 * (7) SUME generates an interrupt vector, bit 01000 is set (channel 0 -
 *     bouncebuffer is read).
 * (8) SUME generates an interrupt vector, bit 10000 is set (channel 0 -
 *     transaction is done).
 * (9) SUME can do both steps (7) and (8) using the same interrupt.
 *
 * Internal registers
 * Every module in the SUME hardware has its own set of internal registers
 * (IDs, debugging and statistics counters, etc.). Their base addresses are
 * defined in 'projects/reference_nic/hw/tcl/reference_nic_defines.tcl' and the
 * offsets to the different memory locations of every module are defined in
 * their corresponding folder inside the library. These registers can be RO or
 * RW and are fetched/changed over one or two DMA transactions: a register
 * write is done by calling sume_module_reg_write(); a register read is done by
 * calling sume_module_reg_write() followed by sume_module_reg_read(). Check
 * those functions for more information.
 */
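
/*
 * Rough sketch of a single data-channel transaction in host memory, to make
 * the bouncebuffer layout above concrete (field names follow the nf_bb_desc
 * and nf_metadata structures declared in adapter.h; the offsets assume
 * sizeof(struct nf_bb_desc) == 3*sizeof(uint32_t) with no padding):
 *
 *	buf_addr (kernel virtual) / buf_hw_addr (physical)
 *	+0x00	uint32_t  lower 32 bits of buf_hw_addr + 0x0c	\
 *	+0x04	uint32_t  upper 32 bits of buf_hw_addr + 0x0c	 | nf_bb_desc
 *	+0x08	uint32_t  transfer length in 32-bit words	/
 *	+0x0c	16 bytes  nf_metadata (sport, dport, plen, magic, t1, t2)
 *	+0x1c	payload	  Ethernet frame data
 */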
148
149MALLOC_DECLARE(M_SUME);
150MALLOC_DEFINE(M_SUME, "sume", "NetFPGA SUME device driver");
151
152static void check_tx_queues(struct sume_adapter *);
153static void sume_fill_bb_desc(struct sume_adapter *, struct riffa_chnl_dir *,
154    uint64_t);
155
156static struct unrhdr *unr;
157
158static struct {
159	uint16_t device;
160	char *desc;
161} sume_pciids[] = {
162	{PCI_DEVICE_ID_SUME, "NetFPGA SUME reference NIC"},
163};
164
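/*
 * RIFFA registers are indexed as 32-bit words; the helpers below shift the
 * register index left by two to get the byte offset into BAR0 (so register
 * index 3, for example, lives at byte offset 12).
 */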
165static inline uint32_t
166read_reg(struct sume_adapter *adapter, int offset)
167{
168
169	return (bus_space_read_4(adapter->bt, adapter->bh, offset << 2));
170}
171
172static inline void
173write_reg(struct sume_adapter *adapter, int offset, uint32_t val)
174{
175
176	bus_space_write_4(adapter->bt, adapter->bh, offset << 2, val);
177}
178
179static int
180sume_probe(device_t dev)
181{
182	int i;
183	uint16_t v = pci_get_vendor(dev);
184	uint16_t d = pci_get_device(dev);
185
186	if (v != PCI_VENDOR_ID_XILINX)
187		return (ENXIO);
188
189	for (i = 0; i < nitems(sume_pciids); i++) {
190		if (d == sume_pciids[i].device) {
191			device_set_desc(dev, sume_pciids[i].desc);
192			return (BUS_PROBE_DEFAULT);
193		}
194	}
195
196	return (ENXIO);
197}
198
/*
 * Build an mbuf for a packet received from SUME. We expect to receive 'len'
 * bytes of data (including metadata) written from the bouncebuffer address
 * buf_addr+3*sizeof(uint32_t). The metadata tells us which SUME interface
 * received the packet (a one-hot bit in the dport field), the packet length
 * (plen), and carries the magic word, which must be 0xcafe. Once we have the
 * packet data, we create an mbuf, copy the data into it with m_copyback(), set
 * the correct interface in rcvif and return the mbuf to be later handed to the
 * OS with if_input.
 */
209static struct mbuf *
210sume_rx_build_mbuf(struct sume_adapter *adapter, uint32_t len)
211{
212	struct nf_priv *nf_priv;
213	struct mbuf *m;
214	if_t ifp = NULL;
215	int np;
216	uint16_t dport, plen, magic;
217	device_t dev = adapter->dev;
218	uint8_t *indata = (uint8_t *)
219	    adapter->recv[SUME_RIFFA_CHANNEL_DATA]->buf_addr +
220	    sizeof(struct nf_bb_desc);
221	struct nf_metadata *mdata = (struct nf_metadata *) indata;
222
223	/* The metadata header is 16 bytes. */
224	if (len < sizeof(struct nf_metadata)) {
225		device_printf(dev, "short frame (%d)\n", len);
226		adapter->packets_err++;
227		adapter->bytes_err += len;
228		return (NULL);
229	}
230
231	dport = le16toh(mdata->dport);
232	plen = le16toh(mdata->plen);
233	magic = le16toh(mdata->magic);
234
235	if (sizeof(struct nf_metadata) + plen > len ||
236	    magic != SUME_RIFFA_MAGIC) {
237		device_printf(dev, "corrupted packet (%zd + %d > %d || magic "
238		    "0x%04x != 0x%04x)\n", sizeof(struct nf_metadata), plen,
239		    len, magic, SUME_RIFFA_MAGIC);
240		return (NULL);
241	}
242
	/* Derive the incoming port from the one-hot bit in dport. */
	np = (ffs(dport & SUME_DPORT_MASK) >> 1) - 1;
	if (np < 0 || np >= SUME_NPORTS) {
246		device_printf(dev, "invalid destination port 0x%04x (%d)\n",
247		    dport, np);
248		adapter->packets_err++;
249		adapter->bytes_err += plen;
250		return (NULL);
251	}
252	ifp = adapter->ifp[np];
253	nf_priv = if_getsoftc(ifp);
254	nf_priv->stats.rx_packets++;
255	nf_priv->stats.rx_bytes += plen;
256
257	/* If the interface is down, well, we are done. */
258	if (!(if_getflags(ifp) & IFF_UP)) {
259		nf_priv->stats.ifc_down_packets++;
260		nf_priv->stats.ifc_down_bytes += plen;
261		return (NULL);
262	}
263
264	if (adapter->sume_debug)
265		printf("Building mbuf with length: %d\n", plen);
266
267	m = m_getm(NULL, plen, M_NOWAIT, MT_DATA);
268	if (m == NULL) {
269		adapter->packets_err++;
270		adapter->bytes_err += plen;
271		return (NULL);
272	}
273
274	/* Copy the data in at the right offset. */
275	m_copyback(m, 0, plen, (void *) (indata + sizeof(struct nf_metadata)));
276	m->m_pkthdr.rcvif = ifp;
277
278	return (m);
279}
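
/*
 * Worked example for the port lookup above, assuming SUME_DPORT_MASK selects
 * the CPU-facing (odd) one-hot bits, matching the metadata convention used on
 * transmit below: a frame delivered to sume1 arrives with dport bit 3 set
 * (0x0008), so ffs(0x0008) = 4, 4 >> 1 = 2, and 2 - 1 selects port index 1.
 */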
280
/*
 * SUME interrupt handler, called when we get a valid interrupt from the board.
 * Theoretically, we can receive an interrupt for any of the available
 * channels, but RIFFA DMA uses only two (0 and 1), so we use only vect0. The
 * vector is a 32-bit number using 5 bits per channel: the least significant
 * bits correspond to channel 0 and the next 5 bits correspond to channel 1.
 * Vector bits for RX/TX are:
 * RX
 * bit 0 - new transaction from SUME
 * bit 1 - SUME received our bouncebuffer address
 * bit 2 - SUME copied the received data to our bouncebuffer, transaction done
 * TX
 * bit 3 - SUME received our bouncebuffer address
 * bit 4 - SUME copied the data from our bouncebuffer, transaction done
 *
 * There are two finite state machines (one for TX, one for RX). We loop
 * through channels 0 and 1 to check our current state and which interrupt bit
 * is set.
 * TX
 * SUME_RIFFA_CHAN_STATE_IDLE: waiting for the first TX transaction.
 * SUME_RIFFA_CHAN_STATE_READY: we prepared (filled with data) the bouncebuffer
 * and triggered the SUME for the TX transaction. Waiting for interrupt bit 3
 * to go to the next state.
 * SUME_RIFFA_CHAN_STATE_READ: waiting for interrupt bit 4 (for SUME to send
 * our packet). Then we get the length of the sent data and go back to the
 * IDLE state.
 * RX
 * SUME_RIFFA_CHAN_STATE_IDLE: waiting for interrupt bit 0 (new RX
 * transaction). When we get it, we prepare our bouncebuffer for reading and
 * trigger the SUME to start the transaction. Go to the next state.
 * SUME_RIFFA_CHAN_STATE_READY: waiting for interrupt bit 1 (SUME got our
 * bouncebuffer). Go to the next state.
 * SUME_RIFFA_CHAN_STATE_READ: SUME copied data and our bouncebuffer is ready,
 * we can build the mbuf and go back to the IDLE state.
 */
316static void
317sume_intr_handler(void *arg)
318{
319	struct sume_adapter *adapter = arg;
320	uint32_t vect, vect0, len;
321	int ch, loops;
322	device_t dev = adapter->dev;
323	struct mbuf *m = NULL;
324	if_t ifp = NULL;
325	struct riffa_chnl_dir *send, *recv;
326
327	SUME_LOCK(adapter);
328
329	vect0 = read_reg(adapter, RIFFA_IRQ_REG0_OFF);
330	if ((vect0 & SUME_INVALID_VECT) != 0) {
331		SUME_UNLOCK(adapter);
332		return;
333	}
334
	/*
	 * We have only one interrupt for all channels and no way to quickly
	 * look up which channel(s) raised it, so check them all.
	 */
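	/*
	 * Each channel owns a 5-bit slice of the vector: channel 0 uses bits
	 * 0-4 and channel 1 uses bits 5-9.  For example, vect0 = 0x21 signals
	 * a new RX transaction on both channel 0 (data) and channel 1
	 * (register).
	 */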
339	for (ch = 0; ch < SUME_RIFFA_CHANNELS; ch++) {
340		vect = vect0 >> (5 * ch);
341		send = adapter->send[ch];
342		recv = adapter->recv[ch];
343
344		loops = 0;
345		while ((vect & (SUME_MSI_TXBUF | SUME_MSI_TXDONE)) &&
346		    loops <= 5) {
347			if (adapter->sume_debug)
348				device_printf(dev, "TX ch %d state %u vect = "
349				    "0x%08x\n", ch, send->state, vect);
350			switch (send->state) {
351			case SUME_RIFFA_CHAN_STATE_IDLE:
352				break;
353			case SUME_RIFFA_CHAN_STATE_READY:
354				if (!(vect & SUME_MSI_TXBUF)) {
355					device_printf(dev, "ch %d unexpected "
356					    "interrupt in send+3 state %u: "
357					    "vect = 0x%08x\n", ch, send->state,
358					    vect);
359					send->recovery = 1;
360					break;
361				}
362				send->state = SUME_RIFFA_CHAN_STATE_READ;
363				vect &= ~SUME_MSI_TXBUF;
364				break;
365			case SUME_RIFFA_CHAN_STATE_READ:
366				if (!(vect & SUME_MSI_TXDONE)) {
367					device_printf(dev, "ch %d unexpected "
368					    "interrupt in send+4 state %u: "
369					    "vect = 0x%08x\n", ch, send->state,
370					    vect);
371					send->recovery = 1;
372					break;
373				}
374				send->state = SUME_RIFFA_CHAN_STATE_LEN;
375
376				len = read_reg(adapter, RIFFA_CHNL_REG(ch,
377				    RIFFA_RX_TNFR_LEN_REG_OFF));
378				if (ch == SUME_RIFFA_CHANNEL_DATA) {
379					send->state =
380					    SUME_RIFFA_CHAN_STATE_IDLE;
381					check_tx_queues(adapter);
382				} else if (ch == SUME_RIFFA_CHANNEL_REG)
383					wakeup(&send->event);
384				else {
385					device_printf(dev, "ch %d unexpected "
386					    "interrupt in send+4 state %u: "
387					    "vect = 0x%08x\n", ch, send->state,
388					    vect);
389					send->recovery = 1;
390				}
391				vect &= ~SUME_MSI_TXDONE;
392				break;
393			case SUME_RIFFA_CHAN_STATE_LEN:
394				break;
395			default:
396				device_printf(dev, "unknown TX state!\n");
397			}
398			loops++;
399		}
400
		if ((vect & (SUME_MSI_TXBUF | SUME_MSI_TXDONE)) &&
		    send->recovery)
			device_printf(dev, "ch %d ignoring vect = 0x%08x "
			    "during TX; in recovery; state = %d loops = "
			    "%d\n", ch, vect, send->state, loops);
406
407		loops = 0;
		while ((vect & (SUME_MSI_RXQUE | SUME_MSI_RXBUF |
		    SUME_MSI_RXDONE)) && loops <= 5) {
410			if (adapter->sume_debug)
411				device_printf(dev, "RX ch %d state %u vect = "
412				    "0x%08x\n", ch, recv->state, vect);
413			switch (recv->state) {
414			case SUME_RIFFA_CHAN_STATE_IDLE:
415				if (!(vect & SUME_MSI_RXQUE)) {
416					device_printf(dev, "ch %d unexpected "
417					    "interrupt in recv+0 state %u: "
418					    "vect = 0x%08x\n", ch, recv->state,
419					    vect);
420					recv->recovery = 1;
421					break;
422				}
423				uint32_t max_ptr;
424
425				/* Clear recovery state. */
426				recv->recovery = 0;
427
428				/* Get offset and length. */
429				recv->offlast = read_reg(adapter,
430				    RIFFA_CHNL_REG(ch,
431				    RIFFA_TX_OFFLAST_REG_OFF));
432				recv->len = read_reg(adapter, RIFFA_CHNL_REG(ch,
433				    RIFFA_TX_LEN_REG_OFF));
434
435				/* Boundary checks. */
436				max_ptr = (uint32_t)((uintptr_t)recv->buf_addr
437				    + SUME_RIFFA_OFFSET(recv->offlast)
438				    + SUME_RIFFA_LEN(recv->len) - 1);
439				if (max_ptr <
440				    (uint32_t)((uintptr_t)recv->buf_addr))
441					device_printf(dev, "receive buffer "
442					    "wrap-around overflow.\n");
443				if (SUME_RIFFA_OFFSET(recv->offlast) +
444				    SUME_RIFFA_LEN(recv->len) >
445				    adapter->sg_buf_size)
446					device_printf(dev, "receive buffer too"
447					    " small.\n");
448
449				/* Fill the bouncebuf "descriptor". */
450				sume_fill_bb_desc(adapter, recv,
451				    SUME_RIFFA_LEN(recv->len));
452
453				bus_dmamap_sync(recv->ch_tag, recv->ch_map,
454				    BUS_DMASYNC_PREREAD |
455				    BUS_DMASYNC_PREWRITE);
456				write_reg(adapter, RIFFA_CHNL_REG(ch,
457				    RIFFA_TX_SG_ADDR_LO_REG_OFF),
458				    SUME_RIFFA_LO_ADDR(recv->buf_hw_addr));
459				write_reg(adapter, RIFFA_CHNL_REG(ch,
460				    RIFFA_TX_SG_ADDR_HI_REG_OFF),
461				    SUME_RIFFA_HI_ADDR(recv->buf_hw_addr));
462				write_reg(adapter, RIFFA_CHNL_REG(ch,
463				    RIFFA_TX_SG_LEN_REG_OFF),
464				    4 * recv->num_sg);
465				bus_dmamap_sync(recv->ch_tag, recv->ch_map,
466				    BUS_DMASYNC_POSTREAD |
467				    BUS_DMASYNC_POSTWRITE);
468
469				recv->state = SUME_RIFFA_CHAN_STATE_READY;
470				vect &= ~SUME_MSI_RXQUE;
471				break;
472			case SUME_RIFFA_CHAN_STATE_READY:
473				if (!(vect & SUME_MSI_RXBUF)) {
474					device_printf(dev, "ch %d unexpected "
475					    "interrupt in recv+1 state %u: "
476					    "vect = 0x%08x\n", ch, recv->state,
477					    vect);
478					recv->recovery = 1;
479					break;
480				}
481				recv->state = SUME_RIFFA_CHAN_STATE_READ;
482				vect &= ~SUME_MSI_RXBUF;
483				break;
484			case SUME_RIFFA_CHAN_STATE_READ:
485				if (!(vect & SUME_MSI_RXDONE)) {
486					device_printf(dev, "ch %d unexpected "
487					    "interrupt in recv+2 state %u: "
488					    "vect = 0x%08x\n", ch, recv->state,
489					    vect);
490					recv->recovery = 1;
491					break;
492				}
493				len = read_reg(adapter, RIFFA_CHNL_REG(ch,
494				    RIFFA_TX_TNFR_LEN_REG_OFF));
495
496				/* Remember, len and recv->len are words. */
497				if (ch == SUME_RIFFA_CHANNEL_DATA) {
498					m = sume_rx_build_mbuf(adapter,
499					    len << 2);
500					recv->state =
501					    SUME_RIFFA_CHAN_STATE_IDLE;
502				} else if (ch == SUME_RIFFA_CHANNEL_REG)
503					wakeup(&recv->event);
504				else {
505					device_printf(dev, "ch %d unexpected "
506					    "interrupt in recv+2 state %u: "
507					    "vect = 0x%08x\n", ch, recv->state,
508					    vect);
509					recv->recovery = 1;
510				}
511				vect &= ~SUME_MSI_RXDONE;
512				break;
513			case SUME_RIFFA_CHAN_STATE_LEN:
514				break;
515			default:
516				device_printf(dev, "unknown RX state!\n");
517			}
518			loops++;
519		}
520
521		if ((vect & (SUME_MSI_RXQUE | SUME_MSI_RXBUF |
522		    SUME_MSI_RXDONE)) && recv->recovery) {
			device_printf(dev, "ch %d ignoring vect = 0x%08x "
			    "during RX; in recovery; state = %d, loops = "
			    "%d\n", ch, vect, recv->state, loops);
526
527			/* Clean the unfinished transaction. */
528			if (ch == SUME_RIFFA_CHANNEL_REG &&
529			    vect & SUME_MSI_RXDONE) {
530				read_reg(adapter, RIFFA_CHNL_REG(ch,
531				    RIFFA_TX_TNFR_LEN_REG_OFF));
532				recv->recovery = 0;
533			}
534		}
535	}
536	SUME_UNLOCK(adapter);
537
538	if (m != NULL) {
539		ifp = m->m_pkthdr.rcvif;
540		if_input(ifp, m);
541	}
542}
543
544/*
545 * As we cannot disable interrupt generation, ignore early interrupts by waiting
546 * for the adapter to go into the 'running' state.
547 */
548static int
549sume_intr_filter(void *arg)
550{
551	struct sume_adapter *adapter = arg;
552
553	if (adapter->running == 0)
554		return (FILTER_STRAY);
555
556	return (FILTER_SCHEDULE_THREAD);
557}
558
559static int
560sume_probe_riffa_pci(struct sume_adapter *adapter)
561{
562	device_t dev = adapter->dev;
563	int error, count, capmem;
564	uint32_t reg, devctl, linkctl;
565
566	pci_enable_busmaster(dev);
567
568	adapter->rid = PCIR_BAR(0);
569	adapter->bar0_addr = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
570	    &adapter->rid, RF_ACTIVE);
571	if (adapter->bar0_addr == NULL) {
572		device_printf(dev, "unable to allocate bus resource: "
573		    "BAR0 address\n");
574		return (ENXIO);
575	}
576	adapter->bt = rman_get_bustag(adapter->bar0_addr);
577	adapter->bh = rman_get_bushandle(adapter->bar0_addr);
578	adapter->bar0_len = rman_get_size(adapter->bar0_addr);
579	if (adapter->bar0_len != 1024) {
580		device_printf(dev, "BAR0 resource length %lu != 1024\n",
581		    adapter->bar0_len);
582		return (ENXIO);
583	}
584
585	count = pci_msi_count(dev);
586	error = pci_alloc_msi(dev, &count);
587	if (error) {
588		device_printf(dev, "unable to allocate bus resource: PCI "
589		    "MSI\n");
590		return (error);
591	}
592
	adapter->irq.rid = 1; /* Should be 1, per pci_alloc_msi(). */
594	adapter->irq.res = bus_alloc_resource_any(dev, SYS_RES_IRQ,
595	    &adapter->irq.rid, RF_SHAREABLE | RF_ACTIVE);
596	if (adapter->irq.res == NULL) {
597		device_printf(dev, "unable to allocate bus resource: IRQ "
598		    "memory\n");
599		return (ENXIO);
600	}
601
602	error = bus_setup_intr(dev, adapter->irq.res, INTR_MPSAFE |
603	    INTR_TYPE_NET, sume_intr_filter, sume_intr_handler, adapter,
604	    &adapter->irq.tag);
605	if (error) {
606		device_printf(dev, "failed to setup interrupt for rid %d, name"
607		    " %s: %d\n", adapter->irq.rid, "SUME_INTR", error);
608		return (ENXIO);
609	}
610
611	if (pci_find_cap(dev, PCIY_EXPRESS, &capmem) != 0) {
612		device_printf(dev, "PCI not PCIe capable\n");
613		return (ENXIO);
614	}
615
616	devctl = pci_read_config(dev, capmem + PCIER_DEVICE_CTL, 2);
617	pci_write_config(dev, capmem + PCIER_DEVICE_CTL, (devctl |
618	    PCIEM_CTL_EXT_TAG_FIELD), 2);
619
620	devctl = pci_read_config(dev, capmem + PCIER_DEVICE_CTL2, 2);
621	pci_write_config(dev, capmem + PCIER_DEVICE_CTL2, (devctl |
622	    PCIEM_CTL2_ID_ORDERED_REQ_EN), 2);
623
624	linkctl = pci_read_config(dev, capmem + PCIER_LINK_CTL, 2);
625	pci_write_config(dev, capmem + PCIER_LINK_CTL, (linkctl |
626	    PCIEM_LINK_CTL_RCB), 2);
627
628	reg = read_reg(adapter, RIFFA_INFO_REG_OFF);
629	adapter->num_sg = RIFFA_SG_ELEMS * ((reg >> 19) & 0xf);
630	adapter->sg_buf_size = RIFFA_SG_BUF_SIZE * ((reg >> 19) & 0xf);
631
632	error = ENODEV;
633	/* Check bus master is enabled. */
634	if (((reg >> 4) & 0x1) != 1) {
635		device_printf(dev, "bus master not enabled: %d\n",
636		    (reg >> 4) & 0x1);
637		return (error);
638	}
639	/* Check link parameters are valid. */
640	if (((reg >> 5) & 0x3f) == 0 || ((reg >> 11) & 0x3) == 0) {
641		device_printf(dev, "link parameters not valid: %d %d\n",
642		    (reg >> 5) & 0x3f, (reg >> 11) & 0x3);
643		return (error);
644	}
645	/* Check # of channels are within valid range. */
646	if ((reg & 0xf) == 0 || (reg & 0xf) > RIFFA_MAX_CHNLS) {
647		device_printf(dev, "number of channels out of range: %d\n",
648		    reg & 0xf);
649		return (error);
650	}
651	/* Check bus width. */
652	if (((reg >> 19) & 0xf) == 0 ||
653	    ((reg >> 19) & 0xf) > RIFFA_MAX_BUS_WIDTH_PARAM) {
654		device_printf(dev, "bus width out of range: %d\n",
655		    (reg >> 19) & 0xf);
656		return (error);
657	}
658
659	device_printf(dev, "[riffa] # of channels: %d\n",
660	    reg & 0xf);
661	device_printf(dev, "[riffa] bus interface width: %d\n",
662	    ((reg >> 19) & 0xf) << 5);
663	device_printf(dev, "[riffa] bus master enabled: %d\n",
664	    (reg >> 4) & 0x1);
665	device_printf(dev, "[riffa] negotiated link width: %d\n",
666	    (reg >> 5) & 0x3f);
	device_printf(dev, "[riffa] negotiated link rate: %d MT/s\n",
	    ((reg >> 11) & 0x3) * 2500);
669	device_printf(dev, "[riffa] max downstream payload: %d B\n",
670	    128 << ((reg >> 13) & 0x7));
671	device_printf(dev, "[riffa] max upstream payload: %d B\n",
672	    128 << ((reg >> 16) & 0x7));
673
674	return (0);
675}
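
/*
 * For reference, the RIFFA_INFO_REG_OFF bit fields decoded by the probe
 * routine above (as used here; the RIFFA core itself is authoritative):
 *
 *	[3:0]	number of channels
 *	[4]	bus master enabled
 *	[10:5]	negotiated link width
 *	[12:11]	negotiated link rate (x 2500 MT/s)
 *	[15:13]	max downstream payload (128 B << value)
 *	[18:16]	max upstream payload (128 B << value)
 *	[22:19]	bus interface width parameter (x 32 bits)
 */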
676
/* Without a sume_if_init() handler, ether_ioctl() panics. */
678static void
679sume_if_init(void *sc)
680{
681}
682
683/* Write the address and length for our incoming / outgoing transaction. */
684static void
685sume_fill_bb_desc(struct sume_adapter *adapter, struct riffa_chnl_dir *p,
686    uint64_t len)
687{
688	struct nf_bb_desc *bouncebuf = (struct nf_bb_desc *) p->buf_addr;
689
690	bouncebuf->lower = (p->buf_hw_addr + sizeof(struct nf_bb_desc));
691	bouncebuf->upper = (p->buf_hw_addr + sizeof(struct nf_bb_desc)) >> 32;
692	bouncebuf->len = len >> 2;
693}
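
/*
 * Example: for a 19-word transfer the callers pass SUME_RIFFA_LEN(19), which
 * (assuming SUME_RIFFA_LEN() converts words to bytes, as the surrounding code
 * suggests) is 76 bytes; the descriptor then advertises 76 >> 2 = 19 words and
 * points the DMA engine at buf_hw_addr + sizeof(struct nf_bb_desc).
 */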
694
695/* Module register locked write. */
696static int
697sume_modreg_write_locked(struct sume_adapter *adapter)
698{
699	struct riffa_chnl_dir *send = adapter->send[SUME_RIFFA_CHANNEL_REG];
700
701	/* Let the FPGA know about the transfer. */
702	write_reg(adapter, RIFFA_CHNL_REG(SUME_RIFFA_CHANNEL_REG,
703	    RIFFA_RX_OFFLAST_REG_OFF), SUME_OFFLAST);
704	write_reg(adapter, RIFFA_CHNL_REG(SUME_RIFFA_CHANNEL_REG,
705	    RIFFA_RX_LEN_REG_OFF), send->len);	/* words */
706
707	/* Fill the bouncebuf "descriptor". */
708	sume_fill_bb_desc(adapter, send, SUME_RIFFA_LEN(send->len));
709
	/* Update the state before initiating the DMA to avoid races. */
711	send->state = SUME_RIFFA_CHAN_STATE_READY;
712
713	bus_dmamap_sync(send->ch_tag, send->ch_map,
714	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
715	/* DMA. */
716	write_reg(adapter, RIFFA_CHNL_REG(SUME_RIFFA_CHANNEL_REG,
717	    RIFFA_RX_SG_ADDR_LO_REG_OFF),
718	    SUME_RIFFA_LO_ADDR(send->buf_hw_addr));
719	write_reg(adapter, RIFFA_CHNL_REG(SUME_RIFFA_CHANNEL_REG,
720	    RIFFA_RX_SG_ADDR_HI_REG_OFF),
721	    SUME_RIFFA_HI_ADDR(send->buf_hw_addr));
722	write_reg(adapter, RIFFA_CHNL_REG(SUME_RIFFA_CHANNEL_REG,
723	    RIFFA_RX_SG_LEN_REG_OFF), 4 * send->num_sg);
724	bus_dmamap_sync(send->ch_tag, send->ch_map,
725	    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
726
727	return (0);
728}
729
730/*
731 * Request a register read or write (depending on optype).
732 * If optype is set (0x1f) this will result in a register write,
733 * otherwise this will result in a register read request at the given
734 * address and the result will need to be DMAed back.
735 */
736static int
737sume_module_reg_write(struct nf_priv *nf_priv, struct sume_ifreq *sifr,
738    uint32_t optype)
739{
740	struct sume_adapter *adapter = nf_priv->adapter;
741	struct riffa_chnl_dir *send = adapter->send[SUME_RIFFA_CHANNEL_REG];
742	struct nf_regop_data *data;
743	int error;
744
745	/*
746	 * 1. Make sure the channel is free;  otherwise return EBUSY.
747	 * 2. Prepare the memory in the bounce buffer (which we always
748	 *    use for regs).
749	 * 3. Start the DMA process.
750	 * 4. Sleep and wait for result and return success or error.
751	 */
752	SUME_LOCK(adapter);
753
754	if (send->state != SUME_RIFFA_CHAN_STATE_IDLE) {
755		SUME_UNLOCK(adapter);
756		return (EBUSY);
757	}
758
759	data = (struct nf_regop_data *) (send->buf_addr +
760	    sizeof(struct nf_bb_desc));
761	data->addr = htole32(sifr->addr);
762	data->val = htole32(sifr->val);
	/* Tag to identify the request. */
764	data->rtag = htole32(++send->rtag);
765	data->optype = htole32(optype);
766	send->len = sizeof(struct nf_regop_data) / 4; /* words */
767
768	error = sume_modreg_write_locked(adapter);
769	if (error) {
770		SUME_UNLOCK(adapter);
771		return (EFAULT);
772	}
773
774	/* Timeout after 1s. */
775	if (send->state != SUME_RIFFA_CHAN_STATE_LEN)
776		error = msleep(&send->event, &adapter->lock, 0,
777		    "Waiting recv finish", 1 * hz);
778
	/* Done if this was a write, or if we were interrupted or timed out. */
780	if (optype != SUME_MR_READ || error != 0 || error == EWOULDBLOCK) {
781		send->state = SUME_RIFFA_CHAN_STATE_IDLE;
782		if (optype == SUME_MR_READ)
783			error = EWOULDBLOCK;
784		else
785			error = 0;
786	} else
787		error = 0;
788
789	/*
790	 * For read requests we will update state once we are done
791	 * having read the result to avoid any two outstanding
792	 * transactions, or we need a queue and validate tags,
793	 * which is a lot of work for a low priority, infrequent
794	 * event.
795	 */
796
797	SUME_UNLOCK(adapter);
798
799	return (error);
800}
801
802/* Module register read. */
803static int
804sume_module_reg_read(struct nf_priv *nf_priv, struct sume_ifreq *sifr)
805{
806	struct sume_adapter *adapter = nf_priv->adapter;
807	struct riffa_chnl_dir *recv = adapter->recv[SUME_RIFFA_CHANNEL_REG];
808	struct riffa_chnl_dir *send = adapter->send[SUME_RIFFA_CHANNEL_REG];
809	struct nf_regop_data *data;
810	int error = 0;
811
812	/*
813	 * 0. Sleep waiting for result if needed (unless condition is
814	 *    true already).
815	 * 1. Read DMA results.
816	 * 2. Update state on *TX* to IDLE to allow next read to start.
817	 */
818	SUME_LOCK(adapter);
819
820	bus_dmamap_sync(recv->ch_tag, recv->ch_map,
821	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
822	/*
823	 * We only need to be woken up at the end of the transaction.
824	 * Timeout after 1s.
825	 */
826	if (recv->state != SUME_RIFFA_CHAN_STATE_READ)
827		error = msleep(&recv->event, &adapter->lock, 0,
828		    "Waiting transaction finish", 1 * hz);
829
830	if (recv->state != SUME_RIFFA_CHAN_STATE_READ || error == EWOULDBLOCK) {
831		SUME_UNLOCK(adapter);
832		device_printf(adapter->dev, "wait error: %d\n", error);
833		return (EWOULDBLOCK);
834	}
835
836	bus_dmamap_sync(recv->ch_tag, recv->ch_map,
837	    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
838
839	/*
840	 * Read reply data and validate address and tag.
841	 * Note: we do access the send side without lock but the state
842	 * machine does prevent the data from changing.
843	 */
844	data = (struct nf_regop_data *) (recv->buf_addr +
845	    sizeof(struct nf_bb_desc));
846
847	if (le32toh(data->rtag) != send->rtag)
848		device_printf(adapter->dev, "rtag error: 0x%08x 0x%08x\n",
849		    le32toh(data->rtag), send->rtag);
850
851	sifr->val = le32toh(data->val);
852	recv->state = SUME_RIFFA_CHAN_STATE_IDLE;
853
854	/* We are done. */
855	send->state = SUME_RIFFA_CHAN_STATE_IDLE;
856
857	SUME_UNLOCK(adapter);
858
859	return (0);
860}
861
862/* Read value from a module register and return it to a sume_ifreq. */
863static int
864get_modreg_value(struct nf_priv *nf_priv, struct sume_ifreq *sifr)
865{
866	int error;
867
868	error = sume_module_reg_write(nf_priv, sifr, SUME_MR_READ);
869	if (!error)
870		error = sume_module_reg_read(nf_priv, sifr);
871
872	return (error);
873}
874
875static int
876sume_if_ioctl(if_t ifp, unsigned long cmd, caddr_t data)
877{
878	struct ifreq *ifr = (struct ifreq *) data;
879	struct nf_priv *nf_priv = if_getsoftc(ifp);
880	struct sume_ifreq sifr;
881	int error = 0;
882
883	switch (cmd) {
884	case SIOCGIFMEDIA:
885	case SIOCGIFXMEDIA:
886		error = ifmedia_ioctl(ifp, ifr, &nf_priv->media, cmd);
887		break;
888
889	case SUME_IOCTL_CMD_WRITE_REG:
890		error = copyin(ifr_data_get_ptr(ifr), &sifr, sizeof(sifr));
891		if (error) {
892			error = EINVAL;
893			break;
894		}
895		error = sume_module_reg_write(nf_priv, &sifr, SUME_MR_WRITE);
896		break;
897
898	case SUME_IOCTL_CMD_READ_REG:
899		error = copyin(ifr_data_get_ptr(ifr), &sifr, sizeof(sifr));
900		if (error) {
901			error = EINVAL;
902			break;
903		}
904
905		error = get_modreg_value(nf_priv, &sifr);
906		if (error)
907			break;
908
909		error = copyout(&sifr, ifr_data_get_ptr(ifr), sizeof(sifr));
910		if (error)
911			error = EINVAL;
912
913		break;
914
915	case SIOCSIFFLAGS:
916		/* Silence tcpdump 'promisc mode not supported' warning. */
917		if (if_getflags(ifp) & IFF_PROMISC)
918			break;
919
920	default:
921		error = ether_ioctl(ifp, cmd, data);
922		break;
923	}
924
925	return (error);
926}
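
/*
 * Userland usage sketch (illustrative only; SUME_IOCTL_CMD_READ_REG and
 * struct sume_ifreq come from adapter.h, and the register address below is
 * just a placeholder): a tool can read a module register through one of the
 * sumeN interfaces roughly like this:
 *
 *	struct sume_ifreq sifr = { .addr = 0x44020000, .val = 0 };
 *	struct ifreq ifr;
 *
 *	memset(&ifr, 0, sizeof(ifr));
 *	strlcpy(ifr.ifr_name, "sume0", sizeof(ifr.ifr_name));
 *	ifr.ifr_data = (caddr_t)&sifr;
 *	if (ioctl(s, SUME_IOCTL_CMD_READ_REG, &ifr) == 0)
 *		printf("0x%08x\n", sifr.val);
 *
 * where 's' is a socket, e.g. from socket(AF_INET, SOCK_DGRAM, 0).
 */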
927
928static int
929sume_media_change(if_t ifp)
930{
931	struct nf_priv *nf_priv = if_getsoftc(ifp);
932	struct ifmedia *ifm = &nf_priv->media;
933
934	if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
935		return (EINVAL);
936
937	if (IFM_SUBTYPE(ifm->ifm_media) == IFM_10G_SR)
938		if_setbaudrate(ifp, ifmedia_baudrate(IFM_ETHER | IFM_10G_SR));
939	else
940		if_setbaudrate(ifp, ifmedia_baudrate(ifm->ifm_media));
941
942	return (0);
943}
944
945static void
946sume_update_link_status(if_t ifp)
947{
948	struct nf_priv *nf_priv = if_getsoftc(ifp);
949	struct sume_adapter *adapter = nf_priv->adapter;
950	struct sume_ifreq sifr;
951	int link_status;
952
953	sifr.addr = SUME_STATUS_ADDR(nf_priv->port);
954	sifr.val = 0;
955
956	if (get_modreg_value(nf_priv, &sifr))
957		return;
958
959	link_status = SUME_LINK_STATUS(sifr.val);
960
961	if (!link_status && nf_priv->link_up) {
962		if_link_state_change(ifp, LINK_STATE_DOWN);
963		nf_priv->link_up = 0;
964		if (adapter->sume_debug)
965			device_printf(adapter->dev, "port %d link state "
966			    "changed to DOWN\n", nf_priv->unit);
967	} else if (link_status && !nf_priv->link_up) {
968		nf_priv->link_up = 1;
969		if_link_state_change(ifp, LINK_STATE_UP);
970		if (adapter->sume_debug)
971			device_printf(adapter->dev, "port %d link state "
972			    "changed to UP\n", nf_priv->unit);
973	}
974}
975
976static void
977sume_media_status(if_t ifp, struct ifmediareq *ifmr)
978{
979	struct nf_priv *nf_priv = if_getsoftc(ifp);
980	struct ifmedia *ifm = &nf_priv->media;
981
982	if (ifm->ifm_cur->ifm_media == (IFM_ETHER | IFM_10G_SR) &&
983	    (if_getflags(ifp) & IFF_UP))
984		ifmr->ifm_active = IFM_ETHER | IFM_10G_SR;
985	else
986		ifmr->ifm_active = ifm->ifm_cur->ifm_media;
987
988	ifmr->ifm_status |= IFM_AVALID;
989
990	sume_update_link_status(ifp);
991
992	if (nf_priv->link_up)
993		ifmr->ifm_status |= IFM_ACTIVE;
994}
995
/*
 * Transmit a packet. We take the packet data from the mbuf and copy it to the
 * bouncebuffer address buf_addr+3*sizeof(uint32_t)+16. The 16 bytes before the
 * packet data are for metadata: sport/dport (depending on our source
 * interface), packet length and the magic 0xcafe. We tell SUME about the
 * transfer, fill the first 3*sizeof(uint32_t) bytes of the bouncebuffer with
 * the information about the start and length of the packet and trigger the
 * transaction.
 */
1005static int
1006sume_if_start_locked(if_t ifp)
1007{
1008	struct mbuf *m;
1009	struct nf_priv *nf_priv = if_getsoftc(ifp);
1010	struct sume_adapter *adapter = nf_priv->adapter;
1011	struct riffa_chnl_dir *send = adapter->send[SUME_RIFFA_CHANNEL_DATA];
1012	uint8_t *outbuf;
1013	struct nf_metadata *mdata;
1014	int plen = SUME_MIN_PKT_SIZE;
1015
1016	KASSERT(mtx_owned(&adapter->lock), ("SUME lock not owned"));
1017	KASSERT(send->state == SUME_RIFFA_CHAN_STATE_IDLE,
1018	    ("SUME not in IDLE state"));
1019
1020	m = if_dequeue(ifp);
1021	if (m == NULL)
1022		return (EINVAL);
1023
1024	/* Packets large enough do not need to be padded */
1025	if (m->m_pkthdr.len > SUME_MIN_PKT_SIZE)
1026		plen = m->m_pkthdr.len;
1027
1028	if (adapter->sume_debug)
1029		device_printf(adapter->dev, "sending %d bytes to %s%d\n", plen,
1030		    SUME_ETH_DEVICE_NAME, nf_priv->unit);
1031
1032	outbuf = (uint8_t *) send->buf_addr + sizeof(struct nf_bb_desc);
1033	mdata = (struct nf_metadata *) outbuf;
1034
1035	/* Clear the recovery flag. */
1036	send->recovery = 0;
1037
	/* Make sure the frame and the 16-byte metadata fit in the buffer. */
1039	if (m->m_pkthdr.len + sizeof(struct nf_metadata) >
1040	    adapter->sg_buf_size) {
1041		device_printf(adapter->dev, "packet too big for bounce buffer "
1042		    "(%d)\n", m->m_pkthdr.len);
1043		m_freem(m);
1044		nf_priv->stats.tx_dropped++;
1045		return (ENOMEM);
1046	}
1047
1048	bus_dmamap_sync(send->ch_tag, send->ch_map,
1049	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1050
1051	/* Zero out the padded data */
1052	if (m->m_pkthdr.len < SUME_MIN_PKT_SIZE)
1053		bzero(outbuf + sizeof(struct nf_metadata), SUME_MIN_PKT_SIZE);
1054	/* Skip the first 16 bytes for the metadata. */
1055	m_copydata(m, 0, m->m_pkthdr.len, outbuf + sizeof(struct nf_metadata));
1056	send->len = (sizeof(struct nf_metadata) + plen + 3) / 4;
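	/* E.g. a 60-byte frame: (16 + 60 + 3) / 4 = 19 32-bit words. */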
1057
1058	/* Fill in the metadata: CPU(DMA) ports are odd, MAC ports are even. */
1059	mdata->sport = htole16(1 << (nf_priv->port * 2 + 1));
1060	mdata->dport = htole16(1 << (nf_priv->port * 2));
1061	mdata->plen = htole16(plen);
1062	mdata->magic = htole16(SUME_RIFFA_MAGIC);
1063	mdata->t1 = htole32(0);
1064	mdata->t2 = htole32(0);
1065
1066	/* Let the FPGA know about the transfer. */
1067	write_reg(adapter, RIFFA_CHNL_REG(SUME_RIFFA_CHANNEL_DATA,
1068	    RIFFA_RX_OFFLAST_REG_OFF), SUME_OFFLAST);
1069	write_reg(adapter, RIFFA_CHNL_REG(SUME_RIFFA_CHANNEL_DATA,
1070	    RIFFA_RX_LEN_REG_OFF), send->len);
1071
1072	/* Fill the bouncebuf "descriptor". */
1073	sume_fill_bb_desc(adapter, send, SUME_RIFFA_LEN(send->len));
1074
	/* Update the state before initiating the DMA to avoid races. */
1076	send->state = SUME_RIFFA_CHAN_STATE_READY;
1077
1078	/* DMA. */
1079	write_reg(adapter, RIFFA_CHNL_REG(SUME_RIFFA_CHANNEL_DATA,
1080	    RIFFA_RX_SG_ADDR_LO_REG_OFF),
1081	    SUME_RIFFA_LO_ADDR(send->buf_hw_addr));
1082	write_reg(adapter, RIFFA_CHNL_REG(SUME_RIFFA_CHANNEL_DATA,
1083	    RIFFA_RX_SG_ADDR_HI_REG_OFF),
1084	    SUME_RIFFA_HI_ADDR(send->buf_hw_addr));
1085	write_reg(adapter, RIFFA_CHNL_REG(SUME_RIFFA_CHANNEL_DATA,
1086	    RIFFA_RX_SG_LEN_REG_OFF), 4 * send->num_sg);
1087
1088	bus_dmamap_sync(send->ch_tag, send->ch_map,
1089	    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
1090
1091	nf_priv->stats.tx_packets++;
1092	nf_priv->stats.tx_bytes += plen;
1093
	/* The data was copied to the bounce buffer, so the mbuf can go now. */
1095	m_freem(m);
1096
1097	adapter->last_ifc = nf_priv->port;
1098
1099	/* Reset watchdog counter. */
1100	adapter->wd_counter = 0;
1101
1102	return (0);
1103}
1104
1105static void
1106sume_if_start(if_t ifp)
1107{
1108	struct nf_priv *nf_priv = if_getsoftc(ifp);
1109	struct sume_adapter *adapter = nf_priv->adapter;
1110
1111	if (!adapter->running || !(if_getflags(ifp) & IFF_UP))
1112		return;
1113
1114	SUME_LOCK(adapter);
1115	if (adapter->send[SUME_RIFFA_CHANNEL_DATA]->state ==
1116	    SUME_RIFFA_CHAN_STATE_IDLE)
1117		sume_if_start_locked(ifp);
1118	SUME_UNLOCK(adapter);
1119}
1120
1121/*
1122 * We call this function at the end of every TX transaction to check for
1123 * remaining packets in the TX queues for every UP interface.
1124 */
1125static void
1126check_tx_queues(struct sume_adapter *adapter)
1127{
1128	int i, last_ifc;
1129
1130	KASSERT(mtx_owned(&adapter->lock), ("SUME lock not owned"));
1131
1132	last_ifc = adapter->last_ifc;
1133
1134	/* Check all interfaces */
1135	for (i = last_ifc + 1; i < last_ifc + SUME_NPORTS + 1; i++) {
1136		if_t ifp = adapter->ifp[i % SUME_NPORTS];
1137
1138		if (!(if_getflags(ifp) & IFF_UP))
1139			continue;
1140
1141		if (!sume_if_start_locked(ifp))
1142			break;
1143	}
1144}
1145
1146static int
1147sume_ifp_alloc(struct sume_adapter *adapter, uint32_t port)
1148{
1149	if_t ifp;
1150	struct nf_priv *nf_priv = malloc(sizeof(struct nf_priv), M_SUME,
1151	    M_ZERO | M_WAITOK);
1152
1153	ifp = if_alloc(IFT_ETHER);
	if (ifp == NULL) {
		device_printf(adapter->dev, "cannot allocate ifnet\n");
		free(nf_priv, M_SUME);
		return (ENOMEM);
	}
1158
1159	adapter->ifp[port] = ifp;
1160	if_setsoftc(ifp, nf_priv);
1161
1162	nf_priv->adapter = adapter;
1163	nf_priv->unit = alloc_unr(unr);
1164	nf_priv->port = port;
1165	nf_priv->link_up = 0;
1166
1167	if_initname(ifp, SUME_ETH_DEVICE_NAME, nf_priv->unit);
1168	if_setflags(ifp, IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST);
1169
1170	if_setinitfn(ifp, sume_if_init);
1171	if_setstartfn(ifp, sume_if_start);
1172	if_setioctlfn(ifp, sume_if_ioctl);
1173
1174	uint8_t hw_addr[ETHER_ADDR_LEN] = DEFAULT_ETHER_ADDRESS;
1175	hw_addr[ETHER_ADDR_LEN-1] = nf_priv->unit;
1176	ether_ifattach(ifp, hw_addr);
1177
1178	ifmedia_init(&nf_priv->media, IFM_IMASK, sume_media_change,
1179	    sume_media_status);
1180	ifmedia_add(&nf_priv->media, IFM_ETHER | IFM_10G_SR, 0, NULL);
1181	ifmedia_set(&nf_priv->media, IFM_ETHER | IFM_10G_SR);
1182
1183	if_setdrvflagbits(ifp, IFF_DRV_RUNNING, 0);
1184
1185	return (0);
1186}
1187
1188static void
1189callback_dma(void *arg, bus_dma_segment_t *segs, int nseg, int err)
1190{
1191	if (err)
1192		return;
1193
1194	KASSERT(nseg == 1, ("%d segments returned!", nseg));
1195
1196	*(bus_addr_t *) arg = segs[0].ds_addr;
1197}
1198
1199static int
1200sume_probe_riffa_buffer(const struct sume_adapter *adapter,
1201    struct riffa_chnl_dir ***p, const char *dir)
1202{
1203	struct riffa_chnl_dir **rp;
1204	bus_addr_t hw_addr;
1205	int error, ch;
1206	device_t dev = adapter->dev;
1207
1208	error = ENOMEM;
1209	*p = malloc(SUME_RIFFA_CHANNELS * sizeof(struct riffa_chnl_dir *),
1210	    M_SUME, M_ZERO | M_WAITOK);
1211	if (*p == NULL) {
1212		device_printf(dev, "malloc(%s) failed.\n", dir);
1213		return (error);
1214	}
1215
1216	rp = *p;
1217	/* Allocate the chnl_dir structs themselves. */
1218	for (ch = 0; ch < SUME_RIFFA_CHANNELS; ch++) {
1219		/* One direction. */
1220		rp[ch] = malloc(sizeof(struct riffa_chnl_dir), M_SUME,
1221		    M_ZERO | M_WAITOK);
1222		if (rp[ch] == NULL) {
1223			device_printf(dev, "malloc(%s[%d]) riffa_chnl_dir "
1224			    "failed.\n", dir, ch);
1225			return (error);
1226		}
1227
		int err = bus_dma_tag_create(bus_get_dma_tag(dev),
		    4, 0,			/* alignment, boundary */
		    BUS_SPACE_MAXADDR,		/* lowaddr */
		    BUS_SPACE_MAXADDR,		/* highaddr */
		    NULL, NULL,			/* filter, filterarg */
		    adapter->sg_buf_size,	/* maxsize */
		    1,				/* nsegments */
		    adapter->sg_buf_size,	/* maxsegsize */
		    0,				/* flags */
		    NULL,			/* lockfunc */
		    NULL,			/* lockfuncarg */
		    &rp[ch]->ch_tag);
1240
1241		if (err) {
1242			device_printf(dev, "bus_dma_tag_create(%s[%d]) "
1243			    "failed.\n", dir, ch);
1244			return (err);
1245		}
1246
1247		err = bus_dmamem_alloc(rp[ch]->ch_tag, (void **)
1248		    &rp[ch]->buf_addr, BUS_DMA_WAITOK | BUS_DMA_COHERENT |
1249		    BUS_DMA_ZERO, &rp[ch]->ch_map);
1250		if (err) {
1251			device_printf(dev, "bus_dmamem_alloc(%s[%d]) failed.\n",
1252			    dir, ch);
1253			return (err);
1254		}
1255
1256		bzero(rp[ch]->buf_addr, adapter->sg_buf_size);
1257
1258		err = bus_dmamap_load(rp[ch]->ch_tag, rp[ch]->ch_map,
1259		    rp[ch]->buf_addr, adapter->sg_buf_size, callback_dma,
1260		    &hw_addr, BUS_DMA_NOWAIT);
1261		if (err) {
1262			device_printf(dev, "bus_dmamap_load(%s[%d]) failed.\n",
1263			    dir, ch);
1264			return (err);
1265		}
1266		rp[ch]->buf_hw_addr = hw_addr;
1267		rp[ch]->num_sg = 1;
1268		rp[ch]->state = SUME_RIFFA_CHAN_STATE_IDLE;
1269
1270		rp[ch]->rtag = SUME_INIT_RTAG;
1271	}
1272
1273	return (0);
1274}
1275
1276static int
1277sume_probe_riffa_buffers(struct sume_adapter *adapter)
1278{
1279	int error;
1280
1281	error = sume_probe_riffa_buffer(adapter, &adapter->recv, "recv");
1282	if (error)
1283		return (error);
1284
1285	error = sume_probe_riffa_buffer(adapter, &adapter->send, "send");
1286
1287	return (error);
1288}
1289
1290static void
1291sume_sysctl_init(struct sume_adapter *adapter)
1292{
1293	device_t dev = adapter->dev;
1294	struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev);
1295	struct sysctl_oid *tree = device_get_sysctl_tree(dev);
1296	struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree);
1297	struct sysctl_oid *tmp_tree;
1298	char namebuf[MAX_IFC_NAME_LEN];
1299	int i;
1300
1301	tree = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "sume", CTLFLAG_RW,
1302	    0, "SUME top-level tree");
1303	if (tree == NULL) {
1304		device_printf(dev, "SYSCTL_ADD_NODE failed.\n");
1305		return;
1306	}
1307	SYSCTL_ADD_INT(ctx, child, OID_AUTO, "debug", CTLFLAG_RW,
1308	    &adapter->sume_debug, 0, "debug int leaf");
1309
1310	/* total RX error stats */
1311	SYSCTL_ADD_U64(ctx, child, OID_AUTO, "rx_epkts",
1312	    CTLFLAG_RD, &adapter->packets_err, 0, "rx errors");
1313	SYSCTL_ADD_U64(ctx, child, OID_AUTO, "rx_ebytes",
1314	    CTLFLAG_RD, &adapter->bytes_err, 0, "rx error bytes");
1315
1316	for (i = SUME_NPORTS - 1; i >= 0; i--) {
1317		if_t ifp = adapter->ifp[i];
1318		if (ifp == NULL)
1319			continue;
1320
1321		struct nf_priv *nf_priv = if_getsoftc(ifp);
1322
1323		snprintf(namebuf, MAX_IFC_NAME_LEN, "%s%d",
1324		    SUME_ETH_DEVICE_NAME, nf_priv->unit);
1325		tmp_tree = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
1326		    CTLFLAG_RW, 0, "SUME ifc tree");
1327		if (tmp_tree == NULL) {
1328			device_printf(dev, "SYSCTL_ADD_NODE failed.\n");
1329			return;
1330		}
1331
1332		/* Packets dropped by down interface. */
1333		SYSCTL_ADD_U64(ctx, SYSCTL_CHILDREN(tmp_tree), OID_AUTO,
1334		    "ifc_down_bytes", CTLFLAG_RD,
1335		    &nf_priv->stats.ifc_down_bytes, 0, "ifc_down bytes");
1336		SYSCTL_ADD_U64(ctx, SYSCTL_CHILDREN(tmp_tree), OID_AUTO,
1337		    "ifc_down_packets", CTLFLAG_RD,
1338		    &nf_priv->stats.ifc_down_packets, 0, "ifc_down packets");
1339
1340		/* HW RX stats */
1341		SYSCTL_ADD_U64(ctx, SYSCTL_CHILDREN(tmp_tree), OID_AUTO,
1342		    "hw_rx_packets", CTLFLAG_RD, &nf_priv->stats.hw_rx_packets,
1343		    0, "hw_rx packets");
1344
1345		/* HW TX stats */
1346		SYSCTL_ADD_U64(ctx, SYSCTL_CHILDREN(tmp_tree), OID_AUTO,
1347		    "hw_tx_packets", CTLFLAG_RD, &nf_priv->stats.hw_tx_packets,
1348		    0, "hw_tx packets");
1349
1350		/* RX stats */
1351		SYSCTL_ADD_U64(ctx, SYSCTL_CHILDREN(tmp_tree), OID_AUTO,
1352		    "rx_bytes", CTLFLAG_RD, &nf_priv->stats.rx_bytes, 0,
1353		    "rx bytes");
1354		SYSCTL_ADD_U64(ctx, SYSCTL_CHILDREN(tmp_tree), OID_AUTO,
1355		    "rx_dropped", CTLFLAG_RD, &nf_priv->stats.rx_dropped, 0,
1356		    "rx dropped");
1357		SYSCTL_ADD_U64(ctx, SYSCTL_CHILDREN(tmp_tree), OID_AUTO,
1358		    "rx_packets", CTLFLAG_RD, &nf_priv->stats.rx_packets, 0,
1359		    "rx packets");
1360
1361		/* TX stats */
1362		SYSCTL_ADD_U64(ctx, SYSCTL_CHILDREN(tmp_tree), OID_AUTO,
1363		    "tx_bytes", CTLFLAG_RD, &nf_priv->stats.tx_bytes, 0,
1364		    "tx bytes");
1365		SYSCTL_ADD_U64(ctx, SYSCTL_CHILDREN(tmp_tree), OID_AUTO,
1366		    "tx_dropped", CTLFLAG_RD, &nf_priv->stats.tx_dropped, 0,
1367		    "tx dropped");
1368		SYSCTL_ADD_U64(ctx, SYSCTL_CHILDREN(tmp_tree), OID_AUTO,
1369		    "tx_packets", CTLFLAG_RD, &nf_priv->stats.tx_packets, 0,
1370		    "tx packets");
1371	}
1372}
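
/*
 * The nodes above hang off the device sysctl tree, so (assuming device unit 0
 * and the default interface naming) the counters can be inspected with e.g.
 * "sysctl dev.sume.0.sume0.rx_packets" and debug output enabled with
 * "sysctl dev.sume.0.debug=1".
 */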
1373
1374static void
1375sume_local_timer(void *arg)
1376{
1377	struct sume_adapter *adapter = arg;
1378
1379	if (!adapter->running)
1380		return;
1381
1382	taskqueue_enqueue(adapter->tq, &adapter->stat_task);
1383
1384	SUME_LOCK(adapter);
1385	if (adapter->send[SUME_RIFFA_CHANNEL_DATA]->state !=
1386	    SUME_RIFFA_CHAN_STATE_IDLE && ++adapter->wd_counter >= 3) {
		/* The TX path has been stuck for 3 seconds; reset it. */
1388		device_printf(adapter->dev, "TX stuck, resetting adapter.\n");
1389		read_reg(adapter, RIFFA_INFO_REG_OFF);
1390
1391		adapter->send[SUME_RIFFA_CHANNEL_DATA]->state =
1392		    SUME_RIFFA_CHAN_STATE_IDLE;
1393		adapter->wd_counter = 0;
1394
1395		check_tx_queues(adapter);
1396	}
1397	SUME_UNLOCK(adapter);
1398
1399	callout_reset(&adapter->timer, 1 * hz, sume_local_timer, adapter);
1400}
1401
1402static void
1403sume_get_stats(void *context, int pending)
1404{
1405	struct sume_adapter *adapter = context;
1406	int i;
1407
1408	for (i = 0; i < SUME_NPORTS; i++) {
1409		if_t ifp = adapter->ifp[i];
1410
1411		if (if_getflags(ifp) & IFF_UP) {
1412			struct nf_priv *nf_priv = if_getsoftc(ifp);
1413			struct sume_ifreq sifr;
1414
1415			sume_update_link_status(ifp);
1416
1417			/* Get RX counter. */
1418			sifr.addr = SUME_STAT_RX_ADDR(nf_priv->port);
1419			sifr.val = 0;
1420
1421			if (!get_modreg_value(nf_priv, &sifr))
1422				nf_priv->stats.hw_rx_packets += sifr.val;
1423
1424			/* Get TX counter. */
1425			sifr.addr = SUME_STAT_TX_ADDR(nf_priv->port);
1426			sifr.val = 0;
1427
1428			if (!get_modreg_value(nf_priv, &sifr))
1429				nf_priv->stats.hw_tx_packets += sifr.val;
1430		}
1431	}
1432}
1433
1434static int
1435sume_attach(device_t dev)
1436{
1437	struct sume_adapter *adapter = device_get_softc(dev);
1438	adapter->dev = dev;
1439	int error, i;
1440
1441	mtx_init(&adapter->lock, "Global lock", NULL, MTX_DEF);
1442
1443	adapter->running = 0;
1444
1445	/* OK finish up RIFFA. */
1446	error = sume_probe_riffa_pci(adapter);
1447	if (error != 0)
1448		goto error;
1449
1450	error = sume_probe_riffa_buffers(adapter);
1451	if (error != 0)
1452		goto error;
1453
1454	/* Now do the network interfaces. */
1455	for (i = 0; i < SUME_NPORTS; i++) {
1456		error = sume_ifp_alloc(adapter, i);
1457		if (error != 0)
1458			goto error;
1459	}
1460
	/* Register statistics and sysctls. */
1462	sume_sysctl_init(adapter);
1463
1464	/* Reset the HW. */
1465	read_reg(adapter, RIFFA_INFO_REG_OFF);
1466
1467	/* Ready to go, "enable" IRQ. */
1468	adapter->running = 1;
1469
1470	callout_init(&adapter->timer, 1);
1471	TASK_INIT(&adapter->stat_task, 0, sume_get_stats, adapter);
1472
1473	adapter->tq = taskqueue_create("sume_stats", M_NOWAIT,
1474	    taskqueue_thread_enqueue, &adapter->tq);
1475	taskqueue_start_threads(&adapter->tq, 1, PI_NET, "%s stattaskq",
1476	    device_get_nameunit(adapter->dev));
1477
1478	callout_reset(&adapter->timer, 1 * hz, sume_local_timer, adapter);
1479
1480	return (0);
1481
1482error:
1483	sume_detach(dev);
1484
1485	return (error);
1486}
1487
1488static void
1489sume_remove_riffa_buffer(const struct sume_adapter *adapter,
1490    struct riffa_chnl_dir **pp)
1491{
1492	int ch;
1493
1494	for (ch = 0; ch < SUME_RIFFA_CHANNELS; ch++) {
1495		if (pp[ch] == NULL)
1496			continue;
1497
1498		if (pp[ch]->buf_hw_addr != 0) {
1499			bus_dmamem_free(pp[ch]->ch_tag, pp[ch]->buf_addr,
1500			    pp[ch]->ch_map);
1501			pp[ch]->buf_hw_addr = 0;
1502		}
1503
1504		free(pp[ch], M_SUME);
1505	}
1506}
1507
1508static void
1509sume_remove_riffa_buffers(struct sume_adapter *adapter)
1510{
1511	if (adapter->send != NULL) {
1512		sume_remove_riffa_buffer(adapter, adapter->send);
1513		free(adapter->send, M_SUME);
1514		adapter->send = NULL;
1515	}
1516	if (adapter->recv != NULL) {
1517		sume_remove_riffa_buffer(adapter, adapter->recv);
1518		free(adapter->recv, M_SUME);
1519		adapter->recv = NULL;
1520	}
1521}
1522
1523static int
1524sume_detach(device_t dev)
1525{
1526	struct sume_adapter *adapter = device_get_softc(dev);
1527	int i;
1528	struct nf_priv *nf_priv;
1529
1530	KASSERT(mtx_initialized(&adapter->lock), ("SUME mutex not "
1531	    "initialized"));
1532	adapter->running = 0;
1533
1534	/* Drain the stats callout and task queue. */
1535	callout_drain(&adapter->timer);
1536
1537	if (adapter->tq) {
1538		taskqueue_drain(adapter->tq, &adapter->stat_task);
1539		taskqueue_free(adapter->tq);
1540	}
1541
1542	for (i = 0; i < SUME_NPORTS; i++) {
1543		if_t ifp = adapter->ifp[i];
1544		if (ifp == NULL)
1545			continue;
1546
1547		if_setdrvflagbits(ifp, 0, IFF_DRV_RUNNING);
1548		nf_priv = if_getsoftc(ifp);
1549
1550		if (if_getflags(ifp) & IFF_UP)
1551			if_down(ifp);
1552		ifmedia_removeall(&nf_priv->media);
1553		free_unr(unr, nf_priv->unit);
1554
1555		if_setflagbits(ifp, 0, IFF_UP);
1556		ether_ifdetach(ifp);
1557		if_free(ifp);
1558
1559		free(nf_priv, M_SUME);
1560	}
1561
1562	sume_remove_riffa_buffers(adapter);
1563
1564	if (adapter->irq.tag)
1565		bus_teardown_intr(dev, adapter->irq.res, adapter->irq.tag);
1566	if (adapter->irq.res)
1567		bus_release_resource(dev, SYS_RES_IRQ, adapter->irq.rid,
1568		    adapter->irq.res);
1569
1570	pci_release_msi(dev);
1571
1572	if (adapter->bar0_addr)
1573		bus_release_resource(dev, SYS_RES_MEMORY, adapter->rid,
1574		    adapter->bar0_addr);
1575
1576	mtx_destroy(&adapter->lock);
1577
1578	return (0);
1579}
1580
1581static int
1582mod_event(module_t mod, int cmd, void *arg)
1583{
1584	switch (cmd) {
1585	case MOD_LOAD:
1586		unr = new_unrhdr(0, INT_MAX, NULL);
1587		break;
1588
1589	case MOD_UNLOAD:
1590		delete_unrhdr(unr);
1591		break;
1592	}
1593
1594	return (0);
1595}
1596
1597DRIVER_MODULE(sume, pci, sume_driver, mod_event, NULL);
1598MODULE_VERSION(sume, 1);
1599