1/*-
2 * Copyright (c) 2010-2015 Solarflare Communications Inc.
3 * All rights reserved.
4 *
5 * This software was developed in part by Philip Paeps under contract for
6 * Solarflare Communications, Inc.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions are met:
10 *
11 * 1. Redistributions of source code must retain the above copyright notice,
12 *    this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright notice,
14 *    this list of conditions and the following disclaimer in the documentation
15 *    and/or other materials provided with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
18 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
19 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
20 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
21 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
22 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
23 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
24 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
25 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
26 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
27 * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 *
29 * The views and conclusions contained in the software and documentation are
30 * those of the authors and should not be interpreted as representing official
31 * policies, either expressed or implied, of the FreeBSD Project.
32 */
33
34#include <sys/cdefs.h>
35__FBSDID("$FreeBSD: releng/10.3/sys/dev/sfxge/sfxge.c 295526 2016-02-11 16:39:30Z arybchik $");
36
37#include <sys/param.h>
38#include <sys/kernel.h>
39#include <sys/bus.h>
40#include <sys/rman.h>
41#include <sys/lock.h>
42#include <sys/module.h>
43#include <sys/mutex.h>
44#include <sys/smp.h>
45#include <sys/socket.h>
46#include <sys/taskqueue.h>
47#include <sys/sockio.h>
48#include <sys/sysctl.h>
49#include <sys/priv.h>
50#include <sys/syslog.h>
51
52#include <dev/pci/pcireg.h>
53#include <dev/pci/pcivar.h>
54
55#include <net/ethernet.h>
56#include <net/if.h>
57#include <net/if_media.h>
58#include <net/if_types.h>
59
60#include "common/efx.h"
61
62#include "sfxge.h"
63#include "sfxge_rx.h"
64#include "sfxge_ioc.h"
65#include "sfxge_version.h"
66
/*
 * Interface capabilities advertised by the driver; sfxge_ifnet_init()
 * assigns this mask to if_capabilities.
 */
#define	SFXGE_CAP (IFCAP_VLAN_MTU | IFCAP_VLAN_HWCSUM |			\
		   IFCAP_RXCSUM | IFCAP_TXCSUM |			\
		   IFCAP_RXCSUM_IPV6 | IFCAP_TXCSUM_IPV6 |		\
		   IFCAP_TSO4 | IFCAP_TSO6 |				\
		   IFCAP_JUMBO_MTU |					\
		   IFCAP_VLAN_HWTSO | IFCAP_LINKSTATE | IFCAP_HWSTATS)
/* Capabilities enabled by default (initial value of if_capenable). */
#define	SFXGE_CAP_ENABLE SFXGE_CAP
/*
 * Capabilities that cannot be toggled at run time: the SIOCSIFCAP handler
 * rejects with EINVAL any request that would change one of these bits.
 */
#define	SFXGE_CAP_FIXED (IFCAP_VLAN_MTU |				\
			 IFCAP_JUMBO_MTU | IFCAP_LINKSTATE | IFCAP_HWSTATS)
76
/* Malloc type used for the driver's own allocations (e.g. the VPD buffer). */
MALLOC_DEFINE(M_SFXGE, "sfxge", "Solarflare 10GigE driver");


/* Root of the driver-wide sysctl tree: hw.sfxge. */
SYSCTL_NODE(_hw, OID_AUTO, sfxge, CTLFLAG_RD, 0,
	    "SFXGE driver parameters");

/*
 * Receive ring size.  Loader tunable, exported read-only through sysctl.
 * sfxge_create() rejects values that are not a power of 2 within
 * [EFX_RXQ_MINNDESCS, EFX_RXQ_MAXNDESCS].
 */
#define	SFXGE_PARAM_RX_RING	SFXGE_PARAM(rx_ring)
static int sfxge_rx_ring_entries = SFXGE_NDESCS;
TUNABLE_INT(SFXGE_PARAM_RX_RING, &sfxge_rx_ring_entries);
SYSCTL_INT(_hw_sfxge, OID_AUTO, rx_ring, CTLFLAG_RDTUN,
	   &sfxge_rx_ring_entries, 0,
	   "Maximum number of descriptors in a receive ring");

/*
 * Transmit ring size.  Loader tunable, exported read-only through sysctl.
 * sfxge_create() rejects values that are not a power of 2 within
 * [EFX_TXQ_MINNDESCS, EFX_TXQ_MAXNDESCS()].
 */
#define	SFXGE_PARAM_TX_RING	SFXGE_PARAM(tx_ring)
static int sfxge_tx_ring_entries = SFXGE_NDESCS;
TUNABLE_INT(SFXGE_PARAM_TX_RING, &sfxge_tx_ring_entries);
SYSCTL_INT(_hw_sfxge, OID_AUTO, tx_ring, CTLFLAG_RDTUN,
	   &sfxge_tx_ring_entries, 0,
	   "Maximum number of descriptors in a transmit ring");

/*
 * Period, in ticks, at which sfxge_tick() re-arms itself to refresh the
 * port and transmit statistics.
 */
#define	SFXGE_PARAM_STATS_UPDATE_PERIOD	SFXGE_PARAM(stats_update_period)
static int sfxge_stats_update_period = SFXGE_CALLOUT_TICKS;
TUNABLE_INT(SFXGE_PARAM_STATS_UPDATE_PERIOD,
	    &sfxge_stats_update_period);
SYSCTL_INT(_hw_sfxge, OID_AUTO, stats_update_period, CTLFLAG_RDTUN,
	   &sfxge_stats_update_period, 0,
	   "netstat interface statistics update period in ticks");

/*
 * Number of times sfxge_reset() retries sfxge_start() after resetting the
 * NIC before giving up and leaving the interface stopped.
 */
#define	SFXGE_PARAM_RESTART_ATTEMPTS	SFXGE_PARAM(restart_attempts)
static int sfxge_restart_attempts = 3;
TUNABLE_INT(SFXGE_PARAM_RESTART_ATTEMPTS, &sfxge_restart_attempts);
SYSCTL_INT(_hw_sfxge, OID_AUTO, restart_attempts, CTLFLAG_RDTUN,
	   &sfxge_restart_attempts, 0,
	   "Maximum number of attempts to bring interface up after reset");

#if EFSYS_OPT_MCDI_LOGGING
/*
 * Driver-wide default for MCDI logging.  sfxge_create() copies it into the
 * softc and then lets a per-unit tunable override it.
 */
#define	SFXGE_PARAM_MCDI_LOGGING	SFXGE_PARAM(mcdi_logging)
static int sfxge_mcdi_logging = 0;
TUNABLE_INT(SFXGE_PARAM_MCDI_LOGGING, &sfxge_mcdi_logging);
#endif

/* Reset task handler; declared early because sfxge_create() registers it. */
static void
sfxge_reset(void *arg, int npending);
120
121static int
122sfxge_estimate_rsrc_limits(struct sfxge_softc *sc)
123{
124	efx_drv_limits_t limits;
125	int rc;
126	unsigned int evq_max;
127	uint32_t evq_allocated;
128	uint32_t rxq_allocated;
129	uint32_t txq_allocated;
130
131	/*
132	 * Limit the number of event queues to:
133	 *  - number of CPUs
134	 *  - hardwire maximum RSS channels
135	 *  - administratively specified maximum RSS channels
136	 */
137	evq_max = MIN(mp_ncpus, EFX_MAXRSS);
138	if (sc->max_rss_channels > 0)
139		evq_max = MIN(evq_max, sc->max_rss_channels);
140
141	memset(&limits, 0, sizeof(limits));
142
143	limits.edl_min_evq_count = 1;
144	limits.edl_max_evq_count = evq_max;
145	limits.edl_min_txq_count = SFXGE_TXQ_NTYPES;
146	limits.edl_max_txq_count = evq_max + SFXGE_TXQ_NTYPES - 1;
147	limits.edl_min_rxq_count = 1;
148	limits.edl_max_rxq_count = evq_max;
149
150	efx_nic_set_drv_limits(sc->enp, &limits);
151
152	if ((rc = efx_nic_init(sc->enp)) != 0)
153		return (rc);
154
155	rc = efx_nic_get_vi_pool(sc->enp, &evq_allocated, &rxq_allocated,
156				 &txq_allocated);
157	if (rc != 0) {
158		efx_nic_fini(sc->enp);
159		return (rc);
160	}
161
162	KASSERT(txq_allocated >= SFXGE_TXQ_NTYPES,
163		("txq_allocated < SFXGE_TXQ_NTYPES"));
164
165	sc->evq_max = MIN(evq_allocated, evq_max);
166	sc->evq_max = MIN(rxq_allocated, sc->evq_max);
167	sc->evq_max = MIN(txq_allocated - (SFXGE_TXQ_NTYPES - 1),
168			  sc->evq_max);
169
170	KASSERT(sc->evq_max <= evq_max,
171		("allocated more than maximum requested"));
172
173	/*
174	 * NIC is kept initialized in the case of success to be able to
175	 * initialize port to find out media types.
176	 */
177	return (0);
178}
179
180static int
181sfxge_set_drv_limits(struct sfxge_softc *sc)
182{
183	efx_drv_limits_t limits;
184
185	memset(&limits, 0, sizeof(limits));
186
187	/* Limits are strict since take into account initial estimation */
188	limits.edl_min_evq_count = limits.edl_max_evq_count =
189	    sc->intr.n_alloc;
190	limits.edl_min_txq_count = limits.edl_max_txq_count =
191	    sc->intr.n_alloc + SFXGE_TXQ_NTYPES - 1;
192	limits.edl_min_rxq_count = limits.edl_max_rxq_count =
193	    sc->intr.n_alloc;
194
195	return (efx_nic_set_drv_limits(sc->enp, &limits));
196}
197
198static int
199sfxge_start(struct sfxge_softc *sc)
200{
201	int rc;
202
203	SFXGE_ADAPTER_LOCK_ASSERT_OWNED(sc);
204
205	if (sc->init_state == SFXGE_STARTED)
206		return (0);
207
208	if (sc->init_state != SFXGE_REGISTERED) {
209		rc = EINVAL;
210		goto fail;
211	}
212
213	/* Set required resource limits */
214	if ((rc = sfxge_set_drv_limits(sc)) != 0)
215		goto fail;
216
217	if ((rc = efx_nic_init(sc->enp)) != 0)
218		goto fail;
219
220	/* Start processing interrupts. */
221	if ((rc = sfxge_intr_start(sc)) != 0)
222		goto fail2;
223
224	/* Start processing events. */
225	if ((rc = sfxge_ev_start(sc)) != 0)
226		goto fail3;
227
228	/* Fire up the port. */
229	if ((rc = sfxge_port_start(sc)) != 0)
230		goto fail4;
231
232	/* Start the receiver side. */
233	if ((rc = sfxge_rx_start(sc)) != 0)
234		goto fail5;
235
236	/* Start the transmitter side. */
237	if ((rc = sfxge_tx_start(sc)) != 0)
238		goto fail6;
239
240	sc->init_state = SFXGE_STARTED;
241
242	/* Tell the stack we're running. */
243	sc->ifnet->if_drv_flags |= IFF_DRV_RUNNING;
244	sc->ifnet->if_drv_flags &= ~IFF_DRV_OACTIVE;
245
246	return (0);
247
248fail6:
249	sfxge_rx_stop(sc);
250
251fail5:
252	sfxge_port_stop(sc);
253
254fail4:
255	sfxge_ev_stop(sc);
256
257fail3:
258	sfxge_intr_stop(sc);
259
260fail2:
261	efx_nic_fini(sc->enp);
262
263fail:
264	device_printf(sc->dev, "sfxge_start: %d\n", rc);
265
266	return (rc);
267}
268
/*
 * if_init callback: start the adapter under the adapter lock.  The
 * callback has no way to report failure, so the result of sfxge_start()
 * is discarded (sfxge_start() logs its own errors).
 */
static void
sfxge_if_init(void *arg)
{
	struct sfxge_softc *sc = arg;

	SFXGE_ADAPTER_LOCK(sc);
	(void)sfxge_start(sc);
	SFXGE_ADAPTER_UNLOCK(sc);
}
280
281static void
282sfxge_stop(struct sfxge_softc *sc)
283{
284	SFXGE_ADAPTER_LOCK_ASSERT_OWNED(sc);
285
286	if (sc->init_state != SFXGE_STARTED)
287		return;
288
289	sc->init_state = SFXGE_REGISTERED;
290
291	/* Stop the transmitter. */
292	sfxge_tx_stop(sc);
293
294	/* Stop the receiver. */
295	sfxge_rx_stop(sc);
296
297	/* Stop the port. */
298	sfxge_port_stop(sc);
299
300	/* Stop processing events. */
301	sfxge_ev_stop(sc);
302
303	/* Stop processing interrupts. */
304	sfxge_intr_stop(sc);
305
306	efx_nic_fini(sc->enp);
307
308	sc->ifnet->if_drv_flags &= ~IFF_DRV_RUNNING;
309}
310
311
312static int
313sfxge_vpd_ioctl(struct sfxge_softc *sc, sfxge_ioc_t *ioc)
314{
315	efx_vpd_value_t value;
316	int rc = 0;
317
318	switch (ioc->u.vpd.op) {
319	case SFXGE_VPD_OP_GET_KEYWORD:
320		value.evv_tag = ioc->u.vpd.tag;
321		value.evv_keyword = ioc->u.vpd.keyword;
322		rc = efx_vpd_get(sc->enp, sc->vpd_data, sc->vpd_size, &value);
323		if (rc != 0)
324			break;
325		ioc->u.vpd.len = MIN(ioc->u.vpd.len, value.evv_length);
326		if (ioc->u.vpd.payload != 0) {
327			rc = copyout(value.evv_value, ioc->u.vpd.payload,
328				     ioc->u.vpd.len);
329		}
330		break;
331	case SFXGE_VPD_OP_SET_KEYWORD:
332		if (ioc->u.vpd.len > sizeof(value.evv_value))
333			return (EINVAL);
334		value.evv_tag = ioc->u.vpd.tag;
335		value.evv_keyword = ioc->u.vpd.keyword;
336		value.evv_length = ioc->u.vpd.len;
337		rc = copyin(ioc->u.vpd.payload, value.evv_value, value.evv_length);
338		if (rc != 0)
339			break;
340		rc = efx_vpd_set(sc->enp, sc->vpd_data, sc->vpd_size, &value);
341		if (rc != 0)
342			break;
343		rc = efx_vpd_verify(sc->enp, sc->vpd_data, sc->vpd_size);
344		if (rc != 0)
345			break;
346		rc = efx_vpd_write(sc->enp, sc->vpd_data, sc->vpd_size);
347		break;
348	default:
349		rc = EOPNOTSUPP;
350		break;
351	}
352
353	return (rc);
354}
355
356static int
357sfxge_private_ioctl(struct sfxge_softc *sc, sfxge_ioc_t *ioc)
358{
359	switch (ioc->op) {
360	case SFXGE_MCDI_IOC:
361		return (sfxge_mcdi_ioctl(sc, ioc));
362	case SFXGE_NVRAM_IOC:
363		return (sfxge_nvram_ioctl(sc, ioc));
364	case SFXGE_VPD_IOC:
365		return (sfxge_vpd_ioctl(sc, ioc));
366	default:
367		return (EOPNOTSUPP);
368	}
369}
370
371
372static int
373sfxge_if_ioctl(struct ifnet *ifp, unsigned long command, caddr_t data)
374{
375	struct sfxge_softc *sc;
376	struct ifreq *ifr;
377	sfxge_ioc_t ioc;
378	int error;
379
380	ifr = (struct ifreq *)data;
381	sc = ifp->if_softc;
382	error = 0;
383
384	switch (command) {
385	case SIOCSIFFLAGS:
386		SFXGE_ADAPTER_LOCK(sc);
387		if (ifp->if_flags & IFF_UP) {
388			if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
389				if ((ifp->if_flags ^ sc->if_flags) &
390				    (IFF_PROMISC | IFF_ALLMULTI)) {
391					sfxge_mac_filter_set(sc);
392				}
393			} else
394				sfxge_start(sc);
395		} else
396			if (ifp->if_drv_flags & IFF_DRV_RUNNING)
397				sfxge_stop(sc);
398		sc->if_flags = ifp->if_flags;
399		SFXGE_ADAPTER_UNLOCK(sc);
400		break;
401	case SIOCSIFMTU:
402		if (ifr->ifr_mtu == ifp->if_mtu) {
403			/* Nothing to do */
404			error = 0;
405		} else if (ifr->ifr_mtu > SFXGE_MAX_MTU) {
406			error = EINVAL;
407		} else if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) {
408			ifp->if_mtu = ifr->ifr_mtu;
409			error = 0;
410		} else {
411			/* Restart required */
412			SFXGE_ADAPTER_LOCK(sc);
413			sfxge_stop(sc);
414			ifp->if_mtu = ifr->ifr_mtu;
415			error = sfxge_start(sc);
416			SFXGE_ADAPTER_UNLOCK(sc);
417			if (error != 0) {
418				ifp->if_flags &= ~IFF_UP;
419				ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
420				if_down(ifp);
421			}
422		}
423		break;
424	case SIOCADDMULTI:
425	case SIOCDELMULTI:
426		if (ifp->if_drv_flags & IFF_DRV_RUNNING)
427			sfxge_mac_filter_set(sc);
428		break;
429	case SIOCSIFCAP:
430	{
431		int reqcap = ifr->ifr_reqcap;
432		int capchg_mask;
433
434		SFXGE_ADAPTER_LOCK(sc);
435
436		/* Capabilities to be changed in accordance with request */
437		capchg_mask = ifp->if_capenable ^ reqcap;
438
439		/*
440		 * The networking core already rejects attempts to
441		 * enable capabilities we don't have.  We still have
442		 * to reject attempts to disable capabilities that we
443		 * can't (yet) disable.
444		 */
445		KASSERT((reqcap & ~ifp->if_capabilities) == 0,
446		    ("Unsupported capabilities 0x%x requested 0x%x vs "
447		     "supported 0x%x",
448		     reqcap & ~ifp->if_capabilities,
449		     reqcap , ifp->if_capabilities));
450		if (capchg_mask & SFXGE_CAP_FIXED) {
451			error = EINVAL;
452			SFXGE_ADAPTER_UNLOCK(sc);
453			break;
454		}
455
456		/* Check request before any changes */
457		if ((capchg_mask & IFCAP_TSO4) &&
458		    (reqcap & (IFCAP_TSO4 | IFCAP_TXCSUM)) == IFCAP_TSO4) {
459			error = EAGAIN;
460			SFXGE_ADAPTER_UNLOCK(sc);
461			if_printf(ifp, "enable txcsum before tso4\n");
462			break;
463		}
464		if ((capchg_mask & IFCAP_TSO6) &&
465		    (reqcap & (IFCAP_TSO6 | IFCAP_TXCSUM_IPV6)) == IFCAP_TSO6) {
466			error = EAGAIN;
467			SFXGE_ADAPTER_UNLOCK(sc);
468			if_printf(ifp, "enable txcsum6 before tso6\n");
469			break;
470		}
471
472		if (reqcap & IFCAP_TXCSUM) {
473			ifp->if_hwassist |= (CSUM_IP | CSUM_TCP | CSUM_UDP);
474		} else {
475			ifp->if_hwassist &= ~(CSUM_IP | CSUM_TCP | CSUM_UDP);
476			if (reqcap & IFCAP_TSO4) {
477				reqcap &= ~IFCAP_TSO4;
478				if_printf(ifp,
479				    "tso4 disabled due to -txcsum\n");
480			}
481		}
482		if (reqcap & IFCAP_TXCSUM_IPV6) {
483			ifp->if_hwassist |= (CSUM_TCP_IPV6 | CSUM_UDP_IPV6);
484		} else {
485			ifp->if_hwassist &= ~(CSUM_TCP_IPV6 | CSUM_UDP_IPV6);
486			if (reqcap & IFCAP_TSO6) {
487				reqcap &= ~IFCAP_TSO6;
488				if_printf(ifp,
489				    "tso6 disabled due to -txcsum6\n");
490			}
491		}
492
493		/*
494		 * The kernel takes both IFCAP_TSOx and CSUM_TSO into
495		 * account before using TSO. So, we do not touch
496		 * checksum flags when IFCAP_TSOx is modified.
497		 * Note that CSUM_TSO is (CSUM_IP_TSO|CSUM_IP6_TSO),
498		 * but both bits are set in IPv4 and IPv6 mbufs.
499		 */
500
501		ifp->if_capenable = reqcap;
502
503		SFXGE_ADAPTER_UNLOCK(sc);
504		break;
505	}
506	case SIOCSIFMEDIA:
507	case SIOCGIFMEDIA:
508		error = ifmedia_ioctl(ifp, ifr, &sc->media, command);
509		break;
510#ifdef SIOCGI2C
511	case SIOCGI2C:
512	{
513		struct ifi2creq i2c;
514
515		error = copyin(ifr->ifr_data, &i2c, sizeof(i2c));
516		if (error != 0)
517			break;
518
519		if (i2c.len > sizeof(i2c.data)) {
520			error = EINVAL;
521			break;
522		}
523
524		SFXGE_ADAPTER_LOCK(sc);
525		error = efx_phy_module_get_info(sc->enp, i2c.dev_addr,
526						i2c.offset, i2c.len,
527						&i2c.data[0]);
528		SFXGE_ADAPTER_UNLOCK(sc);
529		if (error == 0)
530			error = copyout(&i2c, ifr->ifr_data, sizeof(i2c));
531		break;
532	}
533#endif
534	case SIOCGPRIVATE_0:
535		error = priv_check(curthread, PRIV_DRIVER);
536		if (error != 0)
537			break;
538		error = copyin(ifr->ifr_data, &ioc, sizeof(ioc));
539		if (error != 0)
540			return (error);
541		error = sfxge_private_ioctl(sc, &ioc);
542		if (error == 0) {
543			error = copyout(&ioc, ifr->ifr_data, sizeof(ioc));
544		}
545		break;
546	default:
547		error = ether_ioctl(ifp, command, data);
548	}
549
550	return (error);
551}
552
553static void
554sfxge_tick(void *arg)
555{
556	struct sfxge_softc *sc = arg;
557
558	sfxge_port_update_stats(sc);
559	sfxge_tx_update_stats(sc);
560
561	callout_reset(&sc->tick_callout, sfxge_stats_update_period,
562		      sfxge_tick, sc);
563}
564
565static void
566sfxge_ifnet_fini(struct ifnet *ifp)
567{
568	struct sfxge_softc *sc = ifp->if_softc;
569
570	callout_drain(&sc->tick_callout);
571
572	SFXGE_ADAPTER_LOCK(sc);
573	sfxge_stop(sc);
574	SFXGE_ADAPTER_UNLOCK(sc);
575
576	ifmedia_removeall(&sc->media);
577	ether_ifdetach(ifp);
578	if_free(ifp);
579}
580
581static int
582sfxge_ifnet_init(struct ifnet *ifp, struct sfxge_softc *sc)
583{
584	const efx_nic_cfg_t *encp = efx_nic_cfg_get(sc->enp);
585	device_t dev;
586	int rc;
587
588	dev = sc->dev;
589	sc->ifnet = ifp;
590
591	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
592	ifp->if_init = sfxge_if_init;
593	ifp->if_softc = sc;
594	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
595	ifp->if_ioctl = sfxge_if_ioctl;
596
597	ifp->if_capabilities = SFXGE_CAP;
598	ifp->if_capenable = SFXGE_CAP_ENABLE;
599	ifp->if_hw_tsomax = SFXGE_TSO_MAX_SIZE;
600	ifp->if_hw_tsomaxsegcount = SFXGE_TX_MAPPING_MAX_SEG;
601	ifp->if_hw_tsomaxsegsize = PAGE_SIZE;
602
603#ifdef SFXGE_LRO
604	ifp->if_capabilities |= IFCAP_LRO;
605	ifp->if_capenable |= IFCAP_LRO;
606#endif
607
608	if (encp->enc_hw_tx_insert_vlan_enabled) {
609		ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING;
610		ifp->if_capenable |= IFCAP_VLAN_HWTAGGING;
611	}
612	ifp->if_hwassist = CSUM_TCP | CSUM_UDP | CSUM_IP | CSUM_TSO |
613			   CSUM_TCP_IPV6 | CSUM_UDP_IPV6;
614
615	ether_ifattach(ifp, encp->enc_mac_addr);
616
617	ifp->if_transmit = sfxge_if_transmit;
618	ifp->if_qflush = sfxge_if_qflush;
619
620	callout_init(&sc->tick_callout, B_TRUE);
621
622	DBGPRINT(sc->dev, "ifmedia_init");
623	if ((rc = sfxge_port_ifmedia_init(sc)) != 0)
624		goto fail;
625
626	callout_reset(&sc->tick_callout, sfxge_stats_update_period,
627		      sfxge_tick, sc);
628
629	return (0);
630
631fail:
632	ether_ifdetach(sc->ifnet);
633	return (rc);
634}
635
636void
637sfxge_sram_buf_tbl_alloc(struct sfxge_softc *sc, size_t n, uint32_t *idp)
638{
639	KASSERT(sc->buffer_table_next + n <=
640		efx_nic_cfg_get(sc->enp)->enc_buftbl_limit,
641		("buffer table full"));
642
643	*idp = sc->buffer_table_next;
644	sc->buffer_table_next += n;
645}
646
647static int
648sfxge_bar_init(struct sfxge_softc *sc)
649{
650	efsys_bar_t *esbp = &sc->bar;
651
652	esbp->esb_rid = PCIR_BAR(EFX_MEM_BAR);
653	if ((esbp->esb_res = bus_alloc_resource_any(sc->dev, SYS_RES_MEMORY,
654	    &esbp->esb_rid, RF_ACTIVE)) == NULL) {
655		device_printf(sc->dev, "Cannot allocate BAR region %d\n",
656		    EFX_MEM_BAR);
657		return (ENXIO);
658	}
659	esbp->esb_tag = rman_get_bustag(esbp->esb_res);
660	esbp->esb_handle = rman_get_bushandle(esbp->esb_res);
661
662	SFXGE_BAR_LOCK_INIT(esbp, device_get_nameunit(sc->dev));
663
664	return (0);
665}
666
667static void
668sfxge_bar_fini(struct sfxge_softc *sc)
669{
670	efsys_bar_t *esbp = &sc->bar;
671
672	bus_release_resource(sc->dev, SYS_RES_MEMORY, esbp->esb_rid,
673	    esbp->esb_res);
674	SFXGE_BAR_LOCK_DESTROY(esbp);
675}
676
677static int
678sfxge_create(struct sfxge_softc *sc)
679{
680	device_t dev;
681	efx_nic_t *enp;
682	int error;
683	char rss_param_name[sizeof(SFXGE_PARAM(%d.max_rss_channels))];
684#if EFSYS_OPT_MCDI_LOGGING
685	char mcdi_log_param_name[sizeof(SFXGE_PARAM(%d.mcdi_logging))];
686#endif
687
688	dev = sc->dev;
689
690	SFXGE_ADAPTER_LOCK_INIT(sc, device_get_nameunit(sc->dev));
691
692	sc->max_rss_channels = 0;
693	snprintf(rss_param_name, sizeof(rss_param_name),
694		 SFXGE_PARAM(%d.max_rss_channels),
695		 (int)device_get_unit(dev));
696	TUNABLE_INT_FETCH(rss_param_name, &sc->max_rss_channels);
697#if EFSYS_OPT_MCDI_LOGGING
698	sc->mcdi_logging = sfxge_mcdi_logging;
699	snprintf(mcdi_log_param_name, sizeof(mcdi_log_param_name),
700		 SFXGE_PARAM(%d.mcdi_logging),
701		 (int)device_get_unit(dev));
702	TUNABLE_INT_FETCH(mcdi_log_param_name, &sc->mcdi_logging);
703#endif
704
705	sc->stats_node = SYSCTL_ADD_NODE(
706		device_get_sysctl_ctx(dev),
707		SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
708		OID_AUTO, "stats", CTLFLAG_RD, NULL, "Statistics");
709	if (sc->stats_node == NULL) {
710		error = ENOMEM;
711		goto fail;
712	}
713
714	TASK_INIT(&sc->task_reset, 0, sfxge_reset, sc);
715
716	(void) pci_enable_busmaster(dev);
717
718	/* Initialize DMA mappings. */
719	DBGPRINT(sc->dev, "dma_init...");
720	if ((error = sfxge_dma_init(sc)) != 0)
721		goto fail;
722
723	/* Map the device registers. */
724	DBGPRINT(sc->dev, "bar_init...");
725	if ((error = sfxge_bar_init(sc)) != 0)
726		goto fail;
727
728	error = efx_family(pci_get_vendor(dev), pci_get_device(dev),
729	    &sc->family);
730	KASSERT(error == 0, ("Family should be filtered by sfxge_probe()"));
731
732	DBGPRINT(sc->dev, "nic_create...");
733
734	/* Create the common code nic object. */
735	SFXGE_EFSYS_LOCK_INIT(&sc->enp_lock,
736			      device_get_nameunit(sc->dev), "nic");
737	if ((error = efx_nic_create(sc->family, (efsys_identifier_t *)sc,
738	    &sc->bar, &sc->enp_lock, &enp)) != 0)
739		goto fail3;
740	sc->enp = enp;
741
742	if (!ISP2(sfxge_rx_ring_entries) ||
743	    (sfxge_rx_ring_entries < EFX_RXQ_MINNDESCS) ||
744	    (sfxge_rx_ring_entries > EFX_RXQ_MAXNDESCS)) {
745		log(LOG_ERR, "%s=%d must be power of 2 from %u to %u",
746		    SFXGE_PARAM_RX_RING, sfxge_rx_ring_entries,
747		    EFX_RXQ_MINNDESCS, EFX_RXQ_MAXNDESCS);
748		error = EINVAL;
749		goto fail_rx_ring_entries;
750	}
751	sc->rxq_entries = sfxge_rx_ring_entries;
752
753	if (!ISP2(sfxge_tx_ring_entries) ||
754	    (sfxge_tx_ring_entries < EFX_TXQ_MINNDESCS) ||
755	    (sfxge_tx_ring_entries > EFX_TXQ_MAXNDESCS(efx_nic_cfg_get(enp)))) {
756		log(LOG_ERR, "%s=%d must be power of 2 from %u to %u",
757		    SFXGE_PARAM_TX_RING, sfxge_tx_ring_entries,
758		    EFX_TXQ_MINNDESCS, EFX_TXQ_MAXNDESCS(efx_nic_cfg_get(enp)));
759		error = EINVAL;
760		goto fail_tx_ring_entries;
761	}
762	sc->txq_entries = sfxge_tx_ring_entries;
763
764	/* Initialize MCDI to talk to the microcontroller. */
765	DBGPRINT(sc->dev, "mcdi_init...");
766	if ((error = sfxge_mcdi_init(sc)) != 0)
767		goto fail4;
768
769	/* Probe the NIC and build the configuration data area. */
770	DBGPRINT(sc->dev, "nic_probe...");
771	if ((error = efx_nic_probe(enp)) != 0)
772		goto fail5;
773
774	SYSCTL_ADD_STRING(device_get_sysctl_ctx(dev),
775			  SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
776			  OID_AUTO, "version", CTLFLAG_RD,
777			  SFXGE_VERSION_STRING, 0,
778			  "Driver version");
779
780	SYSCTL_ADD_UINT(device_get_sysctl_ctx(dev),
781			SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
782			OID_AUTO, "phy_type", CTLFLAG_RD,
783			NULL, efx_nic_cfg_get(enp)->enc_phy_type,
784			"PHY type");
785
786	/* Initialize the NVRAM. */
787	DBGPRINT(sc->dev, "nvram_init...");
788	if ((error = efx_nvram_init(enp)) != 0)
789		goto fail6;
790
791	/* Initialize the VPD. */
792	DBGPRINT(sc->dev, "vpd_init...");
793	if ((error = efx_vpd_init(enp)) != 0)
794		goto fail7;
795
796	efx_mcdi_new_epoch(enp);
797
798	/* Reset the NIC. */
799	DBGPRINT(sc->dev, "nic_reset...");
800	if ((error = efx_nic_reset(enp)) != 0)
801		goto fail8;
802
803	/* Initialize buffer table allocation. */
804	sc->buffer_table_next = 0;
805
806	/*
807	 * Guarantee minimum and estimate maximum number of event queues
808	 * to take it into account when MSI-X interrupts are allocated.
809	 * It initializes NIC and keeps it initialized on success.
810	 */
811	if ((error = sfxge_estimate_rsrc_limits(sc)) != 0)
812		goto fail8;
813
814	/* Set up interrupts. */
815	DBGPRINT(sc->dev, "intr_init...");
816	if ((error = sfxge_intr_init(sc)) != 0)
817		goto fail9;
818
819	/* Initialize event processing state. */
820	DBGPRINT(sc->dev, "ev_init...");
821	if ((error = sfxge_ev_init(sc)) != 0)
822		goto fail11;
823
824	/* Initialize port state. */
825	DBGPRINT(sc->dev, "port_init...");
826	if ((error = sfxge_port_init(sc)) != 0)
827		goto fail12;
828
829	/* Initialize receive state. */
830	DBGPRINT(sc->dev, "rx_init...");
831	if ((error = sfxge_rx_init(sc)) != 0)
832		goto fail13;
833
834	/* Initialize transmit state. */
835	DBGPRINT(sc->dev, "tx_init...");
836	if ((error = sfxge_tx_init(sc)) != 0)
837		goto fail14;
838
839	sc->init_state = SFXGE_INITIALIZED;
840
841	DBGPRINT(sc->dev, "success");
842	return (0);
843
844fail14:
845	sfxge_rx_fini(sc);
846
847fail13:
848	sfxge_port_fini(sc);
849
850fail12:
851	sfxge_ev_fini(sc);
852
853fail11:
854	sfxge_intr_fini(sc);
855
856fail9:
857	efx_nic_fini(sc->enp);
858
859fail8:
860	efx_vpd_fini(enp);
861
862fail7:
863	efx_nvram_fini(enp);
864
865fail6:
866	efx_nic_unprobe(enp);
867
868fail5:
869	sfxge_mcdi_fini(sc);
870
871fail4:
872fail_tx_ring_entries:
873fail_rx_ring_entries:
874	sc->enp = NULL;
875	efx_nic_destroy(enp);
876	SFXGE_EFSYS_LOCK_DESTROY(&sc->enp_lock);
877
878fail3:
879	sfxge_bar_fini(sc);
880	(void) pci_disable_busmaster(sc->dev);
881
882fail:
883	DBGPRINT(sc->dev, "failed %d", error);
884	sc->dev = NULL;
885	SFXGE_ADAPTER_LOCK_DESTROY(sc);
886	return (error);
887}
888
889static void
890sfxge_destroy(struct sfxge_softc *sc)
891{
892	efx_nic_t *enp;
893
894	/* Clean up transmit state. */
895	sfxge_tx_fini(sc);
896
897	/* Clean up receive state. */
898	sfxge_rx_fini(sc);
899
900	/* Clean up port state. */
901	sfxge_port_fini(sc);
902
903	/* Clean up event processing state. */
904	sfxge_ev_fini(sc);
905
906	/* Clean up interrupts. */
907	sfxge_intr_fini(sc);
908
909	/* Tear down common code subsystems. */
910	efx_nic_reset(sc->enp);
911	efx_vpd_fini(sc->enp);
912	efx_nvram_fini(sc->enp);
913	efx_nic_unprobe(sc->enp);
914
915	/* Tear down MCDI. */
916	sfxge_mcdi_fini(sc);
917
918	/* Destroy common code context. */
919	enp = sc->enp;
920	sc->enp = NULL;
921	efx_nic_destroy(enp);
922
923	/* Free DMA memory. */
924	sfxge_dma_fini(sc);
925
926	/* Free mapped BARs. */
927	sfxge_bar_fini(sc);
928
929	(void) pci_disable_busmaster(sc->dev);
930
931	taskqueue_drain(taskqueue_thread, &sc->task_reset);
932
933	/* Destroy the softc lock. */
934	SFXGE_ADAPTER_LOCK_DESTROY(sc);
935}
936
937static int
938sfxge_vpd_handler(SYSCTL_HANDLER_ARGS)
939{
940	struct sfxge_softc *sc = arg1;
941	efx_vpd_value_t value;
942	int rc;
943
944	value.evv_tag = arg2 >> 16;
945	value.evv_keyword = arg2 & 0xffff;
946	if ((rc = efx_vpd_get(sc->enp, sc->vpd_data, sc->vpd_size, &value))
947	    != 0)
948		return (rc);
949
950	return (SYSCTL_OUT(req, value.evv_value, value.evv_length));
951}
952
953static void
954sfxge_vpd_try_add(struct sfxge_softc *sc, struct sysctl_oid_list *list,
955		  efx_vpd_tag_t tag, const char *keyword)
956{
957	struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(sc->dev);
958	efx_vpd_value_t value;
959
960	/* Check whether VPD tag/keyword is present */
961	value.evv_tag = tag;
962	value.evv_keyword = EFX_VPD_KEYWORD(keyword[0], keyword[1]);
963	if (efx_vpd_get(sc->enp, sc->vpd_data, sc->vpd_size, &value) != 0)
964		return;
965
966	SYSCTL_ADD_PROC(
967		ctx, list, OID_AUTO, keyword, CTLTYPE_STRING|CTLFLAG_RD,
968		sc, tag << 16 | EFX_VPD_KEYWORD(keyword[0], keyword[1]),
969		sfxge_vpd_handler, "A", "");
970}
971
972static int
973sfxge_vpd_init(struct sfxge_softc *sc)
974{
975	struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(sc->dev);
976	struct sysctl_oid *vpd_node;
977	struct sysctl_oid_list *vpd_list;
978	char keyword[3];
979	efx_vpd_value_t value;
980	int rc;
981
982	if ((rc = efx_vpd_size(sc->enp, &sc->vpd_size)) != 0) {
983		/*
984		 * Unpriviledged functions deny VPD access.
985		 * Simply skip VPD in this case.
986		 */
987		if (rc == EACCES)
988			goto done;
989		goto fail;
990	}
991	sc->vpd_data = malloc(sc->vpd_size, M_SFXGE, M_WAITOK);
992	if ((rc = efx_vpd_read(sc->enp, sc->vpd_data, sc->vpd_size)) != 0)
993		goto fail2;
994
995	/* Copy ID (product name) into device description, and log it. */
996	value.evv_tag = EFX_VPD_ID;
997	if (efx_vpd_get(sc->enp, sc->vpd_data, sc->vpd_size, &value) == 0) {
998		value.evv_value[value.evv_length] = 0;
999		device_set_desc_copy(sc->dev, value.evv_value);
1000		device_printf(sc->dev, "%s\n", value.evv_value);
1001	}
1002
1003	vpd_node = SYSCTL_ADD_NODE(
1004		ctx, SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev)),
1005		OID_AUTO, "vpd", CTLFLAG_RD, NULL, "Vital Product Data");
1006	vpd_list = SYSCTL_CHILDREN(vpd_node);
1007
1008	/* Add sysctls for all expected and any vendor-defined keywords. */
1009	sfxge_vpd_try_add(sc, vpd_list, EFX_VPD_RO, "PN");
1010	sfxge_vpd_try_add(sc, vpd_list, EFX_VPD_RO, "EC");
1011	sfxge_vpd_try_add(sc, vpd_list, EFX_VPD_RO, "SN");
1012	keyword[0] = 'V';
1013	keyword[2] = 0;
1014	for (keyword[1] = '0'; keyword[1] <= '9'; keyword[1]++)
1015		sfxge_vpd_try_add(sc, vpd_list, EFX_VPD_RO, keyword);
1016	for (keyword[1] = 'A'; keyword[1] <= 'Z'; keyword[1]++)
1017		sfxge_vpd_try_add(sc, vpd_list, EFX_VPD_RO, keyword);
1018
1019done:
1020	return (0);
1021
1022fail2:
1023	free(sc->vpd_data, M_SFXGE);
1024fail:
1025	return (rc);
1026}
1027
1028static void
1029sfxge_vpd_fini(struct sfxge_softc *sc)
1030{
1031	free(sc->vpd_data, M_SFXGE);
1032}
1033
1034static void
1035sfxge_reset(void *arg, int npending)
1036{
1037	struct sfxge_softc *sc;
1038	int rc;
1039	unsigned attempt;
1040
1041	(void)npending;
1042
1043	sc = (struct sfxge_softc *)arg;
1044
1045	SFXGE_ADAPTER_LOCK(sc);
1046
1047	if (sc->init_state != SFXGE_STARTED)
1048		goto done;
1049
1050	sfxge_stop(sc);
1051	efx_nic_reset(sc->enp);
1052	for (attempt = 0; attempt < sfxge_restart_attempts; ++attempt) {
1053		if ((rc = sfxge_start(sc)) == 0)
1054			goto done;
1055
1056		device_printf(sc->dev, "start on reset failed (%d)\n", rc);
1057		DELAY(100000);
1058	}
1059
1060	device_printf(sc->dev, "reset failed; interface is now stopped\n");
1061
1062done:
1063	SFXGE_ADAPTER_UNLOCK(sc);
1064}
1065
1066void
1067sfxge_schedule_reset(struct sfxge_softc *sc)
1068{
1069	taskqueue_enqueue(taskqueue_thread, &sc->task_reset);
1070}
1071
1072static int
1073sfxge_attach(device_t dev)
1074{
1075	struct sfxge_softc *sc;
1076	struct ifnet *ifp;
1077	int error;
1078
1079	sc = device_get_softc(dev);
1080	sc->dev = dev;
1081
1082	/* Allocate ifnet. */
1083	ifp = if_alloc(IFT_ETHER);
1084	if (ifp == NULL) {
1085		device_printf(dev, "Couldn't allocate ifnet\n");
1086		error = ENOMEM;
1087		goto fail;
1088	}
1089	sc->ifnet = ifp;
1090
1091	/* Initialize hardware. */
1092	DBGPRINT(sc->dev, "create nic");
1093	if ((error = sfxge_create(sc)) != 0)
1094		goto fail2;
1095
1096	/* Create the ifnet for the port. */
1097	DBGPRINT(sc->dev, "init ifnet");
1098	if ((error = sfxge_ifnet_init(ifp, sc)) != 0)
1099		goto fail3;
1100
1101	DBGPRINT(sc->dev, "init vpd");
1102	if ((error = sfxge_vpd_init(sc)) != 0)
1103		goto fail4;
1104
1105	/*
1106	 * NIC is initialized inside sfxge_create() and kept inialized
1107	 * to be able to initialize port to discover media types in
1108	 * sfxge_ifnet_init().
1109	 */
1110	efx_nic_fini(sc->enp);
1111
1112	sc->init_state = SFXGE_REGISTERED;
1113
1114	DBGPRINT(sc->dev, "success");
1115	return (0);
1116
1117fail4:
1118	sfxge_ifnet_fini(ifp);
1119fail3:
1120	efx_nic_fini(sc->enp);
1121	sfxge_destroy(sc);
1122
1123fail2:
1124	if_free(sc->ifnet);
1125
1126fail:
1127	DBGPRINT(sc->dev, "failed %d", error);
1128	return (error);
1129}
1130
1131static int
1132sfxge_detach(device_t dev)
1133{
1134	struct sfxge_softc *sc;
1135
1136	sc = device_get_softc(dev);
1137
1138	sfxge_vpd_fini(sc);
1139
1140	/* Destroy the ifnet. */
1141	sfxge_ifnet_fini(sc->ifnet);
1142
1143	/* Tear down hardware. */
1144	sfxge_destroy(sc);
1145
1146	return (0);
1147}
1148
1149static int
1150sfxge_probe(device_t dev)
1151{
1152	uint16_t pci_vendor_id;
1153	uint16_t pci_device_id;
1154	efx_family_t family;
1155	int rc;
1156
1157	pci_vendor_id = pci_get_vendor(dev);
1158	pci_device_id = pci_get_device(dev);
1159
1160	DBGPRINT(dev, "PCI ID %04x:%04x", pci_vendor_id, pci_device_id);
1161	rc = efx_family(pci_vendor_id, pci_device_id, &family);
1162	if (rc != 0) {
1163		DBGPRINT(dev, "efx_family fail %d", rc);
1164		return (ENXIO);
1165	}
1166
1167	if (family == EFX_FAMILY_SIENA) {
1168		device_set_desc(dev, "Solarflare SFC9000 family");
1169		return (0);
1170	}
1171
1172	if (family == EFX_FAMILY_HUNTINGTON) {
1173		device_set_desc(dev, "Solarflare SFC9100 family");
1174		return (0);
1175	}
1176
1177	DBGPRINT(dev, "impossible controller family %d", family);
1178	return (ENXIO);
1179}
1180
/* Device interface methods implemented by this driver. */
static device_method_t sfxge_methods[] = {
	DEVMETHOD(device_probe,		sfxge_probe),
	DEVMETHOD(device_attach,	sfxge_attach),
	DEVMETHOD(device_detach,	sfxge_detach),

	DEVMETHOD_END
};

static devclass_t sfxge_devclass;

/* Driver description: name, method table and size of the per-device softc. */
static driver_t sfxge_driver = {
	"sfxge",
	sfxge_methods,
	sizeof(struct sfxge_softc)
};

/* Register the driver on the pci bus. */
DRIVER_MODULE(sfxge, pci, sfxge_driver, sfxge_devclass, 0, 0);
1198