sfxge.c revision 302408
1/*-
2 * Copyright (c) 2010-2016 Solarflare Communications Inc.
3 * All rights reserved.
4 *
5 * This software was developed in part by Philip Paeps under contract for
6 * Solarflare Communications, Inc.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions are met:
10 *
11 * 1. Redistributions of source code must retain the above copyright notice,
12 *    this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright notice,
14 *    this list of conditions and the following disclaimer in the documentation
15 *    and/or other materials provided with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
18 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
19 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
20 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
21 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
22 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
23 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
24 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
25 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
26 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
27 * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 *
29 * The views and conclusions contained in the software and documentation are
30 * those of the authors and should not be interpreted as representing official
31 * policies, either expressed or implied, of the FreeBSD Project.
32 */
33
34#include <sys/cdefs.h>
35__FBSDID("$FreeBSD: stable/11/sys/dev/sfxge/sfxge.c 301491 2016-06-06 09:05:52Z arybchik $");
36
37#include "opt_rss.h"
38
39#include <sys/param.h>
40#include <sys/kernel.h>
41#include <sys/bus.h>
42#include <sys/rman.h>
43#include <sys/lock.h>
44#include <sys/module.h>
45#include <sys/mutex.h>
46#include <sys/smp.h>
47#include <sys/socket.h>
48#include <sys/taskqueue.h>
49#include <sys/sockio.h>
50#include <sys/sysctl.h>
51#include <sys/priv.h>
52#include <sys/syslog.h>
53
54#include <dev/pci/pcireg.h>
55#include <dev/pci/pcivar.h>
56
57#include <net/ethernet.h>
58#include <net/if.h>
59#include <net/if_var.h>
60#include <net/if_media.h>
61#include <net/if_types.h>
62
63#ifdef RSS
64#include <net/rss_config.h>
65#endif
66
67#include "common/efx.h"
68
69#include "sfxge.h"
70#include "sfxge_rx.h"
71#include "sfxge_ioc.h"
72#include "sfxge_version.h"
73
/*
 * Interface capabilities advertised to the network stack; this value is
 * assigned to if_capabilities in sfxge_ifnet_init().
 */
#define	SFXGE_CAP (IFCAP_VLAN_MTU | IFCAP_VLAN_HWCSUM |			\
		   IFCAP_RXCSUM | IFCAP_TXCSUM |			\
		   IFCAP_RXCSUM_IPV6 | IFCAP_TXCSUM_IPV6 |		\
		   IFCAP_TSO4 | IFCAP_TSO6 |				\
		   IFCAP_JUMBO_MTU |					\
		   IFCAP_VLAN_HWTSO | IFCAP_LINKSTATE | IFCAP_HWSTATS)
/* Capabilities enabled by default (initial if_capenable): all of SFXGE_CAP. */
#define	SFXGE_CAP_ENABLE SFXGE_CAP
/*
 * Capabilities that cannot be toggled: a SIOCSIFCAP request that would
 * change any of these bits is rejected with EINVAL.
 */
#define	SFXGE_CAP_FIXED (IFCAP_VLAN_MTU |				\
			 IFCAP_JUMBO_MTU | IFCAP_LINKSTATE | IFCAP_HWSTATS)
83
/* Malloc type used by this file for the VPD buffer (see sfxge_vpd_init()). */
MALLOC_DEFINE(M_SFXGE, "sfxge", "Solarflare 10GigE driver");


/* Root of the driver-wide hw.sfxge sysctl tree. */
SYSCTL_NODE(_hw, OID_AUTO, sfxge, CTLFLAG_RD, 0,
	    "SFXGE driver parameters");

/*
 * Receive ring size.  Validated in sfxge_create(): it must be a power of 2
 * between EFX_RXQ_MINNDESCS and EFX_RXQ_MAXNDESCS.
 */
#define	SFXGE_PARAM_RX_RING	SFXGE_PARAM(rx_ring)
static int sfxge_rx_ring_entries = SFXGE_NDESCS;
TUNABLE_INT(SFXGE_PARAM_RX_RING, &sfxge_rx_ring_entries);
SYSCTL_INT(_hw_sfxge, OID_AUTO, rx_ring, CTLFLAG_RDTUN,
	   &sfxge_rx_ring_entries, 0,
	   "Maximum number of descriptors in a receive ring");

/*
 * Transmit ring size.  Validated in sfxge_create(): it must be a power of 2
 * between EFX_TXQ_MINNDESCS and EFX_TXQ_MAXNDESCS() for the NIC.
 */
#define	SFXGE_PARAM_TX_RING	SFXGE_PARAM(tx_ring)
static int sfxge_tx_ring_entries = SFXGE_NDESCS;
TUNABLE_INT(SFXGE_PARAM_TX_RING, &sfxge_tx_ring_entries);
SYSCTL_INT(_hw_sfxge, OID_AUTO, tx_ring, CTLFLAG_RDTUN,
	   &sfxge_tx_ring_entries, 0,
	   "Maximum number of descriptors in a transmit ring");

/* Upper bound on sfxge_start() retries performed by sfxge_reset(). */
#define	SFXGE_PARAM_RESTART_ATTEMPTS	SFXGE_PARAM(restart_attempts)
static int sfxge_restart_attempts = 3;
TUNABLE_INT(SFXGE_PARAM_RESTART_ATTEMPTS, &sfxge_restart_attempts);
SYSCTL_INT(_hw_sfxge, OID_AUTO, restart_attempts, CTLFLAG_RDTUN,
	   &sfxge_restart_attempts, 0,
	   "Maximum number of attempts to bring interface up after reset");

#if EFSYS_OPT_MCDI_LOGGING
/*
 * Driver-wide default for MCDI logging.  sfxge_create() copies it into
 * sc->mcdi_logging and then lets a per-unit tunable override it.
 */
#define	SFXGE_PARAM_MCDI_LOGGING	SFXGE_PARAM(mcdi_logging)
static int sfxge_mcdi_logging = 0;
TUNABLE_INT(SFXGE_PARAM_MCDI_LOGGING, &sfxge_mcdi_logging);
#endif

/*
 * Reset task handler.  Declared ahead of its definition because
 * sfxge_create() registers it with TASK_INIT().
 */
static void
sfxge_reset(void *arg, int npending);
119
120static int
121sfxge_estimate_rsrc_limits(struct sfxge_softc *sc)
122{
123	efx_drv_limits_t limits;
124	int rc;
125	unsigned int evq_max;
126	uint32_t evq_allocated;
127	uint32_t rxq_allocated;
128	uint32_t txq_allocated;
129
130	/*
131	 * Limit the number of event queues to:
132	 *  - number of CPUs
133	 *  - hardwire maximum RSS channels
134	 *  - administratively specified maximum RSS channels
135	 */
136#ifdef RSS
137	/*
138	 * Avoid extra limitations so that the number of queues
139	 * may be configured at administrator's will
140	 */
141	evq_max = MIN(MAX(rss_getnumbuckets(), 1), EFX_MAXRSS);
142#else
143	evq_max = MIN(mp_ncpus, EFX_MAXRSS);
144#endif
145	if (sc->max_rss_channels > 0)
146		evq_max = MIN(evq_max, sc->max_rss_channels);
147
148	memset(&limits, 0, sizeof(limits));
149
150	limits.edl_min_evq_count = 1;
151	limits.edl_max_evq_count = evq_max;
152	limits.edl_min_txq_count = SFXGE_TXQ_NTYPES;
153	limits.edl_max_txq_count = evq_max + SFXGE_TXQ_NTYPES - 1;
154	limits.edl_min_rxq_count = 1;
155	limits.edl_max_rxq_count = evq_max;
156
157	efx_nic_set_drv_limits(sc->enp, &limits);
158
159	if ((rc = efx_nic_init(sc->enp)) != 0)
160		return (rc);
161
162	rc = efx_nic_get_vi_pool(sc->enp, &evq_allocated, &rxq_allocated,
163				 &txq_allocated);
164	if (rc != 0) {
165		efx_nic_fini(sc->enp);
166		return (rc);
167	}
168
169	KASSERT(txq_allocated >= SFXGE_TXQ_NTYPES,
170		("txq_allocated < SFXGE_TXQ_NTYPES"));
171
172	sc->evq_max = MIN(evq_allocated, evq_max);
173	sc->evq_max = MIN(rxq_allocated, sc->evq_max);
174	sc->evq_max = MIN(txq_allocated - (SFXGE_TXQ_NTYPES - 1),
175			  sc->evq_max);
176
177	KASSERT(sc->evq_max <= evq_max,
178		("allocated more than maximum requested"));
179
180#ifdef RSS
181	if (sc->evq_max < rss_getnumbuckets())
182		device_printf(sc->dev, "The number of allocated queues (%u) "
183			      "is less than the number of RSS buckets (%u); "
184			      "performance degradation might be observed",
185			      sc->evq_max, rss_getnumbuckets());
186#endif
187
188	/*
189	 * NIC is kept initialized in the case of success to be able to
190	 * initialize port to find out media types.
191	 */
192	return (0);
193}
194
195static int
196sfxge_set_drv_limits(struct sfxge_softc *sc)
197{
198	efx_drv_limits_t limits;
199
200	memset(&limits, 0, sizeof(limits));
201
202	/* Limits are strict since take into account initial estimation */
203	limits.edl_min_evq_count = limits.edl_max_evq_count =
204	    sc->intr.n_alloc;
205	limits.edl_min_txq_count = limits.edl_max_txq_count =
206	    sc->intr.n_alloc + SFXGE_TXQ_NTYPES - 1;
207	limits.edl_min_rxq_count = limits.edl_max_rxq_count =
208	    sc->intr.n_alloc;
209
210	return (efx_nic_set_drv_limits(sc->enp, &limits));
211}
212
213static int
214sfxge_start(struct sfxge_softc *sc)
215{
216	int rc;
217
218	SFXGE_ADAPTER_LOCK_ASSERT_OWNED(sc);
219
220	if (sc->init_state == SFXGE_STARTED)
221		return (0);
222
223	if (sc->init_state != SFXGE_REGISTERED) {
224		rc = EINVAL;
225		goto fail;
226	}
227
228	/* Set required resource limits */
229	if ((rc = sfxge_set_drv_limits(sc)) != 0)
230		goto fail;
231
232	if ((rc = efx_nic_init(sc->enp)) != 0)
233		goto fail;
234
235	/* Start processing interrupts. */
236	if ((rc = sfxge_intr_start(sc)) != 0)
237		goto fail2;
238
239	/* Start processing events. */
240	if ((rc = sfxge_ev_start(sc)) != 0)
241		goto fail3;
242
243	/* Fire up the port. */
244	if ((rc = sfxge_port_start(sc)) != 0)
245		goto fail4;
246
247	/* Start the receiver side. */
248	if ((rc = sfxge_rx_start(sc)) != 0)
249		goto fail5;
250
251	/* Start the transmitter side. */
252	if ((rc = sfxge_tx_start(sc)) != 0)
253		goto fail6;
254
255	sc->init_state = SFXGE_STARTED;
256
257	/* Tell the stack we're running. */
258	sc->ifnet->if_drv_flags |= IFF_DRV_RUNNING;
259	sc->ifnet->if_drv_flags &= ~IFF_DRV_OACTIVE;
260
261	return (0);
262
263fail6:
264	sfxge_rx_stop(sc);
265
266fail5:
267	sfxge_port_stop(sc);
268
269fail4:
270	sfxge_ev_stop(sc);
271
272fail3:
273	sfxge_intr_stop(sc);
274
275fail2:
276	efx_nic_fini(sc->enp);
277
278fail:
279	device_printf(sc->dev, "sfxge_start: %d\n", rc);
280
281	return (rc);
282}
283
/*
 * if_init callback: start the interface under the adapter lock.
 * The result of sfxge_start() is deliberately ignored because the
 * callback returns void.
 */
static void
sfxge_if_init(void *arg)
{
	struct sfxge_softc *sc = arg;

	SFXGE_ADAPTER_LOCK(sc);
	(void)sfxge_start(sc);
	SFXGE_ADAPTER_UNLOCK(sc);
}
295
296static void
297sfxge_stop(struct sfxge_softc *sc)
298{
299	SFXGE_ADAPTER_LOCK_ASSERT_OWNED(sc);
300
301	if (sc->init_state != SFXGE_STARTED)
302		return;
303
304	sc->init_state = SFXGE_REGISTERED;
305
306	/* Stop the transmitter. */
307	sfxge_tx_stop(sc);
308
309	/* Stop the receiver. */
310	sfxge_rx_stop(sc);
311
312	/* Stop the port. */
313	sfxge_port_stop(sc);
314
315	/* Stop processing events. */
316	sfxge_ev_stop(sc);
317
318	/* Stop processing interrupts. */
319	sfxge_intr_stop(sc);
320
321	efx_nic_fini(sc->enp);
322
323	sc->ifnet->if_drv_flags &= ~IFF_DRV_RUNNING;
324}
325
326
327static int
328sfxge_vpd_ioctl(struct sfxge_softc *sc, sfxge_ioc_t *ioc)
329{
330	efx_vpd_value_t value;
331	int rc = 0;
332
333	switch (ioc->u.vpd.op) {
334	case SFXGE_VPD_OP_GET_KEYWORD:
335		value.evv_tag = ioc->u.vpd.tag;
336		value.evv_keyword = ioc->u.vpd.keyword;
337		rc = efx_vpd_get(sc->enp, sc->vpd_data, sc->vpd_size, &value);
338		if (rc != 0)
339			break;
340		ioc->u.vpd.len = MIN(ioc->u.vpd.len, value.evv_length);
341		if (ioc->u.vpd.payload != 0) {
342			rc = copyout(value.evv_value, ioc->u.vpd.payload,
343				     ioc->u.vpd.len);
344		}
345		break;
346	case SFXGE_VPD_OP_SET_KEYWORD:
347		if (ioc->u.vpd.len > sizeof(value.evv_value))
348			return (EINVAL);
349		value.evv_tag = ioc->u.vpd.tag;
350		value.evv_keyword = ioc->u.vpd.keyword;
351		value.evv_length = ioc->u.vpd.len;
352		rc = copyin(ioc->u.vpd.payload, value.evv_value, value.evv_length);
353		if (rc != 0)
354			break;
355		rc = efx_vpd_set(sc->enp, sc->vpd_data, sc->vpd_size, &value);
356		if (rc != 0)
357			break;
358		rc = efx_vpd_verify(sc->enp, sc->vpd_data, sc->vpd_size);
359		if (rc != 0)
360			break;
361		rc = efx_vpd_write(sc->enp, sc->vpd_data, sc->vpd_size);
362		break;
363	default:
364		rc = EOPNOTSUPP;
365		break;
366	}
367
368	return (rc);
369}
370
371static int
372sfxge_private_ioctl(struct sfxge_softc *sc, sfxge_ioc_t *ioc)
373{
374	switch (ioc->op) {
375	case SFXGE_MCDI_IOC:
376		return (sfxge_mcdi_ioctl(sc, ioc));
377	case SFXGE_NVRAM_IOC:
378		return (sfxge_nvram_ioctl(sc, ioc));
379	case SFXGE_VPD_IOC:
380		return (sfxge_vpd_ioctl(sc, ioc));
381	default:
382		return (EOPNOTSUPP);
383	}
384}
385
386
387static int
388sfxge_if_ioctl(struct ifnet *ifp, unsigned long command, caddr_t data)
389{
390	struct sfxge_softc *sc;
391	struct ifreq *ifr;
392	sfxge_ioc_t ioc;
393	int error;
394
395	ifr = (struct ifreq *)data;
396	sc = ifp->if_softc;
397	error = 0;
398
399	switch (command) {
400	case SIOCSIFFLAGS:
401		SFXGE_ADAPTER_LOCK(sc);
402		if (ifp->if_flags & IFF_UP) {
403			if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
404				if ((ifp->if_flags ^ sc->if_flags) &
405				    (IFF_PROMISC | IFF_ALLMULTI)) {
406					sfxge_mac_filter_set(sc);
407				}
408			} else
409				sfxge_start(sc);
410		} else
411			if (ifp->if_drv_flags & IFF_DRV_RUNNING)
412				sfxge_stop(sc);
413		sc->if_flags = ifp->if_flags;
414		SFXGE_ADAPTER_UNLOCK(sc);
415		break;
416	case SIOCSIFMTU:
417		if (ifr->ifr_mtu == ifp->if_mtu) {
418			/* Nothing to do */
419			error = 0;
420		} else if (ifr->ifr_mtu > SFXGE_MAX_MTU) {
421			error = EINVAL;
422		} else if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) {
423			ifp->if_mtu = ifr->ifr_mtu;
424			error = 0;
425		} else {
426			/* Restart required */
427			SFXGE_ADAPTER_LOCK(sc);
428			sfxge_stop(sc);
429			ifp->if_mtu = ifr->ifr_mtu;
430			error = sfxge_start(sc);
431			SFXGE_ADAPTER_UNLOCK(sc);
432			if (error != 0) {
433				ifp->if_flags &= ~IFF_UP;
434				ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
435				if_down(ifp);
436			}
437		}
438		break;
439	case SIOCADDMULTI:
440	case SIOCDELMULTI:
441		if (ifp->if_drv_flags & IFF_DRV_RUNNING)
442			sfxge_mac_filter_set(sc);
443		break;
444	case SIOCSIFCAP:
445	{
446		int reqcap = ifr->ifr_reqcap;
447		int capchg_mask;
448
449		SFXGE_ADAPTER_LOCK(sc);
450
451		/* Capabilities to be changed in accordance with request */
452		capchg_mask = ifp->if_capenable ^ reqcap;
453
454		/*
455		 * The networking core already rejects attempts to
456		 * enable capabilities we don't have.  We still have
457		 * to reject attempts to disable capabilities that we
458		 * can't (yet) disable.
459		 */
460		KASSERT((reqcap & ~ifp->if_capabilities) == 0,
461		    ("Unsupported capabilities 0x%x requested 0x%x vs "
462		     "supported 0x%x",
463		     reqcap & ~ifp->if_capabilities,
464		     reqcap , ifp->if_capabilities));
465		if (capchg_mask & SFXGE_CAP_FIXED) {
466			error = EINVAL;
467			SFXGE_ADAPTER_UNLOCK(sc);
468			break;
469		}
470
471		/* Check request before any changes */
472		if ((capchg_mask & IFCAP_TSO4) &&
473		    (reqcap & (IFCAP_TSO4 | IFCAP_TXCSUM)) == IFCAP_TSO4) {
474			error = EAGAIN;
475			SFXGE_ADAPTER_UNLOCK(sc);
476			if_printf(ifp, "enable txcsum before tso4\n");
477			break;
478		}
479		if ((capchg_mask & IFCAP_TSO6) &&
480		    (reqcap & (IFCAP_TSO6 | IFCAP_TXCSUM_IPV6)) == IFCAP_TSO6) {
481			error = EAGAIN;
482			SFXGE_ADAPTER_UNLOCK(sc);
483			if_printf(ifp, "enable txcsum6 before tso6\n");
484			break;
485		}
486
487		if (reqcap & IFCAP_TXCSUM) {
488			ifp->if_hwassist |= (CSUM_IP | CSUM_TCP | CSUM_UDP);
489		} else {
490			ifp->if_hwassist &= ~(CSUM_IP | CSUM_TCP | CSUM_UDP);
491			if (reqcap & IFCAP_TSO4) {
492				reqcap &= ~IFCAP_TSO4;
493				if_printf(ifp,
494				    "tso4 disabled due to -txcsum\n");
495			}
496		}
497		if (reqcap & IFCAP_TXCSUM_IPV6) {
498			ifp->if_hwassist |= (CSUM_TCP_IPV6 | CSUM_UDP_IPV6);
499		} else {
500			ifp->if_hwassist &= ~(CSUM_TCP_IPV6 | CSUM_UDP_IPV6);
501			if (reqcap & IFCAP_TSO6) {
502				reqcap &= ~IFCAP_TSO6;
503				if_printf(ifp,
504				    "tso6 disabled due to -txcsum6\n");
505			}
506		}
507
508		/*
509		 * The kernel takes both IFCAP_TSOx and CSUM_TSO into
510		 * account before using TSO. So, we do not touch
511		 * checksum flags when IFCAP_TSOx is modified.
512		 * Note that CSUM_TSO is (CSUM_IP_TSO|CSUM_IP6_TSO),
513		 * but both bits are set in IPv4 and IPv6 mbufs.
514		 */
515
516		ifp->if_capenable = reqcap;
517
518		SFXGE_ADAPTER_UNLOCK(sc);
519		break;
520	}
521	case SIOCSIFMEDIA:
522	case SIOCGIFMEDIA:
523		error = ifmedia_ioctl(ifp, ifr, &sc->media, command);
524		break;
525#ifdef SIOCGI2C
526	case SIOCGI2C:
527	{
528		struct ifi2creq i2c;
529
530		error = copyin(ifr->ifr_data, &i2c, sizeof(i2c));
531		if (error != 0)
532			break;
533
534		if (i2c.len > sizeof(i2c.data)) {
535			error = EINVAL;
536			break;
537		}
538
539		SFXGE_ADAPTER_LOCK(sc);
540		error = efx_phy_module_get_info(sc->enp, i2c.dev_addr,
541						i2c.offset, i2c.len,
542						&i2c.data[0]);
543		SFXGE_ADAPTER_UNLOCK(sc);
544		if (error == 0)
545			error = copyout(&i2c, ifr->ifr_data, sizeof(i2c));
546		break;
547	}
548#endif
549	case SIOCGPRIVATE_0:
550		error = priv_check(curthread, PRIV_DRIVER);
551		if (error != 0)
552			break;
553		error = copyin(ifr->ifr_data, &ioc, sizeof(ioc));
554		if (error != 0)
555			return (error);
556		error = sfxge_private_ioctl(sc, &ioc);
557		if (error == 0) {
558			error = copyout(&ioc, ifr->ifr_data, sizeof(ioc));
559		}
560		break;
561	default:
562		error = ether_ioctl(ifp, command, data);
563	}
564
565	return (error);
566}
567
568static void
569sfxge_ifnet_fini(struct ifnet *ifp)
570{
571	struct sfxge_softc *sc = ifp->if_softc;
572
573	SFXGE_ADAPTER_LOCK(sc);
574	sfxge_stop(sc);
575	SFXGE_ADAPTER_UNLOCK(sc);
576
577	ifmedia_removeall(&sc->media);
578	ether_ifdetach(ifp);
579	if_free(ifp);
580}
581
582static int
583sfxge_ifnet_init(struct ifnet *ifp, struct sfxge_softc *sc)
584{
585	const efx_nic_cfg_t *encp = efx_nic_cfg_get(sc->enp);
586	device_t dev;
587	int rc;
588
589	dev = sc->dev;
590	sc->ifnet = ifp;
591
592	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
593	ifp->if_init = sfxge_if_init;
594	ifp->if_softc = sc;
595	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
596	ifp->if_ioctl = sfxge_if_ioctl;
597
598	ifp->if_capabilities = SFXGE_CAP;
599	ifp->if_capenable = SFXGE_CAP_ENABLE;
600	ifp->if_hw_tsomax = SFXGE_TSO_MAX_SIZE;
601	ifp->if_hw_tsomaxsegcount = SFXGE_TX_MAPPING_MAX_SEG;
602	ifp->if_hw_tsomaxsegsize = PAGE_SIZE;
603
604#ifdef SFXGE_LRO
605	ifp->if_capabilities |= IFCAP_LRO;
606	ifp->if_capenable |= IFCAP_LRO;
607#endif
608
609	if (encp->enc_hw_tx_insert_vlan_enabled) {
610		ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING;
611		ifp->if_capenable |= IFCAP_VLAN_HWTAGGING;
612	}
613	ifp->if_hwassist = CSUM_TCP | CSUM_UDP | CSUM_IP | CSUM_TSO |
614			   CSUM_TCP_IPV6 | CSUM_UDP_IPV6;
615
616	ether_ifattach(ifp, encp->enc_mac_addr);
617
618	ifp->if_transmit = sfxge_if_transmit;
619	ifp->if_qflush = sfxge_if_qflush;
620
621	ifp->if_get_counter = sfxge_get_counter;
622
623	DBGPRINT(sc->dev, "ifmedia_init");
624	if ((rc = sfxge_port_ifmedia_init(sc)) != 0)
625		goto fail;
626
627	return (0);
628
629fail:
630	ether_ifdetach(sc->ifnet);
631	return (rc);
632}
633
634void
635sfxge_sram_buf_tbl_alloc(struct sfxge_softc *sc, size_t n, uint32_t *idp)
636{
637	KASSERT(sc->buffer_table_next + n <=
638		efx_nic_cfg_get(sc->enp)->enc_buftbl_limit,
639		("buffer table full"));
640
641	*idp = sc->buffer_table_next;
642	sc->buffer_table_next += n;
643}
644
645static int
646sfxge_bar_init(struct sfxge_softc *sc)
647{
648	efsys_bar_t *esbp = &sc->bar;
649
650	esbp->esb_rid = PCIR_BAR(EFX_MEM_BAR);
651	if ((esbp->esb_res = bus_alloc_resource_any(sc->dev, SYS_RES_MEMORY,
652	    &esbp->esb_rid, RF_ACTIVE)) == NULL) {
653		device_printf(sc->dev, "Cannot allocate BAR region %d\n",
654		    EFX_MEM_BAR);
655		return (ENXIO);
656	}
657	esbp->esb_tag = rman_get_bustag(esbp->esb_res);
658	esbp->esb_handle = rman_get_bushandle(esbp->esb_res);
659
660	SFXGE_BAR_LOCK_INIT(esbp, device_get_nameunit(sc->dev));
661
662	return (0);
663}
664
665static void
666sfxge_bar_fini(struct sfxge_softc *sc)
667{
668	efsys_bar_t *esbp = &sc->bar;
669
670	bus_release_resource(sc->dev, SYS_RES_MEMORY, esbp->esb_rid,
671	    esbp->esb_res);
672	SFXGE_BAR_LOCK_DESTROY(esbp);
673}
674
675static int
676sfxge_create(struct sfxge_softc *sc)
677{
678	device_t dev;
679	efx_nic_t *enp;
680	int error;
681	char rss_param_name[sizeof(SFXGE_PARAM(%d.max_rss_channels))];
682#if EFSYS_OPT_MCDI_LOGGING
683	char mcdi_log_param_name[sizeof(SFXGE_PARAM(%d.mcdi_logging))];
684#endif
685
686	dev = sc->dev;
687
688	SFXGE_ADAPTER_LOCK_INIT(sc, device_get_nameunit(sc->dev));
689
690	sc->max_rss_channels = 0;
691	snprintf(rss_param_name, sizeof(rss_param_name),
692		 SFXGE_PARAM(%d.max_rss_channels),
693		 (int)device_get_unit(dev));
694	TUNABLE_INT_FETCH(rss_param_name, &sc->max_rss_channels);
695#if EFSYS_OPT_MCDI_LOGGING
696	sc->mcdi_logging = sfxge_mcdi_logging;
697	snprintf(mcdi_log_param_name, sizeof(mcdi_log_param_name),
698		 SFXGE_PARAM(%d.mcdi_logging),
699		 (int)device_get_unit(dev));
700	TUNABLE_INT_FETCH(mcdi_log_param_name, &sc->mcdi_logging);
701#endif
702
703	sc->stats_node = SYSCTL_ADD_NODE(
704		device_get_sysctl_ctx(dev),
705		SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
706		OID_AUTO, "stats", CTLFLAG_RD, NULL, "Statistics");
707	if (sc->stats_node == NULL) {
708		error = ENOMEM;
709		goto fail;
710	}
711
712	TASK_INIT(&sc->task_reset, 0, sfxge_reset, sc);
713
714	(void) pci_enable_busmaster(dev);
715
716	/* Initialize DMA mappings. */
717	DBGPRINT(sc->dev, "dma_init...");
718	if ((error = sfxge_dma_init(sc)) != 0)
719		goto fail;
720
721	/* Map the device registers. */
722	DBGPRINT(sc->dev, "bar_init...");
723	if ((error = sfxge_bar_init(sc)) != 0)
724		goto fail;
725
726	error = efx_family(pci_get_vendor(dev), pci_get_device(dev),
727	    &sc->family);
728	KASSERT(error == 0, ("Family should be filtered by sfxge_probe()"));
729
730	DBGPRINT(sc->dev, "nic_create...");
731
732	/* Create the common code nic object. */
733	SFXGE_EFSYS_LOCK_INIT(&sc->enp_lock,
734			      device_get_nameunit(sc->dev), "nic");
735	if ((error = efx_nic_create(sc->family, (efsys_identifier_t *)sc,
736	    &sc->bar, &sc->enp_lock, &enp)) != 0)
737		goto fail3;
738	sc->enp = enp;
739
740	if (!ISP2(sfxge_rx_ring_entries) ||
741	    (sfxge_rx_ring_entries < EFX_RXQ_MINNDESCS) ||
742	    (sfxge_rx_ring_entries > EFX_RXQ_MAXNDESCS)) {
743		log(LOG_ERR, "%s=%d must be power of 2 from %u to %u",
744		    SFXGE_PARAM_RX_RING, sfxge_rx_ring_entries,
745		    EFX_RXQ_MINNDESCS, EFX_RXQ_MAXNDESCS);
746		error = EINVAL;
747		goto fail_rx_ring_entries;
748	}
749	sc->rxq_entries = sfxge_rx_ring_entries;
750
751	if (!ISP2(sfxge_tx_ring_entries) ||
752	    (sfxge_tx_ring_entries < EFX_TXQ_MINNDESCS) ||
753	    (sfxge_tx_ring_entries > EFX_TXQ_MAXNDESCS(efx_nic_cfg_get(enp)))) {
754		log(LOG_ERR, "%s=%d must be power of 2 from %u to %u",
755		    SFXGE_PARAM_TX_RING, sfxge_tx_ring_entries,
756		    EFX_TXQ_MINNDESCS, EFX_TXQ_MAXNDESCS(efx_nic_cfg_get(enp)));
757		error = EINVAL;
758		goto fail_tx_ring_entries;
759	}
760	sc->txq_entries = sfxge_tx_ring_entries;
761
762	/* Initialize MCDI to talk to the microcontroller. */
763	DBGPRINT(sc->dev, "mcdi_init...");
764	if ((error = sfxge_mcdi_init(sc)) != 0)
765		goto fail4;
766
767	/* Probe the NIC and build the configuration data area. */
768	DBGPRINT(sc->dev, "nic_probe...");
769	if ((error = efx_nic_probe(enp)) != 0)
770		goto fail5;
771
772	SYSCTL_ADD_STRING(device_get_sysctl_ctx(dev),
773			  SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
774			  OID_AUTO, "version", CTLFLAG_RD,
775			  SFXGE_VERSION_STRING, 0,
776			  "Driver version");
777
778	SYSCTL_ADD_UINT(device_get_sysctl_ctx(dev),
779			SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
780			OID_AUTO, "phy_type", CTLFLAG_RD,
781			NULL, efx_nic_cfg_get(enp)->enc_phy_type,
782			"PHY type");
783
784	/* Initialize the NVRAM. */
785	DBGPRINT(sc->dev, "nvram_init...");
786	if ((error = efx_nvram_init(enp)) != 0)
787		goto fail6;
788
789	/* Initialize the VPD. */
790	DBGPRINT(sc->dev, "vpd_init...");
791	if ((error = efx_vpd_init(enp)) != 0)
792		goto fail7;
793
794	efx_mcdi_new_epoch(enp);
795
796	/* Reset the NIC. */
797	DBGPRINT(sc->dev, "nic_reset...");
798	if ((error = efx_nic_reset(enp)) != 0)
799		goto fail8;
800
801	/* Initialize buffer table allocation. */
802	sc->buffer_table_next = 0;
803
804	/*
805	 * Guarantee minimum and estimate maximum number of event queues
806	 * to take it into account when MSI-X interrupts are allocated.
807	 * It initializes NIC and keeps it initialized on success.
808	 */
809	if ((error = sfxge_estimate_rsrc_limits(sc)) != 0)
810		goto fail8;
811
812	/* Set up interrupts. */
813	DBGPRINT(sc->dev, "intr_init...");
814	if ((error = sfxge_intr_init(sc)) != 0)
815		goto fail9;
816
817	/* Initialize event processing state. */
818	DBGPRINT(sc->dev, "ev_init...");
819	if ((error = sfxge_ev_init(sc)) != 0)
820		goto fail11;
821
822	/* Initialize port state. */
823	DBGPRINT(sc->dev, "port_init...");
824	if ((error = sfxge_port_init(sc)) != 0)
825		goto fail12;
826
827	/* Initialize receive state. */
828	DBGPRINT(sc->dev, "rx_init...");
829	if ((error = sfxge_rx_init(sc)) != 0)
830		goto fail13;
831
832	/* Initialize transmit state. */
833	DBGPRINT(sc->dev, "tx_init...");
834	if ((error = sfxge_tx_init(sc)) != 0)
835		goto fail14;
836
837	sc->init_state = SFXGE_INITIALIZED;
838
839	DBGPRINT(sc->dev, "success");
840	return (0);
841
842fail14:
843	sfxge_rx_fini(sc);
844
845fail13:
846	sfxge_port_fini(sc);
847
848fail12:
849	sfxge_ev_fini(sc);
850
851fail11:
852	sfxge_intr_fini(sc);
853
854fail9:
855	efx_nic_fini(sc->enp);
856
857fail8:
858	efx_vpd_fini(enp);
859
860fail7:
861	efx_nvram_fini(enp);
862
863fail6:
864	efx_nic_unprobe(enp);
865
866fail5:
867	sfxge_mcdi_fini(sc);
868
869fail4:
870fail_tx_ring_entries:
871fail_rx_ring_entries:
872	sc->enp = NULL;
873	efx_nic_destroy(enp);
874	SFXGE_EFSYS_LOCK_DESTROY(&sc->enp_lock);
875
876fail3:
877	sfxge_bar_fini(sc);
878	(void) pci_disable_busmaster(sc->dev);
879
880fail:
881	DBGPRINT(sc->dev, "failed %d", error);
882	sc->dev = NULL;
883	SFXGE_ADAPTER_LOCK_DESTROY(sc);
884	return (error);
885}
886
887static void
888sfxge_destroy(struct sfxge_softc *sc)
889{
890	efx_nic_t *enp;
891
892	/* Clean up transmit state. */
893	sfxge_tx_fini(sc);
894
895	/* Clean up receive state. */
896	sfxge_rx_fini(sc);
897
898	/* Clean up port state. */
899	sfxge_port_fini(sc);
900
901	/* Clean up event processing state. */
902	sfxge_ev_fini(sc);
903
904	/* Clean up interrupts. */
905	sfxge_intr_fini(sc);
906
907	/* Tear down common code subsystems. */
908	efx_nic_reset(sc->enp);
909	efx_vpd_fini(sc->enp);
910	efx_nvram_fini(sc->enp);
911	efx_nic_unprobe(sc->enp);
912
913	/* Tear down MCDI. */
914	sfxge_mcdi_fini(sc);
915
916	/* Destroy common code context. */
917	enp = sc->enp;
918	sc->enp = NULL;
919	efx_nic_destroy(enp);
920
921	/* Free DMA memory. */
922	sfxge_dma_fini(sc);
923
924	/* Free mapped BARs. */
925	sfxge_bar_fini(sc);
926
927	(void) pci_disable_busmaster(sc->dev);
928
929	taskqueue_drain(taskqueue_thread, &sc->task_reset);
930
931	/* Destroy the softc lock. */
932	SFXGE_ADAPTER_LOCK_DESTROY(sc);
933}
934
935static int
936sfxge_vpd_handler(SYSCTL_HANDLER_ARGS)
937{
938	struct sfxge_softc *sc = arg1;
939	efx_vpd_value_t value;
940	int rc;
941
942	value.evv_tag = arg2 >> 16;
943	value.evv_keyword = arg2 & 0xffff;
944	if ((rc = efx_vpd_get(sc->enp, sc->vpd_data, sc->vpd_size, &value))
945	    != 0)
946		return (rc);
947
948	return (SYSCTL_OUT(req, value.evv_value, value.evv_length));
949}
950
951static void
952sfxge_vpd_try_add(struct sfxge_softc *sc, struct sysctl_oid_list *list,
953		  efx_vpd_tag_t tag, const char *keyword)
954{
955	struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(sc->dev);
956	efx_vpd_value_t value;
957
958	/* Check whether VPD tag/keyword is present */
959	value.evv_tag = tag;
960	value.evv_keyword = EFX_VPD_KEYWORD(keyword[0], keyword[1]);
961	if (efx_vpd_get(sc->enp, sc->vpd_data, sc->vpd_size, &value) != 0)
962		return;
963
964	SYSCTL_ADD_PROC(
965		ctx, list, OID_AUTO, keyword, CTLTYPE_STRING|CTLFLAG_RD,
966		sc, tag << 16 | EFX_VPD_KEYWORD(keyword[0], keyword[1]),
967		sfxge_vpd_handler, "A", "");
968}
969
970static int
971sfxge_vpd_init(struct sfxge_softc *sc)
972{
973	struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(sc->dev);
974	struct sysctl_oid *vpd_node;
975	struct sysctl_oid_list *vpd_list;
976	char keyword[3];
977	efx_vpd_value_t value;
978	int rc;
979
980	if ((rc = efx_vpd_size(sc->enp, &sc->vpd_size)) != 0) {
981		/*
982		 * Unpriviledged functions deny VPD access.
983		 * Simply skip VPD in this case.
984		 */
985		if (rc == EACCES)
986			goto done;
987		goto fail;
988	}
989	sc->vpd_data = malloc(sc->vpd_size, M_SFXGE, M_WAITOK);
990	if ((rc = efx_vpd_read(sc->enp, sc->vpd_data, sc->vpd_size)) != 0)
991		goto fail2;
992
993	/* Copy ID (product name) into device description, and log it. */
994	value.evv_tag = EFX_VPD_ID;
995	if (efx_vpd_get(sc->enp, sc->vpd_data, sc->vpd_size, &value) == 0) {
996		value.evv_value[value.evv_length] = 0;
997		device_set_desc_copy(sc->dev, value.evv_value);
998		device_printf(sc->dev, "%s\n", value.evv_value);
999	}
1000
1001	vpd_node = SYSCTL_ADD_NODE(
1002		ctx, SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev)),
1003		OID_AUTO, "vpd", CTLFLAG_RD, NULL, "Vital Product Data");
1004	vpd_list = SYSCTL_CHILDREN(vpd_node);
1005
1006	/* Add sysctls for all expected and any vendor-defined keywords. */
1007	sfxge_vpd_try_add(sc, vpd_list, EFX_VPD_RO, "PN");
1008	sfxge_vpd_try_add(sc, vpd_list, EFX_VPD_RO, "EC");
1009	sfxge_vpd_try_add(sc, vpd_list, EFX_VPD_RO, "SN");
1010	keyword[0] = 'V';
1011	keyword[2] = 0;
1012	for (keyword[1] = '0'; keyword[1] <= '9'; keyword[1]++)
1013		sfxge_vpd_try_add(sc, vpd_list, EFX_VPD_RO, keyword);
1014	for (keyword[1] = 'A'; keyword[1] <= 'Z'; keyword[1]++)
1015		sfxge_vpd_try_add(sc, vpd_list, EFX_VPD_RO, keyword);
1016
1017done:
1018	return (0);
1019
1020fail2:
1021	free(sc->vpd_data, M_SFXGE);
1022fail:
1023	return (rc);
1024}
1025
1026static void
1027sfxge_vpd_fini(struct sfxge_softc *sc)
1028{
1029	free(sc->vpd_data, M_SFXGE);
1030}
1031
1032static void
1033sfxge_reset(void *arg, int npending)
1034{
1035	struct sfxge_softc *sc;
1036	int rc;
1037	unsigned attempt;
1038
1039	(void)npending;
1040
1041	sc = (struct sfxge_softc *)arg;
1042
1043	SFXGE_ADAPTER_LOCK(sc);
1044
1045	if (sc->init_state != SFXGE_STARTED)
1046		goto done;
1047
1048	sfxge_stop(sc);
1049	efx_nic_reset(sc->enp);
1050	for (attempt = 0; attempt < sfxge_restart_attempts; ++attempt) {
1051		if ((rc = sfxge_start(sc)) == 0)
1052			goto done;
1053
1054		device_printf(sc->dev, "start on reset failed (%d)\n", rc);
1055		DELAY(100000);
1056	}
1057
1058	device_printf(sc->dev, "reset failed; interface is now stopped\n");
1059
1060done:
1061	SFXGE_ADAPTER_UNLOCK(sc);
1062}
1063
1064void
1065sfxge_schedule_reset(struct sfxge_softc *sc)
1066{
1067	taskqueue_enqueue(taskqueue_thread, &sc->task_reset);
1068}
1069
1070static int
1071sfxge_attach(device_t dev)
1072{
1073	struct sfxge_softc *sc;
1074	struct ifnet *ifp;
1075	int error;
1076
1077	sc = device_get_softc(dev);
1078	sc->dev = dev;
1079
1080	/* Allocate ifnet. */
1081	ifp = if_alloc(IFT_ETHER);
1082	if (ifp == NULL) {
1083		device_printf(dev, "Couldn't allocate ifnet\n");
1084		error = ENOMEM;
1085		goto fail;
1086	}
1087	sc->ifnet = ifp;
1088
1089	/* Initialize hardware. */
1090	DBGPRINT(sc->dev, "create nic");
1091	if ((error = sfxge_create(sc)) != 0)
1092		goto fail2;
1093
1094	/* Create the ifnet for the port. */
1095	DBGPRINT(sc->dev, "init ifnet");
1096	if ((error = sfxge_ifnet_init(ifp, sc)) != 0)
1097		goto fail3;
1098
1099	DBGPRINT(sc->dev, "init vpd");
1100	if ((error = sfxge_vpd_init(sc)) != 0)
1101		goto fail4;
1102
1103	/*
1104	 * NIC is initialized inside sfxge_create() and kept inialized
1105	 * to be able to initialize port to discover media types in
1106	 * sfxge_ifnet_init().
1107	 */
1108	efx_nic_fini(sc->enp);
1109
1110	sc->init_state = SFXGE_REGISTERED;
1111
1112	DBGPRINT(sc->dev, "success");
1113	return (0);
1114
1115fail4:
1116	sfxge_ifnet_fini(ifp);
1117fail3:
1118	efx_nic_fini(sc->enp);
1119	sfxge_destroy(sc);
1120
1121fail2:
1122	if_free(sc->ifnet);
1123
1124fail:
1125	DBGPRINT(sc->dev, "failed %d", error);
1126	return (error);
1127}
1128
1129static int
1130sfxge_detach(device_t dev)
1131{
1132	struct sfxge_softc *sc;
1133
1134	sc = device_get_softc(dev);
1135
1136	sfxge_vpd_fini(sc);
1137
1138	/* Destroy the ifnet. */
1139	sfxge_ifnet_fini(sc->ifnet);
1140
1141	/* Tear down hardware. */
1142	sfxge_destroy(sc);
1143
1144	return (0);
1145}
1146
1147static int
1148sfxge_probe(device_t dev)
1149{
1150	uint16_t pci_vendor_id;
1151	uint16_t pci_device_id;
1152	efx_family_t family;
1153	int rc;
1154
1155	pci_vendor_id = pci_get_vendor(dev);
1156	pci_device_id = pci_get_device(dev);
1157
1158	DBGPRINT(dev, "PCI ID %04x:%04x", pci_vendor_id, pci_device_id);
1159	rc = efx_family(pci_vendor_id, pci_device_id, &family);
1160	if (rc != 0) {
1161		DBGPRINT(dev, "efx_family fail %d", rc);
1162		return (ENXIO);
1163	}
1164
1165	if (family == EFX_FAMILY_SIENA) {
1166		device_set_desc(dev, "Solarflare SFC9000 family");
1167		return (0);
1168	}
1169
1170	if (family == EFX_FAMILY_HUNTINGTON) {
1171		device_set_desc(dev, "Solarflare SFC9100 family");
1172		return (0);
1173	}
1174
1175	if (family == EFX_FAMILY_MEDFORD) {
1176		device_set_desc(dev, "Solarflare SFC9200 family");
1177		return (0);
1178	}
1179
1180	DBGPRINT(dev, "impossible controller family %d", family);
1181	return (ENXIO);
1182}
1183
/* Device interface methods implemented by this driver. */
static device_method_t sfxge_methods[] = {
	DEVMETHOD(device_probe,		sfxge_probe),
	DEVMETHOD(device_attach,	sfxge_attach),
	DEVMETHOD(device_detach,	sfxge_detach),

	DEVMETHOD_END
};

static devclass_t sfxge_devclass;

/* Driver description: name, method table and softc size. */
static driver_t sfxge_driver = {
	"sfxge",
	sfxge_methods,
	sizeof(struct sfxge_softc)
};

/* Register the driver on the pci bus. */
DRIVER_MODULE(sfxge, pci, sfxge_driver, sfxge_devclass, 0, 0);
1201