if_re.c revision 217868
1/*-
2 * Copyright (c) 1997, 1998-2003
3 *	Bill Paul <wpaul@windriver.com>.  All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 * 3. All advertising materials mentioning features or use of this software
14 *    must display the following acknowledgement:
15 *	This product includes software developed by Bill Paul.
16 * 4. Neither the name of the author nor the names of any co-contributors
17 *    may be used to endorse or promote products derived from this software
18 *    without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY Bill Paul AND CONTRIBUTORS ``AS IS'' AND
21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 * ARE DISCLAIMED.  IN NO EVENT SHALL Bill Paul OR THE VOICES IN HIS HEAD
24 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
30 * THE POSSIBILITY OF SUCH DAMAGE.
31 */
32
33#include <sys/cdefs.h>
34__FBSDID("$FreeBSD: head/sys/dev/re/if_re.c 217868 2011-01-25 23:27:28Z yongari $");
35
36/*
37 * RealTek 8139C+/8169/8169S/8110S/8168/8111/8101E PCI NIC driver
38 *
39 * Written by Bill Paul <wpaul@windriver.com>
40 * Senior Networking Software Engineer
41 * Wind River Systems
42 */
43
44/*
45 * This driver is designed to support RealTek's next generation of
46 * 10/100 and 10/100/1000 PCI ethernet controllers. There are currently
47 * seven devices in this family: the RTL8139C+, the RTL8169, the RTL8169S,
48 * RTL8110S, the RTL8168, the RTL8111 and the RTL8101E.
49 *
50 * The 8139C+ is a 10/100 ethernet chip. It is backwards compatible
51 * with the older 8139 family, however it also supports a special
52 * C+ mode of operation that provides several new performance enhancing
53 * features. These include:
54 *
55 *	o Descriptor based DMA mechanism. Each descriptor represents
56 *	  a single packet fragment. Data buffers may be aligned on
57 *	  any byte boundary.
58 *
59 *	o 64-bit DMA
60 *
61 *	o TCP/IP checksum offload for both RX and TX
62 *
63 *	o High and normal priority transmit DMA rings
64 *
65 *	o VLAN tag insertion and extraction
66 *
67 *	o TCP large send (segmentation offload)
68 *
69 * Like the 8139, the 8139C+ also has a built-in 10/100 PHY. The C+
70 * programming API is fairly straightforward. The RX filtering, EEPROM
71 * access and PHY access is the same as it is on the older 8139 series
72 * chips.
73 *
74 * The 8169 is a 64-bit 10/100/1000 gigabit ethernet MAC. It has almost the
75 * same programming API and feature set as the 8139C+ with the following
76 * differences and additions:
77 *
78 *	o 1000Mbps mode
79 *
80 *	o Jumbo frames
81 *
82 *	o GMII and TBI ports/registers for interfacing with copper
83 *	  or fiber PHYs
84 *
85 *	o RX and TX DMA rings can have up to 1024 descriptors
86 *	  (the 8139C+ allows a maximum of 64)
87 *
88 *	o Slight differences in register layout from the 8139C+
89 *
90 * The TX start and timer interrupt registers are at different locations
91 * on the 8169 than they are on the 8139C+. Also, the status word in the
92 * RX descriptor has a slightly different bit layout. The 8169 does not
93 * have a built-in PHY. Most reference boards use a Marvell 88E1000 'Alaska'
94 * copper gigE PHY.
95 *
96 * The 8169S/8110S 10/100/1000 devices have built-in copper gigE PHYs
97 * (the 'S' stands for 'single-chip'). These devices have the same
98 * programming API as the older 8169, but also have some vendor-specific
99 * registers for the on-board PHY. The 8110S is a LAN-on-motherboard
100 * part designed to be pin-compatible with the RealTek 8100 10/100 chip.
101 *
102 * This driver takes advantage of the RX and TX checksum offload and
103 * VLAN tag insertion/extraction features. It also implements TX
104 * interrupt moderation using the timer interrupt registers, which
105 * significantly reduces TX interrupt load. There is also support
106 * for jumbo frames, however the 8169/8169S/8110S can not transmit
107 * jumbo frames larger than 7440, so the max MTU possible with this
108 * driver is 7422 bytes.
109 */
110
111#ifdef HAVE_KERNEL_OPTION_HEADERS
112#include "opt_device_polling.h"
113#endif
114
115#include <sys/param.h>
116#include <sys/endian.h>
117#include <sys/systm.h>
118#include <sys/sockio.h>
119#include <sys/mbuf.h>
120#include <sys/malloc.h>
121#include <sys/module.h>
122#include <sys/kernel.h>
123#include <sys/socket.h>
124#include <sys/lock.h>
125#include <sys/mutex.h>
126#include <sys/sysctl.h>
127#include <sys/taskqueue.h>
128
129#include <net/if.h>
130#include <net/if_arp.h>
131#include <net/ethernet.h>
132#include <net/if_dl.h>
133#include <net/if_media.h>
134#include <net/if_types.h>
135#include <net/if_vlan_var.h>
136
137#include <net/bpf.h>
138
139#include <machine/bus.h>
140#include <machine/resource.h>
141#include <sys/bus.h>
142#include <sys/rman.h>
143
144#include <dev/mii/mii.h>
145#include <dev/mii/miivar.h>
146
147#include <dev/pci/pcireg.h>
148#include <dev/pci/pcivar.h>
149
150#include <pci/if_rlreg.h>
151
152MODULE_DEPEND(re, pci, 1, 1, 1);
153MODULE_DEPEND(re, ether, 1, 1, 1);
154MODULE_DEPEND(re, miibus, 1, 1, 1);
155
156/* "device miibus" required.  See GENERIC if you get errors here. */
157#include "miibus_if.h"
158
159/* Tunables. */
160static int msi_disable = 0;
161TUNABLE_INT("hw.re.msi_disable", &msi_disable);
162static int msix_disable = 0;
163TUNABLE_INT("hw.re.msix_disable", &msix_disable);
164static int prefer_iomap = 0;
165TUNABLE_INT("hw.re.prefer_iomap", &prefer_iomap);
166
167#define RE_CSUM_FEATURES    (CSUM_IP | CSUM_TCP | CSUM_UDP)
168
169/*
170 * Various supported device vendors/types and their names.
171 */
172static struct rl_type re_devs[] = {
173	{ DLINK_VENDORID, DLINK_DEVICEID_528T, 0,
174	    "D-Link DGE-528(T) Gigabit Ethernet Adapter" },
175	{ RT_VENDORID, RT_DEVICEID_8139, 0,
176	    "RealTek 8139C+ 10/100BaseTX" },
177	{ RT_VENDORID, RT_DEVICEID_8101E, 0,
178	    "RealTek 8101E/8102E/8102EL/8103E PCIe 10/100baseTX" },
179	{ RT_VENDORID, RT_DEVICEID_8168, 0,
180	    "RealTek 8168/8111 B/C/CP/D/DP/E PCIe Gigabit Ethernet" },
181	{ RT_VENDORID, RT_DEVICEID_8169, 0,
182	    "RealTek 8169/8169S/8169SB(L)/8110S/8110SB(L) Gigabit Ethernet" },
183	{ RT_VENDORID, RT_DEVICEID_8169SC, 0,
184	    "RealTek 8169SC/8110SC Single-chip Gigabit Ethernet" },
185	{ COREGA_VENDORID, COREGA_DEVICEID_CGLAPCIGT, 0,
186	    "Corega CG-LAPCIGT (RTL8169S) Gigabit Ethernet" },
187	{ LINKSYS_VENDORID, LINKSYS_DEVICEID_EG1032, 0,
188	    "Linksys EG1032 (RTL8169S) Gigabit Ethernet" },
189	{ USR_VENDORID, USR_DEVICEID_997902, 0,
190	    "US Robotics 997902 (RTL8169S) Gigabit Ethernet" }
191};
192
193static struct rl_hwrev re_hwrevs[] = {
194	{ RL_HWREV_8139, RL_8139,  "", RL_MTU },
195	{ RL_HWREV_8139A, RL_8139, "A", RL_MTU },
196	{ RL_HWREV_8139AG, RL_8139, "A-G", RL_MTU },
197	{ RL_HWREV_8139B, RL_8139, "B", RL_MTU },
198	{ RL_HWREV_8130, RL_8139, "8130", RL_MTU },
199	{ RL_HWREV_8139C, RL_8139, "C", RL_MTU },
200	{ RL_HWREV_8139D, RL_8139, "8139D/8100B/8100C", RL_MTU },
201	{ RL_HWREV_8139CPLUS, RL_8139CPLUS, "C+", RL_MTU },
202	{ RL_HWREV_8168B_SPIN1, RL_8169, "8168", RL_JUMBO_MTU },
203	{ RL_HWREV_8169, RL_8169, "8169", RL_JUMBO_MTU },
204	{ RL_HWREV_8169S, RL_8169, "8169S", RL_JUMBO_MTU },
205	{ RL_HWREV_8110S, RL_8169, "8110S", RL_JUMBO_MTU },
206	{ RL_HWREV_8169_8110SB, RL_8169, "8169SB/8110SB", RL_JUMBO_MTU },
207	{ RL_HWREV_8169_8110SC, RL_8169, "8169SC/8110SC", RL_JUMBO_MTU },
208	{ RL_HWREV_8169_8110SBL, RL_8169, "8169SBL/8110SBL", RL_JUMBO_MTU },
209	{ RL_HWREV_8169_8110SCE, RL_8169, "8169SC/8110SC", RL_JUMBO_MTU },
210	{ RL_HWREV_8100, RL_8139, "8100", RL_MTU },
211	{ RL_HWREV_8101, RL_8139, "8101", RL_MTU },
212	{ RL_HWREV_8100E, RL_8169, "8100E", RL_MTU },
213	{ RL_HWREV_8101E, RL_8169, "8101E", RL_MTU },
214	{ RL_HWREV_8102E, RL_8169, "8102E", RL_MTU },
215	{ RL_HWREV_8102EL, RL_8169, "8102EL", RL_MTU },
216	{ RL_HWREV_8102EL_SPIN1, RL_8169, "8102EL", RL_MTU },
217	{ RL_HWREV_8103E, RL_8169, "8103E", RL_MTU },
218	{ RL_HWREV_8168B_SPIN2, RL_8169, "8168", RL_JUMBO_MTU },
219	{ RL_HWREV_8168B_SPIN3, RL_8169, "8168", RL_JUMBO_MTU },
220	{ RL_HWREV_8168C, RL_8169, "8168C/8111C", RL_JUMBO_MTU_6K },
221	{ RL_HWREV_8168C_SPIN2, RL_8169, "8168C/8111C", RL_JUMBO_MTU_6K },
222	{ RL_HWREV_8168CP, RL_8169, "8168CP/8111CP", RL_JUMBO_MTU_6K },
223	{ RL_HWREV_8168D, RL_8169, "8168D/8111D", RL_JUMBO_MTU_9K },
224	{ RL_HWREV_8168DP, RL_8169, "8168DP/8111DP", RL_JUMBO_MTU_9K },
225	{ RL_HWREV_8168E, RL_8169, "8168E/8111E", RL_JUMBO_MTU_9K},
226	{ RL_HWREV_8168E_VL, RL_8169, "8168E/8111E-VL", RL_JUMBO_MTU_6K},
227	{ 0, 0, NULL, 0 }
228};
229
230static int re_probe		(device_t);
231static int re_attach		(device_t);
232static int re_detach		(device_t);
233
234static int re_encap		(struct rl_softc *, struct mbuf **);
235
236static void re_dma_map_addr	(void *, bus_dma_segment_t *, int, int);
237static int re_allocmem		(device_t, struct rl_softc *);
238static __inline void re_discard_rxbuf
239				(struct rl_softc *, int);
240static int re_newbuf		(struct rl_softc *, int);
241static int re_jumbo_newbuf	(struct rl_softc *, int);
242static int re_rx_list_init	(struct rl_softc *);
243static int re_jrx_list_init	(struct rl_softc *);
244static int re_tx_list_init	(struct rl_softc *);
245#ifdef RE_FIXUP_RX
246static __inline void re_fixup_rx
247				(struct mbuf *);
248#endif
249static int re_rxeof		(struct rl_softc *, int *);
250static void re_txeof		(struct rl_softc *);
251#ifdef DEVICE_POLLING
252static int re_poll		(struct ifnet *, enum poll_cmd, int);
253static int re_poll_locked	(struct ifnet *, enum poll_cmd, int);
254#endif
255static int re_intr		(void *);
256static void re_tick		(void *);
257static void re_int_task		(void *, int);
258static void re_start		(struct ifnet *);
259static void re_start_locked	(struct ifnet *);
260static int re_ioctl		(struct ifnet *, u_long, caddr_t);
261static void re_init		(void *);
262static void re_init_locked	(struct rl_softc *);
263static void re_stop		(struct rl_softc *);
264static void re_watchdog		(struct rl_softc *);
265static int re_suspend		(device_t);
266static int re_resume		(device_t);
267static int re_shutdown		(device_t);
268static int re_ifmedia_upd	(struct ifnet *);
269static void re_ifmedia_sts	(struct ifnet *, struct ifmediareq *);
270
271static void re_eeprom_putbyte	(struct rl_softc *, int);
272static void re_eeprom_getword	(struct rl_softc *, int, u_int16_t *);
273static void re_read_eeprom	(struct rl_softc *, caddr_t, int, int);
274static int re_gmii_readreg	(device_t, int, int);
275static int re_gmii_writereg	(device_t, int, int, int);
276
277static int re_miibus_readreg	(device_t, int, int);
278static int re_miibus_writereg	(device_t, int, int, int);
279static void re_miibus_statchg	(device_t);
280
281static void re_set_jumbo	(struct rl_softc *, int);
282static void re_set_rxmode		(struct rl_softc *);
283static void re_reset		(struct rl_softc *);
284static void re_setwol		(struct rl_softc *);
285static void re_clrwol		(struct rl_softc *);
286
287#ifdef RE_DIAG
288static int re_diag		(struct rl_softc *);
289#endif
290
291static void re_add_sysctls	(struct rl_softc *);
292static int re_sysctl_stats	(SYSCTL_HANDLER_ARGS);
293
294static device_method_t re_methods[] = {
295	/* Device interface */
296	DEVMETHOD(device_probe,		re_probe),
297	DEVMETHOD(device_attach,	re_attach),
298	DEVMETHOD(device_detach,	re_detach),
299	DEVMETHOD(device_suspend,	re_suspend),
300	DEVMETHOD(device_resume,	re_resume),
301	DEVMETHOD(device_shutdown,	re_shutdown),
302
303	/* bus interface */
304	DEVMETHOD(bus_print_child,	bus_generic_print_child),
305	DEVMETHOD(bus_driver_added,	bus_generic_driver_added),
306
307	/* MII interface */
308	DEVMETHOD(miibus_readreg,	re_miibus_readreg),
309	DEVMETHOD(miibus_writereg,	re_miibus_writereg),
310	DEVMETHOD(miibus_statchg,	re_miibus_statchg),
311
312	{ 0, 0 }
313};
314
315static driver_t re_driver = {
316	"re",
317	re_methods,
318	sizeof(struct rl_softc)
319};
320
321static devclass_t re_devclass;
322
323DRIVER_MODULE(re, pci, re_driver, re_devclass, 0, 0);
324DRIVER_MODULE(miibus, re, miibus_driver, miibus_devclass, 0, 0);
325
326#define EE_SET(x)					\
327	CSR_WRITE_1(sc, RL_EECMD,			\
328		CSR_READ_1(sc, RL_EECMD) | x)
329
330#define EE_CLR(x)					\
331	CSR_WRITE_1(sc, RL_EECMD,			\
332		CSR_READ_1(sc, RL_EECMD) & ~x)
333
334/*
335 * Send a read command and address to the EEPROM, check for ACK.
336 */
337static void
338re_eeprom_putbyte(struct rl_softc *sc, int addr)
339{
340	int			d, i;
341
342	d = addr | (RL_9346_READ << sc->rl_eewidth);
343
344	/*
345	 * Feed in each bit and strobe the clock.
346	 */
347
348	for (i = 1 << (sc->rl_eewidth + 3); i; i >>= 1) {
349		if (d & i) {
350			EE_SET(RL_EE_DATAIN);
351		} else {
352			EE_CLR(RL_EE_DATAIN);
353		}
354		DELAY(100);
355		EE_SET(RL_EE_CLK);
356		DELAY(150);
357		EE_CLR(RL_EE_CLK);
358		DELAY(100);
359	}
360}
361
362/*
363 * Read a word of data stored in the EEPROM at address 'addr.'
364 */
365static void
366re_eeprom_getword(struct rl_softc *sc, int addr, u_int16_t *dest)
367{
368	int			i;
369	u_int16_t		word = 0;
370
371	/*
372	 * Send address of word we want to read.
373	 */
374	re_eeprom_putbyte(sc, addr);
375
376	/*
377	 * Start reading bits from EEPROM.
378	 */
379	for (i = 0x8000; i; i >>= 1) {
380		EE_SET(RL_EE_CLK);
381		DELAY(100);
382		if (CSR_READ_1(sc, RL_EECMD) & RL_EE_DATAOUT)
383			word |= i;
384		EE_CLR(RL_EE_CLK);
385		DELAY(100);
386	}
387
388	*dest = word;
389}
390
391/*
392 * Read a sequence of words from the EEPROM.
393 */
394static void
395re_read_eeprom(struct rl_softc *sc, caddr_t dest, int off, int cnt)
396{
397	int			i;
398	u_int16_t		word = 0, *ptr;
399
400	CSR_SETBIT_1(sc, RL_EECMD, RL_EEMODE_PROGRAM);
401
402        DELAY(100);
403
404	for (i = 0; i < cnt; i++) {
405		CSR_SETBIT_1(sc, RL_EECMD, RL_EE_SEL);
406		re_eeprom_getword(sc, off + i, &word);
407		CSR_CLRBIT_1(sc, RL_EECMD, RL_EE_SEL);
408		ptr = (u_int16_t *)(dest + (i * 2));
409                *ptr = word;
410	}
411
412	CSR_CLRBIT_1(sc, RL_EECMD, RL_EEMODE_PROGRAM);
413}
414
415static int
416re_gmii_readreg(device_t dev, int phy, int reg)
417{
418	struct rl_softc		*sc;
419	u_int32_t		rval;
420	int			i;
421
422	sc = device_get_softc(dev);
423
424	/* Let the rgephy driver read the GMEDIASTAT register */
425
426	if (reg == RL_GMEDIASTAT) {
427		rval = CSR_READ_1(sc, RL_GMEDIASTAT);
428		return (rval);
429	}
430
431	CSR_WRITE_4(sc, RL_PHYAR, reg << 16);
432
433	for (i = 0; i < RL_PHY_TIMEOUT; i++) {
434		rval = CSR_READ_4(sc, RL_PHYAR);
435		if (rval & RL_PHYAR_BUSY)
436			break;
437		DELAY(25);
438	}
439
440	if (i == RL_PHY_TIMEOUT) {
441		device_printf(sc->rl_dev, "PHY read failed\n");
442		return (0);
443	}
444
445	/*
446	 * Controller requires a 20us delay to process next MDIO request.
447	 */
448	DELAY(20);
449
450	return (rval & RL_PHYAR_PHYDATA);
451}
452
453static int
454re_gmii_writereg(device_t dev, int phy, int reg, int data)
455{
456	struct rl_softc		*sc;
457	u_int32_t		rval;
458	int			i;
459
460	sc = device_get_softc(dev);
461
462	CSR_WRITE_4(sc, RL_PHYAR, (reg << 16) |
463	    (data & RL_PHYAR_PHYDATA) | RL_PHYAR_BUSY);
464
465	for (i = 0; i < RL_PHY_TIMEOUT; i++) {
466		rval = CSR_READ_4(sc, RL_PHYAR);
467		if (!(rval & RL_PHYAR_BUSY))
468			break;
469		DELAY(25);
470	}
471
472	if (i == RL_PHY_TIMEOUT) {
473		device_printf(sc->rl_dev, "PHY write failed\n");
474		return (0);
475	}
476
477	/*
478	 * Controller requires a 20us delay to process next MDIO request.
479	 */
480	DELAY(20);
481
482	return (0);
483}
484
485static int
486re_miibus_readreg(device_t dev, int phy, int reg)
487{
488	struct rl_softc		*sc;
489	u_int16_t		rval = 0;
490	u_int16_t		re8139_reg = 0;
491
492	sc = device_get_softc(dev);
493
494	if (sc->rl_type == RL_8169) {
495		rval = re_gmii_readreg(dev, phy, reg);
496		return (rval);
497	}
498
499	switch (reg) {
500	case MII_BMCR:
501		re8139_reg = RL_BMCR;
502		break;
503	case MII_BMSR:
504		re8139_reg = RL_BMSR;
505		break;
506	case MII_ANAR:
507		re8139_reg = RL_ANAR;
508		break;
509	case MII_ANER:
510		re8139_reg = RL_ANER;
511		break;
512	case MII_ANLPAR:
513		re8139_reg = RL_LPAR;
514		break;
515	case MII_PHYIDR1:
516	case MII_PHYIDR2:
517		return (0);
518	/*
519	 * Allow the rlphy driver to read the media status
520	 * register. If we have a link partner which does not
521	 * support NWAY, this is the register which will tell
522	 * us the results of parallel detection.
523	 */
524	case RL_MEDIASTAT:
525		rval = CSR_READ_1(sc, RL_MEDIASTAT);
526		return (rval);
527	default:
528		device_printf(sc->rl_dev, "bad phy register\n");
529		return (0);
530	}
531	rval = CSR_READ_2(sc, re8139_reg);
532	if (sc->rl_type == RL_8139CPLUS && re8139_reg == RL_BMCR) {
533		/* 8139C+ has different bit layout. */
534		rval &= ~(BMCR_LOOP | BMCR_ISO);
535	}
536	return (rval);
537}
538
539static int
540re_miibus_writereg(device_t dev, int phy, int reg, int data)
541{
542	struct rl_softc		*sc;
543	u_int16_t		re8139_reg = 0;
544	int			rval = 0;
545
546	sc = device_get_softc(dev);
547
548	if (sc->rl_type == RL_8169) {
549		rval = re_gmii_writereg(dev, phy, reg, data);
550		return (rval);
551	}
552
553	switch (reg) {
554	case MII_BMCR:
555		re8139_reg = RL_BMCR;
556		if (sc->rl_type == RL_8139CPLUS) {
557			/* 8139C+ has different bit layout. */
558			data &= ~(BMCR_LOOP | BMCR_ISO);
559		}
560		break;
561	case MII_BMSR:
562		re8139_reg = RL_BMSR;
563		break;
564	case MII_ANAR:
565		re8139_reg = RL_ANAR;
566		break;
567	case MII_ANER:
568		re8139_reg = RL_ANER;
569		break;
570	case MII_ANLPAR:
571		re8139_reg = RL_LPAR;
572		break;
573	case MII_PHYIDR1:
574	case MII_PHYIDR2:
575		return (0);
576		break;
577	default:
578		device_printf(sc->rl_dev, "bad phy register\n");
579		return (0);
580	}
581	CSR_WRITE_2(sc, re8139_reg, data);
582	return (0);
583}
584
585static void
586re_miibus_statchg(device_t dev)
587{
588	struct rl_softc		*sc;
589	struct ifnet		*ifp;
590	struct mii_data		*mii;
591
592	sc = device_get_softc(dev);
593	mii = device_get_softc(sc->rl_miibus);
594	ifp = sc->rl_ifp;
595	if (mii == NULL || ifp == NULL ||
596	    (ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
597		return;
598
599	sc->rl_flags &= ~RL_FLAG_LINK;
600	if ((mii->mii_media_status & (IFM_ACTIVE | IFM_AVALID)) ==
601	    (IFM_ACTIVE | IFM_AVALID)) {
602		switch (IFM_SUBTYPE(mii->mii_media_active)) {
603		case IFM_10_T:
604		case IFM_100_TX:
605			sc->rl_flags |= RL_FLAG_LINK;
606			break;
607		case IFM_1000_T:
608			if ((sc->rl_flags & RL_FLAG_FASTETHER) != 0)
609				break;
610			sc->rl_flags |= RL_FLAG_LINK;
611			break;
612		default:
613			break;
614		}
615	}
616	/*
617	 * RealTek controllers does not provide any interface to
618	 * Tx/Rx MACs for resolved speed, duplex and flow-control
619	 * parameters.
620	 */
621}
622
623/*
624 * Set the RX configuration and 64-bit multicast hash filter.
625 */
626static void
627re_set_rxmode(struct rl_softc *sc)
628{
629	struct ifnet		*ifp;
630	struct ifmultiaddr	*ifma;
631	uint32_t		hashes[2] = { 0, 0 };
632	uint32_t		h, rxfilt;
633
634	RL_LOCK_ASSERT(sc);
635
636	ifp = sc->rl_ifp;
637
638	rxfilt = RL_RXCFG_CONFIG | RL_RXCFG_RX_INDIV | RL_RXCFG_RX_BROAD;
639
640	if (ifp->if_flags & (IFF_ALLMULTI | IFF_PROMISC)) {
641		if (ifp->if_flags & IFF_PROMISC)
642			rxfilt |= RL_RXCFG_RX_ALLPHYS;
643		/*
644		 * Unlike other hardwares, we have to explicitly set
645		 * RL_RXCFG_RX_MULTI to receive multicast frames in
646		 * promiscuous mode.
647		 */
648		rxfilt |= RL_RXCFG_RX_MULTI;
649		hashes[0] = hashes[1] = 0xffffffff;
650		goto done;
651	}
652
653	if_maddr_rlock(ifp);
654	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
655		if (ifma->ifma_addr->sa_family != AF_LINK)
656			continue;
657		h = ether_crc32_be(LLADDR((struct sockaddr_dl *)
658		    ifma->ifma_addr), ETHER_ADDR_LEN) >> 26;
659		if (h < 32)
660			hashes[0] |= (1 << h);
661		else
662			hashes[1] |= (1 << (h - 32));
663	}
664	if_maddr_runlock(ifp);
665
666	if (hashes[0] != 0 || hashes[1] != 0) {
667		/*
668		 * For some unfathomable reason, RealTek decided to
669		 * reverse the order of the multicast hash registers
670		 * in the PCI Express parts.  This means we have to
671		 * write the hash pattern in reverse order for those
672		 * devices.
673		 */
674		if ((sc->rl_flags & RL_FLAG_PCIE) != 0) {
675			h = bswap32(hashes[0]);
676			hashes[0] = bswap32(hashes[1]);
677			hashes[1] = h;
678		}
679		rxfilt |= RL_RXCFG_RX_MULTI;
680	}
681
682done:
683	CSR_WRITE_4(sc, RL_MAR0, hashes[0]);
684	CSR_WRITE_4(sc, RL_MAR4, hashes[1]);
685	CSR_WRITE_4(sc, RL_RXCFG, rxfilt);
686}
687
688static void
689re_reset(struct rl_softc *sc)
690{
691	int			i;
692
693	RL_LOCK_ASSERT(sc);
694
695	CSR_WRITE_1(sc, RL_COMMAND, RL_CMD_RESET);
696
697	for (i = 0; i < RL_TIMEOUT; i++) {
698		DELAY(10);
699		if (!(CSR_READ_1(sc, RL_COMMAND) & RL_CMD_RESET))
700			break;
701	}
702	if (i == RL_TIMEOUT)
703		device_printf(sc->rl_dev, "reset never completed!\n");
704
705	if ((sc->rl_flags & RL_FLAG_MACRESET) != 0)
706		CSR_WRITE_1(sc, 0x82, 1);
707	if (sc->rl_hwrev->rl_rev == RL_HWREV_8169S)
708		re_gmii_writereg(sc->rl_dev, 1, 0x0b, 0);
709}
710
711#ifdef RE_DIAG
712
713/*
714 * The following routine is designed to test for a defect on some
715 * 32-bit 8169 cards. Some of these NICs have the REQ64# and ACK64#
716 * lines connected to the bus, however for a 32-bit only card, they
717 * should be pulled high. The result of this defect is that the
718 * NIC will not work right if you plug it into a 64-bit slot: DMA
719 * operations will be done with 64-bit transfers, which will fail
720 * because the 64-bit data lines aren't connected.
721 *
722 * There's no way to work around this (short of talking a soldering
723 * iron to the board), however we can detect it. The method we use
724 * here is to put the NIC into digital loopback mode, set the receiver
725 * to promiscuous mode, and then try to send a frame. We then compare
726 * the frame data we sent to what was received. If the data matches,
727 * then the NIC is working correctly, otherwise we know the user has
728 * a defective NIC which has been mistakenly plugged into a 64-bit PCI
729 * slot. In the latter case, there's no way the NIC can work correctly,
730 * so we print out a message on the console and abort the device attach.
731 */
732
733static int
734re_diag(struct rl_softc *sc)
735{
736	struct ifnet		*ifp = sc->rl_ifp;
737	struct mbuf		*m0;
738	struct ether_header	*eh;
739	struct rl_desc		*cur_rx;
740	u_int16_t		status;
741	u_int32_t		rxstat;
742	int			total_len, i, error = 0, phyaddr;
743	u_int8_t		dst[] = { 0x00, 'h', 'e', 'l', 'l', 'o' };
744	u_int8_t		src[] = { 0x00, 'w', 'o', 'r', 'l', 'd' };
745
746	/* Allocate a single mbuf */
747	MGETHDR(m0, M_DONTWAIT, MT_DATA);
748	if (m0 == NULL)
749		return (ENOBUFS);
750
751	RL_LOCK(sc);
752
753	/*
754	 * Initialize the NIC in test mode. This sets the chip up
755	 * so that it can send and receive frames, but performs the
756	 * following special functions:
757	 * - Puts receiver in promiscuous mode
758	 * - Enables digital loopback mode
759	 * - Leaves interrupts turned off
760	 */
761
762	ifp->if_flags |= IFF_PROMISC;
763	sc->rl_testmode = 1;
764	ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
765	re_init_locked(sc);
766	sc->rl_flags |= RL_FLAG_LINK;
767	if (sc->rl_type == RL_8169)
768		phyaddr = 1;
769	else
770		phyaddr = 0;
771
772	re_miibus_writereg(sc->rl_dev, phyaddr, MII_BMCR, BMCR_RESET);
773	for (i = 0; i < RL_TIMEOUT; i++) {
774		status = re_miibus_readreg(sc->rl_dev, phyaddr, MII_BMCR);
775		if (!(status & BMCR_RESET))
776			break;
777	}
778
779	re_miibus_writereg(sc->rl_dev, phyaddr, MII_BMCR, BMCR_LOOP);
780	CSR_WRITE_2(sc, RL_ISR, RL_INTRS);
781
782	DELAY(100000);
783
784	/* Put some data in the mbuf */
785
786	eh = mtod(m0, struct ether_header *);
787	bcopy ((char *)&dst, eh->ether_dhost, ETHER_ADDR_LEN);
788	bcopy ((char *)&src, eh->ether_shost, ETHER_ADDR_LEN);
789	eh->ether_type = htons(ETHERTYPE_IP);
790	m0->m_pkthdr.len = m0->m_len = ETHER_MIN_LEN - ETHER_CRC_LEN;
791
792	/*
793	 * Queue the packet, start transmission.
794	 * Note: IF_HANDOFF() ultimately calls re_start() for us.
795	 */
796
797	CSR_WRITE_2(sc, RL_ISR, 0xFFFF);
798	RL_UNLOCK(sc);
799	/* XXX: re_diag must not be called when in ALTQ mode */
800	IF_HANDOFF(&ifp->if_snd, m0, ifp);
801	RL_LOCK(sc);
802	m0 = NULL;
803
804	/* Wait for it to propagate through the chip */
805
806	DELAY(100000);
807	for (i = 0; i < RL_TIMEOUT; i++) {
808		status = CSR_READ_2(sc, RL_ISR);
809		CSR_WRITE_2(sc, RL_ISR, status);
810		if ((status & (RL_ISR_TIMEOUT_EXPIRED|RL_ISR_RX_OK)) ==
811		    (RL_ISR_TIMEOUT_EXPIRED|RL_ISR_RX_OK))
812			break;
813		DELAY(10);
814	}
815
816	if (i == RL_TIMEOUT) {
817		device_printf(sc->rl_dev,
818		    "diagnostic failed, failed to receive packet in"
819		    " loopback mode\n");
820		error = EIO;
821		goto done;
822	}
823
824	/*
825	 * The packet should have been dumped into the first
826	 * entry in the RX DMA ring. Grab it from there.
827	 */
828
829	bus_dmamap_sync(sc->rl_ldata.rl_rx_list_tag,
830	    sc->rl_ldata.rl_rx_list_map,
831	    BUS_DMASYNC_POSTREAD);
832	bus_dmamap_sync(sc->rl_ldata.rl_rx_mtag,
833	    sc->rl_ldata.rl_rx_desc[0].rx_dmamap,
834	    BUS_DMASYNC_POSTREAD);
835	bus_dmamap_unload(sc->rl_ldata.rl_rx_mtag,
836	    sc->rl_ldata.rl_rx_desc[0].rx_dmamap);
837
838	m0 = sc->rl_ldata.rl_rx_desc[0].rx_m;
839	sc->rl_ldata.rl_rx_desc[0].rx_m = NULL;
840	eh = mtod(m0, struct ether_header *);
841
842	cur_rx = &sc->rl_ldata.rl_rx_list[0];
843	total_len = RL_RXBYTES(cur_rx);
844	rxstat = le32toh(cur_rx->rl_cmdstat);
845
846	if (total_len != ETHER_MIN_LEN) {
847		device_printf(sc->rl_dev,
848		    "diagnostic failed, received short packet\n");
849		error = EIO;
850		goto done;
851	}
852
853	/* Test that the received packet data matches what we sent. */
854
855	if (bcmp((char *)&eh->ether_dhost, (char *)&dst, ETHER_ADDR_LEN) ||
856	    bcmp((char *)&eh->ether_shost, (char *)&src, ETHER_ADDR_LEN) ||
857	    ntohs(eh->ether_type) != ETHERTYPE_IP) {
858		device_printf(sc->rl_dev, "WARNING, DMA FAILURE!\n");
859		device_printf(sc->rl_dev, "expected TX data: %6D/%6D/0x%x\n",
860		    dst, ":", src, ":", ETHERTYPE_IP);
861		device_printf(sc->rl_dev, "received RX data: %6D/%6D/0x%x\n",
862		    eh->ether_dhost, ":",  eh->ether_shost, ":",
863		    ntohs(eh->ether_type));
864		device_printf(sc->rl_dev, "You may have a defective 32-bit "
865		    "NIC plugged into a 64-bit PCI slot.\n");
866		device_printf(sc->rl_dev, "Please re-install the NIC in a "
867		    "32-bit slot for proper operation.\n");
868		device_printf(sc->rl_dev, "Read the re(4) man page for more "
869		    "details.\n");
870		error = EIO;
871	}
872
873done:
874	/* Turn interface off, release resources */
875
876	sc->rl_testmode = 0;
877	sc->rl_flags &= ~RL_FLAG_LINK;
878	ifp->if_flags &= ~IFF_PROMISC;
879	re_stop(sc);
880	if (m0 != NULL)
881		m_freem(m0);
882
883	RL_UNLOCK(sc);
884
885	return (error);
886}
887
888#endif
889
890/*
891 * Probe for a RealTek 8139C+/8169/8110 chip. Check the PCI vendor and device
892 * IDs against our list and return a device name if we find a match.
893 */
894static int
895re_probe(device_t dev)
896{
897	struct rl_type		*t;
898	uint16_t		devid, vendor;
899	uint16_t		revid, sdevid;
900	int			i;
901
902	vendor = pci_get_vendor(dev);
903	devid = pci_get_device(dev);
904	revid = pci_get_revid(dev);
905	sdevid = pci_get_subdevice(dev);
906
907	if (vendor == LINKSYS_VENDORID && devid == LINKSYS_DEVICEID_EG1032) {
908		if (sdevid != LINKSYS_SUBDEVICE_EG1032_REV3) {
909			/*
910			 * Only attach to rev. 3 of the Linksys EG1032 adapter.
911			 * Rev. 2 is supported by sk(4).
912			 */
913			return (ENXIO);
914		}
915	}
916
917	if (vendor == RT_VENDORID && devid == RT_DEVICEID_8139) {
918		if (revid != 0x20) {
919			/* 8139, let rl(4) take care of this device. */
920			return (ENXIO);
921		}
922	}
923
924	t = re_devs;
925	for (i = 0; i < sizeof(re_devs) / sizeof(re_devs[0]); i++, t++) {
926		if (vendor == t->rl_vid && devid == t->rl_did) {
927			device_set_desc(dev, t->rl_name);
928			return (BUS_PROBE_DEFAULT);
929		}
930	}
931
932	return (ENXIO);
933}
934
935/*
936 * Map a single buffer address.
937 */
938
939static void
940re_dma_map_addr(void *arg, bus_dma_segment_t *segs, int nseg, int error)
941{
942	bus_addr_t		*addr;
943
944	if (error)
945		return;
946
947	KASSERT(nseg == 1, ("too many DMA segments, %d should be 1", nseg));
948	addr = arg;
949	*addr = segs->ds_addr;
950}
951
952static int
953re_allocmem(device_t dev, struct rl_softc *sc)
954{
955	bus_addr_t		lowaddr;
956	bus_size_t		rx_list_size, tx_list_size;
957	int			error;
958	int			i;
959
960	rx_list_size = sc->rl_ldata.rl_rx_desc_cnt * sizeof(struct rl_desc);
961	tx_list_size = sc->rl_ldata.rl_tx_desc_cnt * sizeof(struct rl_desc);
962
963	/*
964	 * Allocate the parent bus DMA tag appropriate for PCI.
965	 * In order to use DAC, RL_CPLUSCMD_PCI_DAC bit of RL_CPLUS_CMD
966	 * register should be set. However some RealTek chips are known
967	 * to be buggy on DAC handling, therefore disable DAC by limiting
968	 * DMA address space to 32bit. PCIe variants of RealTek chips
969	 * may not have the limitation.
970	 */
971	lowaddr = BUS_SPACE_MAXADDR;
972	if ((sc->rl_flags & RL_FLAG_PCIE) == 0)
973		lowaddr = BUS_SPACE_MAXADDR_32BIT;
974	error = bus_dma_tag_create(bus_get_dma_tag(dev), 1, 0,
975	    lowaddr, BUS_SPACE_MAXADDR, NULL, NULL,
976	    BUS_SPACE_MAXSIZE_32BIT, 0, BUS_SPACE_MAXSIZE_32BIT, 0,
977	    NULL, NULL, &sc->rl_parent_tag);
978	if (error) {
979		device_printf(dev, "could not allocate parent DMA tag\n");
980		return (error);
981	}
982
983	/*
984	 * Allocate map for TX mbufs.
985	 */
986	error = bus_dma_tag_create(sc->rl_parent_tag, 1, 0,
987	    BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL,
988	    NULL, MCLBYTES * RL_NTXSEGS, RL_NTXSEGS, 4096, 0,
989	    NULL, NULL, &sc->rl_ldata.rl_tx_mtag);
990	if (error) {
991		device_printf(dev, "could not allocate TX DMA tag\n");
992		return (error);
993	}
994
995	/*
996	 * Allocate map for RX mbufs.
997	 */
998
999	if ((sc->rl_flags & RL_FLAG_JUMBOV2) != 0) {
1000		error = bus_dma_tag_create(sc->rl_parent_tag, sizeof(uint64_t),
1001		    0, BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL,
1002		    MJUM9BYTES, 1, MJUM9BYTES, 0, NULL, NULL,
1003		    &sc->rl_ldata.rl_jrx_mtag);
1004		if (error) {
1005			device_printf(dev,
1006			    "could not allocate jumbo RX DMA tag\n");
1007			return (error);
1008		}
1009	}
1010	error = bus_dma_tag_create(sc->rl_parent_tag, sizeof(uint64_t), 0,
1011	    BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL,
1012	    MCLBYTES, 1, MCLBYTES, 0, NULL, NULL, &sc->rl_ldata.rl_rx_mtag);
1013	if (error) {
1014		device_printf(dev, "could not allocate RX DMA tag\n");
1015		return (error);
1016	}
1017
1018	/*
1019	 * Allocate map for TX descriptor list.
1020	 */
1021	error = bus_dma_tag_create(sc->rl_parent_tag, RL_RING_ALIGN,
1022	    0, BUS_SPACE_MAXADDR_32BIT, BUS_SPACE_MAXADDR, NULL,
1023	    NULL, tx_list_size, 1, tx_list_size, 0,
1024	    NULL, NULL, &sc->rl_ldata.rl_tx_list_tag);
1025	if (error) {
1026		device_printf(dev, "could not allocate TX DMA ring tag\n");
1027		return (error);
1028	}
1029
1030	/* Allocate DMA'able memory for the TX ring */
1031
1032	error = bus_dmamem_alloc(sc->rl_ldata.rl_tx_list_tag,
1033	    (void **)&sc->rl_ldata.rl_tx_list,
1034	    BUS_DMA_WAITOK | BUS_DMA_COHERENT | BUS_DMA_ZERO,
1035	    &sc->rl_ldata.rl_tx_list_map);
1036	if (error) {
1037		device_printf(dev, "could not allocate TX DMA ring\n");
1038		return (error);
1039	}
1040
1041	/* Load the map for the TX ring. */
1042
1043	sc->rl_ldata.rl_tx_list_addr = 0;
1044	error = bus_dmamap_load(sc->rl_ldata.rl_tx_list_tag,
1045	     sc->rl_ldata.rl_tx_list_map, sc->rl_ldata.rl_tx_list,
1046	     tx_list_size, re_dma_map_addr,
1047	     &sc->rl_ldata.rl_tx_list_addr, BUS_DMA_NOWAIT);
1048	if (error != 0 || sc->rl_ldata.rl_tx_list_addr == 0) {
1049		device_printf(dev, "could not load TX DMA ring\n");
1050		return (ENOMEM);
1051	}
1052
1053	/* Create DMA maps for TX buffers */
1054
1055	for (i = 0; i < sc->rl_ldata.rl_tx_desc_cnt; i++) {
1056		error = bus_dmamap_create(sc->rl_ldata.rl_tx_mtag, 0,
1057		    &sc->rl_ldata.rl_tx_desc[i].tx_dmamap);
1058		if (error) {
1059			device_printf(dev, "could not create DMA map for TX\n");
1060			return (error);
1061		}
1062	}
1063
1064	/*
1065	 * Allocate map for RX descriptor list.
1066	 */
1067	error = bus_dma_tag_create(sc->rl_parent_tag, RL_RING_ALIGN,
1068	    0, BUS_SPACE_MAXADDR_32BIT, BUS_SPACE_MAXADDR, NULL,
1069	    NULL, rx_list_size, 1, rx_list_size, 0,
1070	    NULL, NULL, &sc->rl_ldata.rl_rx_list_tag);
1071	if (error) {
1072		device_printf(dev, "could not create RX DMA ring tag\n");
1073		return (error);
1074	}
1075
1076	/* Allocate DMA'able memory for the RX ring */
1077
1078	error = bus_dmamem_alloc(sc->rl_ldata.rl_rx_list_tag,
1079	    (void **)&sc->rl_ldata.rl_rx_list,
1080	    BUS_DMA_WAITOK | BUS_DMA_COHERENT | BUS_DMA_ZERO,
1081	    &sc->rl_ldata.rl_rx_list_map);
1082	if (error) {
1083		device_printf(dev, "could not allocate RX DMA ring\n");
1084		return (error);
1085	}
1086
1087	/* Load the map for the RX ring. */
1088
1089	sc->rl_ldata.rl_rx_list_addr = 0;
1090	error = bus_dmamap_load(sc->rl_ldata.rl_rx_list_tag,
1091	     sc->rl_ldata.rl_rx_list_map, sc->rl_ldata.rl_rx_list,
1092	     rx_list_size, re_dma_map_addr,
1093	     &sc->rl_ldata.rl_rx_list_addr, BUS_DMA_NOWAIT);
1094	if (error != 0 || sc->rl_ldata.rl_rx_list_addr == 0) {
1095		device_printf(dev, "could not load RX DMA ring\n");
1096		return (ENOMEM);
1097	}
1098
1099	/* Create DMA maps for RX buffers */
1100
1101	if ((sc->rl_flags & RL_FLAG_JUMBOV2) != 0) {
1102		error = bus_dmamap_create(sc->rl_ldata.rl_jrx_mtag, 0,
1103		    &sc->rl_ldata.rl_jrx_sparemap);
1104		if (error) {
1105			device_printf(dev,
1106			    "could not create spare DMA map for jumbo RX\n");
1107			return (error);
1108		}
1109		for (i = 0; i < sc->rl_ldata.rl_rx_desc_cnt; i++) {
1110			error = bus_dmamap_create(sc->rl_ldata.rl_jrx_mtag, 0,
1111			    &sc->rl_ldata.rl_jrx_desc[i].rx_dmamap);
1112			if (error) {
1113				device_printf(dev,
1114				    "could not create DMA map for jumbo RX\n");
1115				return (error);
1116			}
1117		}
1118	}
1119	error = bus_dmamap_create(sc->rl_ldata.rl_rx_mtag, 0,
1120	    &sc->rl_ldata.rl_rx_sparemap);
1121	if (error) {
1122		device_printf(dev, "could not create spare DMA map for RX\n");
1123		return (error);
1124	}
1125	for (i = 0; i < sc->rl_ldata.rl_rx_desc_cnt; i++) {
1126		error = bus_dmamap_create(sc->rl_ldata.rl_rx_mtag, 0,
1127		    &sc->rl_ldata.rl_rx_desc[i].rx_dmamap);
1128		if (error) {
1129			device_printf(dev, "could not create DMA map for RX\n");
1130			return (error);
1131		}
1132	}
1133
1134	/* Create DMA map for statistics. */
1135	error = bus_dma_tag_create(sc->rl_parent_tag, RL_DUMP_ALIGN, 0,
1136	    BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL,
1137	    sizeof(struct rl_stats), 1, sizeof(struct rl_stats), 0, NULL, NULL,
1138	    &sc->rl_ldata.rl_stag);
1139	if (error) {
1140		device_printf(dev, "could not create statistics DMA tag\n");
1141		return (error);
1142	}
1143	/* Allocate DMA'able memory for statistics. */
1144	error = bus_dmamem_alloc(sc->rl_ldata.rl_stag,
1145	    (void **)&sc->rl_ldata.rl_stats,
1146	    BUS_DMA_WAITOK | BUS_DMA_COHERENT | BUS_DMA_ZERO,
1147	    &sc->rl_ldata.rl_smap);
1148	if (error) {
1149		device_printf(dev,
1150		    "could not allocate statistics DMA memory\n");
1151		return (error);
1152	}
1153	/* Load the map for statistics. */
1154	sc->rl_ldata.rl_stats_addr = 0;
1155	error = bus_dmamap_load(sc->rl_ldata.rl_stag, sc->rl_ldata.rl_smap,
1156	    sc->rl_ldata.rl_stats, sizeof(struct rl_stats), re_dma_map_addr,
1157	     &sc->rl_ldata.rl_stats_addr, BUS_DMA_NOWAIT);
1158	if (error != 0 || sc->rl_ldata.rl_stats_addr == 0) {
1159		device_printf(dev, "could not load statistics DMA memory\n");
1160		return (ENOMEM);
1161	}
1162
1163	return (0);
1164}
1165
1166/*
1167 * Attach the interface. Allocate softc structures, do ifmedia
1168 * setup and ethernet/BPF attach.
1169 */
1170static int
1171re_attach(device_t dev)
1172{
1173	u_char			eaddr[ETHER_ADDR_LEN];
1174	u_int16_t		as[ETHER_ADDR_LEN / 2];
1175	struct rl_softc		*sc;
1176	struct ifnet		*ifp;
1177	struct rl_hwrev		*hw_rev;
1178	int			hwrev;
1179	u_int16_t		devid, re_did = 0;
1180	int			error = 0, i, phy, rid;
1181	int			msic, msixc, reg;
1182	uint8_t			cfg;
1183
1184	sc = device_get_softc(dev);
1185	sc->rl_dev = dev;
1186
1187	mtx_init(&sc->rl_mtx, device_get_nameunit(dev), MTX_NETWORK_LOCK,
1188	    MTX_DEF);
1189	callout_init_mtx(&sc->rl_stat_callout, &sc->rl_mtx, 0);
1190
1191	/*
1192	 * Map control/status registers.
1193	 */
1194	pci_enable_busmaster(dev);
1195
1196	devid = pci_get_device(dev);
1197	/*
1198	 * Prefer memory space register mapping over IO space.
1199	 * Because RTL8169SC does not seem to work when memory mapping
1200	 * is used always activate io mapping.
1201	 */
1202	if (devid == RT_DEVICEID_8169SC)
1203		prefer_iomap = 1;
1204	if (prefer_iomap == 0) {
1205		sc->rl_res_id = PCIR_BAR(1);
1206		sc->rl_res_type = SYS_RES_MEMORY;
1207		/* RTL8168/8101E seems to use different BARs. */
1208		if (devid == RT_DEVICEID_8168 || devid == RT_DEVICEID_8101E)
1209			sc->rl_res_id = PCIR_BAR(2);
1210	} else {
1211		sc->rl_res_id = PCIR_BAR(0);
1212		sc->rl_res_type = SYS_RES_IOPORT;
1213	}
1214	sc->rl_res = bus_alloc_resource_any(dev, sc->rl_res_type,
1215	    &sc->rl_res_id, RF_ACTIVE);
1216	if (sc->rl_res == NULL && prefer_iomap == 0) {
1217		sc->rl_res_id = PCIR_BAR(0);
1218		sc->rl_res_type = SYS_RES_IOPORT;
1219		sc->rl_res = bus_alloc_resource_any(dev, sc->rl_res_type,
1220		    &sc->rl_res_id, RF_ACTIVE);
1221	}
1222	if (sc->rl_res == NULL) {
1223		device_printf(dev, "couldn't map ports/memory\n");
1224		error = ENXIO;
1225		goto fail;
1226	}
1227
1228	sc->rl_btag = rman_get_bustag(sc->rl_res);
1229	sc->rl_bhandle = rman_get_bushandle(sc->rl_res);
1230
1231	msic = pci_msi_count(dev);
1232	msixc = pci_msix_count(dev);
1233	if (pci_find_extcap(dev, PCIY_EXPRESS, &reg) == 0)
1234		sc->rl_flags |= RL_FLAG_PCIE;
1235	if (bootverbose) {
1236		device_printf(dev, "MSI count : %d\n", msic);
1237		device_printf(dev, "MSI-X count : %d\n", msixc);
1238	}
1239	if (msix_disable > 0)
1240		msixc = 0;
1241	if (msi_disable > 0)
1242		msic = 0;
1243	/* Prefer MSI-X to MSI. */
1244	if (msixc > 0) {
1245		msixc = 1;
1246		rid = PCIR_BAR(4);
1247		sc->rl_res_pba = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
1248		    &rid, RF_ACTIVE);
1249		if (sc->rl_res_pba == NULL) {
1250			device_printf(sc->rl_dev,
1251			    "could not allocate MSI-X PBA resource\n");
1252		}
1253		if (sc->rl_res_pba != NULL &&
1254		    pci_alloc_msix(dev, &msixc) == 0) {
1255			if (msixc == 1) {
1256				device_printf(dev, "Using %d MSI-X message\n",
1257				    msixc);
1258				sc->rl_flags |= RL_FLAG_MSIX;
1259			} else
1260				pci_release_msi(dev);
1261		}
1262		if ((sc->rl_flags & RL_FLAG_MSIX) == 0) {
1263			if (sc->rl_res_pba != NULL)
1264				bus_release_resource(dev, SYS_RES_MEMORY, rid,
1265				    sc->rl_res_pba);
1266			sc->rl_res_pba = NULL;
1267			msixc = 0;
1268		}
1269	}
1270	/* Prefer MSI to INTx. */
1271	if (msixc == 0 && msic > 0) {
1272		msic = 1;
1273		if (pci_alloc_msi(dev, &msic) == 0) {
1274			if (msic == RL_MSI_MESSAGES) {
1275				device_printf(dev, "Using %d MSI message\n",
1276				    msic);
1277				sc->rl_flags |= RL_FLAG_MSI;
1278				/* Explicitly set MSI enable bit. */
1279				CSR_WRITE_1(sc, RL_EECMD, RL_EE_MODE);
1280				cfg = CSR_READ_1(sc, RL_CFG2);
1281				cfg |= RL_CFG2_MSI;
1282				CSR_WRITE_1(sc, RL_CFG2, cfg);
1283				CSR_WRITE_1(sc, RL_EECMD, RL_EEMODE_OFF);
1284			} else
1285				pci_release_msi(dev);
1286		}
1287		if ((sc->rl_flags & RL_FLAG_MSI) == 0)
1288			msic = 0;
1289	}
1290
1291	/* Allocate interrupt */
1292	if ((sc->rl_flags & (RL_FLAG_MSI | RL_FLAG_MSIX)) == 0) {
1293		rid = 0;
1294		sc->rl_irq[0] = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid,
1295		    RF_SHAREABLE | RF_ACTIVE);
1296		if (sc->rl_irq[0] == NULL) {
1297			device_printf(dev, "couldn't allocate IRQ resources\n");
1298			error = ENXIO;
1299			goto fail;
1300		}
1301	} else {
1302		for (i = 0, rid = 1; i < RL_MSI_MESSAGES; i++, rid++) {
1303			sc->rl_irq[i] = bus_alloc_resource_any(dev,
1304			    SYS_RES_IRQ, &rid, RF_ACTIVE);
1305			if (sc->rl_irq[i] == NULL) {
1306				device_printf(dev,
1307				    "couldn't llocate IRQ resources for "
1308				    "message %d\n", rid);
1309				error = ENXIO;
1310				goto fail;
1311			}
1312		}
1313	}
1314
1315	if ((sc->rl_flags & RL_FLAG_MSI) == 0) {
1316		CSR_WRITE_1(sc, RL_EECMD, RL_EE_MODE);
1317		cfg = CSR_READ_1(sc, RL_CFG2);
1318		if ((cfg & RL_CFG2_MSI) != 0) {
1319			device_printf(dev, "turning off MSI enable bit.\n");
1320			cfg &= ~RL_CFG2_MSI;
1321			CSR_WRITE_1(sc, RL_CFG2, cfg);
1322		}
1323		CSR_WRITE_1(sc, RL_EECMD, RL_EEMODE_OFF);
1324	}
1325
1326	hw_rev = re_hwrevs;
1327	hwrev = CSR_READ_4(sc, RL_TXCFG);
1328	switch (hwrev & 0x70000000) {
1329	case 0x00000000:
1330	case 0x10000000:
1331		device_printf(dev, "Chip rev. 0x%08x\n", hwrev & 0xfc800000);
1332		hwrev &= (RL_TXCFG_HWREV | 0x80000000);
1333		break;
1334	default:
1335		device_printf(dev, "Chip rev. 0x%08x\n", hwrev & 0x7c800000);
1336		hwrev &= RL_TXCFG_HWREV;
1337		break;
1338	}
1339	device_printf(dev, "MAC rev. 0x%08x\n", hwrev & 0x00700000);
1340	while (hw_rev->rl_desc != NULL) {
1341		if (hw_rev->rl_rev == hwrev) {
1342			sc->rl_type = hw_rev->rl_type;
1343			sc->rl_hwrev = hw_rev;
1344			break;
1345		}
1346		hw_rev++;
1347	}
1348	if (hw_rev->rl_desc == NULL) {
1349		device_printf(dev, "Unknown H/W revision: 0x%08x\n", hwrev);
1350		error = ENXIO;
1351		goto fail;
1352	}
1353
1354	switch (hw_rev->rl_rev) {
1355	case RL_HWREV_8139CPLUS:
1356		sc->rl_flags |= RL_FLAG_FASTETHER | RL_FLAG_AUTOPAD;
1357		break;
1358	case RL_HWREV_8100E:
1359	case RL_HWREV_8101E:
1360		sc->rl_flags |= RL_FLAG_PHYWAKE | RL_FLAG_FASTETHER;
1361		break;
1362	case RL_HWREV_8102E:
1363	case RL_HWREV_8102EL:
1364	case RL_HWREV_8102EL_SPIN1:
1365		sc->rl_flags |= RL_FLAG_PHYWAKE | RL_FLAG_PAR | RL_FLAG_DESCV2 |
1366		    RL_FLAG_MACSTAT | RL_FLAG_FASTETHER | RL_FLAG_CMDSTOP |
1367		    RL_FLAG_AUTOPAD;
1368		break;
1369	case RL_HWREV_8103E:
1370		sc->rl_flags |= RL_FLAG_PHYWAKE | RL_FLAG_PAR | RL_FLAG_DESCV2 |
1371		    RL_FLAG_MACSTAT | RL_FLAG_FASTETHER | RL_FLAG_CMDSTOP |
1372		    RL_FLAG_AUTOPAD | RL_FLAG_MACSLEEP;
1373		break;
1374	case RL_HWREV_8168B_SPIN1:
1375	case RL_HWREV_8168B_SPIN2:
1376		sc->rl_flags |= RL_FLAG_WOLRXENB;
1377		/* FALLTHROUGH */
1378	case RL_HWREV_8168B_SPIN3:
1379		sc->rl_flags |= RL_FLAG_PHYWAKE | RL_FLAG_MACSTAT;
1380		break;
1381	case RL_HWREV_8168C_SPIN2:
1382		sc->rl_flags |= RL_FLAG_MACSLEEP;
1383		/* FALLTHROUGH */
1384	case RL_HWREV_8168C:
1385		if ((hwrev & 0x00700000) == 0x00200000)
1386			sc->rl_flags |= RL_FLAG_MACSLEEP;
1387		/* FALLTHROUGH */
1388	case RL_HWREV_8168CP:
1389	case RL_HWREV_8168D:
1390	case RL_HWREV_8168DP:
1391		sc->rl_flags |= RL_FLAG_PHYWAKE | RL_FLAG_PAR |
1392		    RL_FLAG_DESCV2 | RL_FLAG_MACSTAT | RL_FLAG_CMDSTOP |
1393		    RL_FLAG_AUTOPAD | RL_FLAG_JUMBOV2;
1394		break;
1395	case RL_HWREV_8168E:
1396		sc->rl_flags |= RL_FLAG_PHYWAKE | RL_FLAG_PHYWAKE_PM |
1397		    RL_FLAG_PAR | RL_FLAG_DESCV2 | RL_FLAG_MACSTAT |
1398		    RL_FLAG_CMDSTOP | RL_FLAG_AUTOPAD | RL_FLAG_JUMBOV2;
1399		break;
1400	case RL_HWREV_8168E_VL:
1401		sc->rl_flags |= RL_FLAG_PHYWAKE | RL_FLAG_PAR |
1402		    RL_FLAG_DESCV2 | RL_FLAG_MACSTAT | RL_FLAG_CMDSTOP |
1403		    RL_FLAG_AUTOPAD | RL_FLAG_JUMBOV2;
1404		break;
1405	case RL_HWREV_8169_8110SB:
1406	case RL_HWREV_8169_8110SBL:
1407	case RL_HWREV_8169_8110SC:
1408	case RL_HWREV_8169_8110SCE:
1409		sc->rl_flags |= RL_FLAG_PHYWAKE;
1410		/* FALLTHROUGH */
1411	case RL_HWREV_8169:
1412	case RL_HWREV_8169S:
1413	case RL_HWREV_8110S:
1414		sc->rl_flags |= RL_FLAG_MACRESET;
1415		break;
1416	default:
1417		break;
1418	}
1419
1420	/* Reset the adapter. */
1421	RL_LOCK(sc);
1422	re_reset(sc);
1423	RL_UNLOCK(sc);
1424
1425	/* Enable PME. */
1426	CSR_WRITE_1(sc, RL_EECMD, RL_EE_MODE);
1427	cfg = CSR_READ_1(sc, RL_CFG1);
1428	cfg |= RL_CFG1_PME;
1429	CSR_WRITE_1(sc, RL_CFG1, cfg);
1430	cfg = CSR_READ_1(sc, RL_CFG5);
1431	cfg &= RL_CFG5_PME_STS;
1432	CSR_WRITE_1(sc, RL_CFG5, cfg);
1433	CSR_WRITE_1(sc, RL_EECMD, RL_EEMODE_OFF);
1434
1435	if ((sc->rl_flags & RL_FLAG_PAR) != 0) {
1436		/*
1437		 * XXX Should have a better way to extract station
1438		 * address from EEPROM.
1439		 */
1440		for (i = 0; i < ETHER_ADDR_LEN; i++)
1441			eaddr[i] = CSR_READ_1(sc, RL_IDR0 + i);
1442	} else {
1443		sc->rl_eewidth = RL_9356_ADDR_LEN;
1444		re_read_eeprom(sc, (caddr_t)&re_did, 0, 1);
1445		if (re_did != 0x8129)
1446			sc->rl_eewidth = RL_9346_ADDR_LEN;
1447
1448		/*
1449		 * Get station address from the EEPROM.
1450		 */
1451		re_read_eeprom(sc, (caddr_t)as, RL_EE_EADDR, 3);
1452		for (i = 0; i < ETHER_ADDR_LEN / 2; i++)
1453			as[i] = le16toh(as[i]);
1454		bcopy(as, eaddr, sizeof(eaddr));
1455	}
1456
1457	if (sc->rl_type == RL_8169) {
1458		/* Set RX length mask and number of descriptors. */
1459		sc->rl_rxlenmask = RL_RDESC_STAT_GFRAGLEN;
1460		sc->rl_txstart = RL_GTXSTART;
1461		sc->rl_ldata.rl_tx_desc_cnt = RL_8169_TX_DESC_CNT;
1462		sc->rl_ldata.rl_rx_desc_cnt = RL_8169_RX_DESC_CNT;
1463	} else {
1464		/* Set RX length mask and number of descriptors. */
1465		sc->rl_rxlenmask = RL_RDESC_STAT_FRAGLEN;
1466		sc->rl_txstart = RL_TXSTART;
1467		sc->rl_ldata.rl_tx_desc_cnt = RL_8139_TX_DESC_CNT;
1468		sc->rl_ldata.rl_rx_desc_cnt = RL_8139_RX_DESC_CNT;
1469	}
1470
1471	error = re_allocmem(dev, sc);
1472	if (error)
1473		goto fail;
1474	re_add_sysctls(sc);
1475
1476	ifp = sc->rl_ifp = if_alloc(IFT_ETHER);
1477	if (ifp == NULL) {
1478		device_printf(dev, "can not if_alloc()\n");
1479		error = ENOSPC;
1480		goto fail;
1481	}
1482
1483	/* Take controller out of deep sleep mode. */
1484	if ((sc->rl_flags & RL_FLAG_MACSLEEP) != 0) {
1485		if ((CSR_READ_1(sc, RL_MACDBG) & 0x80) == 0x80)
1486			CSR_WRITE_1(sc, RL_GPIO,
1487			    CSR_READ_1(sc, RL_GPIO) | 0x01);
1488		else
1489			CSR_WRITE_1(sc, RL_GPIO,
1490			    CSR_READ_1(sc, RL_GPIO) & ~0x01);
1491	}
1492
1493	/* Take PHY out of power down mode. */
1494	if ((sc->rl_flags & RL_FLAG_PHYWAKE_PM) != 0)
1495		CSR_WRITE_1(sc, RL_PMCH, CSR_READ_1(sc, RL_PMCH) | 0x80);
1496	if ((sc->rl_flags & RL_FLAG_PHYWAKE) != 0) {
1497		re_gmii_writereg(dev, 1, 0x1f, 0);
1498		re_gmii_writereg(dev, 1, 0x0e, 0);
1499	}
1500
1501#define	RE_PHYAD_INTERNAL	 0
1502
1503	/* Do MII setup. */
1504	phy = RE_PHYAD_INTERNAL;
1505	if (sc->rl_type == RL_8169)
1506		phy = 1;
1507	error = mii_attach(dev, &sc->rl_miibus, ifp, re_ifmedia_upd,
1508	    re_ifmedia_sts, BMSR_DEFCAPMASK, phy, MII_OFFSET_ANY, MIIF_DOPAUSE);
1509	if (error != 0) {
1510		device_printf(dev, "attaching PHYs failed\n");
1511		goto fail;
1512	}
1513
1514	ifp->if_softc = sc;
1515	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
1516	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
1517	ifp->if_ioctl = re_ioctl;
1518	ifp->if_start = re_start;
1519	ifp->if_hwassist = RE_CSUM_FEATURES | CSUM_TSO;
1520	ifp->if_capabilities = IFCAP_HWCSUM | IFCAP_TSO4;
1521	ifp->if_capenable = ifp->if_capabilities;
1522	ifp->if_init = re_init;
1523	IFQ_SET_MAXLEN(&ifp->if_snd, RL_IFQ_MAXLEN);
1524	ifp->if_snd.ifq_drv_maxlen = RL_IFQ_MAXLEN;
1525	IFQ_SET_READY(&ifp->if_snd);
1526
1527	TASK_INIT(&sc->rl_inttask, 0, re_int_task, sc);
1528
1529	/*
1530	 * Call MI attach routine.
1531	 */
1532	ether_ifattach(ifp, eaddr);
1533
1534	/* VLAN capability setup */
1535	ifp->if_capabilities |= IFCAP_VLAN_MTU | IFCAP_VLAN_HWTAGGING;
1536	if (ifp->if_capabilities & IFCAP_HWCSUM)
1537		ifp->if_capabilities |= IFCAP_VLAN_HWCSUM;
1538	/* Enable WOL if PM is supported. */
1539	if (pci_find_extcap(sc->rl_dev, PCIY_PMG, &reg) == 0)
1540		ifp->if_capabilities |= IFCAP_WOL;
1541	ifp->if_capenable = ifp->if_capabilities;
1542	/*
1543	 * Don't enable TSO by default.  It is known to generate
1544	 * corrupted TCP segments(bad TCP options) under certain
1545	 * circumtances.
1546	 */
1547	ifp->if_hwassist &= ~CSUM_TSO;
1548	ifp->if_capenable &= ~(IFCAP_TSO4 | IFCAP_VLAN_HWTSO);
1549#ifdef DEVICE_POLLING
1550	ifp->if_capabilities |= IFCAP_POLLING;
1551#endif
1552	/*
1553	 * Tell the upper layer(s) we support long frames.
1554	 * Must appear after the call to ether_ifattach() because
1555	 * ether_ifattach() sets ifi_hdrlen to the default value.
1556	 */
1557	ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header);
1558
1559#ifdef RE_DIAG
1560	/*
1561	 * Perform hardware diagnostic on the original RTL8169.
1562	 * Some 32-bit cards were incorrectly wired and would
1563	 * malfunction if plugged into a 64-bit slot.
1564	 */
1565
1566	if (hwrev == RL_HWREV_8169) {
1567		error = re_diag(sc);
1568		if (error) {
1569			device_printf(dev,
1570		    	"attach aborted due to hardware diag failure\n");
1571			ether_ifdetach(ifp);
1572			goto fail;
1573		}
1574	}
1575#endif
1576
1577	/* Hook interrupt last to avoid having to lock softc */
1578	if ((sc->rl_flags & (RL_FLAG_MSI | RL_FLAG_MSIX)) == 0)
1579		error = bus_setup_intr(dev, sc->rl_irq[0],
1580		    INTR_TYPE_NET | INTR_MPSAFE, re_intr, NULL, sc,
1581		    &sc->rl_intrhand[0]);
1582	else {
1583		for (i = 0; i < RL_MSI_MESSAGES; i++) {
1584			error = bus_setup_intr(dev, sc->rl_irq[i],
1585			    INTR_TYPE_NET | INTR_MPSAFE, re_intr, NULL, sc,
1586		    	    &sc->rl_intrhand[i]);
1587			if (error != 0)
1588				break;
1589		}
1590	}
1591	if (error) {
1592		device_printf(dev, "couldn't set up irq\n");
1593		ether_ifdetach(ifp);
1594	}
1595
1596fail:
1597
1598	if (error)
1599		re_detach(dev);
1600
1601	return (error);
1602}
1603
1604/*
1605 * Shutdown hardware and free up resources. This can be called any
1606 * time after the mutex has been initialized. It is called in both
1607 * the error case in attach and the normal detach case so it needs
1608 * to be careful about only freeing resources that have actually been
1609 * allocated.
1610 */
1611static int
1612re_detach(device_t dev)
1613{
1614	struct rl_softc		*sc;
1615	struct ifnet		*ifp;
1616	int			i, rid;
1617
1618	sc = device_get_softc(dev);
1619	ifp = sc->rl_ifp;
1620	KASSERT(mtx_initialized(&sc->rl_mtx), ("re mutex not initialized"));
1621
1622	/* These should only be active if attach succeeded */
1623	if (device_is_attached(dev)) {
1624#ifdef DEVICE_POLLING
1625		if (ifp->if_capenable & IFCAP_POLLING)
1626			ether_poll_deregister(ifp);
1627#endif
1628		RL_LOCK(sc);
1629#if 0
1630		sc->suspended = 1;
1631#endif
1632		re_stop(sc);
1633		RL_UNLOCK(sc);
1634		callout_drain(&sc->rl_stat_callout);
1635		taskqueue_drain(taskqueue_fast, &sc->rl_inttask);
1636		/*
1637		 * Force off the IFF_UP flag here, in case someone
1638		 * still had a BPF descriptor attached to this
1639		 * interface. If they do, ether_ifdetach() will cause
1640		 * the BPF code to try and clear the promisc mode
1641		 * flag, which will bubble down to re_ioctl(),
1642		 * which will try to call re_init() again. This will
1643		 * turn the NIC back on and restart the MII ticker,
1644		 * which will panic the system when the kernel tries
1645		 * to invoke the re_tick() function that isn't there
1646		 * anymore.
1647		 */
1648		ifp->if_flags &= ~IFF_UP;
1649		ether_ifdetach(ifp);
1650	}
1651	if (sc->rl_miibus)
1652		device_delete_child(dev, sc->rl_miibus);
1653	bus_generic_detach(dev);
1654
1655	/*
1656	 * The rest is resource deallocation, so we should already be
1657	 * stopped here.
1658	 */
1659
1660	for (i = 0; i < RL_MSI_MESSAGES; i++) {
1661		if (sc->rl_intrhand[i] != NULL) {
1662			bus_teardown_intr(dev, sc->rl_irq[i],
1663			    sc->rl_intrhand[i]);
1664			sc->rl_intrhand[i] = NULL;
1665		}
1666	}
1667	if (ifp != NULL)
1668		if_free(ifp);
1669	if ((sc->rl_flags & (RL_FLAG_MSI | RL_FLAG_MSIX)) == 0) {
1670		if (sc->rl_irq[0] != NULL) {
1671			bus_release_resource(dev, SYS_RES_IRQ, 0,
1672			    sc->rl_irq[0]);
1673			sc->rl_irq[0] = NULL;
1674		}
1675	} else {
1676		for (i = 0, rid = 1; i < RL_MSI_MESSAGES; i++, rid++) {
1677			if (sc->rl_irq[i] != NULL) {
1678				bus_release_resource(dev, SYS_RES_IRQ, rid,
1679				    sc->rl_irq[i]);
1680				sc->rl_irq[i] = NULL;
1681			}
1682		}
1683		pci_release_msi(dev);
1684	}
1685	if (sc->rl_res_pba) {
1686		rid = PCIR_BAR(4);
1687		bus_release_resource(dev, SYS_RES_MEMORY, rid, sc->rl_res_pba);
1688	}
1689	if (sc->rl_res)
1690		bus_release_resource(dev, sc->rl_res_type, sc->rl_res_id,
1691		    sc->rl_res);
1692
1693	/* Unload and free the RX DMA ring memory and map */
1694
1695	if (sc->rl_ldata.rl_rx_list_tag) {
1696		if (sc->rl_ldata.rl_rx_list_map)
1697			bus_dmamap_unload(sc->rl_ldata.rl_rx_list_tag,
1698			    sc->rl_ldata.rl_rx_list_map);
1699		if (sc->rl_ldata.rl_rx_list_map && sc->rl_ldata.rl_rx_list)
1700			bus_dmamem_free(sc->rl_ldata.rl_rx_list_tag,
1701			    sc->rl_ldata.rl_rx_list,
1702			    sc->rl_ldata.rl_rx_list_map);
1703		bus_dma_tag_destroy(sc->rl_ldata.rl_rx_list_tag);
1704	}
1705
1706	/* Unload and free the TX DMA ring memory and map */
1707
1708	if (sc->rl_ldata.rl_tx_list_tag) {
1709		if (sc->rl_ldata.rl_tx_list_map)
1710			bus_dmamap_unload(sc->rl_ldata.rl_tx_list_tag,
1711			    sc->rl_ldata.rl_tx_list_map);
1712		if (sc->rl_ldata.rl_tx_list_map && sc->rl_ldata.rl_tx_list)
1713			bus_dmamem_free(sc->rl_ldata.rl_tx_list_tag,
1714			    sc->rl_ldata.rl_tx_list,
1715			    sc->rl_ldata.rl_tx_list_map);
1716		bus_dma_tag_destroy(sc->rl_ldata.rl_tx_list_tag);
1717	}
1718
1719	/* Destroy all the RX and TX buffer maps */
1720
1721	if (sc->rl_ldata.rl_tx_mtag) {
1722		for (i = 0; i < sc->rl_ldata.rl_tx_desc_cnt; i++) {
1723			if (sc->rl_ldata.rl_tx_desc[i].tx_dmamap)
1724				bus_dmamap_destroy(sc->rl_ldata.rl_tx_mtag,
1725				    sc->rl_ldata.rl_tx_desc[i].tx_dmamap);
1726		}
1727		bus_dma_tag_destroy(sc->rl_ldata.rl_tx_mtag);
1728	}
1729	if (sc->rl_ldata.rl_rx_mtag) {
1730		for (i = 0; i < sc->rl_ldata.rl_rx_desc_cnt; i++) {
1731			if (sc->rl_ldata.rl_rx_desc[i].rx_dmamap)
1732				bus_dmamap_destroy(sc->rl_ldata.rl_rx_mtag,
1733				    sc->rl_ldata.rl_rx_desc[i].rx_dmamap);
1734		}
1735		if (sc->rl_ldata.rl_rx_sparemap)
1736			bus_dmamap_destroy(sc->rl_ldata.rl_rx_mtag,
1737			    sc->rl_ldata.rl_rx_sparemap);
1738		bus_dma_tag_destroy(sc->rl_ldata.rl_rx_mtag);
1739	}
1740	if (sc->rl_ldata.rl_jrx_mtag) {
1741		for (i = 0; i < sc->rl_ldata.rl_rx_desc_cnt; i++) {
1742			if (sc->rl_ldata.rl_jrx_desc[i].rx_dmamap)
1743				bus_dmamap_destroy(sc->rl_ldata.rl_jrx_mtag,
1744				    sc->rl_ldata.rl_jrx_desc[i].rx_dmamap);
1745		}
1746		if (sc->rl_ldata.rl_jrx_sparemap)
1747			bus_dmamap_destroy(sc->rl_ldata.rl_jrx_mtag,
1748			    sc->rl_ldata.rl_jrx_sparemap);
1749		bus_dma_tag_destroy(sc->rl_ldata.rl_jrx_mtag);
1750	}
1751	/* Unload and free the stats buffer and map */
1752
1753	if (sc->rl_ldata.rl_stag) {
1754		if (sc->rl_ldata.rl_smap)
1755			bus_dmamap_unload(sc->rl_ldata.rl_stag,
1756			    sc->rl_ldata.rl_smap);
1757		if (sc->rl_ldata.rl_smap && sc->rl_ldata.rl_stats)
1758			bus_dmamem_free(sc->rl_ldata.rl_stag,
1759			    sc->rl_ldata.rl_stats, sc->rl_ldata.rl_smap);
1760		bus_dma_tag_destroy(sc->rl_ldata.rl_stag);
1761	}
1762
1763	if (sc->rl_parent_tag)
1764		bus_dma_tag_destroy(sc->rl_parent_tag);
1765
1766	mtx_destroy(&sc->rl_mtx);
1767
1768	return (0);
1769}
1770
1771static __inline void
1772re_discard_rxbuf(struct rl_softc *sc, int idx)
1773{
1774	struct rl_desc		*desc;
1775	struct rl_rxdesc	*rxd;
1776	uint32_t		cmdstat;
1777
1778	if (sc->rl_ifp->if_mtu > RL_MTU &&
1779	    (sc->rl_flags & RL_FLAG_JUMBOV2) != 0)
1780		rxd = &sc->rl_ldata.rl_jrx_desc[idx];
1781	else
1782		rxd = &sc->rl_ldata.rl_rx_desc[idx];
1783	desc = &sc->rl_ldata.rl_rx_list[idx];
1784	desc->rl_vlanctl = 0;
1785	cmdstat = rxd->rx_size;
1786	if (idx == sc->rl_ldata.rl_rx_desc_cnt - 1)
1787		cmdstat |= RL_RDESC_CMD_EOR;
1788	desc->rl_cmdstat = htole32(cmdstat | RL_RDESC_CMD_OWN);
1789}
1790
1791static int
1792re_newbuf(struct rl_softc *sc, int idx)
1793{
1794	struct mbuf		*m;
1795	struct rl_rxdesc	*rxd;
1796	bus_dma_segment_t	segs[1];
1797	bus_dmamap_t		map;
1798	struct rl_desc		*desc;
1799	uint32_t		cmdstat;
1800	int			error, nsegs;
1801
1802	m = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR);
1803	if (m == NULL)
1804		return (ENOBUFS);
1805
1806	m->m_len = m->m_pkthdr.len = MCLBYTES;
1807#ifdef RE_FIXUP_RX
1808	/*
1809	 * This is part of an evil trick to deal with non-x86 platforms.
1810	 * The RealTek chip requires RX buffers to be aligned on 64-bit
1811	 * boundaries, but that will hose non-x86 machines. To get around
1812	 * this, we leave some empty space at the start of each buffer
1813	 * and for non-x86 hosts, we copy the buffer back six bytes
1814	 * to achieve word alignment. This is slightly more efficient
1815	 * than allocating a new buffer, copying the contents, and
1816	 * discarding the old buffer.
1817	 */
1818	m_adj(m, RE_ETHER_ALIGN);
1819#endif
1820	error = bus_dmamap_load_mbuf_sg(sc->rl_ldata.rl_rx_mtag,
1821	    sc->rl_ldata.rl_rx_sparemap, m, segs, &nsegs, BUS_DMA_NOWAIT);
1822	if (error != 0) {
1823		m_freem(m);
1824		return (ENOBUFS);
1825	}
1826	KASSERT(nsegs == 1, ("%s: %d segment returned!", __func__, nsegs));
1827
1828	rxd = &sc->rl_ldata.rl_rx_desc[idx];
1829	if (rxd->rx_m != NULL) {
1830		bus_dmamap_sync(sc->rl_ldata.rl_rx_mtag, rxd->rx_dmamap,
1831		    BUS_DMASYNC_POSTREAD);
1832		bus_dmamap_unload(sc->rl_ldata.rl_rx_mtag, rxd->rx_dmamap);
1833	}
1834
1835	rxd->rx_m = m;
1836	map = rxd->rx_dmamap;
1837	rxd->rx_dmamap = sc->rl_ldata.rl_rx_sparemap;
1838	rxd->rx_size = segs[0].ds_len;
1839	sc->rl_ldata.rl_rx_sparemap = map;
1840	bus_dmamap_sync(sc->rl_ldata.rl_rx_mtag, rxd->rx_dmamap,
1841	    BUS_DMASYNC_PREREAD);
1842
1843	desc = &sc->rl_ldata.rl_rx_list[idx];
1844	desc->rl_vlanctl = 0;
1845	desc->rl_bufaddr_lo = htole32(RL_ADDR_LO(segs[0].ds_addr));
1846	desc->rl_bufaddr_hi = htole32(RL_ADDR_HI(segs[0].ds_addr));
1847	cmdstat = segs[0].ds_len;
1848	if (idx == sc->rl_ldata.rl_rx_desc_cnt - 1)
1849		cmdstat |= RL_RDESC_CMD_EOR;
1850	desc->rl_cmdstat = htole32(cmdstat | RL_RDESC_CMD_OWN);
1851
1852	return (0);
1853}
1854
1855static int
1856re_jumbo_newbuf(struct rl_softc *sc, int idx)
1857{
1858	struct mbuf		*m;
1859	struct rl_rxdesc	*rxd;
1860	bus_dma_segment_t	segs[1];
1861	bus_dmamap_t		map;
1862	struct rl_desc		*desc;
1863	uint32_t		cmdstat;
1864	int			error, nsegs;
1865
1866	m = m_getjcl(M_DONTWAIT, MT_DATA, M_PKTHDR, MJUM9BYTES);
1867	if (m == NULL)
1868		return (ENOBUFS);
1869	m->m_len = m->m_pkthdr.len = MJUM9BYTES;
1870#ifdef RE_FIXUP_RX
1871	m_adj(m, RE_ETHER_ALIGN);
1872#endif
1873	error = bus_dmamap_load_mbuf_sg(sc->rl_ldata.rl_jrx_mtag,
1874	    sc->rl_ldata.rl_jrx_sparemap, m, segs, &nsegs, BUS_DMA_NOWAIT);
1875	if (error != 0) {
1876		m_freem(m);
1877		return (ENOBUFS);
1878	}
1879	KASSERT(nsegs == 1, ("%s: %d segment returned!", __func__, nsegs));
1880
1881	rxd = &sc->rl_ldata.rl_jrx_desc[idx];
1882	if (rxd->rx_m != NULL) {
1883		bus_dmamap_sync(sc->rl_ldata.rl_jrx_mtag, rxd->rx_dmamap,
1884		    BUS_DMASYNC_POSTREAD);
1885		bus_dmamap_unload(sc->rl_ldata.rl_jrx_mtag, rxd->rx_dmamap);
1886	}
1887
1888	rxd->rx_m = m;
1889	map = rxd->rx_dmamap;
1890	rxd->rx_dmamap = sc->rl_ldata.rl_jrx_sparemap;
1891	rxd->rx_size = segs[0].ds_len;
1892	sc->rl_ldata.rl_jrx_sparemap = map;
1893	bus_dmamap_sync(sc->rl_ldata.rl_jrx_mtag, rxd->rx_dmamap,
1894	    BUS_DMASYNC_PREREAD);
1895
1896	desc = &sc->rl_ldata.rl_rx_list[idx];
1897	desc->rl_vlanctl = 0;
1898	desc->rl_bufaddr_lo = htole32(RL_ADDR_LO(segs[0].ds_addr));
1899	desc->rl_bufaddr_hi = htole32(RL_ADDR_HI(segs[0].ds_addr));
1900	cmdstat = segs[0].ds_len;
1901	if (idx == sc->rl_ldata.rl_rx_desc_cnt - 1)
1902		cmdstat |= RL_RDESC_CMD_EOR;
1903	desc->rl_cmdstat = htole32(cmdstat | RL_RDESC_CMD_OWN);
1904
1905	return (0);
1906}
1907
1908#ifdef RE_FIXUP_RX
1909static __inline void
1910re_fixup_rx(struct mbuf *m)
1911{
1912	int                     i;
1913	uint16_t                *src, *dst;
1914
1915	src = mtod(m, uint16_t *);
1916	dst = src - (RE_ETHER_ALIGN - ETHER_ALIGN) / sizeof *src;
1917
1918	for (i = 0; i < (m->m_len / sizeof(uint16_t) + 1); i++)
1919		*dst++ = *src++;
1920
1921	m->m_data -= RE_ETHER_ALIGN - ETHER_ALIGN;
1922}
1923#endif
1924
1925static int
1926re_tx_list_init(struct rl_softc *sc)
1927{
1928	struct rl_desc		*desc;
1929	int			i;
1930
1931	RL_LOCK_ASSERT(sc);
1932
1933	bzero(sc->rl_ldata.rl_tx_list,
1934	    sc->rl_ldata.rl_tx_desc_cnt * sizeof(struct rl_desc));
1935	for (i = 0; i < sc->rl_ldata.rl_tx_desc_cnt; i++)
1936		sc->rl_ldata.rl_tx_desc[i].tx_m = NULL;
1937	/* Set EOR. */
1938	desc = &sc->rl_ldata.rl_tx_list[sc->rl_ldata.rl_tx_desc_cnt - 1];
1939	desc->rl_cmdstat |= htole32(RL_TDESC_CMD_EOR);
1940
1941	bus_dmamap_sync(sc->rl_ldata.rl_tx_list_tag,
1942	    sc->rl_ldata.rl_tx_list_map,
1943	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1944
1945	sc->rl_ldata.rl_tx_prodidx = 0;
1946	sc->rl_ldata.rl_tx_considx = 0;
1947	sc->rl_ldata.rl_tx_free = sc->rl_ldata.rl_tx_desc_cnt;
1948
1949	return (0);
1950}
1951
1952static int
1953re_rx_list_init(struct rl_softc *sc)
1954{
1955	int			error, i;
1956
1957	bzero(sc->rl_ldata.rl_rx_list,
1958	    sc->rl_ldata.rl_rx_desc_cnt * sizeof(struct rl_desc));
1959	for (i = 0; i < sc->rl_ldata.rl_rx_desc_cnt; i++) {
1960		sc->rl_ldata.rl_rx_desc[i].rx_m = NULL;
1961		if ((error = re_newbuf(sc, i)) != 0)
1962			return (error);
1963	}
1964
1965	/* Flush the RX descriptors */
1966
1967	bus_dmamap_sync(sc->rl_ldata.rl_rx_list_tag,
1968	    sc->rl_ldata.rl_rx_list_map,
1969	    BUS_DMASYNC_PREWRITE|BUS_DMASYNC_PREREAD);
1970
1971	sc->rl_ldata.rl_rx_prodidx = 0;
1972	sc->rl_head = sc->rl_tail = NULL;
1973
1974	return (0);
1975}
1976
1977static int
1978re_jrx_list_init(struct rl_softc *sc)
1979{
1980	int			error, i;
1981
1982	bzero(sc->rl_ldata.rl_rx_list,
1983	    sc->rl_ldata.rl_rx_desc_cnt * sizeof(struct rl_desc));
1984	for (i = 0; i < sc->rl_ldata.rl_rx_desc_cnt; i++) {
1985		sc->rl_ldata.rl_jrx_desc[i].rx_m = NULL;
1986		if ((error = re_jumbo_newbuf(sc, i)) != 0)
1987			return (error);
1988	}
1989
1990	bus_dmamap_sync(sc->rl_ldata.rl_rx_list_tag,
1991	    sc->rl_ldata.rl_rx_list_map,
1992	    BUS_DMASYNC_PREWRITE | BUS_DMASYNC_PREREAD);
1993
1994	sc->rl_ldata.rl_rx_prodidx = 0;
1995	sc->rl_head = sc->rl_tail = NULL;
1996
1997	return (0);
1998}
1999
2000/*
2001 * RX handler for C+ and 8169. For the gigE chips, we support
2002 * the reception of jumbo frames that have been fragmented
2003 * across multiple 2K mbuf cluster buffers.
2004 */
2005static int
2006re_rxeof(struct rl_softc *sc, int *rx_npktsp)
2007{
2008	struct mbuf		*m;
2009	struct ifnet		*ifp;
2010	int			i, rxerr, total_len;
2011	struct rl_desc		*cur_rx;
2012	u_int32_t		rxstat, rxvlan;
2013	int			jumbo, maxpkt = 16, rx_npkts = 0;
2014
2015	RL_LOCK_ASSERT(sc);
2016
2017	ifp = sc->rl_ifp;
2018	if (ifp->if_mtu > RL_MTU && (sc->rl_flags & RL_FLAG_JUMBOV2) != 0)
2019		jumbo = 1;
2020	else
2021		jumbo = 0;
2022
2023	/* Invalidate the descriptor memory */
2024
2025	bus_dmamap_sync(sc->rl_ldata.rl_rx_list_tag,
2026	    sc->rl_ldata.rl_rx_list_map,
2027	    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
2028
2029	for (i = sc->rl_ldata.rl_rx_prodidx; maxpkt > 0;
2030	    i = RL_RX_DESC_NXT(sc, i)) {
2031		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
2032			break;
2033		cur_rx = &sc->rl_ldata.rl_rx_list[i];
2034		rxstat = le32toh(cur_rx->rl_cmdstat);
2035		if ((rxstat & RL_RDESC_STAT_OWN) != 0)
2036			break;
2037		total_len = rxstat & sc->rl_rxlenmask;
2038		rxvlan = le32toh(cur_rx->rl_vlanctl);
2039		if (jumbo != 0)
2040			m = sc->rl_ldata.rl_jrx_desc[i].rx_m;
2041		else
2042			m = sc->rl_ldata.rl_rx_desc[i].rx_m;
2043
2044		if ((sc->rl_flags & RL_FLAG_JUMBOV2) != 0 &&
2045		    (rxstat & (RL_RDESC_STAT_SOF | RL_RDESC_STAT_EOF)) !=
2046		    (RL_RDESC_STAT_SOF | RL_RDESC_STAT_EOF)) {
2047			/*
2048			 * RTL8168C or later controllers do not
2049			 * support multi-fragment packet.
2050			 */
2051			re_discard_rxbuf(sc, i);
2052			continue;
2053		} else if ((rxstat & RL_RDESC_STAT_EOF) == 0) {
2054			if (re_newbuf(sc, i) != 0) {
2055				/*
2056				 * If this is part of a multi-fragment packet,
2057				 * discard all the pieces.
2058				 */
2059				if (sc->rl_head != NULL) {
2060					m_freem(sc->rl_head);
2061					sc->rl_head = sc->rl_tail = NULL;
2062				}
2063				re_discard_rxbuf(sc, i);
2064				continue;
2065			}
2066			m->m_len = RE_RX_DESC_BUFLEN;
2067			if (sc->rl_head == NULL)
2068				sc->rl_head = sc->rl_tail = m;
2069			else {
2070				m->m_flags &= ~M_PKTHDR;
2071				sc->rl_tail->m_next = m;
2072				sc->rl_tail = m;
2073			}
2074			continue;
2075		}
2076
2077		/*
2078		 * NOTE: for the 8139C+, the frame length field
2079		 * is always 12 bits in size, but for the gigE chips,
2080		 * it is 13 bits (since the max RX frame length is 16K).
2081		 * Unfortunately, all 32 bits in the status word
2082		 * were already used, so to make room for the extra
2083		 * length bit, RealTek took out the 'frame alignment
2084		 * error' bit and shifted the other status bits
2085		 * over one slot. The OWN, EOR, FS and LS bits are
2086		 * still in the same places. We have already extracted
2087		 * the frame length and checked the OWN bit, so rather
2088		 * than using an alternate bit mapping, we shift the
2089		 * status bits one space to the right so we can evaluate
2090		 * them using the 8169 status as though it was in the
2091		 * same format as that of the 8139C+.
2092		 */
2093		if (sc->rl_type == RL_8169)
2094			rxstat >>= 1;
2095
2096		/*
2097		 * if total_len > 2^13-1, both _RXERRSUM and _GIANT will be
2098		 * set, but if CRC is clear, it will still be a valid frame.
2099		 */
2100		if ((rxstat & RL_RDESC_STAT_RXERRSUM) != 0) {
2101			rxerr = 1;
2102			if ((sc->rl_flags & RL_FLAG_JUMBOV2) == 0 &&
2103			    total_len > 8191 &&
2104			    (rxstat & RL_RDESC_STAT_ERRS) == RL_RDESC_STAT_GIANT)
2105				rxerr = 0;
2106			if (rxerr != 0) {
2107				ifp->if_ierrors++;
2108				/*
2109				 * If this is part of a multi-fragment packet,
2110				 * discard all the pieces.
2111				 */
2112				if (sc->rl_head != NULL) {
2113					m_freem(sc->rl_head);
2114					sc->rl_head = sc->rl_tail = NULL;
2115				}
2116				re_discard_rxbuf(sc, i);
2117				continue;
2118			}
2119		}
2120
2121		/*
2122		 * If allocating a replacement mbuf fails,
2123		 * reload the current one.
2124		 */
2125		if (jumbo != 0)
2126			rxerr = re_jumbo_newbuf(sc, i);
2127		else
2128			rxerr = re_newbuf(sc, i);
2129		if (rxerr != 0) {
2130			ifp->if_iqdrops++;
2131			if (sc->rl_head != NULL) {
2132				m_freem(sc->rl_head);
2133				sc->rl_head = sc->rl_tail = NULL;
2134			}
2135			re_discard_rxbuf(sc, i);
2136			continue;
2137		}
2138
2139		if (sc->rl_head != NULL) {
2140			if (jumbo != 0)
2141				m->m_len = total_len;
2142			else {
2143				m->m_len = total_len % RE_RX_DESC_BUFLEN;
2144				if (m->m_len == 0)
2145					m->m_len = RE_RX_DESC_BUFLEN;
2146			}
2147			/*
2148			 * Special case: if there's 4 bytes or less
2149			 * in this buffer, the mbuf can be discarded:
2150			 * the last 4 bytes is the CRC, which we don't
2151			 * care about anyway.
2152			 */
2153			if (m->m_len <= ETHER_CRC_LEN) {
2154				sc->rl_tail->m_len -=
2155				    (ETHER_CRC_LEN - m->m_len);
2156				m_freem(m);
2157			} else {
2158				m->m_len -= ETHER_CRC_LEN;
2159				m->m_flags &= ~M_PKTHDR;
2160				sc->rl_tail->m_next = m;
2161			}
2162			m = sc->rl_head;
2163			sc->rl_head = sc->rl_tail = NULL;
2164			m->m_pkthdr.len = total_len - ETHER_CRC_LEN;
2165		} else
2166			m->m_pkthdr.len = m->m_len =
2167			    (total_len - ETHER_CRC_LEN);
2168
2169#ifdef RE_FIXUP_RX
2170		re_fixup_rx(m);
2171#endif
2172		ifp->if_ipackets++;
2173		m->m_pkthdr.rcvif = ifp;
2174
2175		/* Do RX checksumming if enabled */
2176
2177		if (ifp->if_capenable & IFCAP_RXCSUM) {
2178			if ((sc->rl_flags & RL_FLAG_DESCV2) == 0) {
2179				/* Check IP header checksum */
2180				if (rxstat & RL_RDESC_STAT_PROTOID)
2181					m->m_pkthdr.csum_flags |=
2182					    CSUM_IP_CHECKED;
2183				if (!(rxstat & RL_RDESC_STAT_IPSUMBAD))
2184					m->m_pkthdr.csum_flags |=
2185					    CSUM_IP_VALID;
2186
2187				/* Check TCP/UDP checksum */
2188				if ((RL_TCPPKT(rxstat) &&
2189				    !(rxstat & RL_RDESC_STAT_TCPSUMBAD)) ||
2190				    (RL_UDPPKT(rxstat) &&
2191				     !(rxstat & RL_RDESC_STAT_UDPSUMBAD))) {
2192					m->m_pkthdr.csum_flags |=
2193						CSUM_DATA_VALID|CSUM_PSEUDO_HDR;
2194					m->m_pkthdr.csum_data = 0xffff;
2195				}
2196			} else {
2197				/*
2198				 * RTL8168C/RTL816CP/RTL8111C/RTL8111CP
2199				 */
2200				if ((rxstat & RL_RDESC_STAT_PROTOID) &&
2201				    (rxvlan & RL_RDESC_IPV4))
2202					m->m_pkthdr.csum_flags |=
2203					    CSUM_IP_CHECKED;
2204				if (!(rxstat & RL_RDESC_STAT_IPSUMBAD) &&
2205				    (rxvlan & RL_RDESC_IPV4))
2206					m->m_pkthdr.csum_flags |=
2207					    CSUM_IP_VALID;
2208				if (((rxstat & RL_RDESC_STAT_TCP) &&
2209				    !(rxstat & RL_RDESC_STAT_TCPSUMBAD)) ||
2210				    ((rxstat & RL_RDESC_STAT_UDP) &&
2211				    !(rxstat & RL_RDESC_STAT_UDPSUMBAD))) {
2212					m->m_pkthdr.csum_flags |=
2213						CSUM_DATA_VALID|CSUM_PSEUDO_HDR;
2214					m->m_pkthdr.csum_data = 0xffff;
2215				}
2216			}
2217		}
2218		maxpkt--;
2219		if (rxvlan & RL_RDESC_VLANCTL_TAG) {
2220			m->m_pkthdr.ether_vtag =
2221			    bswap16((rxvlan & RL_RDESC_VLANCTL_DATA));
2222			m->m_flags |= M_VLANTAG;
2223		}
2224		RL_UNLOCK(sc);
2225		(*ifp->if_input)(ifp, m);
2226		RL_LOCK(sc);
2227		rx_npkts++;
2228	}
2229
2230	/* Flush the RX DMA ring */
2231
2232	bus_dmamap_sync(sc->rl_ldata.rl_rx_list_tag,
2233	    sc->rl_ldata.rl_rx_list_map,
2234	    BUS_DMASYNC_PREWRITE|BUS_DMASYNC_PREREAD);
2235
2236	sc->rl_ldata.rl_rx_prodidx = i;
2237
2238	if (rx_npktsp != NULL)
2239		*rx_npktsp = rx_npkts;
2240	if (maxpkt)
2241		return (EAGAIN);
2242
2243	return (0);
2244}
2245
2246static void
2247re_txeof(struct rl_softc *sc)
2248{
2249	struct ifnet		*ifp;
2250	struct rl_txdesc	*txd;
2251	u_int32_t		txstat;
2252	int			cons;
2253
2254	cons = sc->rl_ldata.rl_tx_considx;
2255	if (cons == sc->rl_ldata.rl_tx_prodidx)
2256		return;
2257
2258	ifp = sc->rl_ifp;
2259	/* Invalidate the TX descriptor list */
2260	bus_dmamap_sync(sc->rl_ldata.rl_tx_list_tag,
2261	    sc->rl_ldata.rl_tx_list_map,
2262	    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
2263
2264	for (; cons != sc->rl_ldata.rl_tx_prodidx;
2265	    cons = RL_TX_DESC_NXT(sc, cons)) {
2266		txstat = le32toh(sc->rl_ldata.rl_tx_list[cons].rl_cmdstat);
2267		if (txstat & RL_TDESC_STAT_OWN)
2268			break;
2269		/*
2270		 * We only stash mbufs in the last descriptor
2271		 * in a fragment chain, which also happens to
2272		 * be the only place where the TX status bits
2273		 * are valid.
2274		 */
2275		if (txstat & RL_TDESC_CMD_EOF) {
2276			txd = &sc->rl_ldata.rl_tx_desc[cons];
2277			bus_dmamap_sync(sc->rl_ldata.rl_tx_mtag,
2278			    txd->tx_dmamap, BUS_DMASYNC_POSTWRITE);
2279			bus_dmamap_unload(sc->rl_ldata.rl_tx_mtag,
2280			    txd->tx_dmamap);
2281			KASSERT(txd->tx_m != NULL,
2282			    ("%s: freeing NULL mbufs!", __func__));
2283			m_freem(txd->tx_m);
2284			txd->tx_m = NULL;
2285			if (txstat & (RL_TDESC_STAT_EXCESSCOL|
2286			    RL_TDESC_STAT_COLCNT))
2287				ifp->if_collisions++;
2288			if (txstat & RL_TDESC_STAT_TXERRSUM)
2289				ifp->if_oerrors++;
2290			else
2291				ifp->if_opackets++;
2292		}
2293		sc->rl_ldata.rl_tx_free++;
2294		ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
2295	}
2296	sc->rl_ldata.rl_tx_considx = cons;
2297
2298	/* No changes made to the TX ring, so no flush needed */
2299
2300	if (sc->rl_ldata.rl_tx_free != sc->rl_ldata.rl_tx_desc_cnt) {
2301#ifdef RE_TX_MODERATION
2302		/*
2303		 * If not all descriptors have been reaped yet, reload
2304		 * the timer so that we will eventually get another
2305		 * interrupt that will cause us to re-enter this routine.
2306		 * This is done in case the transmitter has gone idle.
2307		 */
2308		CSR_WRITE_4(sc, RL_TIMERCNT, 1);
2309#endif
2310	} else
2311		sc->rl_watchdog_timer = 0;
2312}
2313
2314static void
2315re_tick(void *xsc)
2316{
2317	struct rl_softc		*sc;
2318	struct mii_data		*mii;
2319
2320	sc = xsc;
2321
2322	RL_LOCK_ASSERT(sc);
2323
2324	mii = device_get_softc(sc->rl_miibus);
2325	mii_tick(mii);
2326	if ((sc->rl_flags & RL_FLAG_LINK) == 0)
2327		re_miibus_statchg(sc->rl_dev);
2328	/*
2329	 * Reclaim transmitted frames here. Technically it is not
2330	 * necessary to do here but it ensures periodic reclamation
2331	 * regardless of Tx completion interrupt which seems to be
2332	 * lost on PCIe based controllers under certain situations.
2333	 */
2334	re_txeof(sc);
2335	re_watchdog(sc);
2336	callout_reset(&sc->rl_stat_callout, hz, re_tick, sc);
2337}
2338
2339#ifdef DEVICE_POLLING
2340static int
2341re_poll(struct ifnet *ifp, enum poll_cmd cmd, int count)
2342{
2343	struct rl_softc *sc = ifp->if_softc;
2344	int rx_npkts = 0;
2345
2346	RL_LOCK(sc);
2347	if (ifp->if_drv_flags & IFF_DRV_RUNNING)
2348		rx_npkts = re_poll_locked(ifp, cmd, count);
2349	RL_UNLOCK(sc);
2350	return (rx_npkts);
2351}
2352
2353static int
2354re_poll_locked(struct ifnet *ifp, enum poll_cmd cmd, int count)
2355{
2356	struct rl_softc *sc = ifp->if_softc;
2357	int rx_npkts;
2358
2359	RL_LOCK_ASSERT(sc);
2360
2361	sc->rxcycles = count;
2362	re_rxeof(sc, &rx_npkts);
2363	re_txeof(sc);
2364
2365	if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
2366		re_start_locked(ifp);
2367
2368	if (cmd == POLL_AND_CHECK_STATUS) { /* also check status register */
2369		u_int16_t       status;
2370
2371		status = CSR_READ_2(sc, RL_ISR);
2372		if (status == 0xffff)
2373			return (rx_npkts);
2374		if (status)
2375			CSR_WRITE_2(sc, RL_ISR, status);
2376		if ((status & (RL_ISR_TX_OK | RL_ISR_TX_DESC_UNAVAIL)) &&
2377		    (sc->rl_flags & RL_FLAG_PCIE))
2378			CSR_WRITE_1(sc, sc->rl_txstart, RL_TXSTART_START);
2379
2380		/*
2381		 * XXX check behaviour on receiver stalls.
2382		 */
2383
2384		if (status & RL_ISR_SYSTEM_ERR) {
2385			ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2386			re_init_locked(sc);
2387		}
2388	}
2389	return (rx_npkts);
2390}
2391#endif /* DEVICE_POLLING */
2392
2393static int
2394re_intr(void *arg)
2395{
2396	struct rl_softc		*sc;
2397	uint16_t		status;
2398
2399	sc = arg;
2400
2401	status = CSR_READ_2(sc, RL_ISR);
2402	if (status == 0xFFFF || (status & RL_INTRS_CPLUS) == 0)
2403                return (FILTER_STRAY);
2404	CSR_WRITE_2(sc, RL_IMR, 0);
2405
2406	taskqueue_enqueue_fast(taskqueue_fast, &sc->rl_inttask);
2407
2408	return (FILTER_HANDLED);
2409}
2410
2411static void
2412re_int_task(void *arg, int npending)
2413{
2414	struct rl_softc		*sc;
2415	struct ifnet		*ifp;
2416	u_int16_t		status;
2417	int			rval = 0;
2418
2419	sc = arg;
2420	ifp = sc->rl_ifp;
2421
2422	RL_LOCK(sc);
2423
2424	status = CSR_READ_2(sc, RL_ISR);
2425        CSR_WRITE_2(sc, RL_ISR, status);
2426
2427	if (sc->suspended ||
2428	    (ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
2429		RL_UNLOCK(sc);
2430		return;
2431	}
2432
2433#ifdef DEVICE_POLLING
2434	if  (ifp->if_capenable & IFCAP_POLLING) {
2435		RL_UNLOCK(sc);
2436		return;
2437	}
2438#endif
2439
2440	if (status & (RL_ISR_RX_OK|RL_ISR_RX_ERR|RL_ISR_FIFO_OFLOW))
2441		rval = re_rxeof(sc, NULL);
2442
2443	/*
2444	 * Some chips will ignore a second TX request issued
2445	 * while an existing transmission is in progress. If
2446	 * the transmitter goes idle but there are still
2447	 * packets waiting to be sent, we need to restart the
2448	 * channel here to flush them out. This only seems to
2449	 * be required with the PCIe devices.
2450	 */
2451	if ((status & (RL_ISR_TX_OK | RL_ISR_TX_DESC_UNAVAIL)) &&
2452	    (sc->rl_flags & RL_FLAG_PCIE))
2453		CSR_WRITE_1(sc, sc->rl_txstart, RL_TXSTART_START);
2454	if (status & (
2455#ifdef RE_TX_MODERATION
2456	    RL_ISR_TIMEOUT_EXPIRED|
2457#else
2458	    RL_ISR_TX_OK|
2459#endif
2460	    RL_ISR_TX_ERR|RL_ISR_TX_DESC_UNAVAIL))
2461		re_txeof(sc);
2462
2463	if (status & RL_ISR_SYSTEM_ERR) {
2464		ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2465		re_init_locked(sc);
2466	}
2467
2468	if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
2469		re_start_locked(ifp);
2470
2471	RL_UNLOCK(sc);
2472
2473        if ((CSR_READ_2(sc, RL_ISR) & RL_INTRS_CPLUS) || rval) {
2474		taskqueue_enqueue_fast(taskqueue_fast, &sc->rl_inttask);
2475		return;
2476	}
2477
2478	CSR_WRITE_2(sc, RL_IMR, RL_INTRS_CPLUS);
2479}
2480
2481static int
2482re_encap(struct rl_softc *sc, struct mbuf **m_head)
2483{
2484	struct rl_txdesc	*txd, *txd_last;
2485	bus_dma_segment_t	segs[RL_NTXSEGS];
2486	bus_dmamap_t		map;
2487	struct mbuf		*m_new;
2488	struct rl_desc		*desc;
2489	int			nsegs, prod;
2490	int			i, error, ei, si;
2491	int			padlen;
2492	uint32_t		cmdstat, csum_flags, vlanctl;
2493
2494	RL_LOCK_ASSERT(sc);
2495	M_ASSERTPKTHDR((*m_head));
2496
2497	/*
2498	 * With some of the RealTek chips, using the checksum offload
2499	 * support in conjunction with the autopadding feature results
2500	 * in the transmission of corrupt frames. For example, if we
2501	 * need to send a really small IP fragment that's less than 60
2502	 * bytes in size, and IP header checksumming is enabled, the
2503	 * resulting ethernet frame that appears on the wire will
2504	 * have garbled payload. To work around this, if TX IP checksum
2505	 * offload is enabled, we always manually pad short frames out
2506	 * to the minimum ethernet frame size.
2507	 */
2508	if ((sc->rl_flags & RL_FLAG_AUTOPAD) == 0 &&
2509	    (*m_head)->m_pkthdr.len < RL_IP4CSUMTX_PADLEN &&
2510	    ((*m_head)->m_pkthdr.csum_flags & CSUM_IP) != 0) {
2511		padlen = RL_MIN_FRAMELEN - (*m_head)->m_pkthdr.len;
2512		if (M_WRITABLE(*m_head) == 0) {
2513			/* Get a writable copy. */
2514			m_new = m_dup(*m_head, M_DONTWAIT);
2515			m_freem(*m_head);
2516			if (m_new == NULL) {
2517				*m_head = NULL;
2518				return (ENOBUFS);
2519			}
2520			*m_head = m_new;
2521		}
2522		if ((*m_head)->m_next != NULL ||
2523		    M_TRAILINGSPACE(*m_head) < padlen) {
2524			m_new = m_defrag(*m_head, M_DONTWAIT);
2525			if (m_new == NULL) {
2526				m_freem(*m_head);
2527				*m_head = NULL;
2528				return (ENOBUFS);
2529			}
2530		} else
2531			m_new = *m_head;
2532
2533		/*
2534		 * Manually pad short frames, and zero the pad space
2535		 * to avoid leaking data.
2536		 */
2537		bzero(mtod(m_new, char *) + m_new->m_pkthdr.len, padlen);
2538		m_new->m_pkthdr.len += padlen;
2539		m_new->m_len = m_new->m_pkthdr.len;
2540		*m_head = m_new;
2541	}
2542
2543	prod = sc->rl_ldata.rl_tx_prodidx;
2544	txd = &sc->rl_ldata.rl_tx_desc[prod];
2545	error = bus_dmamap_load_mbuf_sg(sc->rl_ldata.rl_tx_mtag, txd->tx_dmamap,
2546	    *m_head, segs, &nsegs, BUS_DMA_NOWAIT);
2547	if (error == EFBIG) {
2548		m_new = m_collapse(*m_head, M_DONTWAIT, RL_NTXSEGS);
2549		if (m_new == NULL) {
2550			m_freem(*m_head);
2551			*m_head = NULL;
2552			return (ENOBUFS);
2553		}
2554		*m_head = m_new;
2555		error = bus_dmamap_load_mbuf_sg(sc->rl_ldata.rl_tx_mtag,
2556		    txd->tx_dmamap, *m_head, segs, &nsegs, BUS_DMA_NOWAIT);
2557		if (error != 0) {
2558			m_freem(*m_head);
2559			*m_head = NULL;
2560			return (error);
2561		}
2562	} else if (error != 0)
2563		return (error);
2564	if (nsegs == 0) {
2565		m_freem(*m_head);
2566		*m_head = NULL;
2567		return (EIO);
2568	}
2569
2570	/* Check for number of available descriptors. */
2571	if (sc->rl_ldata.rl_tx_free - nsegs <= 1) {
2572		bus_dmamap_unload(sc->rl_ldata.rl_tx_mtag, txd->tx_dmamap);
2573		return (ENOBUFS);
2574	}
2575
2576	bus_dmamap_sync(sc->rl_ldata.rl_tx_mtag, txd->tx_dmamap,
2577	    BUS_DMASYNC_PREWRITE);
2578
2579	/*
2580	 * Set up checksum offload. Note: checksum offload bits must
2581	 * appear in all descriptors of a multi-descriptor transmit
2582	 * attempt. This is according to testing done with an 8169
2583	 * chip. This is a requirement.
2584	 */
2585	vlanctl = 0;
2586	csum_flags = 0;
2587	if (((*m_head)->m_pkthdr.csum_flags & CSUM_TSO) != 0) {
2588		if ((sc->rl_flags & RL_FLAG_DESCV2) != 0) {
2589			csum_flags |= RL_TDESC_CMD_LGSEND;
2590			vlanctl |= ((uint32_t)(*m_head)->m_pkthdr.tso_segsz <<
2591			    RL_TDESC_CMD_MSSVALV2_SHIFT);
2592		} else {
2593			csum_flags |= RL_TDESC_CMD_LGSEND |
2594			    ((uint32_t)(*m_head)->m_pkthdr.tso_segsz <<
2595			    RL_TDESC_CMD_MSSVAL_SHIFT);
2596		}
2597	} else {
2598		/*
2599		 * Unconditionally enable IP checksum if TCP or UDP
2600		 * checksum is required. Otherwise, TCP/UDP checksum
2601		 * does't make effects.
2602		 */
2603		if (((*m_head)->m_pkthdr.csum_flags & RE_CSUM_FEATURES) != 0) {
2604			if ((sc->rl_flags & RL_FLAG_DESCV2) == 0) {
2605				csum_flags |= RL_TDESC_CMD_IPCSUM;
2606				if (((*m_head)->m_pkthdr.csum_flags &
2607				    CSUM_TCP) != 0)
2608					csum_flags |= RL_TDESC_CMD_TCPCSUM;
2609				if (((*m_head)->m_pkthdr.csum_flags &
2610				    CSUM_UDP) != 0)
2611					csum_flags |= RL_TDESC_CMD_UDPCSUM;
2612			} else {
2613				vlanctl |= RL_TDESC_CMD_IPCSUMV2;
2614				if (((*m_head)->m_pkthdr.csum_flags &
2615				    CSUM_TCP) != 0)
2616					vlanctl |= RL_TDESC_CMD_TCPCSUMV2;
2617				if (((*m_head)->m_pkthdr.csum_flags &
2618				    CSUM_UDP) != 0)
2619					vlanctl |= RL_TDESC_CMD_UDPCSUMV2;
2620			}
2621		}
2622	}
2623
2624	/*
2625	 * Set up hardware VLAN tagging. Note: vlan tag info must
2626	 * appear in all descriptors of a multi-descriptor
2627	 * transmission attempt.
2628	 */
2629	if ((*m_head)->m_flags & M_VLANTAG)
2630		vlanctl |= bswap16((*m_head)->m_pkthdr.ether_vtag) |
2631		    RL_TDESC_VLANCTL_TAG;
2632
2633	si = prod;
2634	for (i = 0; i < nsegs; i++, prod = RL_TX_DESC_NXT(sc, prod)) {
2635		desc = &sc->rl_ldata.rl_tx_list[prod];
2636		desc->rl_vlanctl = htole32(vlanctl);
2637		desc->rl_bufaddr_lo = htole32(RL_ADDR_LO(segs[i].ds_addr));
2638		desc->rl_bufaddr_hi = htole32(RL_ADDR_HI(segs[i].ds_addr));
2639		cmdstat = segs[i].ds_len;
2640		if (i != 0)
2641			cmdstat |= RL_TDESC_CMD_OWN;
2642		if (prod == sc->rl_ldata.rl_tx_desc_cnt - 1)
2643			cmdstat |= RL_TDESC_CMD_EOR;
2644		desc->rl_cmdstat = htole32(cmdstat | csum_flags);
2645		sc->rl_ldata.rl_tx_free--;
2646	}
2647	/* Update producer index. */
2648	sc->rl_ldata.rl_tx_prodidx = prod;
2649
2650	/* Set EOF on the last descriptor. */
2651	ei = RL_TX_DESC_PRV(sc, prod);
2652	desc = &sc->rl_ldata.rl_tx_list[ei];
2653	desc->rl_cmdstat |= htole32(RL_TDESC_CMD_EOF);
2654
2655	desc = &sc->rl_ldata.rl_tx_list[si];
2656	/* Set SOF and transfer ownership of packet to the chip. */
2657	desc->rl_cmdstat |= htole32(RL_TDESC_CMD_OWN | RL_TDESC_CMD_SOF);
2658
2659	/*
2660	 * Insure that the map for this transmission
2661	 * is placed at the array index of the last descriptor
2662	 * in this chain.  (Swap last and first dmamaps.)
2663	 */
2664	txd_last = &sc->rl_ldata.rl_tx_desc[ei];
2665	map = txd->tx_dmamap;
2666	txd->tx_dmamap = txd_last->tx_dmamap;
2667	txd_last->tx_dmamap = map;
2668	txd_last->tx_m = *m_head;
2669
2670	return (0);
2671}
2672
2673static void
2674re_start(struct ifnet *ifp)
2675{
2676	struct rl_softc		*sc;
2677
2678	sc = ifp->if_softc;
2679	RL_LOCK(sc);
2680	re_start_locked(ifp);
2681	RL_UNLOCK(sc);
2682}
2683
2684/*
2685 * Main transmit routine for C+ and gigE NICs.
2686 */
2687static void
2688re_start_locked(struct ifnet *ifp)
2689{
2690	struct rl_softc		*sc;
2691	struct mbuf		*m_head;
2692	int			queued;
2693
2694	sc = ifp->if_softc;
2695
2696	if ((ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) !=
2697	    IFF_DRV_RUNNING || (sc->rl_flags & RL_FLAG_LINK) == 0)
2698		return;
2699
2700	for (queued = 0; !IFQ_DRV_IS_EMPTY(&ifp->if_snd) &&
2701	    sc->rl_ldata.rl_tx_free > 1;) {
2702		IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
2703		if (m_head == NULL)
2704			break;
2705
2706		if (re_encap(sc, &m_head) != 0) {
2707			if (m_head == NULL)
2708				break;
2709			IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
2710			ifp->if_drv_flags |= IFF_DRV_OACTIVE;
2711			break;
2712		}
2713
2714		/*
2715		 * If there's a BPF listener, bounce a copy of this frame
2716		 * to him.
2717		 */
2718		ETHER_BPF_MTAP(ifp, m_head);
2719
2720		queued++;
2721	}
2722
2723	if (queued == 0) {
2724#ifdef RE_TX_MODERATION
2725		if (sc->rl_ldata.rl_tx_free != sc->rl_ldata.rl_tx_desc_cnt)
2726			CSR_WRITE_4(sc, RL_TIMERCNT, 1);
2727#endif
2728		return;
2729	}
2730
2731	/* Flush the TX descriptors */
2732
2733	bus_dmamap_sync(sc->rl_ldata.rl_tx_list_tag,
2734	    sc->rl_ldata.rl_tx_list_map,
2735	    BUS_DMASYNC_PREWRITE|BUS_DMASYNC_PREREAD);
2736
2737	CSR_WRITE_1(sc, sc->rl_txstart, RL_TXSTART_START);
2738
2739#ifdef RE_TX_MODERATION
2740	/*
2741	 * Use the countdown timer for interrupt moderation.
2742	 * 'TX done' interrupts are disabled. Instead, we reset the
2743	 * countdown timer, which will begin counting until it hits
2744	 * the value in the TIMERINT register, and then trigger an
2745	 * interrupt. Each time we write to the TIMERCNT register,
2746	 * the timer count is reset to 0.
2747	 */
2748	CSR_WRITE_4(sc, RL_TIMERCNT, 1);
2749#endif
2750
2751	/*
2752	 * Set a timeout in case the chip goes out to lunch.
2753	 */
2754	sc->rl_watchdog_timer = 5;
2755}
2756
2757static void
2758re_set_jumbo(struct rl_softc *sc, int jumbo)
2759{
2760
2761	if (sc->rl_hwrev->rl_rev == RL_HWREV_8168E_VL) {
2762		pci_set_max_read_req(sc->rl_dev, 4096);
2763		return;
2764	}
2765
2766	CSR_WRITE_1(sc, RL_EECMD, RL_EEMODE_WRITECFG);
2767	if (jumbo != 0) {
2768		CSR_WRITE_1(sc, RL_CFG3, CSR_READ_1(sc, RL_CFG3) |
2769		    RL_CFG3_JUMBO_EN0);
2770		switch (sc->rl_hwrev->rl_rev) {
2771		case RL_HWREV_8168DP:
2772			break;
2773		case RL_HWREV_8168E:
2774			CSR_WRITE_1(sc, RL_CFG4, CSR_READ_1(sc, RL_CFG4) |
2775			    0x01);
2776			break;
2777		default:
2778			CSR_WRITE_1(sc, RL_CFG4, CSR_READ_1(sc, RL_CFG4) |
2779			    RL_CFG4_JUMBO_EN1);
2780		}
2781	} else {
2782		CSR_WRITE_1(sc, RL_CFG3, CSR_READ_1(sc, RL_CFG3) &
2783		    ~RL_CFG3_JUMBO_EN0);
2784		switch (sc->rl_hwrev->rl_rev) {
2785		case RL_HWREV_8168DP:
2786			break;
2787		case RL_HWREV_8168E:
2788			CSR_WRITE_1(sc, RL_CFG4, CSR_READ_1(sc, RL_CFG4) &
2789			    ~0x01);
2790			break;
2791		default:
2792			CSR_WRITE_1(sc, RL_CFG4, CSR_READ_1(sc, RL_CFG4) &
2793			    ~RL_CFG4_JUMBO_EN1);
2794		}
2795	}
2796	CSR_WRITE_1(sc, RL_EECMD, RL_EEMODE_OFF);
2797
2798	switch (sc->rl_hwrev->rl_rev) {
2799	case RL_HWREV_8168DP:
2800		pci_set_max_read_req(sc->rl_dev, 4096);
2801		break;
2802	default:
2803		if (jumbo != 0)
2804			pci_set_max_read_req(sc->rl_dev, 512);
2805		else
2806			pci_set_max_read_req(sc->rl_dev, 4096);
2807	}
2808}
2809
2810static void
2811re_init(void *xsc)
2812{
2813	struct rl_softc		*sc = xsc;
2814
2815	RL_LOCK(sc);
2816	re_init_locked(sc);
2817	RL_UNLOCK(sc);
2818}
2819
2820static void
2821re_init_locked(struct rl_softc *sc)
2822{
2823	struct ifnet		*ifp = sc->rl_ifp;
2824	struct mii_data		*mii;
2825	uint32_t		reg;
2826	uint16_t		cfg;
2827	union {
2828		uint32_t align_dummy;
2829		u_char eaddr[ETHER_ADDR_LEN];
2830        } eaddr;
2831
2832	RL_LOCK_ASSERT(sc);
2833
2834	mii = device_get_softc(sc->rl_miibus);
2835
2836	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) != 0)
2837		return;
2838
2839	/*
2840	 * Cancel pending I/O and free all RX/TX buffers.
2841	 */
2842	re_stop(sc);
2843
2844	/* Put controller into known state. */
2845	re_reset(sc);
2846
2847	/*
2848	 * For C+ mode, initialize the RX descriptors and mbufs.
2849	 */
2850	if ((sc->rl_flags & RL_FLAG_JUMBOV2) != 0) {
2851		if (ifp->if_mtu > RL_MTU) {
2852			if (re_jrx_list_init(sc) != 0) {
2853				device_printf(sc->rl_dev,
2854				    "no memory for jumbo RX buffers\n");
2855				re_stop(sc);
2856				return;
2857			}
2858			/* Disable checksum offloading for jumbo frames. */
2859			ifp->if_capenable &= ~(IFCAP_HWCSUM | IFCAP_TSO4);
2860			ifp->if_hwassist &= ~(RE_CSUM_FEATURES | CSUM_TSO);
2861		} else {
2862			if (re_rx_list_init(sc) != 0) {
2863				device_printf(sc->rl_dev,
2864				    "no memory for RX buffers\n");
2865				re_stop(sc);
2866				return;
2867			}
2868		}
2869		re_set_jumbo(sc, ifp->if_mtu > RL_MTU);
2870	} else {
2871		if (re_rx_list_init(sc) != 0) {
2872			device_printf(sc->rl_dev, "no memory for RX buffers\n");
2873			re_stop(sc);
2874			return;
2875		}
2876		if ((sc->rl_flags & RL_FLAG_PCIE) != 0 &&
2877		    pci_get_device(sc->rl_dev) != RT_DEVICEID_8101E) {
2878			if (ifp->if_mtu > RL_MTU)
2879				pci_set_max_read_req(sc->rl_dev, 512);
2880			else
2881				pci_set_max_read_req(sc->rl_dev, 4096);
2882		}
2883	}
2884	re_tx_list_init(sc);
2885
2886	/*
2887	 * Enable C+ RX and TX mode, as well as VLAN stripping and
2888	 * RX checksum offload. We must configure the C+ register
2889	 * before all others.
2890	 */
2891	cfg = RL_CPLUSCMD_PCI_MRW;
2892	if ((ifp->if_capenable & IFCAP_RXCSUM) != 0)
2893		cfg |= RL_CPLUSCMD_RXCSUM_ENB;
2894	if ((ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0)
2895		cfg |= RL_CPLUSCMD_VLANSTRIP;
2896	if ((sc->rl_flags & RL_FLAG_MACSTAT) != 0) {
2897		cfg |= RL_CPLUSCMD_MACSTAT_DIS;
2898		/* XXX magic. */
2899		cfg |= 0x0001;
2900	} else
2901		cfg |= RL_CPLUSCMD_RXENB | RL_CPLUSCMD_TXENB;
2902	CSR_WRITE_2(sc, RL_CPLUS_CMD, cfg);
2903	if (sc->rl_hwrev->rl_rev == RL_HWREV_8169_8110SC ||
2904	    sc->rl_hwrev->rl_rev == RL_HWREV_8169_8110SCE) {
2905		reg = 0x000fff00;
2906		if ((CSR_READ_1(sc, RL_CFG2) & RL_CFG2_PCI66MHZ) != 0)
2907			reg |= 0x000000ff;
2908		if (sc->rl_hwrev->rl_rev == RL_HWREV_8169_8110SCE)
2909			reg |= 0x00f00000;
2910		CSR_WRITE_4(sc, 0x7c, reg);
2911		/* Disable interrupt mitigation. */
2912		CSR_WRITE_2(sc, 0xe2, 0);
2913	}
2914	/*
2915	 * Disable TSO if interface MTU size is greater than MSS
2916	 * allowed in controller.
2917	 */
2918	if (ifp->if_mtu > RL_TSO_MTU && (ifp->if_capenable & IFCAP_TSO4) != 0) {
2919		ifp->if_capenable &= ~IFCAP_TSO4;
2920		ifp->if_hwassist &= ~CSUM_TSO;
2921	}
2922
2923	/*
2924	 * Init our MAC address.  Even though the chipset
2925	 * documentation doesn't mention it, we need to enter "Config
2926	 * register write enable" mode to modify the ID registers.
2927	 */
2928	/* Copy MAC address on stack to align. */
2929	bcopy(IF_LLADDR(ifp), eaddr.eaddr, ETHER_ADDR_LEN);
2930	CSR_WRITE_1(sc, RL_EECMD, RL_EEMODE_WRITECFG);
2931	CSR_WRITE_4(sc, RL_IDR0,
2932	    htole32(*(u_int32_t *)(&eaddr.eaddr[0])));
2933	CSR_WRITE_4(sc, RL_IDR4,
2934	    htole32(*(u_int32_t *)(&eaddr.eaddr[4])));
2935	CSR_WRITE_1(sc, RL_EECMD, RL_EEMODE_OFF);
2936
2937	/*
2938	 * Load the addresses of the RX and TX lists into the chip.
2939	 */
2940
2941	CSR_WRITE_4(sc, RL_RXLIST_ADDR_HI,
2942	    RL_ADDR_HI(sc->rl_ldata.rl_rx_list_addr));
2943	CSR_WRITE_4(sc, RL_RXLIST_ADDR_LO,
2944	    RL_ADDR_LO(sc->rl_ldata.rl_rx_list_addr));
2945
2946	CSR_WRITE_4(sc, RL_TXLIST_ADDR_HI,
2947	    RL_ADDR_HI(sc->rl_ldata.rl_tx_list_addr));
2948	CSR_WRITE_4(sc, RL_TXLIST_ADDR_LO,
2949	    RL_ADDR_LO(sc->rl_ldata.rl_tx_list_addr));
2950
2951	/*
2952	 * Enable transmit and receive.
2953	 */
2954	CSR_WRITE_1(sc, RL_COMMAND, RL_CMD_TX_ENB|RL_CMD_RX_ENB);
2955
2956	/*
2957	 * Set the initial TX configuration.
2958	 */
2959	if (sc->rl_testmode) {
2960		if (sc->rl_type == RL_8169)
2961			CSR_WRITE_4(sc, RL_TXCFG,
2962			    RL_TXCFG_CONFIG|RL_LOOPTEST_ON);
2963		else
2964			CSR_WRITE_4(sc, RL_TXCFG,
2965			    RL_TXCFG_CONFIG|RL_LOOPTEST_ON_CPLUS);
2966	} else
2967		CSR_WRITE_4(sc, RL_TXCFG, RL_TXCFG_CONFIG);
2968
2969	CSR_WRITE_1(sc, RL_EARLY_TX_THRESH, 16);
2970
2971	/*
2972	 * Set the initial RX configuration.
2973	 */
2974	re_set_rxmode(sc);
2975
2976	/* Configure interrupt moderation. */
2977	if (sc->rl_type == RL_8169) {
2978		/* Magic from vendor. */
2979		CSR_WRITE_2(sc, RL_INTRMOD, 0x5100);
2980	}
2981
2982#ifdef DEVICE_POLLING
2983	/*
2984	 * Disable interrupts if we are polling.
2985	 */
2986	if (ifp->if_capenable & IFCAP_POLLING)
2987		CSR_WRITE_2(sc, RL_IMR, 0);
2988	else	/* otherwise ... */
2989#endif
2990
2991	/*
2992	 * Enable interrupts.
2993	 */
2994	if (sc->rl_testmode)
2995		CSR_WRITE_2(sc, RL_IMR, 0);
2996	else
2997		CSR_WRITE_2(sc, RL_IMR, RL_INTRS_CPLUS);
2998	CSR_WRITE_2(sc, RL_ISR, RL_INTRS_CPLUS);
2999
3000	/* Set initial TX threshold */
3001	sc->rl_txthresh = RL_TX_THRESH_INIT;
3002
3003	/* Start RX/TX process. */
3004	CSR_WRITE_4(sc, RL_MISSEDPKT, 0);
3005#ifdef notdef
3006	/* Enable receiver and transmitter. */
3007	CSR_WRITE_1(sc, RL_COMMAND, RL_CMD_TX_ENB|RL_CMD_RX_ENB);
3008#endif
3009
3010#ifdef RE_TX_MODERATION
3011	/*
3012	 * Initialize the timer interrupt register so that
3013	 * a timer interrupt will be generated once the timer
3014	 * reaches a certain number of ticks. The timer is
3015	 * reloaded on each transmit. This gives us TX interrupt
3016	 * moderation, which dramatically improves TX frame rate.
3017	 */
3018	if (sc->rl_type == RL_8169)
3019		CSR_WRITE_4(sc, RL_TIMERINT_8169, 0x800);
3020	else
3021		CSR_WRITE_4(sc, RL_TIMERINT, 0x400);
3022#endif
3023
3024	/*
3025	 * For 8169 gigE NICs, set the max allowed RX packet
3026	 * size so we can receive jumbo frames.
3027	 */
3028	if (sc->rl_type == RL_8169) {
3029		if ((sc->rl_flags & RL_FLAG_JUMBOV2) != 0) {
3030			/*
3031			 * For controllers that use new jumbo frame scheme,
3032			 * set maximum size of jumbo frame depedning on
3033			 * controller revisions.
3034			 */
3035			if (ifp->if_mtu > RL_MTU)
3036				CSR_WRITE_2(sc, RL_MAXRXPKTLEN,
3037				    sc->rl_hwrev->rl_max_mtu +
3038				    ETHER_VLAN_ENCAP_LEN + ETHER_HDR_LEN +
3039				    ETHER_CRC_LEN);
3040			else
3041				CSR_WRITE_2(sc, RL_MAXRXPKTLEN,
3042				    RE_RX_DESC_BUFLEN);
3043		} else if ((sc->rl_flags & RL_FLAG_PCIE) != 0 &&
3044		    sc->rl_hwrev->rl_max_mtu == RL_MTU) {
3045			/* RTL810x has no jumbo frame support. */
3046			CSR_WRITE_2(sc, RL_MAXRXPKTLEN, RE_RX_DESC_BUFLEN);
3047		} else
3048			CSR_WRITE_2(sc, RL_MAXRXPKTLEN, 16383);
3049	}
3050
3051	if (sc->rl_testmode)
3052		return;
3053
3054	mii_mediachg(mii);
3055
3056	CSR_WRITE_1(sc, RL_CFG1, CSR_READ_1(sc, RL_CFG1) | RL_CFG1_DRVLOAD);
3057
3058	ifp->if_drv_flags |= IFF_DRV_RUNNING;
3059	ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
3060
3061	sc->rl_flags &= ~RL_FLAG_LINK;
3062	sc->rl_watchdog_timer = 0;
3063	callout_reset(&sc->rl_stat_callout, hz, re_tick, sc);
3064}
3065
3066/*
3067 * Set media options.
3068 */
3069static int
3070re_ifmedia_upd(struct ifnet *ifp)
3071{
3072	struct rl_softc		*sc;
3073	struct mii_data		*mii;
3074	int			error;
3075
3076	sc = ifp->if_softc;
3077	mii = device_get_softc(sc->rl_miibus);
3078	RL_LOCK(sc);
3079	error = mii_mediachg(mii);
3080	RL_UNLOCK(sc);
3081
3082	return (error);
3083}
3084
3085/*
3086 * Report current media status.
3087 */
3088static void
3089re_ifmedia_sts(struct ifnet *ifp, struct ifmediareq *ifmr)
3090{
3091	struct rl_softc		*sc;
3092	struct mii_data		*mii;
3093
3094	sc = ifp->if_softc;
3095	mii = device_get_softc(sc->rl_miibus);
3096
3097	RL_LOCK(sc);
3098	mii_pollstat(mii);
3099	RL_UNLOCK(sc);
3100	ifmr->ifm_active = mii->mii_media_active;
3101	ifmr->ifm_status = mii->mii_media_status;
3102}
3103
3104static int
3105re_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
3106{
3107	struct rl_softc		*sc = ifp->if_softc;
3108	struct ifreq		*ifr = (struct ifreq *) data;
3109	struct mii_data		*mii;
3110	int			error = 0;
3111
3112	switch (command) {
3113	case SIOCSIFMTU:
3114		if (ifr->ifr_mtu < ETHERMIN ||
3115		    ifr->ifr_mtu > sc->rl_hwrev->rl_max_mtu) {
3116			error = EINVAL;
3117			break;
3118		}
3119		RL_LOCK(sc);
3120		if (ifp->if_mtu != ifr->ifr_mtu) {
3121			ifp->if_mtu = ifr->ifr_mtu;
3122			if ((sc->rl_flags & RL_FLAG_JUMBOV2) != 0 &&
3123			    (ifp->if_drv_flags & IFF_DRV_RUNNING) != 0) {
3124				ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
3125				re_init_locked(sc);
3126			}
3127			if (ifp->if_mtu > RL_TSO_MTU &&
3128			    (ifp->if_capenable & IFCAP_TSO4) != 0) {
3129				ifp->if_capenable &= ~(IFCAP_TSO4 |
3130				    IFCAP_VLAN_HWTSO);
3131				ifp->if_hwassist &= ~CSUM_TSO;
3132			}
3133			VLAN_CAPABILITIES(ifp);
3134		}
3135		RL_UNLOCK(sc);
3136		break;
3137	case SIOCSIFFLAGS:
3138		RL_LOCK(sc);
3139		if ((ifp->if_flags & IFF_UP) != 0) {
3140			if ((ifp->if_drv_flags & IFF_DRV_RUNNING) != 0) {
3141				if (((ifp->if_flags ^ sc->rl_if_flags)
3142				    & (IFF_PROMISC | IFF_ALLMULTI)) != 0)
3143					re_set_rxmode(sc);
3144			} else
3145				re_init_locked(sc);
3146		} else {
3147			if ((ifp->if_drv_flags & IFF_DRV_RUNNING) != 0)
3148				re_stop(sc);
3149		}
3150		sc->rl_if_flags = ifp->if_flags;
3151		RL_UNLOCK(sc);
3152		break;
3153	case SIOCADDMULTI:
3154	case SIOCDELMULTI:
3155		RL_LOCK(sc);
3156		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) != 0)
3157			re_set_rxmode(sc);
3158		RL_UNLOCK(sc);
3159		break;
3160	case SIOCGIFMEDIA:
3161	case SIOCSIFMEDIA:
3162		mii = device_get_softc(sc->rl_miibus);
3163		error = ifmedia_ioctl(ifp, ifr, &mii->mii_media, command);
3164		break;
3165	case SIOCSIFCAP:
3166	    {
3167		int mask, reinit;
3168
3169		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
3170		reinit = 0;
3171#ifdef DEVICE_POLLING
3172		if (mask & IFCAP_POLLING) {
3173			if (ifr->ifr_reqcap & IFCAP_POLLING) {
3174				error = ether_poll_register(re_poll, ifp);
3175				if (error)
3176					return (error);
3177				RL_LOCK(sc);
3178				/* Disable interrupts */
3179				CSR_WRITE_2(sc, RL_IMR, 0x0000);
3180				ifp->if_capenable |= IFCAP_POLLING;
3181				RL_UNLOCK(sc);
3182			} else {
3183				error = ether_poll_deregister(ifp);
3184				/* Enable interrupts. */
3185				RL_LOCK(sc);
3186				CSR_WRITE_2(sc, RL_IMR, RL_INTRS_CPLUS);
3187				ifp->if_capenable &= ~IFCAP_POLLING;
3188				RL_UNLOCK(sc);
3189			}
3190		}
3191#endif /* DEVICE_POLLING */
3192		if ((mask & IFCAP_TXCSUM) != 0 &&
3193		    (ifp->if_capabilities & IFCAP_TXCSUM) != 0) {
3194			ifp->if_capenable ^= IFCAP_TXCSUM;
3195			if ((ifp->if_capenable & IFCAP_TXCSUM) != 0)
3196				ifp->if_hwassist |= RE_CSUM_FEATURES;
3197			else
3198				ifp->if_hwassist &= ~RE_CSUM_FEATURES;
3199			reinit = 1;
3200		}
3201		if ((mask & IFCAP_RXCSUM) != 0 &&
3202		    (ifp->if_capabilities & IFCAP_RXCSUM) != 0) {
3203			ifp->if_capenable ^= IFCAP_RXCSUM;
3204			reinit = 1;
3205		}
3206		if ((mask & IFCAP_TSO4) != 0 &&
3207		    (ifp->if_capabilities & IFCAP_TSO) != 0) {
3208			ifp->if_capenable ^= IFCAP_TSO4;
3209			if ((IFCAP_TSO4 & ifp->if_capenable) != 0)
3210				ifp->if_hwassist |= CSUM_TSO;
3211			else
3212				ifp->if_hwassist &= ~CSUM_TSO;
3213			if (ifp->if_mtu > RL_TSO_MTU &&
3214			    (ifp->if_capenable & IFCAP_TSO4) != 0) {
3215				ifp->if_capenable &= ~IFCAP_TSO4;
3216				ifp->if_hwassist &= ~CSUM_TSO;
3217			}
3218		}
3219		if ((mask & IFCAP_VLAN_HWTSO) != 0 &&
3220		    (ifp->if_capabilities & IFCAP_VLAN_HWTSO) != 0)
3221			ifp->if_capenable ^= IFCAP_VLAN_HWTSO;
3222		if ((mask & IFCAP_VLAN_HWTAGGING) != 0 &&
3223		    (ifp->if_capabilities & IFCAP_VLAN_HWTAGGING) != 0) {
3224			ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
3225			/* TSO over VLAN requires VLAN hardware tagging. */
3226			if ((ifp->if_capenable & IFCAP_VLAN_HWTAGGING) == 0)
3227				ifp->if_capenable &= ~IFCAP_VLAN_HWTSO;
3228			reinit = 1;
3229		}
3230		if ((sc->rl_flags & RL_FLAG_JUMBOV2) != 0 &&
3231		    (mask & (IFCAP_HWCSUM | IFCAP_TSO4 |
3232		    IFCAP_VLAN_HWTSO)) != 0)
3233				reinit = 1;
3234		if ((mask & IFCAP_WOL) != 0 &&
3235		    (ifp->if_capabilities & IFCAP_WOL) != 0) {
3236			if ((mask & IFCAP_WOL_UCAST) != 0)
3237				ifp->if_capenable ^= IFCAP_WOL_UCAST;
3238			if ((mask & IFCAP_WOL_MCAST) != 0)
3239				ifp->if_capenable ^= IFCAP_WOL_MCAST;
3240			if ((mask & IFCAP_WOL_MAGIC) != 0)
3241				ifp->if_capenable ^= IFCAP_WOL_MAGIC;
3242		}
3243		if (reinit && ifp->if_drv_flags & IFF_DRV_RUNNING) {
3244			ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
3245			re_init(sc);
3246		}
3247		VLAN_CAPABILITIES(ifp);
3248	    }
3249		break;
3250	default:
3251		error = ether_ioctl(ifp, command, data);
3252		break;
3253	}
3254
3255	return (error);
3256}
3257
3258static void
3259re_watchdog(struct rl_softc *sc)
3260{
3261	struct ifnet		*ifp;
3262
3263	RL_LOCK_ASSERT(sc);
3264
3265	if (sc->rl_watchdog_timer == 0 || --sc->rl_watchdog_timer != 0)
3266		return;
3267
3268	ifp = sc->rl_ifp;
3269	re_txeof(sc);
3270	if (sc->rl_ldata.rl_tx_free == sc->rl_ldata.rl_tx_desc_cnt) {
3271		if_printf(ifp, "watchdog timeout (missed Tx interrupts) "
3272		    "-- recovering\n");
3273		if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
3274			re_start_locked(ifp);
3275		return;
3276	}
3277
3278	if_printf(ifp, "watchdog timeout\n");
3279	ifp->if_oerrors++;
3280
3281	re_rxeof(sc, NULL);
3282	ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
3283	re_init_locked(sc);
3284	if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
3285		re_start_locked(ifp);
3286}
3287
3288/*
3289 * Stop the adapter and free any mbufs allocated to the
3290 * RX and TX lists.
3291 */
3292static void
3293re_stop(struct rl_softc *sc)
3294{
3295	int			i;
3296	struct ifnet		*ifp;
3297	struct rl_txdesc	*txd;
3298	struct rl_rxdesc	*rxd;
3299
3300	RL_LOCK_ASSERT(sc);
3301
3302	ifp = sc->rl_ifp;
3303
3304	sc->rl_watchdog_timer = 0;
3305	callout_stop(&sc->rl_stat_callout);
3306	ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
3307
3308	if ((sc->rl_flags & RL_FLAG_CMDSTOP) != 0)
3309		CSR_WRITE_1(sc, RL_COMMAND, RL_CMD_STOPREQ | RL_CMD_TX_ENB |
3310		    RL_CMD_RX_ENB);
3311	else
3312		CSR_WRITE_1(sc, RL_COMMAND, 0x00);
3313	DELAY(1000);
3314	CSR_WRITE_2(sc, RL_IMR, 0x0000);
3315	CSR_WRITE_2(sc, RL_ISR, 0xFFFF);
3316
3317	if (sc->rl_head != NULL) {
3318		m_freem(sc->rl_head);
3319		sc->rl_head = sc->rl_tail = NULL;
3320	}
3321
3322	/* Free the TX list buffers. */
3323
3324	for (i = 0; i < sc->rl_ldata.rl_tx_desc_cnt; i++) {
3325		txd = &sc->rl_ldata.rl_tx_desc[i];
3326		if (txd->tx_m != NULL) {
3327			bus_dmamap_sync(sc->rl_ldata.rl_tx_mtag,
3328			    txd->tx_dmamap, BUS_DMASYNC_POSTWRITE);
3329			bus_dmamap_unload(sc->rl_ldata.rl_tx_mtag,
3330			    txd->tx_dmamap);
3331			m_freem(txd->tx_m);
3332			txd->tx_m = NULL;
3333		}
3334	}
3335
3336	/* Free the RX list buffers. */
3337
3338	for (i = 0; i < sc->rl_ldata.rl_rx_desc_cnt; i++) {
3339		rxd = &sc->rl_ldata.rl_rx_desc[i];
3340		if (rxd->rx_m != NULL) {
3341			bus_dmamap_sync(sc->rl_ldata.rl_tx_mtag,
3342			    rxd->rx_dmamap, BUS_DMASYNC_POSTREAD);
3343			bus_dmamap_unload(sc->rl_ldata.rl_rx_mtag,
3344			    rxd->rx_dmamap);
3345			m_freem(rxd->rx_m);
3346			rxd->rx_m = NULL;
3347		}
3348	}
3349}
3350
3351/*
3352 * Device suspend routine.  Stop the interface and save some PCI
3353 * settings in case the BIOS doesn't restore them properly on
3354 * resume.
3355 */
3356static int
3357re_suspend(device_t dev)
3358{
3359	struct rl_softc		*sc;
3360
3361	sc = device_get_softc(dev);
3362
3363	RL_LOCK(sc);
3364	re_stop(sc);
3365	re_setwol(sc);
3366	sc->suspended = 1;
3367	RL_UNLOCK(sc);
3368
3369	return (0);
3370}
3371
3372/*
3373 * Device resume routine.  Restore some PCI settings in case the BIOS
3374 * doesn't, re-enable busmastering, and restart the interface if
3375 * appropriate.
3376 */
3377static int
3378re_resume(device_t dev)
3379{
3380	struct rl_softc		*sc;
3381	struct ifnet		*ifp;
3382
3383	sc = device_get_softc(dev);
3384
3385	RL_LOCK(sc);
3386
3387	ifp = sc->rl_ifp;
3388	/* Take controller out of sleep mode. */
3389	if ((sc->rl_flags & RL_FLAG_MACSLEEP) != 0) {
3390		if ((CSR_READ_1(sc, RL_MACDBG) & 0x80) == 0x80)
3391			CSR_WRITE_1(sc, RL_GPIO,
3392			    CSR_READ_1(sc, RL_GPIO) | 0x01);
3393	}
3394
3395	/*
3396	 * Clear WOL matching such that normal Rx filtering
3397	 * wouldn't interfere with WOL patterns.
3398	 */
3399	re_clrwol(sc);
3400
3401	/* reinitialize interface if necessary */
3402	if (ifp->if_flags & IFF_UP)
3403		re_init_locked(sc);
3404
3405	sc->suspended = 0;
3406	RL_UNLOCK(sc);
3407
3408	return (0);
3409}
3410
3411/*
3412 * Stop all chip I/O so that the kernel's probe routines don't
3413 * get confused by errant DMAs when rebooting.
3414 */
3415static int
3416re_shutdown(device_t dev)
3417{
3418	struct rl_softc		*sc;
3419
3420	sc = device_get_softc(dev);
3421
3422	RL_LOCK(sc);
3423	re_stop(sc);
3424	/*
3425	 * Mark interface as down since otherwise we will panic if
3426	 * interrupt comes in later on, which can happen in some
3427	 * cases.
3428	 */
3429	sc->rl_ifp->if_flags &= ~IFF_UP;
3430	re_setwol(sc);
3431	RL_UNLOCK(sc);
3432
3433	return (0);
3434}
3435
3436static void
3437re_setwol(struct rl_softc *sc)
3438{
3439	struct ifnet		*ifp;
3440	int			pmc;
3441	uint16_t		pmstat;
3442	uint8_t			v;
3443
3444	RL_LOCK_ASSERT(sc);
3445
3446	if (pci_find_extcap(sc->rl_dev, PCIY_PMG, &pmc) != 0)
3447		return;
3448
3449	ifp = sc->rl_ifp;
3450	/* Put controller into sleep mode. */
3451	if ((sc->rl_flags & RL_FLAG_MACSLEEP) != 0) {
3452		if ((CSR_READ_1(sc, RL_MACDBG) & 0x80) == 0x80)
3453			CSR_WRITE_1(sc, RL_GPIO,
3454			    CSR_READ_1(sc, RL_GPIO) & ~0x01);
3455	}
3456	if ((ifp->if_capenable & IFCAP_WOL) != 0 &&
3457	    (sc->rl_flags & RL_FLAG_WOLRXENB) != 0)
3458		CSR_WRITE_1(sc, RL_COMMAND, RL_CMD_RX_ENB);
3459	/* Enable config register write. */
3460	CSR_WRITE_1(sc, RL_EECMD, RL_EE_MODE);
3461
3462	/* Enable PME. */
3463	v = CSR_READ_1(sc, RL_CFG1);
3464	v &= ~RL_CFG1_PME;
3465	if ((ifp->if_capenable & IFCAP_WOL) != 0)
3466		v |= RL_CFG1_PME;
3467	CSR_WRITE_1(sc, RL_CFG1, v);
3468
3469	v = CSR_READ_1(sc, RL_CFG3);
3470	v &= ~(RL_CFG3_WOL_LINK | RL_CFG3_WOL_MAGIC);
3471	if ((ifp->if_capenable & IFCAP_WOL_MAGIC) != 0)
3472		v |= RL_CFG3_WOL_MAGIC;
3473	CSR_WRITE_1(sc, RL_CFG3, v);
3474
3475	/* Config register write done. */
3476	CSR_WRITE_1(sc, RL_EECMD, RL_EEMODE_OFF);
3477
3478	v = CSR_READ_1(sc, RL_CFG5);
3479	v &= ~(RL_CFG5_WOL_BCAST | RL_CFG5_WOL_MCAST | RL_CFG5_WOL_UCAST);
3480	v &= ~RL_CFG5_WOL_LANWAKE;
3481	if ((ifp->if_capenable & IFCAP_WOL_UCAST) != 0)
3482		v |= RL_CFG5_WOL_UCAST;
3483	if ((ifp->if_capenable & IFCAP_WOL_MCAST) != 0)
3484		v |= RL_CFG5_WOL_MCAST | RL_CFG5_WOL_BCAST;
3485	if ((ifp->if_capenable & IFCAP_WOL) != 0)
3486		v |= RL_CFG5_WOL_LANWAKE;
3487	CSR_WRITE_1(sc, RL_CFG5, v);
3488
3489	if ((ifp->if_capenable & IFCAP_WOL) != 0 &&
3490	    (sc->rl_flags & RL_FLAG_PHYWAKE_PM) != 0)
3491		CSR_WRITE_1(sc, RL_PMCH, CSR_READ_1(sc, RL_PMCH) & ~0x80);
3492	/*
3493	 * It seems that hardware resets its link speed to 100Mbps in
3494	 * power down mode so switching to 100Mbps in driver is not
3495	 * needed.
3496	 */
3497
3498	/* Request PME if WOL is requested. */
3499	pmstat = pci_read_config(sc->rl_dev, pmc + PCIR_POWER_STATUS, 2);
3500	pmstat &= ~(PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE);
3501	if ((ifp->if_capenable & IFCAP_WOL) != 0)
3502		pmstat |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE;
3503	pci_write_config(sc->rl_dev, pmc + PCIR_POWER_STATUS, pmstat, 2);
3504}
3505
3506static void
3507re_clrwol(struct rl_softc *sc)
3508{
3509	int			pmc;
3510	uint8_t			v;
3511
3512	RL_LOCK_ASSERT(sc);
3513
3514	if (pci_find_extcap(sc->rl_dev, PCIY_PMG, &pmc) != 0)
3515		return;
3516
3517	/* Enable config register write. */
3518	CSR_WRITE_1(sc, RL_EECMD, RL_EE_MODE);
3519
3520	v = CSR_READ_1(sc, RL_CFG3);
3521	v &= ~(RL_CFG3_WOL_LINK | RL_CFG3_WOL_MAGIC);
3522	CSR_WRITE_1(sc, RL_CFG3, v);
3523
3524	/* Config register write done. */
3525	CSR_WRITE_1(sc, RL_EECMD, RL_EEMODE_OFF);
3526
3527	v = CSR_READ_1(sc, RL_CFG5);
3528	v &= ~(RL_CFG5_WOL_BCAST | RL_CFG5_WOL_MCAST | RL_CFG5_WOL_UCAST);
3529	v &= ~RL_CFG5_WOL_LANWAKE;
3530	CSR_WRITE_1(sc, RL_CFG5, v);
3531}
3532
3533static void
3534re_add_sysctls(struct rl_softc *sc)
3535{
3536	struct sysctl_ctx_list	*ctx;
3537	struct sysctl_oid_list	*children;
3538
3539	ctx = device_get_sysctl_ctx(sc->rl_dev);
3540	children = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->rl_dev));
3541
3542	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "stats",
3543	    CTLTYPE_INT | CTLFLAG_RW, sc, 0, re_sysctl_stats, "I",
3544	    "Statistics Information");
3545}
3546
3547static int
3548re_sysctl_stats(SYSCTL_HANDLER_ARGS)
3549{
3550	struct rl_softc		*sc;
3551	struct rl_stats		*stats;
3552	int			error, i, result;
3553
3554	result = -1;
3555	error = sysctl_handle_int(oidp, &result, 0, req);
3556	if (error || req->newptr == NULL)
3557		return (error);
3558
3559	if (result == 1) {
3560		sc = (struct rl_softc *)arg1;
3561		RL_LOCK(sc);
3562		if ((sc->rl_ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
3563			RL_UNLOCK(sc);
3564			goto done;
3565		}
3566		bus_dmamap_sync(sc->rl_ldata.rl_stag,
3567		    sc->rl_ldata.rl_smap, BUS_DMASYNC_PREREAD);
3568		CSR_WRITE_4(sc, RL_DUMPSTATS_HI,
3569		    RL_ADDR_HI(sc->rl_ldata.rl_stats_addr));
3570		CSR_WRITE_4(sc, RL_DUMPSTATS_LO,
3571		    RL_ADDR_LO(sc->rl_ldata.rl_stats_addr));
3572		CSR_WRITE_4(sc, RL_DUMPSTATS_LO,
3573		    RL_ADDR_LO(sc->rl_ldata.rl_stats_addr |
3574		    RL_DUMPSTATS_START));
3575		for (i = RL_TIMEOUT; i > 0; i--) {
3576			if ((CSR_READ_4(sc, RL_DUMPSTATS_LO) &
3577			    RL_DUMPSTATS_START) == 0)
3578				break;
3579			DELAY(1000);
3580		}
3581		bus_dmamap_sync(sc->rl_ldata.rl_stag,
3582		    sc->rl_ldata.rl_smap, BUS_DMASYNC_POSTREAD);
3583		RL_UNLOCK(sc);
3584		if (i == 0) {
3585			device_printf(sc->rl_dev,
3586			    "DUMP statistics request timedout\n");
3587			return (ETIMEDOUT);
3588		}
3589done:
3590		stats = sc->rl_ldata.rl_stats;
3591		printf("%s statistics:\n", device_get_nameunit(sc->rl_dev));
3592		printf("Tx frames : %ju\n",
3593		    (uintmax_t)le64toh(stats->rl_tx_pkts));
3594		printf("Rx frames : %ju\n",
3595		    (uintmax_t)le64toh(stats->rl_rx_pkts));
3596		printf("Tx errors : %ju\n",
3597		    (uintmax_t)le64toh(stats->rl_tx_errs));
3598		printf("Rx errors : %u\n",
3599		    le32toh(stats->rl_rx_errs));
3600		printf("Rx missed frames : %u\n",
3601		    (uint32_t)le16toh(stats->rl_missed_pkts));
3602		printf("Rx frame alignment errs : %u\n",
3603		    (uint32_t)le16toh(stats->rl_rx_framealign_errs));
3604		printf("Tx single collisions : %u\n",
3605		    le32toh(stats->rl_tx_onecoll));
3606		printf("Tx multiple collisions : %u\n",
3607		    le32toh(stats->rl_tx_multicolls));
3608		printf("Rx unicast frames : %ju\n",
3609		    (uintmax_t)le64toh(stats->rl_rx_ucasts));
3610		printf("Rx broadcast frames : %ju\n",
3611		    (uintmax_t)le64toh(stats->rl_rx_bcasts));
3612		printf("Rx multicast frames : %u\n",
3613		    le32toh(stats->rl_rx_mcasts));
3614		printf("Tx aborts : %u\n",
3615		    (uint32_t)le16toh(stats->rl_tx_aborts));
3616		printf("Tx underruns : %u\n",
3617		    (uint32_t)le16toh(stats->rl_rx_underruns));
3618	}
3619
3620	return (error);
3621}
3622