/*-
 * Copyright (c) 1997, 1998-2003
 *	Bill Paul <wpaul@windriver.com>.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by Bill Paul.
 * 4. Neither the name of the author nor the names of any co-contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY Bill Paul AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL Bill Paul OR THE VOICES IN HIS HEAD
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
 * THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: head/sys/dev/re/if_re.c 195675 2009-07-14 04:35:13Z avatar $");

/*
 * RealTek 8139C+/8169/8169S/8110S/8168/8111/8101E PCI NIC driver
 *
 * Written by Bill Paul <wpaul@windriver.com>
 * Senior Networking Software Engineer
 * Wind River Systems
 */

/*
 * This driver is designed to support RealTek's next generation of
 * 10/100 and 10/100/1000 PCI ethernet controllers. There are currently
 * seven devices in this family: the RTL8139C+, the RTL8169, the RTL8169S,
 * RTL8110S, the RTL8168, the RTL8111 and the RTL8101E.
 *
 * The 8139C+ is a 10/100 ethernet chip. It is backwards compatible
 * with the older 8139 family; however, it also supports a special
 * C+ mode of operation that provides several new performance-enhancing
 * features. These include:
 *
 *	o Descriptor based DMA mechanism. Each descriptor represents
 *	  a single packet fragment. Data buffers may be aligned on
 *	  any byte boundary.
 *
 *	o 64-bit DMA
 *
 *	o TCP/IP checksum offload for both RX and TX
 *
 *	o High and normal priority transmit DMA rings
 *
 *	o VLAN tag insertion and extraction
 *
 *	o TCP large send (segmentation offload)
 *
 * Like the 8139, the 8139C+ also has a built-in 10/100 PHY. The C+
 * programming API is fairly straightforward. The RX filtering, EEPROM
 * access and PHY access are the same as on the older 8139 series
 * chips.
 *
 * The 8169 is a 64-bit 10/100/1000 gigabit ethernet MAC. It has almost the
 * same programming API and feature set as the 8139C+ with the following
 * differences and additions:
 *
 *	o 1000Mbps mode
 *
 *	o Jumbo frames
 *
 *	o GMII and TBI ports/registers for interfacing with copper
 *	  or fiber PHYs
 *
 *	o RX and TX DMA rings can have up to 1024 descriptors
 *	  (the 8139C+ allows a maximum of 64)
 *
 *	o Slight differences in register layout from the 8139C+
 *
 * The TX start and timer interrupt registers are at different locations
 * on the 8169 than they are on the 8139C+. Also, the status word in the
 * RX descriptor has a slightly different bit layout. The 8169 does not
 * have a built-in PHY. Most reference boards use a Marvell 88E1000 'Alaska'
 * copper gigE PHY.
 *
 * The 8169S/8110S 10/100/1000 devices have built-in copper gigE PHYs
 * (the 'S' stands for 'single-chip'). These devices have the same
 * programming API as the older 8169, but also have some vendor-specific
 * registers for the on-board PHY. The 8110S is a LAN-on-motherboard
 * part designed to be pin-compatible with the RealTek 8100 10/100 chip.
 *
 * This driver takes advantage of the RX and TX checksum offload and
 * VLAN tag insertion/extraction features. It also implements TX
 * interrupt moderation using the timer interrupt registers, which
 * significantly reduces TX interrupt load. There is also support
 * for jumbo frames; however, the 8169/8169S/8110S cannot transmit
 * jumbo frames larger than 7440 bytes, so the maximum MTU possible
 * with this driver is 7422 bytes.
 */

#ifdef HAVE_KERNEL_OPTION_HEADERS
#include "opt_device_polling.h"
#endif

#include <sys/param.h>
#include <sys/endian.h>
#include <sys/systm.h>
#include <sys/sockio.h>
#include <sys/mbuf.h>
#include <sys/malloc.h>
#include <sys/module.h>
#include <sys/kernel.h>
#include <sys/socket.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/taskqueue.h>

#include <net/if.h>
#include <net/if_arp.h>
#include <net/ethernet.h>
#include <net/if_dl.h>
#include <net/if_media.h>
#include <net/if_types.h>
#include <net/if_vlan_var.h>

#include <net/bpf.h>

#include <machine/bus.h>
#include <machine/resource.h>
#include <sys/bus.h>
#include <sys/rman.h>

#include <dev/mii/mii.h>
#include <dev/mii/miivar.h>

#include <dev/pci/pcireg.h>
#include <dev/pci/pcivar.h>

#include <pci/if_rlreg.h>

MODULE_DEPEND(re, pci, 1, 1, 1);
MODULE_DEPEND(re, ether, 1, 1, 1);
MODULE_DEPEND(re, miibus, 1, 1, 1);

/* "device miibus" required.  See GENERIC if you get errors here. */
#include "miibus_if.h"

/* Tunables. */
static int msi_disable = 0;
TUNABLE_INT("hw.re.msi_disable", &msi_disable);
static int prefer_iomap = 0;
TUNABLE_INT("hw.re.prefer_iomap", &prefer_iomap);
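
/*
 * Both tunables may be set from /boot/loader.conf, e.g.:
 *
 *	hw.re.msi_disable="1"	# force INTx interrupts instead of MSI
 *	hw.re.prefer_iomap="1"	# prefer I/O space register mapping
 */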

#define RE_CSUM_FEATURES    (CSUM_IP | CSUM_TCP | CSUM_UDP)

/*
 * Various supported device vendors/types and their names.
 */
static struct rl_type re_devs[] = {
	{ DLINK_VENDORID, DLINK_DEVICEID_528T, 0,
	    "D-Link DGE-528(T) Gigabit Ethernet Adapter" },
	{ RT_VENDORID, RT_DEVICEID_8139, 0,
	    "RealTek 8139C+ 10/100BaseTX" },
	{ RT_VENDORID, RT_DEVICEID_8101E, 0,
	    "RealTek 8101E/8102E/8102EL PCIe 10/100baseTX" },
	{ RT_VENDORID, RT_DEVICEID_8168, 0,
	    "RealTek 8168/8168B/8168C/8168CP/8168D/8111B/8111C/8111CP PCIe "
	    "Gigabit Ethernet" },
	{ RT_VENDORID, RT_DEVICEID_8169, 0,
	    "RealTek 8169/8169S/8169SB(L)/8110S/8110SB(L) Gigabit Ethernet" },
	{ RT_VENDORID, RT_DEVICEID_8169SC, 0,
	    "RealTek 8169SC/8110SC Single-chip Gigabit Ethernet" },
	{ COREGA_VENDORID, COREGA_DEVICEID_CGLAPCIGT, 0,
	    "Corega CG-LAPCIGT (RTL8169S) Gigabit Ethernet" },
	{ LINKSYS_VENDORID, LINKSYS_DEVICEID_EG1032, 0,
	    "Linksys EG1032 (RTL8169S) Gigabit Ethernet" },
	{ USR_VENDORID, USR_DEVICEID_997902, 0,
	    "US Robotics 997902 (RTL8169S) Gigabit Ethernet" }
};

static struct rl_hwrev re_hwrevs[] = {
	{ RL_HWREV_8139, RL_8139,  "" },
	{ RL_HWREV_8139A, RL_8139, "A" },
	{ RL_HWREV_8139AG, RL_8139, "A-G" },
	{ RL_HWREV_8139B, RL_8139, "B" },
	{ RL_HWREV_8130, RL_8139, "8130" },
	{ RL_HWREV_8139C, RL_8139, "C" },
	{ RL_HWREV_8139D, RL_8139, "8139D/8100B/8100C" },
	{ RL_HWREV_8139CPLUS, RL_8139CPLUS, "C+"},
	{ RL_HWREV_8168_SPIN1, RL_8169, "8168"},
	{ RL_HWREV_8169, RL_8169, "8169"},
	{ RL_HWREV_8169S, RL_8169, "8169S"},
	{ RL_HWREV_8110S, RL_8169, "8110S"},
	{ RL_HWREV_8169_8110SB, RL_8169, "8169SB/8110SB"},
	{ RL_HWREV_8169_8110SC, RL_8169, "8169SC/8110SC"},
	{ RL_HWREV_8169_8110SBL, RL_8169, "8169SBL/8110SBL"},
	{ RL_HWREV_8169_8110SCE, RL_8169, "8169SC/8110SC"},
	{ RL_HWREV_8100, RL_8139, "8100"},
	{ RL_HWREV_8101, RL_8139, "8101"},
	{ RL_HWREV_8100E, RL_8169, "8100E"},
	{ RL_HWREV_8101E, RL_8169, "8101E"},
	{ RL_HWREV_8102E, RL_8169, "8102E"},
	{ RL_HWREV_8102EL, RL_8169, "8102EL"},
	{ RL_HWREV_8102EL_SPIN1, RL_8169, "8102EL"},
	{ RL_HWREV_8168_SPIN2, RL_8169, "8168"},
	{ RL_HWREV_8168_SPIN3, RL_8169, "8168"},
	{ RL_HWREV_8168C, RL_8169, "8168C/8111C"},
	{ RL_HWREV_8168C_SPIN2, RL_8169, "8168C/8111C"},
	{ RL_HWREV_8168CP, RL_8169, "8168CP/8111CP"},
	{ RL_HWREV_8168D, RL_8169, "8168D"},
	{ 0, 0, NULL }
};

static int re_probe		(device_t);
static int re_attach		(device_t);
static int re_detach		(device_t);

static int re_encap		(struct rl_softc *, struct mbuf **);

static void re_dma_map_addr	(void *, bus_dma_segment_t *, int, int);
static int re_allocmem		(device_t, struct rl_softc *);
static __inline void re_discard_rxbuf
				(struct rl_softc *, int);
static int re_newbuf		(struct rl_softc *, int);
static int re_rx_list_init	(struct rl_softc *);
static int re_tx_list_init	(struct rl_softc *);
#ifdef RE_FIXUP_RX
static __inline void re_fixup_rx
				(struct mbuf *);
#endif
static int re_rxeof		(struct rl_softc *, int *);
static void re_txeof		(struct rl_softc *);
#ifdef DEVICE_POLLING
static int re_poll		(struct ifnet *, enum poll_cmd, int);
static int re_poll_locked	(struct ifnet *, enum poll_cmd, int);
#endif
static int re_intr		(void *);
static void re_tick		(void *);
static void re_tx_task		(void *, int);
static void re_int_task		(void *, int);
static void re_start		(struct ifnet *);
static int re_ioctl		(struct ifnet *, u_long, caddr_t);
static void re_init		(void *);
static void re_init_locked	(struct rl_softc *);
static void re_stop		(struct rl_softc *);
static void re_watchdog		(struct rl_softc *);
static int re_suspend		(device_t);
static int re_resume		(device_t);
static int re_shutdown		(device_t);
static int re_ifmedia_upd	(struct ifnet *);
static void re_ifmedia_sts	(struct ifnet *, struct ifmediareq *);

static void re_eeprom_putbyte	(struct rl_softc *, int);
static void re_eeprom_getword	(struct rl_softc *, int, u_int16_t *);
static void re_read_eeprom	(struct rl_softc *, caddr_t, int, int);
static int re_gmii_readreg	(device_t, int, int);
static int re_gmii_writereg	(device_t, int, int, int);

static int re_miibus_readreg	(device_t, int, int);
static int re_miibus_writereg	(device_t, int, int, int);
static void re_miibus_statchg	(device_t);

static void re_set_rxmode		(struct rl_softc *);
static void re_reset		(struct rl_softc *);
static void re_setwol		(struct rl_softc *);
static void re_clrwol		(struct rl_softc *);

#ifdef RE_DIAG
static int re_diag		(struct rl_softc *);
#endif

static device_method_t re_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe,		re_probe),
	DEVMETHOD(device_attach,	re_attach),
	DEVMETHOD(device_detach,	re_detach),
	DEVMETHOD(device_suspend,	re_suspend),
	DEVMETHOD(device_resume,	re_resume),
	DEVMETHOD(device_shutdown,	re_shutdown),

	/* bus interface */
	DEVMETHOD(bus_print_child,	bus_generic_print_child),
	DEVMETHOD(bus_driver_added,	bus_generic_driver_added),

	/* MII interface */
	DEVMETHOD(miibus_readreg,	re_miibus_readreg),
	DEVMETHOD(miibus_writereg,	re_miibus_writereg),
	DEVMETHOD(miibus_statchg,	re_miibus_statchg),

	{ 0, 0 }
};

static driver_t re_driver = {
	"re",
	re_methods,
	sizeof(struct rl_softc)
};

static devclass_t re_devclass;

DRIVER_MODULE(re, pci, re_driver, re_devclass, 0, 0);
DRIVER_MODULE(miibus, re, miibus_driver, miibus_devclass, 0, 0);
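
/*
 * When built as a module, the driver can be loaded at runtime with
 * "kldload if_re", or from loader.conf with if_re_load="YES".
 */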

#define EE_SET(x)					\
	CSR_WRITE_1(sc, RL_EECMD,			\
		CSR_READ_1(sc, RL_EECMD) | x)

#define EE_CLR(x)					\
	CSR_WRITE_1(sc, RL_EECMD,			\
		CSR_READ_1(sc, RL_EECMD) & ~x)
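
/*
 * EE_SET()/EE_CLR() set or clear individual control bits in the EEPROM
 * command register; for example, EE_SET(RL_EE_CLK) drives the EEPROM
 * clock line high and EE_CLR(RL_EE_CLK) drives it low again.
 */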

/*
 * Send a read command and address to the EEPROM, check for ACK.
 */
static void
re_eeprom_putbyte(struct rl_softc *sc, int addr)
{
	int			d, i;

	d = addr | (RL_9346_READ << sc->rl_eewidth);

	/*
	 * Feed in each bit and strobe the clock.
	 */

	for (i = 1 << (sc->rl_eewidth + 3); i; i >>= 1) {
		if (d & i) {
			EE_SET(RL_EE_DATAIN);
		} else {
			EE_CLR(RL_EE_DATAIN);
		}
		DELAY(100);
		EE_SET(RL_EE_CLK);
		DELAY(150);
		EE_CLR(RL_EE_CLK);
		DELAY(100);
	}
}

/*
 * Read a word of data stored in the EEPROM at address 'addr.'
 */
static void
re_eeprom_getword(struct rl_softc *sc, int addr, u_int16_t *dest)
{
	int			i;
	u_int16_t		word = 0;

	/*
	 * Send address of word we want to read.
	 */
	re_eeprom_putbyte(sc, addr);

	/*
	 * Start reading bits from EEPROM.
	 */
	for (i = 0x8000; i; i >>= 1) {
		EE_SET(RL_EE_CLK);
		DELAY(100);
		if (CSR_READ_1(sc, RL_EECMD) & RL_EE_DATAOUT)
			word |= i;
		EE_CLR(RL_EE_CLK);
		DELAY(100);
	}

	*dest = word;
}

/*
 * Read a sequence of words from the EEPROM.
 */
static void
re_read_eeprom(struct rl_softc *sc, caddr_t dest, int off, int cnt)
{
	int			i;
	u_int16_t		word = 0, *ptr;

	CSR_SETBIT_1(sc, RL_EECMD, RL_EEMODE_PROGRAM);

	DELAY(100);

	for (i = 0; i < cnt; i++) {
		CSR_SETBIT_1(sc, RL_EECMD, RL_EE_SEL);
		re_eeprom_getword(sc, off + i, &word);
		CSR_CLRBIT_1(sc, RL_EECMD, RL_EE_SEL);
		ptr = (u_int16_t *)(dest + (i * 2));
		*ptr = word;
	}

	CSR_CLRBIT_1(sc, RL_EECMD, RL_EEMODE_PROGRAM);
}
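
/*
 * Usage sketch: re_attach() below pulls the 3-word station address out
 * of the EEPROM this way:
 *
 *	u_int16_t as[ETHER_ADDR_LEN / 2];
 *	re_read_eeprom(sc, (caddr_t)as, RL_EE_EADDR, 3);
 */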

static int
re_gmii_readreg(device_t dev, int phy, int reg)
{
	struct rl_softc		*sc;
	u_int32_t		rval;
	int			i;

	if (phy != 1)
		return (0);

	sc = device_get_softc(dev);

	/* Let the rgephy driver read the GMEDIASTAT register */

	if (reg == RL_GMEDIASTAT) {
		rval = CSR_READ_1(sc, RL_GMEDIASTAT);
		return (rval);
	}

	CSR_WRITE_4(sc, RL_PHYAR, reg << 16);
	DELAY(1000);

	for (i = 0; i < RL_PHY_TIMEOUT; i++) {
		rval = CSR_READ_4(sc, RL_PHYAR);
		if (rval & RL_PHYAR_BUSY)
			break;
		DELAY(100);
	}

	if (i == RL_PHY_TIMEOUT) {
		device_printf(sc->rl_dev, "PHY read failed\n");
		return (0);
	}

	return (rval & RL_PHYAR_PHYDATA);
}

static int
re_gmii_writereg(device_t dev, int phy, int reg, int data)
{
	struct rl_softc		*sc;
	u_int32_t		rval;
	int			i;

	sc = device_get_softc(dev);

	CSR_WRITE_4(sc, RL_PHYAR, (reg << 16) |
	    (data & RL_PHYAR_PHYDATA) | RL_PHYAR_BUSY);
	DELAY(1000);

	for (i = 0; i < RL_PHY_TIMEOUT; i++) {
		rval = CSR_READ_4(sc, RL_PHYAR);
		if (!(rval & RL_PHYAR_BUSY))
			break;
		DELAY(100);
	}

	if (i == RL_PHY_TIMEOUT) {
		device_printf(sc->rl_dev, "PHY write failed\n");
		return (0);
	}

	return (0);
}

static int
re_miibus_readreg(device_t dev, int phy, int reg)
{
	struct rl_softc		*sc;
	u_int16_t		rval = 0;
	u_int16_t		re8139_reg = 0;

	sc = device_get_softc(dev);

	if (sc->rl_type == RL_8169) {
		rval = re_gmii_readreg(dev, phy, reg);
		return (rval);
	}

	/* Pretend the internal PHY is only at address 0 */
	if (phy) {
		return (0);
	}
	switch (reg) {
	case MII_BMCR:
		re8139_reg = RL_BMCR;
		break;
	case MII_BMSR:
		re8139_reg = RL_BMSR;
		break;
	case MII_ANAR:
		re8139_reg = RL_ANAR;
		break;
	case MII_ANER:
		re8139_reg = RL_ANER;
		break;
	case MII_ANLPAR:
		re8139_reg = RL_LPAR;
		break;
	case MII_PHYIDR1:
	case MII_PHYIDR2:
		return (0);
	/*
	 * Allow the rlphy driver to read the media status
	 * register. If we have a link partner which does not
	 * support NWAY, this is the register which will tell
	 * us the results of parallel detection.
	 */
	case RL_MEDIASTAT:
		rval = CSR_READ_1(sc, RL_MEDIASTAT);
		return (rval);
	default:
		device_printf(sc->rl_dev, "bad phy register\n");
		return (0);
	}
	rval = CSR_READ_2(sc, re8139_reg);
	if (sc->rl_type == RL_8139CPLUS && re8139_reg == RL_BMCR) {
		/* 8139C+ has different bit layout. */
		rval &= ~(BMCR_LOOP | BMCR_ISO);
	}
	return (rval);
}

static int
re_miibus_writereg(device_t dev, int phy, int reg, int data)
{
	struct rl_softc		*sc;
	u_int16_t		re8139_reg = 0;
	int			rval = 0;

	sc = device_get_softc(dev);

	if (sc->rl_type == RL_8169) {
		rval = re_gmii_writereg(dev, phy, reg, data);
		return (rval);
	}

	/* Pretend the internal PHY is only at address 0 */
	if (phy)
		return (0);

	switch (reg) {
	case MII_BMCR:
		re8139_reg = RL_BMCR;
		if (sc->rl_type == RL_8139CPLUS) {
			/* 8139C+ has different bit layout. */
			data &= ~(BMCR_LOOP | BMCR_ISO);
		}
		break;
	case MII_BMSR:
		re8139_reg = RL_BMSR;
		break;
	case MII_ANAR:
		re8139_reg = RL_ANAR;
		break;
	case MII_ANER:
		re8139_reg = RL_ANER;
		break;
	case MII_ANLPAR:
		re8139_reg = RL_LPAR;
		break;
	case MII_PHYIDR1:
	case MII_PHYIDR2:
		return (0);
	default:
		device_printf(sc->rl_dev, "bad phy register\n");
		return (0);
	}
	CSR_WRITE_2(sc, re8139_reg, data);
	return (0);
}

static void
re_miibus_statchg(device_t dev)
{
	struct rl_softc		*sc;
	struct ifnet		*ifp;
	struct mii_data		*mii;

	sc = device_get_softc(dev);
	mii = device_get_softc(sc->rl_miibus);
	ifp = sc->rl_ifp;
	if (mii == NULL || ifp == NULL ||
	    (ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
		return;

	sc->rl_flags &= ~RL_FLAG_LINK;
	if ((mii->mii_media_status & (IFM_ACTIVE | IFM_AVALID)) ==
	    (IFM_ACTIVE | IFM_AVALID)) {
		switch (IFM_SUBTYPE(mii->mii_media_active)) {
		case IFM_10_T:
		case IFM_100_TX:
			sc->rl_flags |= RL_FLAG_LINK;
			break;
		case IFM_1000_T:
			if ((sc->rl_flags & RL_FLAG_FASTETHER) != 0)
				break;
			sc->rl_flags |= RL_FLAG_LINK;
			break;
		default:
			break;
		}
	}
	/*
	 * RealTek controllers do not provide an interface to the
	 * Tx/Rx MACs for the resolved speed, duplex and flow-control
	 * parameters.
	 */
}

/*
 * Set the RX configuration and 64-bit multicast hash filter.
 */
static void
re_set_rxmode(struct rl_softc *sc)
{
	struct ifnet		*ifp;
	struct ifmultiaddr	*ifma;
	uint32_t		hashes[2] = { 0, 0 };
	uint32_t		h, rxfilt;

	RL_LOCK_ASSERT(sc);

	ifp = sc->rl_ifp;

	rxfilt = RL_RXCFG_CONFIG | RL_RXCFG_RX_INDIV | RL_RXCFG_RX_BROAD;

	if (ifp->if_flags & (IFF_ALLMULTI | IFF_PROMISC)) {
		if (ifp->if_flags & IFF_PROMISC)
			rxfilt |= RL_RXCFG_RX_ALLPHYS;
		/*
		 * Unlike other hardware, we have to explicitly set
		 * RL_RXCFG_RX_MULTI to receive multicast frames in
		 * promiscuous mode.
		 */
		rxfilt |= RL_RXCFG_RX_MULTI;
		hashes[0] = hashes[1] = 0xffffffff;
		goto done;
	}

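	/*
	 * Hash each multicast address into a 6-bit index: the top six
	 * bits of the big-endian CRC32 of the MAC address select one of
	 * the 64 filter bits split across the MAR0/MAR4 registers.
	 */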
	if_maddr_rlock(ifp);
	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
		if (ifma->ifma_addr->sa_family != AF_LINK)
			continue;
		h = ether_crc32_be(LLADDR((struct sockaddr_dl *)
		    ifma->ifma_addr), ETHER_ADDR_LEN) >> 26;
		if (h < 32)
			hashes[0] |= (1 << h);
		else
			hashes[1] |= (1 << (h - 32));
	}
	if_maddr_runlock(ifp);

	if (hashes[0] != 0 || hashes[1] != 0) {
		/*
		 * For some unfathomable reason, RealTek decided to
		 * reverse the order of the multicast hash registers
		 * in the PCI Express parts.  This means we have to
		 * write the hash pattern in reverse order for those
		 * devices.
		 */
		if ((sc->rl_flags & RL_FLAG_PCIE) != 0) {
			h = bswap32(hashes[0]);
			hashes[0] = bswap32(hashes[1]);
			hashes[1] = h;
		}
		rxfilt |= RL_RXCFG_RX_MULTI;
	}

done:
	CSR_WRITE_4(sc, RL_MAR0, hashes[0]);
	CSR_WRITE_4(sc, RL_MAR4, hashes[1]);
	CSR_WRITE_4(sc, RL_RXCFG, rxfilt);
}

static void
re_reset(struct rl_softc *sc)
{
	int			i;

	RL_LOCK_ASSERT(sc);

	CSR_WRITE_1(sc, RL_COMMAND, RL_CMD_RESET);

	for (i = 0; i < RL_TIMEOUT; i++) {
		DELAY(10);
		if (!(CSR_READ_1(sc, RL_COMMAND) & RL_CMD_RESET))
			break;
	}
	if (i == RL_TIMEOUT)
		device_printf(sc->rl_dev, "reset never completed!\n");

	if ((sc->rl_flags & RL_FLAG_MACRESET) != 0)
		CSR_WRITE_1(sc, 0x82, 1);
	if (sc->rl_hwrev == RL_HWREV_8169S)
		re_gmii_writereg(sc->rl_dev, 1, 0x0b, 0);
}

#ifdef RE_DIAG

/*
 * The following routine is designed to test for a defect on some
 * 32-bit 8169 cards. Some of these NICs have the REQ64# and ACK64#
 * lines connected to the bus; however, for a 32-bit only card, they
 * should be pulled high. The result of this defect is that the
 * NIC will not work right if you plug it into a 64-bit slot: DMA
 * operations will be done with 64-bit transfers, which will fail
 * because the 64-bit data lines aren't connected.
 *
 * There's no way to work around this (short of taking a soldering
 * iron to the board); however, we can detect it. The method we use
 * here is to put the NIC into digital loopback mode, set the receiver
 * to promiscuous mode, and then try to send a frame. We then compare
 * the frame data we sent to what was received. If the data matches,
 * then the NIC is working correctly, otherwise we know the user has
 * a defective NIC which has been mistakenly plugged into a 64-bit PCI
 * slot. In the latter case, there's no way the NIC can work correctly,
 * so we print out a message on the console and abort the device attach.
 */

static int
re_diag(struct rl_softc *sc)
{
	struct ifnet		*ifp = sc->rl_ifp;
	struct mbuf		*m0;
	struct ether_header	*eh;
	struct rl_desc		*cur_rx;
	u_int16_t		status;
	u_int32_t		rxstat;
	int			total_len, i, error = 0, phyaddr;
	u_int8_t		dst[] = { 0x00, 'h', 'e', 'l', 'l', 'o' };
	u_int8_t		src[] = { 0x00, 'w', 'o', 'r', 'l', 'd' };

	/* Allocate a single mbuf */
	MGETHDR(m0, M_DONTWAIT, MT_DATA);
	if (m0 == NULL)
		return (ENOBUFS);

	RL_LOCK(sc);

	/*
	 * Initialize the NIC in test mode. This sets the chip up
	 * so that it can send and receive frames, but performs the
	 * following special functions:
	 * - Puts receiver in promiscuous mode
	 * - Enables digital loopback mode
	 * - Leaves interrupts turned off
	 */

	ifp->if_flags |= IFF_PROMISC;
	sc->rl_testmode = 1;
	re_init_locked(sc);
	sc->rl_flags |= RL_FLAG_LINK;
	if (sc->rl_type == RL_8169)
		phyaddr = 1;
	else
		phyaddr = 0;

	re_miibus_writereg(sc->rl_dev, phyaddr, MII_BMCR, BMCR_RESET);
	for (i = 0; i < RL_TIMEOUT; i++) {
		status = re_miibus_readreg(sc->rl_dev, phyaddr, MII_BMCR);
		if (!(status & BMCR_RESET))
			break;
	}

	re_miibus_writereg(sc->rl_dev, phyaddr, MII_BMCR, BMCR_LOOP);
	CSR_WRITE_2(sc, RL_ISR, RL_INTRS);

	DELAY(100000);

	/* Put some data in the mbuf */

	eh = mtod(m0, struct ether_header *);
	bcopy((char *)&dst, eh->ether_dhost, ETHER_ADDR_LEN);
	bcopy((char *)&src, eh->ether_shost, ETHER_ADDR_LEN);
	eh->ether_type = htons(ETHERTYPE_IP);
	m0->m_pkthdr.len = m0->m_len = ETHER_MIN_LEN - ETHER_CRC_LEN;

	/*
	 * Queue the packet, start transmission.
	 * Note: IF_HANDOFF() ultimately calls re_start() for us.
	 */

	CSR_WRITE_2(sc, RL_ISR, 0xFFFF);
	RL_UNLOCK(sc);
	/* XXX: re_diag must not be called when in ALTQ mode */
	IF_HANDOFF(&ifp->if_snd, m0, ifp);
	RL_LOCK(sc);
	m0 = NULL;

	/* Wait for it to propagate through the chip */

	DELAY(100000);
	for (i = 0; i < RL_TIMEOUT; i++) {
		status = CSR_READ_2(sc, RL_ISR);
		CSR_WRITE_2(sc, RL_ISR, status);
		if ((status & (RL_ISR_TIMEOUT_EXPIRED|RL_ISR_RX_OK)) ==
		    (RL_ISR_TIMEOUT_EXPIRED|RL_ISR_RX_OK))
			break;
		DELAY(10);
	}

	if (i == RL_TIMEOUT) {
		device_printf(sc->rl_dev,
		    "diagnostic failed, failed to receive packet in"
		    " loopback mode\n");
		error = EIO;
		goto done;
	}

	/*
	 * The packet should have been dumped into the first
	 * entry in the RX DMA ring. Grab it from there.
	 */

	bus_dmamap_sync(sc->rl_ldata.rl_rx_list_tag,
	    sc->rl_ldata.rl_rx_list_map,
	    BUS_DMASYNC_POSTREAD);
	bus_dmamap_sync(sc->rl_ldata.rl_rx_mtag,
	    sc->rl_ldata.rl_rx_desc[0].rx_dmamap,
	    BUS_DMASYNC_POSTREAD);
	bus_dmamap_unload(sc->rl_ldata.rl_rx_mtag,
	    sc->rl_ldata.rl_rx_desc[0].rx_dmamap);

	m0 = sc->rl_ldata.rl_rx_desc[0].rx_m;
	sc->rl_ldata.rl_rx_desc[0].rx_m = NULL;
	eh = mtod(m0, struct ether_header *);

	cur_rx = &sc->rl_ldata.rl_rx_list[0];
	total_len = RL_RXBYTES(cur_rx);
	rxstat = le32toh(cur_rx->rl_cmdstat);

	if (total_len != ETHER_MIN_LEN) {
		device_printf(sc->rl_dev,
		    "diagnostic failed, received short packet\n");
		error = EIO;
		goto done;
	}

	/* Test that the received packet data matches what we sent. */

	if (bcmp((char *)&eh->ether_dhost, (char *)&dst, ETHER_ADDR_LEN) ||
	    bcmp((char *)&eh->ether_shost, (char *)&src, ETHER_ADDR_LEN) ||
	    ntohs(eh->ether_type) != ETHERTYPE_IP) {
		device_printf(sc->rl_dev, "WARNING, DMA FAILURE!\n");
		device_printf(sc->rl_dev, "expected TX data: %6D/%6D/0x%x\n",
		    dst, ":", src, ":", ETHERTYPE_IP);
		device_printf(sc->rl_dev, "received RX data: %6D/%6D/0x%x\n",
		    eh->ether_dhost, ":",  eh->ether_shost, ":",
		    ntohs(eh->ether_type));
		device_printf(sc->rl_dev, "You may have a defective 32-bit "
		    "NIC plugged into a 64-bit PCI slot.\n");
		device_printf(sc->rl_dev, "Please re-install the NIC in a "
		    "32-bit slot for proper operation.\n");
		device_printf(sc->rl_dev, "Read the re(4) man page for more "
		    "details.\n");
		error = EIO;
	}

done:
	/* Turn interface off, release resources */

	sc->rl_testmode = 0;
	sc->rl_flags &= ~RL_FLAG_LINK;
	ifp->if_flags &= ~IFF_PROMISC;
	re_stop(sc);
	if (m0 != NULL)
		m_freem(m0);

	RL_UNLOCK(sc);

	return (error);
}

#endif

/*
 * Probe for a RealTek 8139C+/8169/8110 chip. Check the PCI vendor and device
 * IDs against our list and return a device name if we find a match.
 */
static int
re_probe(device_t dev)
{
	struct rl_type		*t;
	uint16_t		devid, vendor;
	uint16_t		revid, sdevid;
	int			i;

	vendor = pci_get_vendor(dev);
	devid = pci_get_device(dev);
	revid = pci_get_revid(dev);
	sdevid = pci_get_subdevice(dev);

	if (vendor == LINKSYS_VENDORID && devid == LINKSYS_DEVICEID_EG1032) {
		if (sdevid != LINKSYS_SUBDEVICE_EG1032_REV3) {
			/*
			 * Only attach to rev. 3 of the Linksys EG1032 adapter.
			 * Rev. 2 is supported by sk(4).
			 */
			return (ENXIO);
		}
	}

	if (vendor == RT_VENDORID && devid == RT_DEVICEID_8139) {
		if (revid != 0x20) {
			/* 8139, let rl(4) take care of this device. */
			return (ENXIO);
		}
	}

	t = re_devs;
	for (i = 0; i < sizeof(re_devs) / sizeof(re_devs[0]); i++, t++) {
		if (vendor == t->rl_vid && devid == t->rl_did) {
			device_set_desc(dev, t->rl_name);
			return (BUS_PROBE_DEFAULT);
		}
	}

	return (ENXIO);
}

/*
 * Map a single buffer address.
 */

static void
re_dma_map_addr(void *arg, bus_dma_segment_t *segs, int nseg, int error)
{
	bus_addr_t		*addr;

	if (error)
		return;

	KASSERT(nseg == 1, ("too many DMA segments, %d should be 1", nseg));
	addr = arg;
	*addr = segs->ds_addr;
}
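
/*
 * re_dma_map_addr() is the bus_dmamap_load() callback used below to
 * capture the single physical address of a loaded descriptor ring,
 * roughly:
 *
 *	bus_addr_t paddr = 0;
 *	bus_dmamap_load(tag, map, ring, size, re_dma_map_addr,
 *	    &paddr, BUS_DMA_NOWAIT);
 */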

static int
re_allocmem(device_t dev, struct rl_softc *sc)
{
	bus_size_t		rx_list_size, tx_list_size;
	int			error;
	int			i;

	rx_list_size = sc->rl_ldata.rl_rx_desc_cnt * sizeof(struct rl_desc);
	tx_list_size = sc->rl_ldata.rl_tx_desc_cnt * sizeof(struct rl_desc);

	/*
	 * Allocate the parent bus DMA tag appropriate for PCI.
	 * In order to use DAC, the RL_CPLUSCMD_PCI_DAC bit of the
	 * RL_CPLUS_CMD register must be set. However, some RealTek
	 * chips are known to be buggy in their DAC handling, so
	 * disable DAC by limiting the DMA address space to 32 bits.
	 * PCIe variants of RealTek chips may not have this limitation,
	 * but I took the safer path.
	 */
	error = bus_dma_tag_create(bus_get_dma_tag(dev), 1, 0,
	    BUS_SPACE_MAXADDR_32BIT, BUS_SPACE_MAXADDR, NULL, NULL,
	    BUS_SPACE_MAXSIZE_32BIT, 0, BUS_SPACE_MAXSIZE_32BIT, 0,
	    NULL, NULL, &sc->rl_parent_tag);
	if (error) {
		device_printf(dev, "could not allocate parent DMA tag\n");
		return (error);
	}

	/*
	 * Allocate map for TX mbufs.
	 */
	error = bus_dma_tag_create(sc->rl_parent_tag, 1, 0,
	    BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL,
	    NULL, MCLBYTES * RL_NTXSEGS, RL_NTXSEGS, 4096, 0,
	    NULL, NULL, &sc->rl_ldata.rl_tx_mtag);
	if (error) {
		device_printf(dev, "could not allocate TX DMA tag\n");
		return (error);
	}

	/*
	 * Allocate map for RX mbufs.
	 */

	error = bus_dma_tag_create(sc->rl_parent_tag, sizeof(uint64_t), 0,
	    BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL,
	    MCLBYTES, 1, MCLBYTES, 0, NULL, NULL, &sc->rl_ldata.rl_rx_mtag);
	if (error) {
		device_printf(dev, "could not allocate RX DMA tag\n");
		return (error);
	}

	/*
	 * Allocate map for TX descriptor list.
	 */
	error = bus_dma_tag_create(sc->rl_parent_tag, RL_RING_ALIGN,
	    0, BUS_SPACE_MAXADDR_32BIT, BUS_SPACE_MAXADDR, NULL,
	    NULL, tx_list_size, 1, tx_list_size, 0,
	    NULL, NULL, &sc->rl_ldata.rl_tx_list_tag);
	if (error) {
		device_printf(dev, "could not allocate TX DMA ring tag\n");
		return (error);
	}

	/* Allocate DMA'able memory for the TX ring */

	error = bus_dmamem_alloc(sc->rl_ldata.rl_tx_list_tag,
	    (void **)&sc->rl_ldata.rl_tx_list,
	    BUS_DMA_WAITOK | BUS_DMA_COHERENT | BUS_DMA_ZERO,
	    &sc->rl_ldata.rl_tx_list_map);
	if (error) {
		device_printf(dev, "could not allocate TX DMA ring\n");
		return (error);
	}

	/* Load the map for the TX ring. */

	sc->rl_ldata.rl_tx_list_addr = 0;
	error = bus_dmamap_load(sc->rl_ldata.rl_tx_list_tag,
	     sc->rl_ldata.rl_tx_list_map, sc->rl_ldata.rl_tx_list,
	     tx_list_size, re_dma_map_addr,
	     &sc->rl_ldata.rl_tx_list_addr, BUS_DMA_NOWAIT);
	if (error != 0 || sc->rl_ldata.rl_tx_list_addr == 0) {
		device_printf(dev, "could not load TX DMA ring\n");
		return (ENOMEM);
	}

	/* Create DMA maps for TX buffers */

	for (i = 0; i < sc->rl_ldata.rl_tx_desc_cnt; i++) {
		error = bus_dmamap_create(sc->rl_ldata.rl_tx_mtag, 0,
		    &sc->rl_ldata.rl_tx_desc[i].tx_dmamap);
		if (error) {
			device_printf(dev, "could not create DMA map for TX\n");
			return (error);
		}
	}

	/*
	 * Allocate map for RX descriptor list.
	 */
	error = bus_dma_tag_create(sc->rl_parent_tag, RL_RING_ALIGN,
	    0, BUS_SPACE_MAXADDR_32BIT, BUS_SPACE_MAXADDR, NULL,
	    NULL, rx_list_size, 1, rx_list_size, 0,
	    NULL, NULL, &sc->rl_ldata.rl_rx_list_tag);
	if (error) {
		device_printf(dev, "could not create RX DMA ring tag\n");
		return (error);
	}

	/* Allocate DMA'able memory for the RX ring */

	error = bus_dmamem_alloc(sc->rl_ldata.rl_rx_list_tag,
	    (void **)&sc->rl_ldata.rl_rx_list,
	    BUS_DMA_WAITOK | BUS_DMA_COHERENT | BUS_DMA_ZERO,
	    &sc->rl_ldata.rl_rx_list_map);
	if (error) {
		device_printf(dev, "could not allocate RX DMA ring\n");
		return (error);
	}

	/* Load the map for the RX ring. */

	sc->rl_ldata.rl_rx_list_addr = 0;
	error = bus_dmamap_load(sc->rl_ldata.rl_rx_list_tag,
	     sc->rl_ldata.rl_rx_list_map, sc->rl_ldata.rl_rx_list,
	     rx_list_size, re_dma_map_addr,
	     &sc->rl_ldata.rl_rx_list_addr, BUS_DMA_NOWAIT);
	if (error != 0 || sc->rl_ldata.rl_rx_list_addr == 0) {
		device_printf(dev, "could not load RX DMA ring\n");
		return (ENOMEM);
	}

	/* Create DMA maps for RX buffers */

	error = bus_dmamap_create(sc->rl_ldata.rl_rx_mtag, 0,
	    &sc->rl_ldata.rl_rx_sparemap);
	if (error) {
		device_printf(dev, "could not create spare DMA map for RX\n");
		return (error);
	}
	for (i = 0; i < sc->rl_ldata.rl_rx_desc_cnt; i++) {
		error = bus_dmamap_create(sc->rl_ldata.rl_rx_mtag, 0,
		    &sc->rl_ldata.rl_rx_desc[i].rx_dmamap);
		if (error) {
			device_printf(dev, "could not create DMA map for RX\n");
			return (error);
		}
	}

	return (0);
}

/*
 * Attach the interface. Allocate softc structures, do ifmedia
 * setup and ethernet/BPF attach.
 */
static int
re_attach(device_t dev)
{
	u_char			eaddr[ETHER_ADDR_LEN];
	u_int16_t		as[ETHER_ADDR_LEN / 2];
	struct rl_softc		*sc;
	struct ifnet		*ifp;
	struct rl_hwrev		*hw_rev;
	int			hwrev;
	u_int16_t		devid, re_did = 0;
	int			error = 0, rid, i;
	int			msic, reg;
	uint8_t			cfg;

	sc = device_get_softc(dev);
	sc->rl_dev = dev;

	mtx_init(&sc->rl_mtx, device_get_nameunit(dev), MTX_NETWORK_LOCK,
	    MTX_DEF);
	callout_init_mtx(&sc->rl_stat_callout, &sc->rl_mtx, 0);

	/*
	 * Map control/status registers.
	 */
	pci_enable_busmaster(dev);

	devid = pci_get_device(dev);
	/*
	 * Prefer memory space register mapping over I/O space.
	 * Because the RTL8169SC does not seem to work when memory
	 * mapping is used, always activate I/O mapping for it.
	 */
	if (devid == RT_DEVICEID_8169SC)
		prefer_iomap = 1;
	if (prefer_iomap == 0) {
		sc->rl_res_id = PCIR_BAR(1);
		sc->rl_res_type = SYS_RES_MEMORY;
		/* RTL8168/8101E seems to use different BARs. */
		if (devid == RT_DEVICEID_8168 || devid == RT_DEVICEID_8101E)
			sc->rl_res_id = PCIR_BAR(2);
	} else {
		sc->rl_res_id = PCIR_BAR(0);
		sc->rl_res_type = SYS_RES_IOPORT;
	}
	sc->rl_res = bus_alloc_resource_any(dev, sc->rl_res_type,
	    &sc->rl_res_id, RF_ACTIVE);
	if (sc->rl_res == NULL && prefer_iomap == 0) {
		sc->rl_res_id = PCIR_BAR(0);
		sc->rl_res_type = SYS_RES_IOPORT;
		sc->rl_res = bus_alloc_resource_any(dev, sc->rl_res_type,
		    &sc->rl_res_id, RF_ACTIVE);
	}
	if (sc->rl_res == NULL) {
		device_printf(dev, "couldn't map ports/memory\n");
		error = ENXIO;
		goto fail;
	}

	sc->rl_btag = rman_get_bustag(sc->rl_res);
	sc->rl_bhandle = rman_get_bushandle(sc->rl_res);

	msic = 0;
	if (pci_find_extcap(dev, PCIY_EXPRESS, &reg) == 0) {
		sc->rl_flags |= RL_FLAG_PCIE;
		msic = pci_msi_count(dev);
		if (bootverbose)
			device_printf(dev, "MSI count : %d\n", msic);
	}
	if (msic > 0 && msi_disable == 0) {
		msic = 1;
		if (pci_alloc_msi(dev, &msic) == 0) {
			if (msic == RL_MSI_MESSAGES) {
				device_printf(dev, "Using %d MSI messages\n",
				    msic);
				sc->rl_flags |= RL_FLAG_MSI;
				/* Explicitly set MSI enable bit. */
				CSR_WRITE_1(sc, RL_EECMD, RL_EE_MODE);
				cfg = CSR_READ_1(sc, RL_CFG2);
				cfg |= RL_CFG2_MSI;
				CSR_WRITE_1(sc, RL_CFG2, cfg);
				CSR_WRITE_1(sc, RL_EECMD, RL_EEMODE_OFF);
			} else
				pci_release_msi(dev);
		}
	}

	/* Allocate interrupt */
	if ((sc->rl_flags & RL_FLAG_MSI) == 0) {
		rid = 0;
		sc->rl_irq[0] = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid,
		    RF_SHAREABLE | RF_ACTIVE);
		if (sc->rl_irq[0] == NULL) {
			device_printf(dev, "couldn't allocate IRQ resources\n");
			error = ENXIO;
			goto fail;
		}
	} else {
		for (i = 0, rid = 1; i < RL_MSI_MESSAGES; i++, rid++) {
			sc->rl_irq[i] = bus_alloc_resource_any(dev,
			    SYS_RES_IRQ, &rid, RF_ACTIVE);
			if (sc->rl_irq[i] == NULL) {
				device_printf(dev,
				    "couldn't allocate IRQ resources for "
				    "message %d\n", rid);
				error = ENXIO;
				goto fail;
			}
		}
	}

	if ((sc->rl_flags & RL_FLAG_MSI) == 0) {
		CSR_WRITE_1(sc, RL_EECMD, RL_EE_MODE);
		cfg = CSR_READ_1(sc, RL_CFG2);
		if ((cfg & RL_CFG2_MSI) != 0) {
			device_printf(dev, "turning off MSI enable bit.\n");
			cfg &= ~RL_CFG2_MSI;
			CSR_WRITE_1(sc, RL_CFG2, cfg);
		}
		CSR_WRITE_1(sc, RL_EECMD, RL_EEMODE_OFF);
	}

	/* Reset the adapter. */
	RL_LOCK(sc);
	re_reset(sc);
	RL_UNLOCK(sc);

	hw_rev = re_hwrevs;
	hwrev = CSR_READ_4(sc, RL_TXCFG);
	switch (hwrev & 0x70000000) {
	case 0x00000000:
	case 0x10000000:
		device_printf(dev, "Chip rev. 0x%08x\n", hwrev & 0xfc800000);
		hwrev &= (RL_TXCFG_HWREV | 0x80000000);
		break;
	default:
		device_printf(dev, "Chip rev. 0x%08x\n", hwrev & 0x7c800000);
		hwrev &= RL_TXCFG_HWREV;
		break;
	}
	device_printf(dev, "MAC rev. 0x%08x\n", hwrev & 0x00700000);
	while (hw_rev->rl_desc != NULL) {
		if (hw_rev->rl_rev == hwrev) {
			sc->rl_type = hw_rev->rl_type;
			sc->rl_hwrev = hw_rev->rl_rev;
			break;
		}
		hw_rev++;
	}
	if (hw_rev->rl_desc == NULL) {
		device_printf(dev, "Unknown H/W revision: 0x%08x\n", hwrev);
		error = ENXIO;
		goto fail;
	}

	switch (hw_rev->rl_rev) {
	case RL_HWREV_8139CPLUS:
		sc->rl_flags |= RL_FLAG_NOJUMBO | RL_FLAG_FASTETHER |
		    RL_FLAG_AUTOPAD;
		break;
	case RL_HWREV_8100E:
	case RL_HWREV_8101E:
		sc->rl_flags |= RL_FLAG_NOJUMBO | RL_FLAG_PHYWAKE |
		    RL_FLAG_FASTETHER;
		break;
	case RL_HWREV_8102E:
	case RL_HWREV_8102EL:
	case RL_HWREV_8102EL_SPIN1:
		sc->rl_flags |= RL_FLAG_NOJUMBO | RL_FLAG_PHYWAKE |
		    RL_FLAG_PAR | RL_FLAG_DESCV2 | RL_FLAG_MACSTAT |
		    RL_FLAG_FASTETHER | RL_FLAG_CMDSTOP | RL_FLAG_AUTOPAD;
		break;
	case RL_HWREV_8168_SPIN1:
	case RL_HWREV_8168_SPIN2:
		sc->rl_flags |= RL_FLAG_WOLRXENB;
		/* FALLTHROUGH */
	case RL_HWREV_8168_SPIN3:
		sc->rl_flags |= RL_FLAG_PHYWAKE | RL_FLAG_MACSTAT;
		break;
	case RL_HWREV_8168C_SPIN2:
		sc->rl_flags |= RL_FLAG_MACSLEEP;
		/* FALLTHROUGH */
	case RL_HWREV_8168C:
		if ((hwrev & 0x00700000) == 0x00200000)
			sc->rl_flags |= RL_FLAG_MACSLEEP;
		/* FALLTHROUGH */
	case RL_HWREV_8168CP:
	case RL_HWREV_8168D:
		sc->rl_flags |= RL_FLAG_PHYWAKE | RL_FLAG_PAR |
		    RL_FLAG_DESCV2 | RL_FLAG_MACSTAT | RL_FLAG_CMDSTOP |
		    RL_FLAG_AUTOPAD;
		/*
		 * These controllers support jumbo frames, but it seems
		 * that enabling them requires touching additional magic
		 * registers. Depending on the MAC revision, some
		 * controllers also need to disable checksum offload. So
		 * disable jumbo frame support until I have a better idea
		 * of what it really takes to make it work.
		 * RTL8168C/CP : supports up to 6KB jumbo frame.
		 * RTL8111C/CP : supports up to 9KB jumbo frame.
		 */
		sc->rl_flags |= RL_FLAG_NOJUMBO;
		break;
	case RL_HWREV_8169_8110SB:
	case RL_HWREV_8169_8110SBL:
	case RL_HWREV_8169_8110SC:
	case RL_HWREV_8169_8110SCE:
		sc->rl_flags |= RL_FLAG_PHYWAKE;
		/* FALLTHROUGH */
	case RL_HWREV_8169:
	case RL_HWREV_8169S:
	case RL_HWREV_8110S:
		sc->rl_flags |= RL_FLAG_MACRESET;
		break;
	default:
		break;
	}

	/* Enable PME. */
	CSR_WRITE_1(sc, RL_EECMD, RL_EE_MODE);
	cfg = CSR_READ_1(sc, RL_CFG1);
	cfg |= RL_CFG1_PME;
	CSR_WRITE_1(sc, RL_CFG1, cfg);
	cfg = CSR_READ_1(sc, RL_CFG5);
	cfg &= RL_CFG5_PME_STS;
	CSR_WRITE_1(sc, RL_CFG5, cfg);
	CSR_WRITE_1(sc, RL_EECMD, RL_EEMODE_OFF);

	if ((sc->rl_flags & RL_FLAG_PAR) != 0) {
		/*
		 * XXX Should have a better way to extract station
		 * address from EEPROM.
		 */
		for (i = 0; i < ETHER_ADDR_LEN; i++)
			eaddr[i] = CSR_READ_1(sc, RL_IDR0 + i);
	} else {
		sc->rl_eewidth = RL_9356_ADDR_LEN;
		re_read_eeprom(sc, (caddr_t)&re_did, 0, 1);
		if (re_did != 0x8129)
			sc->rl_eewidth = RL_9346_ADDR_LEN;

		/*
		 * Get station address from the EEPROM.
		 */
		re_read_eeprom(sc, (caddr_t)as, RL_EE_EADDR, 3);
		for (i = 0; i < ETHER_ADDR_LEN / 2; i++)
			as[i] = le16toh(as[i]);
		bcopy(as, eaddr, sizeof(eaddr));
	}

	if (sc->rl_type == RL_8169) {
		/* Set RX length mask and number of descriptors. */
		sc->rl_rxlenmask = RL_RDESC_STAT_GFRAGLEN;
		sc->rl_txstart = RL_GTXSTART;
		sc->rl_ldata.rl_tx_desc_cnt = RL_8169_TX_DESC_CNT;
		sc->rl_ldata.rl_rx_desc_cnt = RL_8169_RX_DESC_CNT;
	} else {
		/* Set RX length mask and number of descriptors. */
		sc->rl_rxlenmask = RL_RDESC_STAT_FRAGLEN;
		sc->rl_txstart = RL_TXSTART;
		sc->rl_ldata.rl_tx_desc_cnt = RL_8139_TX_DESC_CNT;
		sc->rl_ldata.rl_rx_desc_cnt = RL_8139_RX_DESC_CNT;
	}

	error = re_allocmem(dev, sc);
	if (error)
		goto fail;

	ifp = sc->rl_ifp = if_alloc(IFT_ETHER);
	if (ifp == NULL) {
		device_printf(dev, "can not if_alloc()\n");
		error = ENOSPC;
		goto fail;
	}

	/* Take controller out of deep sleep mode. */
	if ((sc->rl_flags & RL_FLAG_MACSLEEP) != 0) {
		if ((CSR_READ_1(sc, RL_MACDBG) & 0x80) == 0x80)
			CSR_WRITE_1(sc, RL_GPIO,
			    CSR_READ_1(sc, RL_GPIO) | 0x01);
		else
			CSR_WRITE_1(sc, RL_GPIO,
			    CSR_READ_1(sc, RL_GPIO) & ~0x01);
	}

	/* Take PHY out of power down mode. */
	if ((sc->rl_flags & RL_FLAG_PHYWAKE) != 0) {
		re_gmii_writereg(dev, 1, 0x1f, 0);
		re_gmii_writereg(dev, 1, 0x0e, 0);
	}

	/* Do MII setup */
	if (mii_phy_probe(dev, &sc->rl_miibus,
	    re_ifmedia_upd, re_ifmedia_sts)) {
		device_printf(dev, "MII without any phy!\n");
		error = ENXIO;
		goto fail;
	}

	ifp->if_softc = sc;
	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
	ifp->if_ioctl = re_ioctl;
	ifp->if_start = re_start;
	ifp->if_hwassist = RE_CSUM_FEATURES;
	ifp->if_capabilities = IFCAP_HWCSUM;
	ifp->if_capenable = ifp->if_capabilities;
	ifp->if_init = re_init;
	IFQ_SET_MAXLEN(&ifp->if_snd, RL_IFQ_MAXLEN);
	ifp->if_snd.ifq_drv_maxlen = RL_IFQ_MAXLEN;
	IFQ_SET_READY(&ifp->if_snd);

	TASK_INIT(&sc->rl_txtask, 1, re_tx_task, ifp);
	TASK_INIT(&sc->rl_inttask, 0, re_int_task, sc);

	/*
	 * XXX
	 * Still have no idea how to make TSO work on 8168C, 8168CP,
	 * 8111C and 8111CP.
	 */
	if ((sc->rl_flags & RL_FLAG_DESCV2) == 0) {
		ifp->if_hwassist |= CSUM_TSO;
		ifp->if_capabilities |= IFCAP_TSO4;
	}

	/*
	 * Call MI attach routine.
	 */
	ether_ifattach(ifp, eaddr);

	/* VLAN capability setup */
	ifp->if_capabilities |= IFCAP_VLAN_MTU | IFCAP_VLAN_HWTAGGING;
	if (ifp->if_capabilities & IFCAP_HWCSUM)
		ifp->if_capabilities |= IFCAP_VLAN_HWCSUM;
	/* Enable WOL if PM is supported. */
	if (pci_find_extcap(sc->rl_dev, PCIY_PMG, &reg) == 0)
		ifp->if_capabilities |= IFCAP_WOL;
	ifp->if_capenable = ifp->if_capabilities;
	/*
	 * Don't enable TSO by default. Under certain
	 * circumstances the controller generates corrupted
	 * packets at TSO sizes.
	 */
	ifp->if_hwassist &= ~CSUM_TSO;
	ifp->if_capenable &= ~IFCAP_TSO4;
#ifdef DEVICE_POLLING
	ifp->if_capabilities |= IFCAP_POLLING;
#endif
	/*
	 * Tell the upper layer(s) we support long frames.
	 * Must appear after the call to ether_ifattach() because
	 * ether_ifattach() sets ifi_hdrlen to the default value.
	 */
	ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header);

#ifdef RE_DIAG
	/*
	 * Perform hardware diagnostic on the original RTL8169.
	 * Some 32-bit cards were incorrectly wired and would
	 * malfunction if plugged into a 64-bit slot.
	 */

	if (hwrev == RL_HWREV_8169) {
		error = re_diag(sc);
		if (error) {
			device_printf(dev,
			    "attach aborted due to hardware diag failure\n");
			ether_ifdetach(ifp);
			goto fail;
		}
	}
#endif

	/* Hook interrupt last to avoid having to lock softc */
	if ((sc->rl_flags & RL_FLAG_MSI) == 0)
		error = bus_setup_intr(dev, sc->rl_irq[0],
		    INTR_TYPE_NET | INTR_MPSAFE, re_intr, NULL, sc,
		    &sc->rl_intrhand[0]);
	else {
		for (i = 0; i < RL_MSI_MESSAGES; i++) {
			error = bus_setup_intr(dev, sc->rl_irq[i],
			    INTR_TYPE_NET | INTR_MPSAFE, re_intr, NULL, sc,
			    &sc->rl_intrhand[i]);
			if (error != 0)
				break;
		}
	}
	if (error) {
		device_printf(dev, "couldn't set up irq\n");
		ether_ifdetach(ifp);
	}

fail:

	if (error)
		re_detach(dev);

	return (error);
}

/*
 * Shutdown hardware and free up resources. This can be called any
 * time after the mutex has been initialized. It is called in both
 * the error case in attach and the normal detach case so it needs
 * to be careful about only freeing resources that have actually been
 * allocated.
 */
static int
re_detach(device_t dev)
{
	struct rl_softc		*sc;
	struct ifnet		*ifp;
	int			i, rid;

	sc = device_get_softc(dev);
	ifp = sc->rl_ifp;
	KASSERT(mtx_initialized(&sc->rl_mtx), ("re mutex not initialized"));

	/* These should only be active if attach succeeded */
	if (device_is_attached(dev)) {
#ifdef DEVICE_POLLING
		if (ifp->if_capenable & IFCAP_POLLING)
			ether_poll_deregister(ifp);
#endif
		RL_LOCK(sc);
#if 0
		sc->suspended = 1;
#endif
		re_stop(sc);
		RL_UNLOCK(sc);
		callout_drain(&sc->rl_stat_callout);
		taskqueue_drain(taskqueue_fast, &sc->rl_inttask);
		taskqueue_drain(taskqueue_fast, &sc->rl_txtask);
		/*
		 * Force off the IFF_UP flag here, in case someone
		 * still had a BPF descriptor attached to this
		 * interface. If they do, ether_ifdetach() will cause
		 * the BPF code to try and clear the promisc mode
		 * flag, which will bubble down to re_ioctl(),
		 * which will try to call re_init() again. This will
		 * turn the NIC back on and restart the MII ticker,
		 * which will panic the system when the kernel tries
		 * to invoke the re_tick() function that isn't there
		 * anymore.
		 */
		ifp->if_flags &= ~IFF_UP;
		ether_ifdetach(ifp);
	}
	if (sc->rl_miibus)
		device_delete_child(dev, sc->rl_miibus);
	bus_generic_detach(dev);

	/*
	 * The rest is resource deallocation, so we should already be
	 * stopped here.
	 */

	for (i = 0; i < RL_MSI_MESSAGES; i++) {
		if (sc->rl_intrhand[i] != NULL) {
			bus_teardown_intr(dev, sc->rl_irq[i],
			    sc->rl_intrhand[i]);
			sc->rl_intrhand[i] = NULL;
		}
	}
	if (ifp != NULL)
		if_free(ifp);
	if ((sc->rl_flags & RL_FLAG_MSI) == 0) {
		if (sc->rl_irq[0] != NULL) {
			bus_release_resource(dev, SYS_RES_IRQ, 0,
			    sc->rl_irq[0]);
			sc->rl_irq[0] = NULL;
		}
	} else {
		for (i = 0, rid = 1; i < RL_MSI_MESSAGES; i++, rid++) {
			if (sc->rl_irq[i] != NULL) {
				bus_release_resource(dev, SYS_RES_IRQ, rid,
				    sc->rl_irq[i]);
				sc->rl_irq[i] = NULL;
			}
		}
		pci_release_msi(dev);
	}
	if (sc->rl_res)
		bus_release_resource(dev, sc->rl_res_type, sc->rl_res_id,
		    sc->rl_res);

	/* Unload and free the RX DMA ring memory and map */

	if (sc->rl_ldata.rl_rx_list_tag) {
		bus_dmamap_unload(sc->rl_ldata.rl_rx_list_tag,
		    sc->rl_ldata.rl_rx_list_map);
		bus_dmamem_free(sc->rl_ldata.rl_rx_list_tag,
		    sc->rl_ldata.rl_rx_list,
		    sc->rl_ldata.rl_rx_list_map);
		bus_dma_tag_destroy(sc->rl_ldata.rl_rx_list_tag);
	}

	/* Unload and free the TX DMA ring memory and map */

	if (sc->rl_ldata.rl_tx_list_tag) {
		bus_dmamap_unload(sc->rl_ldata.rl_tx_list_tag,
		    sc->rl_ldata.rl_tx_list_map);
		bus_dmamem_free(sc->rl_ldata.rl_tx_list_tag,
		    sc->rl_ldata.rl_tx_list,
		    sc->rl_ldata.rl_tx_list_map);
		bus_dma_tag_destroy(sc->rl_ldata.rl_tx_list_tag);
	}

	/* Destroy all the RX and TX buffer maps */

	if (sc->rl_ldata.rl_tx_mtag) {
		for (i = 0; i < sc->rl_ldata.rl_tx_desc_cnt; i++)
			bus_dmamap_destroy(sc->rl_ldata.rl_tx_mtag,
			    sc->rl_ldata.rl_tx_desc[i].tx_dmamap);
		bus_dma_tag_destroy(sc->rl_ldata.rl_tx_mtag);
	}
	if (sc->rl_ldata.rl_rx_mtag) {
		for (i = 0; i < sc->rl_ldata.rl_rx_desc_cnt; i++)
			bus_dmamap_destroy(sc->rl_ldata.rl_rx_mtag,
			    sc->rl_ldata.rl_rx_desc[i].rx_dmamap);
		if (sc->rl_ldata.rl_rx_sparemap)
			bus_dmamap_destroy(sc->rl_ldata.rl_rx_mtag,
			    sc->rl_ldata.rl_rx_sparemap);
		bus_dma_tag_destroy(sc->rl_ldata.rl_rx_mtag);
	}

	/* Unload and free the stats buffer and map */

	if (sc->rl_ldata.rl_stag) {
		bus_dmamap_unload(sc->rl_ldata.rl_stag,
		    sc->rl_ldata.rl_smap);
		bus_dmamem_free(sc->rl_ldata.rl_stag,
		    sc->rl_ldata.rl_stats,
		    sc->rl_ldata.rl_smap);
		bus_dma_tag_destroy(sc->rl_ldata.rl_stag);
	}

	if (sc->rl_parent_tag)
		bus_dma_tag_destroy(sc->rl_parent_tag);

	mtx_destroy(&sc->rl_mtx);

	return (0);
}

static __inline void
re_discard_rxbuf(struct rl_softc *sc, int idx)
{
	struct rl_desc		*desc;
	struct rl_rxdesc	*rxd;
	uint32_t		cmdstat;

	rxd = &sc->rl_ldata.rl_rx_desc[idx];
	desc = &sc->rl_ldata.rl_rx_list[idx];
	desc->rl_vlanctl = 0;
	cmdstat = rxd->rx_size;
	if (idx == sc->rl_ldata.rl_rx_desc_cnt - 1)
		cmdstat |= RL_RDESC_CMD_EOR;
	desc->rl_cmdstat = htole32(cmdstat | RL_RDESC_CMD_OWN);
}

static int
re_newbuf(struct rl_softc *sc, int idx)
{
	struct mbuf		*m;
	struct rl_rxdesc	*rxd;
	bus_dma_segment_t	segs[1];
	bus_dmamap_t		map;
	struct rl_desc		*desc;
	uint32_t		cmdstat;
	int			error, nsegs;

	m = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR);
	if (m == NULL)
		return (ENOBUFS);

	m->m_len = m->m_pkthdr.len = MCLBYTES;
#ifdef RE_FIXUP_RX
	/*
	 * This is part of an evil trick to deal with non-x86 platforms.
	 * The RealTek chip requires RX buffers to be aligned on 64-bit
	 * boundaries, but that will hose non-x86 machines. To get around
	 * this, we leave some empty space at the start of each buffer
	 * and for non-x86 hosts, we copy the buffer back six bytes
	 * to achieve word alignment. This is slightly more efficient
	 * than allocating a new buffer, copying the contents, and
	 * discarding the old buffer.
	 */
	m_adj(m, RE_ETHER_ALIGN);
#endif
	error = bus_dmamap_load_mbuf_sg(sc->rl_ldata.rl_rx_mtag,
	    sc->rl_ldata.rl_rx_sparemap, m, segs, &nsegs, BUS_DMA_NOWAIT);
	if (error != 0) {
		m_freem(m);
		return (ENOBUFS);
	}
	KASSERT(nsegs == 1, ("%s: %d segment returned!", __func__, nsegs));

	rxd = &sc->rl_ldata.rl_rx_desc[idx];
	if (rxd->rx_m != NULL) {
		bus_dmamap_sync(sc->rl_ldata.rl_rx_mtag, rxd->rx_dmamap,
		    BUS_DMASYNC_POSTREAD);
		bus_dmamap_unload(sc->rl_ldata.rl_rx_mtag, rxd->rx_dmamap);
	}

	rxd->rx_m = m;
	map = rxd->rx_dmamap;
	rxd->rx_dmamap = sc->rl_ldata.rl_rx_sparemap;
	rxd->rx_size = segs[0].ds_len;
	sc->rl_ldata.rl_rx_sparemap = map;
	bus_dmamap_sync(sc->rl_ldata.rl_rx_mtag, rxd->rx_dmamap,
	    BUS_DMASYNC_PREREAD);

	desc = &sc->rl_ldata.rl_rx_list[idx];
	desc->rl_vlanctl = 0;
	desc->rl_bufaddr_lo = htole32(RL_ADDR_LO(segs[0].ds_addr));
	desc->rl_bufaddr_hi = htole32(RL_ADDR_HI(segs[0].ds_addr));
	cmdstat = segs[0].ds_len;
	if (idx == sc->rl_ldata.rl_rx_desc_cnt - 1)
		cmdstat |= RL_RDESC_CMD_EOR;
	desc->rl_cmdstat = htole32(cmdstat | RL_RDESC_CMD_OWN);

	return (0);
}

#ifdef RE_FIXUP_RX
static __inline void
re_fixup_rx(struct mbuf *m)
{
	int			i;
	uint16_t		*src, *dst;

	src = mtod(m, uint16_t *);
	dst = src - (RE_ETHER_ALIGN - ETHER_ALIGN) / sizeof *src;

	for (i = 0; i < (m->m_len / sizeof(uint16_t) + 1); i++)
		*dst++ = *src++;

	m->m_data -= RE_ETHER_ALIGN - ETHER_ALIGN;
}
#endif
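
/*
 * The fixup copies the whole frame back by RE_ETHER_ALIGN - ETHER_ALIGN
 * bytes (six on these chips), undoing the padding added in re_newbuf()
 * so that the IP header following the 14-byte Ethernet header ends up
 * 32-bit aligned for the protocol stack.
 */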
1738
1739static int
1740re_tx_list_init(struct rl_softc *sc)
1741{
1742	struct rl_desc		*desc;
1743	int			i;
1744
1745	RL_LOCK_ASSERT(sc);
1746
1747	bzero(sc->rl_ldata.rl_tx_list,
1748	    sc->rl_ldata.rl_tx_desc_cnt * sizeof(struct rl_desc));
1749	for (i = 0; i < sc->rl_ldata.rl_tx_desc_cnt; i++)
1750		sc->rl_ldata.rl_tx_desc[i].tx_m = NULL;
1751	/* Set EOR. */
1752	desc = &sc->rl_ldata.rl_tx_list[sc->rl_ldata.rl_tx_desc_cnt - 1];
1753	desc->rl_cmdstat |= htole32(RL_TDESC_CMD_EOR);
1754
1755	bus_dmamap_sync(sc->rl_ldata.rl_tx_list_tag,
1756	    sc->rl_ldata.rl_tx_list_map,
1757	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1758
1759	sc->rl_ldata.rl_tx_prodidx = 0;
1760	sc->rl_ldata.rl_tx_considx = 0;
1761	sc->rl_ldata.rl_tx_free = sc->rl_ldata.rl_tx_desc_cnt;
1762
1763	return (0);
1764}
1765
1766static int
1767re_rx_list_init(struct rl_softc *sc)
1768{
1769	int			error, i;
1770
1771	bzero(sc->rl_ldata.rl_rx_list,
1772	    sc->rl_ldata.rl_rx_desc_cnt * sizeof(struct rl_desc));
1773	for (i = 0; i < sc->rl_ldata.rl_rx_desc_cnt; i++) {
1774		sc->rl_ldata.rl_rx_desc[i].rx_m = NULL;
1775		if ((error = re_newbuf(sc, i)) != 0)
1776			return (error);
1777	}
1778
1779	/* Flush the RX descriptors */
1780
1781	bus_dmamap_sync(sc->rl_ldata.rl_rx_list_tag,
1782	    sc->rl_ldata.rl_rx_list_map,
1783	    BUS_DMASYNC_PREWRITE|BUS_DMASYNC_PREREAD);
1784
1785	sc->rl_ldata.rl_rx_prodidx = 0;
1786	sc->rl_head = sc->rl_tail = NULL;
1787
1788	return (0);
1789}
1790
1791/*
1792 * RX handler for C+ and 8169. For the gigE chips, we support
1793 * the reception of jumbo frames that have been fragmented
1794 * across multiple 2K mbuf cluster buffers.
1795 */
1796static int
1797re_rxeof(struct rl_softc *sc, int *rx_npktsp)
1798{
1799	struct mbuf		*m;
1800	struct ifnet		*ifp;
1801	int			i, total_len;
1802	struct rl_desc		*cur_rx;
1803	u_int32_t		rxstat, rxvlan;
1804	int			maxpkt = 16, rx_npkts = 0;
1805
1806	RL_LOCK_ASSERT(sc);
1807
1808	ifp = sc->rl_ifp;
1809
1810	/* Invalidate the descriptor memory */
1811
1812	bus_dmamap_sync(sc->rl_ldata.rl_rx_list_tag,
1813	    sc->rl_ldata.rl_rx_list_map,
1814	    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
1815
1816	for (i = sc->rl_ldata.rl_rx_prodidx; maxpkt > 0;
1817	    i = RL_RX_DESC_NXT(sc, i)) {
1818		cur_rx = &sc->rl_ldata.rl_rx_list[i];
1819		rxstat = le32toh(cur_rx->rl_cmdstat);
1820		if ((rxstat & RL_RDESC_STAT_OWN) != 0)
1821			break;
1822		total_len = rxstat & sc->rl_rxlenmask;
1823		rxvlan = le32toh(cur_rx->rl_vlanctl);
1824		m = sc->rl_ldata.rl_rx_desc[i].rx_m;
1825
1826		if (!(rxstat & RL_RDESC_STAT_EOF)) {
1827			if (re_newbuf(sc, i) != 0) {
1828				/*
1829				 * If this is part of a multi-fragment packet,
1830				 * discard all the pieces.
1831				 */
1832				if (sc->rl_head != NULL) {
1833					m_freem(sc->rl_head);
1834					sc->rl_head = sc->rl_tail = NULL;
1835				}
1836				re_discard_rxbuf(sc, i);
1837				continue;
1838			}
1839			m->m_len = RE_RX_DESC_BUFLEN;
1840			if (sc->rl_head == NULL)
1841				sc->rl_head = sc->rl_tail = m;
1842			else {
1843				m->m_flags &= ~M_PKTHDR;
1844				sc->rl_tail->m_next = m;
1845				sc->rl_tail = m;
1846			}
1847			continue;
1848		}
1849
1850		/*
1851		 * NOTE: for the 8139C+, the frame length field
1852		 * is always 12 bits in size, but for the gigE chips,
1853		 * it is 13 bits (since the max RX frame length is 16K).
1854		 * Unfortunately, all 32 bits in the status word
1855		 * were already used, so to make room for the extra
1856		 * length bit, RealTek took out the 'frame alignment
1857		 * error' bit and shifted the other status bits
1858		 * over one slot. The OWN, EOR, FS and LS bits are
1859		 * still in the same places. We have already extracted
1860		 * the frame length and checked the OWN bit, so rather
1861		 * than using an alternate bit mapping, we shift the
1862		 * status bits one space to the right so we can evaluate
		 * them using the 8169 status as though it were in the
1864		 * same format as that of the 8139C+.
1865		 */
1866		if (sc->rl_type == RL_8169)
1867			rxstat >>= 1;
1868
1869		/*
		 * If total_len > 2^13-1, both _RXERRSUM and _GIANT will be
		 * set, but if the CRC error bit is clear, the frame will
		 * still be valid.
1872		 */
1873		if (rxstat & RL_RDESC_STAT_RXERRSUM && !(total_len > 8191 &&
1874		    (rxstat & RL_RDESC_STAT_ERRS) == RL_RDESC_STAT_GIANT)) {
1875			ifp->if_ierrors++;
1876			/*
1877			 * If this is part of a multi-fragment packet,
1878			 * discard all the pieces.
1879			 */
1880			if (sc->rl_head != NULL) {
1881				m_freem(sc->rl_head);
1882				sc->rl_head = sc->rl_tail = NULL;
1883			}
1884			re_discard_rxbuf(sc, i);
1885			continue;
1886		}
1887
1888		/*
1889		 * If allocating a replacement mbuf fails,
1890		 * reload the current one.
1891		 */
1892
1893		if (re_newbuf(sc, i) != 0) {
1894			ifp->if_iqdrops++;
1895			if (sc->rl_head != NULL) {
1896				m_freem(sc->rl_head);
1897				sc->rl_head = sc->rl_tail = NULL;
1898			}
1899			re_discard_rxbuf(sc, i);
1900			continue;
1901		}
1902
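		/*
		 * Complete a multi-fragment frame: every fragment but
		 * the last one is a full RE_RX_DESC_BUFLEN bytes, so
		 * the last fragment's length is the remainder of
		 * total_len (or a full buffer when it divides evenly).
		 */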
1903		if (sc->rl_head != NULL) {
1904			m->m_len = total_len % RE_RX_DESC_BUFLEN;
1905			if (m->m_len == 0)
1906				m->m_len = RE_RX_DESC_BUFLEN;
1907			/*
			 * Special case: if this buffer holds 4 bytes
			 * or less, the mbuf can be discarded: the last
			 * 4 bytes are the CRC, which we don't care
			 * about anyway.
1912			 */
1913			if (m->m_len <= ETHER_CRC_LEN) {
1914				sc->rl_tail->m_len -=
1915				    (ETHER_CRC_LEN - m->m_len);
1916				m_freem(m);
1917			} else {
1918				m->m_len -= ETHER_CRC_LEN;
1919				m->m_flags &= ~M_PKTHDR;
1920				sc->rl_tail->m_next = m;
1921			}
1922			m = sc->rl_head;
1923			sc->rl_head = sc->rl_tail = NULL;
1924			m->m_pkthdr.len = total_len - ETHER_CRC_LEN;
1925		} else
1926			m->m_pkthdr.len = m->m_len =
1927			    (total_len - ETHER_CRC_LEN);
1928
1929#ifdef RE_FIXUP_RX
1930		re_fixup_rx(m);
1931#endif
1932		ifp->if_ipackets++;
1933		m->m_pkthdr.rcvif = ifp;
1934
1935		/* Do RX checksumming if enabled */
1936
1937		if (ifp->if_capenable & IFCAP_RXCSUM) {
1938			if ((sc->rl_flags & RL_FLAG_DESCV2) == 0) {
1939				/* Check IP header checksum */
1940				if (rxstat & RL_RDESC_STAT_PROTOID)
1941					m->m_pkthdr.csum_flags |=
1942					    CSUM_IP_CHECKED;
1943				if (!(rxstat & RL_RDESC_STAT_IPSUMBAD))
1944					m->m_pkthdr.csum_flags |=
1945					    CSUM_IP_VALID;
1946
1947				/* Check TCP/UDP checksum */
1948				if ((RL_TCPPKT(rxstat) &&
1949				    !(rxstat & RL_RDESC_STAT_TCPSUMBAD)) ||
1950				    (RL_UDPPKT(rxstat) &&
1951				     !(rxstat & RL_RDESC_STAT_UDPSUMBAD))) {
1952					m->m_pkthdr.csum_flags |=
1953						CSUM_DATA_VALID|CSUM_PSEUDO_HDR;
1954					m->m_pkthdr.csum_data = 0xffff;
1955				}
1956			} else {
1957				/*
				 * RTL8168C/RTL8168CP/RTL8111C/RTL8111CP
1959				 */
1960				if ((rxstat & RL_RDESC_STAT_PROTOID) &&
1961				    (rxvlan & RL_RDESC_IPV4))
1962					m->m_pkthdr.csum_flags |=
1963					    CSUM_IP_CHECKED;
1964				if (!(rxstat & RL_RDESC_STAT_IPSUMBAD) &&
1965				    (rxvlan & RL_RDESC_IPV4))
1966					m->m_pkthdr.csum_flags |=
1967					    CSUM_IP_VALID;
1968				if (((rxstat & RL_RDESC_STAT_TCP) &&
1969				    !(rxstat & RL_RDESC_STAT_TCPSUMBAD)) ||
1970				    ((rxstat & RL_RDESC_STAT_UDP) &&
1971				    !(rxstat & RL_RDESC_STAT_UDPSUMBAD))) {
1972					m->m_pkthdr.csum_flags |=
1973						CSUM_DATA_VALID|CSUM_PSEUDO_HDR;
1974					m->m_pkthdr.csum_data = 0xffff;
1975				}
1976			}
1977		}
1978		maxpkt--;
1979		if (rxvlan & RL_RDESC_VLANCTL_TAG) {
1980			m->m_pkthdr.ether_vtag =
1981			    bswap16((rxvlan & RL_RDESC_VLANCTL_DATA));
1982			m->m_flags |= M_VLANTAG;
1983		}
1984		RL_UNLOCK(sc);
1985		(*ifp->if_input)(ifp, m);
1986		RL_LOCK(sc);
1987		rx_npkts++;
1988	}
1989
1990	/* Flush the RX DMA ring */
1991
1992	bus_dmamap_sync(sc->rl_ldata.rl_rx_list_tag,
1993	    sc->rl_ldata.rl_rx_list_map,
1994	    BUS_DMASYNC_PREWRITE|BUS_DMASYNC_PREREAD);
1995
1996	sc->rl_ldata.rl_rx_prodidx = i;
1997
1998	if (rx_npktsp != NULL)
1999		*rx_npktsp = rx_npkts;
2000	if (maxpkt)
		return (EAGAIN);

	return (0);
2004}
2005
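/*
 * Reclaim completed transmit descriptors.  We walk from the consumer
 * index toward the producer index, stopping at the first descriptor
 * that the chip still owns, and credit each reclaimed slot back to
 * rl_tx_free so that re_start() can use it again.
 */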
2006static void
2007re_txeof(struct rl_softc *sc)
2008{
2009	struct ifnet		*ifp;
2010	struct rl_txdesc	*txd;
2011	u_int32_t		txstat;
2012	int			cons;
2013
2014	cons = sc->rl_ldata.rl_tx_considx;
2015	if (cons == sc->rl_ldata.rl_tx_prodidx)
2016		return;
2017
2018	ifp = sc->rl_ifp;
2019	/* Invalidate the TX descriptor list */
2020	bus_dmamap_sync(sc->rl_ldata.rl_tx_list_tag,
2021	    sc->rl_ldata.rl_tx_list_map,
2022	    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
2023
2024	for (; cons != sc->rl_ldata.rl_tx_prodidx;
2025	    cons = RL_TX_DESC_NXT(sc, cons)) {
2026		txstat = le32toh(sc->rl_ldata.rl_tx_list[cons].rl_cmdstat);
2027		if (txstat & RL_TDESC_STAT_OWN)
2028			break;
2029		/*
2030		 * We only stash mbufs in the last descriptor
2031		 * in a fragment chain, which also happens to
2032		 * be the only place where the TX status bits
2033		 * are valid.
2034		 */
2035		if (txstat & RL_TDESC_CMD_EOF) {
2036			txd = &sc->rl_ldata.rl_tx_desc[cons];
2037			bus_dmamap_sync(sc->rl_ldata.rl_tx_mtag,
2038			    txd->tx_dmamap, BUS_DMASYNC_POSTWRITE);
2039			bus_dmamap_unload(sc->rl_ldata.rl_tx_mtag,
2040			    txd->tx_dmamap);
2041			KASSERT(txd->tx_m != NULL,
2042			    ("%s: freeing NULL mbufs!", __func__));
2043			m_freem(txd->tx_m);
2044			txd->tx_m = NULL;
2045			if (txstat & (RL_TDESC_STAT_EXCESSCOL|
2046			    RL_TDESC_STAT_COLCNT))
2047				ifp->if_collisions++;
2048			if (txstat & RL_TDESC_STAT_TXERRSUM)
2049				ifp->if_oerrors++;
2050			else
2051				ifp->if_opackets++;
2052		}
2053		sc->rl_ldata.rl_tx_free++;
2054		ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
2055	}
2056	sc->rl_ldata.rl_tx_considx = cons;
2057
2058	/* No changes made to the TX ring, so no flush needed */
2059
2060	if (sc->rl_ldata.rl_tx_free != sc->rl_ldata.rl_tx_desc_cnt) {
2061#ifdef RE_TX_MODERATION
2062		/*
2063		 * If not all descriptors have been reaped yet, reload
2064		 * the timer so that we will eventually get another
2065		 * interrupt that will cause us to re-enter this routine.
2066		 * This is done in case the transmitter has gone idle.
2067		 */
2068		CSR_WRITE_4(sc, RL_TIMERCNT, 1);
2069#endif
2070	} else
2071		sc->rl_watchdog_timer = 0;
2072}
2073
2074static void
2075re_tick(void *xsc)
2076{
2077	struct rl_softc		*sc;
2078	struct mii_data		*mii;
2079
2080	sc = xsc;
2081
2082	RL_LOCK_ASSERT(sc);
2083
2084	mii = device_get_softc(sc->rl_miibus);
2085	mii_tick(mii);
2086	if ((sc->rl_flags & RL_FLAG_LINK) == 0)
2087		re_miibus_statchg(sc->rl_dev);
2088	/*
	 * Reclaim transmitted frames here. Technically this is not
	 * necessary, but it ensures that frames are reclaimed
	 * periodically even when the Tx completion interrupt is lost,
	 * which seems to happen on PCIe based controllers under
	 * certain situations.
2093	 */
2094	re_txeof(sc);
2095	re_watchdog(sc);
2096	callout_reset(&sc->rl_stat_callout, hz, re_tick, sc);
2097}
2098
2099#ifdef DEVICE_POLLING
2100static int
2101re_poll(struct ifnet *ifp, enum poll_cmd cmd, int count)
2102{
2103	struct rl_softc *sc = ifp->if_softc;
2104	int rx_npkts = 0;
2105
2106	RL_LOCK(sc);
2107	if (ifp->if_drv_flags & IFF_DRV_RUNNING)
2108		rx_npkts = re_poll_locked(ifp, cmd, count);
2109	RL_UNLOCK(sc);
2110	return (rx_npkts);
2111}
2112
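/*
 * Polling body, called with the driver lock held.  Besides the usual
 * RX/TX processing, a POLL_AND_CHECK_STATUS pass also reads and
 * acknowledges the interrupt status register so that error events
 * such as RL_ISR_SYSTEM_ERR are still handled while interrupts are
 * disabled.
 */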
2113static int
2114re_poll_locked(struct ifnet *ifp, enum poll_cmd cmd, int count)
2115{
2116	struct rl_softc *sc = ifp->if_softc;
2117	int rx_npkts;
2118
2119	RL_LOCK_ASSERT(sc);
2120
2121	sc->rxcycles = count;
2122	re_rxeof(sc, &rx_npkts);
2123	re_txeof(sc);
2124
2125	if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
2126		taskqueue_enqueue_fast(taskqueue_fast, &sc->rl_txtask);
2127
2128	if (cmd == POLL_AND_CHECK_STATUS) { /* also check status register */
2129		u_int16_t       status;
2130
2131		status = CSR_READ_2(sc, RL_ISR);
2132		if (status == 0xffff)
2133			return (rx_npkts);
2134		if (status)
2135			CSR_WRITE_2(sc, RL_ISR, status);
2136		if ((status & (RL_ISR_TX_OK | RL_ISR_TX_DESC_UNAVAIL)) &&
2137		    (sc->rl_flags & RL_FLAG_PCIE))
2138			CSR_WRITE_1(sc, sc->rl_txstart, RL_TXSTART_START);
2139
2140		/*
2141		 * XXX check behaviour on receiver stalls.
2142		 */
2143
2144		if (status & RL_ISR_SYSTEM_ERR)
2145			re_init_locked(sc);
2146	}
2147	return (rx_npkts);
2148}
2149#endif /* DEVICE_POLLING */
2150
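/*
 * Interrupt filter routine.  This runs in primary interrupt context
 * and so does as little as possible: a status of 0xFFFF means the
 * hardware is gone (or the interrupt is not ours), so it is treated
 * as stray; otherwise further interrupts are masked and the real
 * work is deferred to re_int_task() via the fast taskqueue.
 */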
2151static int
2152re_intr(void *arg)
2153{
2154	struct rl_softc		*sc;
2155	uint16_t		status;
2156
2157	sc = arg;
2158
2159	status = CSR_READ_2(sc, RL_ISR);
2160	if (status == 0xFFFF || (status & RL_INTRS_CPLUS) == 0)
		return (FILTER_STRAY);
2162	CSR_WRITE_2(sc, RL_IMR, 0);
2163
2164	taskqueue_enqueue_fast(taskqueue_fast, &sc->rl_inttask);
2165
2166	return (FILTER_HANDLED);
2167}
2168
2169static void
2170re_int_task(void *arg, int npending)
2171{
2172	struct rl_softc		*sc;
2173	struct ifnet		*ifp;
2174	u_int16_t		status;
2175	int			rval = 0;
2176
2177	sc = arg;
2178	ifp = sc->rl_ifp;
2179
2180	RL_LOCK(sc);
2181
2182	status = CSR_READ_2(sc, RL_ISR);
	CSR_WRITE_2(sc, RL_ISR, status);
2184
2185	if (sc->suspended ||
2186	    (ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
2187		RL_UNLOCK(sc);
2188		return;
2189	}
2190
2191#ifdef DEVICE_POLLING
	if (ifp->if_capenable & IFCAP_POLLING) {
2193		RL_UNLOCK(sc);
2194		return;
2195	}
2196#endif
2197
2198	if (status & (RL_ISR_RX_OK|RL_ISR_RX_ERR|RL_ISR_FIFO_OFLOW))
2199		rval = re_rxeof(sc, NULL);
2200
2201	/*
2202	 * Some chips will ignore a second TX request issued
2203	 * while an existing transmission is in progress. If
2204	 * the transmitter goes idle but there are still
2205	 * packets waiting to be sent, we need to restart the
2206	 * channel here to flush them out. This only seems to
2207	 * be required with the PCIe devices.
2208	 */
2209	if ((status & (RL_ISR_TX_OK | RL_ISR_TX_DESC_UNAVAIL)) &&
2210	    (sc->rl_flags & RL_FLAG_PCIE))
2211		CSR_WRITE_1(sc, sc->rl_txstart, RL_TXSTART_START);
2212	if (status & (
2213#ifdef RE_TX_MODERATION
2214	    RL_ISR_TIMEOUT_EXPIRED|
2215#else
2216	    RL_ISR_TX_OK|
2217#endif
2218	    RL_ISR_TX_ERR|RL_ISR_TX_DESC_UNAVAIL))
2219		re_txeof(sc);
2220
2221	if (status & RL_ISR_SYSTEM_ERR)
2222		re_init_locked(sc);
2223
2224	if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
2225		taskqueue_enqueue_fast(taskqueue_fast, &sc->rl_txtask);
2226
2227	RL_UNLOCK(sc);
2228
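	/*
	 * Before unmasking interrupts, check whether more events
	 * arrived while they were masked.  If so, requeue the task
	 * rather than re-enabling interrupts, so that no events are
	 * lost.
	 */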
	if ((CSR_READ_2(sc, RL_ISR) & RL_INTRS_CPLUS) || rval) {
2230		taskqueue_enqueue_fast(taskqueue_fast, &sc->rl_inttask);
2231		return;
2232	}
2233
2234	CSR_WRITE_2(sc, RL_IMR, RL_INTRS_CPLUS);
2235}
2236
2237static int
2238re_encap(struct rl_softc *sc, struct mbuf **m_head)
2239{
2240	struct rl_txdesc	*txd, *txd_last;
2241	bus_dma_segment_t	segs[RL_NTXSEGS];
2242	bus_dmamap_t		map;
2243	struct mbuf		*m_new;
2244	struct rl_desc		*desc;
2245	int			nsegs, prod;
2246	int			i, error, ei, si;
2247	int			padlen;
2248	uint32_t		cmdstat, csum_flags, vlanctl;
2249
2250	RL_LOCK_ASSERT(sc);
2251	M_ASSERTPKTHDR((*m_head));
2252
2253	/*
2254	 * With some of the RealTek chips, using the checksum offload
2255	 * support in conjunction with the autopadding feature results
2256	 * in the transmission of corrupt frames. For example, if we
2257	 * need to send a really small IP fragment that's less than 60
2258	 * bytes in size, and IP header checksumming is enabled, the
2259	 * resulting ethernet frame that appears on the wire will
	 * have a garbled payload. To work around this, on chips that
	 * lack a working autopad feature we manually pad short frames
	 * out to the minimum ethernet frame size whenever TX IP
	 * checksum offload is enabled.
2263	 */
2264	if ((sc->rl_flags & RL_FLAG_AUTOPAD) == 0 &&
2265	    (*m_head)->m_pkthdr.len < RL_IP4CSUMTX_PADLEN &&
2266	    ((*m_head)->m_pkthdr.csum_flags & CSUM_IP) != 0) {
2267		padlen = RL_MIN_FRAMELEN - (*m_head)->m_pkthdr.len;
2268		if (M_WRITABLE(*m_head) == 0) {
2269			/* Get a writable copy. */
2270			m_new = m_dup(*m_head, M_DONTWAIT);
2271			m_freem(*m_head);
2272			if (m_new == NULL) {
2273				*m_head = NULL;
2274				return (ENOBUFS);
2275			}
2276			*m_head = m_new;
2277		}
2278		if ((*m_head)->m_next != NULL ||
2279		    M_TRAILINGSPACE(*m_head) < padlen) {
2280			m_new = m_defrag(*m_head, M_DONTWAIT);
2281			if (m_new == NULL) {
2282				m_freem(*m_head);
2283				*m_head = NULL;
2284				return (ENOBUFS);
2285			}
2286		} else
2287			m_new = *m_head;
2288
2289		/*
2290		 * Manually pad short frames, and zero the pad space
2291		 * to avoid leaking data.
2292		 */
2293		bzero(mtod(m_new, char *) + m_new->m_pkthdr.len, padlen);
2294		m_new->m_pkthdr.len += padlen;
2295		m_new->m_len = m_new->m_pkthdr.len;
2296		*m_head = m_new;
2297	}
2298
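	/*
	 * Map the frame for DMA.  If it has more than RL_NTXSEGS
	 * fragments, the load fails with EFBIG and the mbuf chain is
	 * collapsed into at most RL_NTXSEGS segments before retrying
	 * once.
	 */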
2299	prod = sc->rl_ldata.rl_tx_prodidx;
2300	txd = &sc->rl_ldata.rl_tx_desc[prod];
2301	error = bus_dmamap_load_mbuf_sg(sc->rl_ldata.rl_tx_mtag, txd->tx_dmamap,
2302	    *m_head, segs, &nsegs, BUS_DMA_NOWAIT);
2303	if (error == EFBIG) {
2304		m_new = m_collapse(*m_head, M_DONTWAIT, RL_NTXSEGS);
2305		if (m_new == NULL) {
2306			m_freem(*m_head);
2307			*m_head = NULL;
2308			return (ENOBUFS);
2309		}
2310		*m_head = m_new;
2311		error = bus_dmamap_load_mbuf_sg(sc->rl_ldata.rl_tx_mtag,
2312		    txd->tx_dmamap, *m_head, segs, &nsegs, BUS_DMA_NOWAIT);
2313		if (error != 0) {
2314			m_freem(*m_head);
2315			*m_head = NULL;
2316			return (error);
2317		}
2318	} else if (error != 0)
2319		return (error);
2320	if (nsegs == 0) {
2321		m_freem(*m_head);
2322		*m_head = NULL;
2323		return (EIO);
2324	}
2325
2326	/* Check for number of available descriptors. */
2327	if (sc->rl_ldata.rl_tx_free - nsegs <= 1) {
2328		bus_dmamap_unload(sc->rl_ldata.rl_tx_mtag, txd->tx_dmamap);
2329		return (ENOBUFS);
2330	}
2331
2332	bus_dmamap_sync(sc->rl_ldata.rl_tx_mtag, txd->tx_dmamap,
2333	    BUS_DMASYNC_PREWRITE);
2334
2335	/*
2336	 * Set up checksum offload. Note: checksum offload bits must
2337	 * appear in all descriptors of a multi-descriptor transmit
	 * attempt. This is a hard requirement, confirmed by testing
	 * done with an 8169 chip.
2340	 */
2341	vlanctl = 0;
2342	csum_flags = 0;
2343	if (((*m_head)->m_pkthdr.csum_flags & CSUM_TSO) != 0)
2344		csum_flags = RL_TDESC_CMD_LGSEND |
2345		    ((uint32_t)(*m_head)->m_pkthdr.tso_segsz <<
2346		    RL_TDESC_CMD_MSSVAL_SHIFT);
2347	else {
2348		/*
		 * Unconditionally enable IP checksumming when a TCP or
		 * UDP checksum is requested. Otherwise, the TCP/UDP
		 * checksum bits have no effect.
2352		 */
2353		if (((*m_head)->m_pkthdr.csum_flags & RE_CSUM_FEATURES) != 0) {
2354			if ((sc->rl_flags & RL_FLAG_DESCV2) == 0) {
2355				csum_flags |= RL_TDESC_CMD_IPCSUM;
2356				if (((*m_head)->m_pkthdr.csum_flags &
2357				    CSUM_TCP) != 0)
2358					csum_flags |= RL_TDESC_CMD_TCPCSUM;
2359				if (((*m_head)->m_pkthdr.csum_flags &
2360				    CSUM_UDP) != 0)
2361					csum_flags |= RL_TDESC_CMD_UDPCSUM;
2362			} else {
2363				vlanctl |= RL_TDESC_CMD_IPCSUMV2;
2364				if (((*m_head)->m_pkthdr.csum_flags &
2365				    CSUM_TCP) != 0)
2366					vlanctl |= RL_TDESC_CMD_TCPCSUMV2;
2367				if (((*m_head)->m_pkthdr.csum_flags &
2368				    CSUM_UDP) != 0)
2369					vlanctl |= RL_TDESC_CMD_UDPCSUMV2;
2370			}
2371		}
2372	}
2373
2374	/*
2375	 * Set up hardware VLAN tagging. Note: vlan tag info must
2376	 * appear in all descriptors of a multi-descriptor
2377	 * transmission attempt.
2378	 */
2379	if ((*m_head)->m_flags & M_VLANTAG)
2380		vlanctl |= bswap16((*m_head)->m_pkthdr.ether_vtag) |
2381		    RL_TDESC_VLANCTL_TAG;
2382
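	/*
	 * Build the descriptor chain.  The OWN bit is deliberately
	 * left clear on the first (SOF) descriptor while the chain is
	 * constructed; it is set last, below, so that the chip never
	 * sees a partially built chain.
	 */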
2383	si = prod;
2384	for (i = 0; i < nsegs; i++, prod = RL_TX_DESC_NXT(sc, prod)) {
2385		desc = &sc->rl_ldata.rl_tx_list[prod];
2386		desc->rl_vlanctl = htole32(vlanctl);
2387		desc->rl_bufaddr_lo = htole32(RL_ADDR_LO(segs[i].ds_addr));
2388		desc->rl_bufaddr_hi = htole32(RL_ADDR_HI(segs[i].ds_addr));
2389		cmdstat = segs[i].ds_len;
2390		if (i != 0)
2391			cmdstat |= RL_TDESC_CMD_OWN;
2392		if (prod == sc->rl_ldata.rl_tx_desc_cnt - 1)
2393			cmdstat |= RL_TDESC_CMD_EOR;
2394		desc->rl_cmdstat = htole32(cmdstat | csum_flags);
2395		sc->rl_ldata.rl_tx_free--;
2396	}
2397	/* Update producer index. */
2398	sc->rl_ldata.rl_tx_prodidx = prod;
2399
2400	/* Set EOF on the last descriptor. */
2401	ei = RL_TX_DESC_PRV(sc, prod);
2402	desc = &sc->rl_ldata.rl_tx_list[ei];
2403	desc->rl_cmdstat |= htole32(RL_TDESC_CMD_EOF);
2404
2405	desc = &sc->rl_ldata.rl_tx_list[si];
2406	/* Set SOF and transfer ownership of packet to the chip. */
2407	desc->rl_cmdstat |= htole32(RL_TDESC_CMD_OWN | RL_TDESC_CMD_SOF);
2408
2409	/*
	 * Ensure that the map for this transmission
2411	 * is placed at the array index of the last descriptor
2412	 * in this chain.  (Swap last and first dmamaps.)
2413	 */
2414	txd_last = &sc->rl_ldata.rl_tx_desc[ei];
2415	map = txd->tx_dmamap;
2416	txd->tx_dmamap = txd_last->tx_dmamap;
2417	txd_last->tx_dmamap = map;
2418	txd_last->tx_m = *m_head;
2419
2420	return (0);
2421}
2422
2423static void
2424re_tx_task(void *arg, int npending)
2425{
2426	struct ifnet		*ifp;
2427
2428	ifp = arg;
2429	re_start(ifp);
2430}
2431
2432/*
2433 * Main transmit routine for C+ and gigE NICs.
2434 */
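/*
 * Frames are dequeued and encapsulated until the send queue is empty
 * or the ring runs out of free descriptors.  On an encapsulation
 * failure the frame is put back at the head of the queue and
 * IFF_DRV_OACTIVE is set so that the stack stops handing us packets
 * until re_txeof() frees up descriptors again.
 */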
2435static void
2436re_start(struct ifnet *ifp)
2437{
2438	struct rl_softc		*sc;
2439	struct mbuf		*m_head;
2440	int			queued;
2441
2442	sc = ifp->if_softc;
2443
2444	RL_LOCK(sc);
2445
2446	if ((ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) !=
2447	    IFF_DRV_RUNNING || (sc->rl_flags & RL_FLAG_LINK) == 0) {
2448		RL_UNLOCK(sc);
2449		return;
2450	}
2451
2452	for (queued = 0; !IFQ_DRV_IS_EMPTY(&ifp->if_snd) &&
2453	    sc->rl_ldata.rl_tx_free > 1;) {
2454		IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
2455		if (m_head == NULL)
2456			break;
2457
2458		if (re_encap(sc, &m_head) != 0) {
2459			if (m_head == NULL)
2460				break;
2461			IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
2462			ifp->if_drv_flags |= IFF_DRV_OACTIVE;
2463			break;
2464		}
2465
2466		/*
2467		 * If there's a BPF listener, bounce a copy of this frame
2468		 * to him.
2469		 */
2470		ETHER_BPF_MTAP(ifp, m_head);
2471
2472		queued++;
2473	}
2474
2475	if (queued == 0) {
2476#ifdef RE_TX_MODERATION
2477		if (sc->rl_ldata.rl_tx_free != sc->rl_ldata.rl_tx_desc_cnt)
2478			CSR_WRITE_4(sc, RL_TIMERCNT, 1);
2479#endif
2480		RL_UNLOCK(sc);
2481		return;
2482	}
2483
2484	/* Flush the TX descriptors */
2485
2486	bus_dmamap_sync(sc->rl_ldata.rl_tx_list_tag,
2487	    sc->rl_ldata.rl_tx_list_map,
2488	    BUS_DMASYNC_PREWRITE|BUS_DMASYNC_PREREAD);
2489
2490	CSR_WRITE_1(sc, sc->rl_txstart, RL_TXSTART_START);
2491
2492#ifdef RE_TX_MODERATION
2493	/*
2494	 * Use the countdown timer for interrupt moderation.
2495	 * 'TX done' interrupts are disabled. Instead, we reset the
2496	 * countdown timer, which will begin counting until it hits
2497	 * the value in the TIMERINT register, and then trigger an
2498	 * interrupt. Each time we write to the TIMERCNT register,
2499	 * the timer count is reset to 0.
2500	 */
2501	CSR_WRITE_4(sc, RL_TIMERCNT, 1);
2502#endif
2503
2504	/*
2505	 * Set a timeout in case the chip goes out to lunch.
2506	 */
2507	sc->rl_watchdog_timer = 5;
2508
2509	RL_UNLOCK(sc);
2510}
2511
2512static void
2513re_init(void *xsc)
2514{
2515	struct rl_softc		*sc = xsc;
2516
2517	RL_LOCK(sc);
2518	re_init_locked(sc);
2519	RL_UNLOCK(sc);
2520}
2521
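/*
 * Bring the interface up: stop and reset the controller, program the
 * C+ command register before anything else (the chip requires this
 * ordering), load the station address and the RX/TX ring addresses,
 * and enable the receiver and transmitter before restarting the
 * tick callout.
 */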
2522static void
2523re_init_locked(struct rl_softc *sc)
2524{
2525	struct ifnet		*ifp = sc->rl_ifp;
2526	struct mii_data		*mii;
2527	uint32_t		reg;
2528	uint16_t		cfg;
2529	union {
2530		uint32_t align_dummy;
2531		u_char eaddr[ETHER_ADDR_LEN];
	} eaddr;
2533
2534	RL_LOCK_ASSERT(sc);
2535
2536	mii = device_get_softc(sc->rl_miibus);
2537
2538	/*
2539	 * Cancel pending I/O and free all RX/TX buffers.
2540	 */
2541	re_stop(sc);
2542
2543	/* Put controller into known state. */
2544	re_reset(sc);
2545
2546	/*
2547	 * Enable C+ RX and TX mode, as well as VLAN stripping and
2548	 * RX checksum offload. We must configure the C+ register
2549	 * before all others.
2550	 */
2551	cfg = RL_CPLUSCMD_PCI_MRW;
2552	if ((ifp->if_capenable & IFCAP_RXCSUM) != 0)
2553		cfg |= RL_CPLUSCMD_RXCSUM_ENB;
2554	if ((ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0)
2555		cfg |= RL_CPLUSCMD_VLANSTRIP;
2556	if ((sc->rl_flags & RL_FLAG_MACSTAT) != 0) {
2557		cfg |= RL_CPLUSCMD_MACSTAT_DIS;
2558		/* XXX magic. */
2559		cfg |= 0x0001;
2560	} else
2561		cfg |= RL_CPLUSCMD_RXENB | RL_CPLUSCMD_TXENB;
2562	CSR_WRITE_2(sc, RL_CPLUS_CMD, cfg);
2563	if (sc->rl_hwrev == RL_HWREV_8169_8110SC ||
2564	    sc->rl_hwrev == RL_HWREV_8169_8110SCE) {
2565		reg = 0x000fff00;
2566		if ((CSR_READ_1(sc, RL_CFG2) & RL_CFG2_PCI66MHZ) != 0)
2567			reg |= 0x000000ff;
2568		if (sc->rl_hwrev == RL_HWREV_8169_8110SCE)
2569			reg |= 0x00f00000;
2570		CSR_WRITE_4(sc, 0x7c, reg);
2571		/* Disable interrupt mitigation. */
2572		CSR_WRITE_2(sc, 0xe2, 0);
2573	}
2574	/*
2575	 * Disable TSO if interface MTU size is greater than MSS
2576	 * allowed in controller.
2577	 */
2578	if (ifp->if_mtu > RL_TSO_MTU && (ifp->if_capenable & IFCAP_TSO4) != 0) {
2579		ifp->if_capenable &= ~IFCAP_TSO4;
2580		ifp->if_hwassist &= ~CSUM_TSO;
2581	}
2582
2583	/*
2584	 * Init our MAC address.  Even though the chipset
2585	 * documentation doesn't mention it, we need to enter "Config
2586	 * register write enable" mode to modify the ID registers.
2587	 */
2588	/* Copy MAC address on stack to align. */
2589	bcopy(IF_LLADDR(ifp), eaddr.eaddr, ETHER_ADDR_LEN);
2590	CSR_WRITE_1(sc, RL_EECMD, RL_EEMODE_WRITECFG);
2591	CSR_WRITE_4(sc, RL_IDR0,
2592	    htole32(*(u_int32_t *)(&eaddr.eaddr[0])));
2593	CSR_WRITE_4(sc, RL_IDR4,
2594	    htole32(*(u_int32_t *)(&eaddr.eaddr[4])));
2595	CSR_WRITE_1(sc, RL_EECMD, RL_EEMODE_OFF);
2596
2597	/*
	 * For C+ mode, initialize the RX/TX descriptor lists and mbufs.
2599	 */
2600	re_rx_list_init(sc);
2601	re_tx_list_init(sc);
2602
2603	/*
2604	 * Load the addresses of the RX and TX lists into the chip.
2605	 */
2606
2607	CSR_WRITE_4(sc, RL_RXLIST_ADDR_HI,
2608	    RL_ADDR_HI(sc->rl_ldata.rl_rx_list_addr));
2609	CSR_WRITE_4(sc, RL_RXLIST_ADDR_LO,
2610	    RL_ADDR_LO(sc->rl_ldata.rl_rx_list_addr));
2611
2612	CSR_WRITE_4(sc, RL_TXLIST_ADDR_HI,
2613	    RL_ADDR_HI(sc->rl_ldata.rl_tx_list_addr));
2614	CSR_WRITE_4(sc, RL_TXLIST_ADDR_LO,
2615	    RL_ADDR_LO(sc->rl_ldata.rl_tx_list_addr));
2616
2617	/*
2618	 * Enable transmit and receive.
2619	 */
2620	CSR_WRITE_1(sc, RL_COMMAND, RL_CMD_TX_ENB|RL_CMD_RX_ENB);
2621
2622	/*
2623	 * Set the initial TX configuration.
2624	 */
2625	if (sc->rl_testmode) {
2626		if (sc->rl_type == RL_8169)
2627			CSR_WRITE_4(sc, RL_TXCFG,
2628			    RL_TXCFG_CONFIG|RL_LOOPTEST_ON);
2629		else
2630			CSR_WRITE_4(sc, RL_TXCFG,
2631			    RL_TXCFG_CONFIG|RL_LOOPTEST_ON_CPLUS);
2632	} else
2633		CSR_WRITE_4(sc, RL_TXCFG, RL_TXCFG_CONFIG);
2634
2635	CSR_WRITE_1(sc, RL_EARLY_TX_THRESH, 16);
2636
2637	/*
2638	 * Set the initial RX configuration.
2639	 */
2640	re_set_rxmode(sc);
2641
2642#ifdef DEVICE_POLLING
2643	/*
2644	 * Disable interrupts if we are polling.
2645	 */
2646	if (ifp->if_capenable & IFCAP_POLLING)
2647		CSR_WRITE_2(sc, RL_IMR, 0);
2648	else	/* otherwise ... */
2649#endif
2650
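	/*
	 * Note: when DEVICE_POLLING is defined, the if/else statement
	 * below serves as the body of the 'else' above, so interrupts
	 * are only enabled when polling is not in use.
	 */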
2651	/*
2652	 * Enable interrupts.
2653	 */
2654	if (sc->rl_testmode)
2655		CSR_WRITE_2(sc, RL_IMR, 0);
2656	else
2657		CSR_WRITE_2(sc, RL_IMR, RL_INTRS_CPLUS);
2658	CSR_WRITE_2(sc, RL_ISR, RL_INTRS_CPLUS);
2659
2660	/* Set initial TX threshold */
2661	sc->rl_txthresh = RL_TX_THRESH_INIT;
2662
2663	/* Start RX/TX process. */
2664	CSR_WRITE_4(sc, RL_MISSEDPKT, 0);
2665#ifdef notdef
2666	/* Enable receiver and transmitter. */
2667	CSR_WRITE_1(sc, RL_COMMAND, RL_CMD_TX_ENB|RL_CMD_RX_ENB);
2668#endif
2669
2670#ifdef RE_TX_MODERATION
2671	/*
2672	 * Initialize the timer interrupt register so that
2673	 * a timer interrupt will be generated once the timer
2674	 * reaches a certain number of ticks. The timer is
2675	 * reloaded on each transmit. This gives us TX interrupt
2676	 * moderation, which dramatically improves TX frame rate.
2677	 */
2678	if (sc->rl_type == RL_8169)
2679		CSR_WRITE_4(sc, RL_TIMERINT_8169, 0x800);
2680	else
2681		CSR_WRITE_4(sc, RL_TIMERINT, 0x400);
2682#endif
2683
2684	/*
2685	 * For 8169 gigE NICs, set the max allowed RX packet
2686	 * size so we can receive jumbo frames.
2687	 */
2688	if (sc->rl_type == RL_8169)
2689		CSR_WRITE_2(sc, RL_MAXRXPKTLEN, 16383);
2690
2691	if (sc->rl_testmode)
2692		return;
2693
2694	mii_mediachg(mii);
2695
2696	CSR_WRITE_1(sc, RL_CFG1, CSR_READ_1(sc, RL_CFG1) | RL_CFG1_DRVLOAD);
2697
2698	ifp->if_drv_flags |= IFF_DRV_RUNNING;
2699	ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
2700
2701	sc->rl_flags &= ~RL_FLAG_LINK;
2702	sc->rl_watchdog_timer = 0;
2703	callout_reset(&sc->rl_stat_callout, hz, re_tick, sc);
2704}
2705
2706/*
2707 * Set media options.
2708 */
2709static int
2710re_ifmedia_upd(struct ifnet *ifp)
2711{
2712	struct rl_softc		*sc;
2713	struct mii_data		*mii;
2714	int			error;
2715
2716	sc = ifp->if_softc;
2717	mii = device_get_softc(sc->rl_miibus);
2718	RL_LOCK(sc);
2719	error = mii_mediachg(mii);
2720	RL_UNLOCK(sc);
2721
2722	return (error);
2723}
2724
2725/*
2726 * Report current media status.
2727 */
2728static void
2729re_ifmedia_sts(struct ifnet *ifp, struct ifmediareq *ifmr)
2730{
2731	struct rl_softc		*sc;
2732	struct mii_data		*mii;
2733
2734	sc = ifp->if_softc;
2735	mii = device_get_softc(sc->rl_miibus);
2736
2737	RL_LOCK(sc);
2738	mii_pollstat(mii);
2739	RL_UNLOCK(sc);
2740	ifmr->ifm_active = mii->mii_media_active;
2741	ifmr->ifm_status = mii->mii_media_status;
2742}
2743
2744static int
2745re_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
2746{
2747	struct rl_softc		*sc = ifp->if_softc;
2748	struct ifreq		*ifr = (struct ifreq *) data;
2749	struct mii_data		*mii;
2750	int			error = 0;
2751
2752	switch (command) {
2753	case SIOCSIFMTU:
2754		if (ifr->ifr_mtu < ETHERMIN || ifr->ifr_mtu > RL_JUMBO_MTU) {
2755			error = EINVAL;
2756			break;
2757		}
2758		if ((sc->rl_flags & RL_FLAG_NOJUMBO) != 0 &&
2759		    ifr->ifr_mtu > RL_MAX_FRAMELEN) {
2760			error = EINVAL;
2761			break;
2762		}
2763		RL_LOCK(sc);
2764		if (ifp->if_mtu != ifr->ifr_mtu)
2765			ifp->if_mtu = ifr->ifr_mtu;
2766		if (ifp->if_mtu > RL_TSO_MTU &&
2767		    (ifp->if_capenable & IFCAP_TSO4) != 0) {
2768			ifp->if_capenable &= ~IFCAP_TSO4;
2769			ifp->if_hwassist &= ~CSUM_TSO;
2770		}
2771		RL_UNLOCK(sc);
2772		break;
2773	case SIOCSIFFLAGS:
2774		RL_LOCK(sc);
2775		if ((ifp->if_flags & IFF_UP) != 0) {
2776			if ((ifp->if_drv_flags & IFF_DRV_RUNNING) != 0) {
2777				if (((ifp->if_flags ^ sc->rl_if_flags)
2778				    & (IFF_PROMISC | IFF_ALLMULTI)) != 0)
2779					re_set_rxmode(sc);
2780			} else
2781				re_init_locked(sc);
2782		} else {
2783			if ((ifp->if_drv_flags & IFF_DRV_RUNNING) != 0)
2784				re_stop(sc);
2785		}
2786		sc->rl_if_flags = ifp->if_flags;
2787		RL_UNLOCK(sc);
2788		break;
2789	case SIOCADDMULTI:
2790	case SIOCDELMULTI:
2791		RL_LOCK(sc);
2792		re_set_rxmode(sc);
2793		RL_UNLOCK(sc);
2794		break;
2795	case SIOCGIFMEDIA:
2796	case SIOCSIFMEDIA:
2797		mii = device_get_softc(sc->rl_miibus);
2798		error = ifmedia_ioctl(ifp, ifr, &mii->mii_media, command);
2799		break;
2800	case SIOCSIFCAP:
2801	    {
2802		int mask, reinit;
2803
2804		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
2805		reinit = 0;
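		/*
		 * 'mask' holds the capability bits that are changing.
		 * Toggling checksum offload or VLAN tag stripping
		 * requires a reinit, since those features are only
		 * programmed into the C+ command register by
		 * re_init_locked().
		 */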
2806#ifdef DEVICE_POLLING
2807		if (mask & IFCAP_POLLING) {
2808			if (ifr->ifr_reqcap & IFCAP_POLLING) {
2809				error = ether_poll_register(re_poll, ifp);
2810				if (error)
					return (error);
2812				RL_LOCK(sc);
2813				/* Disable interrupts */
2814				CSR_WRITE_2(sc, RL_IMR, 0x0000);
2815				ifp->if_capenable |= IFCAP_POLLING;
2816				RL_UNLOCK(sc);
2817			} else {
2818				error = ether_poll_deregister(ifp);
2819				/* Enable interrupts. */
2820				RL_LOCK(sc);
2821				CSR_WRITE_2(sc, RL_IMR, RL_INTRS_CPLUS);
2822				ifp->if_capenable &= ~IFCAP_POLLING;
2823				RL_UNLOCK(sc);
2824			}
2825		}
2826#endif /* DEVICE_POLLING */
2827		if (mask & IFCAP_HWCSUM) {
2828			ifp->if_capenable ^= IFCAP_HWCSUM;
2829			if (ifp->if_capenable & IFCAP_TXCSUM)
2830				ifp->if_hwassist |= RE_CSUM_FEATURES;
2831			else
2832				ifp->if_hwassist &= ~RE_CSUM_FEATURES;
2833			reinit = 1;
2834		}
2835		if (mask & IFCAP_VLAN_HWTAGGING) {
2836			ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
2837			reinit = 1;
2838		}
2839		if (mask & IFCAP_TSO4) {
2840			ifp->if_capenable ^= IFCAP_TSO4;
2841			if ((IFCAP_TSO4 & ifp->if_capenable) &&
2842			    (IFCAP_TSO4 & ifp->if_capabilities))
2843				ifp->if_hwassist |= CSUM_TSO;
2844			else
2845				ifp->if_hwassist &= ~CSUM_TSO;
2846			if (ifp->if_mtu > RL_TSO_MTU &&
2847			    (ifp->if_capenable & IFCAP_TSO4) != 0) {
2848				ifp->if_capenable &= ~IFCAP_TSO4;
2849				ifp->if_hwassist &= ~CSUM_TSO;
2850			}
2851		}
2852		if ((mask & IFCAP_WOL) != 0 &&
2853		    (ifp->if_capabilities & IFCAP_WOL) != 0) {
2854			if ((mask & IFCAP_WOL_UCAST) != 0)
2855				ifp->if_capenable ^= IFCAP_WOL_UCAST;
2856			if ((mask & IFCAP_WOL_MCAST) != 0)
2857				ifp->if_capenable ^= IFCAP_WOL_MCAST;
2858			if ((mask & IFCAP_WOL_MAGIC) != 0)
2859				ifp->if_capenable ^= IFCAP_WOL_MAGIC;
2860		}
2861		if (reinit && ifp->if_drv_flags & IFF_DRV_RUNNING)
2862			re_init(sc);
2863		VLAN_CAPABILITIES(ifp);
2864	    }
2865		break;
2866	default:
2867		error = ether_ioctl(ifp, command, data);
2868		break;
2869	}
2870
2871	return (error);
2872}
2873
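/*
 * Per-second watchdog, driven from re_tick().  If the timer expires
 * but re_txeof() finds that all descriptors have in fact completed,
 * the timeout is attributed to a missed Tx completion interrupt and
 * transmission is simply restarted; otherwise the chip is assumed to
 * be wedged and is reinitialized.
 */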
2874static void
2875re_watchdog(struct rl_softc *sc)
2876{
2877	struct ifnet		*ifp;
2878
2879	RL_LOCK_ASSERT(sc);
2880
2881	if (sc->rl_watchdog_timer == 0 || --sc->rl_watchdog_timer != 0)
2882		return;
2883
2884	ifp = sc->rl_ifp;
2885	re_txeof(sc);
2886	if (sc->rl_ldata.rl_tx_free == sc->rl_ldata.rl_tx_desc_cnt) {
2887		if_printf(ifp, "watchdog timeout (missed Tx interrupts) "
2888		    "-- recovering\n");
2889		if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
2890			taskqueue_enqueue_fast(taskqueue_fast, &sc->rl_txtask);
2891		return;
2892	}
2893
2894	if_printf(ifp, "watchdog timeout\n");
2895	ifp->if_oerrors++;
2896
2897	re_rxeof(sc, NULL);
2898	re_init_locked(sc);
2899	if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
2900		taskqueue_enqueue_fast(taskqueue_fast, &sc->rl_txtask);
2901}
2902
2903/*
2904 * Stop the adapter and free any mbufs allocated to the
2905 * RX and TX lists.
2906 */
2907static void
2908re_stop(struct rl_softc *sc)
2909{
2910	int			i;
2911	struct ifnet		*ifp;
2912	struct rl_txdesc	*txd;
2913	struct rl_rxdesc	*rxd;
2914
2915	RL_LOCK_ASSERT(sc);
2916
2917	ifp = sc->rl_ifp;
2918
2919	sc->rl_watchdog_timer = 0;
2920	callout_stop(&sc->rl_stat_callout);
2921	ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
2922
2923	if ((sc->rl_flags & RL_FLAG_CMDSTOP) != 0)
2924		CSR_WRITE_1(sc, RL_COMMAND, RL_CMD_STOPREQ | RL_CMD_TX_ENB |
2925		    RL_CMD_RX_ENB);
2926	else
2927		CSR_WRITE_1(sc, RL_COMMAND, 0x00);
2928	DELAY(1000);
2929	CSR_WRITE_2(sc, RL_IMR, 0x0000);
2930	CSR_WRITE_2(sc, RL_ISR, 0xFFFF);
2931
2932	if (sc->rl_head != NULL) {
2933		m_freem(sc->rl_head);
2934		sc->rl_head = sc->rl_tail = NULL;
2935	}
2936
2937	/* Free the TX list buffers. */
2938
2939	for (i = 0; i < sc->rl_ldata.rl_tx_desc_cnt; i++) {
2940		txd = &sc->rl_ldata.rl_tx_desc[i];
2941		if (txd->tx_m != NULL) {
2942			bus_dmamap_sync(sc->rl_ldata.rl_tx_mtag,
2943			    txd->tx_dmamap, BUS_DMASYNC_POSTWRITE);
2944			bus_dmamap_unload(sc->rl_ldata.rl_tx_mtag,
2945			    txd->tx_dmamap);
2946			m_freem(txd->tx_m);
2947			txd->tx_m = NULL;
2948		}
2949	}
2950
2951	/* Free the RX list buffers. */
2952
2953	for (i = 0; i < sc->rl_ldata.rl_rx_desc_cnt; i++) {
2954		rxd = &sc->rl_ldata.rl_rx_desc[i];
2955		if (rxd->rx_m != NULL) {
			bus_dmamap_sync(sc->rl_ldata.rl_rx_mtag,
2957			    rxd->rx_dmamap, BUS_DMASYNC_POSTREAD);
2958			bus_dmamap_unload(sc->rl_ldata.rl_rx_mtag,
2959			    rxd->rx_dmamap);
2960			m_freem(rxd->rx_m);
2961			rxd->rx_m = NULL;
2962		}
2963	}
2964}
2965
2966/*
2967 * Device suspend routine.  Stop the interface and save some PCI
2968 * settings in case the BIOS doesn't restore them properly on
2969 * resume.
2970 */
2971static int
2972re_suspend(device_t dev)
2973{
2974	struct rl_softc		*sc;
2975
2976	sc = device_get_softc(dev);
2977
2978	RL_LOCK(sc);
2979	re_stop(sc);
2980	re_setwol(sc);
2981	sc->suspended = 1;
2982	RL_UNLOCK(sc);
2983
2984	return (0);
2985}
2986
2987/*
2988 * Device resume routine.  Restore some PCI settings in case the BIOS
2989 * doesn't, re-enable busmastering, and restart the interface if
2990 * appropriate.
2991 */
2992static int
2993re_resume(device_t dev)
2994{
2995	struct rl_softc		*sc;
2996	struct ifnet		*ifp;
2997
2998	sc = device_get_softc(dev);
2999
3000	RL_LOCK(sc);
3001
3002	ifp = sc->rl_ifp;
3003	/* Take controller out of sleep mode. */
3004	if ((sc->rl_flags & RL_FLAG_MACSLEEP) != 0) {
3005		if ((CSR_READ_1(sc, RL_MACDBG) & 0x80) == 0x80)
3006			CSR_WRITE_1(sc, RL_GPIO,
3007			    CSR_READ_1(sc, RL_GPIO) | 0x01);
3008	}
3009
3010	/* reinitialize interface if necessary */
3011	if (ifp->if_flags & IFF_UP)
3012		re_init_locked(sc);
3013
3014	/*
3015	 * Clear WOL matching such that normal Rx filtering
3016	 * wouldn't interfere with WOL patterns.
3017	 */
3018	re_clrwol(sc);
3019	sc->suspended = 0;
3020	RL_UNLOCK(sc);
3021
3022	return (0);
3023}
3024
3025/*
3026 * Stop all chip I/O so that the kernel's probe routines don't
3027 * get confused by errant DMAs when rebooting.
3028 */
3029static int
3030re_shutdown(device_t dev)
3031{
3032	struct rl_softc		*sc;
3033
3034	sc = device_get_softc(dev);
3035
3036	RL_LOCK(sc);
3037	re_stop(sc);
3038	/*
	 * Mark the interface as down, since otherwise we will panic
	 * if an interrupt comes in later on, which can happen in
	 * some cases.
3042	 */
3043	sc->rl_ifp->if_flags &= ~IFF_UP;
3044	re_setwol(sc);
3045	RL_UNLOCK(sc);
3046
3047	return (0);
3048}
3049
3050static void
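/*
 * Program the chip's wake-on-LAN configuration.  The CFG registers
 * are protected by the EEPROM "write config" mode, so the sequence
 * is: unlock via RL_EECMD, update CFG1/CFG3/CFG5 to match the
 * interface's WOL capabilities, lock again, and finally set the PME
 * enable bit through the PCI power management registers.
 */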
3051re_setwol(struct rl_softc *sc)
3052{
3053	struct ifnet		*ifp;
3054	int			pmc;
3055	uint16_t		pmstat;
3056	uint8_t			v;
3057
3058	RL_LOCK_ASSERT(sc);
3059
3060	if (pci_find_extcap(sc->rl_dev, PCIY_PMG, &pmc) != 0)
3061		return;
3062
3063	ifp = sc->rl_ifp;
3064	/* Put controller into sleep mode. */
3065	if ((sc->rl_flags & RL_FLAG_MACSLEEP) != 0) {
3066		if ((CSR_READ_1(sc, RL_MACDBG) & 0x80) == 0x80)
3067			CSR_WRITE_1(sc, RL_GPIO,
3068			    CSR_READ_1(sc, RL_GPIO) & ~0x01);
3069	}
3070	if ((ifp->if_capenable & IFCAP_WOL) != 0 &&
3071	    (sc->rl_flags & RL_FLAG_WOLRXENB) != 0)
3072		CSR_WRITE_1(sc, RL_COMMAND, RL_CMD_RX_ENB);
3073	/* Enable config register write. */
3074	CSR_WRITE_1(sc, RL_EECMD, RL_EE_MODE);
3075
3076	/* Enable PME. */
3077	v = CSR_READ_1(sc, RL_CFG1);
3078	v &= ~RL_CFG1_PME;
3079	if ((ifp->if_capenable & IFCAP_WOL) != 0)
3080		v |= RL_CFG1_PME;
3081	CSR_WRITE_1(sc, RL_CFG1, v);
3082
3083	v = CSR_READ_1(sc, RL_CFG3);
3084	v &= ~(RL_CFG3_WOL_LINK | RL_CFG3_WOL_MAGIC);
3085	if ((ifp->if_capenable & IFCAP_WOL_MAGIC) != 0)
3086		v |= RL_CFG3_WOL_MAGIC;
3087	CSR_WRITE_1(sc, RL_CFG3, v);
3088
3089	/* Config register write done. */
3090	CSR_WRITE_1(sc, RL_EECMD, RL_EEMODE_OFF);
3091
3092	v = CSR_READ_1(sc, RL_CFG5);
3093	v &= ~(RL_CFG5_WOL_BCAST | RL_CFG5_WOL_MCAST | RL_CFG5_WOL_UCAST);
3094	v &= ~RL_CFG5_WOL_LANWAKE;
3095	if ((ifp->if_capenable & IFCAP_WOL_UCAST) != 0)
3096		v |= RL_CFG5_WOL_UCAST;
3097	if ((ifp->if_capenable & IFCAP_WOL_MCAST) != 0)
3098		v |= RL_CFG5_WOL_MCAST | RL_CFG5_WOL_BCAST;
3099	if ((ifp->if_capenable & IFCAP_WOL) != 0)
3100		v |= RL_CFG5_WOL_LANWAKE;
3101	CSR_WRITE_1(sc, RL_CFG5, v);
3102
3103	/*
	 * The hardware seems to reset its link speed to 100Mbps in
	 * power down mode, so there is no need to switch to 100Mbps
	 * in the driver.
3107	 */
3108
3109	/* Request PME if WOL is requested. */
3110	pmstat = pci_read_config(sc->rl_dev, pmc + PCIR_POWER_STATUS, 2);
3111	pmstat &= ~(PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE);
3112	if ((ifp->if_capenable & IFCAP_WOL) != 0)
3113		pmstat |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE;
3114	pci_write_config(sc->rl_dev, pmc + PCIR_POWER_STATUS, pmstat, 2);
3115}
3116
3117static void
3118re_clrwol(struct rl_softc *sc)
3119{
3120	int			pmc;
3121	uint8_t			v;
3122
3123	RL_LOCK_ASSERT(sc);
3124
3125	if (pci_find_extcap(sc->rl_dev, PCIY_PMG, &pmc) != 0)
3126		return;
3127
3128	/* Enable config register write. */
3129	CSR_WRITE_1(sc, RL_EECMD, RL_EE_MODE);
3130
3131	v = CSR_READ_1(sc, RL_CFG3);
3132	v &= ~(RL_CFG3_WOL_LINK | RL_CFG3_WOL_MAGIC);
3133	CSR_WRITE_1(sc, RL_CFG3, v);
3134
3135	/* Config register write done. */
3136	CSR_WRITE_1(sc, RL_EECMD, RL_EEMODE_OFF);
3137
3138	v = CSR_READ_1(sc, RL_CFG5);
3139	v &= ~(RL_CFG5_WOL_BCAST | RL_CFG5_WOL_MCAST | RL_CFG5_WOL_UCAST);
3140	v &= ~RL_CFG5_WOL_LANWAKE;
3141	CSR_WRITE_1(sc, RL_CFG5, v);
3142}
3143