if_re.c revision 185896
1/*-
2 * Copyright (c) 1997, 1998-2003
3 *	Bill Paul <wpaul@windriver.com>.  All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 * 3. All advertising materials mentioning features or use of this software
14 *    must display the following acknowledgement:
15 *	This product includes software developed by Bill Paul.
16 * 4. Neither the name of the author nor the names of any co-contributors
17 *    may be used to endorse or promote products derived from this software
18 *    without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY Bill Paul AND CONTRIBUTORS ``AS IS'' AND
21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 * ARE DISCLAIMED.  IN NO EVENT SHALL Bill Paul OR THE VOICES IN HIS HEAD
24 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
30 * THE POSSIBILITY OF SUCH DAMAGE.
31 */
32
33#include <sys/cdefs.h>
34__FBSDID("$FreeBSD: head/sys/dev/re/if_re.c 185896 2008-12-11 00:30:26Z yongari $");
35
36/*
37 * RealTek 8139C+/8169/8169S/8110S/8168/8111/8101E PCI NIC driver
38 *
39 * Written by Bill Paul <wpaul@windriver.com>
40 * Senior Networking Software Engineer
41 * Wind River Systems
42 */
43
44/*
45 * This driver is designed to support RealTek's next generation of
46 * 10/100 and 10/100/1000 PCI ethernet controllers. There are currently
47 * seven devices in this family: the RTL8139C+, the RTL8169, the RTL8169S,
48 * RTL8110S, the RTL8168, the RTL8111 and the RTL8101E.
49 *
50 * The 8139C+ is a 10/100 ethernet chip. It is backwards compatible
51 * with the older 8139 family, however it also supports a special
52 * C+ mode of operation that provides several new performance enhancing
53 * features. These include:
54 *
55 *	o Descriptor based DMA mechanism. Each descriptor represents
56 *	  a single packet fragment. Data buffers may be aligned on
57 *	  any byte boundary.
58 *
59 *	o 64-bit DMA
60 *
61 *	o TCP/IP checksum offload for both RX and TX
62 *
63 *	o High and normal priority transmit DMA rings
64 *
65 *	o VLAN tag insertion and extraction
66 *
67 *	o TCP large send (segmentation offload)
68 *
69 * Like the 8139, the 8139C+ also has a built-in 10/100 PHY. The C+
70 * programming API is fairly straightforward. The RX filtering, EEPROM
71 * access and PHY access is the same as it is on the older 8139 series
72 * chips.
73 *
74 * The 8169 is a 64-bit 10/100/1000 gigabit ethernet MAC. It has almost the
75 * same programming API and feature set as the 8139C+ with the following
76 * differences and additions:
77 *
78 *	o 1000Mbps mode
79 *
80 *	o Jumbo frames
81 *
82 *	o GMII and TBI ports/registers for interfacing with copper
83 *	  or fiber PHYs
84 *
85 *	o RX and TX DMA rings can have up to 1024 descriptors
86 *	  (the 8139C+ allows a maximum of 64)
87 *
88 *	o Slight differences in register layout from the 8139C+
89 *
90 * The TX start and timer interrupt registers are at different locations
91 * on the 8169 than they are on the 8139C+. Also, the status word in the
92 * RX descriptor has a slightly different bit layout. The 8169 does not
93 * have a built-in PHY. Most reference boards use a Marvell 88E1000 'Alaska'
94 * copper gigE PHY.
95 *
96 * The 8169S/8110S 10/100/1000 devices have built-in copper gigE PHYs
97 * (the 'S' stands for 'single-chip'). These devices have the same
98 * programming API as the older 8169, but also have some vendor-specific
99 * registers for the on-board PHY. The 8110S is a LAN-on-motherboard
100 * part designed to be pin-compatible with the RealTek 8100 10/100 chip.
101 *
102 * This driver takes advantage of the RX and TX checksum offload and
103 * VLAN tag insertion/extraction features. It also implements TX
104 * interrupt moderation using the timer interrupt registers, which
105 * significantly reduces TX interrupt load. There is also support
106 * for jumbo frames, however the 8169/8169S/8110S can not transmit
107 * jumbo frames larger than 7440, so the max MTU possible with this
108 * driver is 7422 bytes.
109 */
110
111#ifdef HAVE_KERNEL_OPTION_HEADERS
112#include "opt_device_polling.h"
113#endif
114
115#include <sys/param.h>
116#include <sys/endian.h>
117#include <sys/systm.h>
118#include <sys/sockio.h>
119#include <sys/mbuf.h>
120#include <sys/malloc.h>
121#include <sys/module.h>
122#include <sys/kernel.h>
123#include <sys/socket.h>
124#include <sys/lock.h>
125#include <sys/mutex.h>
126#include <sys/taskqueue.h>
127
128#include <net/if.h>
129#include <net/if_arp.h>
130#include <net/ethernet.h>
131#include <net/if_dl.h>
132#include <net/if_media.h>
133#include <net/if_types.h>
134#include <net/if_vlan_var.h>
135
136#include <net/bpf.h>
137
138#include <machine/bus.h>
139#include <machine/resource.h>
140#include <sys/bus.h>
141#include <sys/rman.h>
142
143#include <dev/mii/mii.h>
144#include <dev/mii/miivar.h>
145
146#include <dev/pci/pcireg.h>
147#include <dev/pci/pcivar.h>
148
149#include <pci/if_rlreg.h>
150
151MODULE_DEPEND(re, pci, 1, 1, 1);
152MODULE_DEPEND(re, ether, 1, 1, 1);
153MODULE_DEPEND(re, miibus, 1, 1, 1);
154
155/* "device miibus" required.  See GENERIC if you get errors here. */
156#include "miibus_if.h"
157
/* Tunables. */
static int msi_disable = 1;		/* non-zero: MSI interrupts disabled */
TUNABLE_INT("hw.re.msi_disable", &msi_disable);	/* loader-settable tunable */

/* Checksum-offload work the driver advertises/handles for TX. */
#define RE_CSUM_FEATURES    (CSUM_IP | CSUM_TCP | CSUM_UDP)
163
/*
 * Various supported device vendors/types and their names.
 * Matched against PCI vendor/device IDs in re_probe(); iterated by
 * element count, so there is deliberately no terminator entry.
 */
static struct rl_type re_devs[] = {
	{ DLINK_VENDORID, DLINK_DEVICEID_528T, 0,
	    "D-Link DGE-528(T) Gigabit Ethernet Adapter" },
	{ RT_VENDORID, RT_DEVICEID_8139, 0,
	    "RealTek 8139C+ 10/100BaseTX" },
	{ RT_VENDORID, RT_DEVICEID_8101E, 0,
	    "RealTek 8101E/8102E/8102EL PCIe 10/100baseTX" },
	{ RT_VENDORID, RT_DEVICEID_8168, 0,
	    "RealTek 8168/8168B/8168C/8168CP/8168D/8111B/8111C/8111CP PCIe "
	    "Gigabit Ethernet" },
	{ RT_VENDORID, RT_DEVICEID_8169, 0,
	    "RealTek 8169/8169S/8169SB(L)/8110S/8110SB(L) Gigabit Ethernet" },
	{ RT_VENDORID, RT_DEVICEID_8169SC, 0,
	    "RealTek 8169SC/8110SC Single-chip Gigabit Ethernet" },
	{ COREGA_VENDORID, COREGA_DEVICEID_CGLAPCIGT, 0,
	    "Corega CG-LAPCIGT (RTL8169S) Gigabit Ethernet" },
	{ LINKSYS_VENDORID, LINKSYS_DEVICEID_EG1032, 0,
	    "Linksys EG1032 (RTL8169S) Gigabit Ethernet" },
	{ USR_VENDORID, USR_DEVICEID_997902, 0,
	    "US Robotics 997902 (RTL8169S) Gigabit Ethernet" }
};
188
/*
 * Map of hardware revision codes to chip class (RL_8139/RL_8139CPLUS/
 * RL_8169) and a human-readable revision name.  Terminated by the
 * all-zero sentinel entry.
 */
static struct rl_hwrev re_hwrevs[] = {
	{ RL_HWREV_8139, RL_8139,  "" },
	{ RL_HWREV_8139A, RL_8139, "A" },
	{ RL_HWREV_8139AG, RL_8139, "A-G" },
	{ RL_HWREV_8139B, RL_8139, "B" },
	{ RL_HWREV_8130, RL_8139, "8130" },
	{ RL_HWREV_8139C, RL_8139, "C" },
	{ RL_HWREV_8139D, RL_8139, "8139D/8100B/8100C" },
	{ RL_HWREV_8139CPLUS, RL_8139CPLUS, "C+"},
	{ RL_HWREV_8168_SPIN1, RL_8169, "8168"},
	{ RL_HWREV_8169, RL_8169, "8169"},
	{ RL_HWREV_8169S, RL_8169, "8169S"},
	{ RL_HWREV_8110S, RL_8169, "8110S"},
	{ RL_HWREV_8169_8110SB, RL_8169, "8169SB"},
	{ RL_HWREV_8169_8110SC, RL_8169, "8169SC"},
	{ RL_HWREV_8169_8110SBL, RL_8169, "8169SBL"},
	{ RL_HWREV_8100, RL_8139, "8100"},
	{ RL_HWREV_8101, RL_8139, "8101"},
	{ RL_HWREV_8100E, RL_8169, "8100E"},
	{ RL_HWREV_8101E, RL_8169, "8101E"},
	{ RL_HWREV_8102E, RL_8169, "8102E"},
	{ RL_HWREV_8102EL, RL_8169, "8102EL"},
	{ RL_HWREV_8168_SPIN2, RL_8169, "8168"},
	{ RL_HWREV_8168_SPIN3, RL_8169, "8168"},
	{ RL_HWREV_8168C, RL_8169, "8168C/8111C"},
	{ RL_HWREV_8168C_SPIN2, RL_8169, "8168C/8111C"},
	{ RL_HWREV_8168CP, RL_8169, "8168CP/8111CP"},
	{ RL_HWREV_8168D, RL_8169, "8168D"},
	{ 0, 0, NULL }	/* terminator */
};
219
220static int re_probe		(device_t);
221static int re_attach		(device_t);
222static int re_detach		(device_t);
223
224static int re_encap		(struct rl_softc *, struct mbuf **);
225
226static void re_dma_map_addr	(void *, bus_dma_segment_t *, int, int);
227static int re_allocmem		(device_t, struct rl_softc *);
228static __inline void re_discard_rxbuf
229				(struct rl_softc *, int);
230static int re_newbuf		(struct rl_softc *, int);
231static int re_rx_list_init	(struct rl_softc *);
232static int re_tx_list_init	(struct rl_softc *);
233#ifdef RE_FIXUP_RX
234static __inline void re_fixup_rx
235				(struct mbuf *);
236#endif
237static int re_rxeof		(struct rl_softc *);
238static void re_txeof		(struct rl_softc *);
239#ifdef DEVICE_POLLING
240static void re_poll		(struct ifnet *, enum poll_cmd, int);
241static void re_poll_locked	(struct ifnet *, enum poll_cmd, int);
242#endif
243static int re_intr		(void *);
244static void re_tick		(void *);
245static void re_tx_task		(void *, int);
246static void re_int_task		(void *, int);
247static void re_start		(struct ifnet *);
248static int re_ioctl		(struct ifnet *, u_long, caddr_t);
249static void re_init		(void *);
250static void re_init_locked	(struct rl_softc *);
251static void re_stop		(struct rl_softc *);
252static void re_watchdog		(struct rl_softc *);
253static int re_suspend		(device_t);
254static int re_resume		(device_t);
255static int re_shutdown		(device_t);
256static int re_ifmedia_upd	(struct ifnet *);
257static void re_ifmedia_sts	(struct ifnet *, struct ifmediareq *);
258
259static void re_eeprom_putbyte	(struct rl_softc *, int);
260static void re_eeprom_getword	(struct rl_softc *, int, u_int16_t *);
261static void re_read_eeprom	(struct rl_softc *, caddr_t, int, int);
262static int re_gmii_readreg	(device_t, int, int);
263static int re_gmii_writereg	(device_t, int, int, int);
264
265static int re_miibus_readreg	(device_t, int, int);
266static int re_miibus_writereg	(device_t, int, int, int);
267static void re_miibus_statchg	(device_t);
268
269static void re_setmulti		(struct rl_softc *);
270static void re_reset		(struct rl_softc *);
271static void re_setwol		(struct rl_softc *);
272static void re_clrwol		(struct rl_softc *);
273
274#ifdef RE_DIAG
275static int re_diag		(struct rl_softc *);
276#endif
277
/* newbus method table: wires the driver entry points into the kernel. */
static device_method_t re_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe,		re_probe),
	DEVMETHOD(device_attach,	re_attach),
	DEVMETHOD(device_detach,	re_detach),
	DEVMETHOD(device_suspend,	re_suspend),
	DEVMETHOD(device_resume,	re_resume),
	DEVMETHOD(device_shutdown,	re_shutdown),

	/* bus interface */
	DEVMETHOD(bus_print_child,	bus_generic_print_child),
	DEVMETHOD(bus_driver_added,	bus_generic_driver_added),

	/* MII interface */
	DEVMETHOD(miibus_readreg,	re_miibus_readreg),
	DEVMETHOD(miibus_writereg,	re_miibus_writereg),
	DEVMETHOD(miibus_statchg,	re_miibus_statchg),

	{ 0, 0 }	/* terminator */
};

static driver_t re_driver = {
	"re",
	re_methods,
	sizeof(struct rl_softc)	/* per-instance softc size */
};

static devclass_t re_devclass;

/* Attach to both PCI and CardBus buses; hang miibus off this driver. */
DRIVER_MODULE(re, pci, re_driver, re_devclass, 0, 0);
DRIVER_MODULE(re, cardbus, re_driver, re_devclass, 0, 0);
DRIVER_MODULE(miibus, re, miibus_driver, miibus_devclass, 0, 0);
310
/*
 * Read-modify-write helpers for the EEPROM command register: set or
 * clear the bit(s) in 'x' while leaving all other RL_EECMD bits alone.
 * The argument is parenthesized in the expansion so that an expression
 * argument (e.g. A | B) binds correctly against '|' and '~'.
 */
#define EE_SET(x)					\
	CSR_WRITE_1(sc, RL_EECMD,			\
		CSR_READ_1(sc, RL_EECMD) | (x))

#define EE_CLR(x)					\
	CSR_WRITE_1(sc, RL_EECMD,			\
		CSR_READ_1(sc, RL_EECMD) & ~(x))
318
/*
 * Send a read command and address to the EEPROM, check for ACK.
 * Bit-bangs the serial EEPROM interface through RL_EECMD; the DELAY()
 * calls pace the clock/data transitions, so statement order matters.
 */
static void
re_eeprom_putbyte(struct rl_softc *sc, int addr)
{
	int			d, i;

	/* Combine the 9346 READ opcode (shifted past the address width)
	 * with the target word address. */
	d = addr | (RL_9346_READ << sc->rl_eewidth);

	/*
	 * Feed in each bit and strobe the clock.
	 */

	/* Shift out MSB first; the top bit position depends on the
	 * EEPROM address width recorded in the softc. */
	for (i = 1 << (sc->rl_eewidth + 3); i; i >>= 1) {
		if (d & i) {
			EE_SET(RL_EE_DATAIN);
		} else {
			EE_CLR(RL_EE_DATAIN);
		}
		DELAY(100);
		EE_SET(RL_EE_CLK);
		DELAY(150);
		EE_CLR(RL_EE_CLK);
		DELAY(100);
	}
}
346
/*
 * Read a word of data stored in the EEPROM at address 'addr.'
 * The result is stored through 'dest'; the caller must have already
 * selected the EEPROM (see re_read_eeprom()).
 */
static void
re_eeprom_getword(struct rl_softc *sc, int addr, u_int16_t *dest)
{
	int			i;
	u_int16_t		word = 0;

	/*
	 * Send address of word we want to read.
	 */
	re_eeprom_putbyte(sc, addr);

	/*
	 * Start reading bits from EEPROM.
	 */
	/* Bits arrive MSB first; clock each one in from DATAOUT. */
	for (i = 0x8000; i; i >>= 1) {
		EE_SET(RL_EE_CLK);
		DELAY(100);
		if (CSR_READ_1(sc, RL_EECMD) & RL_EE_DATAOUT)
			word |= i;
		EE_CLR(RL_EE_CLK);
		DELAY(100);
	}

	*dest = word;
}
375
376/*
377 * Read a sequence of words from the EEPROM.
378 */
379static void
380re_read_eeprom(struct rl_softc *sc, caddr_t dest, int off, int cnt)
381{
382	int			i;
383	u_int16_t		word = 0, *ptr;
384
385	CSR_SETBIT_1(sc, RL_EECMD, RL_EEMODE_PROGRAM);
386
387        DELAY(100);
388
389	for (i = 0; i < cnt; i++) {
390		CSR_SETBIT_1(sc, RL_EECMD, RL_EE_SEL);
391		re_eeprom_getword(sc, off + i, &word);
392		CSR_CLRBIT_1(sc, RL_EECMD, RL_EE_SEL);
393		ptr = (u_int16_t *)(dest + (i * 2));
394                *ptr = word;
395	}
396
397	CSR_CLRBIT_1(sc, RL_EECMD, RL_EEMODE_PROGRAM);
398}
399
400static int
401re_gmii_readreg(device_t dev, int phy, int reg)
402{
403	struct rl_softc		*sc;
404	u_int32_t		rval;
405	int			i;
406
407	if (phy != 1)
408		return (0);
409
410	sc = device_get_softc(dev);
411
412	/* Let the rgephy driver read the GMEDIASTAT register */
413
414	if (reg == RL_GMEDIASTAT) {
415		rval = CSR_READ_1(sc, RL_GMEDIASTAT);
416		return (rval);
417	}
418
419	CSR_WRITE_4(sc, RL_PHYAR, reg << 16);
420
421	for (i = 0; i < RL_TIMEOUT; i++) {
422		rval = CSR_READ_4(sc, RL_PHYAR);
423		if (rval & RL_PHYAR_BUSY)
424			break;
425		DELAY(100);
426	}
427
428	if (i == RL_TIMEOUT) {
429		device_printf(sc->rl_dev, "PHY read failed\n");
430		return (0);
431	}
432
433	return (rval & RL_PHYAR_PHYDATA);
434}
435
436static int
437re_gmii_writereg(device_t dev, int phy, int reg, int data)
438{
439	struct rl_softc		*sc;
440	u_int32_t		rval;
441	int			i;
442
443	sc = device_get_softc(dev);
444
445	CSR_WRITE_4(sc, RL_PHYAR, (reg << 16) |
446	    (data & RL_PHYAR_PHYDATA) | RL_PHYAR_BUSY);
447
448	for (i = 0; i < RL_TIMEOUT; i++) {
449		rval = CSR_READ_4(sc, RL_PHYAR);
450		if (!(rval & RL_PHYAR_BUSY))
451			break;
452		DELAY(100);
453	}
454
455	if (i == RL_TIMEOUT) {
456		device_printf(sc->rl_dev, "PHY write failed\n");
457		return (0);
458	}
459
460	return (0);
461}
462
463static int
464re_miibus_readreg(device_t dev, int phy, int reg)
465{
466	struct rl_softc		*sc;
467	u_int16_t		rval = 0;
468	u_int16_t		re8139_reg = 0;
469
470	sc = device_get_softc(dev);
471
472	if (sc->rl_type == RL_8169) {
473		rval = re_gmii_readreg(dev, phy, reg);
474		return (rval);
475	}
476
477	/* Pretend the internal PHY is only at address 0 */
478	if (phy) {
479		return (0);
480	}
481	switch (reg) {
482	case MII_BMCR:
483		re8139_reg = RL_BMCR;
484		break;
485	case MII_BMSR:
486		re8139_reg = RL_BMSR;
487		break;
488	case MII_ANAR:
489		re8139_reg = RL_ANAR;
490		break;
491	case MII_ANER:
492		re8139_reg = RL_ANER;
493		break;
494	case MII_ANLPAR:
495		re8139_reg = RL_LPAR;
496		break;
497	case MII_PHYIDR1:
498	case MII_PHYIDR2:
499		return (0);
500	/*
501	 * Allow the rlphy driver to read the media status
502	 * register. If we have a link partner which does not
503	 * support NWAY, this is the register which will tell
504	 * us the results of parallel detection.
505	 */
506	case RL_MEDIASTAT:
507		rval = CSR_READ_1(sc, RL_MEDIASTAT);
508		return (rval);
509	default:
510		device_printf(sc->rl_dev, "bad phy register\n");
511		return (0);
512	}
513	rval = CSR_READ_2(sc, re8139_reg);
514	if (sc->rl_type == RL_8139CPLUS && re8139_reg == RL_BMCR) {
515		/* 8139C+ has different bit layout. */
516		rval &= ~(BMCR_LOOP | BMCR_ISO);
517	}
518	return (rval);
519}
520
521static int
522re_miibus_writereg(device_t dev, int phy, int reg, int data)
523{
524	struct rl_softc		*sc;
525	u_int16_t		re8139_reg = 0;
526	int			rval = 0;
527
528	sc = device_get_softc(dev);
529
530	if (sc->rl_type == RL_8169) {
531		rval = re_gmii_writereg(dev, phy, reg, data);
532		return (rval);
533	}
534
535	/* Pretend the internal PHY is only at address 0 */
536	if (phy)
537		return (0);
538
539	switch (reg) {
540	case MII_BMCR:
541		re8139_reg = RL_BMCR;
542		if (sc->rl_type == RL_8139CPLUS) {
543			/* 8139C+ has different bit layout. */
544			data &= ~(BMCR_LOOP | BMCR_ISO);
545		}
546		break;
547	case MII_BMSR:
548		re8139_reg = RL_BMSR;
549		break;
550	case MII_ANAR:
551		re8139_reg = RL_ANAR;
552		break;
553	case MII_ANER:
554		re8139_reg = RL_ANER;
555		break;
556	case MII_ANLPAR:
557		re8139_reg = RL_LPAR;
558		break;
559	case MII_PHYIDR1:
560	case MII_PHYIDR2:
561		return (0);
562		break;
563	default:
564		device_printf(sc->rl_dev, "bad phy register\n");
565		return (0);
566	}
567	CSR_WRITE_2(sc, re8139_reg, data);
568	return (0);
569}
570
571static void
572re_miibus_statchg(device_t dev)
573{
574	struct rl_softc		*sc;
575	struct ifnet		*ifp;
576	struct mii_data		*mii;
577
578	sc = device_get_softc(dev);
579	mii = device_get_softc(sc->rl_miibus);
580	ifp = sc->rl_ifp;
581	if (mii == NULL || ifp == NULL ||
582	    (ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
583		return;
584
585	sc->rl_flags &= ~RL_FLAG_LINK;
586	if ((mii->mii_media_status & (IFM_ACTIVE | IFM_AVALID)) ==
587	    (IFM_ACTIVE | IFM_AVALID)) {
588		switch (IFM_SUBTYPE(mii->mii_media_active)) {
589		case IFM_10_T:
590		case IFM_100_TX:
591			sc->rl_flags |= RL_FLAG_LINK;
592			break;
593		case IFM_1000_T:
594			if ((sc->rl_flags & RL_FLAG_FASTETHER) != 0)
595				break;
596			sc->rl_flags |= RL_FLAG_LINK;
597			break;
598		default:
599			break;
600		}
601	}
602	/*
603	 * RealTek controllers does not provide any interface to
604	 * Tx/Rx MACs for resolved speed, duplex and flow-control
605	 * parameters.
606	 */
607}
608
609/*
610 * Program the 64-bit multicast hash filter.
611 */
612static void
613re_setmulti(struct rl_softc *sc)
614{
615	struct ifnet		*ifp;
616	int			h = 0;
617	u_int32_t		hashes[2] = { 0, 0 };
618	struct ifmultiaddr	*ifma;
619	u_int32_t		rxfilt;
620	int			mcnt = 0;
621
622	RL_LOCK_ASSERT(sc);
623
624	ifp = sc->rl_ifp;
625
626
627	rxfilt = CSR_READ_4(sc, RL_RXCFG);
628	rxfilt &= ~(RL_RXCFG_RX_ALLPHYS | RL_RXCFG_RX_MULTI);
629	if (ifp->if_flags & IFF_ALLMULTI || ifp->if_flags & IFF_PROMISC) {
630		if (ifp->if_flags & IFF_PROMISC)
631			rxfilt |= RL_RXCFG_RX_ALLPHYS;
632		/*
633		 * Unlike other hardwares, we have to explicitly set
634		 * RL_RXCFG_RX_MULTI to receive multicast frames in
635		 * promiscuous mode.
636		 */
637		rxfilt |= RL_RXCFG_RX_MULTI;
638		CSR_WRITE_4(sc, RL_RXCFG, rxfilt);
639		CSR_WRITE_4(sc, RL_MAR0, 0xFFFFFFFF);
640		CSR_WRITE_4(sc, RL_MAR4, 0xFFFFFFFF);
641		return;
642	}
643
644	/* first, zot all the existing hash bits */
645	CSR_WRITE_4(sc, RL_MAR0, 0);
646	CSR_WRITE_4(sc, RL_MAR4, 0);
647
648	/* now program new ones */
649	IF_ADDR_LOCK(ifp);
650	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
651		if (ifma->ifma_addr->sa_family != AF_LINK)
652			continue;
653		h = ether_crc32_be(LLADDR((struct sockaddr_dl *)
654		    ifma->ifma_addr), ETHER_ADDR_LEN) >> 26;
655		if (h < 32)
656			hashes[0] |= (1 << h);
657		else
658			hashes[1] |= (1 << (h - 32));
659		mcnt++;
660	}
661	IF_ADDR_UNLOCK(ifp);
662
663	if (mcnt)
664		rxfilt |= RL_RXCFG_RX_MULTI;
665	else
666		rxfilt &= ~RL_RXCFG_RX_MULTI;
667
668	CSR_WRITE_4(sc, RL_RXCFG, rxfilt);
669
670	/*
671	 * For some unfathomable reason, RealTek decided to reverse
672	 * the order of the multicast hash registers in the PCI Express
673	 * parts. This means we have to write the hash pattern in reverse
674	 * order for those devices.
675	 */
676
677	if ((sc->rl_flags & RL_FLAG_INVMAR) != 0) {
678		CSR_WRITE_4(sc, RL_MAR0, bswap32(hashes[1]));
679		CSR_WRITE_4(sc, RL_MAR4, bswap32(hashes[0]));
680	} else {
681		CSR_WRITE_4(sc, RL_MAR0, hashes[0]);
682		CSR_WRITE_4(sc, RL_MAR4, hashes[1]);
683	}
684}
685
686static void
687re_reset(struct rl_softc *sc)
688{
689	int			i;
690
691	RL_LOCK_ASSERT(sc);
692
693	CSR_WRITE_1(sc, RL_COMMAND, RL_CMD_RESET);
694
695	for (i = 0; i < RL_TIMEOUT; i++) {
696		DELAY(10);
697		if (!(CSR_READ_1(sc, RL_COMMAND) & RL_CMD_RESET))
698			break;
699	}
700	if (i == RL_TIMEOUT)
701		device_printf(sc->rl_dev, "reset never completed!\n");
702
703	CSR_WRITE_1(sc, 0x82, 1);
704}
705
#ifdef RE_DIAG

/*
 * The following routine is designed to test for a defect on some
 * 32-bit 8169 cards. Some of these NICs have the REQ64# and ACK64#
 * lines connected to the bus, however for a 32-bit only card, they
 * should be pulled high. The result of this defect is that the
 * NIC will not work right if you plug it into a 64-bit slot: DMA
 * operations will be done with 64-bit transfers, which will fail
 * because the 64-bit data lines aren't connected.
 *
 * There's no way to work around this (short of talking a soldering
 * iron to the board), however we can detect it. The method we use
 * here is to put the NIC into digital loopback mode, set the receiver
 * to promiscuous mode, and then try to send a frame. We then compare
 * the frame data we sent to what was received. If the data matches,
 * then the NIC is working correctly, otherwise we know the user has
 * a defective NIC which has been mistakenly plugged into a 64-bit PCI
 * slot. In the latter case, there's no way the NIC can work correctly,
 * so we print out a message on the console and abort the device attach.
 */

/*
 * Returns 0 if the loopback frame came back intact, EIO on any
 * diagnostic failure, ENOBUFS if no mbuf could be allocated.
 * Temporarily reconfigures the chip (test mode, promiscuous RX) and
 * restores it before returning.
 */
static int
re_diag(struct rl_softc *sc)
{
	struct ifnet		*ifp = sc->rl_ifp;
	struct mbuf		*m0;
	struct ether_header	*eh;
	struct rl_desc		*cur_rx;
	u_int16_t		status;
	u_int32_t		rxstat;
	int			total_len, i, error = 0, phyaddr;
	u_int8_t		dst[] = { 0x00, 'h', 'e', 'l', 'l', 'o' };
	u_int8_t		src[] = { 0x00, 'w', 'o', 'r', 'l', 'd' };

	/* Allocate a single mbuf */
	MGETHDR(m0, M_DONTWAIT, MT_DATA);
	if (m0 == NULL)
		return (ENOBUFS);

	RL_LOCK(sc);

	/*
	 * Initialize the NIC in test mode. This sets the chip up
	 * so that it can send and receive frames, but performs the
	 * following special functions:
	 * - Puts receiver in promiscuous mode
	 * - Enables digital loopback mode
	 * - Leaves interrupts turned off
	 */

	ifp->if_flags |= IFF_PROMISC;
	sc->rl_testmode = 1;
	re_reset(sc);
	re_init_locked(sc);
	sc->rl_flags |= RL_FLAG_LINK;
	/* GMII PHY address is 1 on 8169-class parts, 0 otherwise
	 * (matches the addressing in re_gmii_readreg/re_miibus_readreg). */
	if (sc->rl_type == RL_8169)
		phyaddr = 1;
	else
		phyaddr = 0;

	/* Reset the PHY and wait (bounded) for the reset bit to self-clear. */
	re_miibus_writereg(sc->rl_dev, phyaddr, MII_BMCR, BMCR_RESET);
	for (i = 0; i < RL_TIMEOUT; i++) {
		status = re_miibus_readreg(sc->rl_dev, phyaddr, MII_BMCR);
		if (!(status & BMCR_RESET))
			break;
	}

	/* Put the PHY into loopback mode for the test. */
	re_miibus_writereg(sc->rl_dev, phyaddr, MII_BMCR, BMCR_LOOP);
	/* NOTE(review): this ISR write appears to acknowledge any pending
	 * interrupt events before the test — confirm against datasheet. */
	CSR_WRITE_2(sc, RL_ISR, RL_INTRS);

	DELAY(100000);

	/* Put some data in the mbuf */

	eh = mtod(m0, struct ether_header *);
	bcopy ((char *)&dst, eh->ether_dhost, ETHER_ADDR_LEN);
	bcopy ((char *)&src, eh->ether_shost, ETHER_ADDR_LEN);
	eh->ether_type = htons(ETHERTYPE_IP);
	m0->m_pkthdr.len = m0->m_len = ETHER_MIN_LEN - ETHER_CRC_LEN;

	/*
	 * Queue the packet, start transmission.
	 * Note: IF_HANDOFF() ultimately calls re_start() for us.
	 */

	CSR_WRITE_2(sc, RL_ISR, 0xFFFF);
	RL_UNLOCK(sc);
	/* XXX: re_diag must not be called when in ALTQ mode */
	/* The lock is dropped here because IF_HANDOFF may re-enter the
	 * driver's start routine, which takes the lock itself. */
	IF_HANDOFF(&ifp->if_snd, m0, ifp);
	RL_LOCK(sc);
	m0 = NULL;

	/* Wait for it to propagate through the chip */

	DELAY(100000);
	for (i = 0; i < RL_TIMEOUT; i++) {
		status = CSR_READ_2(sc, RL_ISR);
		CSR_WRITE_2(sc, RL_ISR, status);
		if ((status & (RL_ISR_TIMEOUT_EXPIRED|RL_ISR_RX_OK)) ==
		    (RL_ISR_TIMEOUT_EXPIRED|RL_ISR_RX_OK))
			break;
		DELAY(10);
	}

	if (i == RL_TIMEOUT) {
		device_printf(sc->rl_dev,
		    "diagnostic failed, failed to receive packet in"
		    " loopback mode\n");
		error = EIO;
		goto done;
	}

	/*
	 * The packet should have been dumped into the first
	 * entry in the RX DMA ring. Grab it from there.
	 */

	bus_dmamap_sync(sc->rl_ldata.rl_rx_list_tag,
	    sc->rl_ldata.rl_rx_list_map,
	    BUS_DMASYNC_POSTREAD);
	bus_dmamap_sync(sc->rl_ldata.rl_rx_mtag,
	    sc->rl_ldata.rl_rx_desc[0].rx_dmamap,
	    BUS_DMASYNC_POSTREAD);
	bus_dmamap_unload(sc->rl_ldata.rl_rx_mtag,
	    sc->rl_ldata.rl_rx_desc[0].rx_dmamap);

	m0 = sc->rl_ldata.rl_rx_desc[0].rx_m;
	sc->rl_ldata.rl_rx_desc[0].rx_m = NULL;
	eh = mtod(m0, struct ether_header *);

	cur_rx = &sc->rl_ldata.rl_rx_list[0];
	total_len = RL_RXBYTES(cur_rx);
	/* NOTE(review): rxstat is read here but never checked below. */
	rxstat = le32toh(cur_rx->rl_cmdstat);

	if (total_len != ETHER_MIN_LEN) {
		device_printf(sc->rl_dev,
		    "diagnostic failed, received short packet\n");
		error = EIO;
		goto done;
	}

	/* Test that the received packet data matches what we sent. */

	if (bcmp((char *)&eh->ether_dhost, (char *)&dst, ETHER_ADDR_LEN) ||
	    bcmp((char *)&eh->ether_shost, (char *)&src, ETHER_ADDR_LEN) ||
	    ntohs(eh->ether_type) != ETHERTYPE_IP) {
		device_printf(sc->rl_dev, "WARNING, DMA FAILURE!\n");
		device_printf(sc->rl_dev, "expected TX data: %6D/%6D/0x%x\n",
		    dst, ":", src, ":", ETHERTYPE_IP);
		device_printf(sc->rl_dev, "received RX data: %6D/%6D/0x%x\n",
		    eh->ether_dhost, ":",  eh->ether_shost, ":",
		    ntohs(eh->ether_type));
		device_printf(sc->rl_dev, "You may have a defective 32-bit "
		    "NIC plugged into a 64-bit PCI slot.\n");
		device_printf(sc->rl_dev, "Please re-install the NIC in a "
		    "32-bit slot for proper operation.\n");
		device_printf(sc->rl_dev, "Read the re(4) man page for more "
		    "details.\n");
		error = EIO;
	}

done:
	/* Turn interface off, release resources */

	sc->rl_testmode = 0;
	sc->rl_flags &= ~RL_FLAG_LINK;
	ifp->if_flags &= ~IFF_PROMISC;
	re_stop(sc);
	if (m0 != NULL)
		m_freem(m0);

	RL_UNLOCK(sc);

	return (error);
}

#endif
884
885/*
886 * Probe for a RealTek 8139C+/8169/8110 chip. Check the PCI vendor and device
887 * IDs against our list and return a device name if we find a match.
888 */
889static int
890re_probe(device_t dev)
891{
892	struct rl_type		*t;
893	uint16_t		devid, vendor;
894	uint16_t		revid, sdevid;
895	int			i;
896
897	vendor = pci_get_vendor(dev);
898	devid = pci_get_device(dev);
899	revid = pci_get_revid(dev);
900	sdevid = pci_get_subdevice(dev);
901
902	if (vendor == LINKSYS_VENDORID && devid == LINKSYS_DEVICEID_EG1032) {
903		if (sdevid != LINKSYS_SUBDEVICE_EG1032_REV3) {
904			/*
905			 * Only attach to rev. 3 of the Linksys EG1032 adapter.
906			 * Rev. 2 is supported by sk(4).
907			 */
908			return (ENXIO);
909		}
910	}
911
912	if (vendor == RT_VENDORID && devid == RT_DEVICEID_8139) {
913		if (revid != 0x20) {
914			/* 8139, let rl(4) take care of this device. */
915			return (ENXIO);
916		}
917	}
918
919	t = re_devs;
920	for (i = 0; i < sizeof(re_devs) / sizeof(re_devs[0]); i++, t++) {
921		if (vendor == t->rl_vid && devid == t->rl_did) {
922			device_set_desc(dev, t->rl_name);
923			return (BUS_PROBE_DEFAULT);
924		}
925	}
926
927	return (ENXIO);
928}
929
930/*
931 * Map a single buffer address.
932 */
933
934static void
935re_dma_map_addr(void *arg, bus_dma_segment_t *segs, int nseg, int error)
936{
937	bus_addr_t		*addr;
938
939	if (error)
940		return;
941
942	KASSERT(nseg == 1, ("too many DMA segments, %d should be 1", nseg));
943	addr = arg;
944	*addr = segs->ds_addr;
945}
946
/*
 * Allocate all DMA resources for the device: the parent DMA tag, the
 * TX/RX mbuf tags and per-descriptor maps, and the TX/RX descriptor
 * rings themselves (allocated, mapped and loaded).  Returns 0 on
 * success or an errno; on failure, previously-allocated resources are
 * left for the caller's detach path to release.
 */
static int
re_allocmem(device_t dev, struct rl_softc *sc)
{
	bus_size_t		rx_list_size, tx_list_size;
	int			error;
	int			i;

	/* Ring sizes in bytes, from the configured descriptor counts. */
	rx_list_size = sc->rl_ldata.rl_rx_desc_cnt * sizeof(struct rl_desc);
	tx_list_size = sc->rl_ldata.rl_tx_desc_cnt * sizeof(struct rl_desc);

	/*
	 * Allocate the parent bus DMA tag appropriate for PCI.
	 * In order to use DAC, RL_CPLUSCMD_PCI_DAC bit of RL_CPLUS_CMD
	 * register should be set. However some RealTek chips are known
	 * to be buggy on DAC handling, therefore disable DAC by limiting
	 * DMA address space to 32bit. PCIe variants of RealTek chips
	 * may not have the limitation but I took safer path.
	 */
	error = bus_dma_tag_create(bus_get_dma_tag(dev), 1, 0,
	    BUS_SPACE_MAXADDR_32BIT, BUS_SPACE_MAXADDR, NULL, NULL,
	    BUS_SPACE_MAXSIZE_32BIT, 0, BUS_SPACE_MAXSIZE_32BIT, 0,
	    NULL, NULL, &sc->rl_parent_tag);
	if (error) {
		device_printf(dev, "could not allocate parent DMA tag\n");
		return (error);
	}

	/*
	 * Allocate map for TX mbufs.
	 */
	error = bus_dma_tag_create(sc->rl_parent_tag, 1, 0,
	    BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL,
	    NULL, MCLBYTES * RL_NTXSEGS, RL_NTXSEGS, 4096, 0,
	    NULL, NULL, &sc->rl_ldata.rl_tx_mtag);
	if (error) {
		device_printf(dev, "could not allocate TX DMA tag\n");
		return (error);
	}

	/*
	 * Allocate map for RX mbufs.
	 */

	/* 8-byte alignment so buffers meet the chip's RX requirements. */
	error = bus_dma_tag_create(sc->rl_parent_tag, sizeof(uint64_t), 0,
	    BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL,
	    MCLBYTES, 1, MCLBYTES, 0, NULL, NULL, &sc->rl_ldata.rl_rx_mtag);
	if (error) {
		device_printf(dev, "could not allocate RX DMA tag\n");
		return (error);
	}

	/*
	 * Allocate map for TX descriptor list.
	 */
	error = bus_dma_tag_create(sc->rl_parent_tag, RL_RING_ALIGN,
	    0, BUS_SPACE_MAXADDR_32BIT, BUS_SPACE_MAXADDR, NULL,
	    NULL, tx_list_size, 1, tx_list_size, 0,
	    NULL, NULL, &sc->rl_ldata.rl_tx_list_tag);
	if (error) {
		device_printf(dev, "could not allocate TX DMA ring tag\n");
		return (error);
	}

	/* Allocate DMA'able memory for the TX ring */

	error = bus_dmamem_alloc(sc->rl_ldata.rl_tx_list_tag,
	    (void **)&sc->rl_ldata.rl_tx_list,
	    BUS_DMA_WAITOK | BUS_DMA_COHERENT | BUS_DMA_ZERO,
	    &sc->rl_ldata.rl_tx_list_map);
	if (error) {
		device_printf(dev, "could not allocate TX DMA ring\n");
		return (error);
	}

	/* Load the map for the TX ring. */

	/* re_dma_map_addr() stores the ring's bus address here; 0 means
	 * the callback never ran successfully. */
	sc->rl_ldata.rl_tx_list_addr = 0;
	error = bus_dmamap_load(sc->rl_ldata.rl_tx_list_tag,
	     sc->rl_ldata.rl_tx_list_map, sc->rl_ldata.rl_tx_list,
	     tx_list_size, re_dma_map_addr,
	     &sc->rl_ldata.rl_tx_list_addr, BUS_DMA_NOWAIT);
	if (error != 0 || sc->rl_ldata.rl_tx_list_addr == 0) {
		device_printf(dev, "could not load TX DMA ring\n");
		return (ENOMEM);
	}

	/* Create DMA maps for TX buffers */

	for (i = 0; i < sc->rl_ldata.rl_tx_desc_cnt; i++) {
		error = bus_dmamap_create(sc->rl_ldata.rl_tx_mtag, 0,
		    &sc->rl_ldata.rl_tx_desc[i].tx_dmamap);
		if (error) {
			device_printf(dev, "could not create DMA map for TX\n");
			return (error);
		}
	}

	/*
	 * Allocate map for RX descriptor list.
	 */
	error = bus_dma_tag_create(sc->rl_parent_tag, RL_RING_ALIGN,
	    0, BUS_SPACE_MAXADDR_32BIT, BUS_SPACE_MAXADDR, NULL,
	    NULL, rx_list_size, 1, rx_list_size, 0,
	    NULL, NULL, &sc->rl_ldata.rl_rx_list_tag);
	if (error) {
		device_printf(dev, "could not create RX DMA ring tag\n");
		return (error);
	}

	/* Allocate DMA'able memory for the RX ring */

	error = bus_dmamem_alloc(sc->rl_ldata.rl_rx_list_tag,
	    (void **)&sc->rl_ldata.rl_rx_list,
	    BUS_DMA_WAITOK | BUS_DMA_COHERENT | BUS_DMA_ZERO,
	    &sc->rl_ldata.rl_rx_list_map);
	if (error) {
		device_printf(dev, "could not allocate RX DMA ring\n");
		return (error);
	}

	/* Load the map for the RX ring. */

	sc->rl_ldata.rl_rx_list_addr = 0;
	error = bus_dmamap_load(sc->rl_ldata.rl_rx_list_tag,
	     sc->rl_ldata.rl_rx_list_map, sc->rl_ldata.rl_rx_list,
	     rx_list_size, re_dma_map_addr,
	     &sc->rl_ldata.rl_rx_list_addr, BUS_DMA_NOWAIT);
	if (error != 0 || sc->rl_ldata.rl_rx_list_addr == 0) {
		device_printf(dev, "could not load RX DMA ring\n");
		return (ENOMEM);
	}

	/* Create DMA maps for RX buffers */

	/* A spare map is kept so a failed RX buffer swap can be undone. */
	error = bus_dmamap_create(sc->rl_ldata.rl_rx_mtag, 0,
	    &sc->rl_ldata.rl_rx_sparemap);
	if (error) {
		device_printf(dev, "could not create spare DMA map for RX\n");
		return (error);
	}
	for (i = 0; i < sc->rl_ldata.rl_rx_desc_cnt; i++) {
		error = bus_dmamap_create(sc->rl_ldata.rl_rx_mtag, 0,
		    &sc->rl_ldata.rl_rx_desc[i].rx_dmamap);
		if (error) {
			device_printf(dev, "could not create DMA map for RX\n");
			return (error);
		}
	}

	return (0);
}
1098
1099/*
1100 * Attach the interface. Allocate softc structures, do ifmedia
1101 * setup and ethernet/BPF attach.
1102 */
1103static int
1104re_attach(device_t dev)
1105{
1106	u_char			eaddr[ETHER_ADDR_LEN];
1107	u_int16_t		as[ETHER_ADDR_LEN / 2];
1108	struct rl_softc		*sc;
1109	struct ifnet		*ifp;
1110	struct rl_hwrev		*hw_rev;
1111	int			hwrev;
1112	u_int16_t		devid, re_did = 0;
1113	int			error = 0, rid, i;
1114	int			msic, reg;
1115	uint8_t			cfg;
1116
1117	sc = device_get_softc(dev);
1118	sc->rl_dev = dev;
1119
1120	mtx_init(&sc->rl_mtx, device_get_nameunit(dev), MTX_NETWORK_LOCK,
1121	    MTX_DEF);
1122	callout_init_mtx(&sc->rl_stat_callout, &sc->rl_mtx, 0);
1123
1124	/*
1125	 * Map control/status registers.
1126	 */
1127	pci_enable_busmaster(dev);
1128
1129	devid = pci_get_device(dev);
1130	/* Prefer memory space register mapping over IO space. */
1131	sc->rl_res_id = PCIR_BAR(1);
1132	sc->rl_res_type = SYS_RES_MEMORY;
1133	/* RTL8168/8101E seems to use different BARs. */
1134	if (devid == RT_DEVICEID_8168 || devid == RT_DEVICEID_8101E)
1135		sc->rl_res_id = PCIR_BAR(2);
1136	sc->rl_res = bus_alloc_resource_any(dev, sc->rl_res_type,
1137	    &sc->rl_res_id, RF_ACTIVE);
1138
1139	if (sc->rl_res == NULL) {
1140		sc->rl_res_id = PCIR_BAR(0);
1141		sc->rl_res_type = SYS_RES_IOPORT;
1142		sc->rl_res = bus_alloc_resource_any(dev, sc->rl_res_type,
1143		    &sc->rl_res_id, RF_ACTIVE);
1144		if (sc->rl_res == NULL) {
1145			device_printf(dev, "couldn't map ports/memory\n");
1146			error = ENXIO;
1147			goto fail;
1148		}
1149	}
1150
1151	sc->rl_btag = rman_get_bustag(sc->rl_res);
1152	sc->rl_bhandle = rman_get_bushandle(sc->rl_res);
1153
1154	msic = 0;
1155	if (pci_find_extcap(dev, PCIY_EXPRESS, &reg) == 0) {
1156		msic = pci_msi_count(dev);
1157		if (bootverbose)
1158			device_printf(dev, "MSI count : %d\n", msic);
1159	}
1160	if (msic == RL_MSI_MESSAGES  && msi_disable == 0) {
1161		if (pci_alloc_msi(dev, &msic) == 0) {
1162			if (msic == RL_MSI_MESSAGES) {
1163				device_printf(dev, "Using %d MSI messages\n",
1164				    msic);
1165				sc->rl_flags |= RL_FLAG_MSI;
1166				/* Explicitly set MSI enable bit. */
1167				CSR_WRITE_1(sc, RL_EECMD, RL_EE_MODE);
1168				cfg = CSR_READ_1(sc, RL_CFG2);
1169				cfg |= RL_CFG2_MSI;
1170				CSR_WRITE_1(sc, RL_CFG2, cfg);
1171				CSR_WRITE_1(sc, RL_EECMD, RL_EEMODE_OFF);
1172			} else
1173				pci_release_msi(dev);
1174		}
1175	}
1176
1177	/* Allocate interrupt */
1178	if ((sc->rl_flags & RL_FLAG_MSI) == 0) {
1179		rid = 0;
1180		sc->rl_irq[0] = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid,
1181		    RF_SHAREABLE | RF_ACTIVE);
1182		if (sc->rl_irq[0] == NULL) {
1183			device_printf(dev, "couldn't allocate IRQ resources\n");
1184			error = ENXIO;
1185			goto fail;
1186		}
1187	} else {
1188		for (i = 0, rid = 1; i < RL_MSI_MESSAGES; i++, rid++) {
1189			sc->rl_irq[i] = bus_alloc_resource_any(dev,
1190			    SYS_RES_IRQ, &rid, RF_ACTIVE);
1191			if (sc->rl_irq[i] == NULL) {
1192				device_printf(dev,
1193				    "couldn't llocate IRQ resources for "
1194				    "message %d\n", rid);
1195				error = ENXIO;
1196				goto fail;
1197			}
1198		}
1199	}
1200
1201	if ((sc->rl_flags & RL_FLAG_MSI) == 0) {
1202		CSR_WRITE_1(sc, RL_EECMD, RL_EE_MODE);
1203		cfg = CSR_READ_1(sc, RL_CFG2);
1204		if ((cfg & RL_CFG2_MSI) != 0) {
1205			device_printf(dev, "turning off MSI enable bit.\n");
1206			cfg &= ~RL_CFG2_MSI;
1207			CSR_WRITE_1(sc, RL_CFG2, cfg);
1208		}
1209		CSR_WRITE_1(sc, RL_EECMD, RL_EEMODE_OFF);
1210	}
1211
1212	/* Reset the adapter. */
1213	RL_LOCK(sc);
1214	re_reset(sc);
1215	RL_UNLOCK(sc);
1216
1217	hw_rev = re_hwrevs;
1218	hwrev = CSR_READ_4(sc, RL_TXCFG);
1219	device_printf(dev, "Chip rev. 0x%08x\n", hwrev & 0x7c800000);
1220	device_printf(dev, "MAC rev. 0x%08x\n", hwrev & 0x00700000);
1221	hwrev &= RL_TXCFG_HWREV;
1222	while (hw_rev->rl_desc != NULL) {
1223		if (hw_rev->rl_rev == hwrev) {
1224			sc->rl_type = hw_rev->rl_type;
1225			break;
1226		}
1227		hw_rev++;
1228	}
1229	if (hw_rev->rl_desc == NULL) {
1230		device_printf(dev, "Unknown H/W revision: 0x%08x\n", hwrev);
1231		error = ENXIO;
1232		goto fail;
1233	}
1234
1235	switch (hw_rev->rl_rev) {
1236	case RL_HWREV_8139CPLUS:
1237		sc->rl_flags |= RL_FLAG_NOJUMBO | RL_FLAG_FASTETHER;
1238		break;
1239	case RL_HWREV_8100E:
1240	case RL_HWREV_8101E:
1241		sc->rl_flags |= RL_FLAG_NOJUMBO | RL_FLAG_INVMAR |
1242		    RL_FLAG_PHYWAKE | RL_FLAG_FASTETHER;
1243		break;
1244	case RL_HWREV_8102E:
1245	case RL_HWREV_8102EL:
1246		sc->rl_flags |= RL_FLAG_NOJUMBO | RL_FLAG_INVMAR |
1247		    RL_FLAG_PHYWAKE | RL_FLAG_PAR | RL_FLAG_DESCV2 |
1248		    RL_FLAG_MACSTAT | RL_FLAG_FASTETHER;
1249		break;
1250	case RL_HWREV_8168_SPIN1:
1251	case RL_HWREV_8168_SPIN2:
1252	case RL_HWREV_8168_SPIN3:
1253		sc->rl_flags |= RL_FLAG_INVMAR | RL_FLAG_PHYWAKE |
1254		    RL_FLAG_MACSTAT;
1255		break;
1256	case RL_HWREV_8168C:
1257	case RL_HWREV_8168C_SPIN2:
1258	case RL_HWREV_8168CP:
1259	case RL_HWREV_8168D:
1260		sc->rl_flags |= RL_FLAG_INVMAR | RL_FLAG_PHYWAKE |
1261		    RL_FLAG_PAR | RL_FLAG_DESCV2 | RL_FLAG_MACSTAT;
1262		/*
1263		 * These controllers support jumbo frame but it seems
1264		 * that enabling it requires touching additional magic
1265		 * registers. Depending on MAC revisions some
1266		 * controllers need to disable checksum offload. So
1267		 * disable jumbo frame until I have better idea what
1268		 * it really requires to make it support.
1269		 * RTL8168C/CP : supports up to 6KB jumbo frame.
1270		 * RTL8111C/CP : supports up to 9KB jumbo frame.
1271		 */
1272		sc->rl_flags |= RL_FLAG_NOJUMBO;
1273		break;
1274	case RL_HWREV_8169_8110SB:
1275	case RL_HWREV_8169_8110SC:
1276	case RL_HWREV_8169_8110SBL:
1277		sc->rl_flags |= RL_FLAG_PHYWAKE;
1278		break;
1279	default:
1280		break;
1281	}
1282
1283	/* Enable PME. */
1284	CSR_WRITE_1(sc, RL_EECMD, RL_EE_MODE);
1285	cfg = CSR_READ_1(sc, RL_CFG1);
1286	cfg |= RL_CFG1_PME;
1287	CSR_WRITE_1(sc, RL_CFG1, cfg);
1288	cfg = CSR_READ_1(sc, RL_CFG5);
1289	cfg &= RL_CFG5_PME_STS;
1290	CSR_WRITE_1(sc, RL_CFG5, cfg);
1291	CSR_WRITE_1(sc, RL_EECMD, RL_EEMODE_OFF);
1292
1293	if ((sc->rl_flags & RL_FLAG_PAR) != 0) {
1294		/*
1295		 * XXX Should have a better way to extract station
1296		 * address from EEPROM.
1297		 */
1298		for (i = 0; i < ETHER_ADDR_LEN; i++)
1299			eaddr[i] = CSR_READ_1(sc, RL_IDR0 + i);
1300	} else {
1301		sc->rl_eewidth = RL_9356_ADDR_LEN;
1302		re_read_eeprom(sc, (caddr_t)&re_did, 0, 1);
1303		if (re_did != 0x8129)
1304			sc->rl_eewidth = RL_9346_ADDR_LEN;
1305
1306		/*
1307		 * Get station address from the EEPROM.
1308		 */
1309		re_read_eeprom(sc, (caddr_t)as, RL_EE_EADDR, 3);
1310		for (i = 0; i < ETHER_ADDR_LEN / 2; i++)
1311			as[i] = le16toh(as[i]);
1312		bcopy(as, eaddr, sizeof(eaddr));
1313	}
1314
1315	if (sc->rl_type == RL_8169) {
1316		/* Set RX length mask and number of descriptors. */
1317		sc->rl_rxlenmask = RL_RDESC_STAT_GFRAGLEN;
1318		sc->rl_txstart = RL_GTXSTART;
1319		sc->rl_ldata.rl_tx_desc_cnt = RL_8169_TX_DESC_CNT;
1320		sc->rl_ldata.rl_rx_desc_cnt = RL_8169_RX_DESC_CNT;
1321	} else {
1322		/* Set RX length mask and number of descriptors. */
1323		sc->rl_rxlenmask = RL_RDESC_STAT_FRAGLEN;
1324		sc->rl_txstart = RL_TXSTART;
1325		sc->rl_ldata.rl_tx_desc_cnt = RL_8139_TX_DESC_CNT;
1326		sc->rl_ldata.rl_rx_desc_cnt = RL_8139_RX_DESC_CNT;
1327	}
1328
1329	error = re_allocmem(dev, sc);
1330	if (error)
1331		goto fail;
1332
1333	ifp = sc->rl_ifp = if_alloc(IFT_ETHER);
1334	if (ifp == NULL) {
1335		device_printf(dev, "can not if_alloc()\n");
1336		error = ENOSPC;
1337		goto fail;
1338	}
1339
1340	/* Take PHY out of power down mode. */
1341	if ((sc->rl_flags & RL_FLAG_PHYWAKE) != 0) {
1342		re_gmii_writereg(dev, 1, 0x1f, 0);
1343		re_gmii_writereg(dev, 1, 0x0e, 0);
1344	}
1345
1346	/* Do MII setup */
1347	if (mii_phy_probe(dev, &sc->rl_miibus,
1348	    re_ifmedia_upd, re_ifmedia_sts)) {
1349		device_printf(dev, "MII without any phy!\n");
1350		error = ENXIO;
1351		goto fail;
1352	}
1353
1354	ifp->if_softc = sc;
1355	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
1356	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
1357	ifp->if_ioctl = re_ioctl;
1358	ifp->if_start = re_start;
1359	ifp->if_hwassist = RE_CSUM_FEATURES;
1360	ifp->if_capabilities = IFCAP_HWCSUM;
1361	ifp->if_capenable = ifp->if_capabilities;
1362	ifp->if_init = re_init;
1363	IFQ_SET_MAXLEN(&ifp->if_snd, RL_IFQ_MAXLEN);
1364	ifp->if_snd.ifq_drv_maxlen = RL_IFQ_MAXLEN;
1365	IFQ_SET_READY(&ifp->if_snd);
1366
1367	TASK_INIT(&sc->rl_txtask, 1, re_tx_task, ifp);
1368	TASK_INIT(&sc->rl_inttask, 0, re_int_task, sc);
1369
1370	/*
1371	 * XXX
1372	 * Still have no idea how to make TSO work on 8168C, 8168CP,
1373	 * 8111C and 8111CP.
1374	 */
1375	if ((sc->rl_flags & RL_FLAG_DESCV2) == 0) {
1376		ifp->if_hwassist |= CSUM_TSO;
1377		ifp->if_capabilities |= IFCAP_TSO4;
1378	}
1379
1380	/*
1381	 * Call MI attach routine.
1382	 */
1383	ether_ifattach(ifp, eaddr);
1384
1385	/* VLAN capability setup */
1386	ifp->if_capabilities |= IFCAP_VLAN_MTU | IFCAP_VLAN_HWTAGGING;
1387	if (ifp->if_capabilities & IFCAP_HWCSUM)
1388		ifp->if_capabilities |= IFCAP_VLAN_HWCSUM;
1389	/* Enable WOL if PM is supported. */
1390	if (pci_find_extcap(sc->rl_dev, PCIY_PMG, &reg) == 0)
1391		ifp->if_capabilities |= IFCAP_WOL;
1392	ifp->if_capenable = ifp->if_capabilities;
1393	/*
1394	 * Don't enable TSO by default. Under certain
1395	 * circumtances the controller generated corrupted
1396	 * packets in TSO size.
1397	 */
1398	ifp->if_hwassist &= ~CSUM_TSO;
1399	ifp->if_capenable &= ~IFCAP_TSO4;
1400#ifdef DEVICE_POLLING
1401	ifp->if_capabilities |= IFCAP_POLLING;
1402#endif
1403	/*
1404	 * Tell the upper layer(s) we support long frames.
1405	 * Must appear after the call to ether_ifattach() because
1406	 * ether_ifattach() sets ifi_hdrlen to the default value.
1407	 */
1408	ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header);
1409
1410#ifdef RE_DIAG
1411	/*
1412	 * Perform hardware diagnostic on the original RTL8169.
1413	 * Some 32-bit cards were incorrectly wired and would
1414	 * malfunction if plugged into a 64-bit slot.
1415	 */
1416
1417	if (hwrev == RL_HWREV_8169) {
1418		error = re_diag(sc);
1419		if (error) {
1420			device_printf(dev,
1421		    	"attach aborted due to hardware diag failure\n");
1422			ether_ifdetach(ifp);
1423			goto fail;
1424		}
1425	}
1426#endif
1427
1428	/* Hook interrupt last to avoid having to lock softc */
1429	if ((sc->rl_flags & RL_FLAG_MSI) == 0)
1430		error = bus_setup_intr(dev, sc->rl_irq[0],
1431		    INTR_TYPE_NET | INTR_MPSAFE, re_intr, NULL, sc,
1432		    &sc->rl_intrhand[0]);
1433	else {
1434		for (i = 0; i < RL_MSI_MESSAGES; i++) {
1435			error = bus_setup_intr(dev, sc->rl_irq[i],
1436			    INTR_TYPE_NET | INTR_MPSAFE, re_intr, NULL, sc,
1437		    	    &sc->rl_intrhand[i]);
1438			if (error != 0)
1439				break;
1440		}
1441	}
1442	if (error) {
1443		device_printf(dev, "couldn't set up irq\n");
1444		ether_ifdetach(ifp);
1445	}
1446
1447fail:
1448
1449	if (error)
1450		re_detach(dev);
1451
1452	return (error);
1453}
1454
1455/*
1456 * Shutdown hardware and free up resources. This can be called any
1457 * time after the mutex has been initialized. It is called in both
1458 * the error case in attach and the normal detach case so it needs
1459 * to be careful about only freeing resources that have actually been
1460 * allocated.
1461 */
1462static int
1463re_detach(device_t dev)
1464{
1465	struct rl_softc		*sc;
1466	struct ifnet		*ifp;
1467	int			i, rid;
1468
1469	sc = device_get_softc(dev);
1470	ifp = sc->rl_ifp;
1471	KASSERT(mtx_initialized(&sc->rl_mtx), ("re mutex not initialized"));
1472
1473	/* These should only be active if attach succeeded */
1474	if (device_is_attached(dev)) {
1475#ifdef DEVICE_POLLING
1476		if (ifp->if_capenable & IFCAP_POLLING)
1477			ether_poll_deregister(ifp);
1478#endif
1479		RL_LOCK(sc);
1480#if 0
1481		sc->suspended = 1;
1482#endif
1483		re_stop(sc);
1484		RL_UNLOCK(sc);
1485		callout_drain(&sc->rl_stat_callout);
1486		taskqueue_drain(taskqueue_fast, &sc->rl_inttask);
1487		taskqueue_drain(taskqueue_fast, &sc->rl_txtask);
1488		/*
1489		 * Force off the IFF_UP flag here, in case someone
1490		 * still had a BPF descriptor attached to this
1491		 * interface. If they do, ether_ifdetach() will cause
1492		 * the BPF code to try and clear the promisc mode
1493		 * flag, which will bubble down to re_ioctl(),
1494		 * which will try to call re_init() again. This will
1495		 * turn the NIC back on and restart the MII ticker,
1496		 * which will panic the system when the kernel tries
1497		 * to invoke the re_tick() function that isn't there
1498		 * anymore.
1499		 */
1500		ifp->if_flags &= ~IFF_UP;
1501		ether_ifdetach(ifp);
1502	}
1503	if (sc->rl_miibus)
1504		device_delete_child(dev, sc->rl_miibus);
1505	bus_generic_detach(dev);
1506
1507	/*
1508	 * The rest is resource deallocation, so we should already be
1509	 * stopped here.
1510	 */
1511
1512	for (i = 0; i < RL_MSI_MESSAGES; i++) {
1513		if (sc->rl_intrhand[i] != NULL) {
1514			bus_teardown_intr(dev, sc->rl_irq[i],
1515			    sc->rl_intrhand[i]);
1516			sc->rl_intrhand[i] = NULL;
1517		}
1518	}
1519	if (ifp != NULL)
1520		if_free(ifp);
1521	if ((sc->rl_flags & RL_FLAG_MSI) == 0) {
1522		if (sc->rl_irq[0] != NULL) {
1523			bus_release_resource(dev, SYS_RES_IRQ, 0,
1524			    sc->rl_irq[0]);
1525			sc->rl_irq[0] = NULL;
1526		}
1527	} else {
1528		for (i = 0, rid = 1; i < RL_MSI_MESSAGES; i++, rid++) {
1529			if (sc->rl_irq[i] != NULL) {
1530				bus_release_resource(dev, SYS_RES_IRQ, rid,
1531				    sc->rl_irq[i]);
1532				sc->rl_irq[i] = NULL;
1533			}
1534		}
1535		pci_release_msi(dev);
1536	}
1537	if (sc->rl_res)
1538		bus_release_resource(dev, sc->rl_res_type, sc->rl_res_id,
1539		    sc->rl_res);
1540
1541	/* Unload and free the RX DMA ring memory and map */
1542
1543	if (sc->rl_ldata.rl_rx_list_tag) {
1544		bus_dmamap_unload(sc->rl_ldata.rl_rx_list_tag,
1545		    sc->rl_ldata.rl_rx_list_map);
1546		bus_dmamem_free(sc->rl_ldata.rl_rx_list_tag,
1547		    sc->rl_ldata.rl_rx_list,
1548		    sc->rl_ldata.rl_rx_list_map);
1549		bus_dma_tag_destroy(sc->rl_ldata.rl_rx_list_tag);
1550	}
1551
1552	/* Unload and free the TX DMA ring memory and map */
1553
1554	if (sc->rl_ldata.rl_tx_list_tag) {
1555		bus_dmamap_unload(sc->rl_ldata.rl_tx_list_tag,
1556		    sc->rl_ldata.rl_tx_list_map);
1557		bus_dmamem_free(sc->rl_ldata.rl_tx_list_tag,
1558		    sc->rl_ldata.rl_tx_list,
1559		    sc->rl_ldata.rl_tx_list_map);
1560		bus_dma_tag_destroy(sc->rl_ldata.rl_tx_list_tag);
1561	}
1562
1563	/* Destroy all the RX and TX buffer maps */
1564
1565	if (sc->rl_ldata.rl_tx_mtag) {
1566		for (i = 0; i < sc->rl_ldata.rl_tx_desc_cnt; i++)
1567			bus_dmamap_destroy(sc->rl_ldata.rl_tx_mtag,
1568			    sc->rl_ldata.rl_tx_desc[i].tx_dmamap);
1569		bus_dma_tag_destroy(sc->rl_ldata.rl_tx_mtag);
1570	}
1571	if (sc->rl_ldata.rl_rx_mtag) {
1572		for (i = 0; i < sc->rl_ldata.rl_rx_desc_cnt; i++)
1573			bus_dmamap_destroy(sc->rl_ldata.rl_rx_mtag,
1574			    sc->rl_ldata.rl_rx_desc[i].rx_dmamap);
1575		if (sc->rl_ldata.rl_rx_sparemap)
1576			bus_dmamap_destroy(sc->rl_ldata.rl_rx_mtag,
1577			    sc->rl_ldata.rl_rx_sparemap);
1578		bus_dma_tag_destroy(sc->rl_ldata.rl_rx_mtag);
1579	}
1580
1581	/* Unload and free the stats buffer and map */
1582
1583	if (sc->rl_ldata.rl_stag) {
1584		bus_dmamap_unload(sc->rl_ldata.rl_stag,
1585		    sc->rl_ldata.rl_rx_list_map);
1586		bus_dmamem_free(sc->rl_ldata.rl_stag,
1587		    sc->rl_ldata.rl_stats,
1588		    sc->rl_ldata.rl_smap);
1589		bus_dma_tag_destroy(sc->rl_ldata.rl_stag);
1590	}
1591
1592	if (sc->rl_parent_tag)
1593		bus_dma_tag_destroy(sc->rl_parent_tag);
1594
1595	mtx_destroy(&sc->rl_mtx);
1596
1597	return (0);
1598}
1599
1600static __inline void
1601re_discard_rxbuf(struct rl_softc *sc, int idx)
1602{
1603	struct rl_desc		*desc;
1604	struct rl_rxdesc	*rxd;
1605	uint32_t		cmdstat;
1606
1607	rxd = &sc->rl_ldata.rl_rx_desc[idx];
1608	desc = &sc->rl_ldata.rl_rx_list[idx];
1609	desc->rl_vlanctl = 0;
1610	cmdstat = rxd->rx_size;
1611	if (idx == sc->rl_ldata.rl_rx_desc_cnt - 1)
1612		cmdstat |= RL_RDESC_CMD_EOR;
1613	desc->rl_cmdstat = htole32(cmdstat | RL_RDESC_CMD_OWN);
1614}
1615
/*
 * Attach a fresh mbuf cluster to RX descriptor slot 'idx'.
 *
 * A spare DMA map is loaded first so that, on allocation or load
 * failure, the descriptor's current buffer is left untouched.  On
 * success the slot's map and the spare are swapped.  Returns 0 on
 * success or ENOBUFS.
 */
static int
re_newbuf(struct rl_softc *sc, int idx)
{
	struct mbuf		*m;
	struct rl_rxdesc	*rxd;
	bus_dma_segment_t	segs[1];
	bus_dmamap_t		map;
	struct rl_desc		*desc;
	uint32_t		cmdstat;
	int			error, nsegs;

	m = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR);
	if (m == NULL)
		return (ENOBUFS);

	m->m_len = m->m_pkthdr.len = MCLBYTES;
#ifdef RE_FIXUP_RX
	/*
	 * This is part of an evil trick to deal with non-x86 platforms.
	 * The RealTek chip requires RX buffers to be aligned on 64-bit
	 * boundaries, but that will hose non-x86 machines. To get around
	 * this, we leave some empty space at the start of each buffer
	 * and for non-x86 hosts, we copy the buffer back six bytes
	 * to achieve word alignment. This is slightly more efficient
	 * than allocating a new buffer, copying the contents, and
	 * discarding the old buffer.
	 */
	m_adj(m, RE_ETHER_ALIGN);
#endif
	/* Load the new cluster into the spare map before touching the slot. */
	error = bus_dmamap_load_mbuf_sg(sc->rl_ldata.rl_rx_mtag,
	    sc->rl_ldata.rl_rx_sparemap, m, segs, &nsegs, BUS_DMA_NOWAIT);
	if (error != 0) {
		m_freem(m);
		return (ENOBUFS);
	}
	KASSERT(nsegs == 1, ("%s: %d segment returned!", __func__, nsegs));

	rxd = &sc->rl_ldata.rl_rx_desc[idx];
	if (rxd->rx_m != NULL) {
		/* Tear down the DMA state of the mbuf being replaced. */
		bus_dmamap_sync(sc->rl_ldata.rl_rx_mtag, rxd->rx_dmamap,
		    BUS_DMASYNC_POSTREAD);
		bus_dmamap_unload(sc->rl_ldata.rl_rx_mtag, rxd->rx_dmamap);
	}

	/* Install the new mbuf and swap the slot's map with the spare. */
	rxd->rx_m = m;
	map = rxd->rx_dmamap;
	rxd->rx_dmamap = sc->rl_ldata.rl_rx_sparemap;
	rxd->rx_size = segs[0].ds_len;
	sc->rl_ldata.rl_rx_sparemap = map;
	bus_dmamap_sync(sc->rl_ldata.rl_rx_mtag, rxd->rx_dmamap,
	    BUS_DMASYNC_PREREAD);

	/* Point the descriptor at the buffer and hand it to the chip. */
	desc = &sc->rl_ldata.rl_rx_list[idx];
	desc->rl_vlanctl = 0;
	desc->rl_bufaddr_lo = htole32(RL_ADDR_LO(segs[0].ds_addr));
	desc->rl_bufaddr_hi = htole32(RL_ADDR_HI(segs[0].ds_addr));
	cmdstat = segs[0].ds_len;
	if (idx == sc->rl_ldata.rl_rx_desc_cnt - 1)
		cmdstat |= RL_RDESC_CMD_EOR;
	desc->rl_cmdstat = htole32(cmdstat | RL_RDESC_CMD_OWN);

	return (0);
}
1679
#ifdef RE_FIXUP_RX
/*
 * Slide a received frame back so the payload is word aligned.  The
 * buffer was deliberately loaded RE_ETHER_ALIGN bytes in (see
 * re_newbuf()); copy the data down by the difference to reach the
 * usual ETHER_ALIGN offset, one 16-bit word at a time.
 */
static __inline void
re_fixup_rx(struct mbuf *m)
{
	uint16_t		*from, *to;
	int			nwords;

	from = mtod(m, uint16_t *);
	to = from - (RE_ETHER_ALIGN - ETHER_ALIGN) / sizeof *from;

	/* One extra word covers a possible odd trailing byte. */
	nwords = m->m_len / sizeof(uint16_t) + 1;
	while (nwords-- > 0)
		*to++ = *from++;

	m->m_data -= RE_ETHER_ALIGN - ETHER_ALIGN;
}
#endif
1696
1697static int
1698re_tx_list_init(struct rl_softc *sc)
1699{
1700	struct rl_desc		*desc;
1701	int			i;
1702
1703	RL_LOCK_ASSERT(sc);
1704
1705	bzero(sc->rl_ldata.rl_tx_list,
1706	    sc->rl_ldata.rl_tx_desc_cnt * sizeof(struct rl_desc));
1707	for (i = 0; i < sc->rl_ldata.rl_tx_desc_cnt; i++)
1708		sc->rl_ldata.rl_tx_desc[i].tx_m = NULL;
1709	/* Set EOR. */
1710	desc = &sc->rl_ldata.rl_tx_list[sc->rl_ldata.rl_tx_desc_cnt - 1];
1711	desc->rl_cmdstat |= htole32(RL_TDESC_CMD_EOR);
1712
1713	bus_dmamap_sync(sc->rl_ldata.rl_tx_list_tag,
1714	    sc->rl_ldata.rl_tx_list_map,
1715	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1716
1717	sc->rl_ldata.rl_tx_prodidx = 0;
1718	sc->rl_ldata.rl_tx_considx = 0;
1719	sc->rl_ldata.rl_tx_free = sc->rl_ldata.rl_tx_desc_cnt;
1720
1721	return (0);
1722}
1723
1724static int
1725re_rx_list_init(struct rl_softc *sc)
1726{
1727	int			error, i;
1728
1729	bzero(sc->rl_ldata.rl_rx_list,
1730	    sc->rl_ldata.rl_rx_desc_cnt * sizeof(struct rl_desc));
1731	for (i = 0; i < sc->rl_ldata.rl_rx_desc_cnt; i++) {
1732		sc->rl_ldata.rl_rx_desc[i].rx_m = NULL;
1733		if ((error = re_newbuf(sc, i)) != 0)
1734			return (error);
1735	}
1736
1737	/* Flush the RX descriptors */
1738
1739	bus_dmamap_sync(sc->rl_ldata.rl_rx_list_tag,
1740	    sc->rl_ldata.rl_rx_list_map,
1741	    BUS_DMASYNC_PREWRITE|BUS_DMASYNC_PREREAD);
1742
1743	sc->rl_ldata.rl_rx_prodidx = 0;
1744	sc->rl_head = sc->rl_tail = NULL;
1745
1746	return (0);
1747}
1748
1749/*
1750 * RX handler for C+ and 8169. For the gigE chips, we support
1751 * the reception of jumbo frames that have been fragmented
1752 * across multiple 2K mbuf cluster buffers.
1753 */
1754static int
1755re_rxeof(struct rl_softc *sc)
1756{
1757	struct mbuf		*m;
1758	struct ifnet		*ifp;
1759	int			i, total_len;
1760	struct rl_desc		*cur_rx;
1761	u_int32_t		rxstat, rxvlan;
1762	int			maxpkt = 16;
1763
1764	RL_LOCK_ASSERT(sc);
1765
1766	ifp = sc->rl_ifp;
1767
1768	/* Invalidate the descriptor memory */
1769
1770	bus_dmamap_sync(sc->rl_ldata.rl_rx_list_tag,
1771	    sc->rl_ldata.rl_rx_list_map,
1772	    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
1773
1774	for (i = sc->rl_ldata.rl_rx_prodidx; maxpkt > 0;
1775	    i = RL_RX_DESC_NXT(sc, i)) {
1776		cur_rx = &sc->rl_ldata.rl_rx_list[i];
1777		rxstat = le32toh(cur_rx->rl_cmdstat);
1778		if ((rxstat & RL_RDESC_STAT_OWN) != 0)
1779			break;
1780		total_len = rxstat & sc->rl_rxlenmask;
1781		rxvlan = le32toh(cur_rx->rl_vlanctl);
1782		m = sc->rl_ldata.rl_rx_desc[i].rx_m;
1783
1784		if (!(rxstat & RL_RDESC_STAT_EOF)) {
1785			if (re_newbuf(sc, i) != 0) {
1786				/*
1787				 * If this is part of a multi-fragment packet,
1788				 * discard all the pieces.
1789				 */
1790				if (sc->rl_head != NULL) {
1791					m_freem(sc->rl_head);
1792					sc->rl_head = sc->rl_tail = NULL;
1793				}
1794				re_discard_rxbuf(sc, i);
1795				continue;
1796			}
1797			m->m_len = RE_RX_DESC_BUFLEN;
1798			if (sc->rl_head == NULL)
1799				sc->rl_head = sc->rl_tail = m;
1800			else {
1801				m->m_flags &= ~M_PKTHDR;
1802				sc->rl_tail->m_next = m;
1803				sc->rl_tail = m;
1804			}
1805			continue;
1806		}
1807
1808		/*
1809		 * NOTE: for the 8139C+, the frame length field
1810		 * is always 12 bits in size, but for the gigE chips,
1811		 * it is 13 bits (since the max RX frame length is 16K).
1812		 * Unfortunately, all 32 bits in the status word
1813		 * were already used, so to make room for the extra
1814		 * length bit, RealTek took out the 'frame alignment
1815		 * error' bit and shifted the other status bits
1816		 * over one slot. The OWN, EOR, FS and LS bits are
1817		 * still in the same places. We have already extracted
1818		 * the frame length and checked the OWN bit, so rather
1819		 * than using an alternate bit mapping, we shift the
1820		 * status bits one space to the right so we can evaluate
1821		 * them using the 8169 status as though it was in the
1822		 * same format as that of the 8139C+.
1823		 */
1824		if (sc->rl_type == RL_8169)
1825			rxstat >>= 1;
1826
1827		/*
1828		 * if total_len > 2^13-1, both _RXERRSUM and _GIANT will be
1829		 * set, but if CRC is clear, it will still be a valid frame.
1830		 */
1831		if (rxstat & RL_RDESC_STAT_RXERRSUM && !(total_len > 8191 &&
1832		    (rxstat & RL_RDESC_STAT_ERRS) == RL_RDESC_STAT_GIANT)) {
1833			ifp->if_ierrors++;
1834			/*
1835			 * If this is part of a multi-fragment packet,
1836			 * discard all the pieces.
1837			 */
1838			if (sc->rl_head != NULL) {
1839				m_freem(sc->rl_head);
1840				sc->rl_head = sc->rl_tail = NULL;
1841			}
1842			re_discard_rxbuf(sc, i);
1843			continue;
1844		}
1845
1846		/*
1847		 * If allocating a replacement mbuf fails,
1848		 * reload the current one.
1849		 */
1850
1851		if (re_newbuf(sc, i) != 0) {
1852			ifp->if_iqdrops++;
1853			if (sc->rl_head != NULL) {
1854				m_freem(sc->rl_head);
1855				sc->rl_head = sc->rl_tail = NULL;
1856			}
1857			re_discard_rxbuf(sc, i);
1858			continue;
1859		}
1860
1861		if (sc->rl_head != NULL) {
1862			m->m_len = total_len % RE_RX_DESC_BUFLEN;
1863			if (m->m_len == 0)
1864				m->m_len = RE_RX_DESC_BUFLEN;
1865			/*
1866			 * Special case: if there's 4 bytes or less
1867			 * in this buffer, the mbuf can be discarded:
1868			 * the last 4 bytes is the CRC, which we don't
1869			 * care about anyway.
1870			 */
1871			if (m->m_len <= ETHER_CRC_LEN) {
1872				sc->rl_tail->m_len -=
1873				    (ETHER_CRC_LEN - m->m_len);
1874				m_freem(m);
1875			} else {
1876				m->m_len -= ETHER_CRC_LEN;
1877				m->m_flags &= ~M_PKTHDR;
1878				sc->rl_tail->m_next = m;
1879			}
1880			m = sc->rl_head;
1881			sc->rl_head = sc->rl_tail = NULL;
1882			m->m_pkthdr.len = total_len - ETHER_CRC_LEN;
1883		} else
1884			m->m_pkthdr.len = m->m_len =
1885			    (total_len - ETHER_CRC_LEN);
1886
1887#ifdef RE_FIXUP_RX
1888		re_fixup_rx(m);
1889#endif
1890		ifp->if_ipackets++;
1891		m->m_pkthdr.rcvif = ifp;
1892
1893		/* Do RX checksumming if enabled */
1894
1895		if (ifp->if_capenable & IFCAP_RXCSUM) {
1896			if ((sc->rl_flags & RL_FLAG_DESCV2) == 0) {
1897				/* Check IP header checksum */
1898				if (rxstat & RL_RDESC_STAT_PROTOID)
1899					m->m_pkthdr.csum_flags |=
1900					    CSUM_IP_CHECKED;
1901				if (!(rxstat & RL_RDESC_STAT_IPSUMBAD))
1902					m->m_pkthdr.csum_flags |=
1903					    CSUM_IP_VALID;
1904
1905				/* Check TCP/UDP checksum */
1906				if ((RL_TCPPKT(rxstat) &&
1907				    !(rxstat & RL_RDESC_STAT_TCPSUMBAD)) ||
1908				    (RL_UDPPKT(rxstat) &&
1909				     !(rxstat & RL_RDESC_STAT_UDPSUMBAD))) {
1910					m->m_pkthdr.csum_flags |=
1911						CSUM_DATA_VALID|CSUM_PSEUDO_HDR;
1912					m->m_pkthdr.csum_data = 0xffff;
1913				}
1914			} else {
1915				/*
1916				 * RTL8168C/RTL816CP/RTL8111C/RTL8111CP
1917				 */
1918				if ((rxstat & RL_RDESC_STAT_PROTOID) &&
1919				    (rxvlan & RL_RDESC_IPV4))
1920					m->m_pkthdr.csum_flags |=
1921					    CSUM_IP_CHECKED;
1922				if (!(rxstat & RL_RDESC_STAT_IPSUMBAD) &&
1923				    (rxvlan & RL_RDESC_IPV4))
1924					m->m_pkthdr.csum_flags |=
1925					    CSUM_IP_VALID;
1926				if (((rxstat & RL_RDESC_STAT_TCP) &&
1927				    !(rxstat & RL_RDESC_STAT_TCPSUMBAD)) ||
1928				    ((rxstat & RL_RDESC_STAT_UDP) &&
1929				    !(rxstat & RL_RDESC_STAT_UDPSUMBAD))) {
1930					m->m_pkthdr.csum_flags |=
1931						CSUM_DATA_VALID|CSUM_PSEUDO_HDR;
1932					m->m_pkthdr.csum_data = 0xffff;
1933				}
1934			}
1935		}
1936		maxpkt--;
1937		if (rxvlan & RL_RDESC_VLANCTL_TAG) {
1938			m->m_pkthdr.ether_vtag =
1939			    bswap16((rxvlan & RL_RDESC_VLANCTL_DATA));
1940			m->m_flags |= M_VLANTAG;
1941		}
1942		RL_UNLOCK(sc);
1943		(*ifp->if_input)(ifp, m);
1944		RL_LOCK(sc);
1945	}
1946
1947	/* Flush the RX DMA ring */
1948
1949	bus_dmamap_sync(sc->rl_ldata.rl_rx_list_tag,
1950	    sc->rl_ldata.rl_rx_list_map,
1951	    BUS_DMASYNC_PREWRITE|BUS_DMASYNC_PREREAD);
1952
1953	sc->rl_ldata.rl_rx_prodidx = i;
1954
1955	if (maxpkt)
1956		return(EAGAIN);
1957
1958	return(0);
1959}
1960
/*
 * Reclaim transmitted descriptors: walk the ring from the consumer
 * index up to the producer index, freeing mbufs and gathering error
 * statistics for each completed frame, then restart the transmitter
 * if packets are still queued (some chips need an explicit kick).
 */
static void
re_txeof(struct rl_softc *sc)
{
	struct ifnet		*ifp;
	struct rl_txdesc	*txd;
	u_int32_t		txstat;
	int			cons;

	cons = sc->rl_ldata.rl_tx_considx;
	/* Ring empty: nothing to reclaim. */
	if (cons == sc->rl_ldata.rl_tx_prodidx)
		return;

	ifp = sc->rl_ifp;
	/* Invalidate the TX descriptor list */
	bus_dmamap_sync(sc->rl_ldata.rl_tx_list_tag,
	    sc->rl_ldata.rl_tx_list_map,
	    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);

	for (; cons != sc->rl_ldata.rl_tx_prodidx;
	    cons = RL_TX_DESC_NXT(sc, cons)) {
		txstat = le32toh(sc->rl_ldata.rl_tx_list[cons].rl_cmdstat);
		/* Chip still owns this descriptor: stop here. */
		if (txstat & RL_TDESC_STAT_OWN)
			break;
		/*
		 * We only stash mbufs in the last descriptor
		 * in a fragment chain, which also happens to
		 * be the only place where the TX status bits
		 * are valid.
		 */
		if (txstat & RL_TDESC_CMD_EOF) {
			txd = &sc->rl_ldata.rl_tx_desc[cons];
			bus_dmamap_sync(sc->rl_ldata.rl_tx_mtag,
			    txd->tx_dmamap, BUS_DMASYNC_POSTWRITE);
			bus_dmamap_unload(sc->rl_ldata.rl_tx_mtag,
			    txd->tx_dmamap);
			KASSERT(txd->tx_m != NULL,
			    ("%s: freeing NULL mbufs!", __func__));
			m_freem(txd->tx_m);
			txd->tx_m = NULL;
			/* Account collisions and per-frame success/failure. */
			if (txstat & (RL_TDESC_STAT_EXCESSCOL|
			    RL_TDESC_STAT_COLCNT))
				ifp->if_collisions++;
			if (txstat & RL_TDESC_STAT_TXERRSUM)
				ifp->if_oerrors++;
			else
				ifp->if_opackets++;
		}
		/* One more slot is free; let the stack queue again. */
		sc->rl_ldata.rl_tx_free++;
		ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
	}
	sc->rl_ldata.rl_tx_considx = cons;

	/* No changes made to the TX ring, so no flush needed */

	if (sc->rl_ldata.rl_tx_free != sc->rl_ldata.rl_tx_desc_cnt) {
		/*
		 * Some chips will ignore a second TX request issued
		 * while an existing transmission is in progress. If
		 * the transmitter goes idle but there are still
		 * packets waiting to be sent, we need to restart the
		 * channel here to flush them out. This only seems to
		 * be required with the PCIe devices.
		 */
		CSR_WRITE_1(sc, sc->rl_txstart, RL_TXSTART_START);

#ifdef RE_TX_MODERATION
		/*
		 * If not all descriptors have been reaped yet, reload
		 * the timer so that we will eventually get another
		 * interrupt that will cause us to re-enter this routine.
		 * This is done in case the transmitter has gone idle.
		 */
		CSR_WRITE_4(sc, RL_TIMERCNT, 1);
#endif
	} else
		sc->rl_watchdog_timer = 0;
}
2038
2039static void
2040re_tick(void *xsc)
2041{
2042	struct rl_softc		*sc;
2043	struct mii_data		*mii;
2044
2045	sc = xsc;
2046
2047	RL_LOCK_ASSERT(sc);
2048
2049	mii = device_get_softc(sc->rl_miibus);
2050	mii_tick(mii);
2051	re_watchdog(sc);
2052	callout_reset(&sc->rl_stat_callout, hz, re_tick, sc);
2053}
2054
2055#ifdef DEVICE_POLLING
2056static void
2057re_poll(struct ifnet *ifp, enum poll_cmd cmd, int count)
2058{
2059	struct rl_softc *sc = ifp->if_softc;
2060
2061	RL_LOCK(sc);
2062	if (ifp->if_drv_flags & IFF_DRV_RUNNING)
2063		re_poll_locked(ifp, cmd, count);
2064	RL_UNLOCK(sc);
2065}
2066
/*
 * Locked body of the polling handler: reap RX and TX, restart
 * transmission if frames are queued, and periodically check the
 * status register for fatal conditions.
 */
static void
re_poll_locked(struct ifnet *ifp, enum poll_cmd cmd, int count)
{
	struct rl_softc *sc = ifp->if_softc;

	RL_LOCK_ASSERT(sc);

	/* Budget for this poll pass; consumed by re_rxeof(). */
	sc->rxcycles = count;
	re_rxeof(sc);
	re_txeof(sc);

	/* Kick the transmit task if frames are waiting. */
	if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
		taskqueue_enqueue_fast(taskqueue_fast, &sc->rl_txtask);

	if (cmd == POLL_AND_CHECK_STATUS) { /* also check status register */
		u_int16_t       status;

		status = CSR_READ_2(sc, RL_ISR);
		/* 0xffff means the hardware has disappeared. */
		if (status == 0xffff)
			return;
		/* Acknowledge whatever events were latched. */
		if (status)
			CSR_WRITE_2(sc, RL_ISR, status);

		/*
		 * XXX check behaviour on receiver stalls.
		 */

		/* Fatal error: reset and reinitialize the chip. */
		if (status & RL_ISR_SYSTEM_ERR) {
			re_reset(sc);
			re_init_locked(sc);
		}
	}
}
2100#endif /* DEVICE_POLLING */
2101
2102static int
2103re_intr(void *arg)
2104{
2105	struct rl_softc		*sc;
2106	uint16_t		status;
2107
2108	sc = arg;
2109
2110	status = CSR_READ_2(sc, RL_ISR);
2111	if (status == 0xFFFF || (status & RL_INTRS_CPLUS) == 0)
2112                return (FILTER_STRAY);
2113	CSR_WRITE_2(sc, RL_IMR, 0);
2114
2115	taskqueue_enqueue_fast(taskqueue_fast, &sc->rl_inttask);
2116
2117	return (FILTER_HANDLED);
2118}
2119
/*
 * Deferred interrupt handler, run from the taskqueue after re_intr()
 * masked the chip's interrupts.  Services RX/TX events and re-enables
 * interrupts only when no further work is pending.
 */
static void
re_int_task(void *arg, int npending)
{
	struct rl_softc		*sc;
	struct ifnet		*ifp;
	u_int16_t		status;
	int			rval = 0;

	sc = arg;
	ifp = sc->rl_ifp;

	RL_LOCK(sc);

	/* Read and acknowledge all currently latched events. */
	status = CSR_READ_2(sc, RL_ISR);
	CSR_WRITE_2(sc, RL_ISR, status);

	/* Nothing to do while suspended or the interface is down. */
	if (sc->suspended ||
	    (ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
		RL_UNLOCK(sc);
		return;
	}

#ifdef DEVICE_POLLING
	/* In polling mode re_poll() does this work instead. */
	if  (ifp->if_capenable & IFCAP_POLLING) {
		RL_UNLOCK(sc);
		return;
	}
#endif

	if (status & (RL_ISR_RX_OK|RL_ISR_RX_ERR|RL_ISR_FIFO_OFLOW))
		rval = re_rxeof(sc);

	/*
	 * With RE_TX_MODERATION the countdown-timer interrupt stands
	 * in for the per-packet TX-OK interrupt (see re_start()).
	 */
	if (status & (
#ifdef RE_TX_MODERATION
	    RL_ISR_TIMEOUT_EXPIRED|
#else
	    RL_ISR_TX_OK|
#endif
	    RL_ISR_TX_ERR|RL_ISR_TX_DESC_UNAVAIL))
		re_txeof(sc);

	/* Fatal error: reset and reinitialize the chip. */
	if (status & RL_ISR_SYSTEM_ERR) {
		re_reset(sc);
		re_init_locked(sc);
	}

	if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
		taskqueue_enqueue_fast(taskqueue_fast, &sc->rl_txtask);

	RL_UNLOCK(sc);

	/*
	 * If new events arrived meanwhile, or re_rxeof() exhausted its
	 * budget (rval != 0), requeue ourselves instead of unmasking.
	 */
	if ((CSR_READ_2(sc, RL_ISR) & RL_INTRS_CPLUS) || rval) {
		taskqueue_enqueue_fast(taskqueue_fast, &sc->rl_inttask);
		return;
	}

	/* All quiet: re-enable the interrupts masked by re_intr(). */
	CSR_WRITE_2(sc, RL_IMR, RL_INTRS_CPLUS);
}
2178
/*
 * Encapsulate an mbuf chain onto the TX descriptor ring.
 *
 * On success the frame is owned by the ring (the mbuf is recorded
 * against the chain's final descriptor and the dmamaps swapped so
 * re_txeof() can unload it).  On ENOBUFS/EIO failures *m_head is
 * freed and set to NULL; on other bus_dma errors the caller keeps
 * ownership of the (possibly defragmented) mbuf.
 */
static int
re_encap(struct rl_softc *sc, struct mbuf **m_head)
{
	struct rl_txdesc	*txd, *txd_last;
	bus_dma_segment_t	segs[RL_NTXSEGS];
	bus_dmamap_t		map;
	struct mbuf		*m_new;
	struct rl_desc		*desc;
	int			nsegs, prod;
	int			i, error, ei, si;
	int			padlen;
	uint32_t		cmdstat, csum_flags, vlanctl;

	RL_LOCK_ASSERT(sc);
	M_ASSERTPKTHDR((*m_head));

	/*
	 * With some of the RealTek chips, using the checksum offload
	 * support in conjunction with the autopadding feature results
	 * in the transmission of corrupt frames. For example, if we
	 * need to send a really small IP fragment that's less than 60
	 * bytes in size, and IP header checksumming is enabled, the
	 * resulting ethernet frame that appears on the wire will
	 * have garbled payload. To work around this, if TX IP checksum
	 * offload is enabled, we always manually pad short frames out
	 * to the minimum ethernet frame size.
	 */
	if ((sc->rl_flags & RL_FLAG_DESCV2) == 0 &&
	    (*m_head)->m_pkthdr.len < RL_IP4CSUMTX_PADLEN &&
	    ((*m_head)->m_pkthdr.csum_flags & CSUM_IP) != 0) {
		padlen = RL_MIN_FRAMELEN - (*m_head)->m_pkthdr.len;
		if (M_WRITABLE(*m_head) == 0) {
			/* Get a writable copy. */
			m_new = m_dup(*m_head, M_DONTWAIT);
			m_freem(*m_head);
			if (m_new == NULL) {
				*m_head = NULL;
				return (ENOBUFS);
			}
			*m_head = m_new;
		}
		if ((*m_head)->m_next != NULL ||
		    M_TRAILINGSPACE(*m_head) < padlen) {
			/* Collapse the chain so the pad fits in one mbuf. */
			m_new = m_defrag(*m_head, M_DONTWAIT);
			if (m_new == NULL) {
				m_freem(*m_head);
				*m_head = NULL;
				return (ENOBUFS);
			}
		} else
			m_new = *m_head;

		/*
		 * Manually pad short frames, and zero the pad space
		 * to avoid leaking data.
		 */
		bzero(mtod(m_new, char *) + m_new->m_pkthdr.len, padlen);
		m_new->m_pkthdr.len += padlen;
		m_new->m_len = m_new->m_pkthdr.len;
		*m_head = m_new;
	}

	prod = sc->rl_ldata.rl_tx_prodidx;
	txd = &sc->rl_ldata.rl_tx_desc[prod];
	error = bus_dmamap_load_mbuf_sg(sc->rl_ldata.rl_tx_mtag, txd->tx_dmamap,
	    *m_head, segs, &nsegs, BUS_DMA_NOWAIT);
	if (error == EFBIG) {
		/* Too many segments: squeeze the chain into RL_NTXSEGS. */
		m_new = m_collapse(*m_head, M_DONTWAIT, RL_NTXSEGS);
		if (m_new == NULL) {
			m_freem(*m_head);
			*m_head = NULL;
			return (ENOBUFS);
		}
		*m_head = m_new;
		error = bus_dmamap_load_mbuf_sg(sc->rl_ldata.rl_tx_mtag,
		    txd->tx_dmamap, *m_head, segs, &nsegs, BUS_DMA_NOWAIT);
		if (error != 0) {
			m_freem(*m_head);
			*m_head = NULL;
			return (error);
		}
	} else if (error != 0)
		return (error);
	if (nsegs == 0) {
		m_freem(*m_head);
		*m_head = NULL;
		return (EIO);
	}

	/* Check for number of available descriptors. */
	if (sc->rl_ldata.rl_tx_free - nsegs <= 1) {
		bus_dmamap_unload(sc->rl_ldata.rl_tx_mtag, txd->tx_dmamap);
		return (ENOBUFS);
	}

	bus_dmamap_sync(sc->rl_ldata.rl_tx_mtag, txd->tx_dmamap,
	    BUS_DMASYNC_PREWRITE);

	/*
	 * Set up checksum offload. Note: checksum offload bits must
	 * appear in all descriptors of a multi-descriptor transmit
	 * attempt. This is according to testing done with an 8169
	 * chip. This is a requirement.
	 */
	vlanctl = 0;
	csum_flags = 0;
	if (((*m_head)->m_pkthdr.csum_flags & CSUM_TSO) != 0)
		csum_flags = RL_TDESC_CMD_LGSEND |
		    ((uint32_t)(*m_head)->m_pkthdr.tso_segsz <<
		    RL_TDESC_CMD_MSSVAL_SHIFT);
	else {
		/*
		 * Unconditionally enable IP checksum if TCP or UDP
		 * checksum is required. Otherwise, TCP/UDP checksum
		 * doesn't take effect.
		 */
		if (((*m_head)->m_pkthdr.csum_flags & RE_CSUM_FEATURES) != 0) {
			if ((sc->rl_flags & RL_FLAG_DESCV2) == 0) {
				/* Original descriptor format: bits in cmdstat. */
				csum_flags |= RL_TDESC_CMD_IPCSUM;
				if (((*m_head)->m_pkthdr.csum_flags &
				    CSUM_TCP) != 0)
					csum_flags |= RL_TDESC_CMD_TCPCSUM;
				if (((*m_head)->m_pkthdr.csum_flags &
				    CSUM_UDP) != 0)
					csum_flags |= RL_TDESC_CMD_UDPCSUM;
			} else {
				/* V2 descriptors carry csum bits in vlanctl. */
				vlanctl |= RL_TDESC_CMD_IPCSUMV2;
				if (((*m_head)->m_pkthdr.csum_flags &
				    CSUM_TCP) != 0)
					vlanctl |= RL_TDESC_CMD_TCPCSUMV2;
				if (((*m_head)->m_pkthdr.csum_flags &
				    CSUM_UDP) != 0)
					vlanctl |= RL_TDESC_CMD_UDPCSUMV2;
			}
		}
	}

	/*
	 * Set up hardware VLAN tagging. Note: vlan tag info must
	 * appear in all descriptors of a multi-descriptor
	 * transmission attempt.
	 */
	if ((*m_head)->m_flags & M_VLANTAG)
		vlanctl |= bswap16((*m_head)->m_pkthdr.ether_vtag) |
		    RL_TDESC_VLANCTL_TAG;

	si = prod;
	for (i = 0; i < nsegs; i++, prod = RL_TX_DESC_NXT(sc, prod)) {
		desc = &sc->rl_ldata.rl_tx_list[prod];
		desc->rl_vlanctl = htole32(vlanctl);
		desc->rl_bufaddr_lo = htole32(RL_ADDR_LO(segs[i].ds_addr));
		desc->rl_bufaddr_hi = htole32(RL_ADDR_HI(segs[i].ds_addr));
		cmdstat = segs[i].ds_len;
		/*
		 * OWN is withheld from the first descriptor here and set
		 * last (below) so the chip never sees a half-built chain.
		 */
		if (i != 0)
			cmdstat |= RL_TDESC_CMD_OWN;
		if (prod == sc->rl_ldata.rl_tx_desc_cnt - 1)
			cmdstat |= RL_TDESC_CMD_EOR;
		desc->rl_cmdstat = htole32(cmdstat | csum_flags);
		sc->rl_ldata.rl_tx_free--;
	}
	/* Update producer index. */
	sc->rl_ldata.rl_tx_prodidx = prod;

	/* Set EOF on the last descriptor. */
	ei = RL_TX_DESC_PRV(sc, prod);
	desc = &sc->rl_ldata.rl_tx_list[ei];
	desc->rl_cmdstat |= htole32(RL_TDESC_CMD_EOF);

	desc = &sc->rl_ldata.rl_tx_list[si];
	/* Set SOF and transfer ownership of packet to the chip. */
	desc->rl_cmdstat |= htole32(RL_TDESC_CMD_OWN | RL_TDESC_CMD_SOF);

	/*
	 * Insure that the map for this transmission
	 * is placed at the array index of the last descriptor
	 * in this chain.  (Swap last and first dmamaps.)
	 */
	txd_last = &sc->rl_ldata.rl_tx_desc[ei];
	map = txd->tx_dmamap;
	txd->tx_dmamap = txd_last->tx_dmamap;
	txd_last->tx_dmamap = map;
	txd_last->tx_m = *m_head;

	return (0);
}
2364
/*
 * Deferred transmit handler: invoked from the taskqueue to run the
 * transmit routine outside interrupt-filter context.
 */
static void
re_tx_task(void *arg, int npending)
{
	struct ifnet		*ifp = arg;

	re_start(ifp);
}
2373
2374/*
2375 * Main transmit routine for C+ and gigE NICs.
2376 */
2377static void
2378re_start(struct ifnet *ifp)
2379{
2380	struct rl_softc		*sc;
2381	struct mbuf		*m_head;
2382	int			queued;
2383
2384	sc = ifp->if_softc;
2385
2386	RL_LOCK(sc);
2387
2388	if ((ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) !=
2389	    IFF_DRV_RUNNING || (sc->rl_flags & RL_FLAG_LINK) == 0) {
2390		RL_UNLOCK(sc);
2391		return;
2392	}
2393
2394	for (queued = 0; !IFQ_DRV_IS_EMPTY(&ifp->if_snd) &&
2395	    sc->rl_ldata.rl_tx_free > 1;) {
2396		IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
2397		if (m_head == NULL)
2398			break;
2399
2400		if (re_encap(sc, &m_head) != 0) {
2401			if (m_head == NULL)
2402				break;
2403			IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
2404			ifp->if_drv_flags |= IFF_DRV_OACTIVE;
2405			break;
2406		}
2407
2408		/*
2409		 * If there's a BPF listener, bounce a copy of this frame
2410		 * to him.
2411		 */
2412		ETHER_BPF_MTAP(ifp, m_head);
2413
2414		queued++;
2415	}
2416
2417	if (queued == 0) {
2418#ifdef RE_TX_MODERATION
2419		if (sc->rl_ldata.rl_tx_free != sc->rl_ldata.rl_tx_desc_cnt)
2420			CSR_WRITE_4(sc, RL_TIMERCNT, 1);
2421#endif
2422		RL_UNLOCK(sc);
2423		return;
2424	}
2425
2426	/* Flush the TX descriptors */
2427
2428	bus_dmamap_sync(sc->rl_ldata.rl_tx_list_tag,
2429	    sc->rl_ldata.rl_tx_list_map,
2430	    BUS_DMASYNC_PREWRITE|BUS_DMASYNC_PREREAD);
2431
2432	CSR_WRITE_1(sc, sc->rl_txstart, RL_TXSTART_START);
2433
2434#ifdef RE_TX_MODERATION
2435	/*
2436	 * Use the countdown timer for interrupt moderation.
2437	 * 'TX done' interrupts are disabled. Instead, we reset the
2438	 * countdown timer, which will begin counting until it hits
2439	 * the value in the TIMERINT register, and then trigger an
2440	 * interrupt. Each time we write to the TIMERCNT register,
2441	 * the timer count is reset to 0.
2442	 */
2443	CSR_WRITE_4(sc, RL_TIMERCNT, 1);
2444#endif
2445
2446	/*
2447	 * Set a timeout in case the chip goes out to lunch.
2448	 */
2449	sc->rl_watchdog_timer = 5;
2450
2451	RL_UNLOCK(sc);
2452}
2453
/*
 * Unlocked init entry point: acquires the softc lock and defers to
 * re_init_locked().
 */
static void
re_init(void *xsc)
{
	struct rl_softc		*sc;

	sc = xsc;
	RL_LOCK(sc);
	re_init_locked(sc);
	RL_UNLOCK(sc);
}
2463
/*
 * Bring the hardware up: program C+ command mode, the station
 * address, descriptor ring addresses, TX/RX configuration, RX
 * filters and interrupts, then start the MAC and the tick callout.
 * Caller holds the softc lock.
 */
static void
re_init_locked(struct rl_softc *sc)
{
	struct ifnet		*ifp = sc->rl_ifp;
	struct mii_data		*mii;
	u_int32_t		rxcfg = 0;
	uint16_t		cfg;
	/* Union guarantees 32-bit alignment for the 4-byte IDR writes. */
	union {
		uint32_t align_dummy;
		u_char eaddr[ETHER_ADDR_LEN];
	} eaddr;

	RL_LOCK_ASSERT(sc);

	mii = device_get_softc(sc->rl_miibus);

	/*
	 * Cancel pending I/O and free all RX/TX buffers.
	 */
	re_stop(sc);

	/*
	 * Enable C+ RX and TX mode, as well as VLAN stripping and
	 * RX checksum offload. We must configure the C+ register
	 * before all others.
	 */
	cfg = RL_CPLUSCMD_PCI_MRW;
	if ((ifp->if_capenable & IFCAP_RXCSUM) != 0)
		cfg |= RL_CPLUSCMD_RXCSUM_ENB;
	if ((ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0)
		cfg |= RL_CPLUSCMD_VLANSTRIP;
	if ((sc->rl_flags & RL_FLAG_MACSTAT) != 0) {
		cfg |= RL_CPLUSCMD_MACSTAT_DIS;
		/* XXX magic. */
		cfg |= 0x0001;
	} else
		cfg |= RL_CPLUSCMD_RXENB | RL_CPLUSCMD_TXENB;
	CSR_WRITE_2(sc, RL_CPLUS_CMD, cfg);
	/*
	 * Disable TSO if interface MTU size is greater than MSS
	 * allowed in controller.
	 */
	if (ifp->if_mtu > RL_TSO_MTU && (ifp->if_capenable & IFCAP_TSO4) != 0) {
		ifp->if_capenable &= ~IFCAP_TSO4;
		ifp->if_hwassist &= ~CSUM_TSO;
	}

	/*
	 * Init our MAC address.  Even though the chipset
	 * documentation doesn't mention it, we need to enter "Config
	 * register write enable" mode to modify the ID registers.
	 */
	/* Copy MAC address on stack to align. */
	bcopy(IF_LLADDR(ifp), eaddr.eaddr, ETHER_ADDR_LEN);
	CSR_WRITE_1(sc, RL_EECMD, RL_EEMODE_WRITECFG);
	CSR_WRITE_4(sc, RL_IDR0,
	    htole32(*(u_int32_t *)(&eaddr.eaddr[0])));
	CSR_WRITE_4(sc, RL_IDR4,
	    htole32(*(u_int32_t *)(&eaddr.eaddr[4])));
	CSR_WRITE_1(sc, RL_EECMD, RL_EEMODE_OFF);

	/*
	 * For C+ mode, initialize the RX descriptors and mbufs.
	 */
	re_rx_list_init(sc);
	re_tx_list_init(sc);

	/*
	 * Load the addresses of the RX and TX lists into the chip.
	 */

	CSR_WRITE_4(sc, RL_RXLIST_ADDR_HI,
	    RL_ADDR_HI(sc->rl_ldata.rl_rx_list_addr));
	CSR_WRITE_4(sc, RL_RXLIST_ADDR_LO,
	    RL_ADDR_LO(sc->rl_ldata.rl_rx_list_addr));

	CSR_WRITE_4(sc, RL_TXLIST_ADDR_HI,
	    RL_ADDR_HI(sc->rl_ldata.rl_tx_list_addr));
	CSR_WRITE_4(sc, RL_TXLIST_ADDR_LO,
	    RL_ADDR_LO(sc->rl_ldata.rl_tx_list_addr));

	/*
	 * Enable transmit and receive.
	 */
	CSR_WRITE_1(sc, RL_COMMAND, RL_CMD_TX_ENB|RL_CMD_RX_ENB);

	/*
	 * Set the initial TX and RX configuration.
	 */
	if (sc->rl_testmode) {
		/* Diagnostic loopback; the enable bit differs by type. */
		if (sc->rl_type == RL_8169)
			CSR_WRITE_4(sc, RL_TXCFG,
			    RL_TXCFG_CONFIG|RL_LOOPTEST_ON);
		else
			CSR_WRITE_4(sc, RL_TXCFG,
			    RL_TXCFG_CONFIG|RL_LOOPTEST_ON_CPLUS);
	} else
		CSR_WRITE_4(sc, RL_TXCFG, RL_TXCFG_CONFIG);

	CSR_WRITE_1(sc, RL_EARLY_TX_THRESH, 16);

	CSR_WRITE_4(sc, RL_RXCFG, RL_RXCFG_CONFIG);

	/* Set the individual bit to receive frames for this host only. */
	rxcfg = CSR_READ_4(sc, RL_RXCFG);
	rxcfg |= RL_RXCFG_RX_INDIV;

	/* If we want promiscuous mode, set the allframes bit. */
	if (ifp->if_flags & IFF_PROMISC)
		rxcfg |= RL_RXCFG_RX_ALLPHYS;
	else
		rxcfg &= ~RL_RXCFG_RX_ALLPHYS;
	CSR_WRITE_4(sc, RL_RXCFG, rxcfg);

	/*
	 * Set capture broadcast bit to capture broadcast frames.
	 */
	if (ifp->if_flags & IFF_BROADCAST)
		rxcfg |= RL_RXCFG_RX_BROAD;
	else
		rxcfg &= ~RL_RXCFG_RX_BROAD;
	CSR_WRITE_4(sc, RL_RXCFG, rxcfg);

	/*
	 * Program the multicast filter, if necessary.
	 */
	re_setmulti(sc);

#ifdef DEVICE_POLLING
	/*
	 * Disable interrupts if we are polling.
	 * NOTE: the dangling "else" below intentionally attaches to
	 * the testmode if/else statement after the #endif, so the
	 * interrupt-enable block is skipped when polling is active.
	 */
	if (ifp->if_capenable & IFCAP_POLLING)
		CSR_WRITE_2(sc, RL_IMR, 0);
	else	/* otherwise ... */
#endif

	/*
	 * Enable interrupts.
	 */
	if (sc->rl_testmode)
		CSR_WRITE_2(sc, RL_IMR, 0);
	else
		CSR_WRITE_2(sc, RL_IMR, RL_INTRS_CPLUS);
	CSR_WRITE_2(sc, RL_ISR, RL_INTRS_CPLUS);

	/* Set initial TX threshold */
	sc->rl_txthresh = RL_TX_THRESH_INIT;

	/* Start RX/TX process. */
	CSR_WRITE_4(sc, RL_MISSEDPKT, 0);
#ifdef notdef
	/* Enable receiver and transmitter. */
	CSR_WRITE_1(sc, RL_COMMAND, RL_CMD_TX_ENB|RL_CMD_RX_ENB);
#endif

#ifdef RE_TX_MODERATION
	/*
	 * Initialize the timer interrupt register so that
	 * a timer interrupt will be generated once the timer
	 * reaches a certain number of ticks. The timer is
	 * reloaded on each transmit. This gives us TX interrupt
	 * moderation, which dramatically improves TX frame rate.
	 */
	if (sc->rl_type == RL_8169)
		CSR_WRITE_4(sc, RL_TIMERINT_8169, 0x800);
	else
		CSR_WRITE_4(sc, RL_TIMERINT, 0x400);
#endif

	/*
	 * For 8169 gigE NICs, set the max allowed RX packet
	 * size so we can receive jumbo frames.
	 */
	if (sc->rl_type == RL_8169)
		CSR_WRITE_2(sc, RL_MAXRXPKTLEN, 16383);

	/* In test mode skip the running/link/callout setup below. */
	if (sc->rl_testmode)
		return;

	mii_mediachg(mii);

	/* Set the driver-loaded bit. */
	CSR_WRITE_1(sc, RL_CFG1, CSR_READ_1(sc, RL_CFG1) | RL_CFG1_DRVLOAD);

	ifp->if_drv_flags |= IFF_DRV_RUNNING;
	ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;

	/* Link flag is re-established by the MII layer; start fresh. */
	sc->rl_flags &= ~RL_FLAG_LINK;
	sc->rl_watchdog_timer = 0;
	callout_reset(&sc->rl_stat_callout, hz, re_tick, sc);
}
2655
2656/*
2657 * Set media options.
2658 */
2659static int
2660re_ifmedia_upd(struct ifnet *ifp)
2661{
2662	struct rl_softc		*sc;
2663	struct mii_data		*mii;
2664	int			error;
2665
2666	sc = ifp->if_softc;
2667	mii = device_get_softc(sc->rl_miibus);
2668	RL_LOCK(sc);
2669	error = mii_mediachg(mii);
2670	RL_UNLOCK(sc);
2671
2672	return (error);
2673}
2674
2675/*
2676 * Report current media status.
2677 */
2678static void
2679re_ifmedia_sts(struct ifnet *ifp, struct ifmediareq *ifmr)
2680{
2681	struct rl_softc		*sc;
2682	struct mii_data		*mii;
2683
2684	sc = ifp->if_softc;
2685	mii = device_get_softc(sc->rl_miibus);
2686
2687	RL_LOCK(sc);
2688	mii_pollstat(mii);
2689	RL_UNLOCK(sc);
2690	ifmr->ifm_active = mii->mii_media_active;
2691	ifmr->ifm_status = mii->mii_media_status;
2692}
2693
/*
 * Interface ioctl handler.  Commands not handled here fall through
 * to ether_ioctl().
 */
static int
re_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
{
	struct rl_softc		*sc = ifp->if_softc;
	struct ifreq		*ifr = (struct ifreq *) data;
	struct mii_data		*mii;
	int			error = 0;

	switch (command) {
	case SIOCSIFMTU:
		/* Reject MTUs outside the supported range. */
		if (ifr->ifr_mtu < ETHERMIN || ifr->ifr_mtu > RL_JUMBO_MTU) {
			error = EINVAL;
			break;
		}
		/* Chips without jumbo support max out at RL_MAX_FRAMELEN. */
		if ((sc->rl_flags & RL_FLAG_NOJUMBO) != 0 &&
		    ifr->ifr_mtu > RL_MAX_FRAMELEN) {
			error = EINVAL;
			break;
		}
		RL_LOCK(sc);
		if (ifp->if_mtu != ifr->ifr_mtu)
			ifp->if_mtu = ifr->ifr_mtu;
		/* TSO cannot be used once MTU exceeds the chip's limit. */
		if (ifp->if_mtu > RL_TSO_MTU &&
		    (ifp->if_capenable & IFCAP_TSO4) != 0) {
			ifp->if_capenable &= ~IFCAP_TSO4;
			ifp->if_hwassist &= ~CSUM_TSO;
		}
		RL_UNLOCK(sc);
		break;
	case SIOCSIFFLAGS:
		RL_LOCK(sc);
		if ((ifp->if_flags & IFF_UP) != 0) {
			if ((ifp->if_drv_flags & IFF_DRV_RUNNING) != 0) {
				/*
				 * Already running: only reprogram the RX
				 * filter when the relevant flags changed.
				 */
				if (((ifp->if_flags ^ sc->rl_if_flags)
				    & (IFF_PROMISC | IFF_ALLMULTI)) != 0)
					re_setmulti(sc);
			} else
				re_init_locked(sc);
		} else {
			if ((ifp->if_drv_flags & IFF_DRV_RUNNING) != 0)
				re_stop(sc);
		}
		/* Remember the flags for the next delta comparison. */
		sc->rl_if_flags = ifp->if_flags;
		RL_UNLOCK(sc);
		break;
	case SIOCADDMULTI:
	case SIOCDELMULTI:
		RL_LOCK(sc);
		re_setmulti(sc);
		RL_UNLOCK(sc);
		break;
	case SIOCGIFMEDIA:
	case SIOCSIFMEDIA:
		/* Delegate media queries/changes to the MII layer. */
		mii = device_get_softc(sc->rl_miibus);
		error = ifmedia_ioctl(ifp, ifr, &mii->mii_media, command);
		break;
	case SIOCSIFCAP:
	    {
		int mask, reinit;

		/* Bits that the caller wants toggled. */
		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
		reinit = 0;
#ifdef DEVICE_POLLING
		if (mask & IFCAP_POLLING) {
			if (ifr->ifr_reqcap & IFCAP_POLLING) {
				error = ether_poll_register(re_poll, ifp);
				if (error)
					return(error);
				RL_LOCK(sc);
				/* Disable interrupts */
				CSR_WRITE_2(sc, RL_IMR, 0x0000);
				ifp->if_capenable |= IFCAP_POLLING;
				RL_UNLOCK(sc);
			} else {
				error = ether_poll_deregister(ifp);
				/* Enable interrupts. */
				RL_LOCK(sc);
				CSR_WRITE_2(sc, RL_IMR, RL_INTRS_CPLUS);
				ifp->if_capenable &= ~IFCAP_POLLING;
				RL_UNLOCK(sc);
			}
		}
#endif /* DEVICE_POLLING */
		if (mask & IFCAP_HWCSUM) {
			ifp->if_capenable ^= IFCAP_HWCSUM;
			if (ifp->if_capenable & IFCAP_TXCSUM)
				ifp->if_hwassist |= RE_CSUM_FEATURES;
			else
				ifp->if_hwassist &= ~RE_CSUM_FEATURES;
			/* Offload changes take effect via re_init(). */
			reinit = 1;
		}
		if (mask & IFCAP_VLAN_HWTAGGING) {
			ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
			reinit = 1;
		}
		if (mask & IFCAP_TSO4) {
			ifp->if_capenable ^= IFCAP_TSO4;
			if ((IFCAP_TSO4 & ifp->if_capenable) &&
			    (IFCAP_TSO4 & ifp->if_capabilities))
				ifp->if_hwassist |= CSUM_TSO;
			else
				ifp->if_hwassist &= ~CSUM_TSO;
			/* TSO is unusable when MTU exceeds the MSS limit. */
			if (ifp->if_mtu > RL_TSO_MTU &&
			    (ifp->if_capenable & IFCAP_TSO4) != 0) {
				ifp->if_capenable &= ~IFCAP_TSO4;
				ifp->if_hwassist &= ~CSUM_TSO;
			}
		}
		if ((mask & IFCAP_WOL) != 0 &&
		    (ifp->if_capabilities & IFCAP_WOL) != 0) {
			if ((mask & IFCAP_WOL_UCAST) != 0)
				ifp->if_capenable ^= IFCAP_WOL_UCAST;
			if ((mask & IFCAP_WOL_MCAST) != 0)
				ifp->if_capenable ^= IFCAP_WOL_MCAST;
			if ((mask & IFCAP_WOL_MAGIC) != 0)
				ifp->if_capenable ^= IFCAP_WOL_MAGIC;
		}
		if (reinit && ifp->if_drv_flags & IFF_DRV_RUNNING)
			re_init(sc);
		VLAN_CAPABILITIES(ifp);
	    }
		break;
	default:
		error = ether_ioctl(ifp, command, data);
		break;
	}

	return (error);
}
2823
/*
 * Transmit watchdog, invoked once a second from re_tick() with the
 * softc lock held.  The timer is armed in re_start() and cleared by
 * re_txeof() when the TX ring drains.
 */
static void
re_watchdog(struct rl_softc *sc)
{
	struct ifnet		*ifp;

	RL_LOCK_ASSERT(sc);

	/* Only act when an armed timer counts down to zero here. */
	if (sc->rl_watchdog_timer == 0 || --sc->rl_watchdog_timer != 0)
		return;

	ifp = sc->rl_ifp;
	re_txeof(sc);
	if (sc->rl_ldata.rl_tx_free == sc->rl_ldata.rl_tx_desc_cnt) {
		/*
		 * The ring drained after re_txeof(): we only missed a
		 * TX completion interrupt, so just restart transmission.
		 */
		if_printf(ifp, "watchdog timeout (missed Tx interrupts) "
		    "-- recovering\n");
		if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
			taskqueue_enqueue_fast(taskqueue_fast, &sc->rl_txtask);
		return;
	}

	/* Genuine hang: reinitialize the chip from scratch. */
	if_printf(ifp, "watchdog timeout\n");
	ifp->if_oerrors++;

	re_rxeof(sc);
	re_init_locked(sc);
	if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
		taskqueue_enqueue_fast(taskqueue_fast, &sc->rl_txtask);
}
2852
2853/*
2854 * Stop the adapter and free any mbufs allocated to the
2855 * RX and TX lists.
2856 */
2857static void
2858re_stop(struct rl_softc *sc)
2859{
2860	int			i;
2861	struct ifnet		*ifp;
2862	struct rl_txdesc	*txd;
2863	struct rl_rxdesc	*rxd;
2864
2865	RL_LOCK_ASSERT(sc);
2866
2867	ifp = sc->rl_ifp;
2868
2869	sc->rl_watchdog_timer = 0;
2870	callout_stop(&sc->rl_stat_callout);
2871	ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
2872
2873	CSR_WRITE_1(sc, RL_COMMAND, 0x00);
2874	CSR_WRITE_2(sc, RL_IMR, 0x0000);
2875	CSR_WRITE_2(sc, RL_ISR, 0xFFFF);
2876
2877	if (sc->rl_head != NULL) {
2878		m_freem(sc->rl_head);
2879		sc->rl_head = sc->rl_tail = NULL;
2880	}
2881
2882	/* Free the TX list buffers. */
2883
2884	for (i = 0; i < sc->rl_ldata.rl_tx_desc_cnt; i++) {
2885		txd = &sc->rl_ldata.rl_tx_desc[i];
2886		if (txd->tx_m != NULL) {
2887			bus_dmamap_sync(sc->rl_ldata.rl_tx_mtag,
2888			    txd->tx_dmamap, BUS_DMASYNC_POSTWRITE);
2889			bus_dmamap_unload(sc->rl_ldata.rl_tx_mtag,
2890			    txd->tx_dmamap);
2891			m_freem(txd->tx_m);
2892			txd->tx_m = NULL;
2893		}
2894	}
2895
2896	/* Free the RX list buffers. */
2897
2898	for (i = 0; i < sc->rl_ldata.rl_rx_desc_cnt; i++) {
2899		rxd = &sc->rl_ldata.rl_rx_desc[i];
2900		if (rxd->rx_m != NULL) {
2901			bus_dmamap_sync(sc->rl_ldata.rl_tx_mtag,
2902			    rxd->rx_dmamap, BUS_DMASYNC_POSTREAD);
2903			bus_dmamap_unload(sc->rl_ldata.rl_rx_mtag,
2904			    rxd->rx_dmamap);
2905			m_freem(rxd->rx_m);
2906			rxd->rx_m = NULL;
2907		}
2908	}
2909}
2910
2911/*
2912 * Device suspend routine.  Stop the interface and save some PCI
2913 * settings in case the BIOS doesn't restore them properly on
2914 * resume.
2915 */
2916static int
2917re_suspend(device_t dev)
2918{
2919	struct rl_softc		*sc;
2920
2921	sc = device_get_softc(dev);
2922
2923	RL_LOCK(sc);
2924	re_stop(sc);
2925	re_setwol(sc);
2926	sc->suspended = 1;
2927	RL_UNLOCK(sc);
2928
2929	return (0);
2930}
2931
2932/*
2933 * Device resume routine.  Restore some PCI settings in case the BIOS
2934 * doesn't, re-enable busmastering, and restart the interface if
2935 * appropriate.
2936 */
2937static int
2938re_resume(device_t dev)
2939{
2940	struct rl_softc		*sc;
2941	struct ifnet		*ifp;
2942
2943	sc = device_get_softc(dev);
2944
2945	RL_LOCK(sc);
2946
2947	ifp = sc->rl_ifp;
2948
2949	/* reinitialize interface if necessary */
2950	if (ifp->if_flags & IFF_UP)
2951		re_init_locked(sc);
2952
2953	/*
2954	 * Clear WOL matching such that normal Rx filtering
2955	 * wouldn't interfere with WOL patterns.
2956	 */
2957	re_clrwol(sc);
2958	sc->suspended = 0;
2959	RL_UNLOCK(sc);
2960
2961	return (0);
2962}
2963
2964/*
2965 * Stop all chip I/O so that the kernel's probe routines don't
2966 * get confused by errant DMAs when rebooting.
2967 */
2968static int
2969re_shutdown(device_t dev)
2970{
2971	struct rl_softc		*sc;
2972
2973	sc = device_get_softc(dev);
2974
2975	RL_LOCK(sc);
2976	re_stop(sc);
2977	/*
2978	 * Mark interface as down since otherwise we will panic if
2979	 * interrupt comes in later on, which can happen in some
2980	 * cases.
2981	 */
2982	sc->rl_ifp->if_flags &= ~IFF_UP;
2983	re_setwol(sc);
2984	RL_UNLOCK(sc);
2985
2986	return (0);
2987}
2988
/*
 * Program the chip's wake-on-LAN configuration from the interface's
 * enabled WOL capabilities and request PME from PCI power
 * management.  Called from suspend/shutdown with the lock held.
 */
static void
re_setwol(struct rl_softc *sc)
{
	struct ifnet		*ifp;
	int			pmc;
	uint16_t		pmstat;
	uint8_t			v;

	RL_LOCK_ASSERT(sc);

	/* No PCI power-management capability: nothing we can do. */
	if (pci_find_extcap(sc->rl_dev, PCIY_PMG, &pmc) != 0)
		return;

	ifp = sc->rl_ifp;
	/* Enable config register write. */
	CSR_WRITE_1(sc, RL_EECMD, RL_EE_MODE);

	/* Enable PME. */
	v = CSR_READ_1(sc, RL_CFG1);
	v &= ~RL_CFG1_PME;
	if ((ifp->if_capenable & IFCAP_WOL) != 0)
		v |= RL_CFG1_PME;
	CSR_WRITE_1(sc, RL_CFG1, v);

	/* Magic-packet (and link) wakeup bits live in CFG3. */
	v = CSR_READ_1(sc, RL_CFG3);
	v &= ~(RL_CFG3_WOL_LINK | RL_CFG3_WOL_MAGIC);
	if ((ifp->if_capenable & IFCAP_WOL_MAGIC) != 0)
		v |= RL_CFG3_WOL_MAGIC;
	CSR_WRITE_1(sc, RL_CFG3, v);

	/* Config register write done. */
	CSR_WRITE_1(sc, RL_EECMD, RL_EEMODE_OFF);

	/* Unicast/multicast/broadcast wakeup bits live in CFG5. */
	v = CSR_READ_1(sc, RL_CFG5);
	v &= ~(RL_CFG5_WOL_BCAST | RL_CFG5_WOL_MCAST | RL_CFG5_WOL_UCAST);
	v &= ~RL_CFG5_WOL_LANWAKE;
	if ((ifp->if_capenable & IFCAP_WOL_UCAST) != 0)
		v |= RL_CFG5_WOL_UCAST;
	if ((ifp->if_capenable & IFCAP_WOL_MCAST) != 0)
		v |= RL_CFG5_WOL_MCAST | RL_CFG5_WOL_BCAST;
	if ((ifp->if_capenable & IFCAP_WOL) != 0)
		v |= RL_CFG5_WOL_LANWAKE;
	CSR_WRITE_1(sc, RL_CFG5, v);

	/*
	 * It seems that hardware resets its link speed to 100Mbps in
	 * power down mode so switching to 100Mbps in driver is not
	 * needed.
	 */

	/* Request PME if WOL is requested. */
	pmstat = pci_read_config(sc->rl_dev, pmc + PCIR_POWER_STATUS, 2);
	pmstat &= ~(PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE);
	if ((ifp->if_capenable & IFCAP_WOL) != 0)
		pmstat |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE;
	pci_write_config(sc->rl_dev, pmc + PCIR_POWER_STATUS, pmstat, 2);
}
3046
/*
 * Undo the wake-on-LAN matching programmed by re_setwol() so that
 * stale WOL bits cannot interfere with normal RX filtering; called
 * from re_resume() with the lock held.
 */
static void
re_clrwol(struct rl_softc *sc)
{
	int			pmc;
	uint8_t			v;

	RL_LOCK_ASSERT(sc);

	/* Nothing was programmed if there is no PCI power management. */
	if (pci_find_extcap(sc->rl_dev, PCIY_PMG, &pmc) != 0)
		return;

	/* Enable config register write. */
	CSR_WRITE_1(sc, RL_EECMD, RL_EE_MODE);

	/* Clear the link/magic-packet wakeup bits in CFG3. */
	v = CSR_READ_1(sc, RL_CFG3);
	v &= ~(RL_CFG3_WOL_LINK | RL_CFG3_WOL_MAGIC);
	CSR_WRITE_1(sc, RL_CFG3, v);

	/* Config register write done. */
	CSR_WRITE_1(sc, RL_EECMD, RL_EEMODE_OFF);

	/* Clear the frame-match and LANWAKE bits in CFG5. */
	v = CSR_READ_1(sc, RL_CFG5);
	v &= ~(RL_CFG5_WOL_BCAST | RL_CFG5_WOL_MCAST | RL_CFG5_WOL_UCAST);
	v &= ~RL_CFG5_WOL_LANWAKE;
	CSR_WRITE_1(sc, RL_CFG5, v);
}
3073