/*-
 * Copyright (c) 1997, 1998-2003
 *	Bill Paul <wpaul@windriver.com>.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by Bill Paul.
 * 4. Neither the name of the author nor the names of any co-contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY Bill Paul AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL Bill Paul OR THE VOICES IN HIS HEAD
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
 * THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: head/sys/dev/re/if_re.c 175418 2008-01-17 23:37:47Z jhb $");

/*
 * RealTek 8139C+/8169/8169S/8110S/8168/8111/8101E PCI NIC driver
 *
 * Written by Bill Paul <wpaul@windriver.com>
 * Senior Networking Software Engineer
 * Wind River Systems
 */

/*
 * This driver is designed to support RealTek's next generation of
 * 10/100 and 10/100/1000 PCI ethernet controllers. There are currently
 * seven devices in this family: the RTL8139C+, the RTL8169, the RTL8169S,
 * the RTL8110S, the RTL8168, the RTL8111 and the RTL8101E.
 *
 * The 8139C+ is a 10/100 ethernet chip. It is backwards compatible
 * with the older 8139 family; however, it also supports a special
 * C+ mode of operation that provides several new performance enhancing
 * features. These include:
 *
 *	o Descriptor based DMA mechanism. Each descriptor represents
 *	  a single packet fragment. Data buffers may be aligned on
 *	  any byte boundary.
 *
 *	o 64-bit DMA
 *
 *	o TCP/IP checksum offload for both RX and TX
 *
 *	o High and normal priority transmit DMA rings
 *
 *	o VLAN tag insertion and extraction
 *
 *	o TCP large send (segmentation offload)
 *
 * Like the 8139, the 8139C+ also has a built-in 10/100 PHY. The C+
 * programming API is fairly straightforward. The RX filtering, EEPROM
 * access and PHY access are the same as they are on the older 8139 series
 * chips.
 *
 * The 8169 is a 64-bit 10/100/1000 gigabit ethernet MAC. It has almost the
 * same programming API and feature set as the 8139C+ with the following
 * differences and additions:
 *
 *	o 1000Mbps mode
 *
 *	o Jumbo frames
 *
 *	o GMII and TBI ports/registers for interfacing with copper
 *	  or fiber PHYs
 *
 *	o RX and TX DMA rings can have up to 1024 descriptors
 *	  (the 8139C+ allows a maximum of 64)
 *
 *	o Slight differences in register layout from the 8139C+
 *
 * The TX start and timer interrupt registers are at different locations
 * on the 8169 than they are on the 8139C+. Also, the status word in the
 * RX descriptor has a slightly different bit layout. The 8169 does not
 * have a built-in PHY. Most reference boards use a Marvell 88E1000 'Alaska'
 * copper gigE PHY.
 *
 * The 8169S/8110S 10/100/1000 devices have built-in copper gigE PHYs
 * (the 'S' stands for 'single-chip'). These devices have the same
 * programming API as the older 8169, but also have some vendor-specific
 * registers for the on-board PHY. The 8110S is a LAN-on-motherboard
 * part designed to be pin-compatible with the RealTek 8100 10/100 chip.
 *
 * This driver takes advantage of the RX and TX checksum offload and
 * VLAN tag insertion/extraction features. It also implements TX
 * interrupt moderation using the timer interrupt registers, which
 * significantly reduces TX interrupt load. There is also support
 * for jumbo frames; however, the 8169/8169S/8110S cannot transmit
 * jumbo frames larger than 7440 bytes, so the maximum MTU possible
 * with this driver is 7422 bytes.
 */
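
/*
 * For reference, the 7422-byte MTU quoted above follows directly from
 * the 7440-byte hardware transmit limit for an untagged Ethernet frame:
 *
 *	7440 - 14 (Ethernet header) - 4 (CRC) = 7422 bytes of payload
 */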

#ifdef HAVE_KERNEL_OPTION_HEADERS
#include "opt_device_polling.h"
#endif

#include <sys/param.h>
#include <sys/endian.h>
#include <sys/systm.h>
#include <sys/sockio.h>
#include <sys/mbuf.h>
#include <sys/malloc.h>
#include <sys/module.h>
#include <sys/kernel.h>
#include <sys/socket.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/taskqueue.h>

#include <net/if.h>
#include <net/if_arp.h>
#include <net/ethernet.h>
#include <net/if_dl.h>
#include <net/if_media.h>
#include <net/if_types.h>
#include <net/if_vlan_var.h>

#include <net/bpf.h>

#include <machine/bus.h>
#include <machine/resource.h>
#include <sys/bus.h>
#include <sys/rman.h>

#include <dev/mii/mii.h>
#include <dev/mii/miivar.h>

#include <dev/pci/pcireg.h>
#include <dev/pci/pcivar.h>

#include <pci/if_rlreg.h>

MODULE_DEPEND(re, pci, 1, 1, 1);
MODULE_DEPEND(re, ether, 1, 1, 1);
MODULE_DEPEND(re, miibus, 1, 1, 1);

/* "device miibus" required.  See GENERIC if you get errors here. */
#include "miibus_if.h"

/*
 * Default to using PIO access for this driver.
 */
#define RE_USEIOSPACE

/* Tunables. */
static int msi_disable = 0;
TUNABLE_INT("hw.re.msi_disable", &msi_disable);

#define RE_CSUM_FEATURES    (CSUM_IP | CSUM_TCP | CSUM_UDP)

/*
 * Various supported device vendors/types and their names.
 */
static struct rl_type re_devs[] = {
	{ DLINK_VENDORID, DLINK_DEVICEID_528T, RL_HWREV_8169S,
		"D-Link DGE-528(T) Gigabit Ethernet Adapter" },
	{ DLINK_VENDORID, DLINK_DEVICEID_528T, RL_HWREV_8169_8110SB,
		"D-Link DGE-528(T) Rev.B1 Gigabit Ethernet Adapter" },
	{ RT_VENDORID, RT_DEVICEID_8139, RL_HWREV_8139CPLUS,
		"RealTek 8139C+ 10/100BaseTX" },
	{ RT_VENDORID, RT_DEVICEID_8101E, RL_HWREV_8101E,
		"RealTek 8101E PCIe 10/100baseTX" },
	{ RT_VENDORID, RT_DEVICEID_8168, RL_HWREV_8168_SPIN1,
		"RealTek 8168/8111B PCIe Gigabit Ethernet" },
	{ RT_VENDORID, RT_DEVICEID_8168, RL_HWREV_8168_SPIN2,
		"RealTek 8168/8111B PCIe Gigabit Ethernet" },
	{ RT_VENDORID, RT_DEVICEID_8168, RL_HWREV_8168_SPIN3,
		"RealTek 8168/8111B PCIe Gigabit Ethernet" },
	{ RT_VENDORID, RT_DEVICEID_8169, RL_HWREV_8169,
		"RealTek 8169 Gigabit Ethernet" },
	{ RT_VENDORID, RT_DEVICEID_8169, RL_HWREV_8169S,
		"RealTek 8169S Single-chip Gigabit Ethernet" },
	{ RT_VENDORID, RT_DEVICEID_8169, RL_HWREV_8169_8110SB,
		"RealTek 8169SB/8110SB Single-chip Gigabit Ethernet" },
	{ RT_VENDORID, RT_DEVICEID_8169, RL_HWREV_8169_8110SC,
		"RealTek 8169SC/8110SC Single-chip Gigabit Ethernet" },
	{ RT_VENDORID, RT_DEVICEID_8169SC, RL_HWREV_8169_8110SC,
		"RealTek 8169SC/8110SC Single-chip Gigabit Ethernet" },
	{ RT_VENDORID, RT_DEVICEID_8169, RL_HWREV_8110S,
		"RealTek 8110S Single-chip Gigabit Ethernet" },
	{ COREGA_VENDORID, COREGA_DEVICEID_CGLAPCIGT, RL_HWREV_8169S,
		"Corega CG-LAPCIGT (RTL8169S) Gigabit Ethernet" },
	{ LINKSYS_VENDORID, LINKSYS_DEVICEID_EG1032, RL_HWREV_8169S,
		"Linksys EG1032 (RTL8169S) Gigabit Ethernet" },
	{ USR_VENDORID, USR_DEVICEID_997902, RL_HWREV_8169S,
		"US Robotics 997902 (RTL8169S) Gigabit Ethernet" },
	{ 0, 0, 0, NULL }
};

static struct rl_hwrev re_hwrevs[] = {
	{ RL_HWREV_8139, RL_8139,  "" },
	{ RL_HWREV_8139A, RL_8139, "A" },
	{ RL_HWREV_8139AG, RL_8139, "A-G" },
	{ RL_HWREV_8139B, RL_8139, "B" },
	{ RL_HWREV_8130, RL_8139, "8130" },
	{ RL_HWREV_8139C, RL_8139, "C" },
	{ RL_HWREV_8139D, RL_8139, "8139D/8100B/8100C" },
	{ RL_HWREV_8139CPLUS, RL_8139CPLUS, "C+"},
	{ RL_HWREV_8168_SPIN1, RL_8169, "8168"},
	{ RL_HWREV_8169, RL_8169, "8169"},
	{ RL_HWREV_8169S, RL_8169, "8169S"},
	{ RL_HWREV_8110S, RL_8169, "8110S"},
	{ RL_HWREV_8169_8110SB, RL_8169, "8169SB"},
	{ RL_HWREV_8169_8110SC, RL_8169, "8169SC"},
	{ RL_HWREV_8100, RL_8139, "8100"},
	{ RL_HWREV_8101, RL_8139, "8101"},
	{ RL_HWREV_8100E, RL_8169, "8100E"},
	{ RL_HWREV_8101E, RL_8169, "8101E"},
	{ RL_HWREV_8168_SPIN2, RL_8169, "8168"},
	{ RL_HWREV_8168_SPIN3, RL_8169, "8168"},
	{ 0, 0, NULL }
};

static int re_probe		(device_t);
static int re_attach		(device_t);
static int re_detach		(device_t);

static int re_encap		(struct rl_softc *, struct mbuf **);

static void re_dma_map_addr	(void *, bus_dma_segment_t *, int, int);
static int re_allocmem		(device_t, struct rl_softc *);
static __inline void re_discard_rxbuf
				(struct rl_softc *, int);
static int re_newbuf		(struct rl_softc *, int);
static int re_rx_list_init	(struct rl_softc *);
static int re_tx_list_init	(struct rl_softc *);
#ifdef RE_FIXUP_RX
static __inline void re_fixup_rx
				(struct mbuf *);
#endif
static int re_rxeof		(struct rl_softc *);
static void re_txeof		(struct rl_softc *);
#ifdef DEVICE_POLLING
static void re_poll		(struct ifnet *, enum poll_cmd, int);
static void re_poll_locked	(struct ifnet *, enum poll_cmd, int);
#endif
static int re_intr		(void *);
static void re_tick		(void *);
static void re_tx_task		(void *, int);
static void re_int_task		(void *, int);
static void re_start		(struct ifnet *);
static int re_ioctl		(struct ifnet *, u_long, caddr_t);
static void re_init		(void *);
static void re_init_locked	(struct rl_softc *);
static void re_stop		(struct rl_softc *);
static void re_watchdog		(struct rl_softc *);
static int re_suspend		(device_t);
static int re_resume		(device_t);
static int re_shutdown		(device_t);
static int re_ifmedia_upd	(struct ifnet *);
static void re_ifmedia_sts	(struct ifnet *, struct ifmediareq *);

static void re_eeprom_putbyte	(struct rl_softc *, int);
static void re_eeprom_getword	(struct rl_softc *, int, u_int16_t *);
static void re_read_eeprom	(struct rl_softc *, caddr_t, int, int);
static int re_gmii_readreg	(device_t, int, int);
static int re_gmii_writereg	(device_t, int, int, int);

static int re_miibus_readreg	(device_t, int, int);
static int re_miibus_writereg	(device_t, int, int, int);
static void re_miibus_statchg	(device_t);

static void re_setmulti		(struct rl_softc *);
static void re_reset		(struct rl_softc *);

#ifdef RE_DIAG
static int re_diag		(struct rl_softc *);
#endif

#ifdef RE_USEIOSPACE
#define RL_RES			SYS_RES_IOPORT
#define RL_RID			RL_PCI_LOIO
#else
#define RL_RES			SYS_RES_MEMORY
#define RL_RID			RL_PCI_LOMEM
#endif

static device_method_t re_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe,		re_probe),
	DEVMETHOD(device_attach,	re_attach),
	DEVMETHOD(device_detach,	re_detach),
	DEVMETHOD(device_suspend,	re_suspend),
	DEVMETHOD(device_resume,	re_resume),
	DEVMETHOD(device_shutdown,	re_shutdown),

	/* bus interface */
	DEVMETHOD(bus_print_child,	bus_generic_print_child),
	DEVMETHOD(bus_driver_added,	bus_generic_driver_added),

	/* MII interface */
	DEVMETHOD(miibus_readreg,	re_miibus_readreg),
	DEVMETHOD(miibus_writereg,	re_miibus_writereg),
	DEVMETHOD(miibus_statchg,	re_miibus_statchg),

	{ 0, 0 }
};

static driver_t re_driver = {
	"re",
	re_methods,
	sizeof(struct rl_softc)
};

static devclass_t re_devclass;

DRIVER_MODULE(re, pci, re_driver, re_devclass, 0, 0);
DRIVER_MODULE(re, cardbus, re_driver, re_devclass, 0, 0);
DRIVER_MODULE(miibus, re, miibus_driver, miibus_devclass, 0, 0);

#define EE_SET(x)					\
	CSR_WRITE_1(sc, RL_EECMD,			\
		CSR_READ_1(sc, RL_EECMD) | x)

#define EE_CLR(x)					\
	CSR_WRITE_1(sc, RL_EECMD,			\
		CSR_READ_1(sc, RL_EECMD) & ~x)

/*
 * Send a read command and address to the EEPROM, check for ACK.
 */
static void
re_eeprom_putbyte(sc, addr)
	struct rl_softc		*sc;
	int			addr;
{
	register int		d, i;

	d = addr | (RL_9346_READ << sc->rl_eewidth);

	/*
	 * Feed in each bit and strobe the clock.
	 */

	for (i = 1 << (sc->rl_eewidth + 3); i; i >>= 1) {
		if (d & i) {
			EE_SET(RL_EE_DATAIN);
		} else {
			EE_CLR(RL_EE_DATAIN);
		}
		DELAY(100);
		EE_SET(RL_EE_CLK);
		DELAY(150);
		EE_CLR(RL_EE_CLK);
		DELAY(100);
	}

	return;
}
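
/*
 * Worked example (an illustrative note, not driver code): for a
 * 93C46-style part, rl_eewidth is the EEPROM address width, so with
 * addr = 0x03 the word clocked out above is
 *
 *	d = 0x03 | (RL_9346_READ << rl_eewidth)
 *
 * i.e. the read opcode sits just above the address bits, and the loop
 * shifts the combined word out MSB first, starting from bit
 * (rl_eewidth + 3).
 */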

/*
 * Read a word of data stored in the EEPROM at address 'addr.'
 */
static void
re_eeprom_getword(sc, addr, dest)
	struct rl_softc		*sc;
	int			addr;
	u_int16_t		*dest;
{
	register int		i;
	u_int16_t		word = 0;

	/*
	 * Send address of word we want to read.
	 */
	re_eeprom_putbyte(sc, addr);

	/*
	 * Start reading bits from EEPROM.
	 */
	for (i = 0x8000; i; i >>= 1) {
		EE_SET(RL_EE_CLK);
		DELAY(100);
		if (CSR_READ_1(sc, RL_EECMD) & RL_EE_DATAOUT)
			word |= i;
		EE_CLR(RL_EE_CLK);
		DELAY(100);
	}

	*dest = word;

	return;
}

/*
 * Read a sequence of words from the EEPROM.
 */
static void
re_read_eeprom(sc, dest, off, cnt)
	struct rl_softc		*sc;
	caddr_t			dest;
	int			off;
	int			cnt;
{
	int			i;
	u_int16_t		word = 0, *ptr;

	CSR_SETBIT_1(sc, RL_EECMD, RL_EEMODE_PROGRAM);

	DELAY(100);

	for (i = 0; i < cnt; i++) {
		CSR_SETBIT_1(sc, RL_EECMD, RL_EE_SEL);
		re_eeprom_getword(sc, off + i, &word);
		CSR_CLRBIT_1(sc, RL_EECMD, RL_EE_SEL);
		ptr = (u_int16_t *)(dest + (i * 2));
		*ptr = word;
	}

	CSR_CLRBIT_1(sc, RL_EECMD, RL_EEMODE_PROGRAM);

	return;
}
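
/*
 * Usage sketch (illustrative only): re_attach() below uses this routine
 * to pull the station address out of the EEPROM as three 16-bit words
 * starting at RL_EE_EADDR:
 *
 *	u_int16_t as[ETHER_ADDR_LEN / 2];
 *
 *	re_read_eeprom(sc, (caddr_t)as, RL_EE_EADDR, 3);
 */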

static int
re_gmii_readreg(dev, phy, reg)
	device_t		dev;
	int			phy, reg;
{
	struct rl_softc		*sc;
	u_int32_t		rval;
	int			i;

	if (phy != 1)
		return (0);

	sc = device_get_softc(dev);

	/* Let the rgephy driver read the GMEDIASTAT register */

	if (reg == RL_GMEDIASTAT) {
		rval = CSR_READ_1(sc, RL_GMEDIASTAT);
		return (rval);
	}

	CSR_WRITE_4(sc, RL_PHYAR, reg << 16);
	DELAY(1000);

	for (i = 0; i < RL_TIMEOUT; i++) {
		rval = CSR_READ_4(sc, RL_PHYAR);
		if (rval & RL_PHYAR_BUSY)
			break;
		DELAY(100);
	}

	if (i == RL_TIMEOUT) {
		device_printf(sc->rl_dev, "PHY read failed\n");
		return (0);
	}

	return (rval & RL_PHYAR_PHYDATA);
}

static int
re_gmii_writereg(dev, phy, reg, data)
	device_t		dev;
	int			phy, reg, data;
{
	struct rl_softc		*sc;
	u_int32_t		rval;
	int			i;

	sc = device_get_softc(dev);

	CSR_WRITE_4(sc, RL_PHYAR, (reg << 16) |
	    (data & RL_PHYAR_PHYDATA) | RL_PHYAR_BUSY);
	DELAY(1000);

	for (i = 0; i < RL_TIMEOUT; i++) {
		rval = CSR_READ_4(sc, RL_PHYAR);
		if (!(rval & RL_PHYAR_BUSY))
			break;
		DELAY(100);
	}

	if (i == RL_TIMEOUT) {
		device_printf(sc->rl_dev, "PHY write failed\n");
		return (0);
	}

	return (0);
}
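
/*
 * Note the flag polarity asymmetry in the two routines above: on a
 * read, the chip sets RL_PHYAR_BUSY once the data is valid, so we poll
 * for the bit to become set; on a write, we set RL_PHYAR_BUSY ourselves
 * and poll for the chip to clear it when the cycle completes.
 */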

static int
re_miibus_readreg(dev, phy, reg)
	device_t		dev;
	int			phy, reg;
{
	struct rl_softc		*sc;
	u_int16_t		rval = 0;
	u_int16_t		re8139_reg = 0;

	sc = device_get_softc(dev);

	if (sc->rl_type == RL_8169) {
		rval = re_gmii_readreg(dev, phy, reg);
		return (rval);
	}

	/* Pretend the internal PHY is only at address 0 */
	if (phy) {
		return (0);
	}
	switch (reg) {
	case MII_BMCR:
		re8139_reg = RL_BMCR;
		break;
	case MII_BMSR:
		re8139_reg = RL_BMSR;
		break;
	case MII_ANAR:
		re8139_reg = RL_ANAR;
		break;
	case MII_ANER:
		re8139_reg = RL_ANER;
		break;
	case MII_ANLPAR:
		re8139_reg = RL_LPAR;
		break;
	case MII_PHYIDR1:
	case MII_PHYIDR2:
		return (0);
	/*
	 * Allow the rlphy driver to read the media status
	 * register. If we have a link partner which does not
	 * support NWAY, this is the register which will tell
	 * us the results of parallel detection.
	 */
	case RL_MEDIASTAT:
		rval = CSR_READ_1(sc, RL_MEDIASTAT);
		return (rval);
	default:
		device_printf(sc->rl_dev, "bad phy register\n");
		return (0);
	}
	rval = CSR_READ_2(sc, re8139_reg);
	if (sc->rl_type == RL_8139CPLUS && re8139_reg == RL_BMCR) {
		/* 8139C+ has different bit layout. */
		rval &= ~(BMCR_LOOP | BMCR_ISO);
	}
	return (rval);
}

static int
re_miibus_writereg(dev, phy, reg, data)
	device_t		dev;
	int			phy, reg, data;
{
	struct rl_softc		*sc;
	u_int16_t		re8139_reg = 0;
	int			rval = 0;

	sc = device_get_softc(dev);

	if (sc->rl_type == RL_8169) {
		rval = re_gmii_writereg(dev, phy, reg, data);
		return (rval);
	}

	/* Pretend the internal PHY is only at address 0 */
	if (phy)
		return (0);

	switch (reg) {
	case MII_BMCR:
		re8139_reg = RL_BMCR;
		if (sc->rl_type == RL_8139CPLUS) {
			/* 8139C+ has different bit layout. */
			data &= ~(BMCR_LOOP | BMCR_ISO);
		}
		break;
	case MII_BMSR:
		re8139_reg = RL_BMSR;
		break;
	case MII_ANAR:
		re8139_reg = RL_ANAR;
		break;
	case MII_ANER:
		re8139_reg = RL_ANER;
		break;
	case MII_ANLPAR:
		re8139_reg = RL_LPAR;
		break;
	case MII_PHYIDR1:
	case MII_PHYIDR2:
		return (0);
	default:
		device_printf(sc->rl_dev, "bad phy register\n");
		return (0);
	}
	CSR_WRITE_2(sc, re8139_reg, data);
	return (0);
}

static void
re_miibus_statchg(dev)
	device_t		dev;
{

}

/*
 * Program the 64-bit multicast hash filter.
 */
static void
re_setmulti(sc)
	struct rl_softc		*sc;
{
	struct ifnet		*ifp;
	int			h = 0;
	u_int32_t		hashes[2] = { 0, 0 };
	struct ifmultiaddr	*ifma;
	u_int32_t		rxfilt;
	int			mcnt = 0;
	u_int32_t		hwrev;

	RL_LOCK_ASSERT(sc);

	ifp = sc->rl_ifp;

	rxfilt = CSR_READ_4(sc, RL_RXCFG);
	rxfilt &= ~(RL_RXCFG_RX_ALLPHYS | RL_RXCFG_RX_MULTI);
	if (ifp->if_flags & IFF_ALLMULTI || ifp->if_flags & IFF_PROMISC) {
		if (ifp->if_flags & IFF_PROMISC)
			rxfilt |= RL_RXCFG_RX_ALLPHYS;
		/*
		 * Unlike other hardware, we have to explicitly set
		 * RL_RXCFG_RX_MULTI to receive multicast frames in
		 * promiscuous mode.
		 */
		rxfilt |= RL_RXCFG_RX_MULTI;
		CSR_WRITE_4(sc, RL_RXCFG, rxfilt);
		CSR_WRITE_4(sc, RL_MAR0, 0xFFFFFFFF);
		CSR_WRITE_4(sc, RL_MAR4, 0xFFFFFFFF);
		return;
	}

	/* first, zot all the existing hash bits */
	CSR_WRITE_4(sc, RL_MAR0, 0);
	CSR_WRITE_4(sc, RL_MAR4, 0);

	/* now program new ones */
	IF_ADDR_LOCK(ifp);
	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
		if (ifma->ifma_addr->sa_family != AF_LINK)
			continue;
		h = ether_crc32_be(LLADDR((struct sockaddr_dl *)
		    ifma->ifma_addr), ETHER_ADDR_LEN) >> 26;
		if (h < 32)
			hashes[0] |= (1 << h);
		else
			hashes[1] |= (1 << (h - 32));
		mcnt++;
	}
	IF_ADDR_UNLOCK(ifp);

	if (mcnt)
		rxfilt |= RL_RXCFG_RX_MULTI;
	else
		rxfilt &= ~RL_RXCFG_RX_MULTI;

	CSR_WRITE_4(sc, RL_RXCFG, rxfilt);

	/*
	 * For some unfathomable reason, RealTek decided to reverse
	 * the order of the multicast hash registers in the PCI Express
	 * parts. This means we have to write the hash pattern in reverse
	 * order for those devices.
	 */

	hwrev = CSR_READ_4(sc, RL_TXCFG) & RL_TXCFG_HWREV;

	switch (hwrev) {
	case RL_HWREV_8100E:
	case RL_HWREV_8101E:
	case RL_HWREV_8168_SPIN1:
	case RL_HWREV_8168_SPIN2:
	case RL_HWREV_8168_SPIN3:
		CSR_WRITE_4(sc, RL_MAR0, bswap32(hashes[1]));
		CSR_WRITE_4(sc, RL_MAR4, bswap32(hashes[0]));
		break;
	default:
		CSR_WRITE_4(sc, RL_MAR0, hashes[0]);
		CSR_WRITE_4(sc, RL_MAR4, hashes[1]);
		break;
	}
}
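
/*
 * Illustrative sketch (not compiled) of the hash computed above for a
 * single group address: the top six bits of the big-endian CRC of the
 * MAC address pick one of 64 filter bits, split across the two 32-bit
 * MAR registers. The example address here is the all-hosts group.
 */
#if 0
	u_char addr[ETHER_ADDR_LEN] = { 0x01, 0x00, 0x5e, 0x00, 0x00, 0x01 };
	int h;

	h = ether_crc32_be(addr, ETHER_ADDR_LEN) >> 26;	/* 0..63 */
	if (h < 32)
		hashes[0] |= (1 << h);		/* lands in RL_MAR0 */
	else
		hashes[1] |= (1 << (h - 32));	/* lands in RL_MAR4 */
#endif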

static void
re_reset(sc)
	struct rl_softc		*sc;
{
	register int		i;

	RL_LOCK_ASSERT(sc);

	CSR_WRITE_1(sc, RL_COMMAND, RL_CMD_RESET);

	for (i = 0; i < RL_TIMEOUT; i++) {
		DELAY(10);
		if (!(CSR_READ_1(sc, RL_COMMAND) & RL_CMD_RESET))
			break;
	}
	if (i == RL_TIMEOUT)
		device_printf(sc->rl_dev, "reset never completed!\n");

	CSR_WRITE_1(sc, 0x82, 1);
}

#ifdef RE_DIAG

/*
 * The following routine is designed to test for a defect on some
 * 32-bit 8169 cards. Some of these NICs have the REQ64# and ACK64#
 * lines connected to the bus, however for a 32-bit only card, they
 * should be pulled high. The result of this defect is that the
 * NIC will not work right if you plug it into a 64-bit slot: DMA
 * operations will be done with 64-bit transfers, which will fail
 * because the 64-bit data lines aren't connected.
 *
 * There's no way to work around this (short of taking a soldering
 * iron to the board); however, we can detect it. The method we use
 * here is to put the NIC into digital loopback mode, set the receiver
 * to promiscuous mode, and then try to send a frame. We then compare
 * the frame data we sent to what was received. If the data matches,
 * then the NIC is working correctly, otherwise we know the user has
 * a defective NIC which has been mistakenly plugged into a 64-bit PCI
 * slot. In the latter case, there's no way the NIC can work correctly,
 * so we print out a message on the console and abort the device attach.
 */

static int
re_diag(sc)
	struct rl_softc		*sc;
{
	struct ifnet		*ifp = sc->rl_ifp;
	struct mbuf		*m0;
	struct ether_header	*eh;
	struct rl_desc		*cur_rx;
	u_int16_t		status;
	u_int32_t		rxstat;
	int			total_len, i, error = 0, phyaddr;
	u_int8_t		dst[] = { 0x00, 'h', 'e', 'l', 'l', 'o' };
	u_int8_t		src[] = { 0x00, 'w', 'o', 'r', 'l', 'd' };

	/* Allocate a single mbuf */
	MGETHDR(m0, M_DONTWAIT, MT_DATA);
	if (m0 == NULL)
		return (ENOBUFS);

	RL_LOCK(sc);

	/*
	 * Initialize the NIC in test mode. This sets the chip up
	 * so that it can send and receive frames, but performs the
	 * following special functions:
	 * - Puts receiver in promiscuous mode
	 * - Enables digital loopback mode
	 * - Leaves interrupts turned off
	 */

	ifp->if_flags |= IFF_PROMISC;
	sc->rl_testmode = 1;
	re_reset(sc);
	re_init_locked(sc);
	sc->rl_link = 1;
	if (sc->rl_type == RL_8169)
		phyaddr = 1;
	else
		phyaddr = 0;

	re_miibus_writereg(sc->rl_dev, phyaddr, MII_BMCR, BMCR_RESET);
	for (i = 0; i < RL_TIMEOUT; i++) {
		status = re_miibus_readreg(sc->rl_dev, phyaddr, MII_BMCR);
		if (!(status & BMCR_RESET))
			break;
	}

	re_miibus_writereg(sc->rl_dev, phyaddr, MII_BMCR, BMCR_LOOP);
	CSR_WRITE_2(sc, RL_ISR, RL_INTRS);

	DELAY(100000);

	/* Put some data in the mbuf */

	eh = mtod(m0, struct ether_header *);
	bcopy((char *)&dst, eh->ether_dhost, ETHER_ADDR_LEN);
	bcopy((char *)&src, eh->ether_shost, ETHER_ADDR_LEN);
	eh->ether_type = htons(ETHERTYPE_IP);
	m0->m_pkthdr.len = m0->m_len = ETHER_MIN_LEN - ETHER_CRC_LEN;

	/*
	 * Queue the packet, start transmission.
	 * Note: IF_HANDOFF() ultimately calls re_start() for us.
	 */

	CSR_WRITE_2(sc, RL_ISR, 0xFFFF);
	RL_UNLOCK(sc);
	/* XXX: re_diag must not be called when in ALTQ mode */
	IF_HANDOFF(&ifp->if_snd, m0, ifp);
	RL_LOCK(sc);
	m0 = NULL;

	/* Wait for it to propagate through the chip */

	DELAY(100000);
	for (i = 0; i < RL_TIMEOUT; i++) {
		status = CSR_READ_2(sc, RL_ISR);
		CSR_WRITE_2(sc, RL_ISR, status);
		if ((status & (RL_ISR_TIMEOUT_EXPIRED|RL_ISR_RX_OK)) ==
		    (RL_ISR_TIMEOUT_EXPIRED|RL_ISR_RX_OK))
			break;
		DELAY(10);
	}

	if (i == RL_TIMEOUT) {
		device_printf(sc->rl_dev,
		    "diagnostic failed, failed to receive packet in"
		    " loopback mode\n");
		error = EIO;
		goto done;
	}

	/*
	 * The packet should have been dumped into the first
	 * entry in the RX DMA ring. Grab it from there.
	 */

	bus_dmamap_sync(sc->rl_ldata.rl_rx_list_tag,
	    sc->rl_ldata.rl_rx_list_map,
	    BUS_DMASYNC_POSTREAD);
	bus_dmamap_sync(sc->rl_ldata.rl_rx_mtag,
	    sc->rl_ldata.rl_rx_desc[0].rx_dmamap,
	    BUS_DMASYNC_POSTREAD);
	bus_dmamap_unload(sc->rl_ldata.rl_rx_mtag,
	    sc->rl_ldata.rl_rx_desc[0].rx_dmamap);

	m0 = sc->rl_ldata.rl_rx_desc[0].rx_m;
	sc->rl_ldata.rl_rx_desc[0].rx_m = NULL;
	eh = mtod(m0, struct ether_header *);

	cur_rx = &sc->rl_ldata.rl_rx_list[0];
	total_len = RL_RXBYTES(cur_rx);
	rxstat = le32toh(cur_rx->rl_cmdstat);

	if (total_len != ETHER_MIN_LEN) {
		device_printf(sc->rl_dev,
		    "diagnostic failed, received short packet\n");
		error = EIO;
		goto done;
	}

	/* Test that the received packet data matches what we sent. */

	if (bcmp((char *)&eh->ether_dhost, (char *)&dst, ETHER_ADDR_LEN) ||
	    bcmp((char *)&eh->ether_shost, (char *)&src, ETHER_ADDR_LEN) ||
	    ntohs(eh->ether_type) != ETHERTYPE_IP) {
		device_printf(sc->rl_dev, "WARNING, DMA FAILURE!\n");
		device_printf(sc->rl_dev, "expected TX data: %6D/%6D/0x%x\n",
		    dst, ":", src, ":", ETHERTYPE_IP);
		device_printf(sc->rl_dev, "received RX data: %6D/%6D/0x%x\n",
		    eh->ether_dhost, ":", eh->ether_shost, ":",
		    ntohs(eh->ether_type));
		device_printf(sc->rl_dev, "You may have a defective 32-bit "
		    "NIC plugged into a 64-bit PCI slot.\n");
		device_printf(sc->rl_dev, "Please re-install the NIC in a "
		    "32-bit slot for proper operation.\n");
		device_printf(sc->rl_dev, "Read the re(4) man page for more "
		    "details.\n");
		error = EIO;
	}

done:
	/* Turn interface off, release resources */

	sc->rl_testmode = 0;
	sc->rl_link = 0;
	ifp->if_flags &= ~IFF_PROMISC;
	re_stop(sc);
	if (m0 != NULL)
		m_freem(m0);

	RL_UNLOCK(sc);

	return (error);
}

#endif

/*
 * Probe for a RealTek 8139C+/8169/8110 chip. Check the PCI vendor and device
 * IDs against our list and return a device name if we find a match.
 */
static int
re_probe(dev)
	device_t		dev;
{
	struct rl_type		*t;
	struct rl_softc		*sc;
	int			rid;
	u_int32_t		hwrev;

	t = re_devs;
	sc = device_get_softc(dev);

	while (t->rl_name != NULL) {
		if ((pci_get_vendor(dev) == t->rl_vid) &&
		    (pci_get_device(dev) == t->rl_did)) {
			/*
			 * Only attach to rev. 3 of the Linksys EG1032 adapter.
			 * Rev. 2 is supported by sk(4).
			 */
			if ((t->rl_vid == LINKSYS_VENDORID) &&
			    (t->rl_did == LINKSYS_DEVICEID_EG1032) &&
			    (pci_get_subdevice(dev) !=
			    LINKSYS_SUBDEVICE_EG1032_REV3)) {
				t++;
				continue;
			}

			/*
			 * Temporarily map the I/O space
			 * so we can read the chip ID register.
			 */
			rid = RL_RID;
			sc->rl_res = bus_alloc_resource_any(dev, RL_RES, &rid,
			    RF_ACTIVE);
			if (sc->rl_res == NULL) {
				device_printf(dev,
				    "couldn't map ports/memory\n");
				return (ENXIO);
			}
			sc->rl_btag = rman_get_bustag(sc->rl_res);
			sc->rl_bhandle = rman_get_bushandle(sc->rl_res);
			hwrev = CSR_READ_4(sc, RL_TXCFG) & RL_TXCFG_HWREV;
			bus_release_resource(dev, RL_RES,
			    RL_RID, sc->rl_res);
			if (t->rl_basetype == hwrev) {
				device_set_desc(dev, t->rl_name);
				return (BUS_PROBE_DEFAULT);
			}
		}
		t++;
	}

	return (ENXIO);
}

/*
 * Map a single buffer address.
 */

static void
re_dma_map_addr(arg, segs, nseg, error)
	void			*arg;
	bus_dma_segment_t	*segs;
	int			nseg;
	int			error;
{
	bus_addr_t		*addr;

	if (error)
		return;

	KASSERT(nseg == 1, ("too many DMA segments, %d should be 1", nseg));
	addr = arg;
	*addr = segs->ds_addr;
}
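
/*
 * Usage sketch (illustrative only): bus_dmamap_load() invokes the
 * callback above with the resolved segment list, so loading a
 * descriptor ring follows the pattern used in re_allocmem() below:
 *
 *	bus_addr_t addr = 0;
 *
 *	error = bus_dmamap_load(tag, map, ring, size,
 *	    re_dma_map_addr, &addr, BUS_DMA_NOWAIT);
 *
 * On success, addr holds the single bus address of the ring.
 */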

static int
re_allocmem(dev, sc)
	device_t		dev;
	struct rl_softc		*sc;
{
	bus_size_t		rx_list_size, tx_list_size;
	int			error;
	int			i;

	rx_list_size = sc->rl_ldata.rl_rx_desc_cnt * sizeof(struct rl_desc);
	tx_list_size = sc->rl_ldata.rl_tx_desc_cnt * sizeof(struct rl_desc);

	/*
	 * Allocate the parent bus DMA tag appropriate for PCI.
	 */
	error = bus_dma_tag_create(bus_get_dma_tag(dev), 1, 0,
	    BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL,
	    BUS_SPACE_MAXSIZE_32BIT, 0, BUS_SPACE_MAXSIZE_32BIT, 0,
	    NULL, NULL, &sc->rl_parent_tag);
	if (error) {
		device_printf(dev, "could not allocate parent DMA tag\n");
		return (error);
	}

	/*
	 * Allocate map for TX mbufs.
	 */
	error = bus_dma_tag_create(sc->rl_parent_tag, 1, 0,
	    BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL,
	    NULL, MCLBYTES * RL_NTXSEGS, RL_NTXSEGS, 4096, 0,
	    NULL, NULL, &sc->rl_ldata.rl_tx_mtag);
	if (error) {
		device_printf(dev, "could not allocate TX DMA tag\n");
		return (error);
	}

	/*
	 * Allocate map for RX mbufs.
	 */

	error = bus_dma_tag_create(sc->rl_parent_tag, sizeof(uint64_t), 0,
	    BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL,
	    MCLBYTES, 1, MCLBYTES, 0, NULL, NULL, &sc->rl_ldata.rl_rx_mtag);
	if (error) {
		device_printf(dev, "could not allocate RX DMA tag\n");
		return (error);
	}

	/*
	 * Allocate map for TX descriptor list.
	 */
	error = bus_dma_tag_create(sc->rl_parent_tag, RL_RING_ALIGN,
	    0, BUS_SPACE_MAXADDR_32BIT, BUS_SPACE_MAXADDR, NULL,
	    NULL, tx_list_size, 1, tx_list_size, 0,
	    NULL, NULL, &sc->rl_ldata.rl_tx_list_tag);
	if (error) {
		device_printf(dev, "could not allocate TX DMA ring tag\n");
		return (error);
	}

	/* Allocate DMA'able memory for the TX ring */

	error = bus_dmamem_alloc(sc->rl_ldata.rl_tx_list_tag,
	    (void **)&sc->rl_ldata.rl_tx_list,
	    BUS_DMA_WAITOK | BUS_DMA_COHERENT | BUS_DMA_ZERO,
	    &sc->rl_ldata.rl_tx_list_map);
	if (error) {
		device_printf(dev, "could not allocate TX DMA ring\n");
		return (error);
	}

	/* Load the map for the TX ring. */

	sc->rl_ldata.rl_tx_list_addr = 0;
	error = bus_dmamap_load(sc->rl_ldata.rl_tx_list_tag,
	     sc->rl_ldata.rl_tx_list_map, sc->rl_ldata.rl_tx_list,
	     tx_list_size, re_dma_map_addr,
	     &sc->rl_ldata.rl_tx_list_addr, BUS_DMA_NOWAIT);
	if (error != 0 || sc->rl_ldata.rl_tx_list_addr == 0) {
		device_printf(dev, "could not load TX DMA ring\n");
		return (ENOMEM);
	}

	/* Create DMA maps for TX buffers */

	for (i = 0; i < sc->rl_ldata.rl_tx_desc_cnt; i++) {
		error = bus_dmamap_create(sc->rl_ldata.rl_tx_mtag, 0,
		    &sc->rl_ldata.rl_tx_desc[i].tx_dmamap);
		if (error) {
			device_printf(dev, "could not create DMA map for TX\n");
			return (error);
		}
	}

	/*
	 * Allocate map for RX descriptor list.
	 */
	error = bus_dma_tag_create(sc->rl_parent_tag, RL_RING_ALIGN,
	    0, BUS_SPACE_MAXADDR_32BIT, BUS_SPACE_MAXADDR, NULL,
	    NULL, rx_list_size, 1, rx_list_size, 0,
	    NULL, NULL, &sc->rl_ldata.rl_rx_list_tag);
	if (error) {
		device_printf(dev, "could not create RX DMA ring tag\n");
		return (error);
	}

	/* Allocate DMA'able memory for the RX ring */

	error = bus_dmamem_alloc(sc->rl_ldata.rl_rx_list_tag,
	    (void **)&sc->rl_ldata.rl_rx_list,
	    BUS_DMA_WAITOK | BUS_DMA_COHERENT | BUS_DMA_ZERO,
	    &sc->rl_ldata.rl_rx_list_map);
	if (error) {
		device_printf(dev, "could not allocate RX DMA ring\n");
		return (error);
	}

	/* Load the map for the RX ring. */

	sc->rl_ldata.rl_rx_list_addr = 0;
	error = bus_dmamap_load(sc->rl_ldata.rl_rx_list_tag,
	     sc->rl_ldata.rl_rx_list_map, sc->rl_ldata.rl_rx_list,
	     rx_list_size, re_dma_map_addr,
	     &sc->rl_ldata.rl_rx_list_addr, BUS_DMA_NOWAIT);
	if (error != 0 || sc->rl_ldata.rl_rx_list_addr == 0) {
		device_printf(dev, "could not load RX DMA ring\n");
		return (ENOMEM);
	}

	/* Create DMA maps for RX buffers */

	error = bus_dmamap_create(sc->rl_ldata.rl_rx_mtag, 0,
	    &sc->rl_ldata.rl_rx_sparemap);
	if (error) {
		device_printf(dev, "could not create spare DMA map for RX\n");
		return (error);
	}
	for (i = 0; i < sc->rl_ldata.rl_rx_desc_cnt; i++) {
		error = bus_dmamap_create(sc->rl_ldata.rl_rx_mtag, 0,
		    &sc->rl_ldata.rl_rx_desc[i].rx_dmamap);
		if (error) {
			device_printf(dev, "could not create DMA map for RX\n");
			return (error);
		}
	}

	return (0);
}
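
/*
 * For reference, the DMA tag hierarchy created above looks like this:
 *
 *	rl_parent_tag
 *	    rl_tx_mtag		TX mbuf maps (up to RL_NTXSEGS segments)
 *	    rl_rx_mtag		RX mbuf maps (one MCLBYTES segment each)
 *	    rl_tx_list_tag	TX descriptor ring (RL_RING_ALIGN aligned)
 *	    rl_rx_list_tag	RX descriptor ring (RL_RING_ALIGN aligned)
 */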

/*
 * Attach the interface. Allocate softc structures, do ifmedia
 * setup and ethernet/BPF attach.
 */
static int
re_attach(dev)
	device_t		dev;
{
	u_char			eaddr[ETHER_ADDR_LEN];
	u_int16_t		as[ETHER_ADDR_LEN / 2];
	struct rl_softc		*sc;
	struct ifnet		*ifp;
	struct rl_hwrev		*hw_rev;
	int			hwrev;
	u_int16_t		re_did = 0;
	int			error = 0, rid, i;
	int			msic, reg;

	sc = device_get_softc(dev);
	sc->rl_dev = dev;

	mtx_init(&sc->rl_mtx, device_get_nameunit(dev), MTX_NETWORK_LOCK,
	    MTX_DEF);
	callout_init_mtx(&sc->rl_stat_callout, &sc->rl_mtx, 0);

	/*
	 * Map control/status registers.
	 */
	pci_enable_busmaster(dev);

	rid = RL_RID;
	sc->rl_res = bus_alloc_resource_any(dev, RL_RES, &rid,
	    RF_ACTIVE);

	if (sc->rl_res == NULL) {
		device_printf(dev, "couldn't map ports/memory\n");
		error = ENXIO;
		goto fail;
	}

	sc->rl_btag = rman_get_bustag(sc->rl_res);
	sc->rl_bhandle = rman_get_bushandle(sc->rl_res);

	msic = 0;
	if (pci_find_extcap(dev, PCIY_EXPRESS, &reg) == 0) {
		msic = pci_msi_count(dev);
		if (bootverbose)
			device_printf(dev, "MSI count : %d\n", msic);
	}
	if (msic == RL_MSI_MESSAGES && msi_disable == 0) {
		if (pci_alloc_msi(dev, &msic) == 0) {
			if (msic == RL_MSI_MESSAGES) {
				device_printf(dev, "Using %d MSI messages\n",
				    msic);
				sc->rl_msi = 1;
			} else
				pci_release_msi(dev);
		}
	}

	/* Allocate interrupt */
	if (sc->rl_msi == 0) {
		rid = 0;
		sc->rl_irq[0] = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid,
		    RF_SHAREABLE | RF_ACTIVE);
		if (sc->rl_irq[0] == NULL) {
			device_printf(dev, "couldn't allocate IRQ resources\n");
			error = ENXIO;
			goto fail;
		}
	} else {
		for (i = 0, rid = 1; i < RL_MSI_MESSAGES; i++, rid++) {
			sc->rl_irq[i] = bus_alloc_resource_any(dev,
			    SYS_RES_IRQ, &rid, RF_ACTIVE);
			if (sc->rl_irq[i] == NULL) {
				device_printf(dev,
				    "couldn't allocate IRQ resources for "
				    "message %d\n", rid);
				error = ENXIO;
				goto fail;
			}
		}
	}

	/* Reset the adapter. */
	RL_LOCK(sc);
	re_reset(sc);
	RL_UNLOCK(sc);

	hw_rev = re_hwrevs;
	hwrev = CSR_READ_4(sc, RL_TXCFG) & RL_TXCFG_HWREV;
	while (hw_rev->rl_desc != NULL) {
		if (hw_rev->rl_rev == hwrev) {
			sc->rl_type = hw_rev->rl_type;
			break;
		}
		hw_rev++;
	}
	if (hw_rev->rl_desc == NULL) {
		device_printf(dev, "Unknown H/W revision: %08x\n", hwrev);
		error = ENXIO;
		goto fail;
	}

	sc->rl_eewidth = RL_9356_ADDR_LEN;
	re_read_eeprom(sc, (caddr_t)&re_did, 0, 1);
	if (re_did != 0x8129)
		sc->rl_eewidth = RL_9346_ADDR_LEN;

	/*
	 * Get station address from the EEPROM.
	 */
	re_read_eeprom(sc, (caddr_t)as, RL_EE_EADDR, 3);
	for (i = 0; i < ETHER_ADDR_LEN / 2; i++)
		as[i] = le16toh(as[i]);
	bcopy(as, eaddr, sizeof(eaddr));

	if (sc->rl_type == RL_8169) {
		/* Set RX length mask and number of descriptors. */
		sc->rl_rxlenmask = RL_RDESC_STAT_GFRAGLEN;
		sc->rl_txstart = RL_GTXSTART;
		sc->rl_ldata.rl_tx_desc_cnt = RL_8169_TX_DESC_CNT;
		sc->rl_ldata.rl_rx_desc_cnt = RL_8169_RX_DESC_CNT;
	} else {
		/* Set RX length mask and number of descriptors. */
		sc->rl_rxlenmask = RL_RDESC_STAT_FRAGLEN;
		sc->rl_txstart = RL_TXSTART;
		sc->rl_ldata.rl_tx_desc_cnt = RL_8139_TX_DESC_CNT;
		sc->rl_ldata.rl_rx_desc_cnt = RL_8139_RX_DESC_CNT;
	}

	error = re_allocmem(dev, sc);
	if (error)
		goto fail;

	ifp = sc->rl_ifp = if_alloc(IFT_ETHER);
	if (ifp == NULL) {
		device_printf(dev, "can not if_alloc()\n");
		error = ENOSPC;
		goto fail;
	}

	/* Do MII setup */
	if (mii_phy_probe(dev, &sc->rl_miibus,
	    re_ifmedia_upd, re_ifmedia_sts)) {
		device_printf(dev, "MII without any phy!\n");
		error = ENXIO;
		goto fail;
	}

	/* Take PHY out of power down mode. */
	if (sc->rl_type == RL_8169) {
		uint32_t rev;

		rev = CSR_READ_4(sc, RL_TXCFG);
		/* HWVERID 0, 1 and 2 :  bit26-30, bit23 */
		rev &= 0x7c800000;
		if (rev != 0) {
			/* RTL8169S single chip */
			switch (rev) {
			case RL_HWREV_8169_8110SB:
			case RL_HWREV_8169_8110SC:
			case RL_HWREV_8168_SPIN2:
			case RL_HWREV_8168_SPIN3:
				re_gmii_writereg(dev, 1, 0x1f, 0);
				re_gmii_writereg(dev, 1, 0x0e, 0);
				break;
			default:
				break;
			}
		}
	}

	ifp->if_softc = sc;
	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
	ifp->if_ioctl = re_ioctl;
	ifp->if_start = re_start;
	ifp->if_hwassist = RE_CSUM_FEATURES | CSUM_TSO;
	ifp->if_capabilities = IFCAP_HWCSUM | IFCAP_TSO4;
	ifp->if_capenable = ifp->if_capabilities;
	ifp->if_init = re_init;
	IFQ_SET_MAXLEN(&ifp->if_snd, RL_IFQ_MAXLEN);
	ifp->if_snd.ifq_drv_maxlen = RL_IFQ_MAXLEN;
	IFQ_SET_READY(&ifp->if_snd);

	TASK_INIT(&sc->rl_txtask, 1, re_tx_task, ifp);
	TASK_INIT(&sc->rl_inttask, 0, re_int_task, sc);

	/*
	 * Call MI attach routine.
	 */
	ether_ifattach(ifp, eaddr);

	/* VLAN capability setup */
	ifp->if_capabilities |= IFCAP_VLAN_MTU | IFCAP_VLAN_HWTAGGING;
	if (ifp->if_capabilities & IFCAP_HWCSUM)
		ifp->if_capabilities |= IFCAP_VLAN_HWCSUM;
	ifp->if_capenable = ifp->if_capabilities;
#ifdef DEVICE_POLLING
	ifp->if_capabilities |= IFCAP_POLLING;
#endif
	/*
	 * Tell the upper layer(s) we support long frames.
	 * Must appear after the call to ether_ifattach() because
	 * ether_ifattach() sets ifi_hdrlen to the default value.
	 */
	ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header);

#ifdef RE_DIAG
	/*
	 * Perform hardware diagnostic on the original RTL8169.
	 * Some 32-bit cards were incorrectly wired and would
	 * malfunction if plugged into a 64-bit slot.
	 */

	if (hwrev == RL_HWREV_8169) {
		error = re_diag(sc);
		if (error) {
			device_printf(dev,
			    "attach aborted due to hardware diag failure\n");
			ether_ifdetach(ifp);
			goto fail;
		}
	}
#endif

	/* Hook interrupt last to avoid having to lock softc */
	if (sc->rl_msi == 0)
		error = bus_setup_intr(dev, sc->rl_irq[0],
		    INTR_TYPE_NET | INTR_MPSAFE, re_intr, NULL, sc,
		    &sc->rl_intrhand[0]);
	else {
		for (i = 0; i < RL_MSI_MESSAGES; i++) {
			error = bus_setup_intr(dev, sc->rl_irq[i],
			    INTR_TYPE_NET | INTR_MPSAFE, re_intr, NULL, sc,
			    &sc->rl_intrhand[i]);
			if (error != 0)
				break;
		}
	}
	if (error) {
		device_printf(dev, "couldn't set up irq\n");
		ether_ifdetach(ifp);
	}

fail:

	if (error)
		re_detach(dev);

	return (error);
}

/*
 * Shutdown hardware and free up resources. This can be called any
 * time after the mutex has been initialized. It is called in both
 * the error case in attach and the normal detach case so it needs
 * to be careful about only freeing resources that have actually been
 * allocated.
 */
static int
re_detach(dev)
	device_t		dev;
{
	struct rl_softc		*sc;
	struct ifnet		*ifp;
	int			i, rid;

	sc = device_get_softc(dev);
	ifp = sc->rl_ifp;
	KASSERT(mtx_initialized(&sc->rl_mtx), ("re mutex not initialized"));

#ifdef DEVICE_POLLING
	if (ifp->if_capenable & IFCAP_POLLING)
		ether_poll_deregister(ifp);
#endif
	/* These should only be active if attach succeeded */
	if (device_is_attached(dev)) {
		RL_LOCK(sc);
#if 0
		sc->suspended = 1;
#endif
		re_stop(sc);
		RL_UNLOCK(sc);
		callout_drain(&sc->rl_stat_callout);
		taskqueue_drain(taskqueue_fast, &sc->rl_inttask);
		taskqueue_drain(taskqueue_fast, &sc->rl_txtask);
		/*
		 * Force off the IFF_UP flag here, in case someone
		 * still had a BPF descriptor attached to this
		 * interface. If they do, ether_ifdetach() will cause
		 * the BPF code to try and clear the promisc mode
		 * flag, which will bubble down to re_ioctl(),
		 * which will try to call re_init() again. This will
		 * turn the NIC back on and restart the MII ticker,
		 * which will panic the system when the kernel tries
		 * to invoke the re_tick() function that isn't there
		 * anymore.
		 */
		ifp->if_flags &= ~IFF_UP;
		ether_ifdetach(ifp);
	}
	if (sc->rl_miibus)
		device_delete_child(dev, sc->rl_miibus);
	bus_generic_detach(dev);

	/*
	 * The rest is resource deallocation, so we should already be
	 * stopped here.
	 */

	for (i = 0; i < RL_MSI_MESSAGES; i++) {
		if (sc->rl_intrhand[i] != NULL) {
			bus_teardown_intr(dev, sc->rl_irq[i],
			    sc->rl_intrhand[i]);
			sc->rl_intrhand[i] = NULL;
		}
	}
	if (ifp != NULL)
		if_free(ifp);
	if (sc->rl_msi == 0) {
		if (sc->rl_irq[0] != NULL) {
			bus_release_resource(dev, SYS_RES_IRQ, 0,
			    sc->rl_irq[0]);
			sc->rl_irq[0] = NULL;
		}
	} else {
		for (i = 0, rid = 1; i < RL_MSI_MESSAGES; i++, rid++) {
			if (sc->rl_irq[i] != NULL) {
				bus_release_resource(dev, SYS_RES_IRQ, rid,
				    sc->rl_irq[i]);
				sc->rl_irq[i] = NULL;
			}
		}
		pci_release_msi(dev);
	}
	if (sc->rl_res)
		bus_release_resource(dev, RL_RES, RL_RID, sc->rl_res);

	/* Unload and free the RX DMA ring memory and map */

	if (sc->rl_ldata.rl_rx_list_tag) {
		bus_dmamap_unload(sc->rl_ldata.rl_rx_list_tag,
		    sc->rl_ldata.rl_rx_list_map);
		bus_dmamem_free(sc->rl_ldata.rl_rx_list_tag,
		    sc->rl_ldata.rl_rx_list,
		    sc->rl_ldata.rl_rx_list_map);
		bus_dma_tag_destroy(sc->rl_ldata.rl_rx_list_tag);
	}

	/* Unload and free the TX DMA ring memory and map */

	if (sc->rl_ldata.rl_tx_list_tag) {
		bus_dmamap_unload(sc->rl_ldata.rl_tx_list_tag,
		    sc->rl_ldata.rl_tx_list_map);
		bus_dmamem_free(sc->rl_ldata.rl_tx_list_tag,
		    sc->rl_ldata.rl_tx_list,
		    sc->rl_ldata.rl_tx_list_map);
		bus_dma_tag_destroy(sc->rl_ldata.rl_tx_list_tag);
	}

	/* Destroy all the RX and TX buffer maps */

	if (sc->rl_ldata.rl_tx_mtag) {
		for (i = 0; i < sc->rl_ldata.rl_tx_desc_cnt; i++)
			bus_dmamap_destroy(sc->rl_ldata.rl_tx_mtag,
			    sc->rl_ldata.rl_tx_desc[i].tx_dmamap);
		bus_dma_tag_destroy(sc->rl_ldata.rl_tx_mtag);
	}
	if (sc->rl_ldata.rl_rx_mtag) {
		for (i = 0; i < sc->rl_ldata.rl_rx_desc_cnt; i++)
			bus_dmamap_destroy(sc->rl_ldata.rl_rx_mtag,
			    sc->rl_ldata.rl_rx_desc[i].rx_dmamap);
		if (sc->rl_ldata.rl_rx_sparemap)
			bus_dmamap_destroy(sc->rl_ldata.rl_rx_mtag,
			    sc->rl_ldata.rl_rx_sparemap);
		bus_dma_tag_destroy(sc->rl_ldata.rl_rx_mtag);
	}

	/* Unload and free the stats buffer and map */

	if (sc->rl_ldata.rl_stag) {
		bus_dmamap_unload(sc->rl_ldata.rl_stag,
		    sc->rl_ldata.rl_smap);
		bus_dmamem_free(sc->rl_ldata.rl_stag,
		    sc->rl_ldata.rl_stats,
		    sc->rl_ldata.rl_smap);
		bus_dma_tag_destroy(sc->rl_ldata.rl_stag);
	}

	if (sc->rl_parent_tag)
		bus_dma_tag_destroy(sc->rl_parent_tag);

	mtx_destroy(&sc->rl_mtx);

	return (0);
}

static __inline void
re_discard_rxbuf(sc, idx)
	struct rl_softc		*sc;
	int			idx;
{
	struct rl_desc		*desc;
	struct rl_rxdesc	*rxd;
	uint32_t		cmdstat;

	rxd = &sc->rl_ldata.rl_rx_desc[idx];
	desc = &sc->rl_ldata.rl_rx_list[idx];
	desc->rl_vlanctl = 0;
	cmdstat = rxd->rx_size;
	if (idx == sc->rl_ldata.rl_rx_desc_cnt - 1)
		cmdstat |= RL_RDESC_CMD_EOR;
	desc->rl_cmdstat = htole32(cmdstat | RL_RDESC_CMD_OWN);
}

static int
re_newbuf(sc, idx)
	struct rl_softc		*sc;
	int			idx;
{
	struct mbuf		*m;
	struct rl_rxdesc	*rxd;
	bus_dma_segment_t	segs[1];
	bus_dmamap_t		map;
	struct rl_desc		*desc;
	uint32_t		cmdstat;
	int			error, nsegs;

	m = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR);
	if (m == NULL)
		return (ENOBUFS);

	m->m_len = m->m_pkthdr.len = MCLBYTES;
#ifdef RE_FIXUP_RX
	/*
	 * This is part of an evil trick to deal with non-x86 platforms.
	 * The RealTek chip requires RX buffers to be aligned on 64-bit
	 * boundaries, but that will hose non-x86 machines. To get around
	 * this, we leave some empty space at the start of each buffer
	 * and for non-x86 hosts, we copy the buffer back six bytes
	 * to achieve word alignment. This is slightly more efficient
	 * than allocating a new buffer, copying the contents, and
	 * discarding the old buffer.
	 */
	m_adj(m, RE_ETHER_ALIGN);
#endif
	error = bus_dmamap_load_mbuf_sg(sc->rl_ldata.rl_rx_mtag,
	    sc->rl_ldata.rl_rx_sparemap, m, segs, &nsegs, BUS_DMA_NOWAIT);
	if (error != 0) {
		m_freem(m);
		return (ENOBUFS);
	}
	KASSERT(nsegs == 1, ("%s: %d segments returned!", __func__, nsegs));

	rxd = &sc->rl_ldata.rl_rx_desc[idx];
	if (rxd->rx_m != NULL) {
		bus_dmamap_sync(sc->rl_ldata.rl_rx_mtag, rxd->rx_dmamap,
		    BUS_DMASYNC_POSTREAD);
		bus_dmamap_unload(sc->rl_ldata.rl_rx_mtag, rxd->rx_dmamap);
	}

	rxd->rx_m = m;
	map = rxd->rx_dmamap;
	rxd->rx_dmamap = sc->rl_ldata.rl_rx_sparemap;
	rxd->rx_size = segs[0].ds_len;
	sc->rl_ldata.rl_rx_sparemap = map;
	bus_dmamap_sync(sc->rl_ldata.rl_rx_mtag, rxd->rx_dmamap,
	    BUS_DMASYNC_PREREAD);

	desc = &sc->rl_ldata.rl_rx_list[idx];
	desc->rl_vlanctl = 0;
	desc->rl_bufaddr_lo = htole32(RL_ADDR_LO(segs[0].ds_addr));
	desc->rl_bufaddr_hi = htole32(RL_ADDR_HI(segs[0].ds_addr));
	cmdstat = segs[0].ds_len;
	if (idx == sc->rl_ldata.rl_rx_desc_cnt - 1)
		cmdstat |= RL_RDESC_CMD_EOR;
	desc->rl_cmdstat = htole32(cmdstat | RL_RDESC_CMD_OWN);

	return (0);
}
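
/*
 * Note on the map juggling above: the new mbuf is always loaded into
 * rl_rx_sparemap first, and only on success do we swap that map with
 * the descriptor's rx_dmamap. That way a failed load leaves the old
 * mbuf and its mapping untouched.
 */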

#ifdef RE_FIXUP_RX
static __inline void
re_fixup_rx(m)
	struct mbuf		*m;
{
	int			i;
	uint16_t		*src, *dst;

	src = mtod(m, uint16_t *);
	dst = src - (RE_ETHER_ALIGN - ETHER_ALIGN) / sizeof *src;

	for (i = 0; i < (m->m_len / sizeof(uint16_t) + 1); i++)
		*dst++ = *src++;

	m->m_data -= RE_ETHER_ALIGN - ETHER_ALIGN;

	return;
}
#endif
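
/*
 * Worked example of the fixup above, assuming RE_ETHER_ALIGN is 8 and
 * ETHER_ALIGN is 2: re_newbuf() reserves 8 bytes at the head of each
 * cluster to satisfy the chip's 64-bit RX buffer alignment, and
 * re_fixup_rx() then copies the frame back by 8 - 2 = 6 bytes, so the
 * 14-byte Ethernet header leaves the IP header 32-bit aligned for
 * strict-alignment (non-x86) machines.
 */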
1641
1642static int
1643re_tx_list_init(sc)
1644	struct rl_softc		*sc;
1645{
1646	struct rl_desc		*desc;
1647	int			i;
1648
1649	RL_LOCK_ASSERT(sc);
1650
1651	bzero(sc->rl_ldata.rl_tx_list,
1652	    sc->rl_ldata.rl_tx_desc_cnt * sizeof(struct rl_desc));
1653	for (i = 0; i < sc->rl_ldata.rl_tx_desc_cnt; i++)
1654		sc->rl_ldata.rl_tx_desc[i].tx_m = NULL;
1655	/* Set EOR. */
1656	desc = &sc->rl_ldata.rl_tx_list[sc->rl_ldata.rl_tx_desc_cnt - 1];
1657	desc->rl_cmdstat |= htole32(RL_TDESC_CMD_EOR);
1658
1659	bus_dmamap_sync(sc->rl_ldata.rl_tx_list_tag,
1660	    sc->rl_ldata.rl_tx_list_map,
1661	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1662
1663	sc->rl_ldata.rl_tx_prodidx = 0;
1664	sc->rl_ldata.rl_tx_considx = 0;
1665	sc->rl_ldata.rl_tx_free = sc->rl_ldata.rl_tx_desc_cnt;
1666
1667	return (0);
1668}
1669
1670static int
1671re_rx_list_init(sc)
1672	struct rl_softc		*sc;
1673{
1674	int			error, i;
1675
1676	bzero(sc->rl_ldata.rl_rx_list,
1677	    sc->rl_ldata.rl_rx_desc_cnt * sizeof(struct rl_desc));
1678	for (i = 0; i < sc->rl_ldata.rl_rx_desc_cnt; i++) {
1679		sc->rl_ldata.rl_rx_desc[i].rx_m = NULL;
1680		if ((error = re_newbuf(sc, i)) != 0)
1681			return (error);
1682	}
1683
1684	/* Flush the RX descriptors */
1685
1686	bus_dmamap_sync(sc->rl_ldata.rl_rx_list_tag,
1687	    sc->rl_ldata.rl_rx_list_map,
1688	    BUS_DMASYNC_PREWRITE|BUS_DMASYNC_PREREAD);
1689
1690	sc->rl_ldata.rl_rx_prodidx = 0;
1691	sc->rl_head = sc->rl_tail = NULL;
1692
1693	return (0);
1694}
1695
1696/*
1697 * RX handler for C+ and 8169. For the gigE chips, we support
1698 * the reception of jumbo frames that have been fragmented
1699 * across multiple 2K mbuf cluster buffers.
1700 */
1701static int
1702re_rxeof(sc)
1703	struct rl_softc		*sc;
1704{
1705	struct mbuf		*m;
1706	struct ifnet		*ifp;
1707	int			i, total_len;
1708	struct rl_desc		*cur_rx;
1709	u_int32_t		rxstat, rxvlan;
1710	int			maxpkt = 16;
1711
1712	RL_LOCK_ASSERT(sc);
1713
1714	ifp = sc->rl_ifp;
1715
1716	/* Invalidate the descriptor memory */
1717
1718	bus_dmamap_sync(sc->rl_ldata.rl_rx_list_tag,
1719	    sc->rl_ldata.rl_rx_list_map,
1720	    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
1721
1722	for (i = sc->rl_ldata.rl_rx_prodidx; maxpkt > 0;
1723	    i = RL_RX_DESC_NXT(sc, i)) {
1724		cur_rx = &sc->rl_ldata.rl_rx_list[i];
1725		rxstat = le32toh(cur_rx->rl_cmdstat);
1726		if ((rxstat & RL_RDESC_STAT_OWN) != 0)
1727			break;
1728		total_len = rxstat & sc->rl_rxlenmask;
1729		rxvlan = le32toh(cur_rx->rl_vlanctl);
1730		m = sc->rl_ldata.rl_rx_desc[i].rx_m;
1731
1732		if (!(rxstat & RL_RDESC_STAT_EOF)) {
1733			if (re_newbuf(sc, i) != 0) {
1734				/*
1735				 * If this is part of a multi-fragment packet,
1736				 * discard all the pieces.
1737				 */
1738				if (sc->rl_head != NULL) {
1739					m_freem(sc->rl_head);
1740					sc->rl_head = sc->rl_tail = NULL;
1741				}
1742				re_discard_rxbuf(sc, i);
1743				continue;
1744			}
1745			m->m_len = RE_RX_DESC_BUFLEN;
1746			if (sc->rl_head == NULL)
1747				sc->rl_head = sc->rl_tail = m;
1748			else {
1749				m->m_flags &= ~M_PKTHDR;
1750				sc->rl_tail->m_next = m;
1751				sc->rl_tail = m;
1752			}
1753			continue;
1754		}
1755
1756		/*
1757		 * NOTE: for the 8139C+, the frame length field
1758		 * is always 12 bits in size, but for the gigE chips,
1759		 * it is 13 bits (since the max RX frame length is 16K).
1760		 * Unfortunately, all 32 bits in the status word
1761		 * were already used, so to make room for the extra
1762		 * length bit, RealTek took out the 'frame alignment
1763		 * error' bit and shifted the other status bits
1764		 * over one slot. The OWN, EOR, FS and LS bits are
1765		 * still in the same places. We have already extracted
1766		 * the frame length and checked the OWN bit, so rather
1767		 * than using an alternate bit mapping, we shift the
1768		 * status bits one space to the right so we can evaluate
1769		 * them using the 8169 status as though it was in the
1770		 * same format as that of the 8139C+.
1771		 */
1772		if (sc->rl_type == RL_8169)
1773			rxstat >>= 1;
1774
1775		/*
1776		 * if total_len > 2^13-1, both _RXERRSUM and _GIANT will be
1777		 * set, but if CRC is clear, it will still be a valid frame.
1778		 */
1779		if (rxstat & RL_RDESC_STAT_RXERRSUM && !(total_len > 8191 &&
1780		    (rxstat & RL_RDESC_STAT_ERRS) == RL_RDESC_STAT_GIANT)) {
1781			ifp->if_ierrors++;
1782			/*
1783			 * If this is part of a multi-fragment packet,
1784			 * discard all the pieces.
1785			 */
1786			if (sc->rl_head != NULL) {
1787				m_freem(sc->rl_head);
1788				sc->rl_head = sc->rl_tail = NULL;
1789			}
1790			re_discard_rxbuf(sc, i);
1791			continue;
1792		}
1793
1794		/*
1795		 * If allocating a replacement mbuf fails,
1796		 * reload the current one.
1797		 */
1798
1799		if (re_newbuf(sc, i) != 0) {
1800			ifp->if_iqdrops++;
1801			if (sc->rl_head != NULL) {
1802				m_freem(sc->rl_head);
1803				sc->rl_head = sc->rl_tail = NULL;
1804			}
1805			re_discard_rxbuf(sc, i);
1806			continue;
1807		}
1808
1809		if (sc->rl_head != NULL) {
1810			m->m_len = total_len % RE_RX_DESC_BUFLEN;
1811			if (m->m_len == 0)
1812				m->m_len = RE_RX_DESC_BUFLEN;
1813			/*
1814			 * Special case: if there's 4 bytes or less
1815			 * in this buffer, the mbuf can be discarded:
1816			 * the last 4 bytes is the CRC, which we don't
1817			 * care about anyway.
1818			 */
1819			if (m->m_len <= ETHER_CRC_LEN) {
1820				sc->rl_tail->m_len -=
1821				    (ETHER_CRC_LEN - m->m_len);
1822				m_freem(m);
1823			} else {
1824				m->m_len -= ETHER_CRC_LEN;
1825				m->m_flags &= ~M_PKTHDR;
1826				sc->rl_tail->m_next = m;
1827			}
1828			m = sc->rl_head;
1829			sc->rl_head = sc->rl_tail = NULL;
1830			m->m_pkthdr.len = total_len - ETHER_CRC_LEN;
1831		} else
1832			m->m_pkthdr.len = m->m_len =
1833			    (total_len - ETHER_CRC_LEN);
1834
1835#ifdef RE_FIXUP_RX
1836		re_fixup_rx(m);
1837#endif
1838		ifp->if_ipackets++;
1839		m->m_pkthdr.rcvif = ifp;
1840
1841		/* Do RX checksumming if enabled */
1842
1843		if (ifp->if_capenable & IFCAP_RXCSUM) {
1844
1845			/* Check IP header checksum */
1846			if (rxstat & RL_RDESC_STAT_PROTOID)
1847				m->m_pkthdr.csum_flags |= CSUM_IP_CHECKED;
1848			if (!(rxstat & RL_RDESC_STAT_IPSUMBAD))
1849				m->m_pkthdr.csum_flags |= CSUM_IP_VALID;
1850
1851			/* Check TCP/UDP checksum */
1852			if ((RL_TCPPKT(rxstat) &&
1853			    !(rxstat & RL_RDESC_STAT_TCPSUMBAD)) ||
1854			    (RL_UDPPKT(rxstat) &&
1855			    !(rxstat & RL_RDESC_STAT_UDPSUMBAD))) {
1856				m->m_pkthdr.csum_flags |=
1857				    CSUM_DATA_VALID|CSUM_PSEUDO_HDR;
1858				m->m_pkthdr.csum_data = 0xffff;
1859			}
1860		}
1861		maxpkt--;
1862		if (rxvlan & RL_RDESC_VLANCTL_TAG) {
1863			m->m_pkthdr.ether_vtag =
1864			    ntohs((rxvlan & RL_RDESC_VLANCTL_DATA));
1865			m->m_flags |= M_VLANTAG;
1866		}
1867		RL_UNLOCK(sc);
1868		(*ifp->if_input)(ifp, m);
1869		RL_LOCK(sc);
1870	}
1871
1872	/* Flush the RX DMA ring */
1873
1874	bus_dmamap_sync(sc->rl_ldata.rl_rx_list_tag,
1875	    sc->rl_ldata.rl_rx_list_map,
1876	    BUS_DMASYNC_PREWRITE|BUS_DMASYNC_PREREAD);
1877
1878	sc->rl_ldata.rl_rx_prodidx = i;
1879
1880	if (maxpkt)
1881		return(EAGAIN);
1882
1883	return(0);
1884}
1885
1886static void
1887re_txeof(sc)
1888	struct rl_softc		*sc;
1889{
1890	struct ifnet		*ifp;
1891	struct rl_txdesc	*txd;
1892	u_int32_t		txstat;
1893	int			cons;
1894
1895	cons = sc->rl_ldata.rl_tx_considx;
1896	if (cons == sc->rl_ldata.rl_tx_prodidx)
1897		return;
1898
1899	ifp = sc->rl_ifp;
1900	/* Invalidate the TX descriptor list */
1901	bus_dmamap_sync(sc->rl_ldata.rl_tx_list_tag,
1902	    sc->rl_ldata.rl_tx_list_map,
1903	    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
1904
1905	for (; cons != sc->rl_ldata.rl_tx_prodidx;
1906	    cons = RL_TX_DESC_NXT(sc, cons)) {
1907		txstat = le32toh(sc->rl_ldata.rl_tx_list[cons].rl_cmdstat);
1908		if (txstat & RL_TDESC_STAT_OWN)
1909			break;
1910		/*
1911		 * We only stash mbufs in the last descriptor
1912		 * in a fragment chain, which also happens to
1913		 * be the only place where the TX status bits
1914		 * are valid.
1915		 */
1916		if (txstat & RL_TDESC_CMD_EOF) {
1917			txd = &sc->rl_ldata.rl_tx_desc[cons];
1918			bus_dmamap_sync(sc->rl_ldata.rl_tx_mtag,
1919			    txd->tx_dmamap, BUS_DMASYNC_POSTWRITE);
1920			bus_dmamap_unload(sc->rl_ldata.rl_tx_mtag,
1921			    txd->tx_dmamap);
1922			KASSERT(txd->tx_m != NULL,
1923			    ("%s: freeing NULL mbufs!", __func__));
1924			m_freem(txd->tx_m);
1925			txd->tx_m = NULL;
1926			if (txstat & (RL_TDESC_STAT_EXCESSCOL|
1927			    RL_TDESC_STAT_COLCNT))
1928				ifp->if_collisions++;
1929			if (txstat & RL_TDESC_STAT_TXERRSUM)
1930				ifp->if_oerrors++;
1931			else
1932				ifp->if_opackets++;
1933		}
1934		sc->rl_ldata.rl_tx_free++;
1935		ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
1936	}
1937	sc->rl_ldata.rl_tx_considx = cons;
1938
1939	/* No changes made to the TX ring, so no flush needed */
1940
1941	if (sc->rl_ldata.rl_tx_free != sc->rl_ldata.rl_tx_desc_cnt) {
1942		/*
1943		 * Some chips will ignore a second TX request issued
1944		 * while an existing transmission is in progress. If
1945		 * the transmitter goes idle but there are still
1946		 * packets waiting to be sent, we need to restart the
1947		 * channel here to flush them out. This only seems to
1948		 * be required with the PCIe devices.
1949		 */
1950		CSR_WRITE_1(sc, sc->rl_txstart, RL_TXSTART_START);
1951
1952#ifdef RE_TX_MODERATION
1953		/*
1954		 * If not all descriptors have been reaped yet, reload
1955		 * the timer so that we will eventually get another
1956		 * interrupt that will cause us to re-enter this routine.
1957		 * This is done in case the transmitter has gone idle.
1958		 */
1959		CSR_WRITE_4(sc, RL_TIMERCNT, 1);
1960#endif
1961	} else
1962		sc->rl_watchdog_timer = 0;
1963}
1964
1965static void
1966re_tick(xsc)
1967	void			*xsc;
1968{
1969	struct rl_softc		*sc;
1970	struct mii_data		*mii;
1971	struct ifnet		*ifp;
1972
1973	sc = xsc;
1974	ifp = sc->rl_ifp;
1975
1976	RL_LOCK_ASSERT(sc);
1977
1978	re_watchdog(sc);
1979
1980	mii = device_get_softc(sc->rl_miibus);
1981	mii_tick(mii);
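	/*
	 * Track link state transitions reported by the PHY.  When the
	 * link comes back up, kick the transmit task so that packets
	 * queued while the link was down get flushed out.
	 */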
1982	if (sc->rl_link) {
1983		if (!(mii->mii_media_status & IFM_ACTIVE))
1984			sc->rl_link = 0;
1985	} else {
1986		if (mii->mii_media_status & IFM_ACTIVE &&
1987		    IFM_SUBTYPE(mii->mii_media_active) != IFM_NONE) {
1988			sc->rl_link = 1;
1989			if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1990				taskqueue_enqueue_fast(taskqueue_fast,
1991				    &sc->rl_txtask);
1992		}
1993	}
1994
1995	callout_reset(&sc->rl_stat_callout, hz, re_tick, sc);
1996}
1997
1998#ifdef DEVICE_POLLING
1999static void
2000re_poll(struct ifnet *ifp, enum poll_cmd cmd, int count)
2001{
2002	struct rl_softc *sc = ifp->if_softc;
2003
2004	RL_LOCK(sc);
2005	if (ifp->if_drv_flags & IFF_DRV_RUNNING)
2006		re_poll_locked(ifp, cmd, count);
2007	RL_UNLOCK(sc);
2008}
2009
2010static void
2011re_poll_locked(struct ifnet *ifp, enum poll_cmd cmd, int count)
2012{
2013	struct rl_softc *sc = ifp->if_softc;
2014
2015	RL_LOCK_ASSERT(sc);
2016
2017	sc->rxcycles = count;
2018	re_rxeof(sc);
2019	re_txeof(sc);
2020
2021	if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
2022		taskqueue_enqueue_fast(taskqueue_fast, &sc->rl_txtask);
2023
2024	if (cmd == POLL_AND_CHECK_STATUS) { /* also check status register */
2025		u_int16_t       status;
2026
2027		status = CSR_READ_2(sc, RL_ISR);
2028		if (status == 0xffff)
2029			return;
2030		if (status)
2031			CSR_WRITE_2(sc, RL_ISR, status);
2032
2033		/*
2034		 * XXX check behaviour on receiver stalls.
2035		 */
2036
2037		if (status & RL_ISR_SYSTEM_ERR) {
2038			re_reset(sc);
2039			re_init_locked(sc);
2040		}
2041	}
2042}
2043#endif /* DEVICE_POLLING */
2044
2045static int
2046re_intr(arg)
2047	void			*arg;
2048{
2049	struct rl_softc		*sc;
2050	uint16_t		status;
2051
2052	sc = arg;
2053
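	/*
	 * Reading 0xFFFF usually means the chip is no longer present
	 * or not yet powered up; seeing none of our interrupt bits set
	 * means the interrupt belongs to another device sharing the
	 * line.  The real work happens in re_int_task(); here we just
	 * mask further interrupts and hand off to the taskqueue.
	 */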
2054	status = CSR_READ_2(sc, RL_ISR);
2055	if (status == 0xFFFF || (status & RL_INTRS_CPLUS) == 0)
		return (FILTER_STRAY);
2057	CSR_WRITE_2(sc, RL_IMR, 0);
2058
2059	taskqueue_enqueue_fast(taskqueue_fast, &sc->rl_inttask);
2060
2061	return (FILTER_HANDLED);
2062}
2063
2064static void
2065re_int_task(arg, npending)
2066	void			*arg;
2067	int			npending;
2068{
2069	struct rl_softc		*sc;
2070	struct ifnet		*ifp;
2071	u_int16_t		status;
2072	int			rval = 0;
2073
2074	sc = arg;
2075	ifp = sc->rl_ifp;
2076
2077	RL_LOCK(sc);
2078
2079	status = CSR_READ_2(sc, RL_ISR);
	CSR_WRITE_2(sc, RL_ISR, status);
2081
2082	if (sc->suspended ||
2083	    (ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
2084		RL_UNLOCK(sc);
2085		return;
2086	}
2087
2088#ifdef DEVICE_POLLING
	if (ifp->if_capenable & IFCAP_POLLING) {
2090		RL_UNLOCK(sc);
2091		return;
2092	}
2093#endif
2094
2095	if (status & (RL_ISR_RX_OK|RL_ISR_RX_ERR|RL_ISR_FIFO_OFLOW))
2096		rval = re_rxeof(sc);
2097
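	/*
	 * With RE_TX_MODERATION, TX completions are driven by the
	 * countdown timer interrupt rather than per-frame TX-OK
	 * interrupts, hence the different set of status bits tested
	 * below.
	 */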
2098#ifdef RE_TX_MODERATION
2099	if (status & (RL_ISR_TIMEOUT_EXPIRED|
2100#else
2101	if (status & (RL_ISR_TX_OK|
2102#endif
2103	    RL_ISR_TX_ERR|RL_ISR_TX_DESC_UNAVAIL))
2104		re_txeof(sc);
2105
2106	if (status & RL_ISR_SYSTEM_ERR) {
2107		re_reset(sc);
2108		re_init_locked(sc);
2109	}
2110
2111	if (status & RL_ISR_LINKCHG) {
2112		callout_stop(&sc->rl_stat_callout);
2113		re_tick(sc);
2114	}
2115
2116	if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
2117		taskqueue_enqueue_fast(taskqueue_fast, &sc->rl_txtask);
2118
2119	RL_UNLOCK(sc);
2120
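	/*
	 * If the chip raised new events while we were working, or if
	 * re_rxeof() requested another pass, re-queue the task before
	 * unmasking interrupts.
	 */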
	if ((CSR_READ_2(sc, RL_ISR) & RL_INTRS_CPLUS) || rval) {
2122		taskqueue_enqueue_fast(taskqueue_fast, &sc->rl_inttask);
2123		return;
2124	}
2125
2126	CSR_WRITE_2(sc, RL_IMR, RL_INTRS_CPLUS);
2127
2128	return;
2129}
2130
2131static int
2132re_encap(sc, m_head)
2133	struct rl_softc		*sc;
2134	struct mbuf		**m_head;
2135{
2136	struct rl_txdesc	*txd, *txd_last;
2137	bus_dma_segment_t	segs[RL_NTXSEGS];
2138	bus_dmamap_t		map;
2139	struct mbuf		*m_new;
2140	struct rl_desc		*desc;
2141	int			nsegs, prod;
2142	int			i, error, ei, si;
2143	int			padlen;
2144	uint32_t		cmdstat, csum_flags;
2145
2146	RL_LOCK_ASSERT(sc);
2147	M_ASSERTPKTHDR((*m_head));
2148
2149	/*
2150	 * With some of the RealTek chips, using the checksum offload
2151	 * support in conjunction with the autopadding feature results
2152	 * in the transmission of corrupt frames. For example, if we
2153	 * need to send a really small IP fragment that's less than 60
2154	 * bytes in size, and IP header checksumming is enabled, the
2155	 * resulting ethernet frame that appears on the wire will
2156	 * have garbled payload. To work around this, if TX checksum
2157	 * offload is enabled, we always manually pad short frames out
2158	 * to the minimum ethernet frame size.
2159	 *
2160	 * Note: this appears unnecessary for TCP, and doing it for TCP
2161	 * with PCIe adapters seems to result in bad checksums.
2162	 */
2163	if ((*m_head)->m_pkthdr.csum_flags & (CSUM_IP | CSUM_UDP) &&
2164	    ((*m_head)->m_pkthdr.csum_flags & CSUM_TCP) == 0 &&
	    (*m_head)->m_pkthdr.len < RL_MIN_FRAMELEN) {
2166		padlen = RL_MIN_FRAMELEN - (*m_head)->m_pkthdr.len;
2167		if (M_WRITABLE(*m_head) == 0) {
2168			/* Get a writable copy. */
2169			m_new = m_dup(*m_head, M_DONTWAIT);
2170			m_freem(*m_head);
2171			if (m_new == NULL) {
2172				*m_head = NULL;
2173				return (ENOBUFS);
2174			}
2175			*m_head = m_new;
2176		}
2177		if ((*m_head)->m_next != NULL ||
2178		    M_TRAILINGSPACE(*m_head) < padlen) {
2179			m_new = m_defrag(*m_head, M_DONTWAIT);
2180			if (m_new == NULL) {
2181				m_freem(*m_head);
2182				*m_head = NULL;
2183				return (ENOBUFS);
2184			}
2185		} else
2186			m_new = *m_head;
2187
2188		/*
2189		 * Manually pad short frames, and zero the pad space
2190		 * to avoid leaking data.
2191		 */
2192		bzero(mtod(m_new, char *) + m_new->m_pkthdr.len, padlen);
2193		m_new->m_pkthdr.len += padlen;
2194		m_new->m_len = m_new->m_pkthdr.len;
2195		*m_head = m_new;
2196	}
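	/*
	 * As an illustration: if RL_MIN_FRAMELEN is the usual 60-byte
	 * minimum (ETHER_MIN_LEN less the 4-byte CRC), a 42-byte frame
	 * is extended with 18 bytes of zeros before being handed to
	 * the chip.
	 */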
2197
2198	prod = sc->rl_ldata.rl_tx_prodidx;
2199	txd = &sc->rl_ldata.rl_tx_desc[prod];
2200	error = bus_dmamap_load_mbuf_sg(sc->rl_ldata.rl_tx_mtag, txd->tx_dmamap,
2201	    *m_head, segs, &nsegs, BUS_DMA_NOWAIT);
2202	if (error == EFBIG) {
2203		m_new = m_collapse(*m_head, M_DONTWAIT, RL_NTXSEGS);
2204		if (m_new == NULL) {
2205			m_freem(*m_head);
2206			*m_head = NULL;
2207			return (ENOBUFS);
2208		}
2209		*m_head = m_new;
2210		error = bus_dmamap_load_mbuf_sg(sc->rl_ldata.rl_tx_mtag,
2211		    txd->tx_dmamap, *m_head, segs, &nsegs, BUS_DMA_NOWAIT);
2212		if (error != 0) {
2213			m_freem(*m_head);
2214			*m_head = NULL;
2215			return (error);
2216		}
2217	} else if (error != 0)
2218		return (error);
2219	if (nsegs == 0) {
2220		m_freem(*m_head);
2221		*m_head = NULL;
2222		return (EIO);
2223	}
2224
2225	/* Check for number of available descriptors. */
2226	if (sc->rl_ldata.rl_tx_free - nsegs <= 1) {
2227		bus_dmamap_unload(sc->rl_ldata.rl_tx_mtag, txd->tx_dmamap);
2228		return (ENOBUFS);
2229	}
2230
2231	bus_dmamap_sync(sc->rl_ldata.rl_tx_mtag, txd->tx_dmamap,
2232	    BUS_DMASYNC_PREWRITE);
2233
	/*
	 * Set up checksum offload. Note: checksum offload bits must
	 * appear in all descriptors of a multi-descriptor transmit
	 * attempt. This was determined empirically with an 8169
	 * chip and is a hard requirement.
	 */
2240	csum_flags = 0;
2241	if (((*m_head)->m_pkthdr.csum_flags & CSUM_TSO) != 0)
2242		csum_flags = RL_TDESC_CMD_LGSEND |
2243		    ((uint32_t)(*m_head)->m_pkthdr.tso_segsz <<
2244		    RL_TDESC_CMD_MSSVAL_SHIFT);
2245	else {
2246		if ((*m_head)->m_pkthdr.csum_flags & CSUM_IP)
2247			csum_flags |= RL_TDESC_CMD_IPCSUM;
2248		if ((*m_head)->m_pkthdr.csum_flags & CSUM_TCP)
2249			csum_flags |= RL_TDESC_CMD_TCPCSUM;
2250		if ((*m_head)->m_pkthdr.csum_flags & CSUM_UDP)
2251			csum_flags |= RL_TDESC_CMD_UDPCSUM;
2252	}
2253
2254	si = prod;
2255	for (i = 0; i < nsegs; i++, prod = RL_TX_DESC_NXT(sc, prod)) {
2256		desc = &sc->rl_ldata.rl_tx_list[prod];
2257		desc->rl_vlanctl = 0;
2258		desc->rl_bufaddr_lo = htole32(RL_ADDR_LO(segs[i].ds_addr));
2259		desc->rl_bufaddr_hi = htole32(RL_ADDR_HI(segs[i].ds_addr));
2260		cmdstat = segs[i].ds_len;
2261		if (i != 0)
2262			cmdstat |= RL_TDESC_CMD_OWN;
2263		if (prod == sc->rl_ldata.rl_tx_desc_cnt - 1)
2264			cmdstat |= RL_TDESC_CMD_EOR;
2265		desc->rl_cmdstat = htole32(cmdstat | csum_flags);
2266		sc->rl_ldata.rl_tx_free--;
2267	}
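	/*
	 * Note that RL_TDESC_CMD_OWN was deliberately left clear on
	 * the first descriptor above; it is set last (together with
	 * SOF, below) so the chip never sees a partially built chain.
	 */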
2268	/* Update producer index. */
2269	sc->rl_ldata.rl_tx_prodidx = prod;
2270
2271	/* Set EOF on the last descriptor. */
2272	ei = RL_TX_DESC_PRV(sc, prod);
2273	desc = &sc->rl_ldata.rl_tx_list[ei];
2274	desc->rl_cmdstat |= htole32(RL_TDESC_CMD_EOF);
2275
2276	desc = &sc->rl_ldata.rl_tx_list[si];
2277	/*
2278	 * Set up hardware VLAN tagging. Note: vlan tag info must
2279	 * appear in the first descriptor of a multi-descriptor
2280	 * transmission attempt.
2281	 */
2282	if ((*m_head)->m_flags & M_VLANTAG)
2283		desc->rl_vlanctl =
2284		    htole32(htons((*m_head)->m_pkthdr.ether_vtag) |
2285		    RL_TDESC_VLANCTL_TAG);
2286	/* Set SOF and transfer ownership of packet to the chip. */
2287	desc->rl_cmdstat |= htole32(RL_TDESC_CMD_OWN | RL_TDESC_CMD_SOF);
2288
	/*
	 * Ensure that the map for this transmission is placed at
	 * the array index of the last descriptor in this chain.
	 * (Swap the first and last dmamaps.)
	 */
2294	txd_last = &sc->rl_ldata.rl_tx_desc[ei];
2295	map = txd->tx_dmamap;
2296	txd->tx_dmamap = txd_last->tx_dmamap;
2297	txd_last->tx_dmamap = map;
2298	txd_last->tx_m = *m_head;
2299
2300	return (0);
2301}
2302
2303static void
2304re_tx_task(arg, npending)
2305	void			*arg;
2306	int			npending;
2307{
2308	struct ifnet		*ifp;
2309
2310	ifp = arg;
2311	re_start(ifp);
2312
2313	return;
2314}
2315
2316/*
2317 * Main transmit routine for C+ and gigE NICs.
2318 */
2319static void
2320re_start(ifp)
2321	struct ifnet		*ifp;
2322{
2323	struct rl_softc		*sc;
2324	struct mbuf		*m_head;
2325	int			queued;
2326
2327	sc = ifp->if_softc;
2328
2329	RL_LOCK(sc);
2330
2331	if ((ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) !=
2332	    IFF_DRV_RUNNING || sc->rl_link == 0) {
2333		RL_UNLOCK(sc);
2334		return;
2335	}
2336
2337	for (queued = 0; !IFQ_DRV_IS_EMPTY(&ifp->if_snd) &&
2338	    sc->rl_ldata.rl_tx_free > 1;) {
2339		IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
2340		if (m_head == NULL)
2341			break;
2342
2343		if (re_encap(sc, &m_head) != 0) {
2344			if (m_head == NULL)
2345				break;
2346			IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
2347			ifp->if_drv_flags |= IFF_DRV_OACTIVE;
2348			break;
2349		}
2350
2351		/*
2352		 * If there's a BPF listener, bounce a copy of this frame
2353		 * to him.
2354		 */
2355		ETHER_BPF_MTAP(ifp, m_head);
2356
2357		queued++;
2358	}
2359
2360	if (queued == 0) {
2361#ifdef RE_TX_MODERATION
2362		if (sc->rl_ldata.rl_tx_free != sc->rl_ldata.rl_tx_desc_cnt)
2363			CSR_WRITE_4(sc, RL_TIMERCNT, 1);
2364#endif
2365		RL_UNLOCK(sc);
2366		return;
2367	}
2368
2369	/* Flush the TX descriptors */
2370
2371	bus_dmamap_sync(sc->rl_ldata.rl_tx_list_tag,
2372	    sc->rl_ldata.rl_tx_list_map,
2373	    BUS_DMASYNC_PREWRITE|BUS_DMASYNC_PREREAD);
2374
2375	CSR_WRITE_1(sc, sc->rl_txstart, RL_TXSTART_START);
2376
2377#ifdef RE_TX_MODERATION
2378	/*
2379	 * Use the countdown timer for interrupt moderation.
2380	 * 'TX done' interrupts are disabled. Instead, we reset the
2381	 * countdown timer, which will begin counting until it hits
2382	 * the value in the TIMERINT register, and then trigger an
2383	 * interrupt. Each time we write to the TIMERCNT register,
2384	 * the timer count is reset to 0.
2385	 */
2386	CSR_WRITE_4(sc, RL_TIMERCNT, 1);
2387#endif
2388
2389	/*
2390	 * Set a timeout in case the chip goes out to lunch.
2391	 */
2392	sc->rl_watchdog_timer = 5;
2393
2394	RL_UNLOCK(sc);
2395
2396	return;
2397}
2398
2399static void
2400re_init(xsc)
2401	void			*xsc;
2402{
2403	struct rl_softc		*sc = xsc;
2404
2405	RL_LOCK(sc);
2406	re_init_locked(sc);
2407	RL_UNLOCK(sc);
2408}
2409
2410static void
2411re_init_locked(sc)
2412	struct rl_softc		*sc;
2413{
2414	struct ifnet		*ifp = sc->rl_ifp;
2415	struct mii_data		*mii;
2416	u_int32_t		rxcfg = 0;
2417	union {
2418		uint32_t align_dummy;
2419		u_char eaddr[ETHER_ADDR_LEN];
	} eaddr;
2421
2422	RL_LOCK_ASSERT(sc);
2423
2424	mii = device_get_softc(sc->rl_miibus);
2425
2426	/*
2427	 * Cancel pending I/O and free all RX/TX buffers.
2428	 */
2429	re_stop(sc);
2430
2431	/*
2432	 * Enable C+ RX and TX mode, as well as VLAN stripping and
2433	 * RX checksum offload. We must configure the C+ register
2434	 * before all others.
2435	 */
2436	CSR_WRITE_2(sc, RL_CPLUS_CMD, RL_CPLUSCMD_RXENB|
2437	    RL_CPLUSCMD_TXENB|RL_CPLUSCMD_PCI_MRW|
2438	    RL_CPLUSCMD_VLANSTRIP|RL_CPLUSCMD_RXCSUM_ENB);
2439
2440	/*
2441	 * Init our MAC address.  Even though the chipset
2442	 * documentation doesn't mention it, we need to enter "Config
2443	 * register write enable" mode to modify the ID registers.
2444	 */
2445	/* Copy MAC address on stack to align. */
2446	bcopy(IF_LLADDR(ifp), eaddr.eaddr, ETHER_ADDR_LEN);
2447	CSR_WRITE_1(sc, RL_EECMD, RL_EEMODE_WRITECFG);
2448	CSR_WRITE_4(sc, RL_IDR0,
2449	    htole32(*(u_int32_t *)(&eaddr.eaddr[0])));
2450	CSR_WRITE_4(sc, RL_IDR4,
2451	    htole32(*(u_int32_t *)(&eaddr.eaddr[4])));
2452	CSR_WRITE_1(sc, RL_EECMD, RL_EEMODE_OFF);
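	/*
	 * The station address is written as two 32-bit words while
	 * "write config" mode is enabled; the union above exists only
	 * to give the byte array 32-bit alignment for these accesses.
	 */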
2453
2454	/*
2455	 * For C+ mode, initialize the RX descriptors and mbufs.
2456	 */
2457	re_rx_list_init(sc);
2458	re_tx_list_init(sc);
2459
2460	/*
2461	 * Load the addresses of the RX and TX lists into the chip.
2462	 */
2463
2464	CSR_WRITE_4(sc, RL_RXLIST_ADDR_HI,
2465	    RL_ADDR_HI(sc->rl_ldata.rl_rx_list_addr));
2466	CSR_WRITE_4(sc, RL_RXLIST_ADDR_LO,
2467	    RL_ADDR_LO(sc->rl_ldata.rl_rx_list_addr));
2468
2469	CSR_WRITE_4(sc, RL_TXLIST_ADDR_HI,
2470	    RL_ADDR_HI(sc->rl_ldata.rl_tx_list_addr));
2471	CSR_WRITE_4(sc, RL_TXLIST_ADDR_LO,
2472	    RL_ADDR_LO(sc->rl_ldata.rl_tx_list_addr));
2473
2474	/*
2475	 * Enable transmit and receive.
2476	 */
2477	CSR_WRITE_1(sc, RL_COMMAND, RL_CMD_TX_ENB|RL_CMD_RX_ENB);
2478
2479	/*
2480	 * Set the initial TX and RX configuration.
2481	 */
2482	if (sc->rl_testmode) {
2483		if (sc->rl_type == RL_8169)
2484			CSR_WRITE_4(sc, RL_TXCFG,
2485			    RL_TXCFG_CONFIG|RL_LOOPTEST_ON);
2486		else
2487			CSR_WRITE_4(sc, RL_TXCFG,
2488			    RL_TXCFG_CONFIG|RL_LOOPTEST_ON_CPLUS);
2489	} else
2490		CSR_WRITE_4(sc, RL_TXCFG, RL_TXCFG_CONFIG);
2491
2492	CSR_WRITE_1(sc, RL_EARLY_TX_THRESH, 16);
2493
2494	CSR_WRITE_4(sc, RL_RXCFG, RL_RXCFG_CONFIG);
2495
2496	/* Set the individual bit to receive frames for this host only. */
2497	rxcfg = CSR_READ_4(sc, RL_RXCFG);
2498	rxcfg |= RL_RXCFG_RX_INDIV;
2499
2500	/* If we want promiscuous mode, set the allframes bit. */
2501	if (ifp->if_flags & IFF_PROMISC)
2502		rxcfg |= RL_RXCFG_RX_ALLPHYS;
2503	else
2504		rxcfg &= ~RL_RXCFG_RX_ALLPHYS;
2505	CSR_WRITE_4(sc, RL_RXCFG, rxcfg);
2506
2507	/*
2508	 * Set capture broadcast bit to capture broadcast frames.
2509	 */
2510	if (ifp->if_flags & IFF_BROADCAST)
2511		rxcfg |= RL_RXCFG_RX_BROAD;
2512	else
2513		rxcfg &= ~RL_RXCFG_RX_BROAD;
2514	CSR_WRITE_4(sc, RL_RXCFG, rxcfg);
2515
2516	/*
2517	 * Program the multicast filter, if necessary.
2518	 */
2519	re_setmulti(sc);
2520
2521#ifdef DEVICE_POLLING
2522	/*
2523	 * Disable interrupts if we are polling.
2524	 */
2525	if (ifp->if_capenable & IFCAP_POLLING)
2526		CSR_WRITE_2(sc, RL_IMR, 0);
2527	else	/* otherwise ... */
2528#endif
2529
2530	/*
2531	 * Enable interrupts.
2532	 */
2533	if (sc->rl_testmode)
2534		CSR_WRITE_2(sc, RL_IMR, 0);
2535	else
2536		CSR_WRITE_2(sc, RL_IMR, RL_INTRS_CPLUS);
2537	CSR_WRITE_2(sc, RL_ISR, RL_INTRS_CPLUS);
2538
2539	/* Set initial TX threshold */
2540	sc->rl_txthresh = RL_TX_THRESH_INIT;
2541
2542	/* Start RX/TX process. */
2543	CSR_WRITE_4(sc, RL_MISSEDPKT, 0);
2544#ifdef notdef
2545	/* Enable receiver and transmitter. */
2546	CSR_WRITE_1(sc, RL_COMMAND, RL_CMD_TX_ENB|RL_CMD_RX_ENB);
2547#endif
2548
2549#ifdef RE_TX_MODERATION
2550	/*
2551	 * Initialize the timer interrupt register so that
2552	 * a timer interrupt will be generated once the timer
2553	 * reaches a certain number of ticks. The timer is
2554	 * reloaded on each transmit. This gives us TX interrupt
2555	 * moderation, which dramatically improves TX frame rate.
2556	 */
2557	if (sc->rl_type == RL_8169)
2558		CSR_WRITE_4(sc, RL_TIMERINT_8169, 0x800);
2559	else
2560		CSR_WRITE_4(sc, RL_TIMERINT, 0x400);
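	/*
	 * The TIMERINT values above are in units of the chip's
	 * internal timer clock; they appear to have been chosen
	 * empirically.
	 */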
2561#endif
2562
2563	/*
2564	 * For 8169 gigE NICs, set the max allowed RX packet
2565	 * size so we can receive jumbo frames.
2566	 */
2567	if (sc->rl_type == RL_8169)
2568		CSR_WRITE_2(sc, RL_MAXRXPKTLEN, 16383);
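	/*
	 * 16383 appears to be the largest size the chip accepts here,
	 * which effectively disables RX length filtering for jumbo
	 * frames.
	 */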
2569
2570	if (sc->rl_testmode)
2571		return;
2572
2573	mii_mediachg(mii);
2574
2575	CSR_WRITE_1(sc, RL_CFG1, CSR_READ_1(sc, RL_CFG1) | RL_CFG1_DRVLOAD);
2576
2577	ifp->if_drv_flags |= IFF_DRV_RUNNING;
2578	ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
2579
2580	sc->rl_link = 0;
2581	sc->rl_watchdog_timer = 0;
2582	callout_reset(&sc->rl_stat_callout, hz, re_tick, sc);
2583}
2584
2585/*
2586 * Set media options.
2587 */
2588static int
2589re_ifmedia_upd(ifp)
2590	struct ifnet		*ifp;
2591{
2592	struct rl_softc		*sc;
2593	struct mii_data		*mii;
2594
2595	sc = ifp->if_softc;
2596	mii = device_get_softc(sc->rl_miibus);
2597	RL_LOCK(sc);
2598	mii_mediachg(mii);
2599	RL_UNLOCK(sc);
2600
2601	return (0);
2602}
2603
2604/*
2605 * Report current media status.
2606 */
2607static void
2608re_ifmedia_sts(ifp, ifmr)
2609	struct ifnet		*ifp;
2610	struct ifmediareq	*ifmr;
2611{
2612	struct rl_softc		*sc;
2613	struct mii_data		*mii;
2614
2615	sc = ifp->if_softc;
2616	mii = device_get_softc(sc->rl_miibus);
2617
2618	RL_LOCK(sc);
2619	mii_pollstat(mii);
2620	RL_UNLOCK(sc);
2621	ifmr->ifm_active = mii->mii_media_active;
2622	ifmr->ifm_status = mii->mii_media_status;
2623}
2624
2625static int
2626re_ioctl(ifp, command, data)
2627	struct ifnet		*ifp;
2628	u_long			command;
2629	caddr_t			data;
2630{
2631	struct rl_softc		*sc = ifp->if_softc;
2632	struct ifreq		*ifr = (struct ifreq *) data;
2633	struct mii_data		*mii;
2634	int			error = 0;
2635
2636	switch (command) {
	case SIOCSIFMTU:
		RL_LOCK(sc);
		if (ifr->ifr_mtu > RL_JUMBO_MTU)
			error = EINVAL;
		else
			ifp->if_mtu = ifr->ifr_mtu;
		RL_UNLOCK(sc);
		break;
2644	case SIOCSIFFLAGS:
2645		RL_LOCK(sc);
2646		if ((ifp->if_flags & IFF_UP) != 0) {
2647			if ((ifp->if_drv_flags & IFF_DRV_RUNNING) != 0) {
2648				if (((ifp->if_flags ^ sc->rl_if_flags)
2649				    & IFF_PROMISC) != 0)
2650					re_setmulti(sc);
2651			} else
2652				re_init_locked(sc);
2653		} else {
2654			if ((ifp->if_drv_flags & IFF_DRV_RUNNING) != 0)
2655				re_stop(sc);
2656		}
2657		sc->rl_if_flags = ifp->if_flags;
2658		RL_UNLOCK(sc);
2659		break;
2660	case SIOCADDMULTI:
2661	case SIOCDELMULTI:
2662		RL_LOCK(sc);
2663		re_setmulti(sc);
2664		RL_UNLOCK(sc);
2665		break;
2666	case SIOCGIFMEDIA:
2667	case SIOCSIFMEDIA:
2668		mii = device_get_softc(sc->rl_miibus);
2669		error = ifmedia_ioctl(ifp, ifr, &mii->mii_media, command);
2670		break;
2671	case SIOCSIFCAP:
2672	    {
2673		int mask, reinit;
2674
2675		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
2676		reinit = 0;
2677#ifdef DEVICE_POLLING
2678		if (mask & IFCAP_POLLING) {
2679			if (ifr->ifr_reqcap & IFCAP_POLLING) {
2680				error = ether_poll_register(re_poll, ifp);
2681				if (error)
2682					return(error);
2683				RL_LOCK(sc);
2684				/* Disable interrupts */
2685				CSR_WRITE_2(sc, RL_IMR, 0x0000);
2686				ifp->if_capenable |= IFCAP_POLLING;
2687				RL_UNLOCK(sc);
2688			} else {
2689				error = ether_poll_deregister(ifp);
2690				/* Enable interrupts. */
2691				RL_LOCK(sc);
2692				CSR_WRITE_2(sc, RL_IMR, RL_INTRS_CPLUS);
2693				ifp->if_capenable &= ~IFCAP_POLLING;
2694				RL_UNLOCK(sc);
2695			}
2696		}
2697#endif /* DEVICE_POLLING */
2698		if (mask & IFCAP_HWCSUM) {
2699			ifp->if_capenable ^= IFCAP_HWCSUM;
2700			if (ifp->if_capenable & IFCAP_TXCSUM)
2701				ifp->if_hwassist |= RE_CSUM_FEATURES;
2702			else
2703				ifp->if_hwassist &= ~RE_CSUM_FEATURES;
2704			reinit = 1;
2705		}
2706		if (mask & IFCAP_VLAN_HWTAGGING) {
2707			ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
2708			reinit = 1;
2709		}
2710		if (mask & IFCAP_TSO4) {
2711			ifp->if_capenable ^= IFCAP_TSO4;
2712			if ((IFCAP_TSO4 & ifp->if_capenable) &&
2713			    (IFCAP_TSO4 & ifp->if_capabilities))
2714				ifp->if_hwassist |= CSUM_TSO;
2715			else
2716				ifp->if_hwassist &= ~CSUM_TSO;
2717		}
2718		if (reinit && ifp->if_drv_flags & IFF_DRV_RUNNING)
2719			re_init(sc);
2720		VLAN_CAPABILITIES(ifp);
2721	    }
2722		break;
2723	default:
2724		error = ether_ioctl(ifp, command, data);
2725		break;
2726	}
2727
2728	return (error);
2729}
2730
2731static void
2732re_watchdog(sc)
2733	struct rl_softc		*sc;
2734{
2735
2736	RL_LOCK_ASSERT(sc);
2737
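	/*
	 * The watchdog timer is armed by re_start() and decremented
	 * here once per second (re_tick() calls this routine); only
	 * act when an armed timer counts down to zero.
	 */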
2738	if (sc->rl_watchdog_timer == 0 || --sc->rl_watchdog_timer != 0)
2739		return;
2740
2741	device_printf(sc->rl_dev, "watchdog timeout\n");
2742	sc->rl_ifp->if_oerrors++;
2743
2744	re_txeof(sc);
2745	re_rxeof(sc);
2746	re_init_locked(sc);
2747}
2748
2749/*
2750 * Stop the adapter and free any mbufs allocated to the
2751 * RX and TX lists.
2752 */
2753static void
2754re_stop(sc)
2755	struct rl_softc		*sc;
2756{
	int			i;
2758	struct ifnet		*ifp;
2759	struct rl_txdesc	*txd;
2760	struct rl_rxdesc	*rxd;
2761
2762	RL_LOCK_ASSERT(sc);
2763
2764	ifp = sc->rl_ifp;
2765
2766	sc->rl_watchdog_timer = 0;
2767	callout_stop(&sc->rl_stat_callout);
2768	ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
2769
2770	CSR_WRITE_1(sc, RL_COMMAND, 0x00);
2771	CSR_WRITE_2(sc, RL_IMR, 0x0000);
2772	CSR_WRITE_2(sc, RL_ISR, 0xFFFF);
2773
2774	if (sc->rl_head != NULL) {
2775		m_freem(sc->rl_head);
2776		sc->rl_head = sc->rl_tail = NULL;
2777	}
2778
2779	/* Free the TX list buffers. */
2780
2781	for (i = 0; i < sc->rl_ldata.rl_tx_desc_cnt; i++) {
2782		txd = &sc->rl_ldata.rl_tx_desc[i];
2783		if (txd->tx_m != NULL) {
2784			bus_dmamap_sync(sc->rl_ldata.rl_tx_mtag,
2785			    txd->tx_dmamap, BUS_DMASYNC_POSTWRITE);
2786			bus_dmamap_unload(sc->rl_ldata.rl_tx_mtag,
2787			    txd->tx_dmamap);
2788			m_freem(txd->tx_m);
2789			txd->tx_m = NULL;
2790		}
2791	}
2792
2793	/* Free the RX list buffers. */
2794
2795	for (i = 0; i < sc->rl_ldata.rl_rx_desc_cnt; i++) {
2796		rxd = &sc->rl_ldata.rl_rx_desc[i];
2797		if (rxd->rx_m != NULL) {
			bus_dmamap_sync(sc->rl_ldata.rl_rx_mtag,
			    rxd->rx_dmamap, BUS_DMASYNC_POSTREAD);
2800			bus_dmamap_unload(sc->rl_ldata.rl_rx_mtag,
2801			    rxd->rx_dmamap);
2802			m_freem(rxd->rx_m);
2803			rxd->rx_m = NULL;
2804		}
2805	}
2806}
2807
/*
 * Device suspend routine.  Stop the interface and mark the device
 * as suspended so that the interrupt task does not touch the
 * hardware while it is powered down.
 */
2813static int
2814re_suspend(dev)
2815	device_t		dev;
2816{
2817	struct rl_softc		*sc;
2818
2819	sc = device_get_softc(dev);
2820
2821	RL_LOCK(sc);
2822	re_stop(sc);
2823	sc->suspended = 1;
2824	RL_UNLOCK(sc);
2825
2826	return (0);
2827}
2828
/*
 * Device resume routine.  Reinitialize the interface if it was
 * up at suspend time, and clear the suspended flag.
 */
2834static int
2835re_resume(dev)
2836	device_t		dev;
2837{
2838	struct rl_softc		*sc;
2839	struct ifnet		*ifp;
2840
2841	sc = device_get_softc(dev);
2842
2843	RL_LOCK(sc);
2844
2845	ifp = sc->rl_ifp;
2846
	/* Reinitialize the interface if necessary. */
2848	if (ifp->if_flags & IFF_UP)
2849		re_init_locked(sc);
2850
2851	sc->suspended = 0;
2852	RL_UNLOCK(sc);
2853
2854	return (0);
2855}
2856
2857/*
2858 * Stop all chip I/O so that the kernel's probe routines don't
2859 * get confused by errant DMAs when rebooting.
2860 */
2861static int
2862re_shutdown(dev)
2863	device_t		dev;
2864{
2865	struct rl_softc		*sc;
2866
2867	sc = device_get_softc(dev);
2868
2869	RL_LOCK(sc);
2870	re_stop(sc);
	/*
	 * Mark the interface as down since otherwise we will panic
	 * if an interrupt comes in later on, which can happen in
	 * some cases.
	 */
2876	sc->rl_ifp->if_flags &= ~IFF_UP;
2877	RL_UNLOCK(sc);
2878
2879	return (0);
2880}
2881