if_re.c revision 146734
1/*-
2 * Copyright (c) 1997, 1998-2003
3 *	Bill Paul <wpaul@windriver.com>.  All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 * 3. All advertising materials mentioning features or use of this software
14 *    must display the following acknowledgement:
15 *	This product includes software developed by Bill Paul.
16 * 4. Neither the name of the author nor the names of any co-contributors
17 *    may be used to endorse or promote products derived from this software
18 *    without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY Bill Paul AND CONTRIBUTORS ``AS IS'' AND
21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 * ARE DISCLAIMED.  IN NO EVENT SHALL Bill Paul OR THE VOICES IN HIS HEAD
24 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
30 * THE POSSIBILITY OF SUCH DAMAGE.
31 */
32
33#include <sys/cdefs.h>
34__FBSDID("$FreeBSD: head/sys/dev/re/if_re.c 146734 2005-05-29 04:42:30Z nyan $");
35
36/*
37 * RealTek 8139C+/8169/8169S/8110S PCI NIC driver
38 *
39 * Written by Bill Paul <wpaul@windriver.com>
40 * Senior Networking Software Engineer
41 * Wind River Systems
42 */
43
44/*
45 * This driver is designed to support RealTek's next generation of
46 * 10/100 and 10/100/1000 PCI ethernet controllers. There are currently
47 * four devices in this family: the RTL8139C+, the RTL8169, the RTL8169S
48 * and the RTL8110S.
49 *
50 * The 8139C+ is a 10/100 ethernet chip. It is backwards compatible
51 * with the older 8139 family, however it also supports a special
52 * C+ mode of operation that provides several new performance enhancing
53 * features. These include:
54 *
55 *	o Descriptor based DMA mechanism. Each descriptor represents
56 *	  a single packet fragment. Data buffers may be aligned on
57 *	  any byte boundary.
58 *
59 *	o 64-bit DMA
60 *
61 *	o TCP/IP checksum offload for both RX and TX
62 *
63 *	o High and normal priority transmit DMA rings
64 *
65 *	o VLAN tag insertion and extraction
66 *
67 *	o TCP large send (segmentation offload)
68 *
69 * Like the 8139, the 8139C+ also has a built-in 10/100 PHY. The C+
70 * programming API is fairly straightforward. The RX filtering, EEPROM
71 * access and PHY access is the same as it is on the older 8139 series
72 * chips.
73 *
74 * The 8169 is a 64-bit 10/100/1000 gigabit ethernet MAC. It has almost the
75 * same programming API and feature set as the 8139C+ with the following
76 * differences and additions:
77 *
78 *	o 1000Mbps mode
79 *
80 *	o Jumbo frames
81 *
82 *	o GMII and TBI ports/registers for interfacing with copper
83 *	  or fiber PHYs
84 *
85 *	o RX and TX DMA rings can have up to 1024 descriptors
86 *	  (the 8139C+ allows a maximum of 64)
87 *
88 *	o Slight differences in register layout from the 8139C+
89 *
90 * The TX start and timer interrupt registers are at different locations
91 * on the 8169 than they are on the 8139C+. Also, the status word in the
92 * RX descriptor has a slightly different bit layout. The 8169 does not
93 * have a built-in PHY. Most reference boards use a Marvell 88E1000 'Alaska'
94 * copper gigE PHY.
95 *
96 * The 8169S/8110S 10/100/1000 devices have built-in copper gigE PHYs
97 * (the 'S' stands for 'single-chip'). These devices have the same
98 * programming API as the older 8169, but also have some vendor-specific
99 * registers for the on-board PHY. The 8110S is a LAN-on-motherboard
100 * part designed to be pin-compatible with the RealTek 8100 10/100 chip.
101 *
102 * This driver takes advantage of the RX and TX checksum offload and
103 * VLAN tag insertion/extraction features. It also implements TX
104 * interrupt moderation using the timer interrupt registers, which
105 * significantly reduces TX interrupt load. There is also support
106 * for jumbo frames, however the 8169/8169S/8110S can not transmit
107 * jumbo frames larger than 7440, so the max MTU possible with this
108 * driver is 7422 bytes.
109 */
110
111#include <sys/param.h>
112#include <sys/endian.h>
113#include <sys/systm.h>
114#include <sys/sockio.h>
115#include <sys/mbuf.h>
116#include <sys/malloc.h>
117#include <sys/module.h>
118#include <sys/kernel.h>
119#include <sys/socket.h>
120
121#include <net/if.h>
122#include <net/if_arp.h>
123#include <net/ethernet.h>
124#include <net/if_dl.h>
125#include <net/if_media.h>
126#include <net/if_vlan_var.h>
127
128#include <net/bpf.h>
129
130#include <machine/bus.h>
131#include <machine/resource.h>
132#include <sys/bus.h>
133#include <sys/rman.h>
134
135#include <dev/mii/mii.h>
136#include <dev/mii/miivar.h>
137
138#include <dev/pci/pcireg.h>
139#include <dev/pci/pcivar.h>
140
141MODULE_DEPEND(re, pci, 1, 1, 1);
142MODULE_DEPEND(re, ether, 1, 1, 1);
143MODULE_DEPEND(re, miibus, 1, 1, 1);
144
145/* "controller miibus0" required.  See GENERIC if you get errors here. */
146#include "miibus_if.h"
147
148/*
149 * Default to using PIO access for this driver.
150 */
151#define RE_USEIOSPACE
152
153#include <pci/if_rlreg.h>
154
155#define RE_CSUM_FEATURES    (CSUM_IP | CSUM_TCP | CSUM_UDP)
156
157/*
158 * Various supported device vendors/types and their names.
159 */
160static struct rl_type re_devs[] = {
161	{ RT_VENDORID, RT_DEVICEID_8139, RL_HWREV_8139CPLUS,
162		"RealTek 8139C+ 10/100BaseTX" },
163	{ RT_VENDORID, RT_DEVICEID_8169, RL_HWREV_8169,
164		"RealTek 8169 Gigabit Ethernet" },
165	{ RT_VENDORID, RT_DEVICEID_8169, RL_HWREV_8169S,
166		"RealTek 8169S Single-chip Gigabit Ethernet" },
167	{ RT_VENDORID, RT_DEVICEID_8169, RL_HWREV_8169SB,
168		"RealTek 8169SB Single-chip Gigabit Ethernet" },
169	{ RT_VENDORID, RT_DEVICEID_8169, RL_HWREV_8110S,
170		"RealTek 8110S Single-chip Gigabit Ethernet" },
171	{ COREGA_VENDORID, COREGA_DEVICEID_CGLAPCIGT, RL_HWREV_8169S,
172		"Corega CG-LAPCIGT (RTL8169S) Gigabit Ethernet" },
173	{ 0, 0, 0, NULL }
174};
175
176static struct rl_hwrev re_hwrevs[] = {
177	{ RL_HWREV_8139, RL_8139,  "" },
178	{ RL_HWREV_8139A, RL_8139, "A" },
179	{ RL_HWREV_8139AG, RL_8139, "A-G" },
180	{ RL_HWREV_8139B, RL_8139, "B" },
181	{ RL_HWREV_8130, RL_8139, "8130" },
182	{ RL_HWREV_8139C, RL_8139, "C" },
183	{ RL_HWREV_8139D, RL_8139, "8139D/8100B/8100C" },
184	{ RL_HWREV_8139CPLUS, RL_8139CPLUS, "C+"},
185	{ RL_HWREV_8169, RL_8169, "8169"},
186	{ RL_HWREV_8169S, RL_8169, "8169S"},
187	{ RL_HWREV_8169SB, RL_8169, "8169SB"},
188	{ RL_HWREV_8110S, RL_8169, "8110S"},
189	{ RL_HWREV_8100, RL_8139, "8100"},
190	{ RL_HWREV_8101, RL_8139, "8101"},
191	{ 0, 0, NULL }
192};
193
194static int re_probe		(device_t);
195static int re_attach		(device_t);
196static int re_detach		(device_t);
197
198static int re_encap		(struct rl_softc *, struct mbuf **, int *);
199
200static void re_dma_map_addr	(void *, bus_dma_segment_t *, int, int);
201static void re_dma_map_desc	(void *, bus_dma_segment_t *, int,
202				    bus_size_t, int);
203static int re_allocmem		(device_t, struct rl_softc *);
204static int re_newbuf		(struct rl_softc *, int, struct mbuf *);
205static int re_rx_list_init	(struct rl_softc *);
206static int re_tx_list_init	(struct rl_softc *);
207#ifdef RE_FIXUP_RX
208static __inline void re_fixup_rx
209				(struct mbuf *);
210#endif
211static void re_rxeof		(struct rl_softc *);
212static void re_txeof		(struct rl_softc *);
213#ifdef DEVICE_POLLING
214static void re_poll		(struct ifnet *, enum poll_cmd, int);
215static void re_poll_locked	(struct ifnet *, enum poll_cmd, int);
216#endif
217static void re_intr		(void *);
218static void re_tick		(void *);
219static void re_tick_locked	(struct rl_softc *);
220static void re_start		(struct ifnet *);
221static void re_start_locked	(struct ifnet *);
222static int re_ioctl		(struct ifnet *, u_long, caddr_t);
223static void re_init		(void *);
224static void re_init_locked	(struct rl_softc *);
225static void re_stop		(struct rl_softc *);
226static void re_watchdog		(struct ifnet *);
227static int re_suspend		(device_t);
228static int re_resume		(device_t);
229static void re_shutdown		(device_t);
230static int re_ifmedia_upd	(struct ifnet *);
231static void re_ifmedia_sts	(struct ifnet *, struct ifmediareq *);
232
233static void re_eeprom_putbyte	(struct rl_softc *, int);
234static void re_eeprom_getword	(struct rl_softc *, int, u_int16_t *);
235static void re_read_eeprom	(struct rl_softc *, caddr_t, int, int, int);
236static int re_gmii_readreg	(device_t, int, int);
237static int re_gmii_writereg	(device_t, int, int, int);
238
239static int re_miibus_readreg	(device_t, int, int);
240static int re_miibus_writereg	(device_t, int, int, int);
241static void re_miibus_statchg	(device_t);
242
243static void re_setmulti		(struct rl_softc *);
244static void re_reset		(struct rl_softc *);
245
246static int re_diag		(struct rl_softc *);
247
248#ifdef RE_USEIOSPACE
249#define RL_RES			SYS_RES_IOPORT
250#define RL_RID			RL_PCI_LOIO
251#else
252#define RL_RES			SYS_RES_MEMORY
253#define RL_RID			RL_PCI_LOMEM
254#endif
255
256static device_method_t re_methods[] = {
257	/* Device interface */
258	DEVMETHOD(device_probe,		re_probe),
259	DEVMETHOD(device_attach,	re_attach),
260	DEVMETHOD(device_detach,	re_detach),
261	DEVMETHOD(device_suspend,	re_suspend),
262	DEVMETHOD(device_resume,	re_resume),
263	DEVMETHOD(device_shutdown,	re_shutdown),
264
265	/* bus interface */
266	DEVMETHOD(bus_print_child,	bus_generic_print_child),
267	DEVMETHOD(bus_driver_added,	bus_generic_driver_added),
268
269	/* MII interface */
270	DEVMETHOD(miibus_readreg,	re_miibus_readreg),
271	DEVMETHOD(miibus_writereg,	re_miibus_writereg),
272	DEVMETHOD(miibus_statchg,	re_miibus_statchg),
273
274	{ 0, 0 }
275};
276
277static driver_t re_driver = {
278	"re",
279	re_methods,
280	sizeof(struct rl_softc)
281};
282
283static devclass_t re_devclass;
284
285DRIVER_MODULE(re, pci, re_driver, re_devclass, 0, 0);
286DRIVER_MODULE(re, cardbus, re_driver, re_devclass, 0, 0);
287DRIVER_MODULE(miibus, re, miibus_driver, miibus_devclass, 0, 0);
288
/*
 * Read-modify-write helpers for the EEPROM command register (RL_EECMD).
 * EE_SET() turns on the bits in x, EE_CLR() turns them off.  Both expect
 * a softc pointer named 'sc' to be in scope at the point of use.
 *
 * The argument is parenthesized so that a compound mask such as (A | B)
 * is applied as a whole; without the parentheses EE_CLR(A | B) would
 * expand to "reg & ~A | B" and set B instead of clearing it.
 */
#define EE_SET(x)					\
	CSR_WRITE_1(sc, RL_EECMD,			\
		CSR_READ_1(sc, RL_EECMD) | (x))

#define EE_CLR(x)					\
	CSR_WRITE_1(sc, RL_EECMD,			\
		CSR_READ_1(sc, RL_EECMD) & ~(x))
296
297/*
298 * Send a read command and address to the EEPROM, check for ACK.
299 */
300static void
301re_eeprom_putbyte(sc, addr)
302	struct rl_softc		*sc;
303	int			addr;
304{
305	register int		d, i;
306
307	d = addr | sc->rl_eecmd_read;
308
309	/*
310	 * Feed in each bit and strobe the clock.
311	 */
312	for (i = 0x400; i; i >>= 1) {
313		if (d & i) {
314			EE_SET(RL_EE_DATAIN);
315		} else {
316			EE_CLR(RL_EE_DATAIN);
317		}
318		DELAY(100);
319		EE_SET(RL_EE_CLK);
320		DELAY(150);
321		EE_CLR(RL_EE_CLK);
322		DELAY(100);
323	}
324}
325
326/*
327 * Read a word of data stored in the EEPROM at address 'addr.'
328 */
329static void
330re_eeprom_getword(sc, addr, dest)
331	struct rl_softc		*sc;
332	int			addr;
333	u_int16_t		*dest;
334{
335	register int		i;
336	u_int16_t		word = 0;
337
338	/* Enter EEPROM access mode. */
339	CSR_WRITE_1(sc, RL_EECMD, RL_EEMODE_PROGRAM|RL_EE_SEL);
340
341	/*
342	 * Send address of word we want to read.
343	 */
344	re_eeprom_putbyte(sc, addr);
345
346	CSR_WRITE_1(sc, RL_EECMD, RL_EEMODE_PROGRAM|RL_EE_SEL);
347
348	/*
349	 * Start reading bits from EEPROM.
350	 */
351	for (i = 0x8000; i; i >>= 1) {
352		EE_SET(RL_EE_CLK);
353		DELAY(100);
354		if (CSR_READ_1(sc, RL_EECMD) & RL_EE_DATAOUT)
355			word |= i;
356		EE_CLR(RL_EE_CLK);
357		DELAY(100);
358	}
359
360	/* Turn off EEPROM access mode. */
361	CSR_WRITE_1(sc, RL_EECMD, RL_EEMODE_OFF);
362
363	*dest = word;
364}
365
366/*
367 * Read a sequence of words from the EEPROM.
368 */
369static void
370re_read_eeprom(sc, dest, off, cnt, swap)
371	struct rl_softc		*sc;
372	caddr_t			dest;
373	int			off;
374	int			cnt;
375	int			swap;
376{
377	int			i;
378	u_int16_t		word = 0, *ptr;
379
380	for (i = 0; i < cnt; i++) {
381		re_eeprom_getword(sc, off + i, &word);
382		ptr = (u_int16_t *)(dest + (i * 2));
383		if (swap)
384			*ptr = ntohs(word);
385		else
386			*ptr = word;
387	}
388}
389
390static int
391re_gmii_readreg(dev, phy, reg)
392	device_t		dev;
393	int			phy, reg;
394{
395	struct rl_softc		*sc;
396	u_int32_t		rval;
397	int			i;
398
399	if (phy != 1)
400		return (0);
401
402	sc = device_get_softc(dev);
403
404	/* Let the rgephy driver read the GMEDIASTAT register */
405
406	if (reg == RL_GMEDIASTAT) {
407		rval = CSR_READ_1(sc, RL_GMEDIASTAT);
408		return (rval);
409	}
410
411	CSR_WRITE_4(sc, RL_PHYAR, reg << 16);
412	DELAY(1000);
413
414	for (i = 0; i < RL_TIMEOUT; i++) {
415		rval = CSR_READ_4(sc, RL_PHYAR);
416		if (rval & RL_PHYAR_BUSY)
417			break;
418		DELAY(100);
419	}
420
421	if (i == RL_TIMEOUT) {
422		printf ("re%d: PHY read failed\n", sc->rl_unit);
423		return (0);
424	}
425
426	return (rval & RL_PHYAR_PHYDATA);
427}
428
429static int
430re_gmii_writereg(dev, phy, reg, data)
431	device_t		dev;
432	int			phy, reg, data;
433{
434	struct rl_softc		*sc;
435	u_int32_t		rval;
436	int			i;
437
438	sc = device_get_softc(dev);
439
440	CSR_WRITE_4(sc, RL_PHYAR, (reg << 16) |
441	    (data & RL_PHYAR_PHYDATA) | RL_PHYAR_BUSY);
442	DELAY(1000);
443
444	for (i = 0; i < RL_TIMEOUT; i++) {
445		rval = CSR_READ_4(sc, RL_PHYAR);
446		if (!(rval & RL_PHYAR_BUSY))
447			break;
448		DELAY(100);
449	}
450
451	if (i == RL_TIMEOUT) {
452		printf ("re%d: PHY write failed\n", sc->rl_unit);
453		return (0);
454	}
455
456	return (0);
457}
458
459static int
460re_miibus_readreg(dev, phy, reg)
461	device_t		dev;
462	int			phy, reg;
463{
464	struct rl_softc		*sc;
465	u_int16_t		rval = 0;
466	u_int16_t		re8139_reg = 0;
467
468	sc = device_get_softc(dev);
469
470	if (sc->rl_type == RL_8169) {
471		rval = re_gmii_readreg(dev, phy, reg);
472		return (rval);
473	}
474
475	/* Pretend the internal PHY is only at address 0 */
476	if (phy) {
477		return (0);
478	}
479	switch (reg) {
480	case MII_BMCR:
481		re8139_reg = RL_BMCR;
482		break;
483	case MII_BMSR:
484		re8139_reg = RL_BMSR;
485		break;
486	case MII_ANAR:
487		re8139_reg = RL_ANAR;
488		break;
489	case MII_ANER:
490		re8139_reg = RL_ANER;
491		break;
492	case MII_ANLPAR:
493		re8139_reg = RL_LPAR;
494		break;
495	case MII_PHYIDR1:
496	case MII_PHYIDR2:
497		return (0);
498	/*
499	 * Allow the rlphy driver to read the media status
500	 * register. If we have a link partner which does not
501	 * support NWAY, this is the register which will tell
502	 * us the results of parallel detection.
503	 */
504	case RL_MEDIASTAT:
505		rval = CSR_READ_1(sc, RL_MEDIASTAT);
506		return (rval);
507	default:
508		printf("re%d: bad phy register\n", sc->rl_unit);
509		return (0);
510	}
511	rval = CSR_READ_2(sc, re8139_reg);
512	return (rval);
513}
514
515static int
516re_miibus_writereg(dev, phy, reg, data)
517	device_t		dev;
518	int			phy, reg, data;
519{
520	struct rl_softc		*sc;
521	u_int16_t		re8139_reg = 0;
522	int			rval = 0;
523
524	sc = device_get_softc(dev);
525
526	if (sc->rl_type == RL_8169) {
527		rval = re_gmii_writereg(dev, phy, reg, data);
528		return (rval);
529	}
530
531	/* Pretend the internal PHY is only at address 0 */
532	if (phy)
533		return (0);
534
535	switch (reg) {
536	case MII_BMCR:
537		re8139_reg = RL_BMCR;
538		break;
539	case MII_BMSR:
540		re8139_reg = RL_BMSR;
541		break;
542	case MII_ANAR:
543		re8139_reg = RL_ANAR;
544		break;
545	case MII_ANER:
546		re8139_reg = RL_ANER;
547		break;
548	case MII_ANLPAR:
549		re8139_reg = RL_LPAR;
550		break;
551	case MII_PHYIDR1:
552	case MII_PHYIDR2:
553		return (0);
554		break;
555	default:
556		printf("re%d: bad phy register\n", sc->rl_unit);
557		return (0);
558	}
559	CSR_WRITE_2(sc, re8139_reg, data);
560	return (0);
561}
562
563static void
564re_miibus_statchg(dev)
565	device_t		dev;
566{
567
568}
569
570/*
571 * Program the 64-bit multicast hash filter.
572 */
573static void
574re_setmulti(sc)
575	struct rl_softc		*sc;
576{
577	struct ifnet		*ifp;
578	int			h = 0;
579	u_int32_t		hashes[2] = { 0, 0 };
580	struct ifmultiaddr	*ifma;
581	u_int32_t		rxfilt;
582	int			mcnt = 0;
583
584	RL_LOCK_ASSERT(sc);
585
586	ifp = &sc->arpcom.ac_if;
587
588	rxfilt = CSR_READ_4(sc, RL_RXCFG);
589
590	if (ifp->if_flags & IFF_ALLMULTI || ifp->if_flags & IFF_PROMISC) {
591		rxfilt |= RL_RXCFG_RX_MULTI;
592		CSR_WRITE_4(sc, RL_RXCFG, rxfilt);
593		CSR_WRITE_4(sc, RL_MAR0, 0xFFFFFFFF);
594		CSR_WRITE_4(sc, RL_MAR4, 0xFFFFFFFF);
595		return;
596	}
597
598	/* first, zot all the existing hash bits */
599	CSR_WRITE_4(sc, RL_MAR0, 0);
600	CSR_WRITE_4(sc, RL_MAR4, 0);
601
602	/* now program new ones */
603	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
604		if (ifma->ifma_addr->sa_family != AF_LINK)
605			continue;
606		h = ether_crc32_be(LLADDR((struct sockaddr_dl *)
607		    ifma->ifma_addr), ETHER_ADDR_LEN) >> 26;
608		if (h < 32)
609			hashes[0] |= (1 << h);
610		else
611			hashes[1] |= (1 << (h - 32));
612		mcnt++;
613	}
614
615	if (mcnt)
616		rxfilt |= RL_RXCFG_RX_MULTI;
617	else
618		rxfilt &= ~RL_RXCFG_RX_MULTI;
619
620	CSR_WRITE_4(sc, RL_RXCFG, rxfilt);
621	CSR_WRITE_4(sc, RL_MAR0, hashes[0]);
622	CSR_WRITE_4(sc, RL_MAR4, hashes[1]);
623}
624
625static void
626re_reset(sc)
627	struct rl_softc		*sc;
628{
629	register int		i;
630
631	RL_LOCK_ASSERT(sc);
632
633	CSR_WRITE_1(sc, RL_COMMAND, RL_CMD_RESET);
634
635	for (i = 0; i < RL_TIMEOUT; i++) {
636		DELAY(10);
637		if (!(CSR_READ_1(sc, RL_COMMAND) & RL_CMD_RESET))
638			break;
639	}
640	if (i == RL_TIMEOUT)
641		printf("re%d: reset never completed!\n", sc->rl_unit);
642
643	CSR_WRITE_1(sc, 0x82, 1);
644}
645
646/*
647 * The following routine is designed to test for a defect on some
648 * 32-bit 8169 cards. Some of these NICs have the REQ64# and ACK64#
649 * lines connected to the bus, however for a 32-bit only card, they
650 * should be pulled high. The result of this defect is that the
651 * NIC will not work right if you plug it into a 64-bit slot: DMA
652 * operations will be done with 64-bit transfers, which will fail
653 * because the 64-bit data lines aren't connected.
654 *
655 * There's no way to work around this (short of talking a soldering
656 * iron to the board), however we can detect it. The method we use
657 * here is to put the NIC into digital loopback mode, set the receiver
658 * to promiscuous mode, and then try to send a frame. We then compare
659 * the frame data we sent to what was received. If the data matches,
660 * then the NIC is working correctly, otherwise we know the user has
661 * a defective NIC which has been mistakenly plugged into a 64-bit PCI
662 * slot. In the latter case, there's no way the NIC can work correctly,
663 * so we print out a message on the console and abort the device attach.
664 */
665
666static int
667re_diag(sc)
668	struct rl_softc		*sc;
669{
670	struct ifnet		*ifp = &sc->arpcom.ac_if;
671	struct mbuf		*m0;
672	struct ether_header	*eh;
673	struct rl_desc		*cur_rx;
674	u_int16_t		status;
675	u_int32_t		rxstat;
676	int			total_len, i, error = 0;
677	u_int8_t		dst[] = { 0x00, 'h', 'e', 'l', 'l', 'o' };
678	u_int8_t		src[] = { 0x00, 'w', 'o', 'r', 'l', 'd' };
679
680	/* Allocate a single mbuf */
681	MGETHDR(m0, M_DONTWAIT, MT_DATA);
682	if (m0 == NULL)
683		return (ENOBUFS);
684
685	RL_LOCK(sc);
686
687	/*
688	 * Initialize the NIC in test mode. This sets the chip up
689	 * so that it can send and receive frames, but performs the
690	 * following special functions:
691	 * - Puts receiver in promiscuous mode
692	 * - Enables digital loopback mode
693	 * - Leaves interrupts turned off
694	 */
695
696	ifp->if_flags |= IFF_PROMISC;
697	sc->rl_testmode = 1;
698	re_init_locked(sc);
699	re_stop(sc);
700	DELAY(100000);
701	re_init_locked(sc);
702
703	/* Put some data in the mbuf */
704
705	eh = mtod(m0, struct ether_header *);
706	bcopy ((char *)&dst, eh->ether_dhost, ETHER_ADDR_LEN);
707	bcopy ((char *)&src, eh->ether_shost, ETHER_ADDR_LEN);
708	eh->ether_type = htons(ETHERTYPE_IP);
709	m0->m_pkthdr.len = m0->m_len = ETHER_MIN_LEN - ETHER_CRC_LEN;
710
711	/*
712	 * Queue the packet, start transmission.
713	 * Note: IF_HANDOFF() ultimately calls re_start() for us.
714	 */
715
716	CSR_WRITE_2(sc, RL_ISR, 0xFFFF);
717	RL_UNLOCK(sc);
718	/* XXX: re_diag must not be called when in ALTQ mode */
719	IF_HANDOFF(&ifp->if_snd, m0, ifp);
720	RL_LOCK(sc);
721	m0 = NULL;
722
723	/* Wait for it to propagate through the chip */
724
725	DELAY(100000);
726	for (i = 0; i < RL_TIMEOUT; i++) {
727		status = CSR_READ_2(sc, RL_ISR);
728		if ((status & (RL_ISR_TIMEOUT_EXPIRED|RL_ISR_RX_OK)) ==
729		    (RL_ISR_TIMEOUT_EXPIRED|RL_ISR_RX_OK))
730			break;
731		DELAY(10);
732	}
733
734	if (i == RL_TIMEOUT) {
735		printf("re%d: diagnostic failed, failed to receive packet "
736		    "in loopback mode\n", sc->rl_unit);
737		error = EIO;
738		goto done;
739	}
740
741	/*
742	 * The packet should have been dumped into the first
743	 * entry in the RX DMA ring. Grab it from there.
744	 */
745
746	bus_dmamap_sync(sc->rl_ldata.rl_rx_list_tag,
747	    sc->rl_ldata.rl_rx_list_map,
748	    BUS_DMASYNC_POSTREAD);
749	bus_dmamap_sync(sc->rl_ldata.rl_mtag,
750	    sc->rl_ldata.rl_rx_dmamap[0],
751	    BUS_DMASYNC_POSTWRITE);
752	bus_dmamap_unload(sc->rl_ldata.rl_mtag,
753	    sc->rl_ldata.rl_rx_dmamap[0]);
754
755	m0 = sc->rl_ldata.rl_rx_mbuf[0];
756	sc->rl_ldata.rl_rx_mbuf[0] = NULL;
757	eh = mtod(m0, struct ether_header *);
758
759	cur_rx = &sc->rl_ldata.rl_rx_list[0];
760	total_len = RL_RXBYTES(cur_rx);
761	rxstat = le32toh(cur_rx->rl_cmdstat);
762
763	if (total_len != ETHER_MIN_LEN) {
764		printf("re%d: diagnostic failed, received short packet\n",
765		    sc->rl_unit);
766		error = EIO;
767		goto done;
768	}
769
770	/* Test that the received packet data matches what we sent. */
771
772	if (bcmp((char *)&eh->ether_dhost, (char *)&dst, ETHER_ADDR_LEN) ||
773	    bcmp((char *)&eh->ether_shost, (char *)&src, ETHER_ADDR_LEN) ||
774	    ntohs(eh->ether_type) != ETHERTYPE_IP) {
775		printf("re%d: WARNING, DMA FAILURE!\n", sc->rl_unit);
776		printf("re%d: expected TX data: %6D/%6D/0x%x\n", sc->rl_unit,
777		    dst, ":", src, ":", ETHERTYPE_IP);
778		printf("re%d: received RX data: %6D/%6D/0x%x\n", sc->rl_unit,
779		    eh->ether_dhost, ":",  eh->ether_shost, ":",
780		    ntohs(eh->ether_type));
781		printf("re%d: You may have a defective 32-bit NIC plugged "
782		    "into a 64-bit PCI slot.\n", sc->rl_unit);
783		printf("re%d: Please re-install the NIC in a 32-bit slot "
784		    "for proper operation.\n", sc->rl_unit);
785		printf("re%d: Read the re(4) man page for more details.\n",
786		    sc->rl_unit);
787		error = EIO;
788	}
789
790done:
791	/* Turn interface off, release resources */
792
793	sc->rl_testmode = 0;
794	ifp->if_flags &= ~IFF_PROMISC;
795	re_stop(sc);
796	if (m0 != NULL)
797		m_freem(m0);
798
799	RL_UNLOCK(sc);
800
801	return (error);
802}
803
804/*
805 * Probe for a RealTek 8139C+/8169/8110 chip. Check the PCI vendor and device
806 * IDs against our list and return a device name if we find a match.
807 */
808static int
809re_probe(dev)
810	device_t		dev;
811{
812	struct rl_type		*t;
813	struct rl_softc		*sc;
814	int			rid;
815	u_int32_t		hwrev;
816
817	t = re_devs;
818	sc = device_get_softc(dev);
819
820	while (t->rl_name != NULL) {
821		if ((pci_get_vendor(dev) == t->rl_vid) &&
822		    (pci_get_device(dev) == t->rl_did)) {
823
824			/*
825			 * Temporarily map the I/O space
826			 * so we can read the chip ID register.
827			 */
828			rid = RL_RID;
829			sc->rl_res = bus_alloc_resource_any(dev, RL_RES, &rid,
830			    RF_ACTIVE);
831			if (sc->rl_res == NULL) {
832				device_printf(dev,
833				    "couldn't map ports/memory\n");
834				return (ENXIO);
835			}
836			sc->rl_btag = rman_get_bustag(sc->rl_res);
837			sc->rl_bhandle = rman_get_bushandle(sc->rl_res);
838			hwrev = CSR_READ_4(sc, RL_TXCFG) & RL_TXCFG_HWREV;
839			bus_release_resource(dev, RL_RES,
840			    RL_RID, sc->rl_res);
841			if (t->rl_basetype == hwrev) {
842				device_set_desc(dev, t->rl_name);
843				return (BUS_PROBE_DEFAULT);
844			}
845		}
846		t++;
847	}
848
849	return (ENXIO);
850}
851
852/*
853 * This routine takes the segment list provided as the result of
854 * a bus_dma_map_load() operation and assigns the addresses/lengths
855 * to RealTek DMA descriptors. This can be called either by the RX
856 * code or the TX code. In the RX case, we'll probably wind up mapping
857 * at most one segment. For the TX case, there could be any number of
858 * segments since TX packets may span multiple mbufs. In either case,
859 * if the number of segments is larger than the rl_maxsegs limit
860 * specified by the caller, we abort the mapping operation. Sadly,
861 * whoever designed the buffer mapping API did not provide a way to
862 * return an error from here, so we have to fake it a bit.
863 */
864
865static void
866re_dma_map_desc(arg, segs, nseg, mapsize, error)
867	void			*arg;
868	bus_dma_segment_t	*segs;
869	int			nseg;
870	bus_size_t		mapsize;
871	int			error;
872{
873	struct rl_dmaload_arg	*ctx;
874	struct rl_desc		*d = NULL;
875	int			i = 0, idx;
876
877	if (error)
878		return;
879
880	ctx = arg;
881
882	/* Signal error to caller if there's too many segments */
883	if (nseg > ctx->rl_maxsegs) {
884		ctx->rl_maxsegs = 0;
885		return;
886	}
887
888	/*
889	 * Map the segment array into descriptors. Note that we set the
890	 * start-of-frame and end-of-frame markers for either TX or RX, but
891	 * they really only have meaning in the TX case. (In the RX case,
892	 * it's the chip that tells us where packets begin and end.)
893	 * We also keep track of the end of the ring and set the
894	 * end-of-ring bits as needed, and we set the ownership bits
895	 * in all except the very first descriptor. (The caller will
896	 * set this descriptor later when it start transmission or
897	 * reception.)
898	 */
899	idx = ctx->rl_idx;
900	for (;;) {
901		u_int32_t		cmdstat;
902		d = &ctx->rl_ring[idx];
903		if (le32toh(d->rl_cmdstat) & RL_RDESC_STAT_OWN) {
904			ctx->rl_maxsegs = 0;
905			return;
906		}
907		cmdstat = segs[i].ds_len;
908		d->rl_bufaddr_lo = htole32(RL_ADDR_LO(segs[i].ds_addr));
909		d->rl_bufaddr_hi = htole32(RL_ADDR_HI(segs[i].ds_addr));
910		if (i == 0)
911			cmdstat |= RL_TDESC_CMD_SOF;
912		else
913			cmdstat |= RL_TDESC_CMD_OWN;
914		if (idx == (RL_RX_DESC_CNT - 1))
915			cmdstat |= RL_TDESC_CMD_EOR;
916		d->rl_cmdstat = htole32(cmdstat | ctx->rl_flags);
917		i++;
918		if (i == nseg)
919			break;
920		RL_DESC_INC(idx);
921	}
922
923	d->rl_cmdstat |= htole32(RL_TDESC_CMD_EOF);
924	ctx->rl_maxsegs = nseg;
925	ctx->rl_idx = idx;
926}
927
928/*
929 * Map a single buffer address.
930 */
931
932static void
933re_dma_map_addr(arg, segs, nseg, error)
934	void			*arg;
935	bus_dma_segment_t	*segs;
936	int			nseg;
937	int			error;
938{
939	u_int32_t		*addr;
940
941	if (error)
942		return;
943
944	KASSERT(nseg == 1, ("too many DMA segments, %d should be 1", nseg));
945	addr = arg;
946	*addr = segs->ds_addr;
947}
948
949static int
950re_allocmem(dev, sc)
951	device_t		dev;
952	struct rl_softc		*sc;
953{
954	int			error;
955	int			nseg;
956	int			i;
957
958	/*
959	 * Allocate map for RX mbufs.
960	 */
961	nseg = 32;
962	error = bus_dma_tag_create(sc->rl_parent_tag, ETHER_ALIGN, 0,
963	    BUS_SPACE_MAXADDR_32BIT, BUS_SPACE_MAXADDR, NULL,
964	    NULL, MCLBYTES * nseg, nseg, MCLBYTES, BUS_DMA_ALLOCNOW,
965	    NULL, NULL, &sc->rl_ldata.rl_mtag);
966	if (error) {
967		device_printf(dev, "could not allocate dma tag\n");
968		return (ENOMEM);
969	}
970
971	/*
972	 * Allocate map for TX descriptor list.
973	 */
974	error = bus_dma_tag_create(sc->rl_parent_tag, RL_RING_ALIGN,
975	    0, BUS_SPACE_MAXADDR_32BIT, BUS_SPACE_MAXADDR, NULL,
976	    NULL, RL_TX_LIST_SZ, 1, RL_TX_LIST_SZ, BUS_DMA_ALLOCNOW,
977	    NULL, NULL, &sc->rl_ldata.rl_tx_list_tag);
978	if (error) {
979		device_printf(dev, "could not allocate dma tag\n");
980		return (ENOMEM);
981	}
982
983	/* Allocate DMA'able memory for the TX ring */
984
985	error = bus_dmamem_alloc(sc->rl_ldata.rl_tx_list_tag,
986	    (void **)&sc->rl_ldata.rl_tx_list, BUS_DMA_NOWAIT | BUS_DMA_ZERO,
987	    &sc->rl_ldata.rl_tx_list_map);
988	if (error)
989		return (ENOMEM);
990
991	/* Load the map for the TX ring. */
992
993	error = bus_dmamap_load(sc->rl_ldata.rl_tx_list_tag,
994	     sc->rl_ldata.rl_tx_list_map, sc->rl_ldata.rl_tx_list,
995	     RL_TX_LIST_SZ, re_dma_map_addr,
996	     &sc->rl_ldata.rl_tx_list_addr, BUS_DMA_NOWAIT);
997
998	/* Create DMA maps for TX buffers */
999
1000	for (i = 0; i < RL_TX_DESC_CNT; i++) {
1001		error = bus_dmamap_create(sc->rl_ldata.rl_mtag, 0,
1002			    &sc->rl_ldata.rl_tx_dmamap[i]);
1003		if (error) {
1004			device_printf(dev, "can't create DMA map for TX\n");
1005			return (ENOMEM);
1006		}
1007	}
1008
1009	/*
1010	 * Allocate map for RX descriptor list.
1011	 */
1012	error = bus_dma_tag_create(sc->rl_parent_tag, RL_RING_ALIGN,
1013	    0, BUS_SPACE_MAXADDR_32BIT, BUS_SPACE_MAXADDR, NULL,
1014	    NULL, RL_RX_LIST_SZ, 1, RL_RX_LIST_SZ, BUS_DMA_ALLOCNOW,
1015	    NULL, NULL, &sc->rl_ldata.rl_rx_list_tag);
1016	if (error) {
1017		device_printf(dev, "could not allocate dma tag\n");
1018		return (ENOMEM);
1019	}
1020
1021	/* Allocate DMA'able memory for the RX ring */
1022
1023	error = bus_dmamem_alloc(sc->rl_ldata.rl_rx_list_tag,
1024	    (void **)&sc->rl_ldata.rl_rx_list, BUS_DMA_NOWAIT | BUS_DMA_ZERO,
1025	    &sc->rl_ldata.rl_rx_list_map);
1026	if (error)
1027		return (ENOMEM);
1028
1029	/* Load the map for the RX ring. */
1030
1031	error = bus_dmamap_load(sc->rl_ldata.rl_rx_list_tag,
1032	     sc->rl_ldata.rl_rx_list_map, sc->rl_ldata.rl_rx_list,
1033	     RL_RX_LIST_SZ, re_dma_map_addr,
1034	     &sc->rl_ldata.rl_rx_list_addr, BUS_DMA_NOWAIT);
1035
1036	/* Create DMA maps for RX buffers */
1037
1038	for (i = 0; i < RL_RX_DESC_CNT; i++) {
1039		error = bus_dmamap_create(sc->rl_ldata.rl_mtag, 0,
1040			    &sc->rl_ldata.rl_rx_dmamap[i]);
1041		if (error) {
1042			device_printf(dev, "can't create DMA map for RX\n");
1043			return (ENOMEM);
1044		}
1045	}
1046
1047	return (0);
1048}
1049
1050/*
1051 * Attach the interface. Allocate softc structures, do ifmedia
1052 * setup and ethernet/BPF attach.
1053 */
1054static int
1055re_attach(dev)
1056	device_t		dev;
1057{
1058	u_char			eaddr[ETHER_ADDR_LEN];
1059	u_int16_t		as[3];
1060	struct rl_softc		*sc;
1061	struct ifnet		*ifp;
1062	struct rl_hwrev		*hw_rev;
1063	int			hwrev;
1064	u_int16_t		re_did = 0;
1065	int			unit, error = 0, rid, i;
1066
1067	sc = device_get_softc(dev);
1068	unit = device_get_unit(dev);
1069
1070	mtx_init(&sc->rl_mtx, device_get_nameunit(dev), MTX_NETWORK_LOCK,
1071	    MTX_DEF);
1072	/*
1073	 * Map control/status registers.
1074	 */
1075	pci_enable_busmaster(dev);
1076
1077	rid = RL_RID;
1078	sc->rl_res = bus_alloc_resource_any(dev, RL_RES, &rid,
1079	    RF_ACTIVE);
1080
1081	if (sc->rl_res == NULL) {
1082		printf ("re%d: couldn't map ports/memory\n", unit);
1083		error = ENXIO;
1084		goto fail;
1085	}
1086
1087	sc->rl_btag = rman_get_bustag(sc->rl_res);
1088	sc->rl_bhandle = rman_get_bushandle(sc->rl_res);
1089
1090	/* Allocate interrupt */
1091	rid = 0;
1092	sc->rl_irq = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid,
1093	    RF_SHAREABLE | RF_ACTIVE);
1094
1095	if (sc->rl_irq == NULL) {
1096		printf("re%d: couldn't map interrupt\n", unit);
1097		error = ENXIO;
1098		goto fail;
1099	}
1100
1101	/* Reset the adapter. */
1102	RL_LOCK(sc);
1103	re_reset(sc);
1104	RL_UNLOCK(sc);
1105
1106	hw_rev = re_hwrevs;
1107	hwrev = CSR_READ_4(sc, RL_TXCFG) & RL_TXCFG_HWREV;
1108	while (hw_rev->rl_desc != NULL) {
1109		if (hw_rev->rl_rev == hwrev) {
1110			sc->rl_type = hw_rev->rl_type;
1111			break;
1112		}
1113		hw_rev++;
1114	}
1115
1116	if (sc->rl_type == RL_8169) {
1117
1118		/* Set RX length mask */
1119
1120		sc->rl_rxlenmask = RL_RDESC_STAT_GFRAGLEN;
1121
1122		/* Force station address autoload from the EEPROM */
1123
1124		CSR_WRITE_1(sc, RL_EECMD, RL_EEMODE_AUTOLOAD);
1125		for (i = 0; i < RL_TIMEOUT; i++) {
1126			if (!(CSR_READ_1(sc, RL_EECMD) & RL_EEMODE_AUTOLOAD))
1127				break;
1128			DELAY(100);
1129		}
1130		if (i == RL_TIMEOUT)
1131			printf ("re%d: eeprom autoload timed out\n", unit);
1132
1133			for (i = 0; i < ETHER_ADDR_LEN; i++)
1134				eaddr[i] = CSR_READ_1(sc, RL_IDR0 + i);
1135	} else {
1136
1137		/* Set RX length mask */
1138
1139		sc->rl_rxlenmask = RL_RDESC_STAT_FRAGLEN;
1140
1141		sc->rl_eecmd_read = RL_EECMD_READ_6BIT;
1142		re_read_eeprom(sc, (caddr_t)&re_did, 0, 1, 0);
1143		if (re_did != 0x8129)
1144			sc->rl_eecmd_read = RL_EECMD_READ_8BIT;
1145
1146		/*
1147		 * Get station address from the EEPROM.
1148		 */
1149		re_read_eeprom(sc, (caddr_t)as, RL_EE_EADDR, 3, 0);
1150		for (i = 0; i < 3; i++) {
1151			eaddr[(i * 2) + 0] = as[i] & 0xff;
1152			eaddr[(i * 2) + 1] = as[i] >> 8;
1153		}
1154	}
1155
1156	sc->rl_unit = unit;
1157	bcopy(eaddr, (char *)&sc->arpcom.ac_enaddr, ETHER_ADDR_LEN);
1158
1159	/*
1160	 * Allocate the parent bus DMA tag appropriate for PCI.
1161	 */
1162#define RL_NSEG_NEW 32
1163	error = bus_dma_tag_create(NULL,	/* parent */
1164			1, 0,			/* alignment, boundary */
1165			BUS_SPACE_MAXADDR_32BIT,/* lowaddr */
1166			BUS_SPACE_MAXADDR,	/* highaddr */
1167			NULL, NULL,		/* filter, filterarg */
1168			MAXBSIZE, RL_NSEG_NEW,	/* maxsize, nsegments */
1169			BUS_SPACE_MAXSIZE_32BIT,/* maxsegsize */
1170			BUS_DMA_ALLOCNOW,	/* flags */
1171			NULL, NULL,		/* lockfunc, lockarg */
1172			&sc->rl_parent_tag);
1173	if (error)
1174		goto fail;
1175
1176	error = re_allocmem(dev, sc);
1177
1178	if (error)
1179		goto fail;
1180
1181	/* Do MII setup */
1182	if (mii_phy_probe(dev, &sc->rl_miibus,
1183	    re_ifmedia_upd, re_ifmedia_sts)) {
1184		printf("re%d: MII without any phy!\n", sc->rl_unit);
1185		error = ENXIO;
1186		goto fail;
1187	}
1188
1189	ifp = &sc->arpcom.ac_if;
1190	ifp->if_softc = sc;
1191	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
1192	ifp->if_mtu = ETHERMTU;
1193	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
1194	ifp->if_ioctl = re_ioctl;
1195	ifp->if_capabilities = IFCAP_VLAN_MTU;
1196	ifp->if_start = re_start;
1197	ifp->if_hwassist = /*RE_CSUM_FEATURES*/0;
1198	ifp->if_capabilities |= IFCAP_HWCSUM|IFCAP_VLAN_HWTAGGING;
1199#ifdef DEVICE_POLLING
1200	ifp->if_capabilities |= IFCAP_POLLING;
1201#endif
1202	ifp->if_watchdog = re_watchdog;
1203	ifp->if_init = re_init;
1204	if (sc->rl_type == RL_8169)
1205		ifp->if_baudrate = 1000000000;
1206	else
1207		ifp->if_baudrate = 100000000;
1208	IFQ_SET_MAXLEN(&ifp->if_snd,  RL_IFQ_MAXLEN);
1209	ifp->if_snd.ifq_drv_maxlen = RL_IFQ_MAXLEN;
1210	IFQ_SET_READY(&ifp->if_snd);
1211	ifp->if_capenable = ifp->if_capabilities & ~IFCAP_HWCSUM;
1212
1213	callout_handle_init(&sc->rl_stat_ch);
1214
1215	/*
1216	 * Call MI attach routine.
1217	 */
1218	ether_ifattach(ifp, eaddr);
1219
1220	/* Perform hardware diagnostic. */
1221	error = re_diag(sc);
1222
1223	if (error) {
1224		printf("re%d: attach aborted due to hardware diag failure\n",
1225		    unit);
1226		ether_ifdetach(ifp);
1227		goto fail;
1228	}
1229
1230	/* Hook interrupt last to avoid having to lock softc */
1231	error = bus_setup_intr(dev, sc->rl_irq, INTR_TYPE_NET | INTR_MPSAFE,
1232	    re_intr, sc, &sc->rl_intrhand);
1233	if (error) {
1234		printf("re%d: couldn't set up irq\n", unit);
1235		ether_ifdetach(ifp);
1236	}
1237
1238fail:
1239	if (error)
1240		re_detach(dev);
1241
1242	return (error);
1243}
1244
1245/*
1246 * Shutdown hardware and free up resources. This can be called any
1247 * time after the mutex has been initialized. It is called in both
1248 * the error case in attach and the normal detach case so it needs
1249 * to be careful about only freeing resources that have actually been
1250 * allocated.
1251 */
1252static int
1253re_detach(dev)
1254	device_t		dev;
1255{
1256	struct rl_softc		*sc;
1257	struct ifnet		*ifp;
1258	int			i;
1259	int			attached;
1260
1261	sc = device_get_softc(dev);
1262	ifp = &sc->arpcom.ac_if;
1263	KASSERT(mtx_initialized(&sc->rl_mtx), ("re mutex not initialized"));
1264
1265	attached = device_is_attached(dev);
1266	/* These should only be active if attach succeeded */
1267	if (attached)
1268		ether_ifdetach(ifp);
1269
1270	RL_LOCK(sc);
1271#if 0
1272	sc->suspended = 1;
1273#endif
1274
1275	/* These should only be active if attach succeeded */
1276	if (attached) {
1277		re_stop(sc);
1278		/*
1279		 * Force off the IFF_UP flag here, in case someone
1280		 * still had a BPF descriptor attached to this
1281		 * interface. If they do, ether_ifdetach() will cause
1282		 * the BPF code to try and clear the promisc mode
1283		 * flag, which will bubble down to re_ioctl(),
1284		 * which will try to call re_init() again. This will
1285		 * turn the NIC back on and restart the MII ticker,
1286		 * which will panic the system when the kernel tries
1287		 * to invoke the re_tick() function that isn't there
1288		 * anymore.
1289		 */
1290		ifp->if_flags &= ~IFF_UP;
1291	}
1292	if (sc->rl_miibus)
1293		device_delete_child(dev, sc->rl_miibus);
1294	bus_generic_detach(dev);
1295
1296	/*
1297	 * The rest is resource deallocation, so we should already be
1298	 * stopped here.
1299	 */
1300	RL_UNLOCK(sc);
1301
1302	if (sc->rl_intrhand)
1303		bus_teardown_intr(dev, sc->rl_irq, sc->rl_intrhand);
1304	if (sc->rl_irq)
1305		bus_release_resource(dev, SYS_RES_IRQ, 0, sc->rl_irq);
1306	if (sc->rl_res)
1307		bus_release_resource(dev, RL_RES, RL_RID, sc->rl_res);
1308
1309
1310	/* Unload and free the RX DMA ring memory and map */
1311
1312	if (sc->rl_ldata.rl_rx_list_tag) {
1313		bus_dmamap_unload(sc->rl_ldata.rl_rx_list_tag,
1314		    sc->rl_ldata.rl_rx_list_map);
1315		bus_dmamem_free(sc->rl_ldata.rl_rx_list_tag,
1316		    sc->rl_ldata.rl_rx_list,
1317		    sc->rl_ldata.rl_rx_list_map);
1318		bus_dma_tag_destroy(sc->rl_ldata.rl_rx_list_tag);
1319	}
1320
1321	/* Unload and free the TX DMA ring memory and map */
1322
1323	if (sc->rl_ldata.rl_tx_list_tag) {
1324		bus_dmamap_unload(sc->rl_ldata.rl_tx_list_tag,
1325		    sc->rl_ldata.rl_tx_list_map);
1326		bus_dmamem_free(sc->rl_ldata.rl_tx_list_tag,
1327		    sc->rl_ldata.rl_tx_list,
1328		    sc->rl_ldata.rl_tx_list_map);
1329		bus_dma_tag_destroy(sc->rl_ldata.rl_tx_list_tag);
1330	}
1331
1332	/* Destroy all the RX and TX buffer maps */
1333
1334	if (sc->rl_ldata.rl_mtag) {
1335		for (i = 0; i < RL_TX_DESC_CNT; i++)
1336			bus_dmamap_destroy(sc->rl_ldata.rl_mtag,
1337			    sc->rl_ldata.rl_tx_dmamap[i]);
1338		for (i = 0; i < RL_RX_DESC_CNT; i++)
1339			bus_dmamap_destroy(sc->rl_ldata.rl_mtag,
1340			    sc->rl_ldata.rl_rx_dmamap[i]);
1341		bus_dma_tag_destroy(sc->rl_ldata.rl_mtag);
1342	}
1343
1344	/* Unload and free the stats buffer and map */
1345
1346	if (sc->rl_ldata.rl_stag) {
1347		bus_dmamap_unload(sc->rl_ldata.rl_stag,
1348		    sc->rl_ldata.rl_rx_list_map);
1349		bus_dmamem_free(sc->rl_ldata.rl_stag,
1350		    sc->rl_ldata.rl_stats,
1351		    sc->rl_ldata.rl_smap);
1352		bus_dma_tag_destroy(sc->rl_ldata.rl_stag);
1353	}
1354
1355	if (sc->rl_parent_tag)
1356		bus_dma_tag_destroy(sc->rl_parent_tag);
1357
1358	mtx_destroy(&sc->rl_mtx);
1359
1360	return (0);
1361}
1362
1363static int
1364re_newbuf(sc, idx, m)
1365	struct rl_softc		*sc;
1366	int			idx;
1367	struct mbuf		*m;
1368{
1369	struct rl_dmaload_arg	arg;
1370	struct mbuf		*n = NULL;
1371	int			error;
1372
1373	if (m == NULL) {
1374		n = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR);
1375		if (n == NULL)
1376			return (ENOBUFS);
1377		m = n;
1378	} else
1379		m->m_data = m->m_ext.ext_buf;
1380
1381	m->m_len = m->m_pkthdr.len = MCLBYTES;
1382#ifdef RE_FIXUP_RX
1383	/*
1384	 * This is part of an evil trick to deal with non-x86 platforms.
1385	 * The RealTek chip requires RX buffers to be aligned on 64-bit
1386	 * boundaries, but that will hose non-x86 machines. To get around
1387	 * this, we leave some empty space at the start of each buffer
1388	 * and for non-x86 hosts, we copy the buffer back six bytes
1389	 * to achieve word alignment. This is slightly more efficient
1390	 * than allocating a new buffer, copying the contents, and
1391	 * discarding the old buffer.
1392	 */
1393	m_adj(m, RE_ETHER_ALIGN);
1394#endif
1395	arg.sc = sc;
1396	arg.rl_idx = idx;
1397	arg.rl_maxsegs = 1;
1398	arg.rl_flags = 0;
1399	arg.rl_ring = sc->rl_ldata.rl_rx_list;
1400
1401	error = bus_dmamap_load_mbuf(sc->rl_ldata.rl_mtag,
1402	    sc->rl_ldata.rl_rx_dmamap[idx], m, re_dma_map_desc,
1403	    &arg, BUS_DMA_NOWAIT);
1404	if (error || arg.rl_maxsegs != 1) {
1405		if (n != NULL)
1406			m_freem(n);
1407		return (ENOMEM);
1408	}
1409
1410	sc->rl_ldata.rl_rx_list[idx].rl_cmdstat |= htole32(RL_RDESC_CMD_OWN);
1411	sc->rl_ldata.rl_rx_mbuf[idx] = m;
1412
1413	bus_dmamap_sync(sc->rl_ldata.rl_mtag,
1414	    sc->rl_ldata.rl_rx_dmamap[idx],
1415	    BUS_DMASYNC_PREREAD);
1416
1417	return (0);
1418}
1419
1420#ifdef RE_FIXUP_RX
1421static __inline void
1422re_fixup_rx(m)
1423	struct mbuf		*m;
1424{
1425	int                     i;
1426	uint16_t                *src, *dst;
1427
1428	src = mtod(m, uint16_t *);
1429	dst = src - (RE_ETHER_ALIGN - ETHER_ALIGN) / sizeof *src;
1430
1431	for (i = 0; i < (m->m_len / sizeof(uint16_t) + 1); i++)
1432		*dst++ = *src++;
1433
1434	m->m_data -= RE_ETHER_ALIGN - ETHER_ALIGN;
1435
1436	return;
1437}
1438#endif
1439
1440static int
1441re_tx_list_init(sc)
1442	struct rl_softc		*sc;
1443{
1444
1445	RL_LOCK_ASSERT(sc);
1446
1447	bzero ((char *)sc->rl_ldata.rl_tx_list, RL_TX_LIST_SZ);
1448	bzero ((char *)&sc->rl_ldata.rl_tx_mbuf,
1449	    (RL_TX_DESC_CNT * sizeof(struct mbuf *)));
1450
1451	bus_dmamap_sync(sc->rl_ldata.rl_tx_list_tag,
1452	    sc->rl_ldata.rl_tx_list_map, BUS_DMASYNC_PREWRITE);
1453	sc->rl_ldata.rl_tx_prodidx = 0;
1454	sc->rl_ldata.rl_tx_considx = 0;
1455	sc->rl_ldata.rl_tx_free = RL_TX_DESC_CNT;
1456
1457	return (0);
1458}
1459
1460static int
1461re_rx_list_init(sc)
1462	struct rl_softc		*sc;
1463{
1464	int			i;
1465
1466	bzero ((char *)sc->rl_ldata.rl_rx_list, RL_RX_LIST_SZ);
1467	bzero ((char *)&sc->rl_ldata.rl_rx_mbuf,
1468	    (RL_RX_DESC_CNT * sizeof(struct mbuf *)));
1469
1470	for (i = 0; i < RL_RX_DESC_CNT; i++) {
1471		if (re_newbuf(sc, i, NULL) == ENOBUFS)
1472			return (ENOBUFS);
1473	}
1474
1475	/* Flush the RX descriptors */
1476
1477	bus_dmamap_sync(sc->rl_ldata.rl_rx_list_tag,
1478	    sc->rl_ldata.rl_rx_list_map,
1479	    BUS_DMASYNC_PREWRITE|BUS_DMASYNC_PREREAD);
1480
1481	sc->rl_ldata.rl_rx_prodidx = 0;
1482	sc->rl_head = sc->rl_tail = NULL;
1483
1484	return (0);
1485}
1486
1487/*
1488 * RX handler for C+ and 8169. For the gigE chips, we support
1489 * the reception of jumbo frames that have been fragmented
1490 * across multiple 2K mbuf cluster buffers.
1491 */
1492static void
1493re_rxeof(sc)
1494	struct rl_softc		*sc;
1495{
1496	struct mbuf		*m;
1497	struct ifnet		*ifp;
1498	int			i, total_len;
1499	struct rl_desc		*cur_rx;
1500	u_int32_t		rxstat, rxvlan;
1501
1502	RL_LOCK_ASSERT(sc);
1503
1504	ifp = &sc->arpcom.ac_if;
1505	i = sc->rl_ldata.rl_rx_prodidx;
1506
1507	/* Invalidate the descriptor memory */
1508
1509	bus_dmamap_sync(sc->rl_ldata.rl_rx_list_tag,
1510	    sc->rl_ldata.rl_rx_list_map,
1511	    BUS_DMASYNC_POSTREAD);
1512
1513	while (!RL_OWN(&sc->rl_ldata.rl_rx_list[i])) {
1514		cur_rx = &sc->rl_ldata.rl_rx_list[i];
1515		m = sc->rl_ldata.rl_rx_mbuf[i];
1516		total_len = RL_RXBYTES(cur_rx);
1517		rxstat = le32toh(cur_rx->rl_cmdstat);
1518		rxvlan = le32toh(cur_rx->rl_vlanctl);
1519
1520		/* Invalidate the RX mbuf and unload its map */
1521
1522		bus_dmamap_sync(sc->rl_ldata.rl_mtag,
1523		    sc->rl_ldata.rl_rx_dmamap[i],
1524		    BUS_DMASYNC_POSTWRITE);
1525		bus_dmamap_unload(sc->rl_ldata.rl_mtag,
1526		    sc->rl_ldata.rl_rx_dmamap[i]);
1527
1528		if (!(rxstat & RL_RDESC_STAT_EOF)) {
1529			m->m_len = RE_RX_DESC_BUFLEN;
1530			if (sc->rl_head == NULL)
1531				sc->rl_head = sc->rl_tail = m;
1532			else {
1533				m->m_flags &= ~M_PKTHDR;
1534				sc->rl_tail->m_next = m;
1535				sc->rl_tail = m;
1536			}
1537			re_newbuf(sc, i, NULL);
1538			RL_DESC_INC(i);
1539			continue;
1540		}
1541
1542		/*
1543		 * NOTE: for the 8139C+, the frame length field
1544		 * is always 12 bits in size, but for the gigE chips,
1545		 * it is 13 bits (since the max RX frame length is 16K).
1546		 * Unfortunately, all 32 bits in the status word
1547		 * were already used, so to make room for the extra
1548		 * length bit, RealTek took out the 'frame alignment
1549		 * error' bit and shifted the other status bits
1550		 * over one slot. The OWN, EOR, FS and LS bits are
1551		 * still in the same places. We have already extracted
1552		 * the frame length and checked the OWN bit, so rather
1553		 * than using an alternate bit mapping, we shift the
1554		 * status bits one space to the right so we can evaluate
1555		 * them using the 8169 status as though it was in the
1556		 * same format as that of the 8139C+.
1557		 */
1558		if (sc->rl_type == RL_8169)
1559			rxstat >>= 1;
1560
1561		/*
1562		 * if total_len > 2^13-1, both _RXERRSUM and _GIANT will be
1563		 * set, but if CRC is clear, it will still be a valid frame.
1564		 */
1565		if (rxstat & RL_RDESC_STAT_RXERRSUM && !(total_len > 8191 &&
1566		    (rxstat & RL_RDESC_STAT_ERRS) == RL_RDESC_STAT_GIANT)) {
1567			ifp->if_ierrors++;
1568			/*
1569			 * If this is part of a multi-fragment packet,
1570			 * discard all the pieces.
1571			 */
1572			if (sc->rl_head != NULL) {
1573				m_freem(sc->rl_head);
1574				sc->rl_head = sc->rl_tail = NULL;
1575			}
1576			re_newbuf(sc, i, m);
1577			RL_DESC_INC(i);
1578			continue;
1579		}
1580
1581		/*
1582		 * If allocating a replacement mbuf fails,
1583		 * reload the current one.
1584		 */
1585
1586		if (re_newbuf(sc, i, NULL)) {
1587			ifp->if_ierrors++;
1588			if (sc->rl_head != NULL) {
1589				m_freem(sc->rl_head);
1590				sc->rl_head = sc->rl_tail = NULL;
1591			}
1592			re_newbuf(sc, i, m);
1593			RL_DESC_INC(i);
1594			continue;
1595		}
1596
1597		RL_DESC_INC(i);
1598
1599		if (sc->rl_head != NULL) {
1600			m->m_len = total_len % RE_RX_DESC_BUFLEN;
1601			if (m->m_len == 0)
1602				m->m_len = RE_RX_DESC_BUFLEN;
1603			/*
1604			 * Special case: if there's 4 bytes or less
1605			 * in this buffer, the mbuf can be discarded:
1606			 * the last 4 bytes is the CRC, which we don't
1607			 * care about anyway.
1608			 */
1609			if (m->m_len <= ETHER_CRC_LEN) {
1610				sc->rl_tail->m_len -=
1611				    (ETHER_CRC_LEN - m->m_len);
1612				m_freem(m);
1613			} else {
1614				m->m_len -= ETHER_CRC_LEN;
1615				m->m_flags &= ~M_PKTHDR;
1616				sc->rl_tail->m_next = m;
1617			}
1618			m = sc->rl_head;
1619			sc->rl_head = sc->rl_tail = NULL;
1620			m->m_pkthdr.len = total_len - ETHER_CRC_LEN;
1621		} else
1622			m->m_pkthdr.len = m->m_len =
1623			    (total_len - ETHER_CRC_LEN);
1624
1625#ifdef RE_FIXUP_RX
1626		re_fixup_rx(m);
1627#endif
1628		ifp->if_ipackets++;
1629		m->m_pkthdr.rcvif = ifp;
1630
1631		/* Do RX checksumming if enabled */
1632
1633		if (ifp->if_capenable & IFCAP_RXCSUM) {
1634
1635			/* Check IP header checksum */
1636			if (rxstat & RL_RDESC_STAT_PROTOID)
1637				m->m_pkthdr.csum_flags |= CSUM_IP_CHECKED;
1638			if (!(rxstat & RL_RDESC_STAT_IPSUMBAD))
1639				m->m_pkthdr.csum_flags |= CSUM_IP_VALID;
1640
1641			/* Check TCP/UDP checksum */
1642			if ((RL_TCPPKT(rxstat) &&
1643			    !(rxstat & RL_RDESC_STAT_TCPSUMBAD)) ||
1644			    (RL_UDPPKT(rxstat) &&
1645			    !(rxstat & RL_RDESC_STAT_UDPSUMBAD))) {
1646				m->m_pkthdr.csum_flags |=
1647				    CSUM_DATA_VALID|CSUM_PSEUDO_HDR;
1648				m->m_pkthdr.csum_data = 0xffff;
1649			}
1650		}
1651
1652		if (rxvlan & RL_RDESC_VLANCTL_TAG)
1653			VLAN_INPUT_TAG(ifp, m,
1654			    ntohs((rxvlan & RL_RDESC_VLANCTL_DATA)), continue);
1655		RL_UNLOCK(sc);
1656		(*ifp->if_input)(ifp, m);
1657		RL_LOCK(sc);
1658	}
1659
1660	/* Flush the RX DMA ring */
1661
1662	bus_dmamap_sync(sc->rl_ldata.rl_rx_list_tag,
1663	    sc->rl_ldata.rl_rx_list_map,
1664	    BUS_DMASYNC_PREWRITE|BUS_DMASYNC_PREREAD);
1665
1666	sc->rl_ldata.rl_rx_prodidx = i;
1667}
1668
1669static void
1670re_txeof(sc)
1671	struct rl_softc		*sc;
1672{
1673	struct ifnet		*ifp;
1674	u_int32_t		txstat;
1675	int			idx;
1676
1677	ifp = &sc->arpcom.ac_if;
1678	idx = sc->rl_ldata.rl_tx_considx;
1679
1680	/* Invalidate the TX descriptor list */
1681
1682	bus_dmamap_sync(sc->rl_ldata.rl_tx_list_tag,
1683	    sc->rl_ldata.rl_tx_list_map,
1684	    BUS_DMASYNC_POSTREAD);
1685
1686	while (idx != sc->rl_ldata.rl_tx_prodidx) {
1687
1688		txstat = le32toh(sc->rl_ldata.rl_tx_list[idx].rl_cmdstat);
1689		if (txstat & RL_TDESC_CMD_OWN)
1690			break;
1691
1692		/*
1693		 * We only stash mbufs in the last descriptor
1694		 * in a fragment chain, which also happens to
1695		 * be the only place where the TX status bits
1696		 * are valid.
1697		 */
1698
1699		if (txstat & RL_TDESC_CMD_EOF) {
1700			m_freem(sc->rl_ldata.rl_tx_mbuf[idx]);
1701			sc->rl_ldata.rl_tx_mbuf[idx] = NULL;
1702			bus_dmamap_unload(sc->rl_ldata.rl_mtag,
1703			    sc->rl_ldata.rl_tx_dmamap[idx]);
1704			if (txstat & (RL_TDESC_STAT_EXCESSCOL|
1705			    RL_TDESC_STAT_COLCNT))
1706				ifp->if_collisions++;
1707			if (txstat & RL_TDESC_STAT_TXERRSUM)
1708				ifp->if_oerrors++;
1709			else
1710				ifp->if_opackets++;
1711		}
1712		sc->rl_ldata.rl_tx_free++;
1713		RL_DESC_INC(idx);
1714	}
1715
1716	/* No changes made to the TX ring, so no flush needed */
1717
1718	if (idx != sc->rl_ldata.rl_tx_considx) {
1719		sc->rl_ldata.rl_tx_considx = idx;
1720		ifp->if_flags &= ~IFF_OACTIVE;
1721		ifp->if_timer = 0;
1722	}
1723
1724	/*
1725	 * If not all descriptors have been released reaped yet,
1726	 * reload the timer so that we will eventually get another
1727	 * interrupt that will cause us to re-enter this routine.
1728	 * This is done in case the transmitter has gone idle.
1729	 */
1730	if (sc->rl_ldata.rl_tx_free != RL_TX_DESC_CNT)
1731		CSR_WRITE_4(sc, RL_TIMERCNT, 1);
1732}
1733
/*
 * Timeout entry point: take the softc lock and run the locked tick
 * handler.  'xsc' is the softc pointer handed to timeout().
 */
static void
re_tick(void *xsc)
{
	struct rl_softc		*sc = xsc;

	RL_LOCK(sc);
	re_tick_locked(sc);
	RL_UNLOCK(sc);
}
1745
1746static void
1747re_tick_locked(sc)
1748	struct rl_softc		*sc;
1749{
1750	struct mii_data		*mii;
1751
1752	RL_LOCK_ASSERT(sc);
1753
1754	mii = device_get_softc(sc->rl_miibus);
1755
1756	mii_tick(mii);
1757
1758	sc->rl_stat_ch = timeout(re_tick, sc, hz);
1759}
1760
1761#ifdef DEVICE_POLLING
1762static void
1763re_poll(struct ifnet *ifp, enum poll_cmd cmd, int count)
1764{
1765	struct rl_softc *sc = ifp->if_softc;
1766
1767	RL_LOCK(sc);
1768	re_poll_locked(ifp, cmd, count);
1769	RL_UNLOCK(sc);
1770}
1771
1772static void
1773re_poll_locked(struct ifnet *ifp, enum poll_cmd cmd, int count)
1774{
1775	struct rl_softc *sc = ifp->if_softc;
1776
1777	RL_LOCK_ASSERT(sc);
1778
1779	if (!(ifp->if_capenable & IFCAP_POLLING)) {
1780		ether_poll_deregister(ifp);
1781		cmd = POLL_DEREGISTER;
1782	}
1783	if (cmd == POLL_DEREGISTER) { /* final call, enable interrupts */
1784		CSR_WRITE_2(sc, RL_IMR, RL_INTRS_CPLUS);
1785		return;
1786	}
1787
1788	sc->rxcycles = count;
1789	re_rxeof(sc);
1790	re_txeof(sc);
1791
1792	if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1793		re_start_locked(ifp);
1794
1795	if (cmd == POLL_AND_CHECK_STATUS) { /* also check status register */
1796		u_int16_t       status;
1797
1798		status = CSR_READ_2(sc, RL_ISR);
1799		if (status == 0xffff)
1800			return;
1801		if (status)
1802			CSR_WRITE_2(sc, RL_ISR, status);
1803
1804		/*
1805		 * XXX check behaviour on receiver stalls.
1806		 */
1807
1808		if (status & RL_ISR_SYSTEM_ERR) {
1809			re_reset(sc);
1810			re_init_locked(sc);
1811		}
1812	}
1813}
1814#endif /* DEVICE_POLLING */
1815
1816static void
1817re_intr(arg)
1818	void			*arg;
1819{
1820	struct rl_softc		*sc;
1821	struct ifnet		*ifp;
1822	u_int16_t		status;
1823
1824	sc = arg;
1825
1826	RL_LOCK(sc);
1827
1828	ifp = &sc->arpcom.ac_if;
1829
1830	if (sc->suspended || !(ifp->if_flags & IFF_UP))
1831		goto done_locked;
1832
1833#ifdef DEVICE_POLLING
1834	if  (ifp->if_flags & IFF_POLLING)
1835		goto done_locked;
1836	if ((ifp->if_capenable & IFCAP_POLLING) &&
1837	    ether_poll_register(re_poll, ifp)) { /* ok, disable interrupts */
1838		CSR_WRITE_2(sc, RL_IMR, 0x0000);
1839		re_poll_locked(ifp, 0, 1);
1840		goto done_locked;
1841	}
1842#endif /* DEVICE_POLLING */
1843
1844	for (;;) {
1845
1846		status = CSR_READ_2(sc, RL_ISR);
1847		/* If the card has gone away the read returns 0xffff. */
1848		if (status == 0xffff)
1849			break;
1850		if (status)
1851			CSR_WRITE_2(sc, RL_ISR, status);
1852
1853		if ((status & RL_INTRS_CPLUS) == 0)
1854			break;
1855
1856		if ((status & RL_ISR_RX_OK) ||
1857		    (status & RL_ISR_RX_ERR))
1858			re_rxeof(sc);
1859
1860		if ((status & RL_ISR_TIMEOUT_EXPIRED) ||
1861		    (status & RL_ISR_TX_ERR) ||
1862		    (status & RL_ISR_TX_DESC_UNAVAIL))
1863			re_txeof(sc);
1864
1865		if (status & RL_ISR_SYSTEM_ERR) {
1866			re_reset(sc);
1867			re_init_locked(sc);
1868		}
1869
1870		if (status & RL_ISR_LINKCHG) {
1871			untimeout(re_tick, sc, sc->rl_stat_ch);
1872			re_tick_locked(sc);
1873		}
1874	}
1875
1876	if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1877		re_start_locked(ifp);
1878
1879done_locked:
1880	RL_UNLOCK(sc);
1881}
1882
1883static int
1884re_encap(sc, m_head, idx)
1885	struct rl_softc		*sc;
1886	struct mbuf		**m_head;
1887	int			*idx;
1888{
1889	struct mbuf		*m_new = NULL;
1890	struct rl_dmaload_arg	arg;
1891	bus_dmamap_t		map;
1892	int			error;
1893	struct m_tag		*mtag;
1894
1895	RL_LOCK_ASSERT(sc);
1896
1897	if (sc->rl_ldata.rl_tx_free <= 4)
1898		return (EFBIG);
1899
1900	/*
1901	 * Set up checksum offload. Note: checksum offload bits must
1902	 * appear in all descriptors of a multi-descriptor transmit
1903	 * attempt. This is according to testing done with an 8169
1904	 * chip. This is a requirement.
1905	 */
1906
1907	arg.rl_flags = 0;
1908
1909	if ((*m_head)->m_pkthdr.csum_flags & CSUM_IP)
1910		arg.rl_flags |= RL_TDESC_CMD_IPCSUM;
1911	if ((*m_head)->m_pkthdr.csum_flags & CSUM_TCP)
1912		arg.rl_flags |= RL_TDESC_CMD_TCPCSUM;
1913	if ((*m_head)->m_pkthdr.csum_flags & CSUM_UDP)
1914		arg.rl_flags |= RL_TDESC_CMD_UDPCSUM;
1915
1916	arg.sc = sc;
1917	arg.rl_idx = *idx;
1918	arg.rl_maxsegs = sc->rl_ldata.rl_tx_free;
1919	if (arg.rl_maxsegs > 4)
1920		arg.rl_maxsegs -= 4;
1921	arg.rl_ring = sc->rl_ldata.rl_tx_list;
1922
1923	map = sc->rl_ldata.rl_tx_dmamap[*idx];
1924	error = bus_dmamap_load_mbuf(sc->rl_ldata.rl_mtag, map,
1925	    *m_head, re_dma_map_desc, &arg, BUS_DMA_NOWAIT);
1926
1927	if (error && error != EFBIG) {
1928		printf("re%d: can't map mbuf (error %d)\n", sc->rl_unit, error);
1929		return (ENOBUFS);
1930	}
1931
1932	/* Too many segments to map, coalesce into a single mbuf */
1933
1934	if (error || arg.rl_maxsegs == 0) {
1935		m_new = m_defrag(*m_head, M_DONTWAIT);
1936		if (m_new == NULL)
1937			return (ENOBUFS);
1938		else
1939			*m_head = m_new;
1940
1941		arg.sc = sc;
1942		arg.rl_idx = *idx;
1943		arg.rl_maxsegs = sc->rl_ldata.rl_tx_free;
1944		arg.rl_ring = sc->rl_ldata.rl_tx_list;
1945
1946		error = bus_dmamap_load_mbuf(sc->rl_ldata.rl_mtag, map,
1947		    *m_head, re_dma_map_desc, &arg, BUS_DMA_NOWAIT);
1948		if (error) {
1949			printf("re%d: can't map mbuf (error %d)\n",
1950			    sc->rl_unit, error);
1951			return (EFBIG);
1952		}
1953	}
1954
1955	/*
1956	 * Insure that the map for this transmission
1957	 * is placed at the array index of the last descriptor
1958	 * in this chain.  (Swap last and first dmamaps.)
1959	 */
1960	sc->rl_ldata.rl_tx_dmamap[*idx] =
1961	    sc->rl_ldata.rl_tx_dmamap[arg.rl_idx];
1962	sc->rl_ldata.rl_tx_dmamap[arg.rl_idx] = map;
1963
1964	sc->rl_ldata.rl_tx_mbuf[arg.rl_idx] = *m_head;
1965	sc->rl_ldata.rl_tx_free -= arg.rl_maxsegs;
1966
1967	/*
1968	 * Set up hardware VLAN tagging. Note: vlan tag info must
1969	 * appear in the first descriptor of a multi-descriptor
1970	 * transmission attempt.
1971	 */
1972
1973	mtag = VLAN_OUTPUT_TAG(&sc->arpcom.ac_if, *m_head);
1974	if (mtag != NULL)
1975		sc->rl_ldata.rl_tx_list[*idx].rl_vlanctl =
1976		    htole32(htons(VLAN_TAG_VALUE(mtag)) | RL_TDESC_VLANCTL_TAG);
1977
1978	/* Transfer ownership of packet to the chip. */
1979
1980	sc->rl_ldata.rl_tx_list[arg.rl_idx].rl_cmdstat |=
1981	    htole32(RL_TDESC_CMD_OWN);
1982	if (*idx != arg.rl_idx)
1983		sc->rl_ldata.rl_tx_list[*idx].rl_cmdstat |=
1984		    htole32(RL_TDESC_CMD_OWN);
1985
1986	RL_DESC_INC(arg.rl_idx);
1987	*idx = arg.rl_idx;
1988
1989	return (0);
1990}
1991
1992static void
1993re_start(ifp)
1994	struct ifnet		*ifp;
1995{
1996	struct rl_softc		*sc;
1997
1998	sc = ifp->if_softc;
1999	RL_LOCK(sc);
2000	re_start_locked(ifp);
2001	RL_UNLOCK(sc);
2002}
2003
2004/*
2005 * Main transmit routine for C+ and gigE NICs.
2006 */
2007static void
2008re_start_locked(ifp)
2009	struct ifnet		*ifp;
2010{
2011	struct rl_softc		*sc;
2012	struct mbuf		*m_head = NULL;
2013	int			idx, queued = 0;
2014
2015	sc = ifp->if_softc;
2016
2017	RL_LOCK_ASSERT(sc);
2018
2019	idx = sc->rl_ldata.rl_tx_prodidx;
2020
2021	while (sc->rl_ldata.rl_tx_mbuf[idx] == NULL) {
2022		IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
2023		if (m_head == NULL)
2024			break;
2025
2026		if (re_encap(sc, &m_head, &idx)) {
2027			IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
2028			ifp->if_flags |= IFF_OACTIVE;
2029			break;
2030		}
2031
2032		/*
2033		 * If there's a BPF listener, bounce a copy of this frame
2034		 * to him.
2035		 */
2036		BPF_MTAP(ifp, m_head);
2037
2038		queued++;
2039	}
2040
2041	if (queued == 0)
2042		return;
2043
2044	/* Flush the TX descriptors */
2045
2046	bus_dmamap_sync(sc->rl_ldata.rl_tx_list_tag,
2047	    sc->rl_ldata.rl_tx_list_map,
2048	    BUS_DMASYNC_PREWRITE|BUS_DMASYNC_PREREAD);
2049
2050	sc->rl_ldata.rl_tx_prodidx = idx;
2051
2052	/*
2053	 * RealTek put the TX poll request register in a different
2054	 * location on the 8169 gigE chip. I don't know why.
2055	 */
2056
2057	if (sc->rl_type == RL_8169)
2058		CSR_WRITE_2(sc, RL_GTXSTART, RL_TXSTART_START);
2059	else
2060		CSR_WRITE_2(sc, RL_TXSTART, RL_TXSTART_START);
2061
2062	/*
2063	 * Use the countdown timer for interrupt moderation.
2064	 * 'TX done' interrupts are disabled. Instead, we reset the
2065	 * countdown timer, which will begin counting until it hits
2066	 * the value in the TIMERINT register, and then trigger an
2067	 * interrupt. Each time we write to the TIMERCNT register,
2068	 * the timer count is reset to 0.
2069	 */
2070	CSR_WRITE_4(sc, RL_TIMERCNT, 1);
2071
2072	/*
2073	 * Set a timeout in case the chip goes out to lunch.
2074	 */
2075	ifp->if_timer = 5;
2076}
2077
/*
 * if_init entry point: take the softc lock and run the locked
 * initialization routine.  'xsc' is the softc pointer.
 */
static void
re_init(void *xsc)
{
	struct rl_softc		*sc;

	sc = xsc;

	RL_LOCK(sc);
	re_init_locked(sc);
	RL_UNLOCK(sc);
}
2088
2089static void
2090re_init_locked(sc)
2091	struct rl_softc		*sc;
2092{
2093	struct ifnet		*ifp = &sc->arpcom.ac_if;
2094	struct mii_data		*mii;
2095	u_int32_t		rxcfg = 0;
2096
2097	RL_LOCK_ASSERT(sc);
2098
2099	mii = device_get_softc(sc->rl_miibus);
2100
2101	/*
2102	 * Cancel pending I/O and free all RX/TX buffers.
2103	 */
2104	re_stop(sc);
2105
2106	/*
2107	 * Enable C+ RX and TX mode, as well as VLAN stripping and
2108	 * RX checksum offload. We must configure the C+ register
2109	 * before all others.
2110	 */
2111	CSR_WRITE_2(sc, RL_CPLUS_CMD, RL_CPLUSCMD_RXENB|
2112	    RL_CPLUSCMD_TXENB|RL_CPLUSCMD_PCI_MRW|
2113	    RL_CPLUSCMD_VLANSTRIP|
2114	    (ifp->if_capenable & IFCAP_RXCSUM ?
2115	    RL_CPLUSCMD_RXCSUM_ENB : 0));
2116
2117	/*
2118	 * Init our MAC address.  Even though the chipset
2119	 * documentation doesn't mention it, we need to enter "Config
2120	 * register write enable" mode to modify the ID registers.
2121	 */
2122	CSR_WRITE_1(sc, RL_EECMD, RL_EEMODE_WRITECFG);
2123	CSR_WRITE_STREAM_4(sc, RL_IDR0,
2124	    *(u_int32_t *)(&sc->arpcom.ac_enaddr[0]));
2125	CSR_WRITE_STREAM_4(sc, RL_IDR4,
2126	    *(u_int32_t *)(&sc->arpcom.ac_enaddr[4]));
2127	CSR_WRITE_1(sc, RL_EECMD, RL_EEMODE_OFF);
2128
2129	/*
2130	 * For C+ mode, initialize the RX descriptors and mbufs.
2131	 */
2132	re_rx_list_init(sc);
2133	re_tx_list_init(sc);
2134
2135	/*
2136	 * Enable transmit and receive.
2137	 */
2138	CSR_WRITE_1(sc, RL_COMMAND, RL_CMD_TX_ENB|RL_CMD_RX_ENB);
2139
2140	/*
2141	 * Set the initial TX and RX configuration.
2142	 */
2143	if (sc->rl_testmode) {
2144		if (sc->rl_type == RL_8169)
2145			CSR_WRITE_4(sc, RL_TXCFG,
2146			    RL_TXCFG_CONFIG|RL_LOOPTEST_ON);
2147		else
2148			CSR_WRITE_4(sc, RL_TXCFG,
2149			    RL_TXCFG_CONFIG|RL_LOOPTEST_ON_CPLUS);
2150	} else
2151		CSR_WRITE_4(sc, RL_TXCFG, RL_TXCFG_CONFIG);
2152	CSR_WRITE_4(sc, RL_RXCFG, RL_RXCFG_CONFIG);
2153
2154	/* Set the individual bit to receive frames for this host only. */
2155	rxcfg = CSR_READ_4(sc, RL_RXCFG);
2156	rxcfg |= RL_RXCFG_RX_INDIV;
2157
2158	/* If we want promiscuous mode, set the allframes bit. */
2159	if (ifp->if_flags & IFF_PROMISC)
2160		rxcfg |= RL_RXCFG_RX_ALLPHYS;
2161	else
2162		rxcfg &= ~RL_RXCFG_RX_ALLPHYS;
2163	CSR_WRITE_4(sc, RL_RXCFG, rxcfg);
2164
2165	/*
2166	 * Set capture broadcast bit to capture broadcast frames.
2167	 */
2168	if (ifp->if_flags & IFF_BROADCAST)
2169		rxcfg |= RL_RXCFG_RX_BROAD;
2170	else
2171		rxcfg &= ~RL_RXCFG_RX_BROAD;
2172	CSR_WRITE_4(sc, RL_RXCFG, rxcfg);
2173
2174	/*
2175	 * Program the multicast filter, if necessary.
2176	 */
2177	re_setmulti(sc);
2178
2179#ifdef DEVICE_POLLING
2180	/*
2181	 * Disable interrupts if we are polling.
2182	 */
2183	if (ifp->if_flags & IFF_POLLING)
2184		CSR_WRITE_2(sc, RL_IMR, 0);
2185	else	/* otherwise ... */
2186#endif /* DEVICE_POLLING */
2187	/*
2188	 * Enable interrupts.
2189	 */
2190	if (sc->rl_testmode)
2191		CSR_WRITE_2(sc, RL_IMR, 0);
2192	else
2193		CSR_WRITE_2(sc, RL_IMR, RL_INTRS_CPLUS);
2194
2195	/* Set initial TX threshold */
2196	sc->rl_txthresh = RL_TX_THRESH_INIT;
2197
2198	/* Start RX/TX process. */
2199	CSR_WRITE_4(sc, RL_MISSEDPKT, 0);
2200#ifdef notdef
2201	/* Enable receiver and transmitter. */
2202	CSR_WRITE_1(sc, RL_COMMAND, RL_CMD_TX_ENB|RL_CMD_RX_ENB);
2203#endif
2204	/*
2205	 * Load the addresses of the RX and TX lists into the chip.
2206	 */
2207
2208	CSR_WRITE_4(sc, RL_RXLIST_ADDR_HI,
2209	    RL_ADDR_HI(sc->rl_ldata.rl_rx_list_addr));
2210	CSR_WRITE_4(sc, RL_RXLIST_ADDR_LO,
2211	    RL_ADDR_LO(sc->rl_ldata.rl_rx_list_addr));
2212
2213	CSR_WRITE_4(sc, RL_TXLIST_ADDR_HI,
2214	    RL_ADDR_HI(sc->rl_ldata.rl_tx_list_addr));
2215	CSR_WRITE_4(sc, RL_TXLIST_ADDR_LO,
2216	    RL_ADDR_LO(sc->rl_ldata.rl_tx_list_addr));
2217
2218	CSR_WRITE_1(sc, RL_EARLY_TX_THRESH, 16);
2219
2220	/*
2221	 * Initialize the timer interrupt register so that
2222	 * a timer interrupt will be generated once the timer
2223	 * reaches a certain number of ticks. The timer is
2224	 * reloaded on each transmit. This gives us TX interrupt
2225	 * moderation, which dramatically improves TX frame rate.
2226	 */
2227	if (sc->rl_type == RL_8169)
2228		CSR_WRITE_4(sc, RL_TIMERINT_8169, 0x800);
2229	else
2230		CSR_WRITE_4(sc, RL_TIMERINT, 0x400);
2231
2232	/*
2233	 * For 8169 gigE NICs, set the max allowed RX packet
2234	 * size so we can receive jumbo frames.
2235	 */
2236	if (sc->rl_type == RL_8169)
2237		CSR_WRITE_2(sc, RL_MAXRXPKTLEN, 16383);
2238
2239	if (sc->rl_testmode)
2240		return;
2241
2242	mii_mediachg(mii);
2243
2244	CSR_WRITE_1(sc, RL_CFG1, RL_CFG1_DRVLOAD|RL_CFG1_FULLDUPLEX);
2245
2246	ifp->if_flags |= IFF_RUNNING;
2247	ifp->if_flags &= ~IFF_OACTIVE;
2248
2249	sc->rl_stat_ch = timeout(re_tick, sc, hz);
2250}
2251
2252/*
2253 * Set media options.
2254 */
2255static int
2256re_ifmedia_upd(ifp)
2257	struct ifnet		*ifp;
2258{
2259	struct rl_softc		*sc;
2260	struct mii_data		*mii;
2261
2262	sc = ifp->if_softc;
2263	mii = device_get_softc(sc->rl_miibus);
2264	mii_mediachg(mii);
2265
2266	return (0);
2267}
2268
2269/*
2270 * Report current media status.
2271 */
2272static void
2273re_ifmedia_sts(ifp, ifmr)
2274	struct ifnet		*ifp;
2275	struct ifmediareq	*ifmr;
2276{
2277	struct rl_softc		*sc;
2278	struct mii_data		*mii;
2279
2280	sc = ifp->if_softc;
2281	mii = device_get_softc(sc->rl_miibus);
2282
2283	mii_pollstat(mii);
2284	ifmr->ifm_active = mii->mii_media_active;
2285	ifmr->ifm_status = mii->mii_media_status;
2286}
2287
2288static int
2289re_ioctl(ifp, command, data)
2290	struct ifnet		*ifp;
2291	u_long			command;
2292	caddr_t			data;
2293{
2294	struct rl_softc		*sc = ifp->if_softc;
2295	struct ifreq		*ifr = (struct ifreq *) data;
2296	struct mii_data		*mii;
2297	int			error = 0;
2298
2299	switch (command) {
2300	case SIOCSIFMTU:
2301		if (ifr->ifr_mtu > RL_JUMBO_MTU)
2302			error = EINVAL;
2303		ifp->if_mtu = ifr->ifr_mtu;
2304		break;
2305	case SIOCSIFFLAGS:
2306		RL_LOCK(sc);
2307		if (ifp->if_flags & IFF_UP)
2308			re_init_locked(sc);
2309		else if (ifp->if_flags & IFF_RUNNING)
2310			re_stop(sc);
2311		RL_UNLOCK(sc);
2312		error = 0;
2313		break;
2314	case SIOCADDMULTI:
2315	case SIOCDELMULTI:
2316		RL_LOCK(sc);
2317		re_setmulti(sc);
2318		RL_UNLOCK(sc);
2319		error = 0;
2320		break;
2321	case SIOCGIFMEDIA:
2322	case SIOCSIFMEDIA:
2323		mii = device_get_softc(sc->rl_miibus);
2324		error = ifmedia_ioctl(ifp, ifr, &mii->mii_media, command);
2325		break;
2326	case SIOCSIFCAP:
2327		ifp->if_capenable &= ~(IFCAP_HWCSUM | IFCAP_POLLING);
2328		ifp->if_capenable |=
2329		    ifr->ifr_reqcap & (IFCAP_HWCSUM | IFCAP_POLLING);
2330		if (ifp->if_capenable & IFCAP_TXCSUM)
2331			ifp->if_hwassist = RE_CSUM_FEATURES;
2332		else
2333			ifp->if_hwassist = 0;
2334		if (ifp->if_flags & IFF_RUNNING)
2335			re_init(sc);
2336		break;
2337	default:
2338		error = ether_ioctl(ifp, command, data);
2339		break;
2340	}
2341
2342	return (error);
2343}
2344
2345static void
2346re_watchdog(ifp)
2347	struct ifnet		*ifp;
2348{
2349	struct rl_softc		*sc;
2350
2351	sc = ifp->if_softc;
2352	RL_LOCK(sc);
2353	printf("re%d: watchdog timeout\n", sc->rl_unit);
2354	ifp->if_oerrors++;
2355
2356	re_txeof(sc);
2357	re_rxeof(sc);
2358	re_init_locked(sc);
2359
2360	RL_UNLOCK(sc);
2361}
2362
2363/*
2364 * Stop the adapter and free any mbufs allocated to the
2365 * RX and TX lists.
2366 */
2367static void
2368re_stop(sc)
2369	struct rl_softc		*sc;
2370{
2371	register int		i;
2372	struct ifnet		*ifp;
2373
2374	RL_LOCK_ASSERT(sc);
2375
2376	ifp = &sc->arpcom.ac_if;
2377	ifp->if_timer = 0;
2378
2379	untimeout(re_tick, sc, sc->rl_stat_ch);
2380	ifp->if_flags &= ~(IFF_RUNNING | IFF_OACTIVE);
2381#ifdef DEVICE_POLLING
2382	ether_poll_deregister(ifp);
2383#endif /* DEVICE_POLLING */
2384
2385	CSR_WRITE_1(sc, RL_COMMAND, 0x00);
2386	CSR_WRITE_2(sc, RL_IMR, 0x0000);
2387
2388	if (sc->rl_head != NULL) {
2389		m_freem(sc->rl_head);
2390		sc->rl_head = sc->rl_tail = NULL;
2391	}
2392
2393	/* Free the TX list buffers. */
2394
2395	for (i = 0; i < RL_TX_DESC_CNT; i++) {
2396		if (sc->rl_ldata.rl_tx_mbuf[i] != NULL) {
2397			bus_dmamap_unload(sc->rl_ldata.rl_mtag,
2398			    sc->rl_ldata.rl_tx_dmamap[i]);
2399			m_freem(sc->rl_ldata.rl_tx_mbuf[i]);
2400			sc->rl_ldata.rl_tx_mbuf[i] = NULL;
2401		}
2402	}
2403
2404	/* Free the RX list buffers. */
2405
2406	for (i = 0; i < RL_RX_DESC_CNT; i++) {
2407		if (sc->rl_ldata.rl_rx_mbuf[i] != NULL) {
2408			bus_dmamap_unload(sc->rl_ldata.rl_mtag,
2409			    sc->rl_ldata.rl_rx_dmamap[i]);
2410			m_freem(sc->rl_ldata.rl_rx_mbuf[i]);
2411			sc->rl_ldata.rl_rx_mbuf[i] = NULL;
2412		}
2413	}
2414}
2415
2416/*
2417 * Device suspend routine.  Stop the interface and save some PCI
2418 * settings in case the BIOS doesn't restore them properly on
2419 * resume.
2420 */
2421static int
2422re_suspend(dev)
2423	device_t		dev;
2424{
2425	struct rl_softc		*sc;
2426
2427	sc = device_get_softc(dev);
2428
2429	RL_LOCK(sc);
2430	re_stop(sc);
2431	sc->suspended = 1;
2432	RL_UNLOCK(sc);
2433
2434	return (0);
2435}
2436
2437/*
2438 * Device resume routine.  Restore some PCI settings in case the BIOS
2439 * doesn't, re-enable busmastering, and restart the interface if
2440 * appropriate.
2441 */
2442static int
2443re_resume(dev)
2444	device_t		dev;
2445{
2446	struct rl_softc		*sc;
2447	struct ifnet		*ifp;
2448
2449	sc = device_get_softc(dev);
2450
2451	RL_LOCK(sc);
2452
2453	ifp = &sc->arpcom.ac_if;
2454
2455	/* reinitialize interface if necessary */
2456	if (ifp->if_flags & IFF_UP)
2457		re_init_locked(sc);
2458
2459	sc->suspended = 0;
2460	RL_UNLOCK(sc);
2461
2462	return (0);
2463}
2464
2465/*
2466 * Stop all chip I/O so that the kernel's probe routines don't
2467 * get confused by errant DMAs when rebooting.
2468 */
2469static void
2470re_shutdown(dev)
2471	device_t		dev;
2472{
2473	struct rl_softc		*sc;
2474
2475	sc = device_get_softc(dev);
2476
2477	RL_LOCK(sc);
2478	re_stop(sc);
2479	RL_UNLOCK(sc);
2480}
2481