/**************************************************************************

Copyright (c) 2001-2006, Intel Corporation
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

 1. Redistributions of source code must retain the above copyright notice,
    this list of conditions and the following disclaimer.

 2. Redistributions in binary form must reproduce the above copyright
    notice, this list of conditions and the following disclaimer in the
    documentation and/or other materials provided with the distribution.

 3. Neither the name of the Intel Corporation nor the names of its
    contributors may be used to endorse or promote products derived from
    this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.

***************************************************************************/

/*$FreeBSD: head/sys/dev/em/if_em.c 162425 2006-09-18 23:44:12Z andre $*/

#ifdef HAVE_KERNEL_OPTION_HEADERS
#include "opt_device_polling.h"
#endif

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/bus.h>
#include <sys/endian.h>
#include <sys/kernel.h>
#include <sys/kthread.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/module.h>
#include <sys/rman.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/sysctl.h>
#include <sys/taskqueue.h>

#include <machine/bus.h>
#include <machine/resource.h>

#include <net/bpf.h>
#include <net/ethernet.h>
#include <net/if.h>
#include <net/if_arp.h>
#include <net/if_dl.h>
#include <net/if_media.h>

#include <net/if_types.h>
#include <net/if_vlan_var.h>

#include <netinet/in_systm.h>
#include <netinet/in.h>
#include <netinet/if_ether.h>
#include <netinet/ip.h>
#include <netinet/tcp.h>
#include <netinet/udp.h>

#include <machine/in_cksum.h>
#include <dev/pci/pcivar.h>
#include <dev/pci/pcireg.h>
#include <dev/em/if_em_hw.h>
#include <dev/em/if_em.h>

/*********************************************************************
 *  Set this to one to display debug statistics
 *********************************************************************/
int	em_display_debug_stats = 0;

/*********************************************************************
 *  Driver version
 *********************************************************************/

char em_driver_version[] = "Version - 6.1.4 - TSO";


/*********************************************************************
 *  PCI Device ID Table
 *
 *  Used by em_probe() to decide whether to attach to a device.
 *  The last field stores an index into em_strings.
 *  The last entry must be all 0s.
 *
 *  { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index }
 *********************************************************************/

static em_vendor_info_t em_vendor_info_array[] =
{
	/* Intel(R) PRO/1000 Network Connection */
	{ 0x8086, E1000_DEV_ID_82540EM,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82540EM_LOM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82540EP,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82540EP_LOM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82540EP_LP,	PCI_ANY_ID, PCI_ANY_ID, 0},

	{ 0x8086, E1000_DEV_ID_82541EI,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82541ER,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82541ER_LOM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82541EI_MOBILE,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82541GI,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82541GI_LF,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82541GI_MOBILE,	PCI_ANY_ID, PCI_ANY_ID, 0},

	{ 0x8086, E1000_DEV_ID_82542,		PCI_ANY_ID, PCI_ANY_ID, 0},

	{ 0x8086, E1000_DEV_ID_82543GC_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82543GC_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},

	{ 0x8086, E1000_DEV_ID_82544EI_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82544EI_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82544GC_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82544GC_LOM,	PCI_ANY_ID, PCI_ANY_ID, 0},

	{ 0x8086, E1000_DEV_ID_82545EM_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82545EM_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82545GM_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82545GM_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82545GM_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},

	{ 0x8086, E1000_DEV_ID_82546EB_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82546EB_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82546EB_QUAD_COPPER, PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82546GB_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82546GB_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82546GB_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82546GB_PCIE,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82546GB_QUAD_COPPER, PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82546GB_QUAD_COPPER_KSP3,
						PCI_ANY_ID, PCI_ANY_ID, 0},

	{ 0x8086, E1000_DEV_ID_82547EI,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82547EI_MOBILE,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82547GI,		PCI_ANY_ID, PCI_ANY_ID, 0},

	{ 0x8086, E1000_DEV_ID_82571EB_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER,
						PCI_ANY_ID, PCI_ANY_ID, 0},

	{ 0x8086, E1000_DEV_ID_82572EI_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82572EI_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82572EI_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82572EI,		PCI_ANY_ID, PCI_ANY_ID, 0},

	{ 0x8086, E1000_DEV_ID_82573E,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82573E_IAMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82573L,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_SPT,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_SPT,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_DPT,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_DPT,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IGP_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IGP_C,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IFE,	PCI_ANY_ID, PCI_ANY_ID, 0},

	/* required last entry */
	{ 0, 0, 0, 0, 0}
};
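
/*
 * Supporting an additional adapter is normally just a matter of adding
 * its device ID to the table above; the last field of each entry indexes
 * into em_strings[] below, which currently holds a single branding string.
 */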

/*********************************************************************
 *  Table of branding strings for all supported NICs.
 *********************************************************************/

static char *em_strings[] = {
	"Intel(R) PRO/1000 Network Connection"
};

/*********************************************************************
 *  Function prototypes
 *********************************************************************/
static int	em_probe(device_t);
static int	em_attach(device_t);
static int	em_detach(device_t);
static int	em_shutdown(device_t);
static int	em_suspend(device_t);
static int	em_resume(device_t);
static void	em_start(struct ifnet *);
static void	em_start_locked(struct ifnet *ifp);
static int	em_ioctl(struct ifnet *, u_long, caddr_t);
static void	em_watchdog(struct ifnet *);
static void	em_init(void *);
static void	em_init_locked(struct adapter *);
static void	em_stop(void *);
static void	em_media_status(struct ifnet *, struct ifmediareq *);
static int	em_media_change(struct ifnet *);
static void	em_identify_hardware(struct adapter *);
static int	em_allocate_pci_resources(struct adapter *);
static int	em_allocate_intr(struct adapter *);
static void	em_free_intr(struct adapter *);
static void	em_free_pci_resources(struct adapter *);
static void	em_local_timer(void *);
static int	em_hardware_init(struct adapter *);
static void	em_setup_interface(device_t, struct adapter *);
static int	em_setup_transmit_structures(struct adapter *);
static void	em_initialize_transmit_unit(struct adapter *);
static int	em_setup_receive_structures(struct adapter *);
static void	em_initialize_receive_unit(struct adapter *);
static void	em_enable_intr(struct adapter *);
static void	em_disable_intr(struct adapter *);
static void	em_free_transmit_structures(struct adapter *);
static void	em_free_receive_structures(struct adapter *);
static void	em_update_stats_counters(struct adapter *);
static void	em_txeof(struct adapter *);
static int	em_allocate_receive_structures(struct adapter *);
static int	em_allocate_transmit_structures(struct adapter *);
static int	em_rxeof(struct adapter *, int);
#ifndef __NO_STRICT_ALIGNMENT
static int	em_fixup_rx(struct adapter *);
#endif
static void	em_receive_checksum(struct adapter *, struct em_rx_desc *,
		    struct mbuf *);
static void	em_transmit_checksum_setup(struct adapter *, struct mbuf *,
		    uint32_t *, uint32_t *);
static boolean_t em_tso_setup(struct adapter *, struct mbuf *,
		    uint32_t *, uint32_t *);
static void	em_set_promisc(struct adapter *);
static void	em_disable_promisc(struct adapter *);
static void	em_set_multi(struct adapter *);
static void	em_print_hw_stats(struct adapter *);
static void	em_update_link_status(struct adapter *);
static int	em_get_buf(struct adapter *, int);
static void	em_enable_vlans(struct adapter *);
static void	em_disable_vlans(struct adapter *);
static int	em_encap(struct adapter *, struct mbuf **);
static void	em_smartspeed(struct adapter *);
static int	em_82547_fifo_workaround(struct adapter *, int);
static void	em_82547_update_fifo_head(struct adapter *, int);
static int	em_82547_tx_fifo_reset(struct adapter *);
static void	em_82547_move_tail(void *arg);
static void	em_82547_move_tail_locked(struct adapter *);
static int	em_dma_malloc(struct adapter *, bus_size_t,
		    struct em_dma_alloc *, int);
static void	em_dma_free(struct adapter *, struct em_dma_alloc *);
static void	em_print_debug_info(struct adapter *);
static int	em_is_valid_ether_addr(uint8_t *);
static int	em_sysctl_stats(SYSCTL_HANDLER_ARGS);
static int	em_sysctl_debug_info(SYSCTL_HANDLER_ARGS);
static uint32_t	em_fill_descriptors(bus_addr_t address, uint32_t length,
		    PDESC_ARRAY desc_array);
static int	em_sysctl_int_delay(SYSCTL_HANDLER_ARGS);
static void	em_add_int_delay_sysctl(struct adapter *, const char *,
		    const char *, struct em_int_delay_info *, int, int);

/*
 * Fast interrupt handler and legacy ithread/polling modes are
 * mutually exclusive.
 */
#ifdef DEVICE_POLLING
static poll_handler_t em_poll;
static void	em_intr(void *);
#else
static void	em_intr_fast(void *);
static void	em_add_int_process_limit(struct adapter *, const char *,
		    const char *, int *, int);
static void	em_handle_rxtx(void *context, int pending);
static void	em_handle_link(void *context, int pending);
#endif

/*********************************************************************
 *  FreeBSD Device Interface Entry Points
 *********************************************************************/

static device_method_t em_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe, em_probe),
	DEVMETHOD(device_attach, em_attach),
	DEVMETHOD(device_detach, em_detach),
	DEVMETHOD(device_shutdown, em_shutdown),
	DEVMETHOD(device_suspend, em_suspend),
	DEVMETHOD(device_resume, em_resume),
	{0, 0}
};

static driver_t em_driver = {
	"em", em_methods, sizeof(struct adapter),
};

static devclass_t em_devclass;
DRIVER_MODULE(em, pci, em_driver, em_devclass, 0, 0);
MODULE_DEPEND(em, pci, 1, 1, 1);
MODULE_DEPEND(em, ether, 1, 1, 1);

/*********************************************************************
 *  Tunable default values.
 *********************************************************************/

#define E1000_TICKS_TO_USECS(ticks)	((1024 * (ticks) + 500) / 1000)
#define E1000_USECS_TO_TICKS(usecs)	((1000 * (usecs) + 512) / 1024)
#define M_TSO_LEN			66
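
/*
 * Note on the conversion macros above: the adapter's interrupt delay
 * registers tick in units of 1.024 usecs, so the macros scale by
 * 1024/1000 and round to the nearest unit.  For example,
 * E1000_USECS_TO_TICKS(128) = (1000 * 128 + 512) / 1024 = 125 ticks,
 * and E1000_TICKS_TO_USECS(125) = (1024 * 125 + 500) / 1000 = 128 usecs
 * again.  M_TSO_LEN is the mbuf length threshold used by the TSO pullup
 * workaround in em_encap(); it is presumably sized to cover a worst-case
 * Ethernet + IP + TCP header.
 */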

static int em_tx_int_delay_dflt = E1000_TICKS_TO_USECS(EM_TIDV);
static int em_rx_int_delay_dflt = E1000_TICKS_TO_USECS(EM_RDTR);
static int em_tx_abs_int_delay_dflt = E1000_TICKS_TO_USECS(EM_TADV);
static int em_rx_abs_int_delay_dflt = E1000_TICKS_TO_USECS(EM_RADV);
static int em_rxd = EM_DEFAULT_RXD;
static int em_txd = EM_DEFAULT_TXD;
static int em_smart_pwr_down = FALSE;

TUNABLE_INT("hw.em.tx_int_delay", &em_tx_int_delay_dflt);
TUNABLE_INT("hw.em.rx_int_delay", &em_rx_int_delay_dflt);
TUNABLE_INT("hw.em.tx_abs_int_delay", &em_tx_abs_int_delay_dflt);
TUNABLE_INT("hw.em.rx_abs_int_delay", &em_rx_abs_int_delay_dflt);
TUNABLE_INT("hw.em.rxd", &em_rxd);
TUNABLE_INT("hw.em.txd", &em_txd);
TUNABLE_INT("hw.em.smart_pwr_down", &em_smart_pwr_down);
#ifndef DEVICE_POLLING
static int em_rx_process_limit = 100;
TUNABLE_INT("hw.em.rx_process_limit", &em_rx_process_limit);
#endif
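
/*
 * The TUNABLE_INT() hooks above make these defaults overridable from the
 * loader.  As an illustration (the values below are only examples), one
 * could set in /boot/loader.conf:
 *
 *	hw.em.txd="1024"
 *	hw.em.rxd="1024"
 *	hw.em.rx_int_delay="32"
 */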

/*********************************************************************
 *  Device identification routine
 *
 *  em_probe determines whether the driver should be loaded on the
 *  adapter, based on the adapter's PCI vendor/device ID.
 *
 *  return BUS_PROBE_DEFAULT on success, positive on failure
 *********************************************************************/

static int
em_probe(device_t dev)
{
	char		adapter_name[60];
	uint16_t	pci_vendor_id = 0;
	uint16_t	pci_device_id = 0;
	uint16_t	pci_subvendor_id = 0;
	uint16_t	pci_subdevice_id = 0;
	em_vendor_info_t *ent;

	INIT_DEBUGOUT("em_probe: begin");

	pci_vendor_id = pci_get_vendor(dev);
	if (pci_vendor_id != EM_VENDOR_ID)
		return (ENXIO);

	pci_device_id = pci_get_device(dev);
	pci_subvendor_id = pci_get_subvendor(dev);
	pci_subdevice_id = pci_get_subdevice(dev);

	ent = em_vendor_info_array;
	while (ent->vendor_id != 0) {
		if ((pci_vendor_id == ent->vendor_id) &&
		    (pci_device_id == ent->device_id) &&

		    ((pci_subvendor_id == ent->subvendor_id) ||
		    (ent->subvendor_id == PCI_ANY_ID)) &&

		    ((pci_subdevice_id == ent->subdevice_id) ||
		    (ent->subdevice_id == PCI_ANY_ID))) {
			sprintf(adapter_name, "%s %s",
				em_strings[ent->index],
				em_driver_version);
			device_set_desc_copy(dev, adapter_name);
			return (BUS_PROBE_DEFAULT);
		}
		ent++;
	}

	return (ENXIO);
}

/*********************************************************************
 *  Device initialization routine
 *
 *  The attach entry point is called when the driver is being loaded.
 *  This routine identifies the type of hardware, allocates all resources
 *  and initializes the hardware.
 *
 *  return 0 on success, positive on failure
 *********************************************************************/

static int
em_attach(device_t dev)
{
	struct adapter	*adapter;
	int		tsize, rsize;
	int		error = 0;

	INIT_DEBUGOUT("em_attach: begin");

	adapter = device_get_softc(dev);
	adapter->dev = adapter->osdep.dev = dev;
	EM_LOCK_INIT(adapter, device_get_nameunit(dev));

	/* SYSCTL stuff */
	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
	    OID_AUTO, "debug_info", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
	    em_sysctl_debug_info, "I", "Debug Information");

	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
	    OID_AUTO, "stats", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
	    em_sysctl_stats, "I", "Statistics");

	callout_init(&adapter->timer, CALLOUT_MPSAFE);
	callout_init(&adapter->tx_fifo_timer, CALLOUT_MPSAFE);

	/* Determine hardware revision */
	em_identify_hardware(adapter);

	/* Set up some sysctls for the tunable interrupt delays */
	em_add_int_delay_sysctl(adapter, "rx_int_delay",
	    "receive interrupt delay in usecs", &adapter->rx_int_delay,
	    E1000_REG_OFFSET(&adapter->hw, RDTR), em_rx_int_delay_dflt);
	em_add_int_delay_sysctl(adapter, "tx_int_delay",
	    "transmit interrupt delay in usecs", &adapter->tx_int_delay,
	    E1000_REG_OFFSET(&adapter->hw, TIDV), em_tx_int_delay_dflt);
	if (adapter->hw.mac_type >= em_82540) {
		em_add_int_delay_sysctl(adapter, "rx_abs_int_delay",
		    "receive interrupt delay limit in usecs",
		    &adapter->rx_abs_int_delay,
		    E1000_REG_OFFSET(&adapter->hw, RADV),
		    em_rx_abs_int_delay_dflt);
		em_add_int_delay_sysctl(adapter, "tx_abs_int_delay",
		    "transmit interrupt delay limit in usecs",
		    &adapter->tx_abs_int_delay,
		    E1000_REG_OFFSET(&adapter->hw, TADV),
		    em_tx_abs_int_delay_dflt);
	}

#ifndef DEVICE_POLLING
	/* Sysctls for limiting the amount of work done in the taskqueue */
	em_add_int_process_limit(adapter, "rx_processing_limit",
	    "max number of rx packets to process", &adapter->rx_process_limit,
	    em_rx_process_limit);
#endif

	/*
	 * Validate the number of transmit and receive descriptors.  They
	 * must not exceed the hardware maximum and must be a multiple
	 * of EM_DBA_ALIGN.
	 */
	if (((em_txd * sizeof(struct em_tx_desc)) % EM_DBA_ALIGN) != 0 ||
	    (adapter->hw.mac_type >= em_82544 && em_txd > EM_MAX_TXD) ||
	    (adapter->hw.mac_type < em_82544 && em_txd > EM_MAX_TXD_82543) ||
	    (em_txd < EM_MIN_TXD)) {
		device_printf(dev, "Using %d TX descriptors instead of %d!\n",
		    EM_DEFAULT_TXD, em_txd);
		adapter->num_tx_desc = EM_DEFAULT_TXD;
	} else
		adapter->num_tx_desc = em_txd;
	if (((em_rxd * sizeof(struct em_rx_desc)) % EM_DBA_ALIGN) != 0 ||
	    (adapter->hw.mac_type >= em_82544 && em_rxd > EM_MAX_RXD) ||
	    (adapter->hw.mac_type < em_82544 && em_rxd > EM_MAX_RXD_82543) ||
	    (em_rxd < EM_MIN_RXD)) {
		device_printf(dev, "Using %d RX descriptors instead of %d!\n",
		    EM_DEFAULT_RXD, em_rxd);
		adapter->num_rx_desc = EM_DEFAULT_RXD;
	} else
		adapter->num_rx_desc = em_rxd;

	adapter->hw.autoneg = DO_AUTO_NEG;
	adapter->hw.wait_autoneg_complete = WAIT_FOR_AUTO_NEG_DEFAULT;
	adapter->hw.autoneg_advertised = AUTONEG_ADV_DEFAULT;
	adapter->hw.tbi_compatibility_en = TRUE;
	adapter->rx_buffer_len = EM_RXBUFFER_2048;

	adapter->hw.phy_init_script = 1;
	adapter->hw.phy_reset_disable = FALSE;

#ifndef EM_MASTER_SLAVE
	adapter->hw.master_slave = em_ms_hw_default;
#else
	adapter->hw.master_slave = EM_MASTER_SLAVE;
#endif
	/*
	 * Set the max frame size assuming standard ethernet
	 * sized frames.
	 */
	adapter->hw.max_frame_size = ETHERMTU + ETHER_HDR_LEN + ETHER_CRC_LEN;

	adapter->hw.min_frame_size =
	    MINIMUM_ETHERNET_PACKET_SIZE + ETHER_CRC_LEN;

	/*
	 * This controls when hardware reports transmit completion
	 * status.
	 */
	adapter->hw.report_tx_early = 1;
	if (em_allocate_pci_resources(adapter)) {
		device_printf(dev, "Allocation of PCI resources failed\n");
		error = ENXIO;
		goto err_pci;
	}

	/* Initialize eeprom parameters */
	em_init_eeprom_params(&adapter->hw);

	tsize = roundup2(adapter->num_tx_desc * sizeof(struct em_tx_desc),
	    EM_DBA_ALIGN);

	/* Allocate Transmit Descriptor ring */
	if (em_dma_malloc(adapter, tsize, &adapter->txdma, BUS_DMA_NOWAIT)) {
		device_printf(dev, "Unable to allocate tx_desc memory\n");
		error = ENOMEM;
		goto err_tx_desc;
	}
	adapter->tx_desc_base = (struct em_tx_desc *)adapter->txdma.dma_vaddr;

	rsize = roundup2(adapter->num_rx_desc * sizeof(struct em_rx_desc),
	    EM_DBA_ALIGN);

	/* Allocate Receive Descriptor ring */
	if (em_dma_malloc(adapter, rsize, &adapter->rxdma, BUS_DMA_NOWAIT)) {
		device_printf(dev, "Unable to allocate rx_desc memory\n");
		error = ENOMEM;
		goto err_rx_desc;
	}
	adapter->rx_desc_base = (struct em_rx_desc *)adapter->rxdma.dma_vaddr;

	/* Initialize the hardware */
	if (em_hardware_init(adapter)) {
		device_printf(dev, "Unable to initialize the hardware\n");
		error = EIO;
		goto err_hw_init;
	}

	/* Copy the permanent MAC address out of the EEPROM */
	if (em_read_mac_addr(&adapter->hw) < 0) {
		device_printf(dev, "EEPROM read error while reading MAC"
		    " address\n");
		error = EIO;
		goto err_hw_init;
	}

	if (!em_is_valid_ether_addr(adapter->hw.mac_addr)) {
		device_printf(dev, "Invalid MAC address\n");
		error = EIO;
		goto err_hw_init;
	}

	/* Setup OS specific network interface */
	em_setup_interface(dev, adapter);

	em_allocate_intr(adapter);

	/* Initialize statistics */
	em_clear_hw_cntrs(&adapter->hw);
	em_update_stats_counters(adapter);
	adapter->hw.get_link_status = 1;
	em_update_link_status(adapter);

	/* Indicate SOL/IDER usage */
	if (em_check_phy_reset_block(&adapter->hw))
		device_printf(dev,
		    "PHY reset is blocked due to SOL/IDER session.\n");

	/* Identify 82544 on PCIX */
	em_get_bus_info(&adapter->hw);
	if (adapter->hw.bus_type == em_bus_type_pcix &&
	    adapter->hw.mac_type == em_82544)
		adapter->pcix_82544 = TRUE;
	else
		adapter->pcix_82544 = FALSE;

	INIT_DEBUGOUT("em_attach: end");

	return (0);

err_hw_init:
	em_dma_free(adapter, &adapter->rxdma);
err_rx_desc:
	em_dma_free(adapter, &adapter->txdma);
err_tx_desc:
err_pci:
	em_free_intr(adapter);
	em_free_pci_resources(adapter);
	EM_LOCK_DESTROY(adapter);

	return (error);
}

/*********************************************************************
 *  Device removal routine
 *
 *  The detach entry point is called when the driver is being removed.
 *  This routine stops the adapter and deallocates all the resources
 *  that were allocated for driver operation.
 *
 *  return 0 on success, positive on failure
 *********************************************************************/

static int
em_detach(device_t dev)
{
	struct adapter	*adapter = device_get_softc(dev);
	struct ifnet	*ifp = adapter->ifp;

	INIT_DEBUGOUT("em_detach: begin");

#ifdef DEVICE_POLLING
	if (ifp->if_capenable & IFCAP_POLLING)
		ether_poll_deregister(ifp);
#endif

	em_free_intr(adapter);
	EM_LOCK(adapter);
	adapter->in_detach = 1;
	em_stop(adapter);
	em_phy_hw_reset(&adapter->hw);
	EM_UNLOCK(adapter);
	ether_ifdetach(adapter->ifp);

	em_free_pci_resources(adapter);
	bus_generic_detach(dev);
	if_free(ifp);

	/* Free Transmit Descriptor ring */
	if (adapter->tx_desc_base) {
		em_dma_free(adapter, &adapter->txdma);
		adapter->tx_desc_base = NULL;
	}

	/* Free Receive Descriptor ring */
	if (adapter->rx_desc_base) {
		em_dma_free(adapter, &adapter->rxdma);
		adapter->rx_desc_base = NULL;
	}

	EM_LOCK_DESTROY(adapter);

	return (0);
}

/*********************************************************************
 *
 *  Shutdown entry point
 *
 **********************************************************************/

static int
em_shutdown(device_t dev)
{
	struct adapter *adapter = device_get_softc(dev);

	EM_LOCK(adapter);
	em_stop(adapter);
	EM_UNLOCK(adapter);
	return (0);
}

/*
 * Suspend/resume device methods.
 */
static int
em_suspend(device_t dev)
{
	struct adapter *adapter = device_get_softc(dev);

	EM_LOCK(adapter);
	em_stop(adapter);
	EM_UNLOCK(adapter);

	return bus_generic_suspend(dev);
}

static int
em_resume(device_t dev)
{
	struct adapter *adapter = device_get_softc(dev);
	struct ifnet *ifp = adapter->ifp;

	EM_LOCK(adapter);
	em_init_locked(adapter);
	if ((ifp->if_flags & IFF_UP) &&
	    (ifp->if_drv_flags & IFF_DRV_RUNNING))
		em_start_locked(ifp);
	EM_UNLOCK(adapter);

	return bus_generic_resume(dev);
}


/*********************************************************************
 *  Transmit entry point
 *
 *  em_start is called by the stack to initiate a transmit.
 *  The driver will remain in this routine as long as there are
 *  packets to transmit and transmit resources are available.
 *  If resources are not available, the stack is notified and
 *  the packet is requeued.
 **********************************************************************/

static void
em_start_locked(struct ifnet *ifp)
{
	struct adapter	*adapter = ifp->if_softc;
	struct mbuf	*m_head;

	EM_LOCK_ASSERT(adapter);

	if ((ifp->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) !=
	    IFF_DRV_RUNNING)
		return;
	if (!adapter->link_active)
		return;

	while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {

		IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
		if (m_head == NULL)
			break;
		/*
		 * em_encap() can modify our pointer, and/or make it NULL on
		 * failure.  In that event, we can't requeue.
		 */
		if (em_encap(adapter, &m_head)) {
			if (m_head == NULL)
				break;
			ifp->if_drv_flags |= IFF_DRV_OACTIVE;
			IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
			break;
		}

		/* Send a copy of the frame to the BPF listener */
		BPF_MTAP(ifp, m_head);

		/* Set timeout in case hardware has problems transmitting. */
		ifp->if_timer = EM_TX_TIMEOUT;
	}
}

static void
em_start(struct ifnet *ifp)
{
	struct adapter *adapter = ifp->if_softc;

	EM_LOCK(adapter);
	if (ifp->if_drv_flags & IFF_DRV_RUNNING)
		em_start_locked(ifp);
	EM_UNLOCK(adapter);
}

/*********************************************************************
 *  Ioctl entry point
 *
 *  em_ioctl is called when the user wants to configure the
 *  interface.
 *
 *  return 0 on success, positive on failure
 **********************************************************************/

static int
em_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
{
	struct adapter	*adapter = ifp->if_softc;
	struct ifreq *ifr = (struct ifreq *)data;
	struct ifaddr *ifa = (struct ifaddr *)data;
	int error = 0;

	if (adapter->in_detach)
		return (error);

	switch (command) {
	case SIOCSIFADDR:
	case SIOCGIFADDR:
		if (ifa->ifa_addr->sa_family == AF_INET) {
			/*
			 * XXX
			 * Since resetting the hardware takes a very long time
			 * and results in link renegotiation, we initialize
			 * the hardware only when it is absolutely required.
			 */
			ifp->if_flags |= IFF_UP;
			if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) {
				EM_LOCK(adapter);
				em_init_locked(adapter);
				EM_UNLOCK(adapter);
			}
			arp_ifinit(ifp, ifa);
		} else
			error = ether_ioctl(ifp, command, data);
		break;
	case SIOCSIFMTU:
	    {
		int max_frame_size;
		uint16_t eeprom_data = 0;

		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFMTU (Set Interface MTU)");

		EM_LOCK(adapter);
		switch (adapter->hw.mac_type) {
		case em_82573:
			/*
			 * 82573 only supports jumbo frames
			 * if ASPM is disabled.
			 */
			em_read_eeprom(&adapter->hw, EEPROM_INIT_3GIO_3, 1,
			    &eeprom_data);
			if (eeprom_data & EEPROM_WORD1A_ASPM_MASK) {
				max_frame_size = ETHER_MAX_LEN;
				break;
			}
			/* Allow Jumbo frames - fall thru */
		case em_82571:
		case em_82572:
		case em_80003es2lan:	/* Limit Jumbo Frame size */
			max_frame_size = 9234;
			break;
		case em_ich8lan:
			/* ICH8 does not support jumbo frames */
			max_frame_size = ETHER_MAX_LEN;
			break;
		default:
			max_frame_size = MAX_JUMBO_FRAME_SIZE;
		}
		if (ifr->ifr_mtu > max_frame_size - ETHER_HDR_LEN -
		    ETHER_CRC_LEN) {
			EM_UNLOCK(adapter);
			error = EINVAL;
			break;
		}

		ifp->if_mtu = ifr->ifr_mtu;
		adapter->hw.max_frame_size =
		    ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
		em_init_locked(adapter);
		EM_UNLOCK(adapter);
		break;
	    }
	case SIOCSIFFLAGS:
		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFFLAGS (Set Interface Flags)");
		EM_LOCK(adapter);
		if (ifp->if_flags & IFF_UP) {
			if ((ifp->if_drv_flags & IFF_DRV_RUNNING)) {
				if ((ifp->if_flags ^ adapter->if_flags) &
				    IFF_PROMISC) {
					em_disable_promisc(adapter);
					em_set_promisc(adapter);
				}
			} else
				em_init_locked(adapter);
		} else {
			if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
				em_stop(adapter);
			}
		}
		adapter->if_flags = ifp->if_flags;
		EM_UNLOCK(adapter);
		break;
	case SIOCADDMULTI:
	case SIOCDELMULTI:
		IOCTL_DEBUGOUT("ioctl rcv'd: SIOC(ADD|DEL)MULTI");
		if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
			EM_LOCK(adapter);
			em_disable_intr(adapter);
			em_set_multi(adapter);
			if (adapter->hw.mac_type == em_82542_rev2_0) {
				em_initialize_receive_unit(adapter);
			}
#ifdef DEVICE_POLLING
			if (!(ifp->if_capenable & IFCAP_POLLING))
#endif
				em_enable_intr(adapter);
			EM_UNLOCK(adapter);
		}
		break;
	case SIOCSIFMEDIA:
	case SIOCGIFMEDIA:
		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCxIFMEDIA (Get/Set Interface Media)");
		error = ifmedia_ioctl(ifp, ifr, &adapter->media, command);
		break;
	case SIOCSIFCAP:
	    {
		int mask, reinit;

		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFCAP (Set Capabilities)");
		reinit = 0;
		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
#ifdef DEVICE_POLLING
		if (mask & IFCAP_POLLING) {
			if (ifr->ifr_reqcap & IFCAP_POLLING) {
				error = ether_poll_register(em_poll, ifp);
				if (error)
					return (error);
				EM_LOCK(adapter);
				em_disable_intr(adapter);
				ifp->if_capenable |= IFCAP_POLLING;
				EM_UNLOCK(adapter);
			} else {
				error = ether_poll_deregister(ifp);
				/* Enable interrupt even in error case */
				EM_LOCK(adapter);
				em_enable_intr(adapter);
				ifp->if_capenable &= ~IFCAP_POLLING;
				EM_UNLOCK(adapter);
			}
		}
#endif
		if (mask & IFCAP_HWCSUM) {
			ifp->if_capenable ^= IFCAP_HWCSUM;
			reinit = 1;
		}
		if (mask & IFCAP_TSO) {
			ifp->if_capenable ^= IFCAP_TSO;
			reinit = 1;
		}
		if (mask & IFCAP_VLAN_HWTAGGING) {
			ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
			reinit = 1;
		}
		if (reinit && (ifp->if_drv_flags & IFF_DRV_RUNNING))
			em_init(adapter);
		VLAN_CAPABILITIES(ifp);
		break;
	    }
	default:
		error = ether_ioctl(ifp, command, data);
		break;
	}

	return (error);
}

/*********************************************************************
 *  Watchdog entry point
 *
 *  This routine is called whenever hardware quits transmitting.
 *
 **********************************************************************/

static void
em_watchdog(struct ifnet *ifp)
{
	struct adapter *adapter = ifp->if_softc;

	EM_LOCK(adapter);
	/*
	 * If we are in this routine because of pause frames, then
	 * don't reset the hardware.
	 */
	if (E1000_READ_REG(&adapter->hw, STATUS) & E1000_STATUS_TXOFF) {
		ifp->if_timer = EM_TX_TIMEOUT;
		EM_UNLOCK(adapter);
		return;
	}

	/*
	 * Reclaim first as there is a possibility of losing Tx completion
	 * interrupts.  Missing Tx completion interrupts may be caused by
	 * the Tx interrupt moderation mechanism (delayed interrupts) or
	 * by a chipset bug.
	 */
	em_txeof(adapter);
	if (adapter->num_tx_desc_avail == adapter->num_tx_desc) {
		EM_UNLOCK(adapter);
		return;
	}

	if (em_check_for_link(&adapter->hw) == 0)
		device_printf(adapter->dev, "watchdog timeout -- resetting\n");

	ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
	adapter->watchdog_events++;

	em_init_locked(adapter);
	EM_UNLOCK(adapter);
}

/*********************************************************************
 *  Init entry point
 *
 *  This routine is used in two ways.  It is used by the stack as the
 *  init entry point in the network interface structure.  It is also
 *  used by the driver as a hw/sw initialization routine to get to a
 *  consistent state.
 *
 **********************************************************************/

static void
em_init_locked(struct adapter *adapter)
{
	struct ifnet	*ifp = adapter->ifp;
	device_t	dev = adapter->dev;
	uint32_t	pba;

	INIT_DEBUGOUT("em_init: begin");

	EM_LOCK_ASSERT(adapter);

	em_stop(adapter);

	/*
	 * Packet Buffer Allocation (PBA)
	 * Writing PBA sets the receive portion of the buffer;
	 * the remainder is used for the transmit buffer.
	 *
	 * Devices before the 82547 had a Packet Buffer of 64K.
	 *   Default allocation: PBA=48K for Rx, leaving 16K for Tx.
	 * After the 82547 the buffer was reduced to 40K.
	 *   Default allocation: PBA=30K for Rx, leaving 10K for Tx.
	 *   Note: the default does not leave enough room for Jumbo Frames >10k.
	 */
	switch (adapter->hw.mac_type) {
	case em_82547:
	case em_82547_rev_2: /* 82547: Total Packet Buffer is 40K */
		if (adapter->hw.max_frame_size > EM_RXBUFFER_8192)
			pba = E1000_PBA_22K; /* 22K for Rx, 18K for Tx */
		else
			pba = E1000_PBA_30K; /* 30K for Rx, 10K for Tx */
		adapter->tx_fifo_head = 0;
		adapter->tx_head_addr = pba << EM_TX_HEAD_ADDR_SHIFT;
		adapter->tx_fifo_size =
		    (E1000_PBA_40K - pba) << EM_PBA_BYTES_SHIFT;
		break;
	case em_80003es2lan: /* 80003es2lan: Total Packet Buffer is 48K */
	case em_82571: /* 82571: Total Packet Buffer is 48K */
	case em_82572: /* 82572: Total Packet Buffer is 48K */
		pba = E1000_PBA_32K; /* 32K for Rx, 16K for Tx */
		break;
	case em_82573: /* 82573: Total Packet Buffer is 32K */
		/* Jumbo frames not supported */
		pba = E1000_PBA_12K; /* 12K for Rx, 20K for Tx */
		break;
	case em_ich8lan:
		pba = E1000_PBA_8K;
		break;
	default:
		/* Devices before 82547 had a Packet Buffer of 64K. */
		if (adapter->hw.max_frame_size > EM_RXBUFFER_8192)
			pba = E1000_PBA_40K; /* 40K for Rx, 24K for Tx */
		else
			pba = E1000_PBA_48K; /* 48K for Rx, 16K for Tx */
	}

	INIT_DEBUGOUT1("em_init: pba=%dK", pba);
	E1000_WRITE_REG(&adapter->hw, PBA, pba);

	/* Get the latest mac address; the user can use a LAA */
	bcopy(IF_LLADDR(adapter->ifp), adapter->hw.mac_addr, ETHER_ADDR_LEN);

	/* Initialize the hardware */
	if (em_hardware_init(adapter)) {
		device_printf(dev, "Unable to initialize the hardware\n");
		return;
	}
	em_update_link_status(adapter);

	if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING)
		em_enable_vlans(adapter);

	/* Prepare transmit descriptors and buffers */
	if (em_setup_transmit_structures(adapter)) {
		device_printf(dev, "Could not setup transmit structures\n");
		em_stop(adapter);
		return;
	}
	em_initialize_transmit_unit(adapter);

	/* Setup Multicast table */
	em_set_multi(adapter);

	/* Prepare receive descriptors and buffers */
	if (em_setup_receive_structures(adapter)) {
		device_printf(dev, "Could not setup receive structures\n");
		em_stop(adapter);
		return;
	}
	em_initialize_receive_unit(adapter);

	/* Don't lose promiscuous settings */
	em_set_promisc(adapter);

	ifp->if_drv_flags |= IFF_DRV_RUNNING;
	ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;

	ifp->if_hwassist = 0;
	if (adapter->hw.mac_type >= em_82543) {
		if (ifp->if_capenable & IFCAP_TXCSUM)
			ifp->if_hwassist = EM_CHECKSUM_FEATURES;
		if (ifp->if_capenable & IFCAP_TSO)
			ifp->if_hwassist |= EM_TCPSEG_FEATURES;
	}

	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
	em_clear_hw_cntrs(&adapter->hw);
#ifdef DEVICE_POLLING
	/*
	 * Only enable interrupts if we are not polling; otherwise
	 * make sure they are off.
	 */
	if (ifp->if_capenable & IFCAP_POLLING)
		em_disable_intr(adapter);
	else
#endif /* DEVICE_POLLING */
		em_enable_intr(adapter);

	/* Don't reset the phy next time init gets called */
	adapter->hw.phy_reset_disable = TRUE;
}

static void
em_init(void *arg)
{
	struct adapter *adapter = arg;

	EM_LOCK(adapter);
	em_init_locked(adapter);
	EM_UNLOCK(adapter);
}


#ifdef DEVICE_POLLING
/*********************************************************************
 *
 *  Legacy polling routine
 *
 *********************************************************************/
static void
em_poll(struct ifnet *ifp, enum poll_cmd cmd, int count)
{
	struct adapter *adapter = ifp->if_softc;
	uint32_t reg_icr;

	EM_LOCK(adapter);
	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
		EM_UNLOCK(adapter);
		return;
	}

	if (cmd == POLL_AND_CHECK_STATUS) {
		reg_icr = E1000_READ_REG(&adapter->hw, ICR);
		if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
			callout_stop(&adapter->timer);
			adapter->hw.get_link_status = 1;
			em_check_for_link(&adapter->hw);
			em_update_link_status(adapter);
			callout_reset(&adapter->timer, hz, em_local_timer,
			    adapter);
		}
	}
	em_rxeof(adapter, count);
	em_txeof(adapter);

	if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
		em_start_locked(ifp);
	EM_UNLOCK(adapter);
}

/*********************************************************************
 *
 *  Legacy Interrupt Service routine
 *
 *********************************************************************/
static void
em_intr(void *arg)
{
	struct adapter	*adapter = arg;
	struct ifnet	*ifp;
	uint32_t	reg_icr;

	EM_LOCK(adapter);

	ifp = adapter->ifp;

	if (ifp->if_capenable & IFCAP_POLLING) {
		EM_UNLOCK(adapter);
		return;
	}

	for (;;) {
		reg_icr = E1000_READ_REG(&adapter->hw, ICR);
		if (adapter->hw.mac_type >= em_82571 &&
		    (reg_icr & E1000_ICR_INT_ASSERTED) == 0)
			break;
		else if (reg_icr == 0)
			break;

		/*
		 * XXX: some laptops trigger several spurious interrupts
		 * on em(4) during the resume cycle.  The ICR register
		 * reports an all-ones value in this case.  Processing such
		 * interrupts would lead to a freeze.  I don't know why.
		 */
		if (reg_icr == 0xffffffff)
			break;

		if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
			em_rxeof(adapter, -1);
			em_txeof(adapter);
		}

		/* Link status change */
		if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
			callout_stop(&adapter->timer);
			adapter->hw.get_link_status = 1;
			em_check_for_link(&adapter->hw);
			em_update_link_status(adapter);
			callout_reset(&adapter->timer, hz, em_local_timer,
			    adapter);
		}

		if (reg_icr & E1000_ICR_RXO)
			adapter->rx_overruns++;
	}

	if (ifp->if_drv_flags & IFF_DRV_RUNNING &&
	    !IFQ_DRV_IS_EMPTY(&ifp->if_snd))
		em_start_locked(ifp);

	EM_UNLOCK(adapter);
}

#else  /* if not DEVICE_POLLING, then fast interrupt routines only */

static void
em_handle_link(void *context, int pending)
{
	struct adapter	*adapter = context;
	struct ifnet *ifp;

	ifp = adapter->ifp;

	EM_LOCK(adapter);

	callout_stop(&adapter->timer);
	adapter->hw.get_link_status = 1;
	em_check_for_link(&adapter->hw);
	em_update_link_status(adapter);
	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
	EM_UNLOCK(adapter);
}

static void
em_handle_rxtx(void *context, int pending)
{
	struct adapter	*adapter = context;
	struct ifnet	*ifp;

	NET_LOCK_GIANT();
	ifp = adapter->ifp;

	/*
	 * TODO:
	 * It should be possible to run the tx clean loop without the lock.
	 */
	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
		if (em_rxeof(adapter, adapter->rx_process_limit) != 0)
			taskqueue_enqueue(adapter->tq, &adapter->rxtx_task);
		EM_LOCK(adapter);
		em_txeof(adapter);

		if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
			em_start_locked(ifp);
		EM_UNLOCK(adapter);
	}

	em_enable_intr(adapter);
	NET_UNLOCK_GIANT();
}

/*********************************************************************
 *
 *  Fast Interrupt Service routine
 *
 *********************************************************************/
static void
em_intr_fast(void *arg)
{
	struct adapter	*adapter = arg;
	struct ifnet	*ifp;
	uint32_t	reg_icr;

	ifp = adapter->ifp;

	reg_icr = E1000_READ_REG(&adapter->hw, ICR);

	/* Hot eject?  */
	if (reg_icr == 0xffffffff)
		return;

	/* Definitely not our interrupt.  */
	if (reg_icr == 0x0)
		return;

	/*
	 * Starting with the 82571 chip, bit 31 should be used to
	 * determine whether the interrupt belongs to us.
	 */
	if (adapter->hw.mac_type >= em_82571 &&
	    (reg_icr & E1000_ICR_INT_ASSERTED) == 0)
		return;

	/*
	 * Mask interrupts until the taskqueue is finished running.  This is
	 * cheap, just assume that it is needed.  This also works around the
	 * MSI message reordering errata on certain systems.
	 */
	em_disable_intr(adapter);
	taskqueue_enqueue(adapter->tq, &adapter->rxtx_task);

	/* Link status change */
	if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC))
		taskqueue_enqueue(taskqueue_fast, &adapter->link_task);

	if (reg_icr & E1000_ICR_RXO)
		adapter->rx_overruns++;
}
#endif /* ! DEVICE_POLLING */

/*********************************************************************
 *
 *  Media Ioctl callback
 *
 *  This routine is called whenever the user queries the status of
 *  the interface using ifconfig.
 *
 **********************************************************************/
static void
em_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
{
	struct adapter *adapter = ifp->if_softc;

	INIT_DEBUGOUT("em_media_status: begin");

	EM_LOCK(adapter);
	em_check_for_link(&adapter->hw);
	em_update_link_status(adapter);

	ifmr->ifm_status = IFM_AVALID;
	ifmr->ifm_active = IFM_ETHER;

	if (!adapter->link_active) {
		EM_UNLOCK(adapter);
		return;
	}

	ifmr->ifm_status |= IFM_ACTIVE;

	if ((adapter->hw.media_type == em_media_type_fiber) ||
	    (adapter->hw.media_type == em_media_type_internal_serdes)) {
		if (adapter->hw.mac_type == em_82545)
			ifmr->ifm_active |= IFM_1000_LX | IFM_FDX;
		else
			ifmr->ifm_active |= IFM_1000_SX | IFM_FDX;
	} else {
		switch (adapter->link_speed) {
		case 10:
			ifmr->ifm_active |= IFM_10_T;
			break;
		case 100:
			ifmr->ifm_active |= IFM_100_TX;
			break;
		case 1000:
			ifmr->ifm_active |= IFM_1000_T;
			break;
		}
		if (adapter->link_duplex == FULL_DUPLEX)
			ifmr->ifm_active |= IFM_FDX;
		else
			ifmr->ifm_active |= IFM_HDX;
	}
	EM_UNLOCK(adapter);
}

/*********************************************************************
 *
 *  Media Ioctl callback
 *
 *  This routine is called when the user changes speed/duplex using
 *  the media/mediaopt options with ifconfig.
 *
 **********************************************************************/
static int
em_media_change(struct ifnet *ifp)
{
	struct adapter *adapter = ifp->if_softc;
	struct ifmedia  *ifm = &adapter->media;

	INIT_DEBUGOUT("em_media_change: begin");

	if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
		return (EINVAL);

	EM_LOCK(adapter);
	switch (IFM_SUBTYPE(ifm->ifm_media)) {
	case IFM_AUTO:
		adapter->hw.autoneg = DO_AUTO_NEG;
		adapter->hw.autoneg_advertised = AUTONEG_ADV_DEFAULT;
		break;
	case IFM_1000_LX:
	case IFM_1000_SX:
	case IFM_1000_T:
		adapter->hw.autoneg = DO_AUTO_NEG;
		adapter->hw.autoneg_advertised = ADVERTISE_1000_FULL;
		break;
	case IFM_100_TX:
		adapter->hw.autoneg = FALSE;
		adapter->hw.autoneg_advertised = 0;
		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
			adapter->hw.forced_speed_duplex = em_100_full;
		else
			adapter->hw.forced_speed_duplex = em_100_half;
		break;
	case IFM_10_T:
		adapter->hw.autoneg = FALSE;
		adapter->hw.autoneg_advertised = 0;
		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
			adapter->hw.forced_speed_duplex = em_10_full;
		else
			adapter->hw.forced_speed_duplex = em_10_half;
		break;
	default:
		device_printf(adapter->dev, "Unsupported media type\n");
	}

	/*
	 * As the speed/duplex settings may have changed, we need to
	 * reset the PHY.
	 */
	adapter->hw.phy_reset_disable = FALSE;

	em_init_locked(adapter);
	EM_UNLOCK(adapter);

	return (0);
}

/*********************************************************************
 *
 *  This routine maps the mbufs to tx descriptors.
 *
 *  return 0 on success, positive on failure
 **********************************************************************/
static int
em_encap(struct adapter *adapter, struct mbuf **m_headp)
{
	struct ifnet		*ifp = adapter->ifp;
	bus_dma_segment_t	segs[EM_MAX_SCATTER];
	bus_dmamap_t		map;
	struct em_buffer	*tx_buffer, *tx_buffer_last;
	struct em_tx_desc	*current_tx_desc;
	struct mbuf		*m_head;
	uint32_t		txd_upper, txd_lower, txd_used, txd_saved;
	int			nsegs, i, j;
	int			error, do_tso, tso_desc = 0;

	m_head = *m_headp;
	current_tx_desc = NULL;
	txd_upper = txd_lower = txd_used = txd_saved = 0;

	do_tso = ((m_head->m_pkthdr.csum_flags & CSUM_TSO) != 0);

	/*
	 * Force a cleanup if the number of available TX descriptors
	 * hits the threshold.
	 */
	if (adapter->num_tx_desc_avail <= EM_TX_CLEANUP_THRESHOLD) {
		em_txeof(adapter);
		if (adapter->num_tx_desc_avail <= EM_TX_CLEANUP_THRESHOLD) {
			adapter->no_tx_desc_avail1++;
			return (ENOBUFS);
		}
	}

	/*
	 * When operating in promiscuous mode, hardware encapsulation for
	 * packets is disabled.  This means we have to add the vlan
	 * encapsulation in the driver, since it will have come down from the
	 * VLAN layer with a tag instead of a VLAN header.
	 */
	if ((m_head->m_flags & M_VLANTAG) && adapter->em_insert_vlan_header) {
		struct ether_vlan_header *evl;
		struct ether_header eh;

		m_head = m_pullup(m_head, sizeof(eh));
		if (m_head == NULL) {
			*m_headp = NULL;
			return (ENOBUFS);
		}
		eh = *mtod(m_head, struct ether_header *);
		M_PREPEND(m_head, sizeof(*evl), M_DONTWAIT);
		if (m_head == NULL) {
			*m_headp = NULL;
			return (ENOBUFS);
		}
		m_head = m_pullup(m_head, sizeof(*evl));
		if (m_head == NULL) {
			*m_headp = NULL;
			return (ENOBUFS);
		}
		evl = mtod(m_head, struct ether_vlan_header *);
		bcopy(&eh, evl, sizeof(*evl));
		evl->evl_proto = evl->evl_encap_proto;
		evl->evl_encap_proto = htons(ETHERTYPE_VLAN);
		evl->evl_tag = htons(m_head->m_pkthdr.ether_vtag);
		*m_headp = m_head;
	}

	/*
	 * TSO workaround:
	 *  If an mbuf contains only the header, we need
	 *  to pull 4 bytes of data into it.
	 */
	if (do_tso && (m_head->m_len <= M_TSO_LEN)) {
		m_head = m_pullup(m_head, M_TSO_LEN + 4);
		*m_headp = m_head;
		if (m_head == NULL) {
			return (ENOBUFS);
		}
	}

	/*
	 * Map the packet for DMA.
	 */
	tx_buffer = &adapter->tx_buffer_area[adapter->next_avail_tx_desc];
	tx_buffer_last = tx_buffer;
	map = tx_buffer->map;
	error = bus_dmamap_load_mbuf_sg(adapter->txtag, map, *m_headp, segs,
	    &nsegs, BUS_DMA_NOWAIT);
	if (error == EFBIG) {
		struct mbuf *m;

		m = m_defrag(*m_headp, M_DONTWAIT);
		if (m == NULL) {
			/* Assume m_defrag(9) used only m_get(9). */
			adapter->mbuf_alloc_failed++;
			m_freem(*m_headp);
			*m_headp = NULL;
			return (ENOBUFS);
		}
		*m_headp = m;
		error = bus_dmamap_load_mbuf_sg(adapter->txtag, map, *m_headp,
		    segs, &nsegs, BUS_DMA_NOWAIT);
		if (error != 0) {
			adapter->no_tx_dma_setup++;
			m_freem(*m_headp);
			*m_headp = NULL;
			return (error);
		}
	} else if (error != 0) {
		adapter->no_tx_dma_setup++;
		return (error);
	}
	if (nsegs == 0) {
		m_freem(*m_headp);
		*m_headp = NULL;
		return (EIO);
	}

	/*
	 * TSO hardware workaround: if this packet is not TSO, is only
	 * a single descriptor long, and follows a TSO burst, then we
	 * need to add a sentinel descriptor to prevent premature
	 * writeback.
	 */
	if ((do_tso == 0) && (adapter->tx_tso == TRUE)) {
		if (nsegs == 1)
			tso_desc = TRUE;
		adapter->tx_tso = FALSE;
	}

	if (nsegs > adapter->num_tx_desc_avail - 2) {
		adapter->no_tx_desc_avail2++;
		bus_dmamap_unload(adapter->txtag, map);
		return (ENOBUFS);
	}

	/* Do hardware assists */
	m_head = *m_headp;
	if (ifp->if_hwassist > 0) {
		if (em_tso_setup(adapter, m_head, &txd_upper, &txd_lower)) {
			/* we need to make a final sentinel transmit desc */
			tso_desc = TRUE;
		} else
			em_transmit_checksum_setup(adapter, m_head,
			    &txd_upper, &txd_lower);
	}

	i = adapter->next_avail_tx_desc;
	if (adapter->pcix_82544)
		txd_saved = i;

	for (j = 0; j < nsegs; j++) {
		bus_size_t seg_len;
		bus_addr_t seg_addr;
		/* If adapter is 82544 and on PCIX bus. */
		if (adapter->pcix_82544) {
			DESC_ARRAY	desc_array;
			uint32_t	array_elements, counter;

			/*
			 * Check the Address and Length combination and
			 * split the data accordingly
			 */
			array_elements = em_fill_descriptors(segs[j].ds_addr,
			    segs[j].ds_len, &desc_array);
			for (counter = 0; counter < array_elements; counter++) {
				if (txd_used == adapter->num_tx_desc_avail) {
					adapter->next_avail_tx_desc = txd_saved;
					adapter->no_tx_desc_avail2++;
					bus_dmamap_unload(adapter->txtag, map);
					return (ENOBUFS);
				}
				tx_buffer = &adapter->tx_buffer_area[i];
				current_tx_desc = &adapter->tx_desc_base[i];
				current_tx_desc->buffer_addr = htole64(
				    desc_array.descriptor[counter].address);
				current_tx_desc->lower.data = htole32(
				    (adapter->txd_cmd | txd_lower |
				    (uint16_t)desc_array.descriptor[counter].length));
				current_tx_desc->upper.data = htole32((txd_upper));
				if (++i == adapter->num_tx_desc)
					i = 0;

				tx_buffer->m_head = NULL;
				txd_used++;
			}
		} else {
			tx_buffer = &adapter->tx_buffer_area[i];
			current_tx_desc = &adapter->tx_desc_base[i];
			seg_addr = htole64(segs[j].ds_addr);
			seg_len  = segs[j].ds_len;
			/*
			** TSO Workaround:
			** If this is the last descriptor, we want to
			** split it so we have a small final sentinel
			*/
			if (tso_desc && (j == (nsegs - 1)) && (seg_len > 8)) {
				seg_len -= 4;
				current_tx_desc->buffer_addr = seg_addr;
				current_tx_desc->lower.data = htole32(
				    adapter->txd_cmd | txd_lower | seg_len);
				current_tx_desc->upper.data =
				    htole32(txd_upper);
				if (++i == adapter->num_tx_desc)
					i = 0;
				/* Now make the sentinel */
				++txd_used; /* using an extra txd */
				current_tx_desc = &adapter->tx_desc_base[i];
				tx_buffer = &adapter->tx_buffer_area[i];
				current_tx_desc->buffer_addr =
				    seg_addr + seg_len;
				current_tx_desc->lower.data = htole32(
				    adapter->txd_cmd | txd_lower | 4);
				current_tx_desc->upper.data =
				    htole32(txd_upper);
				if (++i == adapter->num_tx_desc)
					i = 0;
			} else {
				current_tx_desc->buffer_addr = seg_addr;
				current_tx_desc->lower.data = htole32(
				    adapter->txd_cmd | txd_lower | seg_len);
				current_tx_desc->upper.data =
				    htole32(txd_upper);
				if (++i == adapter->num_tx_desc)
					i = 0;
			}
			tx_buffer->m_head = NULL;
		}
	}

	adapter->next_avail_tx_desc = i;
	if (adapter->pcix_82544)
		adapter->num_tx_desc_avail -= txd_used;
	else {
		adapter->num_tx_desc_avail -= nsegs;
		if (tso_desc) /* TSO used an extra for sentinel */
			adapter->num_tx_desc_avail -= txd_used;
	}

	if (m_head->m_flags & M_VLANTAG) {
		/* Set the vlan id. */
		current_tx_desc->upper.fields.special =
		    htole16(m_head->m_pkthdr.ether_vtag);

		/* Tell hardware to add tag. */
		current_tx_desc->lower.data |= htole32(E1000_TXD_CMD_VLE);
	}

	tx_buffer->m_head = m_head;
	tx_buffer_last->map = tx_buffer->map;
	tx_buffer->map = map;
	bus_dmamap_sync(adapter->txtag, map, BUS_DMASYNC_PREWRITE);

	/*
	 * Last Descriptor of Packet needs End Of Packet (EOP).
	 */
	current_tx_desc->lower.data |= htole32(E1000_TXD_CMD_EOP);

	/*
	 * Advance the Transmit Descriptor Tail (TDT); this tells the E1000
	 * that this frame is available to transmit.
	 */
	bus_dmamap_sync(adapter->txdma.dma_tag, adapter->txdma.dma_map,
	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
	if (adapter->hw.mac_type == em_82547 &&
	    adapter->link_duplex == HALF_DUPLEX)
		em_82547_move_tail_locked(adapter);
	else {
		E1000_WRITE_REG(&adapter->hw, TDT, i);
		if (adapter->hw.mac_type == em_82547)
			em_82547_update_fifo_head(adapter, m_head->m_pkthdr.len);
	}

	return (0);
}

/*********************************************************************
 *
 * 82547 workaround to avoid controller hang in half-duplex environments.
 * The workaround is to avoid queuing a large packet that would span
 * the internal Tx FIFO ring boundary.  When such a packet is queued we
 * need to reset the FIFO pointers, and we do that only when the FIFO is
 * quiescent.
 *
 **********************************************************************/
1722static void
1723em_82547_move_tail_locked(struct adapter *adapter)
1724{
1725	uint16_t hw_tdt;
1726	uint16_t sw_tdt;
1727	struct em_tx_desc *tx_desc;
1728	uint16_t length = 0;
1729	boolean_t eop = 0;
1730
1731	EM_LOCK_ASSERT(adapter);
1732
1733	hw_tdt = E1000_READ_REG(&adapter->hw, TDT);
1734	sw_tdt = adapter->next_avail_tx_desc;
1735
1736	while (hw_tdt != sw_tdt) {
1737		tx_desc = &adapter->tx_desc_base[hw_tdt];
1738		length += tx_desc->lower.flags.length;
1739		eop = tx_desc->lower.data & E1000_TXD_CMD_EOP;
1740		if (++hw_tdt == adapter->num_tx_desc)
1741			hw_tdt = 0;
1742
1743		if (eop) {
1744			if (em_82547_fifo_workaround(adapter, length)) {
1745				adapter->tx_fifo_wrk_cnt++;
1746				callout_reset(&adapter->tx_fifo_timer, 1,
1747					em_82547_move_tail, adapter);
1748				break;
1749			}
1750			E1000_WRITE_REG(&adapter->hw, TDT, hw_tdt);
1751			em_82547_update_fifo_head(adapter, length);
1752			length = 0;
1753		}
1754	}
1755}
1756
1757static void
1758em_82547_move_tail(void *arg)
1759{
1760	struct adapter *adapter = arg;
1761
1762	EM_LOCK(adapter);
1763	em_82547_move_tail_locked(adapter);
1764	EM_UNLOCK(adapter);
1765}
1766
1767static int
1768em_82547_fifo_workaround(struct adapter *adapter, int len)
1769{
1770	int fifo_space, fifo_pkt_len;
1771
1772	fifo_pkt_len = roundup2(len + EM_FIFO_HDR, EM_FIFO_HDR);
1773
1774	if (adapter->link_duplex == HALF_DUPLEX) {
1775		fifo_space = adapter->tx_fifo_size - adapter->tx_fifo_head;
1776
1777		if (fifo_pkt_len >= (EM_82547_PKT_THRESH + fifo_space)) {
1778			if (em_82547_tx_fifo_reset(adapter))
1779				return (0);
1780			else
1781				return (1);
1782		}
1783	}
1784
1785	return (0);
1786}
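
/*
 * Note on the check above: fifo_pkt_len is the packet length padded to
 * FIFO-header granularity.  The reset path is taken when that length
 * would extend at least EM_82547_PKT_THRESH bytes past the remaining
 * space before the FIFO wrap point; if the FIFO is not yet quiescent
 * the reset fails and 1 is returned so that the caller defers the packet.
 */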
1787
1788static void
1789em_82547_update_fifo_head(struct adapter *adapter, int len)
1790{
1791	int fifo_pkt_len = roundup2(len + EM_FIFO_HDR, EM_FIFO_HDR);
1792
1793	/* tx_fifo_head is always 16 byte aligned */
1794	adapter->tx_fifo_head += fifo_pkt_len;
1795	if (adapter->tx_fifo_head >= adapter->tx_fifo_size) {
1796		adapter->tx_fifo_head -= adapter->tx_fifo_size;
1797	}
1798}
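	/*
	 * Example with hypothetical sizes: an 8 KB FIFO, a head of 7168 and
	 * a 2048-byte padded packet give 7168 + 2048 = 9216, which the wrap
	 * adjustment above reduces to 9216 - 8192 = 1024.
	 */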
1799
1800
1801static int
1802em_82547_tx_fifo_reset(struct adapter *adapter)
1803{
1804	uint32_t tctl;
1805
1806	if ((E1000_READ_REG(&adapter->hw, TDT) == E1000_READ_REG(&adapter->hw, TDH)) &&
1807	    (E1000_READ_REG(&adapter->hw, TDFT) == E1000_READ_REG(&adapter->hw, TDFH)) &&
1808	    (E1000_READ_REG(&adapter->hw, TDFTS) == E1000_READ_REG(&adapter->hw, TDFHS))&&
1809	    (E1000_READ_REG(&adapter->hw, TDFPC) == 0)) {
1810
1811		/* Disable TX unit */
1812		tctl = E1000_READ_REG(&adapter->hw, TCTL);
1813		E1000_WRITE_REG(&adapter->hw, TCTL, tctl & ~E1000_TCTL_EN);
1814
1815		/* Reset FIFO pointers */
1816		E1000_WRITE_REG(&adapter->hw, TDFT,  adapter->tx_head_addr);
1817		E1000_WRITE_REG(&adapter->hw, TDFH,  adapter->tx_head_addr);
1818		E1000_WRITE_REG(&adapter->hw, TDFTS, adapter->tx_head_addr);
1819		E1000_WRITE_REG(&adapter->hw, TDFHS, adapter->tx_head_addr);
1820
1821		/* Re-enable TX unit */
1822		E1000_WRITE_REG(&adapter->hw, TCTL, tctl);
1823		E1000_WRITE_FLUSH(&adapter->hw);
1824
1825		adapter->tx_fifo_head = 0;
1826		adapter->tx_fifo_reset_cnt++;
1827
1828		return (TRUE);
1829	}
1830	else {
1831		return (FALSE);
1832	}
1833}
1834
1835static void
1836em_set_promisc(struct adapter *adapter)
1837{
1838	struct ifnet	*ifp = adapter->ifp;
1839	uint32_t	reg_rctl;
1840
1841	reg_rctl = E1000_READ_REG(&adapter->hw, RCTL);
1842
1843	if (ifp->if_flags & IFF_PROMISC) {
1844		reg_rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
1845		E1000_WRITE_REG(&adapter->hw, RCTL, reg_rctl);
1846		/* Disable VLAN stripping in promiscuous mode.
1847		 * This allows bridging of VLAN-tagged frames
1848		 * and also lets VLAN tags be seen in tcpdump.
1849		 */
1850		if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING)
1851			em_disable_vlans(adapter);
1852		adapter->em_insert_vlan_header = 1;
1853	} else if (ifp->if_flags & IFF_ALLMULTI) {
1854		reg_rctl |= E1000_RCTL_MPE;
1855		reg_rctl &= ~E1000_RCTL_UPE;
1856		E1000_WRITE_REG(&adapter->hw, RCTL, reg_rctl);
1857		adapter->em_insert_vlan_header = 0;
1858	} else
1859		adapter->em_insert_vlan_header = 0;
1860}
1861
1862static void
1863em_disable_promisc(struct adapter *adapter)
1864{
1865	struct ifnet	*ifp = adapter->ifp;
1866	uint32_t	reg_rctl;
1867
1868	reg_rctl = E1000_READ_REG(&adapter->hw, RCTL);
1869
1870	reg_rctl &=  (~E1000_RCTL_UPE);
1871	reg_rctl &=  (~E1000_RCTL_MPE);
1872	E1000_WRITE_REG(&adapter->hw, RCTL, reg_rctl);
1873
1874	if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING)
1875		em_enable_vlans(adapter);
1876	adapter->em_insert_vlan_header = 0;
1877}
1878
1879
1880/*********************************************************************
1881 *  Multicast Update
1882 *
1883 *  This routine is called whenever the multicast address list is updated.
1884 *
1885 **********************************************************************/
1886
1887static void
1888em_set_multi(struct adapter *adapter)
1889{
1890	struct ifnet	*ifp = adapter->ifp;
1891	struct ifmultiaddr *ifma;
1892	uint32_t reg_rctl = 0;
1893	uint8_t  mta[MAX_NUM_MULTICAST_ADDRESSES * ETH_LENGTH_OF_ADDRESS];
1894	int mcnt = 0;
1895
1896	IOCTL_DEBUGOUT("em_set_multi: begin");
1897
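	/*
	 * The sequence below holds the 82542 rev 2.0 receiver in reset
	 * (RCTL.RST) and disables MWI while the multicast table array is
	 * rewritten; both settings are restored once the update completes.
	 */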
1898	if (adapter->hw.mac_type == em_82542_rev2_0) {
1899		reg_rctl = E1000_READ_REG(&adapter->hw, RCTL);
1900		if (adapter->hw.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
1901			em_pci_clear_mwi(&adapter->hw);
1902		reg_rctl |= E1000_RCTL_RST;
1903		E1000_WRITE_REG(&adapter->hw, RCTL, reg_rctl);
1904		msec_delay(5);
1905	}
1906
1907	IF_ADDR_LOCK(ifp);
1908	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
1909		if (ifma->ifma_addr->sa_family != AF_LINK)
1910			continue;
1911
1912		if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
1913			break;
1914
1915		bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
1916		    &mta[mcnt*ETH_LENGTH_OF_ADDRESS], ETH_LENGTH_OF_ADDRESS);
1917		mcnt++;
1918	}
1919	IF_ADDR_UNLOCK(ifp);
1920
1921	if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES) {
1922		reg_rctl = E1000_READ_REG(&adapter->hw, RCTL);
1923		reg_rctl |= E1000_RCTL_MPE;
1924		E1000_WRITE_REG(&adapter->hw, RCTL, reg_rctl);
1925	} else
1926		em_mc_addr_list_update(&adapter->hw, mta, mcnt, 0, 1);
1927
1928	if (adapter->hw.mac_type == em_82542_rev2_0) {
1929		reg_rctl = E1000_READ_REG(&adapter->hw, RCTL);
1930		reg_rctl &= ~E1000_RCTL_RST;
1931		E1000_WRITE_REG(&adapter->hw, RCTL, reg_rctl);
1932		msec_delay(5);
1933		if (adapter->hw.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
1934			em_pci_set_mwi(&adapter->hw);
1935	}
1936}
1937
1938
1939/*********************************************************************
1940 *  Timer routine
1941 *
1942 *  This routine checks for link status and updates statistics.
1943 *
1944 **********************************************************************/
1945
1946static void
1947em_local_timer(void *arg)
1948{
1949	struct adapter	*adapter = arg;
1950	struct ifnet	*ifp = adapter->ifp;
1951
1952	EM_LOCK(adapter);
1953
1954	em_check_for_link(&adapter->hw);
1955	em_update_link_status(adapter);
1956	em_update_stats_counters(adapter);
1957	if (em_display_debug_stats && (ifp->if_drv_flags & IFF_DRV_RUNNING))
1958		em_print_hw_stats(adapter);
1959	em_smartspeed(adapter);
1960
1961	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
1962
1963	EM_UNLOCK(adapter);
1964}
1965
1966static void
1967em_update_link_status(struct adapter *adapter)
1968{
1969	struct ifnet *ifp = adapter->ifp;
1970	device_t dev = adapter->dev;
1971
1972	if (E1000_READ_REG(&adapter->hw, STATUS) & E1000_STATUS_LU) {
1973		if (adapter->link_active == 0) {
1974			em_get_speed_and_duplex(&adapter->hw, &adapter->link_speed,
1975			    &adapter->link_duplex);
1976			/* Check if we may set SPEED_MODE bit on PCI-E */
1977			if ((adapter->link_speed == SPEED_1000) &&
1978			    ((adapter->hw.mac_type == em_82571) ||
1979			    (adapter->hw.mac_type == em_82572))) {
1980				int tarc0;
1981
1982				tarc0 = E1000_READ_REG(&adapter->hw, TARC0);
1983				tarc0 |= SPEED_MODE_BIT;
1984				E1000_WRITE_REG(&adapter->hw, TARC0, tarc0);
1985			}
1986			if (bootverbose)
1987				device_printf(dev, "Link is up %d Mbps %s\n",
1988				    adapter->link_speed,
1989				    ((adapter->link_duplex == FULL_DUPLEX) ?
1990				    "Full Duplex" : "Half Duplex"));
1991			adapter->link_active = 1;
1992			adapter->smartspeed = 0;
1993			ifp->if_baudrate = adapter->link_speed * 1000000;
1994			if_link_state_change(ifp, LINK_STATE_UP);
1995		}
1996	} else {
1997		if (adapter->link_active == 1) {
1998			ifp->if_baudrate = adapter->link_speed = 0;
1999			adapter->link_duplex = 0;
2000			if (bootverbose)
2001				device_printf(dev, "Link is Down\n");
2002			adapter->link_active = 0;
2003			if_link_state_change(ifp, LINK_STATE_DOWN);
2004		}
2005	}
2006}
2007
2008/*********************************************************************
2009 *
2010 *  This routine disables all traffic on the adapter by issuing a
2011 *  global reset on the MAC and deallocates TX/RX buffers.
2012 *
2013 **********************************************************************/
2014
2015static void
2016em_stop(void *arg)
2017{
2018	struct adapter	*adapter = arg;
2019	struct ifnet	*ifp = adapter->ifp;
2020
2021	EM_LOCK_ASSERT(adapter);
2022
2023	INIT_DEBUGOUT("em_stop: begin");
2024
2025	em_disable_intr(adapter);
2026	em_reset_hw(&adapter->hw);
2027	callout_stop(&adapter->timer);
2028	callout_stop(&adapter->tx_fifo_timer);
2029	em_free_transmit_structures(adapter);
2030	em_free_receive_structures(adapter);
2031
2032	/* Tell the stack that the interface is no longer active */
2033	ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
2034}
2035
2036
2037/********************************************************************
2038 *
2039 *  Determine hardware revision.
2040 *
2041 **********************************************************************/
2042static void
2043em_identify_hardware(struct adapter *adapter)
2044{
2045	device_t dev = adapter->dev;
2046
2047	/* Make sure our PCI config space has the necessary stuff set */
2048	pci_enable_busmaster(dev);
2049	pci_enable_io(dev, SYS_RES_MEMORY);
2050	adapter->hw.pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2);
2051
2052	/* Save off the information about this board */
2053	adapter->hw.vendor_id = pci_get_vendor(dev);
2054	adapter->hw.device_id = pci_get_device(dev);
2055	adapter->hw.revision_id = pci_get_revid(dev);
2056	adapter->hw.subsystem_vendor_id = pci_get_subvendor(dev);
2057	adapter->hw.subsystem_id = pci_get_subdevice(dev);
2058
2059	/* Identify the MAC */
2060	if (em_set_mac_type(&adapter->hw))
2061		device_printf(dev, "Unknown MAC Type\n");
2062
2063	if (adapter->hw.mac_type == em_82541 || adapter->hw.mac_type == em_82541_rev_2 ||
2064	   adapter->hw.mac_type == em_82547 || adapter->hw.mac_type == em_82547_rev_2)
2065		adapter->hw.phy_init_script = TRUE;
2066}
2067
2068static int
2069em_allocate_pci_resources(struct adapter *adapter)
2070{
2071	device_t	dev = adapter->dev;
2072	int		val, rid;
2073
2074	rid = PCIR_BAR(0);
2075	adapter->res_memory = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
2076	    &rid, RF_ACTIVE);
2077	if (adapter->res_memory == NULL) {
2078		device_printf(dev, "Unable to allocate bus resource: memory\n");
2079		return (ENXIO);
2080	}
2081	adapter->osdep.mem_bus_space_tag =
2082	    rman_get_bustag(adapter->res_memory);
2083	adapter->osdep.mem_bus_space_handle = rman_get_bushandle(adapter->res_memory);
2084	adapter->hw.hw_addr = (uint8_t *)&adapter->osdep.mem_bus_space_handle;
2085
2086	if (adapter->hw.mac_type > em_82543) {
2087		/* Figure out where our IO BAR is. */
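		/*
		 * Each 32-bit BAR occupies 4 bytes of PCI config space, and a
		 * 64-bit memory BAR occupies two consecutive slots, which is
		 * why rid advances by an extra 4 when one is encountered.
		 */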
2088		for (rid = PCIR_BAR(0); rid < PCIR_CIS;) {
2089			val = pci_read_config(dev, rid, 4);
2090			if (E1000_BAR_TYPE(val) == E1000_BAR_TYPE_IO) {
2091				adapter->io_rid = rid;
2092				break;
2093			}
2094			rid += 4;
2095			/* check for 64bit BAR */
2096			if (E1000_BAR_MEM_TYPE(val) == E1000_BAR_MEM_TYPE_64BIT)
2097				rid += 4;
2098		}
2099		if (rid >= PCIR_CIS) {
2100			device_printf(dev, "Unable to locate IO BAR\n");
2101			return (ENXIO);
2102		}
2103		adapter->res_ioport = bus_alloc_resource_any(dev, SYS_RES_IOPORT,
2104		    &adapter->io_rid, RF_ACTIVE);
2105		if (adapter->res_ioport == NULL) {
2106			device_printf(dev, "Unable to allocate bus resource: "
2107			    "ioport\n");
2108			return (ENXIO);
2109		}
2110		adapter->hw.io_base = 0;
2111		adapter->osdep.io_bus_space_tag = rman_get_bustag(adapter->res_ioport);
2112		adapter->osdep.io_bus_space_handle =
2113		    rman_get_bushandle(adapter->res_ioport);
2114	}
2115
2116	/* For ICH8 we need to find the flash memory. */
2117	if (adapter->hw.mac_type == em_ich8lan) {
2118		rid = EM_FLASH;
2119
2120		adapter->flash_mem = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
2121		    &rid, RF_ACTIVE);
2122		adapter->osdep.flash_bus_space_tag = rman_get_bustag(adapter->flash_mem);
2123		adapter->osdep.flash_bus_space_handle =
2124		    rman_get_bushandle(adapter->flash_mem);
2125	}
2126
2127	rid = 0x0;
2128	adapter->res_interrupt = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid,
2129	    RF_SHAREABLE | RF_ACTIVE);
2130	if (adapter->res_interrupt == NULL) {
2131		device_printf(dev, "Unable to allocate bus resource: "
2132		    "interrupt\n");
2133		return (ENXIO);
2134	}
2135
2136	adapter->hw.back = &adapter->osdep;
2137
2138	return (0);
2139}
2140
2141int
2142em_allocate_intr(struct adapter *adapter)
2143{
2144	device_t dev = adapter->dev;
2145	int error;
2146
2147	/* Manually turn off all interrupts */
2148	E1000_WRITE_REG(&adapter->hw, IMC, 0xffffffff);
2149
2150#ifdef DEVICE_POLLING
2151	if (adapter->int_handler_tag == NULL && (error = bus_setup_intr(dev,
2152	    adapter->res_interrupt, INTR_TYPE_NET | INTR_MPSAFE, em_intr, adapter,
2153	    &adapter->int_handler_tag)) != 0) {
2154		device_printf(dev, "Failed to register interrupt handler\n");
2155		return (error);
2156	}
2157#else
2158	/*
2159	 * Try allocating a fast interrupt and the associated deferred
2160	 * processing contexts.
2161	 */
2162	TASK_INIT(&adapter->rxtx_task, 0, em_handle_rxtx, adapter);
2163	TASK_INIT(&adapter->link_task, 0, em_handle_link, adapter);
2164	adapter->tq = taskqueue_create_fast("em_taskq", M_NOWAIT,
2165	    taskqueue_thread_enqueue, &adapter->tq);
2166	taskqueue_start_threads(&adapter->tq, 1, PI_NET, "%s taskq",
2167	    device_get_nameunit(adapter->dev));
2168	if ((error = bus_setup_intr(dev, adapter->res_interrupt,
2169	    INTR_TYPE_NET | INTR_FAST, em_intr_fast, adapter,
2170	    &adapter->int_handler_tag)) != 0) {
2171		device_printf(dev, "Failed to register fast interrupt "
2172			    "handler: %d\n", error);
2173		taskqueue_free(adapter->tq);
2174		adapter->tq = NULL;
2175		return (error);
2176	}
2177#endif
2178
2179	em_enable_intr(adapter);
2180	return (0);
2181}
2182
2183static void
2184em_free_intr(struct adapter *adapter)
2185{
2186	device_t dev = adapter->dev;
2187
2188	if (adapter->int_handler_tag != NULL) {
2189		bus_teardown_intr(dev, adapter->res_interrupt, adapter->int_handler_tag);
2190		adapter->int_handler_tag = NULL;
2191	}
2192	if (adapter->tq != NULL) {
2193		taskqueue_drain(adapter->tq, &adapter->rxtx_task);
2194		taskqueue_drain(taskqueue_fast, &adapter->link_task);
2195		taskqueue_free(adapter->tq);
2196		adapter->tq = NULL;
2197	}
2198}
2199
2200static void
2201em_free_pci_resources(struct adapter *adapter)
2202{
2203	device_t dev = adapter->dev;
2204
2205	if (adapter->res_interrupt != NULL)
2206		bus_release_resource(dev, SYS_RES_IRQ, 0, adapter->res_interrupt);
2207
2208	if (adapter->res_memory != NULL)
2209		bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BAR(0),
2210		    adapter->res_memory);
2211
2212	if (adapter->flash_mem != NULL)
2213		bus_release_resource(dev, SYS_RES_MEMORY, EM_FLASH,
2214		    adapter->flash_mem);
2215
2216	if (adapter->res_ioport != NULL)
2217		bus_release_resource(dev, SYS_RES_IOPORT, adapter->io_rid,
2218		    adapter->res_ioport);
2219}
2220
2221/*********************************************************************
2222 *
2223 *  Initialize the hardware to a configuration as specified by the
2224 *  adapter structure. The controller is reset, the EEPROM is
2225 *  verified, the MAC address is set, then the shared initialization
2226 *  routines are called.
2227 *
2228 **********************************************************************/
2229static int
2230em_hardware_init(struct adapter *adapter)
2231{
2232	device_t dev = adapter->dev;
2233	uint16_t rx_buffer_size;
2234
2235	INIT_DEBUGOUT("em_hardware_init: begin");
2236	/* Issue a global reset */
2237	em_reset_hw(&adapter->hw);
2238
2239	/* When hardware is reset, fifo_head is also reset */
2240	adapter->tx_fifo_head = 0;
2241
2242	/* Make sure we have a good EEPROM before we read from it */
2243	if (em_validate_eeprom_checksum(&adapter->hw) < 0) {
2244		device_printf(dev, "The EEPROM Checksum Is Not Valid\n");
2245		return (EIO);
2246	}
2247
2248	if (em_read_part_num(&adapter->hw, &(adapter->part_num)) < 0) {
2249		device_printf(dev, "EEPROM read error while reading part "
2250		    "number\n");
2251		return (EIO);
2252	}
2253
2254	/* Set up smart power down as default off on newer adapters. */
2255	if (!em_smart_pwr_down &&
2256	    (adapter->hw.mac_type == em_82571 || adapter->hw.mac_type == em_82572)) {
2257		uint16_t phy_tmp = 0;
2258
2259		/* Speed up time to link by disabling smart power down. */
2260		em_read_phy_reg(&adapter->hw, IGP02E1000_PHY_POWER_MGMT, &phy_tmp);
2261		phy_tmp &= ~IGP02E1000_PM_SPD;
2262		em_write_phy_reg(&adapter->hw, IGP02E1000_PHY_POWER_MGMT, phy_tmp);
2263	}
2264
2265	/*
2266	 * These parameters control the automatic generation (Tx) and
2267	 * response (Rx) to Ethernet PAUSE frames.
2268	 * - High water mark should allow for at least two frames to be
2269	 *   received after sending an XOFF.
2270	 * - Low water mark works best when it is very near the high water mark.
2271	 *   This allows the receiver to restart by sending XON when it has
2272	 *   drained a bit. Here we use an arbitrary value of 1500 which will
2273	 *   restart after one full frame is pulled from the buffer. There
2274	 *   could be several smaller frames in the buffer and if so they will
2275	 *   not trigger the XON until their total number reduces the buffer
2276	 *   by 1500.
2277	 * - The pause time is fairly large: 0x1000 x 512 bit times, or about 2 ms at 1 Gb/s.
2278	 */
2279	rx_buffer_size = ((E1000_READ_REG(&adapter->hw, PBA) & 0xffff) << 10);
2280
2281	adapter->hw.fc_high_water = rx_buffer_size -
2282	    roundup2(adapter->hw.max_frame_size, 1024);
2283	adapter->hw.fc_low_water = adapter->hw.fc_high_water - 1500;
2284	if (adapter->hw.mac_type == em_80003es2lan)
2285		adapter->hw.fc_pause_time = 0xFFFF;
2286	else
2287		adapter->hw.fc_pause_time = 0x1000;
2288	adapter->hw.fc_send_xon = TRUE;
2289	adapter->hw.fc = em_fc_full;
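
	/*
	 * Worked example (illustrative values): if the PBA register reports
	 * 48 KB of RX packet buffer, rx_buffer_size is 49152.  With a
	 * 1518-byte max frame, roundup2(1518, 1024) = 2048, so
	 * fc_high_water = 49152 - 2048 = 47104 and
	 * fc_low_water = 47104 - 1500 = 45604.
	 */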
2290
2291	if (em_init_hw(&adapter->hw) < 0) {
2292		device_printf(dev, "Hardware Initialization Failed\n");
2293		return (EIO);
2294	}
2295
2296	em_check_for_link(&adapter->hw);
2297
2298	return (0);
2299}
2300
2301/*********************************************************************
2302 *
2303 *  Setup networking device structure and register an interface.
2304 *
2305 **********************************************************************/
2306static void
2307em_setup_interface(device_t dev, struct adapter *adapter)
2308{
2309	struct ifnet   *ifp;
2310	INIT_DEBUGOUT("em_setup_interface: begin");
2311
2312	ifp = adapter->ifp = if_alloc(IFT_ETHER);
2313	if (ifp == NULL)
2314		panic("%s: cannot if_alloc()", device_get_nameunit(dev));
2315	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
2316	ifp->if_mtu = ETHERMTU;
2317	ifp->if_init =  em_init;
2318	ifp->if_softc = adapter;
2319	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
2320	ifp->if_ioctl = em_ioctl;
2321	ifp->if_start = em_start;
2322	ifp->if_watchdog = em_watchdog;
2323	IFQ_SET_MAXLEN(&ifp->if_snd, adapter->num_tx_desc - 1);
2324	ifp->if_snd.ifq_drv_maxlen = adapter->num_tx_desc - 1;
2325	IFQ_SET_READY(&ifp->if_snd);
2326
2327	ether_ifattach(ifp, adapter->hw.mac_addr);
2328
2329	ifp->if_capabilities = ifp->if_capenable = 0;
2330
2331	if (adapter->hw.mac_type >= em_82543) {
2332		ifp->if_capabilities |= IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM;
2333		ifp->if_capenable |= IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM;
2334	}
2335
2336	/* Enable TSO if available */
2337	if ((adapter->hw.mac_type > em_82544) &&
2338	    (adapter->hw.mac_type != em_82547)) {
2339		ifp->if_capabilities |= IFCAP_TSO;
2340		ifp->if_capenable |= IFCAP_TSO;
2341	}
2342
2343	/*
2344	 * Tell the upper layer(s) we support long frames.
2345	 */
2346	ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header);
2347	ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU;
2348	ifp->if_capenable |= IFCAP_VLAN_MTU;
2349
2350#ifdef DEVICE_POLLING
2351	ifp->if_capabilities |= IFCAP_POLLING;
2352#endif
2353
2354	/*
2355	 * Specify the media types supported by this adapter and register
2356	 * callbacks to update media and link information
2357	 */
2358	ifmedia_init(&adapter->media, IFM_IMASK, em_media_change,
2359	    em_media_status);
2360	if ((adapter->hw.media_type == em_media_type_fiber) ||
2361	    (adapter->hw.media_type == em_media_type_internal_serdes)) {
2362		u_char fiber_type = IFM_1000_SX;	/* default type */
2363
2364		if (adapter->hw.mac_type == em_82545)
2365			fiber_type = IFM_1000_LX;
2366		ifmedia_add(&adapter->media, IFM_ETHER | fiber_type | IFM_FDX,
2367		    0, NULL);
2368		ifmedia_add(&adapter->media, IFM_ETHER | fiber_type, 0, NULL);
2369	} else {
2370		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T, 0, NULL);
2371		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T | IFM_FDX,
2372			    0, NULL);
2373		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX,
2374			    0, NULL);
2375		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX | IFM_FDX,
2376			    0, NULL);
2377		if (adapter->hw.phy_type != em_phy_ife) {
2378			ifmedia_add(&adapter->media,
2379				IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
2380			ifmedia_add(&adapter->media,
2381				IFM_ETHER | IFM_1000_T, 0, NULL);
2382		}
2383	}
2384	ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL);
2385	ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO);
2386}
2387
2388
2389/*********************************************************************
2390 *
2391 *  Workaround for SmartSpeed on 82541 and 82547 controllers
2392 *
2393 **********************************************************************/
2394static void
2395em_smartspeed(struct adapter *adapter)
2396{
2397	uint16_t phy_tmp;
2398
2399	if (adapter->link_active || (adapter->hw.phy_type != em_phy_igp) ||
2400	    adapter->hw.autoneg == 0 ||
2401	    (adapter->hw.autoneg_advertised & ADVERTISE_1000_FULL) == 0)
2402		return;
2403
2404	if (adapter->smartspeed == 0) {
2405		/* If the Master/Slave config fault is asserted twice,
2406		 * we assume a back-to-back connection */
2407		em_read_phy_reg(&adapter->hw, PHY_1000T_STATUS, &phy_tmp);
2408		if (!(phy_tmp & SR_1000T_MS_CONFIG_FAULT))
2409			return;
2410		em_read_phy_reg(&adapter->hw, PHY_1000T_STATUS, &phy_tmp);
2411		if (phy_tmp & SR_1000T_MS_CONFIG_FAULT) {
2412			em_read_phy_reg(&adapter->hw, PHY_1000T_CTRL, &phy_tmp);
2413			if (phy_tmp & CR_1000T_MS_ENABLE) {
2414				phy_tmp &= ~CR_1000T_MS_ENABLE;
2415				em_write_phy_reg(&adapter->hw, PHY_1000T_CTRL,
2416				    phy_tmp);
2417				adapter->smartspeed++;
2418				if (adapter->hw.autoneg &&
2419				   !em_phy_setup_autoneg(&adapter->hw) &&
2420				   !em_read_phy_reg(&adapter->hw, PHY_CTRL,
2421				    &phy_tmp)) {
2422					phy_tmp |= (MII_CR_AUTO_NEG_EN |
2423						    MII_CR_RESTART_AUTO_NEG);
2424					em_write_phy_reg(&adapter->hw, PHY_CTRL,
2425					    phy_tmp);
2426				}
2427			}
2428		}
2429		return;
2430	} else if (adapter->smartspeed == EM_SMARTSPEED_DOWNSHIFT) {
2431		/* If still no link, perhaps the cable has only 2 or 3 pairs */
2432		em_read_phy_reg(&adapter->hw, PHY_1000T_CTRL, &phy_tmp);
2433		phy_tmp |= CR_1000T_MS_ENABLE;
2434		em_write_phy_reg(&adapter->hw, PHY_1000T_CTRL, phy_tmp);
2435		if (adapter->hw.autoneg &&
2436		   !em_phy_setup_autoneg(&adapter->hw) &&
2437		   !em_read_phy_reg(&adapter->hw, PHY_CTRL, &phy_tmp)) {
2438			phy_tmp |= (MII_CR_AUTO_NEG_EN |
2439				    MII_CR_RESTART_AUTO_NEG);
2440			em_write_phy_reg(&adapter->hw, PHY_CTRL, phy_tmp);
2441		}
2442	}
2443	/* Restart process after EM_SMARTSPEED_MAX iterations */
2444	if (adapter->smartspeed++ == EM_SMARTSPEED_MAX)
2445		adapter->smartspeed = 0;
2446}
2447
2448
2449/*
2450 * Manage DMA'able memory.
2451 */
2452static void
2453em_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
2454{
2455	if (error)
2456		return;
2457	*(bus_addr_t *) arg = segs[0].ds_addr;
2458}
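
/*
 * em_dma_malloc() below loads with BUS_DMA_NOWAIT, so this callback runs
 * synchronously from bus_dmamap_load(), and the tag is created with
 * nsegments = 1; saving segs[0].ds_addr through the arg pointer is
 * therefore sufficient.
 */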
2459
2460static int
2461em_dma_malloc(struct adapter *adapter, bus_size_t size, struct em_dma_alloc *dma,
2462	int mapflags)
2463{
2464	int error;
2465
2466	error = bus_dma_tag_create(bus_get_dma_tag(adapter->dev), /* parent */
2467				EM_DBA_ALIGN, 0,	/* alignment, bounds */
2468				BUS_SPACE_MAXADDR,	/* lowaddr */
2469				BUS_SPACE_MAXADDR,	/* highaddr */
2470				NULL, NULL,		/* filter, filterarg */
2471				size,			/* maxsize */
2472				1,			/* nsegments */
2473				size,			/* maxsegsize */
2474				0,			/* flags */
2475				NULL,			/* lockfunc */
2476				NULL,			/* lockarg */
2477				&dma->dma_tag);
2478	if (error) {
2479		device_printf(adapter->dev, "%s: bus_dma_tag_create failed: %d\n",
2480		    __func__, error);
2481		goto fail_0;
2482	}
2483
2484	error = bus_dmamem_alloc(dma->dma_tag, (void**) &dma->dma_vaddr,
2485	    BUS_DMA_NOWAIT, &dma->dma_map);
2486	if (error) {
2487		device_printf(adapter->dev, "%s: bus_dmamem_alloc(%ju) failed: %d\n",
2488		    __func__, (uintmax_t)size, error);
2489		goto fail_2;
2490	}
2491
2492	dma->dma_paddr = 0;
2493	error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
2494	    size, em_dmamap_cb, &dma->dma_paddr, mapflags | BUS_DMA_NOWAIT);
2495	if (error || dma->dma_paddr == 0) {
2496		device_printf(adapter->dev, "%s: bus_dmamap_load failed: %d\n",
2497		    __func__, error);
2498		goto fail_3;
2499	}
2500
2501	return (0);
2502
2503fail_3:
2504	bus_dmamap_unload(dma->dma_tag, dma->dma_map);
2505fail_2:
2506	bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
2507	bus_dma_tag_destroy(dma->dma_tag);
2508fail_0:
2509	dma->dma_map = NULL;
2510	dma->dma_tag = NULL;
2511
2512	return (error);
2513}
2514
2515static void
2516em_dma_free(struct adapter *adapter, struct em_dma_alloc *dma)
2517{
2518	if (dma->dma_tag == NULL)
2519		return;
2520	if (dma->dma_map != NULL) {
2521		bus_dmamap_sync(dma->dma_tag, dma->dma_map,
2522		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
2523		bus_dmamap_unload(dma->dma_tag, dma->dma_map);
2524		bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
2525		dma->dma_map = NULL;
2526	}
2527	bus_dma_tag_destroy(dma->dma_tag);
2528	dma->dma_tag = NULL;
2529}
2530
2531
2532/*********************************************************************
2533 *
2534 *  Allocate memory for tx_buffer structures. The tx_buffer stores all
2535 *  the information needed to transmit a packet on the wire.
2536 *
2537 **********************************************************************/
2538static int
2539em_allocate_transmit_structures(struct adapter *adapter)
2540{
2541	adapter->tx_buffer_area = malloc(sizeof(struct em_buffer) *
2542	    adapter->num_tx_desc, M_DEVBUF, M_NOWAIT);
2543	if (adapter->tx_buffer_area == NULL) {
2544		device_printf(adapter->dev, "Unable to allocate tx_buffer memory\n");
2545		return (ENOMEM);
2546	}
2547
2548	bzero(adapter->tx_buffer_area, sizeof(struct em_buffer) * adapter->num_tx_desc);
2549
2550	return (0);
2551}
2552
2553/*********************************************************************
2554 *
2555 *  Allocate and initialize transmit structures.
2556 *
2557 **********************************************************************/
2558static int
2559em_setup_transmit_structures(struct adapter *adapter)
2560{
2561	struct ifnet   *ifp = adapter->ifp;
2562	device_t dev = adapter->dev;
2563	struct em_buffer *tx_buffer;
2564	bus_size_t size, segsize;
2565	int error, i;
2566
2567	/*
2568	 * Setup DMA descriptor areas.
2569	 */
2570	segsize = size = roundup2(adapter->hw.max_frame_size, MCLBYTES);
2571
2572	/* Overrides for TSO - want large sizes */
2573	if (ifp->if_hwassist & EM_TCPSEG_FEATURES) {
2574		size = EM_TSO_SIZE;
2575		segsize = PAGE_SIZE;
2576	}
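	/*
	 * A TSO request can span up to roughly 64 KB of payload (EM_TSO_SIZE),
	 * so the tag's maxsize must be far larger than a single cluster;
	 * PAGE_SIZE is a conservative per-segment limit here.
	 */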
2577
2578	if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),	/* parent */
2579				1, 0,			/* alignment, bounds */
2580				BUS_SPACE_MAXADDR,	/* lowaddr */
2581				BUS_SPACE_MAXADDR,	/* highaddr */
2582				NULL, NULL,		/* filter, filterarg */
2583				size,			/* maxsize */
2584				EM_MAX_SCATTER,		/* nsegments */
2585				segsize,		/* maxsegsize */
2586				0,			/* flags */
2587				NULL,		/* lockfunc */
2588				NULL,		/* lockarg */
2589				&adapter->txtag)) != 0) {
2590		device_printf(dev, "Unable to allocate TX DMA tag\n");
2591		goto fail;
2592	}
2593
2594	if ((error = em_allocate_transmit_structures(adapter)) != 0)
2595		goto fail;
2596
2597	bzero(adapter->tx_desc_base, (sizeof(struct em_tx_desc)) * adapter->num_tx_desc);
2598	tx_buffer = adapter->tx_buffer_area;
2599	for (i = 0; i < adapter->num_tx_desc; i++) {
2600		error = bus_dmamap_create(adapter->txtag, 0, &tx_buffer->map);
2601		if (error != 0) {
2602			device_printf(dev, "Unable to create TX DMA map\n");
2603			goto fail;
2604		}
2605		tx_buffer++;
2606	}
2607
2608	adapter->next_avail_tx_desc = 0;
2609	adapter->oldest_used_tx_desc = 0;
2610
2611	/* Set number of descriptors available */
2612	adapter->num_tx_desc_avail = adapter->num_tx_desc;
2613
2614	/* Set checksum context */
2615	adapter->active_checksum_context = OFFLOAD_NONE;
2616	bus_dmamap_sync(adapter->txdma.dma_tag, adapter->txdma.dma_map,
2617	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
2618
2619	return (0);
2620
2621fail:
2622	em_free_transmit_structures(adapter);
2623	return (error);
2624}
2625
2626/*********************************************************************
2627 *
2628 *  Enable transmit unit.
2629 *
2630 **********************************************************************/
2631static void
2632em_initialize_transmit_unit(struct adapter *adapter)
2633{
2634	uint32_t	reg_tctl, reg_tarc;
2635	uint32_t	reg_tipg = 0;
2636	uint64_t	bus_addr;
2637
2638	INIT_DEBUGOUT("em_initialize_transmit_unit: begin");
2639	/* Setup the Base and Length of the Tx Descriptor Ring */
2640	bus_addr = adapter->txdma.dma_paddr;
2641	E1000_WRITE_REG(&adapter->hw, TDLEN,
2642	    adapter->num_tx_desc * sizeof(struct em_tx_desc));
2643	E1000_WRITE_REG(&adapter->hw, TDBAH, (uint32_t)(bus_addr >> 32));
2644	E1000_WRITE_REG(&adapter->hw, TDBAL, (uint32_t)bus_addr);
2645
2646	/* Setup the HW Tx Head and Tail descriptor pointers */
2647	E1000_WRITE_REG(&adapter->hw, TDT, 0);
2648	E1000_WRITE_REG(&adapter->hw, TDH, 0);
2649
2650
2651	HW_DEBUGOUT2("Base = %x, Length = %x\n", E1000_READ_REG(&adapter->hw, TDBAL),
2652	    E1000_READ_REG(&adapter->hw, TDLEN));
2653
2654	/* Set the default values for the Tx Inter Packet Gap timer */
2655	switch (adapter->hw.mac_type) {
2656	case em_82542_rev2_0:
2657	case em_82542_rev2_1:
2658		reg_tipg = DEFAULT_82542_TIPG_IPGT;
2659		reg_tipg |= DEFAULT_82542_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT;
2660		reg_tipg |= DEFAULT_82542_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT;
2661		break;
2662	case em_80003es2lan:
2663		reg_tipg = DEFAULT_82543_TIPG_IPGR1;
2664		reg_tipg |= DEFAULT_80003ES2LAN_TIPG_IPGR2 <<
2665		    E1000_TIPG_IPGR2_SHIFT;
2666		break;
2667	default:
2668		if ((adapter->hw.media_type == em_media_type_fiber) ||
2669		    (adapter->hw.media_type == em_media_type_internal_serdes))
2670			reg_tipg = DEFAULT_82543_TIPG_IPGT_FIBER;
2671		else
2672			reg_tipg = DEFAULT_82543_TIPG_IPGT_COPPER;
2673		reg_tipg |= DEFAULT_82543_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT;
2674		reg_tipg |= DEFAULT_82543_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT;
2675	}
2676
2677	E1000_WRITE_REG(&adapter->hw, TIPG, reg_tipg);
2678	E1000_WRITE_REG(&adapter->hw, TIDV, adapter->tx_int_delay.value);
2679	if (adapter->hw.mac_type >= em_82540)
2680		E1000_WRITE_REG(&adapter->hw, TADV, adapter->tx_abs_int_delay.value);
2681
2682	/* Do adapter specific tweaks before we enable the transmitter. */
2683	if (adapter->hw.mac_type == em_82571 || adapter->hw.mac_type == em_82572) {
2684		reg_tarc = E1000_READ_REG(&adapter->hw, TARC0);
2685		reg_tarc |= (1 << 25);
2686		E1000_WRITE_REG(&adapter->hw, TARC0, reg_tarc);
2687		reg_tarc = E1000_READ_REG(&adapter->hw, TARC1);
2688		reg_tarc |= (1 << 25);
2689		reg_tarc &= ~(1 << 28);
2690		E1000_WRITE_REG(&adapter->hw, TARC1, reg_tarc);
2691	} else if (adapter->hw.mac_type == em_80003es2lan) {
2692		reg_tarc = E1000_READ_REG(&adapter->hw, TARC0);
2693		reg_tarc |= 1;
2694		E1000_WRITE_REG(&adapter->hw, TARC0, reg_tarc);
2695		reg_tarc = E1000_READ_REG(&adapter->hw, TARC1);
2696		reg_tarc |= 1;
2697		E1000_WRITE_REG(&adapter->hw, TARC1, reg_tarc);
2698	}
2699
2700	/* Program the Transmit Control Register */
2701	reg_tctl = E1000_TCTL_PSP | E1000_TCTL_EN |
2702		   (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT);
2703	if (adapter->hw.mac_type >= em_82571)
2704		reg_tctl |= E1000_TCTL_MULR;
2705	if (adapter->link_duplex == FULL_DUPLEX) {
2706		reg_tctl |= E1000_FDX_COLLISION_DISTANCE << E1000_COLD_SHIFT;
2707	} else {
2708		reg_tctl |= E1000_HDX_COLLISION_DISTANCE << E1000_COLD_SHIFT;
2709	}
2710	/* This write will effectively turn on the transmit unit. */
2711	E1000_WRITE_REG(&adapter->hw, TCTL, reg_tctl);
2712
2713	/* Setup Transmit Descriptor Settings for this adapter */
2714	adapter->txd_cmd = E1000_TXD_CMD_IFCS | E1000_TXD_CMD_RS;
2715
2716	if (adapter->tx_int_delay.value > 0)
2717		adapter->txd_cmd |= E1000_TXD_CMD_IDE;
2718}
2719
2720/*********************************************************************
2721 *
2722 *  Free all transmit related data structures.
2723 *
2724 **********************************************************************/
2725static void
2726em_free_transmit_structures(struct adapter *adapter)
2727{
2728	struct em_buffer *tx_buffer;
2729	int i;
2730
2731	INIT_DEBUGOUT("free_transmit_structures: begin");
2732
2733	if (adapter->tx_buffer_area != NULL) {
2734		tx_buffer = adapter->tx_buffer_area;
2735		for (i = 0; i < adapter->num_tx_desc; i++, tx_buffer++) {
2736			if (tx_buffer->m_head != NULL) {
2737				bus_dmamap_sync(adapter->txtag, tx_buffer->map,
2738				    BUS_DMASYNC_POSTWRITE);
2739				bus_dmamap_unload(adapter->txtag,
2740				    tx_buffer->map);
2741				m_freem(tx_buffer->m_head);
2742				tx_buffer->m_head = NULL;
2743			} else if (tx_buffer->map != NULL)
2744				bus_dmamap_unload(adapter->txtag,
2745				    tx_buffer->map);
2746			if (tx_buffer->map != NULL) {
2747				bus_dmamap_destroy(adapter->txtag,
2748				    tx_buffer->map);
2749				tx_buffer->map = NULL;
2750			}
2751		}
2752	}
2753	if (adapter->tx_buffer_area != NULL) {
2754		free(adapter->tx_buffer_area, M_DEVBUF);
2755		adapter->tx_buffer_area = NULL;
2756	}
2757	if (adapter->txtag != NULL) {
2758		bus_dma_tag_destroy(adapter->txtag);
2759		adapter->txtag = NULL;
2760	}
2761}
2762
2763/*********************************************************************
2764 *
2765 *  The offload context needs to be set when we transfer the first
2766 *  packet of a particular protocol (TCP/UDP). We change the
2767 *  context only if the protocol type changes.
2768 *
2769 **********************************************************************/
2770static void
2771em_transmit_checksum_setup(struct adapter *adapter, struct mbuf *mp,
2772    uint32_t *txd_upper, uint32_t *txd_lower)
2773{
2774	struct em_context_desc *TXD;
2775	struct em_buffer *tx_buffer;
2776	int curr_txd;
2777
2778	if (mp->m_pkthdr.csum_flags) {
2779
2780		if (mp->m_pkthdr.csum_flags & CSUM_TCP) {
2781			*txd_upper = E1000_TXD_POPTS_TXSM << 8;
2782			*txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
2783			if (adapter->active_checksum_context == OFFLOAD_TCP_IP)
2784				return;
2785			else
2786				adapter->active_checksum_context = OFFLOAD_TCP_IP;
2787
2788		} else if (mp->m_pkthdr.csum_flags & CSUM_UDP) {
2789			*txd_upper = E1000_TXD_POPTS_TXSM << 8;
2790			*txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
2791			if (adapter->active_checksum_context == OFFLOAD_UDP_IP)
2792				return;
2793			else
2794				adapter->active_checksum_context = OFFLOAD_UDP_IP;
2795		} else {
2796			*txd_upper = 0;
2797			*txd_lower = 0;
2798			return;
2799		}
2800	} else {
2801		*txd_upper = 0;
2802		*txd_lower = 0;
2803		return;
2804	}
2805
2806	/* If we reach this point, the checksum offload context
2807	 * needs to be reset.
2808	 */
2809	curr_txd = adapter->next_avail_tx_desc;
2810	tx_buffer = &adapter->tx_buffer_area[curr_txd];
2811	TXD = (struct em_context_desc *) &adapter->tx_desc_base[curr_txd];
2812
2813	TXD->lower_setup.ip_fields.ipcss = ETHER_HDR_LEN;
2814	TXD->lower_setup.ip_fields.ipcso =
2815		ETHER_HDR_LEN + offsetof(struct ip, ip_sum);
2816	TXD->lower_setup.ip_fields.ipcse =
2817		htole16(ETHER_HDR_LEN + sizeof(struct ip) - 1);
2818
2819	TXD->upper_setup.tcp_fields.tucss =
2820		ETHER_HDR_LEN + sizeof(struct ip);
2821	TXD->upper_setup.tcp_fields.tucse = htole16(0);
2822
2823	if (adapter->active_checksum_context == OFFLOAD_TCP_IP) {
2824		TXD->upper_setup.tcp_fields.tucso =
2825			ETHER_HDR_LEN + sizeof(struct ip) +
2826			offsetof(struct tcphdr, th_sum);
2827	} else if (adapter->active_checksum_context == OFFLOAD_UDP_IP) {
2828		TXD->upper_setup.tcp_fields.tucso =
2829			ETHER_HDR_LEN + sizeof(struct ip) +
2830			offsetof(struct udphdr, uh_sum);
2831	}
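
	/*
	 * Illustrative offsets for an IPv4 frame with no IP options:
	 * ipcss = 14 (start of the IP header), ipcso = 24 (ip_sum),
	 * ipcse = 33 (last IP header byte), tucss = 34, and tucso = 50
	 * for TCP (th_sum) or 40 for UDP (uh_sum).
	 */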
2832
2833	TXD->tcp_seg_setup.data = htole32(0);
2834	TXD->cmd_and_length = htole32(adapter->txd_cmd | E1000_TXD_CMD_DEXT);
2835
2836	tx_buffer->m_head = NULL;
2837
2838	if (++curr_txd == adapter->num_tx_desc)
2839		curr_txd = 0;
2840
2841	adapter->num_tx_desc_avail--;
2842	adapter->next_avail_tx_desc = curr_txd;
2843}
2844
2845/**********************************************************************
2846 *
2847 *  Setup work for hardware segmentation offload (TSO)
2848 *
2849 **********************************************************************/
2850static boolean_t
2851em_tso_setup(struct adapter *adapter, struct mbuf *mp, uint32_t *txd_upper,
2852   uint32_t *txd_lower)
2853{
2854	struct em_context_desc *TXD;
2855	struct em_buffer *tx_buffer;
2856	struct ip *ip;
2857	struct tcphdr *th;
2858	int curr_txd, hdr_len, ip_hlen, tcp_hlen;
2859
2860	if (((mp->m_pkthdr.csum_flags & CSUM_TSO) == 0) ||
2861	    (mp->m_pkthdr.len <= E1000_TX_BUFFER_SIZE)) {
2862		return (FALSE);
2863	}
2864
2865	*txd_lower = (E1000_TXD_CMD_DEXT |
2866		      E1000_TXD_DTYP_D |
2867		      E1000_TXD_CMD_TSE);
2868
2869	*txd_upper = (E1000_TXD_POPTS_IXSM |
2870		      E1000_TXD_POPTS_TXSM) << 8;
2871
2872	curr_txd = adapter->next_avail_tx_desc;
2873	tx_buffer = &adapter->tx_buffer_area[curr_txd];
2874	TXD = (struct em_context_desc *) &adapter->tx_desc_base[curr_txd];
2875
2876	mp->m_data += sizeof(struct ether_header);
2877	ip = mtod(mp, struct ip *);
2878	ip->ip_len = 0;
2879	ip->ip_sum = 0;
2880	ip_hlen = ip->ip_hl << 2;
2881	th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
2882	tcp_hlen = th->th_off << 2;
2883
2884	hdr_len = ETHER_HDR_LEN + ip_hlen + tcp_hlen;
2885	th->th_sum = in_pseudo(ip->ip_src.s_addr,
2886	    ip->ip_dst.s_addr, htons(IPPROTO_TCP));
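	/*
	 * The pseudo-header checksum stored above deliberately excludes the
	 * length (ip_len was zeroed earlier); the hardware folds the proper
	 * length into each segment it generates.
	 */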
2887
2888	mp->m_data -= sizeof(struct ether_header);
2889	TXD->lower_setup.ip_fields.ipcss = ETHER_HDR_LEN;
2890	TXD->lower_setup.ip_fields.ipcso =
2891		ETHER_HDR_LEN + offsetof(struct ip, ip_sum);
2892	TXD->lower_setup.ip_fields.ipcse =
2893		htole16(ETHER_HDR_LEN + ip_hlen - 1);
2894
2895	TXD->upper_setup.tcp_fields.tucss =
2896		ETHER_HDR_LEN + ip_hlen;
2897	TXD->upper_setup.tcp_fields.tucse = 0;
2898	TXD->upper_setup.tcp_fields.tucso =
2899		ETHER_HDR_LEN + ip_hlen +
2900		offsetof(struct tcphdr, th_sum);
2901	TXD->tcp_seg_setup.fields.mss = htole16(mp->m_pkthdr.tso_segsz);
2902	TXD->tcp_seg_setup.fields.hdr_len = hdr_len;
2903	TXD->cmd_and_length = htole32(adapter->txd_cmd |
2904				E1000_TXD_CMD_DEXT |
2905				E1000_TXD_CMD_TSE |
2906				E1000_TXD_CMD_IP | E1000_TXD_CMD_TCP |
2907				(mp->m_pkthdr.len - (hdr_len)));
2908
2909	tx_buffer->m_head = NULL;
2910
2911	if (++curr_txd == adapter->num_tx_desc)
2912		curr_txd = 0;
2913
2914	adapter->num_tx_desc_avail--;
2915	adapter->next_avail_tx_desc = curr_txd;
2916	adapter->tx_tso = TRUE;
2917
2918	return (TRUE);
2919}
2920
2921/**********************************************************************
2922 *
2923 *  Examine each tx_buffer in the used queue. If the hardware is done
2924 *  processing the packet then free associated resources. The
2925 *  tx_buffer is put back on the free queue.
2926 *
2927 **********************************************************************/
2928static void
2929em_txeof(struct adapter *adapter)
2930{
2931	int i, num_avail;
2932	struct em_buffer *tx_buffer;
2933	struct em_tx_desc   *tx_desc;
2934	struct ifnet   *ifp = adapter->ifp;
2935
2936	EM_LOCK_ASSERT(adapter);
2937
2938	if (adapter->num_tx_desc_avail == adapter->num_tx_desc)
2939		return;
2940
2941	num_avail = adapter->num_tx_desc_avail;
2942	i = adapter->oldest_used_tx_desc;
2943
2944	tx_buffer = &adapter->tx_buffer_area[i];
2945	tx_desc = &adapter->tx_desc_base[i];
2946
2947	bus_dmamap_sync(adapter->txdma.dma_tag, adapter->txdma.dma_map,
2948	    BUS_DMASYNC_POSTREAD);
2949	while (tx_desc->upper.fields.status & E1000_TXD_STAT_DD) {
2950
2951		tx_desc->upper.data = 0;
2952		num_avail++;
2953
2954		if (tx_buffer->m_head) {
2955			ifp->if_opackets++;
2956			bus_dmamap_sync(adapter->txtag, tx_buffer->map,
2957			    BUS_DMASYNC_POSTWRITE);
2958			bus_dmamap_unload(adapter->txtag, tx_buffer->map);
2959
2960			m_freem(tx_buffer->m_head);
2961			tx_buffer->m_head = NULL;
2962		}
2963
2964		if (++i == adapter->num_tx_desc)
2965			i = 0;
2966
2967		tx_buffer = &adapter->tx_buffer_area[i];
2968		tx_desc = &adapter->tx_desc_base[i];
2969	}
2970	bus_dmamap_sync(adapter->txdma.dma_tag, adapter->txdma.dma_map,
2971	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
2972
2973	adapter->oldest_used_tx_desc = i;
2974
2975	/*
2976	 * If we have enough room, clear IFF_DRV_OACTIVE to tell the stack
2977	 * that it is OK to send packets.
2978	 * If there are no pending descriptors, clear the timeout. Otherwise,
2979	 * if some descriptors have been freed, restart the timeout.
2980	 */
2981	if (num_avail > EM_TX_CLEANUP_THRESHOLD) {
2982		ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
2983		if (num_avail == adapter->num_tx_desc)
2984			ifp->if_timer = 0;
2985		else if (num_avail != adapter->num_tx_desc_avail)
2986			ifp->if_timer = EM_TX_TIMEOUT;
2987	}
2988	adapter->num_tx_desc_avail = num_avail;
2989}
2990
2991/*********************************************************************
2992 *
2993 *  Get a buffer from system mbuf buffer pool.
2994 *
2995 **********************************************************************/
2996static int
2997em_get_buf(struct adapter *adapter, int i)
2998{
2999	struct mbuf		*m;
3000	bus_dma_segment_t	segs[1];
3001	bus_dmamap_t		map;
3002	struct em_buffer	*rx_buffer;
3003	int			error, nsegs;
3004
3005	m = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR);
3006	if (m == NULL) {
3007		adapter->mbuf_cluster_failed++;
3008		return (ENOBUFS);
3009	}
3010	m->m_len = m->m_pkthdr.len = MCLBYTES;
3011	if (adapter->hw.max_frame_size <= (MCLBYTES - ETHER_ALIGN))
3012		m_adj(m, ETHER_ALIGN);
3013
3014	/*
3015	 * Using memory from the mbuf cluster pool, invoke the
3016	 * bus_dma machinery to arrange the memory mapping.
3017	 */
3018	error = bus_dmamap_load_mbuf_sg(adapter->rxtag, adapter->rx_sparemap,
3019	    m, segs, &nsegs, BUS_DMA_NOWAIT);
3020	if (error != 0) {
3021		m_free(m);
3022		return (error);
3023	}
3024	/* If nsegs is wrong then the stack is corrupt. */
3025	KASSERT(nsegs == 1, ("Too many segments returned!"));
3026
3027	rx_buffer = &adapter->rx_buffer_area[i];
3028	if (rx_buffer->m_head != NULL)
3029		bus_dmamap_unload(adapter->rxtag, rx_buffer->map);
3030
3031	map = rx_buffer->map;
3032	rx_buffer->map = adapter->rx_sparemap;
3033	adapter->rx_sparemap = map;
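	/*
	 * The pre-loaded spare map is swapped in rather than loading
	 * rx_buffer->map directly, so a failed load above leaves the old
	 * ring mapping intact; the displaced map becomes the new spare.
	 */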
3034	bus_dmamap_sync(adapter->rxtag, rx_buffer->map, BUS_DMASYNC_PREREAD);
3035	rx_buffer->m_head = m;
3036
3037	adapter->rx_desc_base[i].buffer_addr = htole64(segs[0].ds_addr);
3038
3039	return (0);
3040}
3041
3042/*********************************************************************
3043 *
3044 *  Allocate memory for rx_buffer structures. Since we use one
3045 *  rx_buffer per received packet, the maximum number of rx_buffers
3046 *  that we'll need is equal to the number of receive descriptors
3047 *  that we've allocated.
3048 *
3049 **********************************************************************/
3050static int
3051em_allocate_receive_structures(struct adapter *adapter)
3052{
3053	device_t dev = adapter->dev;
3054	struct em_buffer *rx_buffer;
3055	int i, error;
3056
3057	adapter->rx_buffer_area = malloc(sizeof(struct em_buffer) * adapter->num_rx_desc,
3058	    M_DEVBUF, M_NOWAIT);
3059	if (adapter->rx_buffer_area == NULL) {
3060		device_printf(dev, "Unable to allocate rx_buffer memory\n");
3061		return (ENOMEM);
3062	}
3063
3064	bzero(adapter->rx_buffer_area, sizeof(struct em_buffer) * adapter->num_rx_desc);
3065
3066	error = bus_dma_tag_create(bus_get_dma_tag(dev),	/* parent */
3067				1, 0,			/* alignment, bounds */
3068				BUS_SPACE_MAXADDR,	/* lowaddr */
3069				BUS_SPACE_MAXADDR,	/* highaddr */
3070				NULL, NULL,		/* filter, filterarg */
3071				MCLBYTES,		/* maxsize */
3072				1,			/* nsegments */
3073				MCLBYTES,		/* maxsegsize */
3074				0,			/* flags */
3075				NULL,			/* lockfunc */
3076				NULL,			/* lockarg */
3077				&adapter->rxtag);
3078	if (error) {
3079		device_printf(dev, "%s: bus_dma_tag_create failed %d\n",
3080		    __func__, error);
3081		goto fail;
3082	}
3083
3084	error = bus_dmamap_create(adapter->rxtag, BUS_DMA_NOWAIT,
3085	    &adapter->rx_sparemap);
3086	if (error) {
3087		device_printf(dev, "%s: bus_dmamap_create failed: %d\n",
3088		    __func__, error);
3089		goto fail;
3090	}
3091	rx_buffer = adapter->rx_buffer_area;
3092	for (i = 0; i < adapter->num_rx_desc; i++, rx_buffer++) {
3093		error = bus_dmamap_create(adapter->rxtag, BUS_DMA_NOWAIT,
3094		    &rx_buffer->map);
3095		if (error) {
3096			device_printf(dev, "%s: bus_dmamap_create failed: %d\n",
3097			    __func__, error);
3098			goto fail;
3099		}
3100	}
3101
3102	for (i = 0; i < adapter->num_rx_desc; i++) {
3103		error = em_get_buf(adapter, i);
3104		if (error)
3105			goto fail;
3106	}
3107	bus_dmamap_sync(adapter->rxdma.dma_tag, adapter->rxdma.dma_map,
3108	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3109
3110	return (0);
3111
3112fail:
3113	em_free_receive_structures(adapter);
3114	return (error);
3115}
3116
3117/*********************************************************************
3118 *
3119 *  Allocate and initialize receive structures.
3120 *
3121 **********************************************************************/
3122static int
3123em_setup_receive_structures(struct adapter *adapter)
3124{
3125	int error;
3126
3127	bzero(adapter->rx_desc_base, (sizeof(struct em_rx_desc)) * adapter->num_rx_desc);
3128
3129	if ((error = em_allocate_receive_structures(adapter)) != 0)
3130		return (error);
3131
3132	/* Setup our descriptor pointers */
3133	adapter->next_rx_desc_to_check = 0;
3134
3135	return (0);
3136}
3137
3138/*********************************************************************
3139 *
3140 *  Enable receive unit.
3141 *
3142 **********************************************************************/
3143static void
3144em_initialize_receive_unit(struct adapter *adapter)
3145{
3146	struct ifnet	*ifp = adapter->ifp;
3147	uint64_t	bus_addr;
3148	uint32_t	reg_rctl;
3149	uint32_t	reg_rxcsum;
3150
3151	INIT_DEBUGOUT("em_initialize_receive_unit: begin");
3152
3153	/*
3154	 * Make sure receives are disabled while setting
3155	 * up the descriptor ring
3156	 */
3157	E1000_WRITE_REG(&adapter->hw, RCTL, 0);
3158
3159	/* Set the Receive Delay Timer Register */
3160	E1000_WRITE_REG(&adapter->hw, RDTR, adapter->rx_int_delay.value | E1000_RDT_FPDB);
3161
3162	if (adapter->hw.mac_type >= em_82540) {
3163		E1000_WRITE_REG(&adapter->hw, RADV, adapter->rx_abs_int_delay.value);
3164
3165		/*
3166		 * Set the interrupt throttling rate. ITR holds the minimum interval
3167		 * between interrupts in 256ns units: DEFAULT_ITR = (1s / MAX_INTS_PER_SEC) / 256ns.
3168		 */
3169#define MAX_INTS_PER_SEC	8000
3170#define DEFAULT_ITR		(1000000000 / (MAX_INTS_PER_SEC * 256))
3171		E1000_WRITE_REG(&adapter->hw, ITR, DEFAULT_ITR);
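		/* With the defaults above: 1000000000 / (8000 * 256) = 488. */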
3172	}
3173
3174	/* Setup the Base and Length of the Rx Descriptor Ring */
3175	bus_addr = adapter->rxdma.dma_paddr;
3176	E1000_WRITE_REG(&adapter->hw, RDLEN, adapter->num_rx_desc *
3177			sizeof(struct em_rx_desc));
3178	E1000_WRITE_REG(&adapter->hw, RDBAH, (uint32_t)(bus_addr >> 32));
3179	E1000_WRITE_REG(&adapter->hw, RDBAL, (uint32_t)bus_addr);
3180
3181	/* Setup the HW Rx Head and Tail Descriptor Pointers */
3182	E1000_WRITE_REG(&adapter->hw, RDT, adapter->num_rx_desc - 1);
3183	E1000_WRITE_REG(&adapter->hw, RDH, 0);
3184
3185	/* Setup the Receive Control Register */
3186	reg_rctl = E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_LBM_NO |
3187		   E1000_RCTL_RDMTS_HALF |
3188		   (adapter->hw.mc_filter_type << E1000_RCTL_MO_SHIFT);
3189
3190	if (adapter->hw.tbi_compatibility_on == TRUE)
3191		reg_rctl |= E1000_RCTL_SBP;
3192
3193
3194	switch (adapter->rx_buffer_len) {
3195	default:
3196	case EM_RXBUFFER_2048:
3197		reg_rctl |= E1000_RCTL_SZ_2048;
3198		break;
3199	case EM_RXBUFFER_4096:
3200		reg_rctl |= E1000_RCTL_SZ_4096 | E1000_RCTL_BSEX | E1000_RCTL_LPE;
3201		break;
3202	case EM_RXBUFFER_8192:
3203		reg_rctl |= E1000_RCTL_SZ_8192 | E1000_RCTL_BSEX | E1000_RCTL_LPE;
3204		break;
3205	case EM_RXBUFFER_16384:
3206		reg_rctl |= E1000_RCTL_SZ_16384 | E1000_RCTL_BSEX | E1000_RCTL_LPE;
3207		break;
3208	}
3209
3210	if (ifp->if_mtu > ETHERMTU)
3211		reg_rctl |= E1000_RCTL_LPE;
3212
3213	/* Enable 82543 Receive Checksum Offload for TCP and UDP */
3214	if ((adapter->hw.mac_type >= em_82543) &&
3215	    (ifp->if_capenable & IFCAP_RXCSUM)) {
3216		reg_rxcsum = E1000_READ_REG(&adapter->hw, RXCSUM);
3217		reg_rxcsum |= (E1000_RXCSUM_IPOFL | E1000_RXCSUM_TUOFL);
3218		E1000_WRITE_REG(&adapter->hw, RXCSUM, reg_rxcsum);
3219	}
3220
3221	/* Enable Receives */
3222	E1000_WRITE_REG(&adapter->hw, RCTL, reg_rctl);
3223}
3224
3225/*********************************************************************
3226 *
3227 *  Free receive related data structures.
3228 *
3229 **********************************************************************/
3230static void
3231em_free_receive_structures(struct adapter *adapter)
3232{
3233	struct em_buffer *rx_buffer;
3234	int i;
3235
3236	INIT_DEBUGOUT("free_receive_structures: begin");
3237
3238	if (adapter->rx_sparemap) {
3239		bus_dmamap_destroy(adapter->rxtag, adapter->rx_sparemap);
3240		adapter->rx_sparemap = NULL;
3241	}
3242	if (adapter->rx_buffer_area != NULL) {
3243		rx_buffer = adapter->rx_buffer_area;
3244		for (i = 0; i < adapter->num_rx_desc; i++, rx_buffer++) {
3245			if (rx_buffer->m_head != NULL) {
3246				bus_dmamap_sync(adapter->rxtag, rx_buffer->map,
3247				    BUS_DMASYNC_POSTREAD);
3248				bus_dmamap_unload(adapter->rxtag,
3249				    rx_buffer->map);
3250				m_freem(rx_buffer->m_head);
3251				rx_buffer->m_head = NULL;
3252			} else if (rx_buffer->map != NULL)
3253				bus_dmamap_unload(adapter->rxtag,
3254				    rx_buffer->map);
3255			if (rx_buffer->map != NULL) {
3256				bus_dmamap_destroy(adapter->rxtag,
3257				    rx_buffer->map);
3258				rx_buffer->map = NULL;
3259			}
3260		}
3261	}
3262	if (adapter->rx_buffer_area != NULL) {
3263		free(adapter->rx_buffer_area, M_DEVBUF);
3264		adapter->rx_buffer_area = NULL;
3265	}
3266	if (adapter->rxtag != NULL) {
3267		bus_dma_tag_destroy(adapter->rxtag);
3268		adapter->rxtag = NULL;
3269	}
3270}
3271
3272/*********************************************************************
3273 *
3274 *  This routine executes in interrupt context. It replenishes
3275 *  the mbufs in the descriptor ring and passes data which has been
3276 *  DMA'ed into host memory up to the upper layers.
3277 *
3278 *  We loop at most count times if count is > 0, or until done if
3279 *  count < 0.
3280 *
3281 *********************************************************************/
3282static int
3283em_rxeof(struct adapter *adapter, int count)
3284{
3285	struct ifnet	*ifp;
3286	struct mbuf	*mp;
3287	uint8_t		accept_frame = 0;
3288	uint8_t		eop = 0;
3289	uint16_t 	len, desc_len, prev_len_adj;
3290	int		i;
3291
3292	/* Pointer to the receive descriptor being examined. */
3293	struct em_rx_desc   *current_desc;
3294	uint8_t		status;
3295
3296	ifp = adapter->ifp;
3297	i = adapter->next_rx_desc_to_check;
3298	current_desc = &adapter->rx_desc_base[i];
3299	bus_dmamap_sync(adapter->rxdma.dma_tag, adapter->rxdma.dma_map,
3300	    BUS_DMASYNC_POSTREAD);
3301
3302	if (!((current_desc->status) & E1000_RXD_STAT_DD))
3303		return (0);
3304
3305	while ((current_desc->status & E1000_RXD_STAT_DD) &&
3306	    (count != 0) &&
3307	    (ifp->if_drv_flags & IFF_DRV_RUNNING)) {
3308		struct mbuf *m = NULL;
3309
3310		mp = adapter->rx_buffer_area[i].m_head;
3311		/*
3312		 * Can't defer bus_dmamap_sync(9) because TBI_ACCEPT
3313		 * needs to access the last received byte in the mbuf.
3314		 */
3315		bus_dmamap_sync(adapter->rxtag, adapter->rx_buffer_area[i].map,
3316		    BUS_DMASYNC_POSTREAD);
3317
3318		accept_frame = 1;
3319		prev_len_adj = 0;
3320		desc_len = le16toh(current_desc->length);
3321		status = current_desc->status;
3322		if (status & E1000_RXD_STAT_EOP) {
3323			count--;
3324			eop = 1;
3325			if (desc_len < ETHER_CRC_LEN) {
3326				len = 0;
3327				prev_len_adj = ETHER_CRC_LEN - desc_len;
3328			} else
3329				len = desc_len - ETHER_CRC_LEN;
3330		} else {
3331			eop = 0;
3332			len = desc_len;
3333		}
3334
3335		if (current_desc->errors & E1000_RXD_ERR_FRAME_ERR_MASK) {
3336			uint8_t		last_byte;
3337			uint32_t	pkt_len = desc_len;
3338
3339			if (adapter->fmp != NULL)
3340				pkt_len += adapter->fmp->m_pkthdr.len;
3341
3342			last_byte = *(mtod(mp, caddr_t) + desc_len - 1);
3343			if (TBI_ACCEPT(&adapter->hw, status,
3344			    current_desc->errors, pkt_len, last_byte)) {
3345				em_tbi_adjust_stats(&adapter->hw,
3346				    &adapter->stats, pkt_len,
3347				    adapter->hw.mac_addr);
3348				if (len > 0)
3349					len--;
3350			} else
3351				accept_frame = 0;
3352		}
3353
3354		if (accept_frame) {
3355			if (em_get_buf(adapter, i) != 0) {
3356				ifp->if_iqdrops++;
3357				goto discard;
3358			}
3359
3360			/* Assign correct length to the current fragment */
3361			mp->m_len = len;
3362
3363			if (adapter->fmp == NULL) {
3364				mp->m_pkthdr.len = len;
3365				adapter->fmp = mp; /* Store the first mbuf */
3366				adapter->lmp = mp;
3367			} else {
3368				/* Chain mbuf's together */
3369				mp->m_flags &= ~M_PKTHDR;
3370				/*
3371				 * Adjust length of previous mbuf in chain if
3372				 * we received less than 4 bytes in the last
3373				 * descriptor.
3374				 */
3375				if (prev_len_adj > 0) {
3376					adapter->lmp->m_len -= prev_len_adj;
3377					adapter->fmp->m_pkthdr.len -=
3378					    prev_len_adj;
3379				}
3380				adapter->lmp->m_next = mp;
3381				adapter->lmp = adapter->lmp->m_next;
3382				adapter->fmp->m_pkthdr.len += len;
3383			}
3384
3385			if (eop) {
3386				adapter->fmp->m_pkthdr.rcvif = ifp;
3387				ifp->if_ipackets++;
3388				em_receive_checksum(adapter, current_desc,
3389				    adapter->fmp);
3390#ifndef __NO_STRICT_ALIGNMENT
3391				if (adapter->hw.max_frame_size >
3392				    (MCLBYTES - ETHER_ALIGN) &&
3393				    em_fixup_rx(adapter) != 0)
3394					goto skip;
3395#endif
3396				if (status & E1000_RXD_STAT_VP) {
3397					adapter->fmp->m_pkthdr.ether_vtag =
3398					    (le16toh(current_desc->special) &
3399					    E1000_RXD_SPC_VLAN_MASK);
3400					adapter->fmp->m_flags |= M_VLANTAG;
3401				}
3402#ifndef __NO_STRICT_ALIGNMENT
3403skip:
3404#endif
3405				m = adapter->fmp;
3406				adapter->fmp = NULL;
3407				adapter->lmp = NULL;
3408			}
3409		} else {
3410			ifp->if_ierrors++;
3411discard:
3412			/* Reuse loaded DMA map and just update mbuf chain */
3413			mp = adapter->rx_buffer_area[i].m_head;
3414			mp->m_len = mp->m_pkthdr.len = MCLBYTES;
3415			mp->m_data = mp->m_ext.ext_buf;
3416			mp->m_next = NULL;
3417			if (adapter->hw.max_frame_size <= (MCLBYTES - ETHER_ALIGN))
3418				m_adj(mp, ETHER_ALIGN);
3419			if (adapter->fmp != NULL) {
3420				m_freem(adapter->fmp);
3421				adapter->fmp = NULL;
3422				adapter->lmp = NULL;
3423			}
3424			m = NULL;
3425		}
3426
3427		/* Zero out the receive descriptors status. */
3428		current_desc->status = 0;
3429		bus_dmamap_sync(adapter->rxdma.dma_tag, adapter->rxdma.dma_map,
3430		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3431
3432		/* Advance our pointers to the next descriptor. */
3433		if (++i == adapter->num_rx_desc)
3434			i = 0;
3435		if (m != NULL) {
3436			adapter->next_rx_desc_to_check = i;
3437#ifdef DEVICE_POLLING
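			/*
			 * The adapter lock is presumably dropped across
			 * if_input() here because, with polling, the stack
			 * can call back into the driver (e.g. em_start())
			 * and would otherwise recurse on the lock.
			 */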
3438			EM_UNLOCK(adapter);
3439			(*ifp->if_input)(ifp, m);
3440			EM_LOCK(adapter);
3441#else
3442			(*ifp->if_input)(ifp, m);
3443#endif
3444			i = adapter->next_rx_desc_to_check;
3445		}
3446		current_desc = &adapter->rx_desc_base[i];
3447	}
3448	adapter->next_rx_desc_to_check = i;
3449
3450	/* Advance the E1000's Receive Queue #0 "Tail Pointer". */
3451	if (--i < 0)
3452		i = adapter->num_rx_desc - 1;
3453	E1000_WRITE_REG(&adapter->hw, RDT, i);
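	/* Report whether another completed descriptor is already waiting. */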
3454	if (!((current_desc->status) & E1000_RXD_STAT_DD))
3455		return (0);
3456
3457	return (1);
3458}
3459
3460#ifndef __NO_STRICT_ALIGNMENT
3461/*
3462 * When jumbo frames are enabled, the entire payload must be realigned on
3463 * architectures with strict alignment. This is a serious design flaw in
3464 * the 8254x, as it undercuts the benefit of DMA: the hardware only allows
3465 * RX buffer sizes of 2048/4096/8192/16384 bytes, while a size of
3466 * 2048 - ETHER_ALIGN is what would let the payload land aligned. Even on
3467 * architectures without strict alignment restrictions, the unaligned
3468 * memory accesses reduce performance. To avoid copying an entire frame
3469 * just to realign it, we allocate a new mbuf, copy only the ethernet
3470 * header into it, and prepend the new mbuf to the existing mbuf chain.
3471 *
3472 * Be aware that the 8254x achieves its best performance on
3473 * strict-alignment architectures only when jumbo frames are not used.
3474 */
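/*
 * A rough illustration of the alignment math, assuming 32-bit-aligned
 * clusters: the received frame starts at offset 0, so the 14-byte ethernet
 * header leaves the IP header at offset 14, which is misaligned. When the
 * frame fits, sliding it forward by ETHER_HDR_LEN moves the IP header to
 * offset 28, which is aligned; otherwise only the header is copied into a
 * prepended mbuf and the stack's usual m_pullup() path handles the rest.
 */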
3475static int
3476em_fixup_rx(struct adapter *adapter)
3477{
3478	struct mbuf *m, *n;
3479	int error;
3480
3481	error = 0;
3482	m = adapter->fmp;
3483	if (m->m_len <= (MCLBYTES - ETHER_HDR_LEN)) {
3484		bcopy(m->m_data, m->m_data + ETHER_HDR_LEN, m->m_len);
3485		m->m_data += ETHER_HDR_LEN;
3486	} else {
3487		MGETHDR(n, M_DONTWAIT, MT_DATA);
3488		if (n != NULL) {
3489			bcopy(m->m_data, n->m_data, ETHER_HDR_LEN);
3490			m->m_data += ETHER_HDR_LEN;
3491			m->m_len -= ETHER_HDR_LEN;
3492			n->m_len = ETHER_HDR_LEN;
3493			M_MOVE_PKTHDR(n, m);
3494			n->m_next = m;
3495			adapter->fmp = n;
3496		} else {
3497			adapter->ifp->if_iqdrops++;
3498			adapter->mbuf_alloc_failed++;
3499			m_freem(adapter->fmp);
3500			adapter->fmp = NULL;
3501			adapter->lmp = NULL;
3502			error = ENOBUFS;
3503		}
3504	}
3505
3506	return (error);
3507}
3508#endif
3509
3510/*********************************************************************
3511 *
3512 *  Verify that the hardware indicated that the checksum is valid.
3513 *  Inform the stack about the checksum status so that it does not
3514 *  spend time verifying the checksum again.
3515 *
3516 *********************************************************************/
3517static void
3518em_receive_checksum(struct adapter *adapter, struct em_rx_desc *rx_desc,
3519		    struct mbuf *mp)
3520{
3521	/* Checksum offload requires an 82543 or newer... */
3522	if ((adapter->hw.mac_type < em_82543) ||
3523	    /* ...and is skipped when the Ignore Checksum bit is set. */
3524	    (rx_desc->status & E1000_RXD_STAT_IXSM)) {
3525		mp->m_pkthdr.csum_flags = 0;
3526		return;
3527	}
3528
3529	if (rx_desc->status & E1000_RXD_STAT_IPCS) {
3530		/* Did it pass? */
3531		if (!(rx_desc->errors & E1000_RXD_ERR_IPE)) {
3532			/* IP Checksum Good */
3533			mp->m_pkthdr.csum_flags = CSUM_IP_CHECKED;
3534			mp->m_pkthdr.csum_flags |= CSUM_IP_VALID;
3535
3536		} else {
3537			mp->m_pkthdr.csum_flags = 0;
3538		}
3539	}
3540
3541	if (rx_desc->status & E1000_RXD_STAT_TCPCS) {
3542		/* Did it pass? */
3543		if (!(rx_desc->errors & E1000_RXD_ERR_TCPE)) {
3544			mp->m_pkthdr.csum_flags |=
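			/*
			 * CSUM_DATA_VALID | CSUM_PSEUDO_HDR together with a
			 * csum_data of 0xffff tells the stack that the
			 * TCP/UDP checksum, pseudo-header included, has
			 * already been verified in hardware.
			 */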
3545			(CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
3546			mp->m_pkthdr.csum_data = htons(0xffff);
3547		}
3548	}
3549}
3550
3551
3552static void
3553em_enable_vlans(struct adapter *adapter)
3554{
3555	uint32_t ctrl;
3556
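	/*
	 * VET selects the ethertype the MAC recognizes as a VLAN tag;
	 * setting CTRL.VME then enables VLAN tag processing.
	 */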
3557	E1000_WRITE_REG(&adapter->hw, VET, ETHERTYPE_VLAN);
3558
3559	ctrl = E1000_READ_REG(&adapter->hw, CTRL);
3560	ctrl |= E1000_CTRL_VME;
3561	E1000_WRITE_REG(&adapter->hw, CTRL, ctrl);
3562}
3563
3564static void
3565em_disable_vlans(struct adapter *adapter)
3566{
3567	uint32_t ctrl;
3568
3569	ctrl = E1000_READ_REG(&adapter->hw, CTRL);
3570	ctrl &= ~E1000_CTRL_VME;
3571	E1000_WRITE_REG(&adapter->hw, CTRL, ctrl);
3572}
3573
3574static void
3575em_enable_intr(struct adapter *adapter)
3576{
3577	E1000_WRITE_REG(&adapter->hw, IMS, (IMS_ENABLE_MASK));
3578}
3579
3580static void
3581em_disable_intr(struct adapter *adapter)
3582{
3583	/*
3584	 * The first version of the 82542 had an erratum: when link was
3585	 * forced, it would stay up even if the cable was disconnected.
3586	 * Sequence errors were used to detect the disconnect, after which
3587	 * the driver would unforce the link. This code is in the ISR, and
3588	 * for it to work correctly the Sequence error interrupt had to be
3589	 * enabled all the time.
3590	 */
3591
3592	if (adapter->hw.mac_type == em_82542_rev2_0)
3593		E1000_WRITE_REG(&adapter->hw, IMC,
3594		    0xffffffff & ~E1000_IMC_RXSEQ);
3595	else
3596		E1000_WRITE_REG(&adapter->hw, IMC,
3597		    0xffffffff);
3598}
3599
3600static int
3601em_is_valid_ether_addr(uint8_t *addr)
3602{
3603	char zero_addr[6] = { 0, 0, 0, 0, 0, 0 };
3604
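	/*
	 * Reject multicast/broadcast addresses (low bit of the first octet
	 * set) as well as the all-zero address.
	 */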
3605	if ((addr[0] & 1) || (!bcmp(addr, zero_addr, ETHER_ADDR_LEN))) {
3606		return (FALSE);
3607	}
3608
3609	return (TRUE);
3610}
3611
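/*
 * Thin wrappers that let the shared em_hw code reach 16-bit PCI config
 * registers through the FreeBSD pci(4) accessors.
 */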
3612void
3613em_write_pci_cfg(struct em_hw *hw, uint32_t reg, uint16_t *value)
3614{
3615	pci_write_config(((struct em_osdep *)hw->back)->dev, reg, *value, 2);
3616}
3617
3618void
3619em_read_pci_cfg(struct em_hw *hw, uint32_t reg, uint16_t *value)
3620{
3621	*value = pci_read_config(((struct em_osdep *)hw->back)->dev, reg, 2);
3622}
3623
3624void
3625em_pci_set_mwi(struct em_hw *hw)
3626{
3627	pci_write_config(((struct em_osdep *)hw->back)->dev, PCIR_COMMAND,
3628	    (hw->pci_cmd_word | CMD_MEM_WRT_INVALIDATE), 2);
3629}
3630
3631void
3632em_pci_clear_mwi(struct em_hw *hw)
3633{
3634	pci_write_config(((struct em_osdep *)hw->back)->dev, PCIR_COMMAND,
3635	    (hw->pci_cmd_word & ~CMD_MEM_WRT_INVALIDATE), 2);
3636}
3637
3638/*********************************************************************
3639* 82544 Coexistence issue workaround.
3640*    There are two issues.
3641*       1. Transmit Hang issue.
3642*    To detect this issue, the following equation can be used:
3643*	  SIZE[3:0] + ADDR[2:0] = SUM[3:0].
3644*	  If SUM[3:0] is between 1 and 4, the issue occurs.
3645*
3646*       2. DAC issue.
3647*    To detect this issue, the following equation can be used:
3648*	  SIZE[3:0] + ADDR[2:0] = SUM[3:0].
3649*	  If SUM[3:0] is between 9 and c, the issue occurs.
3650*
3651*
3652*    WORKAROUND:
3653*	  Make sure the ending address is never 1, 2, 3, 4 (Hang) or
3654*	  9, a, b, c (DAC).
3655**********************************************************************/
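/*
 * A worked example with hypothetical values: a buffer whose address ends in
 * 0x6 with length 0x40E gives SIZE[3:0] + ADDR[2:0] = 0xE + 0x6 = 0x14, so
 * SUM[3:0] = 0x4, which falls in the hang range. The routine below
 * therefore splits it into a (length - 4)-byte descriptor plus a 4-byte
 * tail; as the early return for length <= 4 suggests, such small transfers
 * do not trigger the errata.
 */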
3656static uint32_t
3657em_fill_descriptors (bus_addr_t address, uint32_t length,
3658		PDESC_ARRAY desc_array)
3659{
3660	/* The issue is sensitive to both length and address, */
3661	/* so check the address first. */
3662	uint32_t safe_terminator;
3663	if (length <= 4) {
3664		desc_array->descriptor[0].address = address;
3665		desc_array->descriptor[0].length = length;
3666		desc_array->elements = 1;
3667		return (desc_array->elements);
3668	}
3669	safe_terminator = (uint32_t)((((uint32_t)address & 0x7) + (length & 0xF)) & 0xF);
3670	/* If it falls outside 0x1-0x4 and 0x9-0xC, no split is needed. */
3671	if (safe_terminator == 0 ||
3672	    (safe_terminator > 4 &&
3673	    safe_terminator < 9) ||
3674	    (safe_terminator > 0xC &&
3675	    safe_terminator <= 0xF)) {
3676		desc_array->descriptor[0].address = address;
3677		desc_array->descriptor[0].length = length;
3678		desc_array->elements = 1;
3679		return (desc_array->elements);
3680	}
3681
3682	desc_array->descriptor[0].address = address;
3683	desc_array->descriptor[0].length = length - 4;
3684	desc_array->descriptor[1].address = address + (length - 4);
3685	desc_array->descriptor[1].length = 4;
3686	desc_array->elements = 2;
3687	return (desc_array->elements);
3688}
3689
3690/**********************************************************************
3691 *
3692 *  Update the board statistics counters.
3693 *
3694 **********************************************************************/
3695static void
3696em_update_stats_counters(struct adapter *adapter)
3697{
3698	struct ifnet   *ifp;
3699
3700	if (adapter->hw.media_type == em_media_type_copper ||
3701	   (E1000_READ_REG(&adapter->hw, STATUS) & E1000_STATUS_LU)) {
3702		adapter->stats.symerrs += E1000_READ_REG(&adapter->hw, SYMERRS);
3703		adapter->stats.sec += E1000_READ_REG(&adapter->hw, SEC);
3704	}
3705	adapter->stats.crcerrs += E1000_READ_REG(&adapter->hw, CRCERRS);
3706	adapter->stats.mpc += E1000_READ_REG(&adapter->hw, MPC);
3707	adapter->stats.scc += E1000_READ_REG(&adapter->hw, SCC);
3708	adapter->stats.ecol += E1000_READ_REG(&adapter->hw, ECOL);
3709
3710	adapter->stats.mcc += E1000_READ_REG(&adapter->hw, MCC);
3711	adapter->stats.latecol += E1000_READ_REG(&adapter->hw, LATECOL);
3712	adapter->stats.colc += E1000_READ_REG(&adapter->hw, COLC);
3713	adapter->stats.dc += E1000_READ_REG(&adapter->hw, DC);
3714	adapter->stats.rlec += E1000_READ_REG(&adapter->hw, RLEC);
3715	adapter->stats.xonrxc += E1000_READ_REG(&adapter->hw, XONRXC);
3716	adapter->stats.xontxc += E1000_READ_REG(&adapter->hw, XONTXC);
3717	adapter->stats.xoffrxc += E1000_READ_REG(&adapter->hw, XOFFRXC);
3718	adapter->stats.xofftxc += E1000_READ_REG(&adapter->hw, XOFFTXC);
3719	adapter->stats.fcruc += E1000_READ_REG(&adapter->hw, FCRUC);
3720	adapter->stats.prc64 += E1000_READ_REG(&adapter->hw, PRC64);
3721	adapter->stats.prc127 += E1000_READ_REG(&adapter->hw, PRC127);
3722	adapter->stats.prc255 += E1000_READ_REG(&adapter->hw, PRC255);
3723	adapter->stats.prc511 += E1000_READ_REG(&adapter->hw, PRC511);
3724	adapter->stats.prc1023 += E1000_READ_REG(&adapter->hw, PRC1023);
3725	adapter->stats.prc1522 += E1000_READ_REG(&adapter->hw, PRC1522);
3726	adapter->stats.gprc += E1000_READ_REG(&adapter->hw, GPRC);
3727	adapter->stats.bprc += E1000_READ_REG(&adapter->hw, BPRC);
3728	adapter->stats.mprc += E1000_READ_REG(&adapter->hw, MPRC);
3729	adapter->stats.gptc += E1000_READ_REG(&adapter->hw, GPTC);
3730
3731	/* For the 64-bit byte counters the low dword must be read first; */
3732	/* both registers clear on the read of the high dword. */
3733
3734	adapter->stats.gorcl += E1000_READ_REG(&adapter->hw, GORCL);
3735	adapter->stats.gorch += E1000_READ_REG(&adapter->hw, GORCH);
3736	adapter->stats.gotcl += E1000_READ_REG(&adapter->hw, GOTCL);
3737	adapter->stats.gotch += E1000_READ_REG(&adapter->hw, GOTCH);
3738
3739	adapter->stats.rnbc += E1000_READ_REG(&adapter->hw, RNBC);
3740	adapter->stats.ruc += E1000_READ_REG(&adapter->hw, RUC);
3741	adapter->stats.rfc += E1000_READ_REG(&adapter->hw, RFC);
3742	adapter->stats.roc += E1000_READ_REG(&adapter->hw, ROC);
3743	adapter->stats.rjc += E1000_READ_REG(&adapter->hw, RJC);
3744
3745	adapter->stats.torl += E1000_READ_REG(&adapter->hw, TORL);
3746	adapter->stats.torh += E1000_READ_REG(&adapter->hw, TORH);
3747	adapter->stats.totl += E1000_READ_REG(&adapter->hw, TOTL);
3748	adapter->stats.toth += E1000_READ_REG(&adapter->hw, TOTH);
3749
3750	adapter->stats.tpr += E1000_READ_REG(&adapter->hw, TPR);
3751	adapter->stats.tpt += E1000_READ_REG(&adapter->hw, TPT);
3752	adapter->stats.ptc64 += E1000_READ_REG(&adapter->hw, PTC64);
3753	adapter->stats.ptc127 += E1000_READ_REG(&adapter->hw, PTC127);
3754	adapter->stats.ptc255 += E1000_READ_REG(&adapter->hw, PTC255);
3755	adapter->stats.ptc511 += E1000_READ_REG(&adapter->hw, PTC511);
3756	adapter->stats.ptc1023 += E1000_READ_REG(&adapter->hw, PTC1023);
3757	adapter->stats.ptc1522 += E1000_READ_REG(&adapter->hw, PTC1522);
3758	adapter->stats.mptc += E1000_READ_REG(&adapter->hw, MPTC);
3759	adapter->stats.bptc += E1000_READ_REG(&adapter->hw, BPTC);
3760
3761	if (adapter->hw.mac_type >= em_82543) {
3762		adapter->stats.algnerrc += E1000_READ_REG(&adapter->hw, ALGNERRC);
3763		adapter->stats.rxerrc += E1000_READ_REG(&adapter->hw, RXERRC);
3764		adapter->stats.tncrs += E1000_READ_REG(&adapter->hw, TNCRS);
3765		adapter->stats.cexterr += E1000_READ_REG(&adapter->hw, CEXTERR);
3766		adapter->stats.tsctc += E1000_READ_REG(&adapter->hw, TSCTC);
3767		adapter->stats.tsctfc += E1000_READ_REG(&adapter->hw, TSCTFC);
3768	}
3769	ifp = adapter->ifp;
3770
3771	ifp->if_collisions = adapter->stats.colc;
3772
3773	/* Rx Errors */
3774	ifp->if_ierrors = adapter->stats.rxerrc + adapter->stats.crcerrs +
3775	    adapter->stats.algnerrc + adapter->stats.ruc + adapter->stats.roc +
3776	    adapter->stats.mpc + adapter->stats.cexterr;
3777
3778	/* Tx Errors */
3779	ifp->if_oerrors = adapter->stats.ecol + adapter->stats.latecol +
3780	    adapter->watchdog_events;
3781}
3782
3783
3784/**********************************************************************
3785 *
3786 *  This routine is called only when em_display_debug_stats is enabled.
3787 *  This routine provides a way to take a look at important statistics
3788 *  maintained by the driver and hardware.
3789 *
3790 **********************************************************************/
3791static void
3792em_print_debug_info(struct adapter *adapter)
3793{
3794	device_t dev = adapter->dev;
3795	uint8_t *hw_addr = adapter->hw.hw_addr;
3796
3797	device_printf(dev, "Adapter hardware address = %p\n", hw_addr);
3798	device_printf(dev, "CTRL = 0x%x RCTL = 0x%x\n",
3799	    E1000_READ_REG(&adapter->hw, CTRL),
3800	    E1000_READ_REG(&adapter->hw, RCTL));
3801	device_printf(dev, "Packet buffer = Tx=%dk Rx=%dk\n",
3802	    ((E1000_READ_REG(&adapter->hw, PBA) & 0xffff0000) >> 16),
3803	    (E1000_READ_REG(&adapter->hw, PBA) & 0xffff));
3804	device_printf(dev, "Flow control watermarks high = %d low = %d\n",
3805	    adapter->hw.fc_high_water,
3806	    adapter->hw.fc_low_water);
3807	device_printf(dev, "tx_int_delay = %d, tx_abs_int_delay = %d\n",
3808	    E1000_READ_REG(&adapter->hw, TIDV),
3809	    E1000_READ_REG(&adapter->hw, TADV));
3810	device_printf(dev, "rx_int_delay = %d, rx_abs_int_delay = %d\n",
3811	    E1000_READ_REG(&adapter->hw, RDTR),
3812	    E1000_READ_REG(&adapter->hw, RADV));
3813	device_printf(dev, "fifo workaround = %lld, fifo_reset_count = %lld\n",
3814	    (long long)adapter->tx_fifo_wrk_cnt,
3815	    (long long)adapter->tx_fifo_reset_cnt);
3816	device_printf(dev, "hw tdh = %d, hw tdt = %d\n",
3817	    E1000_READ_REG(&adapter->hw, TDH),
3818	    E1000_READ_REG(&adapter->hw, TDT));
3819	device_printf(dev, "Num Tx descriptors avail = %d\n",
3820	    adapter->num_tx_desc_avail);
3821	device_printf(dev, "Tx Descriptors not avail1 = %ld\n",
3822	    adapter->no_tx_desc_avail1);
3823	device_printf(dev, "Tx Descriptors not avail2 = %ld\n",
3824	    adapter->no_tx_desc_avail2);
3825	device_printf(dev, "Std mbuf failed = %ld\n",
3826	    adapter->mbuf_alloc_failed);
3827	device_printf(dev, "Std mbuf cluster failed = %ld\n",
3828	    adapter->mbuf_cluster_failed);
3829}
3830
3831static void
3832em_print_hw_stats(struct adapter *adapter)
3833{
3834	device_t dev = adapter->dev;
3835
3836	device_printf(dev, "Excessive collisions = %lld\n",
3837	    (long long)adapter->stats.ecol);
3838	device_printf(dev, "Symbol errors = %lld\n",
3839	    (long long)adapter->stats.symerrs);
3840	device_printf(dev, "Sequence errors = %lld\n",
3841	    (long long)adapter->stats.sec);
3842	device_printf(dev, "Defer count = %lld\n", (long long)adapter->stats.dc);
3843
3844	device_printf(dev, "Missed Packets = %lld\n", (long long)adapter->stats.mpc);
3845	device_printf(dev, "Receive No Buffers = %lld\n",
3846	    (long long)adapter->stats.rnbc);
3847	/* RLEC is inaccurate on some hardware, so calculate our own. */
3848	device_printf(dev, "Receive Length Errors = %lld\n",
3849	    ((long long)adapter->stats.roc + (long long)adapter->stats.ruc));
3850	device_printf(dev, "Receive errors = %lld\n",
3851	    (long long)adapter->stats.rxerrc);
3852	device_printf(dev, "CRC errors = %lld\n", (long long)adapter->stats.crcerrs);
3853	device_printf(dev, "Alignment errors = %lld\n",
3854	    (long long)adapter->stats.algnerrc);
3855	device_printf(dev, "Carrier extension errors = %lld\n",
3856	    (long long)adapter->stats.cexterr);
3857	device_printf(dev, "RX overruns = %ld\n", adapter->rx_overruns);
3858	device_printf(dev, "watchdog timeouts = %ld\n", adapter->watchdog_events);
3859
3860	device_printf(dev, "XON Rcvd = %lld\n", (long long)adapter->stats.xonrxc);
3861	device_printf(dev, "XON Xmtd = %lld\n", (long long)adapter->stats.xontxc);
3862	device_printf(dev, "XOFF Rcvd = %lld\n", (long long)adapter->stats.xoffrxc);
3863	device_printf(dev, "XOFF Xmtd = %lld\n", (long long)adapter->stats.xofftxc);
3864
3865	device_printf(dev, "Good Packets Rcvd = %lld\n",
3866	    (long long)adapter->stats.gprc);
3867	device_printf(dev, "Good Packets Xmtd = %lld\n",
3868	    (long long)adapter->stats.gptc);
3869	device_printf(dev, "TSO Contexts Xmtd = %lld\n",
3870	    (long long)adapter->stats.tsctc);
3871	device_printf(dev, "TSO Contexts Failed = %lld\n",
3872	    (long long)adapter->stats.tsctfc);
3873}
3874
3875static int
3876em_sysctl_debug_info(SYSCTL_HANDLER_ARGS)
3877{
3878	struct adapter *adapter;
3879	int error;
3880	int result;
3881
3882	result = -1;
3883	error = sysctl_handle_int(oidp, &result, 0, req);
3884
3885	if (error || !req->newptr)
3886		return (error);
3887
3888	if (result == 1) {
3889		adapter = (struct adapter *)arg1;
3890		em_print_debug_info(adapter);
3891	}
3892
3893	return (error);
3894}
3895
3896
3897static int
3898em_sysctl_stats(SYSCTL_HANDLER_ARGS)
3899{
3900	struct adapter *adapter;
3901	int error;
3902	int result;
3903
3904	result = -1;
3905	error = sysctl_handle_int(oidp, &result, 0, req);
3906
3907	if (error || !req->newptr)
3908		return (error);
3909
3910	if (result == 1) {
3911		adapter = (struct adapter *)arg1;
3912		em_print_hw_stats(adapter);
3913	}
3914
3915	return (error);
3916}
3917
3918static int
3919em_sysctl_int_delay(SYSCTL_HANDLER_ARGS)
3920{
3921	struct em_int_delay_info *info;
3922	struct adapter *adapter;
3923	uint32_t regval;
3924	int error;
3925	int usecs;
3926	int ticks;
3927
3928	info = (struct em_int_delay_info *)arg1;
3929	usecs = info->value;
3930	error = sysctl_handle_int(oidp, &usecs, 0, req);
3931	if (error != 0 || req->newptr == NULL)
3932		return (error);
3933	if (usecs < 0 || usecs > E1000_TICKS_TO_USECS(65535))
3934		return (EINVAL);
3935	info->value = usecs;
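	/*
	 * The delay registers count in device timer ticks (reportedly
	 * 1.024 usec units on this family), so the sysctl value is
	 * converted from microseconds before being written into the low
	 * 16 bits of the register.
	 */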
3936	ticks = E1000_USECS_TO_TICKS(usecs);
3937
3938	adapter = info->adapter;
3939
3940	EM_LOCK(adapter);
3941	regval = E1000_READ_OFFSET(&adapter->hw, info->offset);
3942	regval = (regval & ~0xffff) | (ticks & 0xffff);
3943	/* Handle a few special cases. */
3944	switch (info->offset) {
3945	case E1000_RDTR:
3946	case E1000_82542_RDTR:
3947		regval |= E1000_RDT_FPDB;
3948		break;
3949	case E1000_TIDV:
3950	case E1000_82542_TIDV:
3951		if (ticks == 0) {
3952			adapter->txd_cmd &= ~E1000_TXD_CMD_IDE;
3953			/* Don't write 0 into the TIDV register. */
3954			regval++;
3955		} else
3956			adapter->txd_cmd |= E1000_TXD_CMD_IDE;
3957		break;
3958	}
3959	E1000_WRITE_OFFSET(&adapter->hw, info->offset, regval);
3960	EM_UNLOCK(adapter);
3961	return (0);
3962}
3963
3964static void
3965em_add_int_delay_sysctl(struct adapter *adapter, const char *name,
3966	const char *description, struct em_int_delay_info *info,
3967	int offset, int value)
3968{
3969	info->adapter = adapter;
3970	info->offset = offset;
3971	info->value = value;
3972	SYSCTL_ADD_PROC(device_get_sysctl_ctx(adapter->dev),
3973	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
3974	    OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW,
3975	    info, 0, em_sysctl_int_delay, "I", description);
3976}
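/*
 * A sketch of typical attach-time usage (names recalled from the driver,
 * not verified against this revision):
 *
 *	em_add_int_delay_sysctl(adapter, "rx_int_delay",
 *	    "receive interrupt delay in usecs", &adapter->rx_int_delay,
 *	    E1000_REG_OFFSET(&adapter->hw, RDTR), em_rx_int_delay_dflt);
 */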
3977
3978#ifndef DEVICE_POLLING
3979static void
3980em_add_int_process_limit(struct adapter *adapter, const char *name,
3981	const char *description, int *limit, int value)
3982{
3983	*limit = value;
3984	SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev),
3985	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
3986	    OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW, limit, value, description);
3987}
3988#endif
3989