/**************************************************************************

Copyright (c) 2001-2006, Intel Corporation
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

 1. Redistributions of source code must retain the above copyright notice,
    this list of conditions and the following disclaimer.

 2. Redistributions in binary form must reproduce the above copyright
    notice, this list of conditions and the following disclaimer in the
    documentation and/or other materials provided with the distribution.

 3. Neither the name of the Intel Corporation nor the names of its
    contributors may be used to endorse or promote products derived from
    this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.

***************************************************************************/

/*$FreeBSD: head/sys/dev/em/if_em.c 162789 2006-09-29 13:47:38Z andre $*/

#ifdef HAVE_KERNEL_OPTION_HEADERS
#include "opt_device_polling.h"
#endif

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/bus.h>
#include <sys/endian.h>
#include <sys/kernel.h>
#include <sys/kthread.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/module.h>
#include <sys/rman.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/sysctl.h>
#include <sys/taskqueue.h>

#include <machine/bus.h>
#include <machine/resource.h>

#include <net/bpf.h>
#include <net/ethernet.h>
#include <net/if.h>
#include <net/if_arp.h>
#include <net/if_dl.h>
#include <net/if_media.h>

#include <net/if_types.h>
#include <net/if_vlan_var.h>

#include <netinet/in_systm.h>
#include <netinet/in.h>
#include <netinet/if_ether.h>
#include <netinet/ip.h>
#include <netinet/ip6.h>
#include <netinet/tcp.h>
#include <netinet/udp.h>

#include <machine/in_cksum.h>
#include <dev/pci/pcivar.h>
#include <dev/pci/pcireg.h>
#include <dev/em/if_em_hw.h>
#include <dev/em/if_em.h>

/*********************************************************************
 *  Set this to one to display debug statistics
 *********************************************************************/
int	em_display_debug_stats = 0;

/*********************************************************************
 *  Driver version
 *********************************************************************/

char em_driver_version[] = "Version - 6.1.4 - TSO";


/*********************************************************************
 *  PCI Device ID Table
 *
 *  Used by probe to select devices to load on
 *  Last field stores an index into em_strings
 *  Last entry must be all 0s
 *
 *  { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index }
 *********************************************************************/

static em_vendor_info_t em_vendor_info_array[] =
{
	/* Intel(R) PRO/1000 Network Connection */
	{ 0x8086, E1000_DEV_ID_82540EM,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82540EM_LOM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82540EP,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82540EP_LOM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82540EP_LP,	PCI_ANY_ID, PCI_ANY_ID, 0},

	{ 0x8086, E1000_DEV_ID_82541EI,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82541ER,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82541ER_LOM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82541EI_MOBILE,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82541GI,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82541GI_LF,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82541GI_MOBILE,	PCI_ANY_ID, PCI_ANY_ID, 0},

	{ 0x8086, E1000_DEV_ID_82542,		PCI_ANY_ID, PCI_ANY_ID, 0},

	{ 0x8086, E1000_DEV_ID_82543GC_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82543GC_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},

	{ 0x8086, E1000_DEV_ID_82544EI_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82544EI_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82544GC_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82544GC_LOM,	PCI_ANY_ID, PCI_ANY_ID, 0},

	{ 0x8086, E1000_DEV_ID_82545EM_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82545EM_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82545GM_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82545GM_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82545GM_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},

	{ 0x8086, E1000_DEV_ID_82546EB_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82546EB_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82546EB_QUAD_COPPER, PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82546GB_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82546GB_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82546GB_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82546GB_PCIE,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82546GB_QUAD_COPPER, PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82546GB_QUAD_COPPER_KSP3,
						PCI_ANY_ID, PCI_ANY_ID, 0},

	{ 0x8086, E1000_DEV_ID_82547EI,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82547EI_MOBILE,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82547GI,		PCI_ANY_ID, PCI_ANY_ID, 0},

	{ 0x8086, E1000_DEV_ID_82571EB_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER,
						PCI_ANY_ID, PCI_ANY_ID, 0},

	{ 0x8086, E1000_DEV_ID_82572EI_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82572EI_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82572EI_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82572EI,		PCI_ANY_ID, PCI_ANY_ID, 0},

	{ 0x8086, E1000_DEV_ID_82573E,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82573E_IAMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82573L,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_SPT,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_SPT,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_DPT,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_DPT,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IGP_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IGP_C,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IFE,	PCI_ANY_ID, PCI_ANY_ID, 0},

	/* required last entry */
	{ 0, 0, 0, 0, 0}
};

/*********************************************************************
 *  Table of branding strings for all supported NICs.
 *********************************************************************/

static char *em_strings[] = {
	"Intel(R) PRO/1000 Network Connection"
};

/*********************************************************************
 *  Function prototypes
 *********************************************************************/
static int	em_probe(device_t);
static int	em_attach(device_t);
static int	em_detach(device_t);
static int	em_shutdown(device_t);
static int	em_suspend(device_t);
static int	em_resume(device_t);
static void	em_start(struct ifnet *);
static void	em_start_locked(struct ifnet *ifp);
static int	em_ioctl(struct ifnet *, u_long, caddr_t);
static void	em_watchdog(struct ifnet *);
static void	em_init(void *);
static void	em_init_locked(struct adapter *);
static void	em_stop(void *);
static void	em_media_status(struct ifnet *, struct ifmediareq *);
static int	em_media_change(struct ifnet *);
static void	em_identify_hardware(struct adapter *);
static int	em_allocate_pci_resources(struct adapter *);
static int	em_allocate_intr(struct adapter *);
static void	em_free_intr(struct adapter *);
static void	em_free_pci_resources(struct adapter *);
static void	em_local_timer(void *);
static int	em_hardware_init(struct adapter *);
static void	em_setup_interface(device_t, struct adapter *);
static int	em_setup_transmit_structures(struct adapter *);
static void	em_initialize_transmit_unit(struct adapter *);
static int	em_setup_receive_structures(struct adapter *);
static void	em_initialize_receive_unit(struct adapter *);
static void	em_enable_intr(struct adapter *);
static void	em_disable_intr(struct adapter *);
static void	em_free_transmit_structures(struct adapter *);
static void	em_free_receive_structures(struct adapter *);
static void	em_update_stats_counters(struct adapter *);
static void	em_txeof(struct adapter *);
static int	em_allocate_receive_structures(struct adapter *);
static int	em_allocate_transmit_structures(struct adapter *);
static int	em_rxeof(struct adapter *, int);
#ifndef __NO_STRICT_ALIGNMENT
static int	em_fixup_rx(struct adapter *);
#endif
static void	em_receive_checksum(struct adapter *, struct em_rx_desc *,
		    struct mbuf *);
static void	em_transmit_checksum_setup(struct adapter *, struct mbuf *,
		    uint32_t *, uint32_t *);
static boolean_t em_tso_setup(struct adapter *, struct mbuf *,
		    uint32_t *, uint32_t *);
static void	em_set_promisc(struct adapter *);
static void	em_disable_promisc(struct adapter *);
static void	em_set_multi(struct adapter *);
static void	em_print_hw_stats(struct adapter *);
static void	em_update_link_status(struct adapter *);
static int	em_get_buf(struct adapter *, int);
static void	em_enable_vlans(struct adapter *);
static void	em_disable_vlans(struct adapter *);
static int	em_encap(struct adapter *, struct mbuf **);
static void	em_smartspeed(struct adapter *);
static int	em_82547_fifo_workaround(struct adapter *, int);
static void	em_82547_update_fifo_head(struct adapter *, int);
static int	em_82547_tx_fifo_reset(struct adapter *);
static void	em_82547_move_tail(void *arg);
static void	em_82547_move_tail_locked(struct adapter *);
static int	em_dma_malloc(struct adapter *, bus_size_t,
		    struct em_dma_alloc *, int);
static void	em_dma_free(struct adapter *, struct em_dma_alloc *);
static void	em_print_debug_info(struct adapter *);
static int	em_is_valid_ether_addr(uint8_t *);
static int	em_sysctl_stats(SYSCTL_HANDLER_ARGS);
static int	em_sysctl_debug_info(SYSCTL_HANDLER_ARGS);
static uint32_t	em_fill_descriptors(bus_addr_t address, uint32_t length,
		    PDESC_ARRAY desc_array);
static int	em_sysctl_int_delay(SYSCTL_HANDLER_ARGS);
static void	em_add_int_delay_sysctl(struct adapter *, const char *,
		    const char *, struct em_int_delay_info *, int, int);

/*
 * Fast interrupt handler and legacy ithread/polling modes are
 * mutually exclusive.
 */
#ifdef DEVICE_POLLING
static poll_handler_t em_poll;
static void	em_intr(void *);
#else
static void	em_intr_fast(void *);
static void	em_add_int_process_limit(struct adapter *, const char *,
		    const char *, int *, int);
static void	em_handle_rxtx(void *context, int pending);
static void	em_handle_link(void *context, int pending);
#endif

/*********************************************************************
 *  FreeBSD Device Interface Entry Points
 *********************************************************************/

static device_method_t em_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe, em_probe),
	DEVMETHOD(device_attach, em_attach),
	DEVMETHOD(device_detach, em_detach),
	DEVMETHOD(device_shutdown, em_shutdown),
	DEVMETHOD(device_suspend, em_suspend),
	DEVMETHOD(device_resume, em_resume),
	{0, 0}
};

static driver_t em_driver = {
	"em", em_methods, sizeof(struct adapter),
};

static devclass_t em_devclass;
DRIVER_MODULE(em, pci, em_driver, em_devclass, 0, 0);
MODULE_DEPEND(em, pci, 1, 1, 1);
MODULE_DEPEND(em, ether, 1, 1, 1);

/*********************************************************************
 *  Tunable default values.
 *********************************************************************/

#define E1000_TICKS_TO_USECS(ticks)	((1024 * (ticks) + 500) / 1000)
#define E1000_USECS_TO_TICKS(usecs)	((1000 * (usecs) + 512) / 1024)
#define M_TSO_LEN			66
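
/*
 * Illustrative note (not from the original source): the macros above
 * assume the hardware counts interrupt-delay timers in units of 1.024
 * usec.  For example, E1000_TICKS_TO_USECS(64) = (1024 * 64 + 500) / 1000
 * = 66 usec, and E1000_USECS_TO_TICKS(66) = (1000 * 66 + 512) / 1024 = 64
 * ticks again; the +500 and +512 terms round to the nearest unit.
 * M_TSO_LEN (66) is the largest frame that is assumed to be all header;
 * one layout that sums to 66 is 14 (Ethernet) + 20 (IP) + 32 (TCP with
 * options) bytes, though that breakdown is an illustration, not a
 * definition taken from the hardware documentation.
 */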

static int em_tx_int_delay_dflt = E1000_TICKS_TO_USECS(EM_TIDV);
static int em_rx_int_delay_dflt = E1000_TICKS_TO_USECS(EM_RDTR);
static int em_tx_abs_int_delay_dflt = E1000_TICKS_TO_USECS(EM_TADV);
static int em_rx_abs_int_delay_dflt = E1000_TICKS_TO_USECS(EM_RADV);
static int em_rxd = EM_DEFAULT_RXD;
static int em_txd = EM_DEFAULT_TXD;
static int em_smart_pwr_down = FALSE;

TUNABLE_INT("hw.em.tx_int_delay", &em_tx_int_delay_dflt);
TUNABLE_INT("hw.em.rx_int_delay", &em_rx_int_delay_dflt);
TUNABLE_INT("hw.em.tx_abs_int_delay", &em_tx_abs_int_delay_dflt);
TUNABLE_INT("hw.em.rx_abs_int_delay", &em_rx_abs_int_delay_dflt);
TUNABLE_INT("hw.em.rxd", &em_rxd);
TUNABLE_INT("hw.em.txd", &em_txd);
TUNABLE_INT("hw.em.smart_pwr_down", &em_smart_pwr_down);
#ifndef DEVICE_POLLING
static int em_rx_process_limit = 100;
TUNABLE_INT("hw.em.rx_process_limit", &em_rx_process_limit);
#endif
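
/*
 * Usage note (illustrative): TUNABLE_INT() values are read from the
 * kernel environment when the module is loaded, so the defaults above
 * can be overridden from /boot/loader.conf, e.g. (values shown are
 * examples only):
 *
 *	hw.em.txd=1024
 *	hw.em.rxd=1024
 *	hw.em.rx_int_delay=32
 */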

/*********************************************************************
 *  Device identification routine
 *
 *  em_probe determines if the driver should be loaded on an
 *  adapter, based on the PCI vendor/device ID of the adapter.
 *
 *  return BUS_PROBE_DEFAULT on success, positive on failure
 *********************************************************************/

static int
em_probe(device_t dev)
{
	char		adapter_name[60];
	uint16_t	pci_vendor_id = 0;
	uint16_t	pci_device_id = 0;
	uint16_t	pci_subvendor_id = 0;
	uint16_t	pci_subdevice_id = 0;
	em_vendor_info_t *ent;

	INIT_DEBUGOUT("em_probe: begin");

	pci_vendor_id = pci_get_vendor(dev);
	if (pci_vendor_id != EM_VENDOR_ID)
		return (ENXIO);

	pci_device_id = pci_get_device(dev);
	pci_subvendor_id = pci_get_subvendor(dev);
	pci_subdevice_id = pci_get_subdevice(dev);

	ent = em_vendor_info_array;
	while (ent->vendor_id != 0) {
		if ((pci_vendor_id == ent->vendor_id) &&
		    (pci_device_id == ent->device_id) &&

		    ((pci_subvendor_id == ent->subvendor_id) ||
		    (ent->subvendor_id == PCI_ANY_ID)) &&

		    ((pci_subdevice_id == ent->subdevice_id) ||
		    (ent->subdevice_id == PCI_ANY_ID))) {
			sprintf(adapter_name, "%s %s",
			    em_strings[ent->index],
			    em_driver_version);
			device_set_desc_copy(dev, adapter_name);
			return (BUS_PROBE_DEFAULT);
		}
		ent++;
	}

	return (ENXIO);
}

/*********************************************************************
 *  Device initialization routine
 *
 *  The attach entry point is called when the driver is being loaded.
 *  This routine identifies the type of hardware, allocates all resources
 *  and initializes the hardware.
 *
 *  return 0 on success, positive on failure
 *********************************************************************/

static int
em_attach(device_t dev)
{
	struct adapter	*adapter;
	int		tsize, rsize;
	int		error = 0;

	INIT_DEBUGOUT("em_attach: begin");

	adapter = device_get_softc(dev);
	adapter->dev = adapter->osdep.dev = dev;
	EM_LOCK_INIT(adapter, device_get_nameunit(dev));

	/* SYSCTL stuff */
	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
	    OID_AUTO, "debug_info", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
	    em_sysctl_debug_info, "I", "Debug Information");

	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
	    OID_AUTO, "stats", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
	    em_sysctl_stats, "I", "Statistics");

	callout_init(&adapter->timer, CALLOUT_MPSAFE);
	callout_init(&adapter->tx_fifo_timer, CALLOUT_MPSAFE);

	/* Determine hardware revision */
	em_identify_hardware(adapter);

	/* Set up some sysctls for the tunable interrupt delays */
	em_add_int_delay_sysctl(adapter, "rx_int_delay",
	    "receive interrupt delay in usecs", &adapter->rx_int_delay,
	    E1000_REG_OFFSET(&adapter->hw, RDTR), em_rx_int_delay_dflt);
	em_add_int_delay_sysctl(adapter, "tx_int_delay",
	    "transmit interrupt delay in usecs", &adapter->tx_int_delay,
	    E1000_REG_OFFSET(&adapter->hw, TIDV), em_tx_int_delay_dflt);
	if (adapter->hw.mac_type >= em_82540) {
		em_add_int_delay_sysctl(adapter, "rx_abs_int_delay",
		    "receive interrupt delay limit in usecs",
		    &adapter->rx_abs_int_delay,
		    E1000_REG_OFFSET(&adapter->hw, RADV),
		    em_rx_abs_int_delay_dflt);
		em_add_int_delay_sysctl(adapter, "tx_abs_int_delay",
		    "transmit interrupt delay limit in usecs",
		    &adapter->tx_abs_int_delay,
		    E1000_REG_OFFSET(&adapter->hw, TADV),
		    em_tx_abs_int_delay_dflt);
	}

#ifndef DEVICE_POLLING
	/* Sysctls for limiting the amount of work done in the taskqueue */
	em_add_int_process_limit(adapter, "rx_processing_limit",
	    "max number of rx packets to process", &adapter->rx_process_limit,
	    em_rx_process_limit);
#endif

	/*
	 * Validate the number of transmit and receive descriptors. It
	 * must not exceed the hardware maximum, and the ring size must
	 * be a multiple of EM_DBA_ALIGN.
	 */
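	/*
	 * Worked example of the check below (assuming EM_DBA_ALIGN is 128,
	 * as defined in if_em.h, and 16-byte legacy descriptors): 256
	 * descriptors occupy 256 * 16 = 4096 bytes, a multiple of 128, so
	 * the value is accepted; a hypothetical em_txd of 250 would give
	 * 4000 bytes, which is not, and the driver would fall back to
	 * EM_DEFAULT_TXD.
	 */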
	if (((em_txd * sizeof(struct em_tx_desc)) % EM_DBA_ALIGN) != 0 ||
	    (adapter->hw.mac_type >= em_82544 && em_txd > EM_MAX_TXD) ||
	    (adapter->hw.mac_type < em_82544 && em_txd > EM_MAX_TXD_82543) ||
	    (em_txd < EM_MIN_TXD)) {
		device_printf(dev, "Using %d TX descriptors instead of %d!\n",
		    EM_DEFAULT_TXD, em_txd);
		adapter->num_tx_desc = EM_DEFAULT_TXD;
	} else
		adapter->num_tx_desc = em_txd;
	if (((em_rxd * sizeof(struct em_rx_desc)) % EM_DBA_ALIGN) != 0 ||
	    (adapter->hw.mac_type >= em_82544 && em_rxd > EM_MAX_RXD) ||
	    (adapter->hw.mac_type < em_82544 && em_rxd > EM_MAX_RXD_82543) ||
	    (em_rxd < EM_MIN_RXD)) {
		device_printf(dev, "Using %d RX descriptors instead of %d!\n",
		    EM_DEFAULT_RXD, em_rxd);
		adapter->num_rx_desc = EM_DEFAULT_RXD;
	} else
		adapter->num_rx_desc = em_rxd;

	adapter->hw.autoneg = DO_AUTO_NEG;
	adapter->hw.wait_autoneg_complete = WAIT_FOR_AUTO_NEG_DEFAULT;
	adapter->hw.autoneg_advertised = AUTONEG_ADV_DEFAULT;
	adapter->hw.tbi_compatibility_en = TRUE;
	adapter->rx_buffer_len = EM_RXBUFFER_2048;

	adapter->hw.phy_init_script = 1;
	adapter->hw.phy_reset_disable = FALSE;

#ifndef EM_MASTER_SLAVE
	adapter->hw.master_slave = em_ms_hw_default;
#else
	adapter->hw.master_slave = EM_MASTER_SLAVE;
#endif
	/*
	 * Set the max frame size assuming standard Ethernet-sized
	 * frames.
	 */
	adapter->hw.max_frame_size = ETHERMTU + ETHER_HDR_LEN + ETHER_CRC_LEN;

	adapter->hw.min_frame_size = MINIMUM_ETHERNET_PACKET_SIZE + ETHER_CRC_LEN;
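	/*
	 * With the stock constants this works out to 1500 (ETHERMTU) +
	 * 14 (ETHER_HDR_LEN) + 4 (ETHER_CRC_LEN) = 1518 bytes for
	 * max_frame_size and, assuming MINIMUM_ETHERNET_PACKET_SIZE is
	 * 60 as in the Intel shared code, 60 + 4 = 64 bytes for
	 * min_frame_size -- the classic Ethernet frame limits.
	 */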

	/*
	 * This controls when hardware reports transmit completion
	 * status.
	 */
	adapter->hw.report_tx_early = 1;
	if (em_allocate_pci_resources(adapter)) {
		device_printf(dev, "Allocation of PCI resources failed\n");
		error = ENXIO;
		goto err_pci;
	}

	/* Initialize eeprom parameters */
	em_init_eeprom_params(&adapter->hw);

	tsize = roundup2(adapter->num_tx_desc * sizeof(struct em_tx_desc),
	    EM_DBA_ALIGN);

	/* Allocate Transmit Descriptor ring */
	if (em_dma_malloc(adapter, tsize, &adapter->txdma, BUS_DMA_NOWAIT)) {
		device_printf(dev, "Unable to allocate tx_desc memory\n");
		error = ENOMEM;
		goto err_tx_desc;
	}
	adapter->tx_desc_base = (struct em_tx_desc *)adapter->txdma.dma_vaddr;

	rsize = roundup2(adapter->num_rx_desc * sizeof(struct em_rx_desc),
	    EM_DBA_ALIGN);

	/* Allocate Receive Descriptor ring */
	if (em_dma_malloc(adapter, rsize, &adapter->rxdma, BUS_DMA_NOWAIT)) {
		device_printf(dev, "Unable to allocate rx_desc memory\n");
		error = ENOMEM;
		goto err_rx_desc;
	}
	adapter->rx_desc_base = (struct em_rx_desc *)adapter->rxdma.dma_vaddr;

	/* Initialize the hardware */
	if (em_hardware_init(adapter)) {
		device_printf(dev, "Unable to initialize the hardware\n");
		error = EIO;
		goto err_hw_init;
	}

	/* Copy the permanent MAC address out of the EEPROM */
	if (em_read_mac_addr(&adapter->hw) < 0) {
		device_printf(dev, "EEPROM read error while reading MAC"
		    " address\n");
		error = EIO;
		goto err_hw_init;
	}

	if (!em_is_valid_ether_addr(adapter->hw.mac_addr)) {
		device_printf(dev, "Invalid MAC address\n");
		error = EIO;
		goto err_hw_init;
	}

	/* Setup OS specific network interface */
	em_setup_interface(dev, adapter);

	em_allocate_intr(adapter);

	/* Initialize statistics */
	em_clear_hw_cntrs(&adapter->hw);
	em_update_stats_counters(adapter);
	adapter->hw.get_link_status = 1;
	em_update_link_status(adapter);

	/* Indicate SOL/IDER usage */
	if (em_check_phy_reset_block(&adapter->hw))
		device_printf(dev,
		    "PHY reset is blocked due to SOL/IDER session.\n");

	/* Identify 82544 on PCIX */
	em_get_bus_info(&adapter->hw);
	if (adapter->hw.bus_type == em_bus_type_pcix &&
	    adapter->hw.mac_type == em_82544)
		adapter->pcix_82544 = TRUE;
	else
		adapter->pcix_82544 = FALSE;

	INIT_DEBUGOUT("em_attach: end");

	return (0);

err_hw_init:
	em_dma_free(adapter, &adapter->rxdma);
err_rx_desc:
	em_dma_free(adapter, &adapter->txdma);
err_tx_desc:
err_pci:
	em_free_intr(adapter);
	em_free_pci_resources(adapter);
	EM_LOCK_DESTROY(adapter);

	return (error);
}

/*********************************************************************
 *  Device removal routine
 *
 *  The detach entry point is called when the driver is being removed.
 *  This routine stops the adapter and deallocates all the resources
 *  that were allocated for driver operation.
 *
 *  return 0 on success, positive on failure
 *********************************************************************/

static int
em_detach(device_t dev)
{
	struct adapter	*adapter = device_get_softc(dev);
	struct ifnet	*ifp = adapter->ifp;

	INIT_DEBUGOUT("em_detach: begin");

#ifdef DEVICE_POLLING
	if (ifp->if_capenable & IFCAP_POLLING)
		ether_poll_deregister(ifp);
#endif

	em_free_intr(adapter);
	EM_LOCK(adapter);
	adapter->in_detach = 1;
	em_stop(adapter);
	em_phy_hw_reset(&adapter->hw);
	EM_UNLOCK(adapter);
	ether_ifdetach(adapter->ifp);

	em_free_pci_resources(adapter);
	bus_generic_detach(dev);
	if_free(ifp);

	/* Free Transmit Descriptor ring */
	if (adapter->tx_desc_base) {
		em_dma_free(adapter, &adapter->txdma);
		adapter->tx_desc_base = NULL;
	}

	/* Free Receive Descriptor ring */
	if (adapter->rx_desc_base) {
		em_dma_free(adapter, &adapter->rxdma);
		adapter->rx_desc_base = NULL;
	}

	EM_LOCK_DESTROY(adapter);

	return (0);
}

/*********************************************************************
 *
 *  Shutdown entry point
 *
 **********************************************************************/

static int
em_shutdown(device_t dev)
{
	struct adapter *adapter = device_get_softc(dev);

	EM_LOCK(adapter);
	em_stop(adapter);
	EM_UNLOCK(adapter);

	return (0);
}

/*
 * Suspend/resume device methods.
 */
static int
em_suspend(device_t dev)
{
	struct adapter *adapter = device_get_softc(dev);

	EM_LOCK(adapter);
	em_stop(adapter);
	EM_UNLOCK(adapter);

	return bus_generic_suspend(dev);
}

static int
em_resume(device_t dev)
{
	struct adapter *adapter = device_get_softc(dev);
	struct ifnet *ifp = adapter->ifp;

	EM_LOCK(adapter);
	em_init_locked(adapter);
	if ((ifp->if_flags & IFF_UP) &&
	    (ifp->if_drv_flags & IFF_DRV_RUNNING))
		em_start_locked(ifp);
	EM_UNLOCK(adapter);

	return bus_generic_resume(dev);
}


/*********************************************************************
 *  Transmit entry point
 *
 *  em_start is called by the stack to initiate a transmit.
 *  The driver will remain in this routine as long as there are
 *  packets to transmit and transmit resources are available.
 *  If resources are not available, the stack is notified and
 *  the packet is requeued.
 **********************************************************************/

static void
em_start_locked(struct ifnet *ifp)
{
	struct adapter	*adapter = ifp->if_softc;
	struct mbuf	*m_head;

	EM_LOCK_ASSERT(adapter);

	if ((ifp->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) !=
	    IFF_DRV_RUNNING)
		return;
	if (!adapter->link_active)
		return;

	while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {

		IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
		if (m_head == NULL)
			break;
		/*
		 * em_encap() can modify our pointer, and/or make it NULL
		 * on failure.  In that event, we can't requeue.
		 */
		if (em_encap(adapter, &m_head)) {
			if (m_head == NULL)
				break;
			ifp->if_drv_flags |= IFF_DRV_OACTIVE;
			IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
			break;
		}

		/* Send a copy of the frame to the BPF listener */
		BPF_MTAP(ifp, m_head);

		/* Set timeout in case hardware has problems transmitting. */
		ifp->if_timer = EM_TX_TIMEOUT;
	}
}

static void
em_start(struct ifnet *ifp)
{
	struct adapter *adapter = ifp->if_softc;

	EM_LOCK(adapter);
	if (ifp->if_drv_flags & IFF_DRV_RUNNING)
		em_start_locked(ifp);
	EM_UNLOCK(adapter);
}

/*********************************************************************
 *  Ioctl entry point
 *
 *  em_ioctl is called when the user wants to configure the
 *  interface.
 *
 *  return 0 on success, positive on failure
 **********************************************************************/

static int
em_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
{
	struct adapter	*adapter = ifp->if_softc;
	struct ifreq	*ifr = (struct ifreq *)data;
	struct ifaddr	*ifa = (struct ifaddr *)data;
	int		error = 0;

	if (adapter->in_detach)
		return (error);

	switch (command) {
	case SIOCSIFADDR:
	case SIOCGIFADDR:
		if (ifa->ifa_addr->sa_family == AF_INET) {
			/*
			 * XXX
			 * Since resetting the hardware takes a very long
			 * time and results in link renegotiation, we only
			 * initialize the hardware when it is absolutely
			 * required.
			 */
			ifp->if_flags |= IFF_UP;
			if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) {
				EM_LOCK(adapter);
				em_init_locked(adapter);
				EM_UNLOCK(adapter);
			}
			arp_ifinit(ifp, ifa);
		} else
			error = ether_ioctl(ifp, command, data);
		break;
	case SIOCSIFMTU:
	    {
		int max_frame_size;
		uint16_t eeprom_data = 0;

		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFMTU (Set Interface MTU)");

		EM_LOCK(adapter);
		switch (adapter->hw.mac_type) {
		case em_82573:
			/*
			 * 82573 only supports jumbo frames
			 * if ASPM is disabled.
			 */
			em_read_eeprom(&adapter->hw, EEPROM_INIT_3GIO_3, 1,
			    &eeprom_data);
			if (eeprom_data & EEPROM_WORD1A_ASPM_MASK) {
				max_frame_size = ETHER_MAX_LEN;
				break;
			}
			/* Allow Jumbo frames - fall thru */
		case em_82571:
		case em_82572:
		case em_80003es2lan:	/* Limit Jumbo Frame size */
			max_frame_size = 9234;
			break;
		case em_ich8lan:
			/* ICH8 does not support jumbo frames */
			max_frame_size = ETHER_MAX_LEN;
			break;
		default:
			max_frame_size = MAX_JUMBO_FRAME_SIZE;
		}
		if (ifr->ifr_mtu > max_frame_size - ETHER_HDR_LEN -
		    ETHER_CRC_LEN) {
			EM_UNLOCK(adapter);
			error = EINVAL;
			break;
		}

		ifp->if_mtu = ifr->ifr_mtu;
		adapter->hw.max_frame_size =
		    ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
		em_init_locked(adapter);
		EM_UNLOCK(adapter);
		break;
	    }
	case SIOCSIFFLAGS:
		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFFLAGS (Set Interface Flags)");
		EM_LOCK(adapter);
		if (ifp->if_flags & IFF_UP) {
			if ((ifp->if_drv_flags & IFF_DRV_RUNNING)) {
				if ((ifp->if_flags ^ adapter->if_flags) &
				    IFF_PROMISC) {
					em_disable_promisc(adapter);
					em_set_promisc(adapter);
				}
			} else
				em_init_locked(adapter);
		} else {
			if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
				em_stop(adapter);
			}
		}
		adapter->if_flags = ifp->if_flags;
		EM_UNLOCK(adapter);
		break;
	case SIOCADDMULTI:
	case SIOCDELMULTI:
		IOCTL_DEBUGOUT("ioctl rcv'd: SIOC(ADD|DEL)MULTI");
		if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
			EM_LOCK(adapter);
			em_disable_intr(adapter);
			em_set_multi(adapter);
			if (adapter->hw.mac_type == em_82542_rev2_0) {
				em_initialize_receive_unit(adapter);
			}
#ifdef DEVICE_POLLING
			if (!(ifp->if_capenable & IFCAP_POLLING))
#endif
				em_enable_intr(adapter);
			EM_UNLOCK(adapter);
		}
		break;
	case SIOCSIFMEDIA:
	case SIOCGIFMEDIA:
		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCxIFMEDIA (Get/Set Interface Media)");
		error = ifmedia_ioctl(ifp, ifr, &adapter->media, command);
		break;
	case SIOCSIFCAP:
	    {
		int mask, reinit;

		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFCAP (Set Capabilities)");
		reinit = 0;
		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
#ifdef DEVICE_POLLING
		if (mask & IFCAP_POLLING) {
			if (ifr->ifr_reqcap & IFCAP_POLLING) {
				error = ether_poll_register(em_poll, ifp);
				if (error)
					return (error);
				EM_LOCK(adapter);
				em_disable_intr(adapter);
				ifp->if_capenable |= IFCAP_POLLING;
				EM_UNLOCK(adapter);
			} else {
				error = ether_poll_deregister(ifp);
				/* Enable interrupt even in error case */
				EM_LOCK(adapter);
				em_enable_intr(adapter);
				ifp->if_capenable &= ~IFCAP_POLLING;
				EM_UNLOCK(adapter);
			}
		}
#endif
		if (mask & IFCAP_HWCSUM) {
			ifp->if_capenable ^= IFCAP_HWCSUM;
			reinit = 1;
		}
		if (mask & IFCAP_TSO4) {
			ifp->if_capenable ^= IFCAP_TSO4;
			reinit = 1;
		}
		if (mask & IFCAP_VLAN_HWTAGGING) {
			ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
			reinit = 1;
		}
		if (reinit && (ifp->if_drv_flags & IFF_DRV_RUNNING))
			em_init(adapter);
		VLAN_CAPABILITIES(ifp);
		break;
	    }
	default:
		error = ether_ioctl(ifp, command, data);
		break;
	}

	return (error);
}

/*********************************************************************
 *  Watchdog entry point
 *
 *  This routine is called whenever the hardware quits transmitting.
 *
 **********************************************************************/

static void
em_watchdog(struct ifnet *ifp)
{
	struct adapter *adapter = ifp->if_softc;

	EM_LOCK(adapter);
	/*
	 * If we are in this routine because of pause frames, then
	 * don't reset the hardware.
	 */
	if (E1000_READ_REG(&adapter->hw, STATUS) & E1000_STATUS_TXOFF) {
		ifp->if_timer = EM_TX_TIMEOUT;
		EM_UNLOCK(adapter);
		return;
	}

	/*
	 * Reclaim first as there is a possibility of losing Tx completion
	 * interrupts. Missing Tx completion interrupts may come from the
	 * Tx interrupt moderation mechanism (delayed interrupts) or from
	 * a chipset bug.
	 */
	em_txeof(adapter);
	if (adapter->num_tx_desc_avail == adapter->num_tx_desc) {
		EM_UNLOCK(adapter);
		return;
	}

	if (em_check_for_link(&adapter->hw) == 0)
		device_printf(adapter->dev, "watchdog timeout -- resetting\n");

	ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
	adapter->watchdog_events++;

	em_init_locked(adapter);
	EM_UNLOCK(adapter);
}

/*********************************************************************
 *  Init entry point
 *
 *  This routine is used in two ways. It is used by the stack as
 *  an init entry point in the network interface structure. It is
 *  also used by the driver as a hw/sw initialization routine to
 *  get to a consistent state.
 *
 **********************************************************************/

static void
em_init_locked(struct adapter *adapter)
{
	struct ifnet	*ifp = adapter->ifp;
	device_t	dev = adapter->dev;
	uint32_t	pba;

	INIT_DEBUGOUT("em_init: begin");

	EM_LOCK_ASSERT(adapter);

	em_stop(adapter);

	/*
	 * Packet Buffer Allocation (PBA)
	 * Writing PBA sets the receive portion of the buffer;
	 * the remainder is used for the transmit buffer.
	 *
	 * Devices before the 82547 had a Packet Buffer of 64K.
	 *   Default allocation: PBA=48K for Rx, leaving 16K for Tx.
	 * From the 82547 on, the buffer was reduced to 40K.
	 *   Default allocation: PBA=30K for Rx, leaving 10K for Tx.
	 *   Note: the default does not leave enough room for a
	 *   Jumbo Frame >10k.
	 */
	switch (adapter->hw.mac_type) {
	case em_82547:
	case em_82547_rev_2: /* 82547: Total Packet Buffer is 40K */
		if (adapter->hw.max_frame_size > EM_RXBUFFER_8192)
			pba = E1000_PBA_22K; /* 22K for Rx, 18K for Tx */
		else
			pba = E1000_PBA_30K; /* 30K for Rx, 10K for Tx */
		adapter->tx_fifo_head = 0;
		adapter->tx_head_addr = pba << EM_TX_HEAD_ADDR_SHIFT;
		adapter->tx_fifo_size =
		    (E1000_PBA_40K - pba) << EM_PBA_BYTES_SHIFT;
		break;
	case em_80003es2lan: /* 80003es2lan: Total Packet Buffer is 48K */
	case em_82571: /* 82571: Total Packet Buffer is 48K */
	case em_82572: /* 82572: Total Packet Buffer is 48K */
		pba = E1000_PBA_32K; /* 32K for Rx, 16K for Tx */
		break;
	case em_82573: /* 82573: Total Packet Buffer is 32K */
		/* Jumbo frames not supported */
		pba = E1000_PBA_12K; /* 12K for Rx, 20K for Tx */
		break;
	case em_ich8lan:
		pba = E1000_PBA_8K;
		break;
	default:
		/* Devices before 82547 had a Packet Buffer of 64K. */
		if (adapter->hw.max_frame_size > EM_RXBUFFER_8192)
			pba = E1000_PBA_40K; /* 40K for Rx, 24K for Tx */
		else
			pba = E1000_PBA_48K; /* 48K for Rx, 16K for Tx */
	}
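
	/*
	 * Worked example (illustrative): on an 82547 at the default MTU,
	 * pba = E1000_PBA_30K, so the 82547 case above sets the
	 * driver-managed Tx FIFO to (E1000_PBA_40K - E1000_PBA_30K) = 10
	 * units of 1 KB, i.e. 10 KB, with EM_PBA_BYTES_SHIFT converting
	 * PBA units to bytes and EM_TX_HEAD_ADDR_SHIFT placing the head
	 * address at the 30 KB mark (this assumes the PBA constants are
	 * expressed in 1 KB units, as their names suggest).
	 */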

	INIT_DEBUGOUT1("em_init: pba=%dK", pba);
	E1000_WRITE_REG(&adapter->hw, PBA, pba);

	/* Get the latest mac address; the user can use a LAA. */
	bcopy(IF_LLADDR(adapter->ifp), adapter->hw.mac_addr, ETHER_ADDR_LEN);

	/* Initialize the hardware */
	if (em_hardware_init(adapter)) {
		device_printf(dev, "Unable to initialize the hardware\n");
		return;
	}
	em_update_link_status(adapter);

	if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING)
		em_enable_vlans(adapter);

	ifp->if_hwassist = 0;
	if (adapter->hw.mac_type >= em_82543) {
		if (ifp->if_capenable & IFCAP_TXCSUM)
			ifp->if_hwassist = EM_CHECKSUM_FEATURES;
		/*
		 * em_setup_transmit_structures() will behave differently
		 * based on the state of TSO.
		 */
		if (ifp->if_capenable & IFCAP_TSO)
			ifp->if_hwassist |= EM_TCPSEG_FEATURES;
	}

	/* Prepare transmit descriptors and buffers */
	if (em_setup_transmit_structures(adapter)) {
		device_printf(dev, "Could not setup transmit structures\n");
		em_stop(adapter);
		return;
	}
	em_initialize_transmit_unit(adapter);

	/* Setup Multicast table */
	em_set_multi(adapter);

	/* Prepare receive descriptors and buffers */
	if (em_setup_receive_structures(adapter)) {
		device_printf(dev, "Could not setup receive structures\n");
		em_stop(adapter);
		return;
	}
	em_initialize_receive_unit(adapter);

	/* Don't lose promiscuous settings */
	em_set_promisc(adapter);

	ifp->if_drv_flags |= IFF_DRV_RUNNING;
	ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;

	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
	em_clear_hw_cntrs(&adapter->hw);
#ifdef DEVICE_POLLING
	/*
	 * Only enable interrupts if we are not polling; make sure
	 * they are off otherwise.
	 */
	if (ifp->if_capenable & IFCAP_POLLING)
		em_disable_intr(adapter);
	else
#endif /* DEVICE_POLLING */
		em_enable_intr(adapter);

	/* Don't reset the phy next time init gets called */
	adapter->hw.phy_reset_disable = TRUE;
}

static void
em_init(void *arg)
{
	struct adapter *adapter = arg;

	EM_LOCK(adapter);
	em_init_locked(adapter);
	EM_UNLOCK(adapter);
}


#ifdef DEVICE_POLLING
/*********************************************************************
 *
 *  Legacy polling routine
 *
 *********************************************************************/
static void
em_poll(struct ifnet *ifp, enum poll_cmd cmd, int count)
{
	struct adapter *adapter = ifp->if_softc;
	uint32_t reg_icr;

	EM_LOCK(adapter);
	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
		EM_UNLOCK(adapter);
		return;
	}

	if (cmd == POLL_AND_CHECK_STATUS) {
		reg_icr = E1000_READ_REG(&adapter->hw, ICR);
		if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
			callout_stop(&adapter->timer);
			adapter->hw.get_link_status = 1;
			em_check_for_link(&adapter->hw);
			em_update_link_status(adapter);
			callout_reset(&adapter->timer, hz, em_local_timer,
			    adapter);
		}
	}
	em_rxeof(adapter, count);
	em_txeof(adapter);

	if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
		em_start_locked(ifp);
	EM_UNLOCK(adapter);
}

/*********************************************************************
 *
 *  Legacy Interrupt Service routine
 *
 *********************************************************************/
static void
em_intr(void *arg)
{
	struct adapter	*adapter = arg;
	struct ifnet	*ifp;
	uint32_t	reg_icr;

	EM_LOCK(adapter);

	ifp = adapter->ifp;

	if (ifp->if_capenable & IFCAP_POLLING) {
		EM_UNLOCK(adapter);
		return;
	}

	for (;;) {
		reg_icr = E1000_READ_REG(&adapter->hw, ICR);
		if (adapter->hw.mac_type >= em_82571 &&
		    (reg_icr & E1000_ICR_INT_ASSERTED) == 0)
			break;
		else if (reg_icr == 0)
			break;

		/*
		 * XXX: some laptops trigger several spurious interrupts
		 * on em(4) when in the resume cycle. The ICR register
		 * reports all-ones value in this case. Processing such
		 * interrupts would lead to a freeze. I don't know why.
		 */
		if (reg_icr == 0xffffffff)
			break;

		if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
			em_rxeof(adapter, -1);
			em_txeof(adapter);
		}

		/* Link status change */
		if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
			callout_stop(&adapter->timer);
			adapter->hw.get_link_status = 1;
			em_check_for_link(&adapter->hw);
			em_update_link_status(adapter);
			callout_reset(&adapter->timer, hz, em_local_timer,
			    adapter);
		}

		if (reg_icr & E1000_ICR_RXO)
			adapter->rx_overruns++;
	}

	if (ifp->if_drv_flags & IFF_DRV_RUNNING &&
	    !IFQ_DRV_IS_EMPTY(&ifp->if_snd))
		em_start_locked(ifp);

	EM_UNLOCK(adapter);
}

#else  /* if not DEVICE_POLLING, then fast interrupt routines only */

static void
em_handle_link(void *context, int pending)
{
	struct adapter	*adapter = context;
	struct ifnet	*ifp;

	ifp = adapter->ifp;

	EM_LOCK(adapter);

	callout_stop(&adapter->timer);
	adapter->hw.get_link_status = 1;
	em_check_for_link(&adapter->hw);
	em_update_link_status(adapter);
	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
	EM_UNLOCK(adapter);
}

static void
em_handle_rxtx(void *context, int pending)
{
	struct adapter	*adapter = context;
	struct ifnet	*ifp;

	NET_LOCK_GIANT();
	ifp = adapter->ifp;

	/*
	 * TODO:
	 * It should be possible to run the tx clean loop without the lock.
	 */
	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
		if (em_rxeof(adapter, adapter->rx_process_limit) != 0)
			taskqueue_enqueue(adapter->tq, &adapter->rxtx_task);
		EM_LOCK(adapter);
		em_txeof(adapter);

		if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
			em_start_locked(ifp);
		EM_UNLOCK(adapter);
	}

	em_enable_intr(adapter);
	NET_UNLOCK_GIANT();
}

/*********************************************************************
 *
 *  Fast Interrupt Service routine
 *
 *********************************************************************/
static void
em_intr_fast(void *arg)
{
	struct adapter	*adapter = arg;
	struct ifnet	*ifp;
	uint32_t	reg_icr;

	ifp = adapter->ifp;

	reg_icr = E1000_READ_REG(&adapter->hw, ICR);

	/* Hot eject?  */
	if (reg_icr == 0xffffffff)
		return;

	/* Definitely not our interrupt.  */
	if (reg_icr == 0x0)
		return;

	/*
	 * Starting with the 82571 chip, bit 31 should be used to
	 * determine whether the interrupt belongs to us.
	 */
	if (adapter->hw.mac_type >= em_82571 &&
	    (reg_icr & E1000_ICR_INT_ASSERTED) == 0)
		return;

	/*
	 * Mask interrupts until the taskqueue is finished running.  This is
	 * cheap, just assume that it is needed.  This also works around the
	 * MSI message reordering errata on certain systems.
	 */
	em_disable_intr(adapter);
	taskqueue_enqueue(adapter->tq, &adapter->rxtx_task);

	/* Link status change */
	if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC))
		taskqueue_enqueue(taskqueue_fast, &adapter->link_task);

	if (reg_icr & E1000_ICR_RXO)
		adapter->rx_overruns++;
}
#endif /* ! DEVICE_POLLING */

/*********************************************************************
 *
 *  Media Ioctl callback
 *
 *  This routine is called whenever the user queries the status of
 *  the interface using ifconfig.
 *
 **********************************************************************/
static void
em_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
{
	struct adapter *adapter = ifp->if_softc;

	INIT_DEBUGOUT("em_media_status: begin");

	EM_LOCK(adapter);
	em_check_for_link(&adapter->hw);
	em_update_link_status(adapter);

	ifmr->ifm_status = IFM_AVALID;
	ifmr->ifm_active = IFM_ETHER;

	if (!adapter->link_active) {
		EM_UNLOCK(adapter);
		return;
	}

	ifmr->ifm_status |= IFM_ACTIVE;

	if ((adapter->hw.media_type == em_media_type_fiber) ||
	    (adapter->hw.media_type == em_media_type_internal_serdes)) {
		if (adapter->hw.mac_type == em_82545)
			ifmr->ifm_active |= IFM_1000_LX | IFM_FDX;
		else
			ifmr->ifm_active |= IFM_1000_SX | IFM_FDX;
	} else {
		switch (adapter->link_speed) {
		case 10:
			ifmr->ifm_active |= IFM_10_T;
			break;
		case 100:
			ifmr->ifm_active |= IFM_100_TX;
			break;
		case 1000:
			ifmr->ifm_active |= IFM_1000_T;
			break;
		}
		if (adapter->link_duplex == FULL_DUPLEX)
			ifmr->ifm_active |= IFM_FDX;
		else
			ifmr->ifm_active |= IFM_HDX;
	}
	EM_UNLOCK(adapter);
}

/*********************************************************************
 *
 *  Media Ioctl callback
 *
 *  This routine is called when the user changes speed/duplex using
 *  the media/mediaopt option with ifconfig.
 *
 **********************************************************************/
static int
em_media_change(struct ifnet *ifp)
{
	struct adapter	*adapter = ifp->if_softc;
	struct ifmedia	*ifm = &adapter->media;

	INIT_DEBUGOUT("em_media_change: begin");

	if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
		return (EINVAL);

	EM_LOCK(adapter);
	switch (IFM_SUBTYPE(ifm->ifm_media)) {
	case IFM_AUTO:
		adapter->hw.autoneg = DO_AUTO_NEG;
		adapter->hw.autoneg_advertised = AUTONEG_ADV_DEFAULT;
		break;
	case IFM_1000_LX:
	case IFM_1000_SX:
	case IFM_1000_T:
		adapter->hw.autoneg = DO_AUTO_NEG;
		adapter->hw.autoneg_advertised = ADVERTISE_1000_FULL;
		break;
	case IFM_100_TX:
		adapter->hw.autoneg = FALSE;
		adapter->hw.autoneg_advertised = 0;
		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
			adapter->hw.forced_speed_duplex = em_100_full;
		else
			adapter->hw.forced_speed_duplex = em_100_half;
		break;
	case IFM_10_T:
		adapter->hw.autoneg = FALSE;
		adapter->hw.autoneg_advertised = 0;
		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
			adapter->hw.forced_speed_duplex = em_10_full;
		else
			adapter->hw.forced_speed_duplex = em_10_half;
		break;
	default:
		device_printf(adapter->dev, "Unsupported media type\n");
	}

	/*
	 * As the speed/duplex settings may have changed, we need
	 * to reset the PHY.
	 */
	adapter->hw.phy_reset_disable = FALSE;

	em_init_locked(adapter);
	EM_UNLOCK(adapter);

	return (0);
}

/*********************************************************************
 *
 *  This routine maps the mbufs to tx descriptors.
 *
 *  return 0 on success, positive on failure
 **********************************************************************/
static int
em_encap(struct adapter *adapter, struct mbuf **m_headp)
{
	struct ifnet		*ifp = adapter->ifp;
	bus_dma_segment_t	segs[EM_MAX_SCATTER];
	bus_dmamap_t		map;
	struct em_buffer	*tx_buffer, *tx_buffer_last;
	struct em_tx_desc	*current_tx_desc;
	struct mbuf		*m_head;
	uint32_t		txd_upper, txd_lower, txd_used, txd_saved;
	int			nsegs, i, j;
	int			error, do_tso, tso_desc = 0;

	m_head = *m_headp;
	current_tx_desc = NULL;
	txd_upper = txd_lower = txd_used = txd_saved = 0;

	do_tso = ((m_head->m_pkthdr.csum_flags & CSUM_TSO) != 0);

	/*
	 * Force a cleanup if the number of TX descriptors
	 * available hits the threshold.
	 */
	if (adapter->num_tx_desc_avail <= EM_TX_CLEANUP_THRESHOLD) {
		em_txeof(adapter);
		if (adapter->num_tx_desc_avail <= EM_TX_CLEANUP_THRESHOLD) {
			adapter->no_tx_desc_avail1++;
			return (ENOBUFS);
		}
	}

	/*
	 * TSO workaround:
	 * If an mbuf is only a header we need
	 * to pull 4 bytes of data into it.
	 */
	if (do_tso && (m_head->m_len <= M_TSO_LEN)) {
		m_head = m_pullup(m_head, M_TSO_LEN + 4);
		*m_headp = m_head;
		if (m_head == NULL) {
			return (ENOBUFS);
		}
	}

	/*
	 * Map the packet for DMA.
	 */
	tx_buffer = &adapter->tx_buffer_area[adapter->next_avail_tx_desc];
	tx_buffer_last = tx_buffer;
	map = tx_buffer->map;

	error = bus_dmamap_load_mbuf_sg(adapter->txtag, map, *m_headp, segs,
	    &nsegs, BUS_DMA_NOWAIT);

	/*
	 * There are two types of errors we can (try) to handle:
	 * - EFBIG means the mbuf chain was too long and bus_dma ran
	 *   out of segments.  Defragment the mbuf chain and try again.
	 * - ENOMEM means bus_dma could not obtain enough bounce buffers
	 *   at this point in time.  Defer sending and try again later.
	 * All other errors, in particular EINVAL, are fatal and prevent the
	 * mbuf chain from ever going through.  Drop it and report error.
	 */
	if (error == EFBIG) {
		struct mbuf *m;

		m = m_defrag(*m_headp, M_DONTWAIT);
		if (m == NULL) {
			/* Assume m_defrag(9) used only m_get(9). */
			adapter->mbuf_alloc_failed++;
			m_freem(*m_headp);
			*m_headp = NULL;
			return (ENOBUFS);
		}
		*m_headp = m;

		error = bus_dmamap_load_mbuf_sg(adapter->txtag, map, *m_headp,
		    segs, &nsegs, BUS_DMA_NOWAIT);

		if (error == ENOMEM) {
			adapter->no_tx_dma_setup++;
			return (error);
		} else if (error != 0) {
			adapter->no_tx_dma_setup++;
			m_freem(*m_headp);
			*m_headp = NULL;
			return (error);
		}
	} else if (error == ENOMEM) {
		adapter->no_tx_dma_setup++;
		return (error);
	} else if (error != 0) {
		adapter->no_tx_dma_setup++;
		m_freem(*m_headp);
		*m_headp = NULL;
		return (error);
	}

	/*
	 * TSO Hardware workaround, if this packet is not
	 * TSO, and is only a single descriptor long, and
	 * it follows a TSO burst, then we need to add a
	 * sentinel descriptor to prevent premature writeback.
	 */
	if ((do_tso == 0) && (adapter->tx_tso == TRUE)) {
		if (nsegs == 1)
			tso_desc = TRUE;
		adapter->tx_tso = FALSE;
	}

	if (nsegs > adapter->num_tx_desc_avail - 2) {
		adapter->no_tx_desc_avail2++;
		bus_dmamap_unload(adapter->txtag, map);
		return (ENOBUFS);
	}
	m_head = *m_headp;

	/* Do hardware assists */
	if (ifp->if_hwassist) {
		if (do_tso &&
		    em_tso_setup(adapter, m_head, &txd_upper, &txd_lower)) {
			/* we need to make a final sentinel transmit desc */
			tso_desc = TRUE;
		} else
			em_transmit_checksum_setup(adapter, m_head,
			    &txd_upper, &txd_lower);
	}

	i = adapter->next_avail_tx_desc;
	if (adapter->pcix_82544)
		txd_saved = i;

	for (j = 0; j < nsegs; j++) {
		bus_size_t seg_len;
		bus_addr_t seg_addr;
		/* 82544 on a PCI-X bus: apply the descriptor split workaround. */
		if (adapter->pcix_82544) {
			DESC_ARRAY	desc_array;
			uint32_t	array_elements, counter;

			/*
			 * Check the Address and Length combination and
			 * split the data accordingly.
			 */
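			/*
			 * (Added note: the workaround here targets a PCI-X
			 * erratum on the 82544 where certain DMA buffer
			 * address/length combinations can hang the
			 * controller; em_fill_descriptors() breaks such a
			 * segment into several safe sub-segments, each
			 * taking its own descriptor.  This summary is
			 * inferred from the workaround code rather than
			 * quoted from the erratum text.)
			 */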
			array_elements = em_fill_descriptors(segs[j].ds_addr,
			    segs[j].ds_len, &desc_array);
			for (counter = 0; counter < array_elements; counter++) {
				if (txd_used == adapter->num_tx_desc_avail) {
					adapter->next_avail_tx_desc = txd_saved;
					adapter->no_tx_desc_avail2++;
					bus_dmamap_unload(adapter->txtag, map);
					return (ENOBUFS);
				}
				tx_buffer = &adapter->tx_buffer_area[i];
				current_tx_desc = &adapter->tx_desc_base[i];
				current_tx_desc->buffer_addr = htole64(
				    desc_array.descriptor[counter].address);
				current_tx_desc->lower.data = htole32(
				    (adapter->txd_cmd | txd_lower |
				    (uint16_t)desc_array.descriptor[counter].length));
				current_tx_desc->upper.data = htole32((txd_upper));
				if (++i == adapter->num_tx_desc)
					i = 0;

				tx_buffer->m_head = NULL;
				txd_used++;
			}
		} else {
			tx_buffer = &adapter->tx_buffer_area[i];
			current_tx_desc = &adapter->tx_desc_base[i];
			seg_addr = htole64(segs[j].ds_addr);
			seg_len  = segs[j].ds_len;
			/*
			 * TSO Workaround:
			 * If this is the last descriptor, we want to
			 * split it so we have a small final sentinel.
			 */
			if (tso_desc && (j == (nsegs - 1)) && (seg_len > 8)) {
				seg_len -= 4;
				current_tx_desc->buffer_addr = seg_addr;
				current_tx_desc->lower.data = htole32(
				    adapter->txd_cmd | txd_lower | seg_len);
				current_tx_desc->upper.data =
				    htole32(txd_upper);
				if (++i == adapter->num_tx_desc)
					i = 0;
				/* Now make the sentinel */
				++txd_used; /* using an extra txd */
				current_tx_desc = &adapter->tx_desc_base[i];
				tx_buffer = &adapter->tx_buffer_area[i];
				current_tx_desc->buffer_addr =
				    seg_addr + seg_len;
				current_tx_desc->lower.data = htole32(
				    adapter->txd_cmd | txd_lower | 4);
				current_tx_desc->upper.data =
				    htole32(txd_upper);
				if (++i == adapter->num_tx_desc)
					i = 0;
			} else {
				current_tx_desc->buffer_addr = seg_addr;
				current_tx_desc->lower.data = htole32(
				    adapter->txd_cmd | txd_lower | seg_len);
				current_tx_desc->upper.data =
				    htole32(txd_upper);
				if (++i == adapter->num_tx_desc)
					i = 0;
			}
			tx_buffer->m_head = NULL;
		}
	}

	adapter->next_avail_tx_desc = i;
	if (adapter->pcix_82544)
		adapter->num_tx_desc_avail -= txd_used;
	else {
		adapter->num_tx_desc_avail -= nsegs;
		if (tso_desc) /* TSO used an extra for sentinel */
			adapter->num_tx_desc_avail -= txd_used;
	}

	if (m_head->m_flags & M_VLANTAG) {
		/* Set the vlan id. */
		current_tx_desc->upper.fields.special =
		    htole16(m_head->m_pkthdr.ether_vtag);

		/* Tell hardware to add tag. */
		current_tx_desc->lower.data |= htole32(E1000_TXD_CMD_VLE);
	}

	tx_buffer->m_head = m_head;
	tx_buffer_last->map = tx_buffer->map;
	tx_buffer->map = map;
	bus_dmamap_sync(adapter->txtag, map, BUS_DMASYNC_PREWRITE);

	/*
	 * Last Descriptor of Packet needs End Of Packet (EOP).
	 */
	current_tx_desc->lower.data |= htole32(E1000_TXD_CMD_EOP);

	/*
	 * Advance the Transmit Descriptor Tail (TDT); this tells the
	 * E1000 that this frame is available to transmit.
	 */
	bus_dmamap_sync(adapter->txdma.dma_tag, adapter->txdma.dma_map,
	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
	if (adapter->hw.mac_type == em_82547 &&
	    adapter->link_duplex == HALF_DUPLEX)
		em_82547_move_tail_locked(adapter);
	else {
		E1000_WRITE_REG(&adapter->hw, TDT, i);
		if (adapter->hw.mac_type == em_82547)
			em_82547_update_fifo_head(adapter, m_head->m_pkthdr.len);
	}

	return (0);
}

/*********************************************************************
 *
 * 82547 workaround to avoid controller hang in half-duplex environments.
 * The workaround is to avoid queuing a large packet that would span
 * the internal Tx FIFO ring boundary. We need to reset the FIFO pointers
 * in this case. We do that only when the FIFO is quiescent.
 *
 **********************************************************************/
static void
em_82547_move_tail_locked(struct adapter *adapter)
{
	uint16_t hw_tdt;
	uint16_t sw_tdt;
	struct em_tx_desc *tx_desc;
	uint16_t length = 0;
	boolean_t eop = 0;

	EM_LOCK_ASSERT(adapter);

	hw_tdt = E1000_READ_REG(&adapter->hw, TDT);
	sw_tdt = adapter->next_avail_tx_desc;

	while (hw_tdt != sw_tdt) {
		tx_desc = &adapter->tx_desc_base[hw_tdt];
		length += tx_desc->lower.flags.length;
		eop = tx_desc->lower.data & E1000_TXD_CMD_EOP;
		if (++hw_tdt == adapter->num_tx_desc)
			hw_tdt = 0;

		if (eop) {
			if (em_82547_fifo_workaround(adapter, length)) {
				adapter->tx_fifo_wrk_cnt++;
				callout_reset(&adapter->tx_fifo_timer, 1,
				    em_82547_move_tail, adapter);
				break;
			}
			E1000_WRITE_REG(&adapter->hw, TDT, hw_tdt);
			em_82547_update_fifo_head(adapter, length);
			length = 0;
		}
	}
}

static void
em_82547_move_tail(void *arg)
{
	struct adapter *adapter = arg;

	EM_LOCK(adapter);
	em_82547_move_tail_locked(adapter);
	EM_UNLOCK(adapter);
}

static int
em_82547_fifo_workaround(struct adapter *adapter, int len)
{
	int fifo_space, fifo_pkt_len;

	fifo_pkt_len = roundup2(len + EM_FIFO_HDR, EM_FIFO_HDR);
1761
1762	if (adapter->link_duplex == HALF_DUPLEX) {
1763		fifo_space = adapter->tx_fifo_size - adapter->tx_fifo_head;
1764
1765		if (fifo_pkt_len >= (EM_82547_PKT_THRESH + fifo_space)) {
1766			if (em_82547_tx_fifo_reset(adapter))
1767				return (0);
1768			else
1769				return (1);
1770		}
1771	}
1772
1773	return (0);
1774}
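
/*
 * Worked example for the check above (illustrative numbers, not taken
 * from the header): assuming EM_FIFO_HDR is 16, a 1514-byte frame
 * gives fifo_pkt_len = roundup2(1514 + 16, 16) = 1536.  With only
 * fifo_space = 1000 bytes left before the FIFO wrap, the reset path
 * is taken once fifo_pkt_len >= EM_82547_PKT_THRESH + fifo_space.
 */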
1775
1776static void
1777em_82547_update_fifo_head(struct adapter *adapter, int len)
1778{
1779	int fifo_pkt_len = roundup2(len + EM_FIFO_HDR, EM_FIFO_HDR);
1780
1781	/* tx_fifo_head is always 16-byte aligned */
1782	adapter->tx_fifo_head += fifo_pkt_len;
1783	if (adapter->tx_fifo_head >= adapter->tx_fifo_size) {
1784		adapter->tx_fifo_head -= adapter->tx_fifo_size;
1785	}
1786}
1787
1788
1789static int
1790em_82547_tx_fifo_reset(struct adapter *adapter)
1791{
1792	uint32_t tctl;
1793
1794	if ((E1000_READ_REG(&adapter->hw, TDT) == E1000_READ_REG(&adapter->hw, TDH)) &&
1795	    (E1000_READ_REG(&adapter->hw, TDFT) == E1000_READ_REG(&adapter->hw, TDFH)) &&
1796	    (E1000_READ_REG(&adapter->hw, TDFTS) == E1000_READ_REG(&adapter->hw, TDFHS))&&
1797	    (E1000_READ_REG(&adapter->hw, TDFPC) == 0)) {
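		/*
		 * The four comparisons above mean the transmit path is
		 * idle: no descriptors are pending (TDT == TDH), the
		 * on-chip FIFO head/tail registers match their saved
		 * copies, and the FIFO packet count (TDFPC) is zero.
		 * Only then is it safe to rewrite the FIFO pointers.
		 */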
1798
1799		/* Disable TX unit */
1800		tctl = E1000_READ_REG(&adapter->hw, TCTL);
1801		E1000_WRITE_REG(&adapter->hw, TCTL, tctl & ~E1000_TCTL_EN);
1802
1803		/* Reset FIFO pointers */
1804		E1000_WRITE_REG(&adapter->hw, TDFT,  adapter->tx_head_addr);
1805		E1000_WRITE_REG(&adapter->hw, TDFH,  adapter->tx_head_addr);
1806		E1000_WRITE_REG(&adapter->hw, TDFTS, adapter->tx_head_addr);
1807		E1000_WRITE_REG(&adapter->hw, TDFHS, adapter->tx_head_addr);
1808
1809		/* Re-enable TX unit */
1810		E1000_WRITE_REG(&adapter->hw, TCTL, tctl);
1811		E1000_WRITE_FLUSH(&adapter->hw);
1812
1813		adapter->tx_fifo_head = 0;
1814		adapter->tx_fifo_reset_cnt++;
1815
1816		return (TRUE);
1817	}
1818	else {
1819		return (FALSE);
1820	}
1821}
1822
1823static void
1824em_set_promisc(struct adapter *adapter)
1825{
1826	struct ifnet	*ifp = adapter->ifp;
1827	uint32_t	reg_rctl;
1828
1829	reg_rctl = E1000_READ_REG(&adapter->hw, RCTL);
1830
1831	if (ifp->if_flags & IFF_PROMISC) {
1832		reg_rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
1833		E1000_WRITE_REG(&adapter->hw, RCTL, reg_rctl);
1834		/*
1835		 * Disable VLAN stripping in promiscuous mode.
1836		 * This allows VLAN-tagged frames to be bridged and
1837		 * also lets the tags be seen in tcpdump.
1838		 * XXX: This is a bit bogus, as tcpdump may be used
1839		 * without promiscuous mode as well.
1840		 */
1841		if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING)
1842			em_disable_vlans(adapter);
1843		adapter->em_insert_vlan_header = 1;
1844	} else if (ifp->if_flags & IFF_ALLMULTI) {
1845		reg_rctl |= E1000_RCTL_MPE;
1846		reg_rctl &= ~E1000_RCTL_UPE;
1847		E1000_WRITE_REG(&adapter->hw, RCTL, reg_rctl);
1848		adapter->em_insert_vlan_header = 0;
1849	} else
1850		adapter->em_insert_vlan_header = 0;
1851}
1852
1853static void
1854em_disable_promisc(struct adapter *adapter)
1855{
1856	struct ifnet	*ifp = adapter->ifp;
1857	uint32_t	reg_rctl;
1858
1859	reg_rctl = E1000_READ_REG(&adapter->hw, RCTL);
1860
1861	reg_rctl &=  (~E1000_RCTL_UPE);
1862	reg_rctl &=  (~E1000_RCTL_MPE);
1863	E1000_WRITE_REG(&adapter->hw, RCTL, reg_rctl);
1864
1865	if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING)
1866		em_enable_vlans(adapter);
1867	adapter->em_insert_vlan_header = 0;
1868}
1869
1870
1871/*********************************************************************
1872 *  Multicast Update
1873 *
1874 *  This routine is called whenever the multicast address list is updated.
1875 *
1876 **********************************************************************/
1877
1878static void
1879em_set_multi(struct adapter *adapter)
1880{
1881	struct ifnet	*ifp = adapter->ifp;
1882	struct ifmultiaddr *ifma;
1883	uint32_t reg_rctl = 0;
1884	uint8_t  mta[MAX_NUM_MULTICAST_ADDRESSES * ETH_LENGTH_OF_ADDRESS];
1885	int mcnt = 0;
1886
1887	IOCTL_DEBUGOUT("em_set_multi: begin");
1888
1889	if (adapter->hw.mac_type == em_82542_rev2_0) {
1890		reg_rctl = E1000_READ_REG(&adapter->hw, RCTL);
1891		if (adapter->hw.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
1892			em_pci_clear_mwi(&adapter->hw);
1893		reg_rctl |= E1000_RCTL_RST;
1894		E1000_WRITE_REG(&adapter->hw, RCTL, reg_rctl);
1895		msec_delay(5);
1896	}
1897
1898	IF_ADDR_LOCK(ifp);
1899	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
1900		if (ifma->ifma_addr->sa_family != AF_LINK)
1901			continue;
1902
1903		if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
1904			break;
1905
1906		bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
1907		    &mta[mcnt*ETH_LENGTH_OF_ADDRESS], ETH_LENGTH_OF_ADDRESS);
1908		mcnt++;
1909	}
1910	IF_ADDR_UNLOCK(ifp);
1911
1912	if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES) {
1913		reg_rctl = E1000_READ_REG(&adapter->hw, RCTL);
1914		reg_rctl |= E1000_RCTL_MPE;
1915		E1000_WRITE_REG(&adapter->hw, RCTL, reg_rctl);
1916	} else
1917		em_mc_addr_list_update(&adapter->hw, mta, mcnt, 0, 1);
1918
1919	if (adapter->hw.mac_type == em_82542_rev2_0) {
1920		reg_rctl = E1000_READ_REG(&adapter->hw, RCTL);
1921		reg_rctl &= ~E1000_RCTL_RST;
1922		E1000_WRITE_REG(&adapter->hw, RCTL, reg_rctl);
1923		msec_delay(5);
1924		if (adapter->hw.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
1925			em_pci_set_mwi(&adapter->hw);
1926	}
1927}
1928
1929
1930/*********************************************************************
1931 *  Timer routine
1932 *
1933 *  This routine checks for link status and updates statistics.
1934 *
1935 **********************************************************************/
1936
1937static void
1938em_local_timer(void *arg)
1939{
1940	struct adapter	*adapter = arg;
1941	struct ifnet	*ifp = adapter->ifp;
1942
1943	EM_LOCK(adapter);
1944
1945	em_check_for_link(&adapter->hw);
1946	em_update_link_status(adapter);
1947	em_update_stats_counters(adapter);
1948	if (em_display_debug_stats && (ifp->if_drv_flags & IFF_DRV_RUNNING))
1949		em_print_hw_stats(adapter);
1950	em_smartspeed(adapter);
1951
1952	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
1953
1954	EM_UNLOCK(adapter);
1955}
1956
1957static void
1958em_update_link_status(struct adapter *adapter)
1959{
1960	struct ifnet *ifp = adapter->ifp;
1961	device_t dev = adapter->dev;
1962
1963	if (E1000_READ_REG(&adapter->hw, STATUS) & E1000_STATUS_LU) {
1964		if (adapter->link_active == 0) {
1965			em_get_speed_and_duplex(&adapter->hw, &adapter->link_speed,
1966			    &adapter->link_duplex);
1967			/* Check if we may set SPEED_MODE bit on PCI-E */
1968			if ((adapter->link_speed == SPEED_1000) &&
1969			    ((adapter->hw.mac_type == em_82571) ||
1970			    (adapter->hw.mac_type == em_82572))) {
1971				int tarc0;
1972
1973				tarc0 = E1000_READ_REG(&adapter->hw, TARC0);
1974				tarc0 |= SPEED_MODE_BIT;
1975				E1000_WRITE_REG(&adapter->hw, TARC0, tarc0);
1976			}
1977			if (bootverbose)
1978				device_printf(dev, "Link is up %d Mbps %s\n",
1979				    adapter->link_speed,
1980				    ((adapter->link_duplex == FULL_DUPLEX) ?
1981				    "Full Duplex" : "Half Duplex"));
1982			adapter->link_active = 1;
1983			adapter->smartspeed = 0;
1984			ifp->if_baudrate = adapter->link_speed * 1000000;
1985			if_link_state_change(ifp, LINK_STATE_UP);
1986		}
1987	} else {
1988		if (adapter->link_active == 1) {
1989			ifp->if_baudrate = adapter->link_speed = 0;
1990			adapter->link_duplex = 0;
1991			if (bootverbose)
1992				device_printf(dev, "Link is Down\n");
1993			adapter->link_active = 0;
1994			if_link_state_change(ifp, LINK_STATE_DOWN);
1995		}
1996	}
1997}
1998
1999/*********************************************************************
2000 *
2001 *  This routine disables all traffic on the adapter by issuing a
2002 *  global reset on the MAC and deallocates TX/RX buffers.
2003 *
2004 **********************************************************************/
2005
2006static void
2007em_stop(void *arg)
2008{
2009	struct adapter	*adapter = arg;
2010	struct ifnet	*ifp = adapter->ifp;
2011
2012	EM_LOCK_ASSERT(adapter);
2013
2014	INIT_DEBUGOUT("em_stop: begin");
2015
2016	em_disable_intr(adapter);
2017	em_reset_hw(&adapter->hw);
2018	callout_stop(&adapter->timer);
2019	callout_stop(&adapter->tx_fifo_timer);
2020	em_free_transmit_structures(adapter);
2021	em_free_receive_structures(adapter);
2022
2023	/* Tell the stack that the interface is no longer active */
2024	ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
2025}
2026
2027
2028/********************************************************************
2029 *
2030 *  Determine hardware revision.
2031 *
2032 **********************************************************************/
2033static void
2034em_identify_hardware(struct adapter *adapter)
2035{
2036	device_t dev = adapter->dev;
2037
2038	/* Make sure our PCI config space has the necessary stuff set */
2039	pci_enable_busmaster(dev);
2040	pci_enable_io(dev, SYS_RES_MEMORY);
2041	adapter->hw.pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2);
2042
2043	/* Save off the information about this board */
2044	adapter->hw.vendor_id = pci_get_vendor(dev);
2045	adapter->hw.device_id = pci_get_device(dev);
2046	adapter->hw.revision_id = pci_get_revid(dev);
2047	adapter->hw.subsystem_vendor_id = pci_get_subvendor(dev);
2048	adapter->hw.subsystem_id = pci_get_subdevice(dev);
2049
2050	/* Identify the MAC */
2051	if (em_set_mac_type(&adapter->hw))
2052		device_printf(dev, "Unknown MAC Type\n");
2053
2054	if(adapter->hw.mac_type == em_82541 || adapter->hw.mac_type == em_82541_rev_2 ||
2055	   adapter->hw.mac_type == em_82547 || adapter->hw.mac_type == em_82547_rev_2)
2056		adapter->hw.phy_init_script = TRUE;
2057}
2058
2059static int
2060em_allocate_pci_resources(struct adapter *adapter)
2061{
2062	device_t	dev = adapter->dev;
2063	int		val, rid;
2064
2065	rid = PCIR_BAR(0);
2066	adapter->res_memory = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
2067	    &rid, RF_ACTIVE);
2068	if (adapter->res_memory == NULL) {
2069		device_printf(dev, "Unable to allocate bus resource: memory\n");
2070		return (ENXIO);
2071	}
2072	adapter->osdep.mem_bus_space_tag =
2073	    rman_get_bustag(adapter->res_memory);
2074	adapter->osdep.mem_bus_space_handle = rman_get_bushandle(adapter->res_memory);
2075	adapter->hw.hw_addr = (uint8_t *)&adapter->osdep.mem_bus_space_handle;
2076
2077	if (adapter->hw.mac_type > em_82543) {
2078		/* Figure out where our I/O BAR is. */
2079		for (rid = PCIR_BAR(0); rid < PCIR_CIS;) {
2080			val = pci_read_config(dev, rid, 4);
2081			if (E1000_BAR_TYPE(val) == E1000_BAR_TYPE_IO) {
2082				adapter->io_rid = rid;
2083				break;
2084			}
2085			rid += 4;
2086			/* A 64-bit memory BAR occupies two config dwords; skip the extra one */
2087			if (E1000_BAR_MEM_TYPE(val) == E1000_BAR_MEM_TYPE_64BIT)
2088				rid += 4;
2089		}
2090		if (rid >= PCIR_CIS) {
2091			device_printf(dev, "Unable to locate IO BAR\n");
2092			return (ENXIO);
2093		}
2094		adapter->res_ioport = bus_alloc_resource_any(dev, SYS_RES_IOPORT,
2095		    &adapter->io_rid, RF_ACTIVE);
2096		if (adapter->res_ioport == NULL) {
2097			device_printf(dev, "Unable to allocate bus resource: "
2098			    "ioport\n");
2099			return (ENXIO);
2100		}
2101		adapter->hw.io_base = 0;
2102		adapter->osdep.io_bus_space_tag = rman_get_bustag(adapter->res_ioport);
2103		adapter->osdep.io_bus_space_handle =
2104		    rman_get_bushandle(adapter->res_ioport);
2105	}
2106
2107	/* For ICH8 we need to find the flash memory. */
2108	if (adapter->hw.mac_type == em_ich8lan) {
2109		rid = EM_FLASH;
2110
2111		adapter->flash_mem = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
2112		    &rid, RF_ACTIVE);
2113		adapter->osdep.flash_bus_space_tag = rman_get_bustag(adapter->flash_mem);
2114		adapter->osdep.flash_bus_space_handle =
2115		    rman_get_bushandle(adapter->flash_mem);
2116	}
2117
2118	rid = 0x0;
2119	adapter->res_interrupt = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid,
2120	    RF_SHAREABLE | RF_ACTIVE);
2121	if (adapter->res_interrupt == NULL) {
2122		device_printf(dev, "Unable to allocate bus resource: "
2123		    "interrupt\n");
2124		return (ENXIO);
2125	}
2126
2127	adapter->hw.back = &adapter->osdep;
2128
2129	return (0);
2130}
2131
2132int
2133em_allocate_intr(struct adapter *adapter)
2134{
2135	device_t dev = adapter->dev;
2136	int error;
2137
2138	/* Manually turn off all interrupts */
2139	E1000_WRITE_REG(&adapter->hw, IMC, 0xffffffff);
2140
2141#ifdef DEVICE_POLLING
2142	if (adapter->int_handler_tag == NULL && (error = bus_setup_intr(dev,
2143	    adapter->res_interrupt, INTR_TYPE_NET | INTR_MPSAFE, em_intr, adapter,
2144	    &adapter->int_handler_tag)) != 0) {
2145		device_printf(dev, "Failed to register interrupt handler\n");
2146		return (error);
2147	}
2148#else
2149	/*
2150	 * Try allocating a fast interrupt and the associated deferred
2151	 * processing contexts.
2152	 */
2153	TASK_INIT(&adapter->rxtx_task, 0, em_handle_rxtx, adapter);
2154	TASK_INIT(&adapter->link_task, 0, em_handle_link, adapter);
2155	adapter->tq = taskqueue_create_fast("em_taskq", M_NOWAIT,
2156	    taskqueue_thread_enqueue, &adapter->tq);
2157	taskqueue_start_threads(&adapter->tq, 1, PI_NET, "%s taskq",
2158	    device_get_nameunit(adapter->dev));
2159	if ((error = bus_setup_intr(dev, adapter->res_interrupt,
2160	    INTR_TYPE_NET | INTR_FAST, em_intr_fast, adapter,
2161	    &adapter->int_handler_tag)) != 0) {
2162		device_printf(dev, "Failed to register fast interrupt "
2163			    "handler: %d\n", error);
2164		taskqueue_free(adapter->tq);
2165		adapter->tq = NULL;
2166		return (error);
2167	}
2168#endif
2169
2170	em_enable_intr(adapter);
2171	return (0);
2172}
2173
2174static void
2175em_free_intr(struct adapter *adapter)
2176{
2177	device_t dev = adapter->dev;
2178
2179	if (adapter->int_handler_tag != NULL) {
2180		bus_teardown_intr(dev, adapter->res_interrupt, adapter->int_handler_tag);
2181		adapter->int_handler_tag = NULL;
2182	}
2183	if (adapter->tq != NULL) {
2184		taskqueue_drain(adapter->tq, &adapter->rxtx_task);
2185		taskqueue_drain(taskqueue_fast, &adapter->link_task);
2186		taskqueue_free(adapter->tq);
2187		adapter->tq = NULL;
2188	}
2189}
2190
2191static void
2192em_free_pci_resources(struct adapter *adapter)
2193{
2194	device_t dev = adapter->dev;
2195
2196	if (adapter->res_interrupt != NULL)
2197		bus_release_resource(dev, SYS_RES_IRQ, 0, adapter->res_interrupt);
2198
2199	if (adapter->res_memory != NULL)
2200		bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BAR(0),
2201		    adapter->res_memory);
2202
2203	if (adapter->flash_mem != NULL)
2204		bus_release_resource(dev, SYS_RES_MEMORY, EM_FLASH,
2205		    adapter->flash_mem);
2206
2207	if (adapter->res_ioport != NULL)
2208		bus_release_resource(dev, SYS_RES_IOPORT, adapter->io_rid,
2209		    adapter->res_ioport);
2210}
2211
2212/*********************************************************************
2213 *
2214 *  Initialize the hardware to a configuration as specified by the
2215 *  adapter structure. The controller is reset, the EEPROM is
2216 *  verified, the MAC address is set, then the shared initialization
2217 *  routines are called.
2218 *
2219 **********************************************************************/
2220static int
2221em_hardware_init(struct adapter *adapter)
2222{
2223	device_t dev = adapter->dev;
2224	uint16_t rx_buffer_size;
2225
2226	INIT_DEBUGOUT("em_hardware_init: begin");
2227	/* Issue a global reset */
2228	em_reset_hw(&adapter->hw);
2229
2230	/* When hardware is reset, fifo_head is also reset */
2231	adapter->tx_fifo_head = 0;
2232
2233	/* Make sure we have a good EEPROM before we read from it */
2234	if (em_validate_eeprom_checksum(&adapter->hw) < 0) {
2235		device_printf(dev, "The EEPROM Checksum Is Not Valid\n");
2236		return (EIO);
2237	}
2238
2239	if (em_read_part_num(&adapter->hw, &(adapter->part_num)) < 0) {
2240		device_printf(dev, "EEPROM read error while reading part "
2241		    "number\n");
2242		return (EIO);
2243	}
2244
2245	/* Set up smart power down as default off on newer adapters. */
2246	if (!em_smart_pwr_down &&
2247	    (adapter->hw.mac_type == em_82571 || adapter->hw.mac_type == em_82572)) {
2248		uint16_t phy_tmp = 0;
2249
2250		/* Speed up time to link by disabling smart power down. */
2251		em_read_phy_reg(&adapter->hw, IGP02E1000_PHY_POWER_MGMT, &phy_tmp);
2252		phy_tmp &= ~IGP02E1000_PM_SPD;
2253		em_write_phy_reg(&adapter->hw, IGP02E1000_PHY_POWER_MGMT, phy_tmp);
2254	}
2255
2256	/*
2257	 * These parameters control the automatic generation (Tx) and
2258	 * response (Rx) to Ethernet PAUSE frames.
2259	 * - High water mark should allow for at least two frames to be
2260	 *   received after sending an XOFF.
2261	 * - Low water mark works best when it is very near the high water mark.
2262	 *   This allows the receiver to restart by sending XON when it has
2263	 *   drained a bit. Here we use an arbitrary value of 1500, which will
2264	 *   restart after one full frame is pulled from the buffer. There
2265	 *   could be several smaller frames in the buffer, and if so they will
2266	 *   not trigger the XON until they have freed 1500 bytes of the
2267	 *   buffer in total.
2268	 * - The pause time is fairly large: 0x1000 x 512ns = ~2.1 msec at 1Gb/s.
2269	 */
2270	rx_buffer_size = ((E1000_READ_REG(&adapter->hw, PBA) & 0xffff) << 10 );
2271
2272	adapter->hw.fc_high_water = rx_buffer_size -
2273	    roundup2(adapter->hw.max_frame_size, 1024);
2274	adapter->hw.fc_low_water = adapter->hw.fc_high_water - 1500;
2275	if (adapter->hw.mac_type == em_80003es2lan)
2276		adapter->hw.fc_pause_time = 0xFFFF;
2277	else
2278		adapter->hw.fc_pause_time = 0x1000;
2279	adapter->hw.fc_send_xon = TRUE;
2280	adapter->hw.fc = em_fc_full;
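
	/*
	 * Worked example (hypothetical values): a PBA low word of 0x0030
	 * gives rx_buffer_size = 0x30 << 10 = 48 KB.  With a 1522-byte
	 * max frame, roundup2(1522, 1024) = 2048, so fc_high_water =
	 * 49152 - 2048 = 47104 and fc_low_water = 47104 - 1500 = 45604.
	 */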
2281
2282	if (em_init_hw(&adapter->hw) < 0) {
2283		device_printf(dev, "Hardware Initialization Failed\n");
2284		return (EIO);
2285	}
2286
2287	em_check_for_link(&adapter->hw);
2288
2289	return (0);
2290}
2291
2292/*********************************************************************
2293 *
2294 *  Setup networking device structure and register an interface.
2295 *
2296 **********************************************************************/
2297static void
2298em_setup_interface(device_t dev, struct adapter *adapter)
2299{
2300	struct ifnet   *ifp;
2301	INIT_DEBUGOUT("em_setup_interface: begin");
2302
2303	ifp = adapter->ifp = if_alloc(IFT_ETHER);
2304	if (ifp == NULL)
2305		panic("%s: cannot if_alloc()", device_get_nameunit(dev));
2306	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
2307	ifp->if_mtu = ETHERMTU;
2308	ifp->if_init =  em_init;
2309	ifp->if_softc = adapter;
2310	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
2311	ifp->if_ioctl = em_ioctl;
2312	ifp->if_start = em_start;
2313	ifp->if_watchdog = em_watchdog;
2314	IFQ_SET_MAXLEN(&ifp->if_snd, adapter->num_tx_desc - 1);
2315	ifp->if_snd.ifq_drv_maxlen = adapter->num_tx_desc - 1;
2316	IFQ_SET_READY(&ifp->if_snd);
2317
2318	ether_ifattach(ifp, adapter->hw.mac_addr);
2319
2320	ifp->if_capabilities = ifp->if_capenable = 0;
2321
2322	if (adapter->hw.mac_type >= em_82543) {
2323		ifp->if_capabilities |= IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM;
2324		ifp->if_capenable |= IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM;
2325	}
2326
2327	/* Enable TSO if available */
2328	if ((adapter->hw.mac_type > em_82544) &&
2329	    (adapter->hw.mac_type != em_82547)) {
2330		ifp->if_capabilities |= IFCAP_TSO4;
2331		ifp->if_capenable |= IFCAP_TSO4;
2332	}
2333
2334	/*
2335	 * Tell the upper layer(s) we support long frames.
2336	 */
2337	ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header);
2338	ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU;
2339	ifp->if_capenable |= IFCAP_VLAN_MTU;
2340
2341#ifdef DEVICE_POLLING
2342	ifp->if_capabilities |= IFCAP_POLLING;
2343#endif
2344
2345	/*
2346	 * Specify the media types supported by this adapter and register
2347	 * callbacks to update media and link information
2348	 */
2349	ifmedia_init(&adapter->media, IFM_IMASK, em_media_change,
2350	    em_media_status);
2351	if ((adapter->hw.media_type == em_media_type_fiber) ||
2352	    (adapter->hw.media_type == em_media_type_internal_serdes)) {
2353		u_char fiber_type = IFM_1000_SX;	/* default type */
2354
2355		if (adapter->hw.mac_type == em_82545)
2356			fiber_type = IFM_1000_LX;
2357		ifmedia_add(&adapter->media, IFM_ETHER | fiber_type | IFM_FDX,
2358		    0, NULL);
2359		ifmedia_add(&adapter->media, IFM_ETHER | fiber_type, 0, NULL);
2360	} else {
2361		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T, 0, NULL);
2362		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T | IFM_FDX,
2363			    0, NULL);
2364		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX,
2365			    0, NULL);
2366		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX | IFM_FDX,
2367			    0, NULL);
2368		if (adapter->hw.phy_type != em_phy_ife) {
2369			ifmedia_add(&adapter->media,
2370				IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
2371			ifmedia_add(&adapter->media,
2372				IFM_ETHER | IFM_1000_T, 0, NULL);
2373		}
2374	}
2375	ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL);
2376	ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO);
2377}
2378
2379
2380/*********************************************************************
2381 *
2382 *  Workaround for SmartSpeed on 82541 and 82547 controllers
2383 *
2384 **********************************************************************/
2385static void
2386em_smartspeed(struct adapter *adapter)
2387{
2388	uint16_t phy_tmp;
2389
2390	if (adapter->link_active || (adapter->hw.phy_type != em_phy_igp) ||
2391	    adapter->hw.autoneg == 0 ||
2392	    (adapter->hw.autoneg_advertised & ADVERTISE_1000_FULL) == 0)
2393		return;
2394
2395	if (adapter->smartspeed == 0) {
2396		/* If the Master/Slave config fault is asserted twice in
2397		 * a row, treat it as real and act on it. */
2398		em_read_phy_reg(&adapter->hw, PHY_1000T_STATUS, &phy_tmp);
2399		if (!(phy_tmp & SR_1000T_MS_CONFIG_FAULT))
2400			return;
2401		em_read_phy_reg(&adapter->hw, PHY_1000T_STATUS, &phy_tmp);
2402		if (phy_tmp & SR_1000T_MS_CONFIG_FAULT) {
2403			em_read_phy_reg(&adapter->hw, PHY_1000T_CTRL, &phy_tmp);
2404			if(phy_tmp & CR_1000T_MS_ENABLE) {
2405				phy_tmp &= ~CR_1000T_MS_ENABLE;
2406				em_write_phy_reg(&adapter->hw, PHY_1000T_CTRL,
2407				    phy_tmp);
2408				adapter->smartspeed++;
2409				if(adapter->hw.autoneg &&
2410				   !em_phy_setup_autoneg(&adapter->hw) &&
2411				   !em_read_phy_reg(&adapter->hw, PHY_CTRL,
2412				    &phy_tmp)) {
2413					phy_tmp |= (MII_CR_AUTO_NEG_EN |
2414						    MII_CR_RESTART_AUTO_NEG);
2415					em_write_phy_reg(&adapter->hw, PHY_CTRL,
2416					    phy_tmp);
2417				}
2418			}
2419		}
2420		return;
2421	} else if(adapter->smartspeed == EM_SMARTSPEED_DOWNSHIFT) {
2422		/* If still no link, perhaps a 2/3-pair cable is in use */
2423		em_read_phy_reg(&adapter->hw, PHY_1000T_CTRL, &phy_tmp);
2424		phy_tmp |= CR_1000T_MS_ENABLE;
2425		em_write_phy_reg(&adapter->hw, PHY_1000T_CTRL, phy_tmp);
2426		if(adapter->hw.autoneg &&
2427		   !em_phy_setup_autoneg(&adapter->hw) &&
2428		   !em_read_phy_reg(&adapter->hw, PHY_CTRL, &phy_tmp)) {
2429			phy_tmp |= (MII_CR_AUTO_NEG_EN |
2430				    MII_CR_RESTART_AUTO_NEG);
2431			em_write_phy_reg(&adapter->hw, PHY_CTRL, phy_tmp);
2432		}
2433	}
2434	/* Restart process after EM_SMARTSPEED_MAX iterations */
2435	if(adapter->smartspeed++ == EM_SMARTSPEED_MAX)
2436		adapter->smartspeed = 0;
2437}
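
/*
 * Note on the sequence above: adapter->smartspeed counts calls from
 * em_local_timer().  On a repeated Master/Slave config fault we first
 * clear CR_1000T_MS_ENABLE and restart autonegotiation; if there is
 * still no link after EM_SMARTSPEED_DOWNSHIFT ticks we turn it back
 * on, and after EM_SMARTSPEED_MAX ticks the whole cycle starts over.
 */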
2438
2439
2440/*
2441 * Manage DMA'able memory.
2442 */
2443static void
2444em_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
2445{
2446	if (error)
2447		return;
2448	*(bus_addr_t *) arg = segs[0].ds_addr;
2449}
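
/*
 * em_dmamap_cb() is the bus_dmamap_load() callback: on success it
 * stores the physical address of the single segment (the tag is
 * created with nsegments = 1) into the caller-supplied bus_addr_t.
 * On error it leaves the address untouched, which em_dma_malloc()
 * detects by zeroing dma_paddr before the load.
 */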
2450
2451static int
2452em_dma_malloc(struct adapter *adapter, bus_size_t size, struct em_dma_alloc *dma,
2453	int mapflags)
2454{
2455	int error;
2456
2457	error = bus_dma_tag_create(bus_get_dma_tag(adapter->dev), /* parent */
2458				EM_DBA_ALIGN, 0,	/* alignment, bounds */
2459				BUS_SPACE_MAXADDR,	/* lowaddr */
2460				BUS_SPACE_MAXADDR,	/* highaddr */
2461				NULL, NULL,		/* filter, filterarg */
2462				size,			/* maxsize */
2463				1,			/* nsegments */
2464				size,			/* maxsegsize */
2465				0,			/* flags */
2466				NULL,			/* lockfunc */
2467				NULL,			/* lockarg */
2468				&dma->dma_tag);
2469	if (error) {
2470		device_printf(adapter->dev, "%s: bus_dma_tag_create failed: %d\n",
2471		    __func__, error);
2472		goto fail_0;
2473	}
2474
2475	error = bus_dmamem_alloc(dma->dma_tag, (void**) &dma->dma_vaddr,
2476	    BUS_DMA_NOWAIT, &dma->dma_map);
2477	if (error) {
2478		device_printf(adapter->dev, "%s: bus_dmamem_alloc(%ju) failed: %d\n",
2479		    __func__, (uintmax_t)size, error);
2480		goto fail_2;
2481	}
2482
2483	dma->dma_paddr = 0;
2484	error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
2485	    size, em_dmamap_cb, &dma->dma_paddr, mapflags | BUS_DMA_NOWAIT);
2486	if (error || dma->dma_paddr == 0) {
2487		device_printf(adapter->dev, "%s: bus_dmamap_load failed: %d\n",
2488		    __func__, error);
2489		goto fail_3;
2490	}
2491
2492	return (0);
2493
2494fail_3:
2495	bus_dmamap_unload(dma->dma_tag, dma->dma_map);
2496fail_2:
2497	bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
2498	bus_dma_tag_destroy(dma->dma_tag);
2499fail_0:
2500	dma->dma_map = NULL;
2501	dma->dma_tag = NULL;
2502
2503	return (error);
2504}
2505
2506static void
2507em_dma_free(struct adapter *adapter, struct em_dma_alloc *dma)
2508{
2509	if (dma->dma_tag == NULL)
2510		return;
2511	if (dma->dma_map != NULL) {
2512		bus_dmamap_sync(dma->dma_tag, dma->dma_map,
2513		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
2514		bus_dmamap_unload(dma->dma_tag, dma->dma_map);
2515		bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
2516		dma->dma_map = NULL;
2517	}
2518	bus_dma_tag_destroy(dma->dma_tag);
2519	dma->dma_tag = NULL;
2520}
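
/*
 * Usage sketch for the helpers above (hypothetical and compiled out;
 * "size" stands in for whatever the real caller needs):
 */
#if 0
	struct em_dma_alloc dma;

	if (em_dma_malloc(adapter, size, &dma, BUS_DMA_NOWAIT) != 0)
		return (ENOMEM);
	/* dma.dma_vaddr is the kernel VA, dma.dma_paddr the bus address. */
	em_dma_free(adapter, &dma);
#endif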
2521
2522
2523/*********************************************************************
2524 *
2525 *  Allocate memory for tx_buffer structures. The tx_buffer stores all
2526 *  the information needed to transmit a packet on the wire.
2527 *
2528 **********************************************************************/
2529static int
2530em_allocate_transmit_structures(struct adapter *adapter)
2531{
2532	adapter->tx_buffer_area =  malloc(sizeof(struct em_buffer) *
2533	    adapter->num_tx_desc, M_DEVBUF, M_NOWAIT);
2534	if (adapter->tx_buffer_area == NULL) {
2535		device_printf(adapter->dev, "Unable to allocate tx_buffer memory\n");
2536		return (ENOMEM);
2537	}
2538
2539	bzero(adapter->tx_buffer_area, sizeof(struct em_buffer) * adapter->num_tx_desc);
2540
2541	return (0);
2542}
2543
2544/*********************************************************************
2545 *
2546 *  Allocate and initialize transmit structures.
2547 *
2548 **********************************************************************/
2549static int
2550em_setup_transmit_structures(struct adapter *adapter)
2551{
2552	struct ifnet   *ifp = adapter->ifp;
2553	device_t dev = adapter->dev;
2554	struct em_buffer *tx_buffer;
2555	bus_size_t size, segsize;
2556	int error, i;
2557
2558	/*
2559	 * Setup DMA descriptor areas.
2560	 */
2561	segsize = size = roundup2(adapter->hw.max_frame_size, MCLBYTES);
2562
2563	/* Overrides for TSO - want large sizes */
2564	if (ifp->if_hwassist & EM_TCPSEG_FEATURES) {
2565		size = EM_TSO_SIZE;
2566		segsize = PAGE_SIZE;
2567	}
2568
2569	if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),	/* parent */
2570				1, 0,			/* alignment, bounds */
2571				BUS_SPACE_MAXADDR,	/* lowaddr */
2572				BUS_SPACE_MAXADDR,	/* highaddr */
2573				NULL, NULL,		/* filter, filterarg */
2574				size,			/* maxsize */
2575				EM_MAX_SCATTER,		/* nsegments */
2576				segsize,		/* maxsegsize */
2577				0,			/* flags */
2578				NULL,		/* lockfunc */
2579				NULL,		/* lockarg */
2580				&adapter->txtag)) != 0) {
2581		device_printf(dev, "Unable to allocate TX DMA tag\n");
2582		goto fail;
2583	}
2584
2585	if ((error = em_allocate_transmit_structures(adapter)) != 0)
2586		goto fail;
2587
2588	bzero(adapter->tx_desc_base, (sizeof(struct em_tx_desc)) * adapter->num_tx_desc);
2589	tx_buffer = adapter->tx_buffer_area;
2590	for (i = 0; i < adapter->num_tx_desc; i++) {
2591		error = bus_dmamap_create(adapter->txtag, 0, &tx_buffer->map);
2592		if (error != 0) {
2593			device_printf(dev, "Unable to create TX DMA map\n");
2594			goto fail;
2595		}
2596		tx_buffer++;
2597	}
2598
2599	adapter->next_avail_tx_desc = 0;
2600	adapter->oldest_used_tx_desc = 0;
2601
2602	/* Set number of descriptors available */
2603	adapter->num_tx_desc_avail = adapter->num_tx_desc;
2604
2605	/* Set checksum context */
2606	adapter->active_checksum_context = OFFLOAD_NONE;
2607	bus_dmamap_sync(adapter->txdma.dma_tag, adapter->txdma.dma_map,
2608	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
2609
2610	return (0);
2611
2612fail:
2613	em_free_transmit_structures(adapter);
2614	return (error);
2615}
2616
2617/*********************************************************************
2618 *
2619 *  Enable transmit unit.
2620 *
2621 **********************************************************************/
2622static void
2623em_initialize_transmit_unit(struct adapter *adapter)
2624{
2625	uint32_t	reg_tctl, reg_tarc;
2626	uint32_t	reg_tipg = 0;
2627	uint64_t	bus_addr;
2628
2629	INIT_DEBUGOUT("em_initialize_transmit_unit: begin");
2630	/* Setup the Base and Length of the Tx Descriptor Ring */
2631	bus_addr = adapter->txdma.dma_paddr;
2632	E1000_WRITE_REG(&adapter->hw, TDLEN,
2633	    adapter->num_tx_desc * sizeof(struct em_tx_desc));
2634	E1000_WRITE_REG(&adapter->hw, TDBAH, (uint32_t)(bus_addr >> 32));
2635	E1000_WRITE_REG(&adapter->hw, TDBAL, (uint32_t)bus_addr);
2636
2637	/* Setup the HW Tx Head and Tail descriptor pointers */
2638	E1000_WRITE_REG(&adapter->hw, TDT, 0);
2639	E1000_WRITE_REG(&adapter->hw, TDH, 0);
2640
2641
2642	HW_DEBUGOUT2("Base = %x, Length = %x\n", E1000_READ_REG(&adapter->hw, TDBAL),
2643	    E1000_READ_REG(&adapter->hw, TDLEN));
2644
2645	/* Set the default values for the Tx Inter Packet Gap timer */
2646	switch (adapter->hw.mac_type) {
2647	case em_82542_rev2_0:
2648	case em_82542_rev2_1:
2649		reg_tipg = DEFAULT_82542_TIPG_IPGT;
2650		reg_tipg |= DEFAULT_82542_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT;
2651		reg_tipg |= DEFAULT_82542_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT;
2652		break;
2653	case em_80003es2lan:
2654		reg_tipg = DEFAULT_82543_TIPG_IPGR1;
2655		reg_tipg |= DEFAULT_80003ES2LAN_TIPG_IPGR2 <<
2656		    E1000_TIPG_IPGR2_SHIFT;
2657		break;
2658	default:
2659		if ((adapter->hw.media_type == em_media_type_fiber) ||
2660		    (adapter->hw.media_type == em_media_type_internal_serdes))
2661			reg_tipg = DEFAULT_82543_TIPG_IPGT_FIBER;
2662		else
2663			reg_tipg = DEFAULT_82543_TIPG_IPGT_COPPER;
2664		reg_tipg |= DEFAULT_82543_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT;
2665		reg_tipg |= DEFAULT_82543_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT;
2666	}
2667
2668	E1000_WRITE_REG(&adapter->hw, TIPG, reg_tipg);
2669	E1000_WRITE_REG(&adapter->hw, TIDV, adapter->tx_int_delay.value);
2670	if(adapter->hw.mac_type >= em_82540)
2671		E1000_WRITE_REG(&adapter->hw, TADV, adapter->tx_abs_int_delay.value);
2672
2673	/* Do adapter specific tweaks before we enable the transmitter. */
2674	if (adapter->hw.mac_type == em_82571 || adapter->hw.mac_type == em_82572) {
2675		reg_tarc = E1000_READ_REG(&adapter->hw, TARC0);
2676		reg_tarc |= (1 << 25);
2677		E1000_WRITE_REG(&adapter->hw, TARC0, reg_tarc);
2678		reg_tarc = E1000_READ_REG(&adapter->hw, TARC1);
2679		reg_tarc |= (1 << 25);
2680		reg_tarc &= ~(1 << 28);
2681		E1000_WRITE_REG(&adapter->hw, TARC1, reg_tarc);
2682	} else if (adapter->hw.mac_type == em_80003es2lan) {
2683		reg_tarc = E1000_READ_REG(&adapter->hw, TARC0);
2684		reg_tarc |= 1;
2685		E1000_WRITE_REG(&adapter->hw, TARC0, reg_tarc);
2686		reg_tarc = E1000_READ_REG(&adapter->hw, TARC1);
2687		reg_tarc |= 1;
2688		E1000_WRITE_REG(&adapter->hw, TARC1, reg_tarc);
2689	}
2690
2691	/* Program the Transmit Control Register */
2692	reg_tctl = E1000_TCTL_PSP | E1000_TCTL_EN |
2693		   (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT);
2694	if (adapter->hw.mac_type >= em_82571)
2695		reg_tctl |= E1000_TCTL_MULR;
2696	if (adapter->link_duplex == FULL_DUPLEX) {
2697		reg_tctl |= E1000_FDX_COLLISION_DISTANCE << E1000_COLD_SHIFT;
2698	} else {
2699		reg_tctl |= E1000_HDX_COLLISION_DISTANCE << E1000_COLD_SHIFT;
2700	}
2701	/* This write will effectively turn on the transmit unit. */
2702	E1000_WRITE_REG(&adapter->hw, TCTL, reg_tctl);
2703
2704	/* Setup Transmit Descriptor Settings for this adapter */
2705	adapter->txd_cmd = E1000_TXD_CMD_IFCS | E1000_TXD_CMD_RS;
2706
2707	if (adapter->tx_int_delay.value > 0)
2708		adapter->txd_cmd |= E1000_TXD_CMD_IDE;
2709}
2710
2711/*********************************************************************
2712 *
2713 *  Free all transmit related data structures.
2714 *
2715 **********************************************************************/
2716static void
2717em_free_transmit_structures(struct adapter *adapter)
2718{
2719	struct em_buffer *tx_buffer;
2720	int i;
2721
2722	INIT_DEBUGOUT("free_transmit_structures: begin");
2723
2724	if (adapter->tx_buffer_area != NULL) {
2725		tx_buffer = adapter->tx_buffer_area;
2726		for (i = 0; i < adapter->num_tx_desc; i++, tx_buffer++) {
2727			if (tx_buffer->m_head != NULL) {
2728				bus_dmamap_sync(adapter->txtag, tx_buffer->map,
2729				    BUS_DMASYNC_POSTWRITE);
2730				bus_dmamap_unload(adapter->txtag,
2731				    tx_buffer->map);
2732				m_freem(tx_buffer->m_head);
2733				tx_buffer->m_head = NULL;
2734			} else if (tx_buffer->map != NULL)
2735				bus_dmamap_unload(adapter->txtag,
2736				    tx_buffer->map);
2737			if (tx_buffer->map != NULL) {
2738				bus_dmamap_destroy(adapter->txtag,
2739				    tx_buffer->map);
2740				tx_buffer->map = NULL;
2741			}
2742		}
2743	}
2744	if (adapter->tx_buffer_area != NULL) {
2745		free(adapter->tx_buffer_area, M_DEVBUF);
2746		adapter->tx_buffer_area = NULL;
2747	}
2748	if (adapter->txtag != NULL) {
2749		bus_dma_tag_destroy(adapter->txtag);
2750		adapter->txtag = NULL;
2751	}
2752}
2753
2754/*********************************************************************
2755 *
2756 *  The offload context needs to be set when we transfer the first
2757 *  packet of a particular protocol (TCP/UDP). We change the
2758 *  context only if the protocol type changes.
2759 *
2760 **********************************************************************/
2761static void
2762em_transmit_checksum_setup(struct adapter *adapter, struct mbuf *mp,
2763    uint32_t *txd_upper, uint32_t *txd_lower)
2764{
2765	struct em_context_desc *TXD;
2766	struct em_buffer *tx_buffer;
2767	struct ether_vlan_header *eh;
2768	struct ip *ip;
2769	struct ip6_hdr *ip6;
2770	struct tcp_hdr *th;
2771	int curr_txd, ehdrlen, hdr_len, ip_hlen;
2772	uint32_t cmd = 0;
2773	uint16_t etype;
2774	uint8_t ipproto;
2775
2776	/* Setup checksum offload context. */
2777	curr_txd = adapter->next_avail_tx_desc;
2778	tx_buffer = &adapter->tx_buffer_area[curr_txd];
2779	TXD = (struct em_context_desc *) &adapter->tx_desc_base[curr_txd];
2780
2781	*txd_lower = E1000_TXD_CMD_DEXT |	/* Extended descr type */
2782		     E1000_TXD_DTYP_D;		/* Data descr */
2783
2784	/*
2785	 * Determine where frame payload starts.
2786	 * Jump over vlan headers if already present,
2787	 * helpful for QinQ too.
2788	 */
2789	eh = mtod(mp, struct ether_vlan_header *);
2790	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
2791		etype = ntohs(eh->evl_proto);
2792		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
2793	} else {
2794		etype = ntohs(eh->evl_encap_proto);
2795		ehdrlen = ETHER_HDR_LEN;
2796	}
2797
2798	/*
2799	 * We only support TCP/UDP for IPv4 and IPv6 for the moment.
2800	 * TODO: Support SCTP too when it hits the tree.
2801	 */
2802	switch (etype) {
2803	case ETHERTYPE_IP:
2804		ip = (struct ip *)(mp->m_data + ehdrlen);
2805		ip_hlen = ip->ip_hl << 2;
2806
2807		/* Setup of IP header checksum. */
2808		if (mp->m_pkthdr.csum_flags & CSUM_IP) {
2809			/*
2810			 * Start offset for header checksum calculation.
2811			 * End offset for header checksum calculation.
2812			 * Offset of place to put the checksum.
2813			 */
2814			TXD->lower_setup.ip_fields.ipcss = ehdrlen;
2815			TXD->lower_setup.ip_fields.ipcse =
2816			    htole16(ehdrlen + ip_hlen);
2817			TXD->lower_setup.ip_fields.ipcso =
2818			    ehdrlen + offsetof(struct ip, ip_sum);
2819			cmd |= E1000_TXD_CMD_IP;
2820			*txd_upper |= E1000_TXD_POPTS_IXSM << 8;
2821		}
2822
2823		if (mp->m_len < ehdrlen + ip_hlen)
2824			return;	/* failure */
2825
2826		hdr_len = ehdrlen + ip_hlen;
2827		ipproto = ip->ip_p;
2828
2829		break;
2830	case ETHERTYPE_IPV6:
2831		ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
2832		ip_hlen = sizeof(struct ip6_hdr); /* XXX: No header stacking. */
2833
2834		if (mp->m_len < ehdrlen + ip_hlen)
2835			return;	/* failure */
2836
2837		/* IPv6 doesn't have a header checksum. */
2838
2839		hdr_len = ehdrlen + ip_hlen;
2840		ipproto = ip6->ip6_nxt;
2841
2842		break;
2843	default:
2844		*txd_upper = 0;
2845		*txd_lower = 0;
2846		return;
2847	}
2848
2849	switch (ipproto) {
2850	case IPPROTO_TCP:
2851		if (mp->m_pkthdr.csum_flags & CSUM_TCP) {
2852			/*
2853			 * Start offset for payload checksum calculation.
2854			 * End offset for payload checksum calculation.
2855			 * Offset of place to put the checksum.
2856			 */
2857			th = (struct tcp_hdr *)(mp->m_data + hdr_len);
2858			TXD->upper_setup.tcp_fields.tucss = hdr_len;
2859			TXD->upper_setup.tcp_fields.tucse = htole16(0);
2860			TXD->upper_setup.tcp_fields.tucso =
2861			    hdr_len + offsetof(struct tcphdr, th_sum);
2862			cmd |= E1000_TXD_CMD_TCP;
2863			*txd_upper |= E1000_TXD_POPTS_TXSM << 8;
2864		}
2865		break;
2866	case IPPROTO_UDP:
2867		if (mp->m_pkthdr.csum_flags & CSUM_UDP) {
2868			/*
2869			 * Start offset for header checksum calculation.
2870			 * End offset for header checksum calculation.
2871			 * Offset of place to put the checksum.
2872			 */
2873			TXD->upper_setup.tcp_fields.tucss = hdr_len;
2874			TXD->upper_setup.tcp_fields.tucse = htole16(0);
2875			TXD->upper_setup.tcp_fields.tucso =
2876			    hdr_len + offsetof(struct udphdr, uh_sum);
2877			*txd_upper |= E1000_TXD_POPTS_TXSM << 8;
2878		}
2879		break;
2880	default:
2881		break;
2882	}
2883
2884	TXD->tcp_seg_setup.data = htole32(0);
2885	TXD->cmd_and_length =
2886	    htole32(adapter->txd_cmd | E1000_TXD_CMD_DEXT | cmd);
2887	tx_buffer->m_head = NULL;
2888
2889	if (++curr_txd == adapter->num_tx_desc)
2890		curr_txd = 0;
2891
2892	adapter->num_tx_desc_avail--;
2893	adapter->next_avail_tx_desc = curr_txd;
2894}
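
/*
 * Hypothetical caller sketch (compiled out): the transmit path is
 * expected to gate this routine on the mbuf's checksum flags, roughly:
 */
#if 0
	if (m_head->m_pkthdr.csum_flags & (CSUM_IP | CSUM_TCP | CSUM_UDP))
		em_transmit_checksum_setup(adapter, m_head,
		    &txd_upper, &txd_lower);
	else
		txd_upper = txd_lower = 0;
#endif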
2895
2896/**********************************************************************
2897 *
2898 *  Setup work for hardware segmentation offload (TSO)
2899 *
2900 **********************************************************************/
2901static boolean_t
2902em_tso_setup(struct adapter *adapter, struct mbuf *mp, uint32_t *txd_upper,
2903   uint32_t *txd_lower)
2904{
2905	struct em_context_desc *TXD;
2906	struct em_buffer *tx_buffer;
2907	struct ether_vlan_header *eh;
2908	struct ip *ip;
2909	struct ip6_hdr *ip6;
2910	struct tcphdr *th;
2911	int curr_txd, ehdrlen, hdr_len, ip_hlen, isip6;
2912	uint16_t etype;
2913
2914	/*
2915	 * XXX: This is not really correct, as the stack would not have
2916	 * set up all checksums.
2917	 * XXX: Returning FALSE is not sufficient, as we may also have to
2918	 * report true failure cases.  Should return -1 (failure), 0 (no
2919	 * TSO) and 1 (success).
2920	 */
2921	if (mp->m_pkthdr.len <= E1000_TX_BUFFER_SIZE)
2922		return FALSE;	/* 0 */
2923
2924	/*
2925	 * This function could/should be extended to support IP/IPv6
2926	 * fragmentation as well.  But as they say, one step at a time.
2927	 */
2928
2929	/*
2930	 * Determine where frame payload starts.
2931	 * Jump over vlan headers if already present,
2932	 * helpful for QinQ too.
2933	 */
2934	eh = mtod(mp, struct ether_vlan_header *);
2935	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
2936		etype = ntohs(eh->evl_proto);
2937		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
2938	} else {
2939		etype = ntohs(eh->evl_encap_proto);
2940		ehdrlen = ETHER_HDR_LEN;
2941	}
2942
2943	/* Ensure we have at least the IP+TCP header in the first mbuf. */
2944	if (mp->m_len < ehdrlen + sizeof(struct ip) + sizeof(struct tcphdr))
2945		return FALSE;	/* -1 */
2946
2947	/*
2948	 * We only support TCP for IPv4 at the moment; IPv6 is not done yet.
2949	 * TODO: Support SCTP too when it hits the tree.
2950	 */
2951	switch (etype) {
2952	case ETHERTYPE_IP:
2953		isip6 = 0;
2954		ip = (struct ip *)(mp->m_data + ehdrlen);
2955		if (ip->ip_p != IPPROTO_TCP)
2956			return FALSE;	/* 0 */
2957		ip->ip_len = 0;
2958		ip->ip_sum = 0;
2959		ip_hlen = ip->ip_hl << 2;
2960		if (mp->m_len < ehdrlen + ip_hlen + sizeof(struct tcphdr))
2961			return FALSE;	/* -1 */
2962		th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
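		/*
		 * Precompute the TCP pseudo-header checksum without the
		 * length field; the assumption is that with TSE set the
		 * hardware folds in the per-segment length itself when
		 * it splits the payload.
		 */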
2963#if 1
2964		th->th_sum = in_pseudo(ip->ip_src.s_addr,
2965		    ip->ip_dst.s_addr, htons(IPPROTO_TCP));
2966#else
2967		th->th_sum = mp->m_pkthdr.csum_data;
2968#endif
2969		break;
2970	case ETHERTYPE_IPV6:
2971		isip6 = 1;
2972		return FALSE;			/* Not supported yet. */
2973		ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
2974		if (ip6->ip6_nxt != IPPROTO_TCP)
2975			return FALSE;	/* 0 */
2976		ip6->ip6_plen = 0;
2977		ip_hlen = sizeof(struct ip6_hdr); /* XXX: no header stacking. */
2978		if (mp->m_len < ehdrlen + ip_hlen + sizeof(struct tcphdr))
2979			return FALSE;	/* -1 */
2980		th = (struct tcphdr *)((caddr_t)ip6 + ip_hlen);
2981#if 0
2982		th->th_sum = in6_pseudo(ip6->ip6_src, ip6->ip6_dst,
2983		    htons(IPPROTO_TCP));	/* XXX: function not yet available. */
2984#else
2985		th->th_sum = mp->m_pkthdr.csum_data;
2986#endif
2987		break;
2988	default:
2989		return FALSE;
2990	}
2991	hdr_len = ehdrlen + ip_hlen + (th->th_off << 2);
2992
2993	*txd_lower = (E1000_TXD_CMD_DEXT |	/* Extended descr type */
2994		      E1000_TXD_DTYP_D |	/* Data descr type */
2995		      E1000_TXD_CMD_TSE);	/* Do TSE on this packet */
2996
2997	/* IP and/or TCP header checksum calculation and insertion. */
2998	*txd_upper = ((isip6 ? 0 : E1000_TXD_POPTS_IXSM) |
2999		      E1000_TXD_POPTS_TXSM) << 8;
3000
3001	curr_txd = adapter->next_avail_tx_desc;
3002	tx_buffer = &adapter->tx_buffer_area[curr_txd];
3003	TXD = (struct em_context_desc *) &adapter->tx_desc_base[curr_txd];
3004
3005	/* IPv6 doesn't have a header checksum. */
3006	if (!isip6) {
3007		/*
3008		 * Start offset for header checksum calculation.
3009		 * End offset for header checksum calculation.
3010		 * Offset of place to put the checksum.
3011		 */
3012		TXD->lower_setup.ip_fields.ipcss = ehdrlen;
3013		TXD->lower_setup.ip_fields.ipcse =
3014		    htole16(ehdrlen + ip_hlen - 1);
3015		TXD->lower_setup.ip_fields.ipcso =
3016		    ehdrlen + offsetof(struct ip, ip_sum);
3017	}
3018	/*
3019	 * Start offset for payload checksum calculation.
3020	 * End offset for payload checksum calculation.
3021	 * Offset of place to put the checksum.
3022	 */
3023	TXD->upper_setup.tcp_fields.tucss =
3024	    ehdrlen + ip_hlen;
3025	TXD->upper_setup.tcp_fields.tucse = 0;
3026	TXD->upper_setup.tcp_fields.tucso =
3027	    ehdrlen + ip_hlen + offsetof(struct tcphdr, th_sum);
3028	/*
3029	 * Payload size per packet w/o any headers.
3030	 * Length of all headers up to payload.
3031	 */
3032	TXD->tcp_seg_setup.fields.mss = htole16(mp->m_pkthdr.tso_segsz);
3033	TXD->tcp_seg_setup.fields.hdr_len = hdr_len;
3034
3035	TXD->cmd_and_length = htole32(adapter->txd_cmd |
3036				E1000_TXD_CMD_DEXT |	/* Extended descr */
3037				E1000_TXD_CMD_TSE |	/* TSE context */
3038				(isip6 ? 0 : E1000_TXD_CMD_IP) | /* Do IP csum */
3039				E1000_TXD_CMD_TCP |	/* Do TCP checksum */
3040				(mp->m_pkthdr.len - (hdr_len))); /* Total len */
3041
3042	tx_buffer->m_head = NULL;
3043
3044	if (++curr_txd == adapter->num_tx_desc)
3045		curr_txd = 0;
3046
3047	adapter->num_tx_desc_avail--;
3048	adapter->next_avail_tx_desc = curr_txd;
3049	adapter->tx_tso = TRUE;
3050
3051	return TRUE;
3052}
3053
3054/**********************************************************************
3055 *
3056 *  Examine each tx_buffer in the used queue. If the hardware is done
3057 *  processing the packet then free the associated resources. The
3058 *  tx_buffer is put back on the free queue.
3059 *
3060 **********************************************************************/
3061static void
3062em_txeof(struct adapter *adapter)
3063{
3064	int i, num_avail;
3065	struct em_buffer *tx_buffer;
3066	struct em_tx_desc   *tx_desc;
3067	struct ifnet   *ifp = adapter->ifp;
3068
3069	EM_LOCK_ASSERT(adapter);
3070
3071	if (adapter->num_tx_desc_avail == adapter->num_tx_desc)
3072		return;
3073
3074	num_avail = adapter->num_tx_desc_avail;
3075	i = adapter->oldest_used_tx_desc;
3076
3077	tx_buffer = &adapter->tx_buffer_area[i];
3078	tx_desc = &adapter->tx_desc_base[i];
3079
3080	bus_dmamap_sync(adapter->txdma.dma_tag, adapter->txdma.dma_map,
3081	    BUS_DMASYNC_POSTREAD);
3082	while (tx_desc->upper.fields.status & E1000_TXD_STAT_DD) {
3083
3084		tx_desc->upper.data = 0;
3085		num_avail++;
3086
3087		if (tx_buffer->m_head) {
3088			ifp->if_opackets++;
3089			bus_dmamap_sync(adapter->txtag, tx_buffer->map,
3090			    BUS_DMASYNC_POSTWRITE);
3091			bus_dmamap_unload(adapter->txtag, tx_buffer->map);
3092
3093			m_freem(tx_buffer->m_head);
3094			tx_buffer->m_head = NULL;
3095		}
3096
3097		if (++i == adapter->num_tx_desc)
3098			i = 0;
3099
3100		tx_buffer = &adapter->tx_buffer_area[i];
3101		tx_desc = &adapter->tx_desc_base[i];
3102	}
3103	bus_dmamap_sync(adapter->txdma.dma_tag, adapter->txdma.dma_map,
3104	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3105
3106	adapter->oldest_used_tx_desc = i;
3107
3108	/*
3109	 * If we have enough room, clear IFF_DRV_OACTIVE to tell the stack
3110	 * that it is OK to send packets.
3111	 * If there are no pending descriptors, clear the timeout. Otherwise,
3112	 * if some descriptors have been freed, restart the timeout.
3113	 */
3114	if (num_avail > EM_TX_CLEANUP_THRESHOLD) {
3115		ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
3116		if (num_avail == adapter->num_tx_desc)
3117			ifp->if_timer = 0;
3118		else if (num_avail != adapter->num_tx_desc_avail)
3119			ifp->if_timer = EM_TX_TIMEOUT;
3120	}
3121	adapter->num_tx_desc_avail = num_avail;
3122}
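
/*
 * Note: adapter->txd_cmd always includes E1000_TXD_CMD_RS (set in
 * em_initialize_transmit_unit()), so the hardware writes back every
 * descriptor with the DD status bit once it is done, and the loop
 * above can rely on DD alone to find completed work.
 */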
3123
3124/*********************************************************************
3125 *
3126 *  Get a buffer from system mbuf buffer pool.
3127 *
3128 **********************************************************************/
3129static int
3130em_get_buf(struct adapter *adapter, int i)
3131{
3132	struct mbuf		*m;
3133	bus_dma_segment_t	segs[1];
3134	bus_dmamap_t		map;
3135	struct em_buffer	*rx_buffer;
3136	int			error, nsegs;
3137
3138	m = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR);
3139	if (m == NULL) {
3140		adapter->mbuf_cluster_failed++;
3141		return (ENOBUFS);
3142	}
3143	m->m_len = m->m_pkthdr.len = MCLBYTES;
3144	if (adapter->hw.max_frame_size <= (MCLBYTES - ETHER_ALIGN))
3145		m_adj(m, ETHER_ALIGN);
3146
3147	/*
3148	 * Using memory from the mbuf cluster pool, invoke the
3149	 * bus_dma machinery to arrange the memory mapping.
3150	 */
3151	error = bus_dmamap_load_mbuf_sg(adapter->rxtag, adapter->rx_sparemap,
3152	    m, segs, &nsegs, BUS_DMA_NOWAIT);
3153	if (error != 0) {
3154		m_free(m);
3155		return (error);
3156	}
3157	/* If nsegs is wrong then the stack is corrupt. */
3158	KASSERT(nsegs == 1, ("Too many segments returned!"));
3159
3160	rx_buffer = &adapter->rx_buffer_area[i];
3161	if (rx_buffer->m_head != NULL)
3162		bus_dmamap_unload(adapter->rxtag, rx_buffer->map);
3163
3164	map = rx_buffer->map;
3165	rx_buffer->map = adapter->rx_sparemap;
3166	adapter->rx_sparemap = map;
3167	bus_dmamap_sync(adapter->rxtag, rx_buffer->map, BUS_DMASYNC_PREREAD);
3168	rx_buffer->m_head = m;
3169
3170	adapter->rx_desc_base[i].buffer_addr = htole64(segs[0].ds_addr);
3171
3172	return (0);
3173}
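
/*
 * Design note: em_get_buf() loads the new mbuf through rx_sparemap and
 * swaps maps only after the load succeeds, so on failure the ring slot
 * keeps its previously loaded mbuf and the receive path can simply
 * recycle it (see the discard path in em_rxeof()).
 */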
3174
3175/*********************************************************************
3176 *
3177 *  Allocate memory for rx_buffer structures. Since we use one
3178 *  rx_buffer per received packet, the maximum number of rx_buffers
3179 *  that we'll need is equal to the number of receive descriptors
3180 *  that we've allocated.
3181 *
3182 **********************************************************************/
3183static int
3184em_allocate_receive_structures(struct adapter *adapter)
3185{
3186	device_t dev = adapter->dev;
3187	struct em_buffer *rx_buffer;
3188	int i, error;
3189
3190	adapter->rx_buffer_area = malloc(sizeof(struct em_buffer) * adapter->num_rx_desc,
3191	    M_DEVBUF, M_NOWAIT);
3192	if (adapter->rx_buffer_area == NULL) {
3193		device_printf(dev, "Unable to allocate rx_buffer memory\n");
3194		return (ENOMEM);
3195	}
3196
3197	bzero(adapter->rx_buffer_area, sizeof(struct em_buffer) * adapter->num_rx_desc);
3198
3199	error = bus_dma_tag_create(bus_get_dma_tag(dev),	/* parent */
3200				1, 0,			/* alignment, bounds */
3201				BUS_SPACE_MAXADDR,	/* lowaddr */
3202				BUS_SPACE_MAXADDR,	/* highaddr */
3203				NULL, NULL,		/* filter, filterarg */
3204				MCLBYTES,		/* maxsize */
3205				1,			/* nsegments */
3206				MCLBYTES,		/* maxsegsize */
3207				0,			/* flags */
3208				NULL,			/* lockfunc */
3209				NULL,			/* lockarg */
3210				&adapter->rxtag);
3211	if (error) {
3212		device_printf(dev, "%s: bus_dma_tag_create failed %d\n",
3213		    __func__, error);
3214		goto fail;
3215	}
3216
3217	error = bus_dmamap_create(adapter->rxtag, BUS_DMA_NOWAIT,
3218	    &adapter->rx_sparemap);
3219	if (error) {
3220		device_printf(dev, "%s: bus_dmamap_create failed: %d\n",
3221		    __func__, error);
3222		goto fail;
3223	}
3224	rx_buffer = adapter->rx_buffer_area;
3225	for (i = 0; i < adapter->num_rx_desc; i++, rx_buffer++) {
3226		error = bus_dmamap_create(adapter->rxtag, BUS_DMA_NOWAIT,
3227		    &rx_buffer->map);
3228		if (error) {
3229			device_printf(dev, "%s: bus_dmamap_create failed: %d\n",
3230			    __func__, error);
3231			goto fail;
3232		}
3233	}
3234
3235	for (i = 0; i < adapter->num_rx_desc; i++) {
3236		error = em_get_buf(adapter, i);
3237		if (error)
3238			goto fail;
3239	}
3240	bus_dmamap_sync(adapter->rxdma.dma_tag, adapter->rxdma.dma_map,
3241	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3242
3243	return (0);
3244
3245fail:
3246	em_free_receive_structures(adapter);
3247	return (error);
3248}
3249
3250/*********************************************************************
3251 *
3252 *  Allocate and initialize receive structures.
3253 *
3254 **********************************************************************/
3255static int
3256em_setup_receive_structures(struct adapter *adapter)
3257{
3258	int error;
3259
3260	bzero(adapter->rx_desc_base, (sizeof(struct em_rx_desc)) * adapter->num_rx_desc);
3261
3262	if ((error = em_allocate_receive_structures(adapter)) != 0)
3263		return (error);
3264
3265	/* Setup our descriptor pointers */
3266	adapter->next_rx_desc_to_check = 0;
3267
3268	return (0);
3269}
3270
3271/*********************************************************************
3272 *
3273 *  Enable receive unit.
3274 *
3275 **********************************************************************/
3276static void
3277em_initialize_receive_unit(struct adapter *adapter)
3278{
3279	struct ifnet	*ifp = adapter->ifp;
3280	uint64_t	bus_addr;
3281	uint32_t	reg_rctl;
3282	uint32_t	reg_rxcsum;
3283
3284	INIT_DEBUGOUT("em_initialize_receive_unit: begin");
3285
3286	/*
3287	 * Make sure receives are disabled while setting
3288	 * up the descriptor ring
3289	 */
3290	E1000_WRITE_REG(&adapter->hw, RCTL, 0);
3291
3292	/* Set the Receive Delay Timer Register */
3293	E1000_WRITE_REG(&adapter->hw, RDTR, adapter->rx_int_delay.value | E1000_RDT_FPDB);
3294
3295	if(adapter->hw.mac_type >= em_82540) {
3296		E1000_WRITE_REG(&adapter->hw, RADV, adapter->rx_abs_int_delay.value);
3297
3298		/*
3299		 * Set the interrupt throttling rate. Value is calculated
3300		 * as DEFAULT_ITR = 1/(MAX_INTS_PER_SEC * 256ns)
3301		 */
3302#define MAX_INTS_PER_SEC	8000
3303#define DEFAULT_ITR	     1000000000/(MAX_INTS_PER_SEC * 256)
3304		E1000_WRITE_REG(&adapter->hw, ITR, DEFAULT_ITR);
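		/*
		 * With MAX_INTS_PER_SEC = 8000 this evaluates to
		 * 1000000000 / (8000 * 256) = 488.  The ITR register
		 * counts 256 ns units, so 488 * 256 ns is roughly a
		 * 125 us minimum gap between interrupts, i.e. ~8000
		 * interrupts per second.
		 */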
3305	}
3306
3307	/* Setup the Base and Length of the Rx Descriptor Ring */
3308	bus_addr = adapter->rxdma.dma_paddr;
3309	E1000_WRITE_REG(&adapter->hw, RDLEN, adapter->num_rx_desc *
3310			sizeof(struct em_rx_desc));
3311	E1000_WRITE_REG(&adapter->hw, RDBAH, (uint32_t)(bus_addr >> 32));
3312	E1000_WRITE_REG(&adapter->hw, RDBAL, (uint32_t)bus_addr);
3313
3314	/* Setup the HW Rx Head and Tail Descriptor Pointers */
3315	E1000_WRITE_REG(&adapter->hw, RDT, adapter->num_rx_desc - 1);
3316	E1000_WRITE_REG(&adapter->hw, RDH, 0);
3317
3318	/* Setup the Receive Control Register */
3319	reg_rctl = E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_LBM_NO |
3320		   E1000_RCTL_RDMTS_HALF |
3321		   (adapter->hw.mc_filter_type << E1000_RCTL_MO_SHIFT);
3322
3323	if (adapter->hw.tbi_compatibility_on == TRUE)
3324		reg_rctl |= E1000_RCTL_SBP;
3325
3326
3327	switch (adapter->rx_buffer_len) {
3328	default:
3329	case EM_RXBUFFER_2048:
3330		reg_rctl |= E1000_RCTL_SZ_2048;
3331		break;
3332	case EM_RXBUFFER_4096:
3333		reg_rctl |= E1000_RCTL_SZ_4096 | E1000_RCTL_BSEX | E1000_RCTL_LPE;
3334		break;
3335	case EM_RXBUFFER_8192:
3336		reg_rctl |= E1000_RCTL_SZ_8192 | E1000_RCTL_BSEX | E1000_RCTL_LPE;
3337		break;
3338	case EM_RXBUFFER_16384:
3339		reg_rctl |= E1000_RCTL_SZ_16384 | E1000_RCTL_BSEX | E1000_RCTL_LPE;
3340		break;
3341	}
3342
3343	if (ifp->if_mtu > ETHERMTU)
3344		reg_rctl |= E1000_RCTL_LPE;
3345
3346	/* Enable 82543 Receive Checksum Offload for TCP and UDP */
3347	if ((adapter->hw.mac_type >= em_82543) &&
3348	    (ifp->if_capenable & IFCAP_RXCSUM)) {
3349		reg_rxcsum = E1000_READ_REG(&adapter->hw, RXCSUM);
3350		reg_rxcsum |= (E1000_RXCSUM_IPOFL | E1000_RXCSUM_TUOFL);
3351		E1000_WRITE_REG(&adapter->hw, RXCSUM, reg_rxcsum);
3352	}
3353
3354	/* Enable Receives */
3355	E1000_WRITE_REG(&adapter->hw, RCTL, reg_rctl);
3356}
3357
3358/*********************************************************************
3359 *
3360 *  Free receive related data structures.
3361 *
3362 **********************************************************************/
3363static void
3364em_free_receive_structures(struct adapter *adapter)
3365{
3366	struct em_buffer *rx_buffer;
3367	int i;
3368
3369	INIT_DEBUGOUT("free_receive_structures: begin");
3370
3371	if (adapter->rx_sparemap) {
3372		bus_dmamap_destroy(adapter->rxtag, adapter->rx_sparemap);
3373		adapter->rx_sparemap = NULL;
3374	}
3375	if (adapter->rx_buffer_area != NULL) {
3376		rx_buffer = adapter->rx_buffer_area;
3377		for (i = 0; i < adapter->num_rx_desc; i++, rx_buffer++) {
3378			if (rx_buffer->m_head != NULL) {
3379				bus_dmamap_sync(adapter->rxtag, rx_buffer->map,
3380				    BUS_DMASYNC_POSTREAD);
3381				bus_dmamap_unload(adapter->rxtag,
3382				    rx_buffer->map);
3383				m_freem(rx_buffer->m_head);
3384				rx_buffer->m_head = NULL;
3385			} else if (rx_buffer->map != NULL)
3386				bus_dmamap_unload(adapter->rxtag,
3387				    rx_buffer->map);
3388			if (rx_buffer->map != NULL) {
3389				bus_dmamap_destroy(adapter->rxtag,
3390				    rx_buffer->map);
3391				rx_buffer->map = NULL;
3392			}
3393		}
3394	}
3395	if (adapter->rx_buffer_area != NULL) {
3396		free(adapter->rx_buffer_area, M_DEVBUF);
3397		adapter->rx_buffer_area = NULL;
3398	}
3399	if (adapter->rxtag != NULL) {
3400		bus_dma_tag_destroy(adapter->rxtag);
3401		adapter->rxtag = NULL;
3402	}
3403}
3404
3405/*********************************************************************
3406 *
3407 *  This routine executes in interrupt context. It replenishes
3408 *  the mbufs in the descriptor ring and passes data that has been
3409 *  DMA'ed into host memory up to the upper layer.
3410 *
3411 *  We loop at most count times if count is > 0, or until done if
3412 *  count < 0.
3413 *
3414 *********************************************************************/
3415static int
3416em_rxeof(struct adapter *adapter, int count)
3417{
3418	struct ifnet	*ifp;
3419	struct mbuf	*mp;
3420	uint8_t		accept_frame = 0;
3421	uint8_t		eop = 0;
3422	uint16_t 	len, desc_len, prev_len_adj;
3423	int		i;
3424
3425	/* Pointer to the receive descriptor being examined. */
3426	struct em_rx_desc   *current_desc;
3427	uint8_t		status;
3428
3429	ifp = adapter->ifp;
3430	i = adapter->next_rx_desc_to_check;
3431	current_desc = &adapter->rx_desc_base[i];
3432	bus_dmamap_sync(adapter->rxdma.dma_tag, adapter->rxdma.dma_map,
3433	    BUS_DMASYNC_POSTREAD);
3434
3435	if (!((current_desc->status) & E1000_RXD_STAT_DD))
3436		return (0);
3437
3438	while ((current_desc->status & E1000_RXD_STAT_DD) &&
3439	    (count != 0) &&
3440	    (ifp->if_drv_flags & IFF_DRV_RUNNING)) {
3441		struct mbuf *m = NULL;
3442
3443		mp = adapter->rx_buffer_area[i].m_head;
3444		/*
3445		 * Can't defer bus_dmamap_sync(9) because TBI_ACCEPT
3446		 * needs to access the last received byte in the mbuf.
3447		 */
3448		bus_dmamap_sync(adapter->rxtag, adapter->rx_buffer_area[i].map,
3449		    BUS_DMASYNC_POSTREAD);
3450
3451		accept_frame = 1;
3452		prev_len_adj = 0;
3453		desc_len = le16toh(current_desc->length);
3454		status = current_desc->status;
3455		if (status & E1000_RXD_STAT_EOP) {
3456			count--;
3457			eop = 1;
3458			if (desc_len < ETHER_CRC_LEN) {
3459				len = 0;
3460				prev_len_adj = ETHER_CRC_LEN - desc_len;
3461			} else
3462				len = desc_len - ETHER_CRC_LEN;
3463		} else {
3464			eop = 0;
3465			len = desc_len;
3466		}
3467
3468		if (current_desc->errors & E1000_RXD_ERR_FRAME_ERR_MASK) {
3469			uint8_t		last_byte;
3470			uint32_t	pkt_len = desc_len;
3471
3472			if (adapter->fmp != NULL)
3473				pkt_len += adapter->fmp->m_pkthdr.len;
3474
3475			last_byte = *(mtod(mp, caddr_t) + desc_len - 1);
3476			if (TBI_ACCEPT(&adapter->hw, status,
3477			    current_desc->errors, pkt_len, last_byte)) {
3478				em_tbi_adjust_stats(&adapter->hw,
3479				    &adapter->stats, pkt_len,
3480				    adapter->hw.mac_addr);
3481				if (len > 0)
3482					len--;
3483			} else
3484				accept_frame = 0;
3485		}
3486
3487		if (accept_frame) {
3488			if (em_get_buf(adapter, i) != 0) {
3489				ifp->if_iqdrops++;
3490				goto discard;
3491			}
3492
3493			/* Assign correct length to the current fragment */
3494			mp->m_len = len;
3495
3496			if (adapter->fmp == NULL) {
3497				mp->m_pkthdr.len = len;
3498				adapter->fmp = mp; /* Store the first mbuf */
3499				adapter->lmp = mp;
3500			} else {
3501				/* Chain mbuf's together */
3502				/* Chain mbufs together */
3503				/*
3504				 * Adjust length of previous mbuf in chain if
3505				 * we received less than 4 bytes in the last
3506				 * descriptor.
3507				 */
3508				if (prev_len_adj > 0) {
3509					adapter->lmp->m_len -= prev_len_adj;
3510					adapter->fmp->m_pkthdr.len -=
3511					    prev_len_adj;
3512				}
3513				adapter->lmp->m_next = mp;
3514				adapter->lmp = adapter->lmp->m_next;
3515				adapter->fmp->m_pkthdr.len += len;
3516			}
3517
3518			if (eop) {
3519				adapter->fmp->m_pkthdr.rcvif = ifp;
3520				ifp->if_ipackets++;
3521				em_receive_checksum(adapter, current_desc,
3522				    adapter->fmp);
3523#ifndef __NO_STRICT_ALIGNMENT
3524				if (adapter->hw.max_frame_size >
3525				    (MCLBYTES - ETHER_ALIGN) &&
3526				    em_fixup_rx(adapter) != 0)
3527					goto skip;
3528#endif
3529				if (status & E1000_RXD_STAT_VP) {
3530					adapter->fmp->m_pkthdr.ether_vtag =
3531					    (le16toh(current_desc->special) &
3532					    E1000_RXD_SPC_VLAN_MASK);
3533					adapter->fmp->m_flags |= M_VLANTAG;
3534				}
3535#ifndef __NO_STRICT_ALIGNMENT
3536skip:
3537#endif
3538				m = adapter->fmp;
3539				adapter->fmp = NULL;
3540				adapter->lmp = NULL;
3541			}
3542		} else {
3543			ifp->if_ierrors++;
3544discard:
3545			/* Reuse loaded DMA map and just update mbuf chain */
3546			mp = adapter->rx_buffer_area[i].m_head;
3547			mp->m_len = mp->m_pkthdr.len = MCLBYTES;
3548			mp->m_data = mp->m_ext.ext_buf;
3549			mp->m_next = NULL;
3550			if (adapter->hw.max_frame_size <= (MCLBYTES - ETHER_ALIGN))
3551				m_adj(mp, ETHER_ALIGN);
3552			if (adapter->fmp != NULL) {
3553				m_freem(adapter->fmp);
3554				adapter->fmp = NULL;
3555				adapter->lmp = NULL;
3556			}
3557			m = NULL;
3558		}
3559
3560		/* Zero out the receive descriptor's status. */
3561		current_desc->status = 0;
3562		bus_dmamap_sync(adapter->rxdma.dma_tag, adapter->rxdma.dma_map,
3563		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3564
3565		/* Advance our pointers to the next descriptor. */
3566		if (++i == adapter->num_rx_desc)
3567			i = 0;
3568		if (m != NULL) {
3569			adapter->next_rx_desc_to_check = i;
3570#ifdef DEVICE_POLLING
3571			EM_UNLOCK(adapter);
3572			(*ifp->if_input)(ifp, m);
3573			EM_LOCK(adapter);
3574#else
3575			(*ifp->if_input)(ifp, m);
3576#endif
3577			i = adapter->next_rx_desc_to_check;
3578		}
3579		current_desc = &adapter->rx_desc_base[i];
3580	}
3581	adapter->next_rx_desc_to_check = i;
3582
3583	/* Advance the E1000's Receive Queue #0 "Tail Pointer". */
3584	if (--i < 0)
3585		i = adapter->num_rx_desc - 1;
3586	E1000_WRITE_REG(&adapter->hw, RDT, i);
3587	if (!((current_desc->status) & E1000_RXD_STAT_DD))
3588		return (0);
3589
3590	return (1);
3591}
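/*
 * A minimal sketch of how a deferred handler might drive em_rxeof(),
 * using its return value (nonzero means descriptors may remain) to
 * decide whether to reschedule itself. The names mirror this driver,
 * but the loop is illustrative only:
 */
#if 0
static void
em_rx_task_sketch(void *context, int pending)
{
	struct adapter *adapter = context;

	EM_LOCK(adapter);
	/* A negative count would drain the ring in a single call. */
	if (em_rxeof(adapter, adapter->rx_process_limit) != 0)
		taskqueue_enqueue(adapter->tq, &adapter->rxtx_task);
	EM_UNLOCK(adapter);
}
#endif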
3592
3593#ifndef __NO_STRICT_ALIGNMENT
3594/*
3595 * When jumbo frames are enabled we must realign the entire payload
3596 * on architectures with strict alignment requirements. This is a
3597 * serious design flaw in the 8254x, as it defeats the point of DMA:
3598 * the chip only allows RX buffer sizes of 2048/4096/8192/16384 bytes,
3599 * whereas what we really want is 2048 - ETHER_ALIGN so the payload
3600 * comes out aligned. Even without strict alignment, the unaligned
3601 * accesses hurt performance. To avoid copying an entire frame into
3602 * place, we allocate a new mbuf, copy just the Ethernet header into
3603 * it, and prepend the new mbuf to the existing mbuf chain.
3604 *
3605 * Be aware that the 8254x performs best when jumbo frames are not
3606 * used at all on architectures with strict alignment.
3607 */
3608static int
3609em_fixup_rx(struct adapter *adapter)
3610{
3611	struct mbuf *m, *n;
3612	int error;
3613
3614	error = 0;
3615	m = adapter->fmp;
3616	if (m->m_len <= (MCLBYTES - ETHER_HDR_LEN)) {
3617		bcopy(m->m_data, m->m_data + ETHER_HDR_LEN, m->m_len);
3618		m->m_data += ETHER_HDR_LEN;
3619	} else {
3620		MGETHDR(n, M_DONTWAIT, MT_DATA);
3621		if (n != NULL) {
3622			bcopy(m->m_data, n->m_data, ETHER_HDR_LEN);
3623			m->m_data += ETHER_HDR_LEN;
3624			m->m_len -= ETHER_HDR_LEN;
3625			n->m_len = ETHER_HDR_LEN;
3626			M_MOVE_PKTHDR(n, m);
3627			n->m_next = m;
3628			adapter->fmp = n;
3629		} else {
3630			adapter->ifp->if_iqdrops++;
3631			adapter->mbuf_alloc_failed++;
3632			m_freem(adapter->fmp);
3633			adapter->fmp = NULL;
3634			adapter->lmp = NULL;
3635			error = ENOBUFS;
3636		}
3637	}
3638
3639	return (error);
3640}
3641#endif
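/*
 * For comparison, the non-jumbo receive path avoids the copy above
 * entirely: each fresh cluster is offset before being handed to the
 * hardware, so the 14-byte Ethernet header leaves the IP header on a
 * 4-byte boundary. A sketch of that buffer setup (illustrative only):
 */
#if 0
	m = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR);
	if (m == NULL)
		return (ENOBUFS);
	m->m_len = m->m_pkthdr.len = MCLBYTES;
	/* Shift the payload start by 2 bytes (ETHER_ALIGN). */
	if (adapter->hw.max_frame_size <= (MCLBYTES - ETHER_ALIGN))
		m_adj(m, ETHER_ALIGN);
#endif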
3642
3643/*********************************************************************
3644 *
3645 *  Verify that the hardware indicated that the checksum is valid.
3646 *  Inform the stack about the status of checksum so that stack
3647 *  doesn't spend time verifying the checksum.
3648 *
3649 *********************************************************************/
3650static void
3651em_receive_checksum(struct adapter *adapter, struct em_rx_desc *rx_desc,
3652		    struct mbuf *mp)
3653{
3654	/* 82543 or newer only */
3655	if ((adapter->hw.mac_type < em_82543) ||
3656	    /* Ignore Checksum bit is set */
3657	    (rx_desc->status & E1000_RXD_STAT_IXSM)) {
3658		mp->m_pkthdr.csum_flags = 0;
3659		return;
3660	}
3661
3662	if (rx_desc->status & E1000_RXD_STAT_IPCS) {
3663		/* Did it pass? */
3664		if (!(rx_desc->errors & E1000_RXD_ERR_IPE)) {
3665			/* IP Checksum Good */
3666			mp->m_pkthdr.csum_flags = CSUM_IP_CHECKED;
3667			mp->m_pkthdr.csum_flags |= CSUM_IP_VALID;
3668
3669		} else {
3670			mp->m_pkthdr.csum_flags = 0;
3671		}
3672	}
3673
3674	if (rx_desc->status & E1000_RXD_STAT_TCPCS) {
3675		/* Did it pass? */
3676		if (!(rx_desc->errors & E1000_RXD_ERR_TCPE)) {
3677			mp->m_pkthdr.csum_flags |=
3678			(CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
3679			mp->m_pkthdr.csum_data = htons(0xffff);
3680		}
3681	}
3682}
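/*
 * Downstream consumers key off the flags set above instead of
 * recomputing anything; an illustrative check (not taken from the
 * stack's actual input path):
 */
#if 0
	if (mp->m_pkthdr.csum_flags & CSUM_IP_VALID)
		printf("IP header checksum verified by hardware\n");
	if (mp->m_pkthdr.csum_flags & CSUM_DATA_VALID)
		/* With CSUM_PSEUDO_HDR also set, csum_data holds the
		 * completed checksum; 0xffff means it verified good. */
		printf("TCP/UDP checksum data: 0x%x\n",
		    mp->m_pkthdr.csum_data);
#endif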
3683
3684
3685static void
3686em_enable_vlans(struct adapter *adapter)
3687{
3688	uint32_t ctrl;
3689
3690	E1000_WRITE_REG(&adapter->hw, VET, ETHERTYPE_VLAN);
3691
3692	ctrl = E1000_READ_REG(&adapter->hw, CTRL);
3693	ctrl |= E1000_CTRL_VME;
3694	E1000_WRITE_REG(&adapter->hw, CTRL, ctrl);
3695}
3696
3697static void
3698em_disable_vlans(struct adapter *adapter)
3699{
3700	uint32_t ctrl;
3701
3702	ctrl = E1000_READ_REG(&adapter->hw, CTRL);
3703	ctrl &= ~E1000_CTRL_VME;
3704	E1000_WRITE_REG(&adapter->hw, CTRL, ctrl);
3705}
3706
3707static void
3708em_enable_intr(struct adapter *adapter)
3709{
3710	E1000_WRITE_REG(&adapter->hw, IMS, (IMS_ENABLE_MASK));
3711}
3712
3713static void
3714em_disable_intr(struct adapter *adapter)
3715{
3716	/*
3717	 * The first revision of the 82542 had an erratum: when the link was
3718	 * forced, it would stay up even if the cable was disconnected.
3719	 * Sequence errors were used to detect the disconnect, after which
3720	 * the driver would unforce the link from code in the ISR. For this
3721	 * to work correctly, the sequence error interrupt had to be enabled
3722	 * at all times.
3723	 */
3724
3725	if (adapter->hw.mac_type == em_82542_rev2_0)
3726	    E1000_WRITE_REG(&adapter->hw, IMC,
3727		(0xffffffff & ~E1000_IMC_RXSEQ));
3728	else
3729	    E1000_WRITE_REG(&adapter->hw, IMC,
3730		0xffffffff);
3731}
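/*
 * IMS and IMC are set/clear views of the same interrupt mask: ones
 * written to IMS enable the corresponding causes, ones written to IMC
 * disable them, and zero bits leave the mask untouched. That is why
 * the 82542 path above can disable everything except RXSEQ with one
 * write. Illustrative example (bit name hypothetical):
 */
#if 0
	/* Mask only the receive-timer cause, leaving the rest enabled. */
	E1000_WRITE_REG(&adapter->hw, IMC, E1000_IMC_RXT0);
#endif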
3732
3733static int
3734em_is_valid_ether_addr(uint8_t *addr)
3735{
3736	char zero_addr[6] = { 0, 0, 0, 0, 0, 0 };
3737
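	/* Reject addresses with the multicast/broadcast (I/G) bit set */
	/* in the first octet, as well as the all-zero address. */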
3738	if ((addr[0] & 1) || (!bcmp(addr, zero_addr, ETHER_ADDR_LEN))) {
3739		return (FALSE);
3740	}
3741
3742	return (TRUE);
3743}
3744
3745void
3746em_write_pci_cfg(struct em_hw *hw, uint32_t reg, uint16_t *value)
3747{
3748	pci_write_config(((struct em_osdep *)hw->back)->dev, reg, *value, 2);
3749}
3750
3751void
3752em_read_pci_cfg(struct em_hw *hw, uint32_t reg, uint16_t *value)
3753{
3754	*value = pci_read_config(((struct em_osdep *)hw->back)->dev, reg, 2);
3755}
3756
3757void
3758em_pci_set_mwi(struct em_hw *hw)
3759{
3760	pci_write_config(((struct em_osdep *)hw->back)->dev, PCIR_COMMAND,
3761	    (hw->pci_cmd_word | CMD_MEM_WRT_INVALIDATE), 2);
3762}
3763
3764void
3765em_pci_clear_mwi(struct em_hw *hw)
3766{
3767	pci_write_config(((struct em_osdep *)hw->back)->dev, PCIR_COMMAND,
3768	    (hw->pci_cmd_word & ~CMD_MEM_WRT_INVALIDATE), 2);
3769}
3770
3771/*********************************************************************
3772* 82544 Coexistence issue workaround.
3773*    There are two issues:
3774*       1. Transmit hang.
3775*    To detect this issue, the following equation can be used:
3776*	  SIZE[3:0] + ADDR[2:0] = SUM[3:0].
3777*	  If SUM[3:0] is between 1 and 4, we will hit this issue.
3778*
3779*       2. DAC (dual address cycle, i.e. 64-bit addressing).
3780*    To detect this issue, the following equation can be used:
3781*	  SIZE[3:0] + ADDR[2:0] = SUM[3:0].
3782*	  If SUM[3:0] is between 9 and 0xC, we will hit this issue.
3783*
3784*    WORKAROUND:
3785*	  Make sure the ending address does not land on 1, 2, 3, 4 (hang)
3786*	  or 9, a, b, c (DAC).
3787**********************************************************************/
3788*** *********************************************************************/
3789static uint32_t
3790em_fill_descriptors (bus_addr_t address, uint32_t length,
3791		PDESC_ARRAY desc_array)
3792{
3793	/* The issue is sensitive to both length and address, */
3794	/* so check the address first. */
3795	uint32_t safe_terminator;
3796	if (length <= 4) {
3797		desc_array->descriptor[0].address = address;
3798		desc_array->descriptor[0].length = length;
3799		desc_array->elements = 1;
3800		return (desc_array->elements);
3801	}
3802	safe_terminator = (uint32_t)((((uint32_t)address & 0x7) + (length & 0xF)) & 0xF);
3803	/* If it does not fall in the 0x1-0x4 or 0x9-0xC ranges, a single descriptor is safe. */
3804	if (safe_terminator == 0 ||
3805	    (safe_terminator > 4 &&
3806	    safe_terminator < 9) ||
3807	    (safe_terminator > 0xC &&
3808	    safe_terminator <= 0xF)) {
3809		desc_array->descriptor[0].address = address;
3810		desc_array->descriptor[0].length = length;
3811		desc_array->elements = 1;
3812		return (desc_array->elements);
3813	}
3814
3815	desc_array->descriptor[0].address = address;
3816	desc_array->descriptor[0].length = length - 4;
3817	desc_array->descriptor[1].address = address + (length - 4);
3818	desc_array->descriptor[1].length = 4;
3819	desc_array->elements = 2;
3820	return (desc_array->elements);
3821}
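/*
 * Worked example of the check above, with made-up values: a 67-byte
 * segment at bus address 0x10F1 gives safe_terminator =
 * ((0x10F1 & 0x7) + (67 & 0xF)) & 0xF = (1 + 3) & 0xF = 4, which falls
 * in the 1-4 hang range, so the segment is split into a 63-byte
 * descriptor followed by a trailing 4-byte descriptor. A standalone
 * sketch of just the predicate:
 */
#if 0
static int
em_82544_split_needed(bus_addr_t address, uint32_t length)
{
	uint32_t st;

	st = (((uint32_t)address & 0x7) + (length & 0xF)) & 0xF;
	return ((st >= 1 && st <= 4) || (st >= 9 && st <= 0xC));
}
#endif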
3822
3823/**********************************************************************
3824 *
3825 *  Update the board statistics counters.
3826 *
3827 **********************************************************************/
3828static void
3829em_update_stats_counters(struct adapter *adapter)
3830{
3831	struct ifnet   *ifp;
3832
3833	if (adapter->hw.media_type == em_media_type_copper ||
3834	   (E1000_READ_REG(&adapter->hw, STATUS) & E1000_STATUS_LU)) {
3835		adapter->stats.symerrs += E1000_READ_REG(&adapter->hw, SYMERRS);
3836		adapter->stats.sec += E1000_READ_REG(&adapter->hw, SEC);
3837	}
3838	adapter->stats.crcerrs += E1000_READ_REG(&adapter->hw, CRCERRS);
3839	adapter->stats.mpc += E1000_READ_REG(&adapter->hw, MPC);
3840	adapter->stats.scc += E1000_READ_REG(&adapter->hw, SCC);
3841	adapter->stats.ecol += E1000_READ_REG(&adapter->hw, ECOL);
3842
3843	adapter->stats.mcc += E1000_READ_REG(&adapter->hw, MCC);
3844	adapter->stats.latecol += E1000_READ_REG(&adapter->hw, LATECOL);
3845	adapter->stats.colc += E1000_READ_REG(&adapter->hw, COLC);
3846	adapter->stats.dc += E1000_READ_REG(&adapter->hw, DC);
3847	adapter->stats.rlec += E1000_READ_REG(&adapter->hw, RLEC);
3848	adapter->stats.xonrxc += E1000_READ_REG(&adapter->hw, XONRXC);
3849	adapter->stats.xontxc += E1000_READ_REG(&adapter->hw, XONTXC);
3850	adapter->stats.xoffrxc += E1000_READ_REG(&adapter->hw, XOFFRXC);
3851	adapter->stats.xofftxc += E1000_READ_REG(&adapter->hw, XOFFTXC);
3852	adapter->stats.fcruc += E1000_READ_REG(&adapter->hw, FCRUC);
3853	adapter->stats.prc64 += E1000_READ_REG(&adapter->hw, PRC64);
3854	adapter->stats.prc127 += E1000_READ_REG(&adapter->hw, PRC127);
3855	adapter->stats.prc255 += E1000_READ_REG(&adapter->hw, PRC255);
3856	adapter->stats.prc511 += E1000_READ_REG(&adapter->hw, PRC511);
3857	adapter->stats.prc1023 += E1000_READ_REG(&adapter->hw, PRC1023);
3858	adapter->stats.prc1522 += E1000_READ_REG(&adapter->hw, PRC1522);
3859	adapter->stats.gprc += E1000_READ_REG(&adapter->hw, GPRC);
3860	adapter->stats.bprc += E1000_READ_REG(&adapter->hw, BPRC);
3861	adapter->stats.mprc += E1000_READ_REG(&adapter->hw, MPRC);
3862	adapter->stats.gptc += E1000_READ_REG(&adapter->hw, GPTC);
3863
3864	/* For the 64-bit byte counters, the low dword must be read first; */
3865	/* both registers clear on the read of the high dword. */
3866
3867	adapter->stats.gorcl += E1000_READ_REG(&adapter->hw, GORCL);
3868	adapter->stats.gorch += E1000_READ_REG(&adapter->hw, GORCH);
3869	adapter->stats.gotcl += E1000_READ_REG(&adapter->hw, GOTCL);
3870	adapter->stats.gotch += E1000_READ_REG(&adapter->hw, GOTCH);
3871
3872	adapter->stats.rnbc += E1000_READ_REG(&adapter->hw, RNBC);
3873	adapter->stats.ruc += E1000_READ_REG(&adapter->hw, RUC);
3874	adapter->stats.rfc += E1000_READ_REG(&adapter->hw, RFC);
3875	adapter->stats.roc += E1000_READ_REG(&adapter->hw, ROC);
3876	adapter->stats.rjc += E1000_READ_REG(&adapter->hw, RJC);
3877
3878	adapter->stats.torl += E1000_READ_REG(&adapter->hw, TORL);
3879	adapter->stats.torh += E1000_READ_REG(&adapter->hw, TORH);
3880	adapter->stats.totl += E1000_READ_REG(&adapter->hw, TOTL);
3881	adapter->stats.toth += E1000_READ_REG(&adapter->hw, TOTH);
3882
3883	adapter->stats.tpr += E1000_READ_REG(&adapter->hw, TPR);
3884	adapter->stats.tpt += E1000_READ_REG(&adapter->hw, TPT);
3885	adapter->stats.ptc64 += E1000_READ_REG(&adapter->hw, PTC64);
3886	adapter->stats.ptc127 += E1000_READ_REG(&adapter->hw, PTC127);
3887	adapter->stats.ptc255 += E1000_READ_REG(&adapter->hw, PTC255);
3888	adapter->stats.ptc511 += E1000_READ_REG(&adapter->hw, PTC511);
3889	adapter->stats.ptc1023 += E1000_READ_REG(&adapter->hw, PTC1023);
3890	adapter->stats.ptc1522 += E1000_READ_REG(&adapter->hw, PTC1522);
3891	adapter->stats.mptc += E1000_READ_REG(&adapter->hw, MPTC);
3892	adapter->stats.bptc += E1000_READ_REG(&adapter->hw, BPTC);
3893
3894	if (adapter->hw.mac_type >= em_82543) {
3895		adapter->stats.algnerrc += E1000_READ_REG(&adapter->hw, ALGNERRC);
3896		adapter->stats.rxerrc += E1000_READ_REG(&adapter->hw, RXERRC);
3897		adapter->stats.tncrs += E1000_READ_REG(&adapter->hw, TNCRS);
3898		adapter->stats.cexterr += E1000_READ_REG(&adapter->hw, CEXTERR);
3899		adapter->stats.tsctc += E1000_READ_REG(&adapter->hw, TSCTC);
3900		adapter->stats.tsctfc += E1000_READ_REG(&adapter->hw, TSCTFC);
3901	}
3902	ifp = adapter->ifp;
3903
3904	ifp->if_collisions = adapter->stats.colc;
3905
3906	/* Rx Errors */
3907	ifp->if_ierrors = adapter->stats.rxerrc + adapter->stats.crcerrs +
3908	    adapter->stats.algnerrc + adapter->stats.ruc + adapter->stats.roc +
3909	    adapter->stats.mpc + adapter->stats.cexterr;
3910
3911	/* Tx Errors */
3912	ifp->if_oerrors = adapter->stats.ecol + adapter->stats.latecol +
3913	    adapter->watchdog_events;
3914}
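/*
 * The paired reads above matter: each 64-bit octet counter is exposed
 * as two 32-bit registers that both clear when the high dword is read.
 * A hypothetical helper expressing the pattern once, assuming register
 * byte offsets suitable for E1000_READ_OFFSET():
 */
#if 0
static uint64_t
em_read_stat64(struct em_hw *hw, uint32_t low_off, uint32_t high_off)
{
	uint64_t val;

	/* Low dword first; the high-dword read clears the pair. */
	val = E1000_READ_OFFSET(hw, low_off);
	val |= (uint64_t)E1000_READ_OFFSET(hw, high_off) << 32;
	return (val);
}
#endif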
3915
3916
3917/**********************************************************************
3918 *
3919 *  This routine is called only when em_display_debug_stats is enabled.
3920 *  This routine provides a way to take a look at important statistics
3921 *  maintained by the driver and hardware.
3922 *
3923 **********************************************************************/
3924static void
3925em_print_debug_info(struct adapter *adapter)
3926{
3927	device_t dev = adapter->dev;
3928	uint8_t *hw_addr = adapter->hw.hw_addr;
3929
3930	device_printf(dev, "Adapter hardware address = %p \n", hw_addr);
3931	device_printf(dev, "CTRL = 0x%x RCTL = 0x%x \n",
3932	    E1000_READ_REG(&adapter->hw, CTRL),
3933	    E1000_READ_REG(&adapter->hw, RCTL));
3934	device_printf(dev, "Packet buffer = Tx=%dk Rx=%dk \n",
3935	    ((E1000_READ_REG(&adapter->hw, PBA) & 0xffff0000) >> 16),
3936	    (E1000_READ_REG(&adapter->hw, PBA) & 0xffff));
3937	device_printf(dev, "Flow control watermarks high = %d low = %d\n",
3938	    adapter->hw.fc_high_water,
3939	    adapter->hw.fc_low_water);
3940	device_printf(dev, "tx_int_delay = %d, tx_abs_int_delay = %d\n",
3941	    E1000_READ_REG(&adapter->hw, TIDV),
3942	    E1000_READ_REG(&adapter->hw, TADV));
3943	device_printf(dev, "rx_int_delay = %d, rx_abs_int_delay = %d\n",
3944	    E1000_READ_REG(&adapter->hw, RDTR),
3945	    E1000_READ_REG(&adapter->hw, RADV));
3946	device_printf(dev, "fifo workaround = %lld, fifo_reset_count = %lld\n",
3947	    (long long)adapter->tx_fifo_wrk_cnt,
3948	    (long long)adapter->tx_fifo_reset_cnt);
3949	device_printf(dev, "hw tdh = %d, hw tdt = %d\n",
3950	    E1000_READ_REG(&adapter->hw, TDH),
3951	    E1000_READ_REG(&adapter->hw, TDT));
3952	device_printf(dev, "Num Tx descriptors avail = %d\n",
3953	    adapter->num_tx_desc_avail);
3954	device_printf(dev, "Tx Descriptors not avail1 = %ld\n",
3955	    adapter->no_tx_desc_avail1);
3956	device_printf(dev, "Tx Descriptors not avail2 = %ld\n",
3957	    adapter->no_tx_desc_avail2);
3958	device_printf(dev, "Std mbuf failed = %ld\n",
3959	    adapter->mbuf_alloc_failed);
3960	device_printf(dev, "Std mbuf cluster failed = %ld\n",
3961	    adapter->mbuf_cluster_failed);
3962}
3963
3964static void
3965em_print_hw_stats(struct adapter *adapter)
3966{
3967	device_t dev = adapter->dev;
3968
3969	device_printf(dev, "Excessive collisions = %lld\n",
3970	    (long long)adapter->stats.ecol);
3971	device_printf(dev, "Symbol errors = %lld\n",
3972	    (long long)adapter->stats.symerrs);
3973	device_printf(dev, "Sequence errors = %lld\n",
3974	    (long long)adapter->stats.sec);
3975	device_printf(dev, "Defer count = %lld\n", (long long)adapter->stats.dc);
3976
3977	device_printf(dev, "Missed Packets = %lld\n", (long long)adapter->stats.mpc);
3978	device_printf(dev, "Receive No Buffers = %lld\n",
3979	    (long long)adapter->stats.rnbc);
3980	/* RLEC is inaccurate on some hardware, so calculate our own. */
3981	device_printf(dev, "Receive Length Errors = %lld\n",
3982	    ((long long)adapter->stats.roc + (long long)adapter->stats.ruc));
3983	device_printf(dev, "Receive errors = %lld\n",
3984	    (long long)adapter->stats.rxerrc);
3985	device_printf(dev, "Crc errors = %lld\n", (long long)adapter->stats.crcerrs);
3986	device_printf(dev, "Alignment errors = %lld\n",
3987	    (long long)adapter->stats.algnerrc);
3988	device_printf(dev, "Carrier extension errors = %lld\n",
3989	    (long long)adapter->stats.cexterr);
3990	device_printf(dev, "RX overruns = %ld\n", adapter->rx_overruns);
3991	device_printf(dev, "watchdog timeouts = %ld\n", adapter->watchdog_events);
3992
3993	device_printf(dev, "XON Rcvd = %lld\n", (long long)adapter->stats.xonrxc);
3994	device_printf(dev, "XON Xmtd = %lld\n", (long long)adapter->stats.xontxc);
3995	device_printf(dev, "XOFF Rcvd = %lld\n", (long long)adapter->stats.xoffrxc);
3996	device_printf(dev, "XOFF Xmtd = %lld\n", (long long)adapter->stats.xofftxc);
3997
3998	device_printf(dev, "Good Packets Rcvd = %lld\n",
3999	    (long long)adapter->stats.gprc);
4000	device_printf(dev, "Good Packets Xmtd = %lld\n",
4001	    (long long)adapter->stats.gptc);
4002	device_printf(dev, "TSO Contexts Xmtd = %lld\n",
4003	    (long long)adapter->stats.tsctc);
4004	device_printf(dev, "TSO Contexts Failed = %lld\n",
4005	    (long long)adapter->stats.tsctfc);
4006}
4007
4008static int
4009em_sysctl_debug_info(SYSCTL_HANDLER_ARGS)
4010{
4011	struct adapter *adapter;
4012	int error;
4013	int result;
4014
4015	result = -1;
4016	error = sysctl_handle_int(oidp, &result, 0, req);
4017
4018	if (error || !req->newptr)
4019		return (error);
4020
4021	if (result == 1) {
4022		adapter = (struct adapter *)arg1;
4023		em_print_debug_info(adapter);
4024	}
4025
4026	return (error);
4027}
4028
4029
4030static int
4031em_sysctl_stats(SYSCTL_HANDLER_ARGS)
4032{
4033	struct adapter *adapter;
4034	int error;
4035	int result;
4036
4037	result = -1;
4038	error = sysctl_handle_int(oidp, &result, 0, req);
4039
4040	if (error || !req->newptr)
4041		return (error);
4042
4043	if (result == 1) {
4044		adapter = (struct adapter *)arg1;
4045		em_print_hw_stats(adapter);
4046	}
4047
4048	return (error);
4049}
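/*
 * Both handlers above implement write-to-trigger OIDs: the integer
 * value is a dummy, and writing 1 from userland (for example something
 * like "sysctl dev.em.0.stats=1"; the exact OID names are assigned at
 * attach time) dumps the corresponding report to the console.
 */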
4050
4051static int
4052em_sysctl_int_delay(SYSCTL_HANDLER_ARGS)
4053{
4054	struct em_int_delay_info *info;
4055	struct adapter *adapter;
4056	uint32_t regval;
4057	int error;
4058	int usecs;
4059	int ticks;
4060
4061	info = (struct em_int_delay_info *)arg1;
4062	usecs = info->value;
4063	error = sysctl_handle_int(oidp, &usecs, 0, req);
4064	if (error != 0 || req->newptr == NULL)
4065		return (error);
4066	if (usecs < 0 || usecs > E1000_TICKS_TO_USECS(65535))
4067		return (EINVAL);
4068	info->value = usecs;
4069	ticks = E1000_USECS_TO_TICKS(usecs);
4070
4071	adapter = info->adapter;
4072
4073	EM_LOCK(adapter);
4074	regval = E1000_READ_OFFSET(&adapter->hw, info->offset);
4075	regval = (regval & ~0xffff) | (ticks & 0xffff);
4076	/* Handle a few special cases. */
4077	switch (info->offset) {
4078	case E1000_RDTR:
4079	case E1000_82542_RDTR:
4080		regval |= E1000_RDT_FPDB;
4081		break;
4082	case E1000_TIDV:
4083	case E1000_82542_TIDV:
4084		if (ticks == 0) {
4085			adapter->txd_cmd &= ~E1000_TXD_CMD_IDE;
4086			/* Don't write 0 into the TIDV register. */
4087			regval++;
4088		} else
4089			adapter->txd_cmd |= E1000_TXD_CMD_IDE;
4090		break;
4091	}
4092	E1000_WRITE_OFFSET(&adapter->hw, info->offset, regval);
4093	EM_UNLOCK(adapter);
4094	return (0);
4095}
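/*
 * The delay registers count in units of roughly 1.024 microseconds,
 * which is what E1000_USECS_TO_TICKS()/E1000_TICKS_TO_USECS() encode;
 * the 16-bit field caps the configurable delay near
 * E1000_TICKS_TO_USECS(65535), about 67 ms. A standalone sketch of the
 * conversion (assuming that granularity, with truncating arithmetic):
 */
#if 0
static uint16_t
em_usecs_to_ticks_sketch(uint32_t usecs)
{
	/* 1 tick = 1.024 us, so ticks = usecs * 1000 / 1024. */
	return ((uint16_t)((uint64_t)usecs * 1000 / 1024));
}
/* em_usecs_to_ticks_sketch(128) == 125 */
#endif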
4096
4097static void
4098em_add_int_delay_sysctl(struct adapter *adapter, const char *name,
4099	const char *description, struct em_int_delay_info *info,
4100	int offset, int value)
4101{
4102	info->adapter = adapter;
4103	info->offset = offset;
4104	info->value = value;
4105	SYSCTL_ADD_PROC(device_get_sysctl_ctx(adapter->dev),
4106	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
4107	    OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW,
4108	    info, 0, em_sysctl_int_delay, "I", description);
4109}
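/*
 * A typical attach-time registration would look like the following
 * (argument values illustrative; the actual defaults come from the
 * em_*_int_delay_dflt tunables):
 */
#if 0
	em_add_int_delay_sysctl(adapter, "rx_int_delay",
	    "receive interrupt delay in usecs", &adapter->rx_int_delay,
	    E1000_REG_OFFSET(&adapter->hw, RDTR), em_rx_int_delay_dflt);
#endif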
4110
4111#ifndef DEVICE_POLLING
4112static void
4113em_add_int_process_limit(struct adapter *adapter, const char *name,
4114	const char *description, int *limit, int value)
4115{
4116	*limit = value;
4117	SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev),
4118	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
4119	    OID_AUTO, name, CTLFLAG_RW, limit, value, description);
4120}
4121#endif
4122