/**************************************************************************

Copyright (c) 2001-2006, Intel Corporation
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

 1. Redistributions of source code must retain the above copyright notice,
    this list of conditions and the following disclaimer.

 2. Redistributions in binary form must reproduce the above copyright
    notice, this list of conditions and the following disclaimer in the
    documentation and/or other materials provided with the distribution.

 3. Neither the name of the Intel Corporation nor the names of its
    contributors may be used to endorse or promote products derived from
    this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.

***************************************************************************/

/*$FreeBSD: head/sys/dev/em/if_em.c 162785 2006-09-29 13:37:26Z andre $*/

#ifdef HAVE_KERNEL_OPTION_HEADERS
#include "opt_device_polling.h"
#endif

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/bus.h>
#include <sys/endian.h>
#include <sys/kernel.h>
#include <sys/kthread.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/module.h>
#include <sys/rman.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/sysctl.h>
#include <sys/taskqueue.h>

#include <machine/bus.h>
#include <machine/resource.h>

#include <net/bpf.h>
#include <net/ethernet.h>
#include <net/if.h>
#include <net/if_arp.h>
#include <net/if_dl.h>
#include <net/if_media.h>

#include <net/if_types.h>
#include <net/if_vlan_var.h>

#include <netinet/in_systm.h>
#include <netinet/in.h>
#include <netinet/if_ether.h>
#include <netinet/ip.h>
#include <netinet/ip6.h>
#include <netinet/tcp.h>
#include <netinet/udp.h>

#include <machine/in_cksum.h>
#include <dev/pci/pcivar.h>
#include <dev/pci/pcireg.h>
#include <dev/em/if_em_hw.h>
#include <dev/em/if_em.h>

/*********************************************************************
 *  Set this to one to display debug statistics
 *********************************************************************/
int	em_display_debug_stats = 0;

/*********************************************************************
 *  Driver version
 *********************************************************************/

char em_driver_version[] = "Version - 6.1.4 - TSO";

/*********************************************************************
 *  PCI Device ID Table
 *
 *  Used by probe to select devices to load on.
 *  The last field stores an index into em_strings.
 *  The last entry must be all 0s.
 *
 *  { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index }
 *********************************************************************/

static em_vendor_info_t em_vendor_info_array[] =
{
	/* Intel(R) PRO/1000 Network Connection */
	{ 0x8086, E1000_DEV_ID_82540EM,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82540EM_LOM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82540EP,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82540EP_LOM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82540EP_LP,	PCI_ANY_ID, PCI_ANY_ID, 0},

	{ 0x8086, E1000_DEV_ID_82541EI,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82541ER,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82541ER_LOM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82541EI_MOBILE,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82541GI,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82541GI_LF,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82541GI_MOBILE,	PCI_ANY_ID, PCI_ANY_ID, 0},

	{ 0x8086, E1000_DEV_ID_82542,		PCI_ANY_ID, PCI_ANY_ID, 0},

	{ 0x8086, E1000_DEV_ID_82543GC_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82543GC_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},

	{ 0x8086, E1000_DEV_ID_82544EI_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82544EI_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82544GC_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82544GC_LOM,	PCI_ANY_ID, PCI_ANY_ID, 0},

	{ 0x8086, E1000_DEV_ID_82545EM_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82545EM_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82545GM_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82545GM_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82545GM_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},

	{ 0x8086, E1000_DEV_ID_82546EB_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82546EB_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82546EB_QUAD_COPPER, PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82546GB_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82546GB_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82546GB_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82546GB_PCIE,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82546GB_QUAD_COPPER, PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82546GB_QUAD_COPPER_KSP3,
						PCI_ANY_ID, PCI_ANY_ID, 0},

	{ 0x8086, E1000_DEV_ID_82547EI,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82547EI_MOBILE,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82547GI,		PCI_ANY_ID, PCI_ANY_ID, 0},

	{ 0x8086, E1000_DEV_ID_82571EB_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER,
						PCI_ANY_ID, PCI_ANY_ID, 0},

	{ 0x8086, E1000_DEV_ID_82572EI_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82572EI_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82572EI_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82572EI,		PCI_ANY_ID, PCI_ANY_ID, 0},

	{ 0x8086, E1000_DEV_ID_82573E,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82573E_IAMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82573L,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_SPT,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_SPT,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_DPT,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_DPT,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IGP_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IGP_C,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IFE,	PCI_ANY_ID, PCI_ANY_ID, 0},

	/* required last entry */
	{ 0, 0, 0, 0, 0}
};

/*********************************************************************
 *  Table of branding strings for all supported NICs.
 *********************************************************************/

static char *em_strings[] = {
	"Intel(R) PRO/1000 Network Connection"
};

/*********************************************************************
 *  Function prototypes
 *********************************************************************/
static int	em_probe(device_t);
static int	em_attach(device_t);
static int	em_detach(device_t);
static int	em_shutdown(device_t);
static int	em_suspend(device_t);
static int	em_resume(device_t);
static void	em_start(struct ifnet *);
static void	em_start_locked(struct ifnet *ifp);
static int	em_ioctl(struct ifnet *, u_long, caddr_t);
static void	em_watchdog(struct ifnet *);
static void	em_init(void *);
static void	em_init_locked(struct adapter *);
static void	em_stop(void *);
static void	em_media_status(struct ifnet *, struct ifmediareq *);
static int	em_media_change(struct ifnet *);
static void	em_identify_hardware(struct adapter *);
static int	em_allocate_pci_resources(struct adapter *);
static int	em_allocate_intr(struct adapter *);
static void	em_free_intr(struct adapter *);
static void	em_free_pci_resources(struct adapter *);
static void	em_local_timer(void *);
static int	em_hardware_init(struct adapter *);
static void	em_setup_interface(device_t, struct adapter *);
static int	em_setup_transmit_structures(struct adapter *);
static void	em_initialize_transmit_unit(struct adapter *);
static int	em_setup_receive_structures(struct adapter *);
static void	em_initialize_receive_unit(struct adapter *);
static void	em_enable_intr(struct adapter *);
static void	em_disable_intr(struct adapter *);
static void	em_free_transmit_structures(struct adapter *);
static void	em_free_receive_structures(struct adapter *);
static void	em_update_stats_counters(struct adapter *);
static void	em_txeof(struct adapter *);
static int	em_allocate_receive_structures(struct adapter *);
static int	em_allocate_transmit_structures(struct adapter *);
static int	em_rxeof(struct adapter *, int);
#ifndef __NO_STRICT_ALIGNMENT
static int	em_fixup_rx(struct adapter *);
#endif
static void	em_receive_checksum(struct adapter *, struct em_rx_desc *,
		    struct mbuf *);
static void	em_transmit_checksum_setup(struct adapter *, struct mbuf *,
		    uint32_t *, uint32_t *);
static boolean_t em_tso_setup(struct adapter *, struct mbuf *,
		    uint32_t *, uint32_t *);
static void	em_set_promisc(struct adapter *);
static void	em_disable_promisc(struct adapter *);
static void	em_set_multi(struct adapter *);
static void	em_print_hw_stats(struct adapter *);
static void	em_update_link_status(struct adapter *);
static int	em_get_buf(struct adapter *, int);
static void	em_enable_vlans(struct adapter *);
static void	em_disable_vlans(struct adapter *);
static int	em_encap(struct adapter *, struct mbuf **);
static void	em_smartspeed(struct adapter *);
static int	em_82547_fifo_workaround(struct adapter *, int);
static void	em_82547_update_fifo_head(struct adapter *, int);
static int	em_82547_tx_fifo_reset(struct adapter *);
static void	em_82547_move_tail(void *arg);
static void	em_82547_move_tail_locked(struct adapter *);
static int	em_dma_malloc(struct adapter *, bus_size_t,
		    struct em_dma_alloc *, int);
static void	em_dma_free(struct adapter *, struct em_dma_alloc *);
static void	em_print_debug_info(struct adapter *);
static int	em_is_valid_ether_addr(uint8_t *);
static int	em_sysctl_stats(SYSCTL_HANDLER_ARGS);
static int	em_sysctl_debug_info(SYSCTL_HANDLER_ARGS);
static uint32_t	em_fill_descriptors(bus_addr_t address, uint32_t length,
		    PDESC_ARRAY desc_array);
static int	em_sysctl_int_delay(SYSCTL_HANDLER_ARGS);
static void	em_add_int_delay_sysctl(struct adapter *, const char *,
		    const char *, struct em_int_delay_info *, int, int);

/*
 * Fast interrupt handler and legacy ithread/polling modes are
 * mutually exclusive.
 */
#ifdef DEVICE_POLLING
static poll_handler_t em_poll;
static void	em_intr(void *);
#else
static void	em_intr_fast(void *);
static void	em_add_int_process_limit(struct adapter *, const char *,
		    const char *, int *, int);
static void	em_handle_rxtx(void *context, int pending);
static void	em_handle_link(void *context, int pending);
#endif

/*********************************************************************
 *  FreeBSD Device Interface Entry Points
 *********************************************************************/

static device_method_t em_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe, em_probe),
	DEVMETHOD(device_attach, em_attach),
	DEVMETHOD(device_detach, em_detach),
	DEVMETHOD(device_shutdown, em_shutdown),
	DEVMETHOD(device_suspend, em_suspend),
	DEVMETHOD(device_resume, em_resume),
	{0, 0}
};

static driver_t em_driver = {
	"em", em_methods, sizeof(struct adapter),
};

static devclass_t em_devclass;
DRIVER_MODULE(em, pci, em_driver, em_devclass, 0, 0);
MODULE_DEPEND(em, pci, 1, 1, 1);
MODULE_DEPEND(em, ether, 1, 1, 1);

/*********************************************************************
 *  Tunable default values.
 *********************************************************************/

#define E1000_TICKS_TO_USECS(ticks)	((1024 * (ticks) + 500) / 1000)
#define E1000_USECS_TO_TICKS(usecs)	((1000 * (usecs) + 512) / 1024)
#define M_TSO_LEN			66

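/*
 * Worked example (illustrative, not from the original source): the
 * interrupt delay timers tick in 1.024 usec units, so the macros above
 * convert with rounding, e.g. E1000_TICKS_TO_USECS(64) =
 * (1024 * 64 + 500) / 1000 = 66, and E1000_USECS_TO_TICKS(66) =
 * (1000 * 66 + 512) / 1024 = 64.
 */
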
static int em_tx_int_delay_dflt = E1000_TICKS_TO_USECS(EM_TIDV);
static int em_rx_int_delay_dflt = E1000_TICKS_TO_USECS(EM_RDTR);
static int em_tx_abs_int_delay_dflt = E1000_TICKS_TO_USECS(EM_TADV);
static int em_rx_abs_int_delay_dflt = E1000_TICKS_TO_USECS(EM_RADV);
static int em_rxd = EM_DEFAULT_RXD;
static int em_txd = EM_DEFAULT_TXD;
static int em_smart_pwr_down = FALSE;

TUNABLE_INT("hw.em.tx_int_delay", &em_tx_int_delay_dflt);
TUNABLE_INT("hw.em.rx_int_delay", &em_rx_int_delay_dflt);
TUNABLE_INT("hw.em.tx_abs_int_delay", &em_tx_abs_int_delay_dflt);
TUNABLE_INT("hw.em.rx_abs_int_delay", &em_rx_abs_int_delay_dflt);
TUNABLE_INT("hw.em.rxd", &em_rxd);
TUNABLE_INT("hw.em.txd", &em_txd);
TUNABLE_INT("hw.em.smart_pwr_down", &em_smart_pwr_down);
#ifndef DEVICE_POLLING
static int em_rx_process_limit = 100;
TUNABLE_INT("hw.em.rx_process_limit", &em_rx_process_limit);
#endif

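/*
 * A sketch of how these knobs could be set from /boot/loader.conf (the
 * values here are illustrative only); TUNABLE_INT() fetches them from
 * the kernel environment when the driver is loaded:
 *
 *	hw.em.txd="1024"
 *	hw.em.rx_int_delay="32"
 */
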
/*********************************************************************
 *  Device identification routine
 *
 *  em_probe determines if the driver should be loaded on the
 *  adapter, based on the PCI vendor/device ID of the adapter.
 *
 *  return BUS_PROBE_DEFAULT on success, positive on failure
 *********************************************************************/

static int
em_probe(device_t dev)
{
	char		adapter_name[60];
	uint16_t	pci_vendor_id = 0;
	uint16_t	pci_device_id = 0;
	uint16_t	pci_subvendor_id = 0;
	uint16_t	pci_subdevice_id = 0;
	em_vendor_info_t *ent;

	INIT_DEBUGOUT("em_probe: begin");

	pci_vendor_id = pci_get_vendor(dev);
	if (pci_vendor_id != EM_VENDOR_ID)
		return (ENXIO);

	pci_device_id = pci_get_device(dev);
	pci_subvendor_id = pci_get_subvendor(dev);
	pci_subdevice_id = pci_get_subdevice(dev);

	ent = em_vendor_info_array;
	while (ent->vendor_id != 0) {
		if ((pci_vendor_id == ent->vendor_id) &&
		    (pci_device_id == ent->device_id) &&

		    ((pci_subvendor_id == ent->subvendor_id) ||
		    (ent->subvendor_id == PCI_ANY_ID)) &&

		    ((pci_subdevice_id == ent->subdevice_id) ||
		    (ent->subdevice_id == PCI_ANY_ID))) {
			sprintf(adapter_name, "%s %s",
			    em_strings[ent->index],
			    em_driver_version);
			device_set_desc_copy(dev, adapter_name);
			return (BUS_PROBE_DEFAULT);
		}
		ent++;
	}

	return (ENXIO);
}

/*********************************************************************
 *  Device initialization routine
 *
 *  The attach entry point is called when the driver is being loaded.
 *  This routine identifies the type of hardware, allocates all resources
 *  and initializes the hardware.
 *
 *  return 0 on success, positive on failure
 *********************************************************************/
393
394static int
395em_attach(device_t dev)
396{
397	struct adapter	*adapter;
398	int		tsize, rsize;
399	int		error = 0;
400
401	INIT_DEBUGOUT("em_attach: begin");
402
403	adapter = device_get_softc(dev);
404	adapter->dev = adapter->osdep.dev = dev;
405	EM_LOCK_INIT(adapter, device_get_nameunit(dev));
406
407	/* SYSCTL stuff */
408	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
409	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
410	    OID_AUTO, "debug_info", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
411	    em_sysctl_debug_info, "I", "Debug Information");
412
413	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
414	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
415	    OID_AUTO, "stats", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
416	    em_sysctl_stats, "I", "Statistics");
417
418	callout_init(&adapter->timer, CALLOUT_MPSAFE);
419	callout_init(&adapter->tx_fifo_timer, CALLOUT_MPSAFE);
420
421	/* Determine hardware revision */
422	em_identify_hardware(adapter);
423
424	/* Set up some sysctls for the tunable interrupt delays */
425	em_add_int_delay_sysctl(adapter, "rx_int_delay",
426	    "receive interrupt delay in usecs", &adapter->rx_int_delay,
427	    E1000_REG_OFFSET(&adapter->hw, RDTR), em_rx_int_delay_dflt);
428	em_add_int_delay_sysctl(adapter, "tx_int_delay",
429	    "transmit interrupt delay in usecs", &adapter->tx_int_delay,
430	    E1000_REG_OFFSET(&adapter->hw, TIDV), em_tx_int_delay_dflt);
431	if (adapter->hw.mac_type >= em_82540) {
432		em_add_int_delay_sysctl(adapter, "rx_abs_int_delay",
433		    "receive interrupt delay limit in usecs",
434		    &adapter->rx_abs_int_delay,
435		    E1000_REG_OFFSET(&adapter->hw, RADV),
436		    em_rx_abs_int_delay_dflt);
437		em_add_int_delay_sysctl(adapter, "tx_abs_int_delay",
438		    "transmit interrupt delay limit in usecs",
439		    &adapter->tx_abs_int_delay,
440		    E1000_REG_OFFSET(&adapter->hw, TADV),
441		    em_tx_abs_int_delay_dflt);
442	}
443
444#ifndef DEVICE_POLLING
445	/* Sysctls for limiting the amount of work done in the taskqueue */
446	em_add_int_process_limit(adapter, "rx_processing_limit",
447	    "max number of rx packets to process", &adapter->rx_process_limit,
448	    em_rx_process_limit);
449#endif

	/*
	 * Validate the number of transmit and receive descriptors.  It
	 * must not exceed the hardware maximum, and the ring size must
	 * be a multiple of EM_DBA_ALIGN.
	 */
	if (((em_txd * sizeof(struct em_tx_desc)) % EM_DBA_ALIGN) != 0 ||
	    (adapter->hw.mac_type >= em_82544 && em_txd > EM_MAX_TXD) ||
	    (adapter->hw.mac_type < em_82544 && em_txd > EM_MAX_TXD_82543) ||
	    (em_txd < EM_MIN_TXD)) {
		device_printf(dev, "Using %d TX descriptors instead of %d!\n",
		    EM_DEFAULT_TXD, em_txd);
		adapter->num_tx_desc = EM_DEFAULT_TXD;
	} else
		adapter->num_tx_desc = em_txd;
	if (((em_rxd * sizeof(struct em_rx_desc)) % EM_DBA_ALIGN) != 0 ||
	    (adapter->hw.mac_type >= em_82544 && em_rxd > EM_MAX_RXD) ||
	    (adapter->hw.mac_type < em_82544 && em_rxd > EM_MAX_RXD_82543) ||
	    (em_rxd < EM_MIN_RXD)) {
		device_printf(dev, "Using %d RX descriptors instead of %d!\n",
		    EM_DEFAULT_RXD, em_rxd);
		adapter->num_rx_desc = EM_DEFAULT_RXD;
	} else
		adapter->num_rx_desc = em_rxd;
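
	/*
	 * Worked example (illustrative, assuming EM_DBA_ALIGN is 128 and a
	 * legacy descriptor is 16 bytes): hw.em.txd=256 gives a ring of
	 * 256 * 16 = 4096 bytes, a multiple of 128, and so passes the
	 * alignment check above.
	 */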

	adapter->hw.autoneg = DO_AUTO_NEG;
	adapter->hw.wait_autoneg_complete = WAIT_FOR_AUTO_NEG_DEFAULT;
	adapter->hw.autoneg_advertised = AUTONEG_ADV_DEFAULT;
	adapter->hw.tbi_compatibility_en = TRUE;
	adapter->rx_buffer_len = EM_RXBUFFER_2048;

	adapter->hw.phy_init_script = 1;
	adapter->hw.phy_reset_disable = FALSE;

#ifndef EM_MASTER_SLAVE
	adapter->hw.master_slave = em_ms_hw_default;
#else
	adapter->hw.master_slave = EM_MASTER_SLAVE;
#endif
	/*
	 * Set the max frame size assuming standard ethernet
	 * sized frames.
	 */
	adapter->hw.max_frame_size = ETHERMTU + ETHER_HDR_LEN + ETHER_CRC_LEN;

	adapter->hw.min_frame_size =
	    MINIMUM_ETHERNET_PACKET_SIZE + ETHER_CRC_LEN;

	/*
	 * This controls when hardware reports transmit completion
	 * status.
	 */
	adapter->hw.report_tx_early = 1;

	if (em_allocate_pci_resources(adapter)) {
		device_printf(dev, "Allocation of PCI resources failed\n");
		error = ENXIO;
		goto err_pci;
	}

	/* Initialize eeprom parameters */
	em_init_eeprom_params(&adapter->hw);

	tsize = roundup2(adapter->num_tx_desc * sizeof(struct em_tx_desc),
	    EM_DBA_ALIGN);

	/* Allocate Transmit Descriptor ring */
	if (em_dma_malloc(adapter, tsize, &adapter->txdma, BUS_DMA_NOWAIT)) {
		device_printf(dev, "Unable to allocate tx_desc memory\n");
		error = ENOMEM;
		goto err_tx_desc;
	}
	adapter->tx_desc_base = (struct em_tx_desc *)adapter->txdma.dma_vaddr;

	rsize = roundup2(adapter->num_rx_desc * sizeof(struct em_rx_desc),
	    EM_DBA_ALIGN);

	/* Allocate Receive Descriptor ring */
	if (em_dma_malloc(adapter, rsize, &adapter->rxdma, BUS_DMA_NOWAIT)) {
		device_printf(dev, "Unable to allocate rx_desc memory\n");
		error = ENOMEM;
		goto err_rx_desc;
	}
	adapter->rx_desc_base = (struct em_rx_desc *)adapter->rxdma.dma_vaddr;

	/* Initialize the hardware */
	if (em_hardware_init(adapter)) {
		device_printf(dev, "Unable to initialize the hardware\n");
		error = EIO;
		goto err_hw_init;
	}

	/* Copy the permanent MAC address out of the EEPROM */
	if (em_read_mac_addr(&adapter->hw) < 0) {
		device_printf(dev, "EEPROM read error while reading MAC"
		    " address\n");
		error = EIO;
		goto err_hw_init;
	}

	if (!em_is_valid_ether_addr(adapter->hw.mac_addr)) {
		device_printf(dev, "Invalid MAC address\n");
		error = EIO;
		goto err_hw_init;
	}

	/* Setup OS specific network interface */
	em_setup_interface(dev, adapter);

	em_allocate_intr(adapter);

	/* Initialize statistics */
	em_clear_hw_cntrs(&adapter->hw);
	em_update_stats_counters(adapter);
	adapter->hw.get_link_status = 1;
	em_update_link_status(adapter);

	/* Indicate SOL/IDER usage */
	if (em_check_phy_reset_block(&adapter->hw))
		device_printf(dev,
		    "PHY reset is blocked due to SOL/IDER session.\n");

	/* Identify 82544 on PCIX */
	em_get_bus_info(&adapter->hw);
	if (adapter->hw.bus_type == em_bus_type_pcix &&
	    adapter->hw.mac_type == em_82544)
		adapter->pcix_82544 = TRUE;
	else
		adapter->pcix_82544 = FALSE;

	INIT_DEBUGOUT("em_attach: end");

	return (0);

err_hw_init:
	em_dma_free(adapter, &adapter->rxdma);
err_rx_desc:
	em_dma_free(adapter, &adapter->txdma);
err_tx_desc:
err_pci:
	em_free_intr(adapter);
	em_free_pci_resources(adapter);
	EM_LOCK_DESTROY(adapter);

	return (error);
}

/*********************************************************************
 *  Device removal routine
 *
 *  The detach entry point is called when the driver is being removed.
 *  This routine stops the adapter and deallocates all the resources
 *  that were allocated for driver operation.
 *
 *  return 0 on success, positive on failure
 *********************************************************************/
603
604static int
605em_detach(device_t dev)
606{
607	struct adapter	*adapter = device_get_softc(dev);
608	struct ifnet	*ifp = adapter->ifp;
609
610	INIT_DEBUGOUT("em_detach: begin");
611
612#ifdef DEVICE_POLLING
613	if (ifp->if_capenable & IFCAP_POLLING)
614		ether_poll_deregister(ifp);
615#endif
616
617	em_free_intr(adapter);
618	EM_LOCK(adapter);
619	adapter->in_detach = 1;
620	em_stop(adapter);
621	em_phy_hw_reset(&adapter->hw);
622	EM_UNLOCK(adapter);
623	ether_ifdetach(adapter->ifp);
624
625	em_free_pci_resources(adapter);
626	bus_generic_detach(dev);
627	if_free(ifp);
628
629	/* Free Transmit Descriptor ring */
630	if (adapter->tx_desc_base) {
631		em_dma_free(adapter, &adapter->txdma);
632		adapter->tx_desc_base = NULL;
633	}
634
635	/* Free Receive Descriptor ring */
636	if (adapter->rx_desc_base) {
637		em_dma_free(adapter, &adapter->rxdma);
638		adapter->rx_desc_base = NULL;
639	}
640
641	EM_LOCK_DESTROY(adapter);
642
643	return (0);
644}

/*********************************************************************
 *
 *  Shutdown entry point
 *
 **********************************************************************/

static int
em_shutdown(device_t dev)
{
	struct adapter *adapter = device_get_softc(dev);

	EM_LOCK(adapter);
	em_stop(adapter);
	EM_UNLOCK(adapter);

	return (0);
}

/*
 * Suspend/resume device methods.
 */
static int
em_suspend(device_t dev)
{
	struct adapter *adapter = device_get_softc(dev);

	EM_LOCK(adapter);
	em_stop(adapter);
	EM_UNLOCK(adapter);

	return (bus_generic_suspend(dev));
}

static int
em_resume(device_t dev)
{
	struct adapter *adapter = device_get_softc(dev);
	struct ifnet *ifp = adapter->ifp;

	EM_LOCK(adapter);
	em_init_locked(adapter);
	if ((ifp->if_flags & IFF_UP) &&
	    (ifp->if_drv_flags & IFF_DRV_RUNNING))
		em_start_locked(ifp);
	EM_UNLOCK(adapter);

	return (bus_generic_resume(dev));
}

/*********************************************************************
 *  Transmit entry point
 *
 *  em_start is called by the stack to initiate a transmit.
 *  The driver will remain in this routine as long as there are
 *  packets to transmit and transmit resources are available.
 *  If resources are not available, the stack is notified and
 *  the packet is requeued.
 **********************************************************************/

static void
em_start_locked(struct ifnet *ifp)
{
	struct adapter	*adapter = ifp->if_softc;
	struct mbuf	*m_head;

	EM_LOCK_ASSERT(adapter);

	if ((ifp->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) !=
	    IFF_DRV_RUNNING)
		return;
	if (!adapter->link_active)
		return;

	while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {

		IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
		if (m_head == NULL)
			break;
		/*
		 * em_encap() can modify our pointer, and/or make it NULL on
		 * failure.  In that event, we can't requeue.
		 */
		if (em_encap(adapter, &m_head)) {
			if (m_head == NULL)
				break;
			ifp->if_drv_flags |= IFF_DRV_OACTIVE;
			IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
			break;
		}

		/* Send a copy of the frame to the BPF listener */
		BPF_MTAP(ifp, m_head);

		/* Set timeout in case hardware has problems transmitting. */
		ifp->if_timer = EM_TX_TIMEOUT;
	}
}

static void
em_start(struct ifnet *ifp)
{
	struct adapter *adapter = ifp->if_softc;

	EM_LOCK(adapter);
	if (ifp->if_drv_flags & IFF_DRV_RUNNING)
		em_start_locked(ifp);
	EM_UNLOCK(adapter);
}

/*********************************************************************
 *  Ioctl entry point
 *
 *  em_ioctl is called when the user wants to configure the
 *  interface.
 *
 *  return 0 on success, positive on failure
 **********************************************************************/

static int
em_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
{
	struct adapter	*adapter = ifp->if_softc;
	struct ifreq	*ifr = (struct ifreq *)data;
	struct ifaddr	*ifa = (struct ifaddr *)data;
	int		error = 0;

	if (adapter->in_detach)
		return (error);

	switch (command) {
	case SIOCSIFADDR:
	case SIOCGIFADDR:
		if (ifa->ifa_addr->sa_family == AF_INET) {
			/*
			 * XXX
			 * Since resetting the hardware takes a very long
			 * time and results in link renegotiation, we
			 * initialize the hardware only when it is
			 * absolutely required.
			 */
			ifp->if_flags |= IFF_UP;
			if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) {
				EM_LOCK(adapter);
				em_init_locked(adapter);
				EM_UNLOCK(adapter);
			}
			arp_ifinit(ifp, ifa);
		} else
			error = ether_ioctl(ifp, command, data);
		break;
	case SIOCSIFMTU:
	    {
		int max_frame_size;
		uint16_t eeprom_data = 0;

		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFMTU (Set Interface MTU)");

		EM_LOCK(adapter);
		switch (adapter->hw.mac_type) {
		case em_82573:
			/*
			 * 82573 only supports jumbo frames
			 * if ASPM is disabled.
			 */
			em_read_eeprom(&adapter->hw, EEPROM_INIT_3GIO_3, 1,
			    &eeprom_data);
			if (eeprom_data & EEPROM_WORD1A_ASPM_MASK) {
				max_frame_size = ETHER_MAX_LEN;
				break;
			}
			/* Allow Jumbo frames - fall thru */
		case em_82571:
		case em_82572:
		case em_80003es2lan:	/* Limit Jumbo Frame size */
			max_frame_size = 9234;
			break;
		case em_ich8lan:
			/* ICH8 does not support jumbo frames */
			max_frame_size = ETHER_MAX_LEN;
			break;
		default:
			max_frame_size = MAX_JUMBO_FRAME_SIZE;
		}
		if (ifr->ifr_mtu > max_frame_size - ETHER_HDR_LEN -
		    ETHER_CRC_LEN) {
			EM_UNLOCK(adapter);
			error = EINVAL;
			break;
		}
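
		/*
		 * Example (illustrative): with max_frame_size = 9234 the
		 * largest MTU accepted above is 9234 - ETHER_HDR_LEN (14) -
		 * ETHER_CRC_LEN (4) = 9216 bytes.
		 */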

		ifp->if_mtu = ifr->ifr_mtu;
		adapter->hw.max_frame_size =
		    ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
		em_init_locked(adapter);
		EM_UNLOCK(adapter);
		break;
	    }
	case SIOCSIFFLAGS:
		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFFLAGS (Set Interface Flags)");
		EM_LOCK(adapter);
		if (ifp->if_flags & IFF_UP) {
			if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
				if ((ifp->if_flags ^ adapter->if_flags) &
				    IFF_PROMISC) {
					em_disable_promisc(adapter);
					em_set_promisc(adapter);
				}
			} else
				em_init_locked(adapter);
		} else {
			if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
				em_stop(adapter);
			}
		}
		adapter->if_flags = ifp->if_flags;
		EM_UNLOCK(adapter);
		break;
	case SIOCADDMULTI:
	case SIOCDELMULTI:
		IOCTL_DEBUGOUT("ioctl rcv'd: SIOC(ADD|DEL)MULTI");
		if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
			EM_LOCK(adapter);
			em_disable_intr(adapter);
			em_set_multi(adapter);
			if (adapter->hw.mac_type == em_82542_rev2_0) {
				em_initialize_receive_unit(adapter);
			}
#ifdef DEVICE_POLLING
			if (!(ifp->if_capenable & IFCAP_POLLING))
#endif
				em_enable_intr(adapter);
			EM_UNLOCK(adapter);
		}
		break;
	case SIOCSIFMEDIA:
	case SIOCGIFMEDIA:
		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCxIFMEDIA (Get/Set Interface Media)");
		error = ifmedia_ioctl(ifp, ifr, &adapter->media, command);
		break;
	case SIOCSIFCAP:
	    {
		int mask, reinit;

		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFCAP (Set Capabilities)");
		reinit = 0;
		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
#ifdef DEVICE_POLLING
		if (mask & IFCAP_POLLING) {
			if (ifr->ifr_reqcap & IFCAP_POLLING) {
				error = ether_poll_register(em_poll, ifp);
				if (error)
					return (error);
				EM_LOCK(adapter);
				em_disable_intr(adapter);
				ifp->if_capenable |= IFCAP_POLLING;
				EM_UNLOCK(adapter);
			} else {
				error = ether_poll_deregister(ifp);
				/* Enable interrupt even in error case */
				EM_LOCK(adapter);
				em_enable_intr(adapter);
				ifp->if_capenable &= ~IFCAP_POLLING;
				EM_UNLOCK(adapter);
			}
		}
#endif
		if (mask & IFCAP_HWCSUM) {
			ifp->if_capenable ^= IFCAP_HWCSUM;
			reinit = 1;
		}
		if (mask & IFCAP_TSO4) {
			ifp->if_capenable ^= IFCAP_TSO4;
			reinit = 1;
		}
		if (mask & IFCAP_VLAN_HWTAGGING) {
			ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
			reinit = 1;
		}
		if (reinit && (ifp->if_drv_flags & IFF_DRV_RUNNING))
			em_init(adapter);
		VLAN_CAPABILITIES(ifp);
		break;
	    }
	default:
		error = ether_ioctl(ifp, command, data);
		break;
	}

	return (error);
}

/*********************************************************************
 *  Watchdog entry point
 *
 *  This routine is called whenever hardware quits transmitting.
 *
 **********************************************************************/

static void
em_watchdog(struct ifnet *ifp)
{
	struct adapter *adapter = ifp->if_softc;

	EM_LOCK(adapter);
	/*
	 * If we are in this routine because of pause frames, then
	 * don't reset the hardware.
	 */
	if (E1000_READ_REG(&adapter->hw, STATUS) & E1000_STATUS_TXOFF) {
		ifp->if_timer = EM_TX_TIMEOUT;
		EM_UNLOCK(adapter);
		return;
	}

	/*
	 * Reclaim first, as there is a possibility of losing Tx completion
	 * interrupts.  Missing Tx completion interrupts may be caused by
	 * the Tx interrupt moderation mechanism (delayed interrupts) or by
	 * a chipset bug.
	 */
	em_txeof(adapter);
	if (adapter->num_tx_desc_avail == adapter->num_tx_desc) {
		EM_UNLOCK(adapter);
		return;
	}

	if (em_check_for_link(&adapter->hw) == 0)
		device_printf(adapter->dev, "watchdog timeout -- resetting\n");

	ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
	adapter->watchdog_events++;

	em_init_locked(adapter);
	EM_UNLOCK(adapter);
}

/*********************************************************************
 *  Init entry point
 *
 *  This routine is used in two ways.  It is used by the stack as
 *  an init entry point in the network interface structure.  It is
 *  also used by the driver as a hw/sw initialization routine to
 *  get the adapter to a consistent state.
 *
 **********************************************************************/

static void
em_init_locked(struct adapter *adapter)
{
	struct ifnet	*ifp = adapter->ifp;
	device_t	dev = adapter->dev;
	uint32_t	pba;

	INIT_DEBUGOUT("em_init: begin");

	EM_LOCK_ASSERT(adapter);

	em_stop(adapter);

	/*
	 * Packet Buffer Allocation (PBA)
	 * Writing PBA sets the receive portion of the buffer;
	 * the remainder is used for the transmit buffer.
	 *
	 * Devices before the 82547 had a Packet Buffer of 64K.
	 *   Default allocation: PBA=48K for Rx, leaving 16K for Tx.
	 * After the 82547 the buffer was reduced to 40K.
	 *   Default allocation: PBA=30K for Rx, leaving 10K for Tx.
	 *   Note: the default does not leave enough room for a
	 *   Jumbo Frame >10k.
	 */
	switch (adapter->hw.mac_type) {
	case em_82547:
	case em_82547_rev_2: /* 82547: Total Packet Buffer is 40K */
		if (adapter->hw.max_frame_size > EM_RXBUFFER_8192)
			pba = E1000_PBA_22K; /* 22K for Rx, 18K for Tx */
		else
			pba = E1000_PBA_30K; /* 30K for Rx, 10K for Tx */
		adapter->tx_fifo_head = 0;
		adapter->tx_head_addr = pba << EM_TX_HEAD_ADDR_SHIFT;
		adapter->tx_fifo_size =
		    (E1000_PBA_40K - pba) << EM_PBA_BYTES_SHIFT;
		break;
	case em_80003es2lan: /* 80003es2lan: Total Packet Buffer is 48K */
	case em_82571: /* 82571: Total Packet Buffer is 48K */
	case em_82572: /* 82572: Total Packet Buffer is 48K */
		pba = E1000_PBA_32K; /* 32K for Rx, 16K for Tx */
		break;
	case em_82573: /* 82573: Total Packet Buffer is 32K */
		/* Jumbo frames not supported */
		pba = E1000_PBA_12K; /* 12K for Rx, 20K for Tx */
		break;
	case em_ich8lan:
		pba = E1000_PBA_8K;
		break;
	default:
		/* Devices before the 82547 had a Packet Buffer of 64K. */
		if (adapter->hw.max_frame_size > EM_RXBUFFER_8192)
			pba = E1000_PBA_40K; /* 40K for Rx, 24K for Tx */
		else
			pba = E1000_PBA_48K; /* 48K for Rx, 16K for Tx */
	}

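	/*
	 * Illustrative arithmetic (assuming the E1000_PBA_* constants count
	 * KB and EM_PBA_BYTES_SHIFT is 10): an 82547 running jumbo frames
	 * gets pba = 22, leaving tx_fifo_size = (40 - 22) << 10 = 18432
	 * bytes of the 40K packet buffer for the transmit FIFO.
	 */
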
	INIT_DEBUGOUT1("em_init: pba=%dK", pba);
	E1000_WRITE_REG(&adapter->hw, PBA, pba);

	/* Get the latest mac address; the user can use a LAA. */
	bcopy(IF_LLADDR(adapter->ifp), adapter->hw.mac_addr, ETHER_ADDR_LEN);

	/* Initialize the hardware */
	if (em_hardware_init(adapter)) {
		device_printf(dev, "Unable to initialize the hardware\n");
		return;
	}
	em_update_link_status(adapter);

	if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING)
		em_enable_vlans(adapter);

	ifp->if_hwassist = 0;
	if (adapter->hw.mac_type >= em_82543) {
		if (ifp->if_capenable & IFCAP_TXCSUM)
			ifp->if_hwassist = EM_CHECKSUM_FEATURES;
		/*
		 * em_setup_transmit_structures() will behave differently
		 * based on the state of TSO.
		 */
		if (ifp->if_capenable & IFCAP_TSO)
			ifp->if_hwassist |= EM_TCPSEG_FEATURES;
	}

	/* Prepare transmit descriptors and buffers */
	if (em_setup_transmit_structures(adapter)) {
		device_printf(dev, "Could not setup transmit structures\n");
		em_stop(adapter);
		return;
	}
	em_initialize_transmit_unit(adapter);

	/* Setup Multicast table */
	em_set_multi(adapter);

	/* Prepare receive descriptors and buffers */
	if (em_setup_receive_structures(adapter)) {
		device_printf(dev, "Could not setup receive structures\n");
		em_stop(adapter);
		return;
	}
	em_initialize_receive_unit(adapter);

	/* Don't lose promiscuous settings */
	em_set_promisc(adapter);

	ifp->if_drv_flags |= IFF_DRV_RUNNING;
	ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;

	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
	em_clear_hw_cntrs(&adapter->hw);
#ifdef DEVICE_POLLING
	/*
	 * Only enable interrupts if we are not polling; make sure
	 * they are off otherwise.
	 */
	if (ifp->if_capenable & IFCAP_POLLING)
		em_disable_intr(adapter);
	else
#endif /* DEVICE_POLLING */
		em_enable_intr(adapter);

	/* Don't reset the phy next time init gets called */
	adapter->hw.phy_reset_disable = TRUE;
}

static void
em_init(void *arg)
{
	struct adapter *adapter = arg;

	EM_LOCK(adapter);
	em_init_locked(adapter);
	EM_UNLOCK(adapter);
}

#ifdef DEVICE_POLLING
/*********************************************************************
 *
 *  Legacy polling routine
 *
 *********************************************************************/
static void
em_poll(struct ifnet *ifp, enum poll_cmd cmd, int count)
{
	struct adapter *adapter = ifp->if_softc;
	uint32_t reg_icr;

	EM_LOCK(adapter);
	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
		EM_UNLOCK(adapter);
		return;
	}

	if (cmd == POLL_AND_CHECK_STATUS) {
		reg_icr = E1000_READ_REG(&adapter->hw, ICR);
		if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
			callout_stop(&adapter->timer);
			adapter->hw.get_link_status = 1;
			em_check_for_link(&adapter->hw);
			em_update_link_status(adapter);
			callout_reset(&adapter->timer, hz,
			    em_local_timer, adapter);
		}
	}
	em_rxeof(adapter, count);
	em_txeof(adapter);

	if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
		em_start_locked(ifp);
	EM_UNLOCK(adapter);
}

/*********************************************************************
 *
 *  Legacy Interrupt Service routine
 *
 *********************************************************************/
static void
em_intr(void *arg)
{
	struct adapter	*adapter = arg;
	struct ifnet	*ifp;
	uint32_t	reg_icr;

	EM_LOCK(adapter);

	ifp = adapter->ifp;

	if (ifp->if_capenable & IFCAP_POLLING) {
		EM_UNLOCK(adapter);
		return;
	}

	for (;;) {
		reg_icr = E1000_READ_REG(&adapter->hw, ICR);
		if (adapter->hw.mac_type >= em_82571 &&
		    (reg_icr & E1000_ICR_INT_ASSERTED) == 0)
			break;
		else if (reg_icr == 0)
			break;

		/*
		 * XXX: some laptops trigger several spurious interrupts
		 * on em(4) when in the resume cycle.  The ICR register
		 * reports an all-ones value in this case.  Processing such
		 * interrupts would lead to a freeze.  I don't know why.
		 */
		if (reg_icr == 0xffffffff)
			break;

		if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
			em_rxeof(adapter, -1);
			em_txeof(adapter);
		}

		/* Link status change */
		if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
			callout_stop(&adapter->timer);
			adapter->hw.get_link_status = 1;
			em_check_for_link(&adapter->hw);
			em_update_link_status(adapter);
			callout_reset(&adapter->timer, hz,
			    em_local_timer, adapter);
		}

		if (reg_icr & E1000_ICR_RXO)
			adapter->rx_overruns++;
	}

	if (ifp->if_drv_flags & IFF_DRV_RUNNING &&
	    !IFQ_DRV_IS_EMPTY(&ifp->if_snd))
		em_start_locked(ifp);

	EM_UNLOCK(adapter);
}

#else  /* if not DEVICE_POLLING, then fast interrupt routines only */

static void
em_handle_link(void *context, int pending)
{
	struct adapter	*adapter = context;
	struct ifnet	*ifp;

	ifp = adapter->ifp;

	EM_LOCK(adapter);

	callout_stop(&adapter->timer);
	adapter->hw.get_link_status = 1;
	em_check_for_link(&adapter->hw);
	em_update_link_status(adapter);
	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
	EM_UNLOCK(adapter);
}

static void
em_handle_rxtx(void *context, int pending)
{
	struct adapter	*adapter = context;
	struct ifnet	*ifp;

	NET_LOCK_GIANT();
	ifp = adapter->ifp;

	/*
	 * TODO:
	 * It should be possible to run the tx clean loop without the lock.
	 */
	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
		if (em_rxeof(adapter, adapter->rx_process_limit) != 0)
			taskqueue_enqueue(adapter->tq, &adapter->rxtx_task);
		EM_LOCK(adapter);
		em_txeof(adapter);

		if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
			em_start_locked(ifp);
		EM_UNLOCK(adapter);
	}

	em_enable_intr(adapter);
	NET_UNLOCK_GIANT();
}

/*********************************************************************
 *
 *  Fast Interrupt Service routine
 *
 *********************************************************************/
static void
em_intr_fast(void *arg)
{
	struct adapter	*adapter = arg;
	struct ifnet	*ifp;
	uint32_t	reg_icr;

	ifp = adapter->ifp;

	reg_icr = E1000_READ_REG(&adapter->hw, ICR);

	/* Hot eject? */
	if (reg_icr == 0xffffffff)
		return;

	/* Definitely not our interrupt. */
	if (reg_icr == 0x0)
		return;

	/*
	 * Starting with the 82571 chip, bit 31 should be used to
	 * determine whether the interrupt belongs to us.
	 */
	if (adapter->hw.mac_type >= em_82571 &&
	    (reg_icr & E1000_ICR_INT_ASSERTED) == 0)
		return;

	/*
	 * Mask interrupts until the taskqueue is finished running.  This is
	 * cheap, just assume that it is needed.  This also works around the
	 * MSI message reordering errata on certain systems.
	 */
	em_disable_intr(adapter);
	taskqueue_enqueue(adapter->tq, &adapter->rxtx_task);

	/* Link status change */
	if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC))
		taskqueue_enqueue(taskqueue_fast, &adapter->link_task);

	if (reg_icr & E1000_ICR_RXO)
		adapter->rx_overruns++;
}
#endif /* ! DEVICE_POLLING */

/*********************************************************************
 *
 *  Media Ioctl callback
 *
 *  This routine is called whenever the user queries the status of
 *  the interface using ifconfig.
 *
 **********************************************************************/
static void
em_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
{
	struct adapter *adapter = ifp->if_softc;

	INIT_DEBUGOUT("em_media_status: begin");

	EM_LOCK(adapter);
	em_check_for_link(&adapter->hw);
	em_update_link_status(adapter);

	ifmr->ifm_status = IFM_AVALID;
	ifmr->ifm_active = IFM_ETHER;

	if (!adapter->link_active) {
		EM_UNLOCK(adapter);
		return;
	}

	ifmr->ifm_status |= IFM_ACTIVE;

	if ((adapter->hw.media_type == em_media_type_fiber) ||
	    (adapter->hw.media_type == em_media_type_internal_serdes)) {
		if (adapter->hw.mac_type == em_82545)
			ifmr->ifm_active |= IFM_1000_LX | IFM_FDX;
		else
			ifmr->ifm_active |= IFM_1000_SX | IFM_FDX;
	} else {
		switch (adapter->link_speed) {
		case 10:
			ifmr->ifm_active |= IFM_10_T;
			break;
		case 100:
			ifmr->ifm_active |= IFM_100_TX;
			break;
		case 1000:
			ifmr->ifm_active |= IFM_1000_T;
			break;
		}
		if (adapter->link_duplex == FULL_DUPLEX)
			ifmr->ifm_active |= IFM_FDX;
		else
			ifmr->ifm_active |= IFM_HDX;
	}
	EM_UNLOCK(adapter);
}

/*********************************************************************
 *
 *  Media Ioctl callback
 *
 *  This routine is called when the user changes speed/duplex using
 *  the media/mediaopt options of ifconfig.
 *
 **********************************************************************/
static int
em_media_change(struct ifnet *ifp)
{
	struct adapter	*adapter = ifp->if_softc;
	struct ifmedia	*ifm = &adapter->media;

	INIT_DEBUGOUT("em_media_change: begin");

	if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
		return (EINVAL);

	EM_LOCK(adapter);
	switch (IFM_SUBTYPE(ifm->ifm_media)) {
	case IFM_AUTO:
		adapter->hw.autoneg = DO_AUTO_NEG;
		adapter->hw.autoneg_advertised = AUTONEG_ADV_DEFAULT;
		break;
	case IFM_1000_LX:
	case IFM_1000_SX:
	case IFM_1000_T:
		adapter->hw.autoneg = DO_AUTO_NEG;
		adapter->hw.autoneg_advertised = ADVERTISE_1000_FULL;
		break;
	case IFM_100_TX:
		adapter->hw.autoneg = FALSE;
		adapter->hw.autoneg_advertised = 0;
		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
			adapter->hw.forced_speed_duplex = em_100_full;
		else
			adapter->hw.forced_speed_duplex = em_100_half;
		break;
	case IFM_10_T:
		adapter->hw.autoneg = FALSE;
		adapter->hw.autoneg_advertised = 0;
		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
			adapter->hw.forced_speed_duplex = em_10_full;
		else
			adapter->hw.forced_speed_duplex = em_10_half;
		break;
	default:
		device_printf(adapter->dev, "Unsupported media type\n");
	}

	/*
	 * As the speed/duplex settings may have changed we need to
	 * reset the PHY.
	 */
	adapter->hw.phy_reset_disable = FALSE;

	em_init_locked(adapter);
	EM_UNLOCK(adapter);

	return (0);
}

/*********************************************************************
 *
 *  This routine maps the mbufs to tx descriptors.
 *
 *  return 0 on success, positive on failure
 **********************************************************************/
static int
em_encap(struct adapter *adapter, struct mbuf **m_headp)
{
	struct ifnet		*ifp = adapter->ifp;
	bus_dma_segment_t	segs[EM_MAX_SCATTER];
	bus_dmamap_t		map;
	struct em_buffer	*tx_buffer, *tx_buffer_last;
	struct em_tx_desc	*current_tx_desc;
	struct mbuf		*m_head;
	uint32_t		txd_upper, txd_lower, txd_used, txd_saved;
	int			nsegs, i, j;
	int			error, do_tso, tso_desc = 0;

	m_head = *m_headp;
	current_tx_desc = NULL;
	txd_upper = txd_lower = txd_used = txd_saved = 0;

	do_tso = ((m_head->m_pkthdr.csum_flags & CSUM_TSO) != 0);

	/*
	 * Force a cleanup if the number of TX descriptors
	 * available hits the threshold.
	 */
	if (adapter->num_tx_desc_avail <= EM_TX_CLEANUP_THRESHOLD) {
		em_txeof(adapter);
		if (adapter->num_tx_desc_avail <= EM_TX_CLEANUP_THRESHOLD) {
			adapter->no_tx_desc_avail1++;
			return (ENOBUFS);
		}
	}

	/*
	 * When operating in promiscuous mode, hardware encapsulation for
	 * packets is disabled.  This means we have to add the vlan
	 * encapsulation in the driver, since it will have come down from the
	 * VLAN layer with a tag instead of a VLAN header.
	 */
	if ((m_head->m_flags & M_VLANTAG) && adapter->em_insert_vlan_header) {
		struct ether_vlan_header *evl;
		struct ether_header eh;

		m_head = m_pullup(m_head, sizeof(eh));
		if (m_head == NULL) {
			*m_headp = NULL;
			return (ENOBUFS);
		}
		eh = *mtod(m_head, struct ether_header *);
		M_PREPEND(m_head, sizeof(*evl), M_DONTWAIT);
		if (m_head == NULL) {
			*m_headp = NULL;
			return (ENOBUFS);
		}
		m_head = m_pullup(m_head, sizeof(*evl));
		if (m_head == NULL) {
			*m_headp = NULL;
			return (ENOBUFS);
		}
		/* Copy only the saved header; its tail is rewritten below. */
		evl = mtod(m_head, struct ether_vlan_header *);
		bcopy(&eh, evl, sizeof(eh));
		evl->evl_proto = evl->evl_encap_proto;
		evl->evl_encap_proto = htons(ETHERTYPE_VLAN);
		evl->evl_tag = htons(m_head->m_pkthdr.ether_vtag);
		*m_headp = m_head;
	}

	/*
	 * TSO workaround:
	 * If an mbuf contains only the header, we need
	 * to pull 4 bytes of data into it.
	 */
	if (do_tso && (m_head->m_len <= M_TSO_LEN)) {
		m_head = m_pullup(m_head, M_TSO_LEN + 4);
		*m_headp = m_head;
		if (m_head == NULL)
			return (ENOBUFS);
	}

	/*
	 * Map the packet for DMA.
	 */
	tx_buffer = &adapter->tx_buffer_area[adapter->next_avail_tx_desc];
	tx_buffer_last = tx_buffer;
	map = tx_buffer->map;

	error = bus_dmamap_load_mbuf_sg(adapter->txtag, map, *m_headp, segs,
	    &nsegs, BUS_DMA_NOWAIT);

	/*
	 * There are two types of errors we can (try) to handle:
	 * - EFBIG means the mbuf chain was too long and bus_dma ran
	 *   out of segments.  Defragment the mbuf chain and try again.
	 * - ENOMEM means bus_dma could not obtain enough bounce buffers
	 *   at this point in time.  Defer sending and try again later.
	 * All other errors, in particular EINVAL, are fatal and prevent the
	 * mbuf chain from ever going through.  Drop it and report error.
	 */
	if (error == EFBIG) {
		struct mbuf *m;

		m = m_defrag(*m_headp, M_DONTWAIT);
		if (m == NULL) {
			/* Assume m_defrag(9) used only m_get(9). */
			adapter->mbuf_alloc_failed++;
			m_freem(*m_headp);
			*m_headp = NULL;
			return (ENOBUFS);
		}
		*m_headp = m;

		error = bus_dmamap_load_mbuf_sg(adapter->txtag, map, *m_headp,
		    segs, &nsegs, BUS_DMA_NOWAIT);

		if (error == ENOMEM) {
			adapter->no_tx_dma_setup++;
			return (error);
		} else if (error != 0) {
			adapter->no_tx_dma_setup++;
			m_freem(*m_headp);
			*m_headp = NULL;
			return (error);
		}
	} else if (error == ENOMEM) {
		adapter->no_tx_dma_setup++;
		return (error);
	} else if (error != 0) {
		adapter->no_tx_dma_setup++;
		m_freem(*m_headp);
		*m_headp = NULL;
		return (error);
	}

	/*
	 * TSO Hardware workaround: if this packet is not
	 * TSO, is only a single descriptor long, and
	 * follows a TSO burst, then we need to add a
	 * sentinel descriptor to prevent premature writeback.
	 */
	if ((do_tso == 0) && (adapter->tx_tso == TRUE)) {
		if (nsegs == 1)
			tso_desc = TRUE;
		adapter->tx_tso = FALSE;
	}

	if (nsegs > adapter->num_tx_desc_avail - 2) {
		adapter->no_tx_desc_avail2++;
		bus_dmamap_unload(adapter->txtag, map);
		return (ENOBUFS);
	}
	m_head = *m_headp;

	/* Do hardware assists */
	if (ifp->if_hwassist) {
		if (do_tso &&
		    em_tso_setup(adapter, m_head, &txd_upper, &txd_lower)) {
			/* we need to make a final sentinel transmit desc */
			tso_desc = TRUE;
		} else
			em_transmit_checksum_setup(adapter, m_head,
			    &txd_upper, &txd_lower);
	}

	i = adapter->next_avail_tx_desc;
	if (adapter->pcix_82544)
		txd_saved = i;

	for (j = 0; j < nsegs; j++) {
		bus_size_t seg_len;
		bus_addr_t seg_addr;

		/* If adapter is 82544 and on PCIX bus. */
		if (adapter->pcix_82544) {
			DESC_ARRAY	desc_array;
			uint32_t	array_elements, counter;

			/*
			 * Check the Address and Length combination and
			 * split the data accordingly
			 */
			array_elements = em_fill_descriptors(segs[j].ds_addr,
			    segs[j].ds_len, &desc_array);
			for (counter = 0; counter < array_elements; counter++) {
				if (txd_used == adapter->num_tx_desc_avail) {
					adapter->next_avail_tx_desc = txd_saved;
					adapter->no_tx_desc_avail2++;
					bus_dmamap_unload(adapter->txtag, map);
					return (ENOBUFS);
				}
				tx_buffer = &adapter->tx_buffer_area[i];
				current_tx_desc = &adapter->tx_desc_base[i];
				current_tx_desc->buffer_addr = htole64(
				    desc_array.descriptor[counter].address);
				current_tx_desc->lower.data = htole32(
				    (adapter->txd_cmd | txd_lower |
				    (uint16_t)desc_array.descriptor[counter].length));
				current_tx_desc->upper.data = htole32(txd_upper);
				if (++i == adapter->num_tx_desc)
					i = 0;

				tx_buffer->m_head = NULL;
				txd_used++;
			}
		} else {
			tx_buffer = &adapter->tx_buffer_area[i];
			current_tx_desc = &adapter->tx_desc_base[i];
			seg_addr = htole64(segs[j].ds_addr);
			seg_len  = segs[j].ds_len;
			/*
			 * TSO Workaround:
			 * If this is the last descriptor, we want to
			 * split it so we have a small final sentinel.
			 */
			if (tso_desc && (j == (nsegs - 1)) && (seg_len > 8)) {
				seg_len -= 4;
				current_tx_desc->buffer_addr = seg_addr;
				current_tx_desc->lower.data = htole32(
				    adapter->txd_cmd | txd_lower | seg_len);
				current_tx_desc->upper.data =
				    htole32(txd_upper);
				if (++i == adapter->num_tx_desc)
					i = 0;

				/* Now make the sentinel */
				++txd_used; /* using an extra txd */
				current_tx_desc = &adapter->tx_desc_base[i];
				tx_buffer = &adapter->tx_buffer_area[i];
				current_tx_desc->buffer_addr =
				    seg_addr + seg_len;
				current_tx_desc->lower.data = htole32(
				    adapter->txd_cmd | txd_lower | 4);
				current_tx_desc->upper.data =
				    htole32(txd_upper);
				if (++i == adapter->num_tx_desc)
					i = 0;
			} else {
				current_tx_desc->buffer_addr = seg_addr;
				current_tx_desc->lower.data = htole32(
				    adapter->txd_cmd | txd_lower | seg_len);
				current_tx_desc->upper.data =
				    htole32(txd_upper);
				if (++i == adapter->num_tx_desc)
					i = 0;
			}
			tx_buffer->m_head = NULL;
		}
	}

	adapter->next_avail_tx_desc = i;
	if (adapter->pcix_82544)
		adapter->num_tx_desc_avail -= txd_used;
	else {
		adapter->num_tx_desc_avail -= nsegs;
		if (tso_desc) /* TSO used an extra for sentinel */
			adapter->num_tx_desc_avail -= txd_used;
	}

	if (m_head->m_flags & M_VLANTAG) {
		/* Set the vlan id. */
		current_tx_desc->upper.fields.special =
		    htole16(m_head->m_pkthdr.ether_vtag);

		/* Tell hardware to add tag. */
		current_tx_desc->lower.data |= htole32(E1000_TXD_CMD_VLE);
	}

	tx_buffer->m_head = m_head;
	tx_buffer_last->map = tx_buffer->map;
	tx_buffer->map = map;
	bus_dmamap_sync(adapter->txtag, map, BUS_DMASYNC_PREWRITE);

	/*
	 * The last descriptor of the packet needs End Of Packet (EOP).
	 */
	current_tx_desc->lower.data |= htole32(E1000_TXD_CMD_EOP);

	/*
	 * Advance the Transmit Descriptor Tail (TDT); this tells the E1000
	 * that this frame is available to transmit.
	 */
	bus_dmamap_sync(adapter->txdma.dma_tag, adapter->txdma.dma_map,
	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
	if (adapter->hw.mac_type == em_82547 &&
	    adapter->link_duplex == HALF_DUPLEX)
		em_82547_move_tail_locked(adapter);
	else {
		E1000_WRITE_REG(&adapter->hw, TDT, i);
		if (adapter->hw.mac_type == em_82547)
			em_82547_update_fifo_head(adapter,
			    m_head->m_pkthdr.len);
	}

	return (0);
}
1735
1736/*********************************************************************
1737 *
1738 * 82547 workaround to avoid controller hang in half-duplex environment.
1739 * The workaround is to avoid queuing a large packet that would span
1740 * the internal Tx FIFO ring boundary. We need to reset the FIFO pointers
1741 *  in this case. We do that only when the FIFO is quiescent.
1742 *
1743 **********************************************************************/
1744static void
1745em_82547_move_tail_locked(struct adapter *adapter)
1746{
1747	uint16_t hw_tdt;
1748	uint16_t sw_tdt;
1749	struct em_tx_desc *tx_desc;
1750	uint16_t length = 0;
1751	boolean_t eop = 0;
1752
1753	EM_LOCK_ASSERT(adapter);
1754
1755	hw_tdt = E1000_READ_REG(&adapter->hw, TDT);
1756	sw_tdt = adapter->next_avail_tx_desc;
1757
1758	while (hw_tdt != sw_tdt) {
1759		tx_desc = &adapter->tx_desc_base[hw_tdt];
1760		length += tx_desc->lower.flags.length;
1761		eop = tx_desc->lower.data & E1000_TXD_CMD_EOP;
1762		if (++hw_tdt == adapter->num_tx_desc)
1763			hw_tdt = 0;
1764
1765		if (eop) {
1766			if (em_82547_fifo_workaround(adapter, length)) {
1767				adapter->tx_fifo_wrk_cnt++;
1768				callout_reset(&adapter->tx_fifo_timer, 1,
1769					em_82547_move_tail, adapter);
1770				break;
1771			}
1772			E1000_WRITE_REG(&adapter->hw, TDT, hw_tdt);
1773			em_82547_update_fifo_head(adapter, length);
1774			length = 0;
1775		}
1776	}
1777}
1778
1779static void
1780em_82547_move_tail(void *arg)
1781{
1782	struct adapter *adapter = arg;
1783
1784	EM_LOCK(adapter);
1785	em_82547_move_tail_locked(adapter);
1786	EM_UNLOCK(adapter);
1787}
1788
1789static int
1790em_82547_fifo_workaround(struct adapter *adapter, int len)
1791{
1792	int fifo_space, fifo_pkt_len;
1793
1794	fifo_pkt_len = roundup2(len + EM_FIFO_HDR, EM_FIFO_HDR);
1795
1796	if (adapter->link_duplex == HALF_DUPLEX) {
1797		fifo_space = adapter->tx_fifo_size - adapter->tx_fifo_head;
1798
1799		if (fifo_pkt_len >= (EM_82547_PKT_THRESH + fifo_space)) {
1800			if (em_82547_tx_fifo_reset(adapter))
1801				return (0);
1802			else
1803				return (1);
1804		}
1805	}
1806
1807	return (0);
1808}
1809
1810static void
1811em_82547_update_fifo_head(struct adapter *adapter, int len)
1812{
1813	int fifo_pkt_len = roundup2(len + EM_FIFO_HDR, EM_FIFO_HDR);
1814
1815	/* tx_fifo_head is always 16 byte aligned */
1816	adapter->tx_fifo_head += fifo_pkt_len;
1817	if (adapter->tx_fifo_head >= adapter->tx_fifo_size) {
1818		adapter->tx_fifo_head -= adapter->tx_fifo_size;
1819	}
1820}
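
/*
 * Worked example (editor's note, not part of the original driver):
 * assuming EM_FIFO_HDR is 0x10 (16 bytes), a 1514 byte frame is
 * accounted for in the 82547 Tx FIFO as
 *
 *	fifo_pkt_len = roundup2(1514 + 16, 16) = 1536 bytes
 *
 * so the head advances by 1536 and wraps modulo tx_fifo_size.  The
 * hang being worked around can only occur in half-duplex mode when a
 * large frame would span the FIFO wrap point, which is the condition
 * em_82547_fifo_workaround() tests before letting the tail move.
 */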
1821
1822
1823static int
1824em_82547_tx_fifo_reset(struct adapter *adapter)
1825{
1826	uint32_t tctl;
1827
1828	if ((E1000_READ_REG(&adapter->hw, TDT) == E1000_READ_REG(&adapter->hw, TDH)) &&
1829	    (E1000_READ_REG(&adapter->hw, TDFT) == E1000_READ_REG(&adapter->hw, TDFH)) &&
1830	    (E1000_READ_REG(&adapter->hw, TDFTS) == E1000_READ_REG(&adapter->hw, TDFHS))&&
1831	    (E1000_READ_REG(&adapter->hw, TDFPC) == 0)) {
1832
1833		/* Disable TX unit */
1834		tctl = E1000_READ_REG(&adapter->hw, TCTL);
1835		E1000_WRITE_REG(&adapter->hw, TCTL, tctl & ~E1000_TCTL_EN);
1836
1837		/* Reset FIFO pointers */
1838		E1000_WRITE_REG(&adapter->hw, TDFT,  adapter->tx_head_addr);
1839		E1000_WRITE_REG(&adapter->hw, TDFH,  adapter->tx_head_addr);
1840		E1000_WRITE_REG(&adapter->hw, TDFTS, adapter->tx_head_addr);
1841		E1000_WRITE_REG(&adapter->hw, TDFHS, adapter->tx_head_addr);
1842
1843		/* Re-enable TX unit */
1844		E1000_WRITE_REG(&adapter->hw, TCTL, tctl);
1845		E1000_WRITE_FLUSH(&adapter->hw);
1846
1847		adapter->tx_fifo_head = 0;
1848		adapter->tx_fifo_reset_cnt++;
1849
1850		return (TRUE);
1851	}
1852	else {
1853		return (FALSE);
1854	}
1855}
1856
1857static void
1858em_set_promisc(struct adapter *adapter)
1859{
1860	struct ifnet	*ifp = adapter->ifp;
1861	uint32_t	reg_rctl;
1862
1863	reg_rctl = E1000_READ_REG(&adapter->hw, RCTL);
1864
1865	if (ifp->if_flags & IFF_PROMISC) {
1866		reg_rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
1867		E1000_WRITE_REG(&adapter->hw, RCTL, reg_rctl);
1868		/* Disable VLAN stripping in promiscuous mode.
1869		 * This allows bridging of VLAN-tagged frames
1870		 * and makes the tags visible in tcpdump.
1871		 */
1872		if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING)
1873			em_disable_vlans(adapter);
1874		adapter->em_insert_vlan_header = 1;
1875	} else if (ifp->if_flags & IFF_ALLMULTI) {
1876		reg_rctl |= E1000_RCTL_MPE;
1877		reg_rctl &= ~E1000_RCTL_UPE;
1878		E1000_WRITE_REG(&adapter->hw, RCTL, reg_rctl);
1879		adapter->em_insert_vlan_header = 0;
1880	} else
1881		adapter->em_insert_vlan_header = 0;
1882}
1883
1884static void
1885em_disable_promisc(struct adapter *adapter)
1886{
1887	struct ifnet	*ifp = adapter->ifp;
1888	uint32_t	reg_rctl;
1889
1890	reg_rctl = E1000_READ_REG(&adapter->hw, RCTL);
1891
1892	reg_rctl &= ~E1000_RCTL_UPE;
1893	reg_rctl &= ~E1000_RCTL_MPE;
1894	E1000_WRITE_REG(&adapter->hw, RCTL, reg_rctl);
1895
1896	if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING)
1897		em_enable_vlans(adapter);
1898	adapter->em_insert_vlan_header = 0;
1899}
1900
1901
1902/*********************************************************************
1903 *  Multicast Update
1904 *
1905 *  This routine is called whenever the multicast address list is updated.
1906 *
1907 **********************************************************************/
1908
1909static void
1910em_set_multi(struct adapter *adapter)
1911{
1912	struct ifnet	*ifp = adapter->ifp;
1913	struct ifmultiaddr *ifma;
1914	uint32_t reg_rctl = 0;
1915	uint8_t  mta[MAX_NUM_MULTICAST_ADDRESSES * ETH_LENGTH_OF_ADDRESS];
1916	int mcnt = 0;
1917
1918	IOCTL_DEBUGOUT("em_set_multi: begin");
1919
1920	if (adapter->hw.mac_type == em_82542_rev2_0) {
1921		reg_rctl = E1000_READ_REG(&adapter->hw, RCTL);
1922		if (adapter->hw.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
1923			em_pci_clear_mwi(&adapter->hw);
1924		reg_rctl |= E1000_RCTL_RST;
1925		E1000_WRITE_REG(&adapter->hw, RCTL, reg_rctl);
1926		msec_delay(5);
1927	}
1928
1929	IF_ADDR_LOCK(ifp);
1930	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
1931		if (ifma->ifma_addr->sa_family != AF_LINK)
1932			continue;
1933
1934		if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
1935			break;
1936
1937		bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
1938		    &mta[mcnt*ETH_LENGTH_OF_ADDRESS], ETH_LENGTH_OF_ADDRESS);
1939		mcnt++;
1940	}
1941	IF_ADDR_UNLOCK(ifp);
1942
1943	if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES) {
1944		reg_rctl = E1000_READ_REG(&adapter->hw, RCTL);
1945		reg_rctl |= E1000_RCTL_MPE;
1946		E1000_WRITE_REG(&adapter->hw, RCTL, reg_rctl);
1947	} else
1948		em_mc_addr_list_update(&adapter->hw, mta, mcnt, 0, 1);
1949
1950	if (adapter->hw.mac_type == em_82542_rev2_0) {
1951		reg_rctl = E1000_READ_REG(&adapter->hw, RCTL);
1952		reg_rctl &= ~E1000_RCTL_RST;
1953		E1000_WRITE_REG(&adapter->hw, RCTL, reg_rctl);
1954		msec_delay(5);
1955		if (adapter->hw.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
1956			em_pci_set_mwi(&adapter->hw);
1957	}
1958}
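
/*
 * Design note (editor's note, not part of the original driver): on the
 * 82542 rev 2.0 the receiver is held in reset (E1000_RCTL_RST) while the
 * multicast table is rewritten, and memory-write-invalidate is disabled
 * for the duration, per the workaround bracketing above.  Other MACs
 * take only the straight-line path through this routine.
 */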
1959
1960
1961/*********************************************************************
1962 *  Timer routine
1963 *
1964 *  This routine checks for link status and updates statistics.
1965 *
1966 **********************************************************************/
1967
1968static void
1969em_local_timer(void *arg)
1970{
1971	struct adapter	*adapter = arg;
1972	struct ifnet	*ifp = adapter->ifp;
1973
1974	EM_LOCK(adapter);
1975
1976	em_check_for_link(&adapter->hw);
1977	em_update_link_status(adapter);
1978	em_update_stats_counters(adapter);
1979	if (em_display_debug_stats && (ifp->if_drv_flags & IFF_DRV_RUNNING))
1980		em_print_hw_stats(adapter);
1981	em_smartspeed(adapter);
1982
1983	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
1984
1985	EM_UNLOCK(adapter);
1986}
1987
1988static void
1989em_update_link_status(struct adapter *adapter)
1990{
1991	struct ifnet *ifp = adapter->ifp;
1992	device_t dev = adapter->dev;
1993
1994	if (E1000_READ_REG(&adapter->hw, STATUS) & E1000_STATUS_LU) {
1995		if (adapter->link_active == 0) {
1996			em_get_speed_and_duplex(&adapter->hw, &adapter->link_speed,
1997			    &adapter->link_duplex);
1998			/* Check if we may set SPEED_MODE bit on PCI-E */
1999			if ((adapter->link_speed == SPEED_1000) &&
2000			    ((adapter->hw.mac_type == em_82571) ||
2001			    (adapter->hw.mac_type == em_82572))) {
2002				uint32_t tarc0;
2003
2004				tarc0 = E1000_READ_REG(&adapter->hw, TARC0);
2005				tarc0 |= SPEED_MODE_BIT;
2006				E1000_WRITE_REG(&adapter->hw, TARC0, tarc0);
2007			}
2008			if (bootverbose)
2009				device_printf(dev, "Link is up %d Mbps %s\n",
2010				    adapter->link_speed,
2011				    ((adapter->link_duplex == FULL_DUPLEX) ?
2012				    "Full Duplex" : "Half Duplex"));
2013			adapter->link_active = 1;
2014			adapter->smartspeed = 0;
2015			ifp->if_baudrate = adapter->link_speed * 1000000;
2016			if_link_state_change(ifp, LINK_STATE_UP);
2017		}
2018	} else {
2019		if (adapter->link_active == 1) {
2020			ifp->if_baudrate = adapter->link_speed = 0;
2021			adapter->link_duplex = 0;
2022			if (bootverbose)
2023				device_printf(dev, "Link is Down\n");
2024			adapter->link_active = 0;
2025			if_link_state_change(ifp, LINK_STATE_DOWN);
2026		}
2027	}
2028}
2029
2030/*********************************************************************
2031 *
2032 *  This routine disables all traffic on the adapter by issuing a
2033 *  global reset on the MAC and deallocates TX/RX buffers.
2034 *
2035 **********************************************************************/
2036
2037static void
2038em_stop(void *arg)
2039{
2040	struct adapter	*adapter = arg;
2041	struct ifnet	*ifp = adapter->ifp;
2042
2043	EM_LOCK_ASSERT(adapter);
2044
2045	INIT_DEBUGOUT("em_stop: begin");
2046
2047	em_disable_intr(adapter);
2048	em_reset_hw(&adapter->hw);
2049	callout_stop(&adapter->timer);
2050	callout_stop(&adapter->tx_fifo_timer);
2051	em_free_transmit_structures(adapter);
2052	em_free_receive_structures(adapter);
2053
2054	/* Tell the stack that the interface is no longer active */
2055	ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
2056}
2057
2058
2059/*********************************************************************
2060 *
2061 *  Determine hardware revision.
2062 *
2063 **********************************************************************/
2064static void
2065em_identify_hardware(struct adapter *adapter)
2066{
2067	device_t dev = adapter->dev;
2068
2069	/* Make sure our PCI config space has the necessary stuff set */
2070	pci_enable_busmaster(dev);
2071	pci_enable_io(dev, SYS_RES_MEMORY);
2072	adapter->hw.pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2);
2073
2074	/* Save off the information about this board */
2075	adapter->hw.vendor_id = pci_get_vendor(dev);
2076	adapter->hw.device_id = pci_get_device(dev);
2077	adapter->hw.revision_id = pci_get_revid(dev);
2078	adapter->hw.subsystem_vendor_id = pci_get_subvendor(dev);
2079	adapter->hw.subsystem_id = pci_get_subdevice(dev);
2080
2081	/* Identify the MAC */
2082	if (em_set_mac_type(&adapter->hw))
2083		device_printf(dev, "Unknown MAC Type\n");
2084
2085	if (adapter->hw.mac_type == em_82541 || adapter->hw.mac_type == em_82541_rev_2 ||
2086	    adapter->hw.mac_type == em_82547 || adapter->hw.mac_type == em_82547_rev_2)
2087		adapter->hw.phy_init_script = TRUE;
2088}
2089
2090static int
2091em_allocate_pci_resources(struct adapter *adapter)
2092{
2093	device_t	dev = adapter->dev;
2094	int		val, rid;
2095
2096	rid = PCIR_BAR(0);
2097	adapter->res_memory = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
2098	    &rid, RF_ACTIVE);
2099	if (adapter->res_memory == NULL) {
2100		device_printf(dev, "Unable to allocate bus resource: memory\n");
2101		return (ENXIO);
2102	}
2103	adapter->osdep.mem_bus_space_tag =
2104	    rman_get_bustag(adapter->res_memory);
2105	adapter->osdep.mem_bus_space_handle = rman_get_bushandle(adapter->res_memory);
2106	adapter->hw.hw_addr = (uint8_t *)&adapter->osdep.mem_bus_space_handle;
2107
2108	if (adapter->hw.mac_type > em_82543) {
2109		/* Figure out where our I/O BAR is. */
2110		for (rid = PCIR_BAR(0); rid < PCIR_CIS;) {
2111			val = pci_read_config(dev, rid, 4);
2112			if (E1000_BAR_TYPE(val) == E1000_BAR_TYPE_IO) {
2113				adapter->io_rid = rid;
2114				break;
2115			}
2116			rid += 4;
2117			/* check for 64bit BAR */
2118			if (E1000_BAR_MEM_TYPE(val) == E1000_BAR_MEM_TYPE_64BIT)
2119				rid += 4;
2120		}
2121		if (rid >= PCIR_CIS) {
2122			device_printf(dev, "Unable to locate IO BAR\n");
2123			return (ENXIO);
2124		}
2125		adapter->res_ioport = bus_alloc_resource_any(dev, SYS_RES_IOPORT,
2126		    &adapter->io_rid, RF_ACTIVE);
2127		if (adapter->res_ioport == NULL) {
2128			device_printf(dev, "Unable to allocate bus resource: "
2129			    "ioport\n");
2130			return (ENXIO);
2131		}
2132		adapter->hw.io_base = 0;
2133		adapter->osdep.io_bus_space_tag = rman_get_bustag(adapter->res_ioport);
2134		adapter->osdep.io_bus_space_handle =
2135		    rman_get_bushandle(adapter->res_ioport);
2136	}
2137
2138	/* For ICH8 we need to find the flash memory. */
2139	if (adapter->hw.mac_type == em_ich8lan) {
2140		rid = EM_FLASH;
2141
2142		adapter->flash_mem = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
2143		    &rid, RF_ACTIVE);
		if (adapter->flash_mem == NULL) {
			device_printf(dev, "Unable to allocate bus resource: "
			    "flash memory\n");
			return (ENXIO);
		}
2144		adapter->osdep.flash_bus_space_tag = rman_get_bustag(adapter->flash_mem);
2145		adapter->osdep.flash_bus_space_handle =
2146		    rman_get_bushandle(adapter->flash_mem);
2147	}
2148
2149	rid = 0x0;
2150	adapter->res_interrupt = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid,
2151	    RF_SHAREABLE | RF_ACTIVE);
2152	if (adapter->res_interrupt == NULL) {
2153		device_printf(dev, "Unable to allocate bus resource: "
2154		    "interrupt\n");
2155		return (ENXIO);
2156	}
2157
2158	adapter->hw.back = &adapter->osdep;
2159
2160	return (0);
2161}
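
/*
 * Illustrative note (editor's note, not part of the original driver): the
 * BAR scan above walks the config-space BAR registers at 0x10, 0x14, ...
 * 0x24, skipping an extra 4 bytes whenever it sees the low half of a
 * 64-bit memory BAR.  For a card with a 64-bit memory BAR at 0x10 and an
 * I/O BAR at 0x18, the loop visits rid 0x10, skips 0x14, and stops at
 * 0x18 with io_rid = 0x18.
 */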
2162
2163int
2164em_allocate_intr(struct adapter *adapter)
2165{
2166	device_t dev = adapter->dev;
2167	int error;
2168
2169	/* Manually turn off all interrupts */
2170	E1000_WRITE_REG(&adapter->hw, IMC, 0xffffffff);
2171
2172#ifdef DEVICE_POLLING
2173	if (adapter->int_handler_tag == NULL && (error = bus_setup_intr(dev,
2174	    adapter->res_interrupt, INTR_TYPE_NET | INTR_MPSAFE, em_intr, adapter,
2175	    &adapter->int_handler_tag)) != 0) {
2176		device_printf(dev, "Failed to register interrupt handler\n");
2177		return (error);
2178	}
2179#else
2180	/*
2181	 * Try allocating a fast interrupt and the associated deferred
2182	 * processing contexts.
2183	 */
2184	TASK_INIT(&adapter->rxtx_task, 0, em_handle_rxtx, adapter);
2185	TASK_INIT(&adapter->link_task, 0, em_handle_link, adapter);
2186	adapter->tq = taskqueue_create_fast("em_taskq", M_NOWAIT,
2187	    taskqueue_thread_enqueue, &adapter->tq);
2188	taskqueue_start_threads(&adapter->tq, 1, PI_NET, "%s taskq",
2189	    device_get_nameunit(adapter->dev));
2190	if ((error = bus_setup_intr(dev, adapter->res_interrupt,
2191	    INTR_TYPE_NET | INTR_FAST, em_intr_fast, adapter,
2192	    &adapter->int_handler_tag)) != 0) {
2193		device_printf(dev, "Failed to register fast interrupt "
2194			    "handler: %d\n", error);
2195		taskqueue_free(adapter->tq);
2196		adapter->tq = NULL;
2197		return (error);
2198	}
2199#endif
2200
2201	em_enable_intr(adapter);
2202	return (0);
2203}
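
/*
 * Illustrative sketch (editor's note, not part of the original driver):
 * with INTR_FAST the handler runs in primary interrupt context where it
 * must not sleep or take ordinary mutexes, so the fast handler is
 * expected to do little more than hand work to the taskqueue threads
 * created above.  The function below is a hypothetical shape, not the
 * driver's actual em_intr_fast().
 */
#if 0
static void
example_intr_fast(void *arg)
{
	struct adapter *adapter = arg;

	/* Defer all real rx/tx processing to the "em taskq" thread. */
	taskqueue_enqueue(adapter->tq, &adapter->rxtx_task);
}
#endif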
2204
2205static void
2206em_free_intr(struct adapter *adapter)
2207{
2208	device_t dev = adapter->dev;
2209
2210	if (adapter->int_handler_tag != NULL) {
2211		bus_teardown_intr(dev, adapter->res_interrupt, adapter->int_handler_tag);
2212		adapter->int_handler_tag = NULL;
2213	}
2214	if (adapter->tq != NULL) {
2215		taskqueue_drain(adapter->tq, &adapter->rxtx_task);
2216		taskqueue_drain(taskqueue_fast, &adapter->link_task);
2217		taskqueue_free(adapter->tq);
2218		adapter->tq = NULL;
2219	}
2220}
2221
2222static void
2223em_free_pci_resources(struct adapter *adapter)
2224{
2225	device_t dev = adapter->dev;
2226
2227	if (adapter->res_interrupt != NULL)
2228		bus_release_resource(dev, SYS_RES_IRQ, 0, adapter->res_interrupt);
2229
2230	if (adapter->res_memory != NULL)
2231		bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BAR(0),
2232		    adapter->res_memory);
2233
2234	if (adapter->flash_mem != NULL)
2235		bus_release_resource(dev, SYS_RES_MEMORY, EM_FLASH,
2236		    adapter->flash_mem);
2237
2238	if (adapter->res_ioport != NULL)
2239		bus_release_resource(dev, SYS_RES_IOPORT, adapter->io_rid,
2240		    adapter->res_ioport);
2241}
2242
2243/*********************************************************************
2244 *
2245 *  Initialize the hardware to a configuration as specified by the
2246 *  adapter structure. The controller is reset, the EEPROM is
2247 *  verified, the MAC address is set, then the shared initialization
2248 *  routines are called.
2249 *
2250 **********************************************************************/
2251static int
2252em_hardware_init(struct adapter *adapter)
2253{
2254	device_t dev = adapter->dev;
2255	uint16_t rx_buffer_size;
2256
2257	INIT_DEBUGOUT("em_hardware_init: begin");
2258	/* Issue a global reset */
2259	em_reset_hw(&adapter->hw);
2260
2261	/* When hardware is reset, fifo_head is also reset */
2262	adapter->tx_fifo_head = 0;
2263
2264	/* Make sure we have a good EEPROM before we read from it */
2265	if (em_validate_eeprom_checksum(&adapter->hw) < 0) {
2266		device_printf(dev, "The EEPROM Checksum Is Not Valid\n");
2267		return (EIO);
2268	}
2269
2270	if (em_read_part_num(&adapter->hw, &(adapter->part_num)) < 0) {
2271		device_printf(dev, "EEPROM read error while reading part "
2272		    "number\n");
2273		return (EIO);
2274	}
2275
2276	/* Set up smart power down as default off on newer adapters. */
2277	if (!em_smart_pwr_down &&
2278	    (adapter->hw.mac_type == em_82571 || adapter->hw.mac_type == em_82572)) {
2279		uint16_t phy_tmp = 0;
2280
2281		/* Speed up time to link by disabling smart power down. */
2282		em_read_phy_reg(&adapter->hw, IGP02E1000_PHY_POWER_MGMT, &phy_tmp);
2283		phy_tmp &= ~IGP02E1000_PM_SPD;
2284		em_write_phy_reg(&adapter->hw, IGP02E1000_PHY_POWER_MGMT, phy_tmp);
2285	}
2286
2287	/*
2288	 * These parameters control the automatic generation (Tx) and
2289	 * response (Rx) to Ethernet PAUSE frames.
2290	 * - High water mark should allow for at least two frames to be
2291	 *   received after sending an XOFF.
2292	 * - Low water mark works best when it is very near the high water mark.
2293	 *   This allows the receiver to restart by sending XON when it has
2294	 *   drained a bit. Here we use an arbitrary value of 1500, which
2295	 *   restarts transmission after one full frame is pulled from the
2296	 *   buffer. There could be several smaller frames in the buffer,
2297	 *   and if so they will not trigger the XON until the bytes drained
2298	 *   total 1500.
2299	 * - The pause time is fairly large: 0x1000 x 512ns = ~2.1 msec at 1Gb/s.
2300	 */
2301	rx_buffer_size = ((E1000_READ_REG(&adapter->hw, PBA) & 0xffff) << 10);
2302
2303	adapter->hw.fc_high_water = rx_buffer_size -
2304	    roundup2(adapter->hw.max_frame_size, 1024);
2305	adapter->hw.fc_low_water = adapter->hw.fc_high_water - 1500;
2306	if (adapter->hw.mac_type == em_80003es2lan)
2307		adapter->hw.fc_pause_time = 0xFFFF;
2308	else
2309		adapter->hw.fc_pause_time = 0x1000;
2310	adapter->hw.fc_send_xon = TRUE;
2311	adapter->hw.fc = em_fc_full;
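
	/*
	 * Worked example (editor's note, not part of the original driver):
	 * with a hypothetical PBA value of 48 (a 48KB Rx packet buffer)
	 * and a 1518 byte max frame size:
	 *
	 *	rx_buffer_size = 48 << 10                  = 49152
	 *	fc_high_water  = 49152 - roundup2(1518, 1024)
	 *	               = 49152 - 2048              = 47104
	 *	fc_low_water   = 47104 - 1500              = 45604
	 *
	 * leaving 2048 bytes of headroom above the high water mark.
	 */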
2312
2313	if (em_init_hw(&adapter->hw) < 0) {
2314		device_printf(dev, "Hardware Initialization Failed\n");
2315		return (EIO);
2316	}
2317
2318	em_check_for_link(&adapter->hw);
2319
2320	return (0);
2321}
2322
2323/*********************************************************************
2324 *
2325 *  Setup networking device structure and register an interface.
2326 *
2327 **********************************************************************/
2328static void
2329em_setup_interface(device_t dev, struct adapter *adapter)
2330{
2331	struct ifnet   *ifp;
2332	INIT_DEBUGOUT("em_setup_interface: begin");
2333
2334	ifp = adapter->ifp = if_alloc(IFT_ETHER);
2335	if (ifp == NULL)
2336		panic("%s: can not if_alloc()", device_get_nameunit(dev));
2337	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
2338	ifp->if_mtu = ETHERMTU;
2339	ifp->if_init =  em_init;
2340	ifp->if_softc = adapter;
2341	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
2342	ifp->if_ioctl = em_ioctl;
2343	ifp->if_start = em_start;
2344	ifp->if_watchdog = em_watchdog;
2345	IFQ_SET_MAXLEN(&ifp->if_snd, adapter->num_tx_desc - 1);
2346	ifp->if_snd.ifq_drv_maxlen = adapter->num_tx_desc - 1;
2347	IFQ_SET_READY(&ifp->if_snd);
2348
2349	ether_ifattach(ifp, adapter->hw.mac_addr);
2350
2351	ifp->if_capabilities = ifp->if_capenable = 0;
2352
2353	if (adapter->hw.mac_type >= em_82543) {
2354		ifp->if_capabilities |= IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM;
2355		ifp->if_capenable |= IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM;
2356	}
2357
2358	/* Enable TSO if available */
2359	if ((adapter->hw.mac_type > em_82544) &&
2360	    (adapter->hw.mac_type != em_82547)) {
2361		ifp->if_capabilities |= IFCAP_TSO4;
2362		ifp->if_capenable |= IFCAP_TSO4;
2363	}
2364
2365	/*
2366	 * Tell the upper layer(s) we support long frames.
2367	 */
2368	ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header);
2369	ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU;
2370	ifp->if_capenable |= IFCAP_VLAN_MTU;
2371
2372#ifdef DEVICE_POLLING
2373	ifp->if_capabilities |= IFCAP_POLLING;
2374#endif
2375
2376	/*
2377	 * Specify the media types supported by this adapter and register
2378	 * callbacks to update media and link information
2379	 */
2380	ifmedia_init(&adapter->media, IFM_IMASK, em_media_change,
2381	    em_media_status);
2382	if ((adapter->hw.media_type == em_media_type_fiber) ||
2383	    (adapter->hw.media_type == em_media_type_internal_serdes)) {
2384		u_char fiber_type = IFM_1000_SX;	/* default type */
2385
2386		if (adapter->hw.mac_type == em_82545)
2387			fiber_type = IFM_1000_LX;
2388		ifmedia_add(&adapter->media, IFM_ETHER | fiber_type | IFM_FDX,
2389		    0, NULL);
2390		ifmedia_add(&adapter->media, IFM_ETHER | fiber_type, 0, NULL);
2391	} else {
2392		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T, 0, NULL);
2393		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T | IFM_FDX,
2394			    0, NULL);
2395		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX,
2396			    0, NULL);
2397		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX | IFM_FDX,
2398			    0, NULL);
2399		if (adapter->hw.phy_type != em_phy_ife) {
2400			ifmedia_add(&adapter->media,
2401				IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
2402			ifmedia_add(&adapter->media,
2403				IFM_ETHER | IFM_1000_T, 0, NULL);
2404		}
2405	}
2406	ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL);
2407	ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO);
2408}
2409
2410
2411/*********************************************************************
2412 *
2413 *  Workaround for SmartSpeed on 82541 and 82547 controllers
2414 *
2415 **********************************************************************/
2416static void
2417em_smartspeed(struct adapter *adapter)
2418{
2419	uint16_t phy_tmp;
2420
2421	if (adapter->link_active || (adapter->hw.phy_type != em_phy_igp) ||
2422	    adapter->hw.autoneg == 0 ||
2423	    (adapter->hw.autoneg_advertised & ADVERTISE_1000_FULL) == 0)
2424		return;
2425
2426	if (adapter->smartspeed == 0) {
2427		/* If the Master/Slave config fault is asserted twice,
2428		 * we assume the faults are back-to-back. */
2429		em_read_phy_reg(&adapter->hw, PHY_1000T_STATUS, &phy_tmp);
2430		if (!(phy_tmp & SR_1000T_MS_CONFIG_FAULT))
2431			return;
2432		em_read_phy_reg(&adapter->hw, PHY_1000T_STATUS, &phy_tmp);
2433		if (phy_tmp & SR_1000T_MS_CONFIG_FAULT) {
2434			em_read_phy_reg(&adapter->hw, PHY_1000T_CTRL, &phy_tmp);
2435			if (phy_tmp & CR_1000T_MS_ENABLE) {
2436				phy_tmp &= ~CR_1000T_MS_ENABLE;
2437				em_write_phy_reg(&adapter->hw, PHY_1000T_CTRL,
2438				    phy_tmp);
2439				adapter->smartspeed++;
2440				if (adapter->hw.autoneg &&
2441				   !em_phy_setup_autoneg(&adapter->hw) &&
2442				   !em_read_phy_reg(&adapter->hw, PHY_CTRL,
2443				    &phy_tmp)) {
2444					phy_tmp |= (MII_CR_AUTO_NEG_EN |
2445						    MII_CR_RESTART_AUTO_NEG);
2446					em_write_phy_reg(&adapter->hw, PHY_CTRL,
2447					    phy_tmp);
2448				}
2449			}
2450		}
2451		return;
2452	} else if (adapter->smartspeed == EM_SMARTSPEED_DOWNSHIFT) {
2453		/* If still no link, perhaps using 2/3 pair cable */
2454		em_read_phy_reg(&adapter->hw, PHY_1000T_CTRL, &phy_tmp);
2455		phy_tmp |= CR_1000T_MS_ENABLE;
2456		em_write_phy_reg(&adapter->hw, PHY_1000T_CTRL, phy_tmp);
2457		if (adapter->hw.autoneg &&
2458		   !em_phy_setup_autoneg(&adapter->hw) &&
2459		   !em_read_phy_reg(&adapter->hw, PHY_CTRL, &phy_tmp)) {
2460			phy_tmp |= (MII_CR_AUTO_NEG_EN |
2461				    MII_CR_RESTART_AUTO_NEG);
2462			em_write_phy_reg(&adapter->hw, PHY_CTRL, phy_tmp);
2463		}
2464	}
2465	/* Restart process after EM_SMARTSPEED_MAX iterations */
2466	if (adapter->smartspeed++ == EM_SMARTSPEED_MAX)
2467		adapter->smartspeed = 0;
2468}
2469
2470
2471/*
2472 * Manage DMA'able memory.
2473 */
2474static void
2475em_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
2476{
2477	if (error)
2478		return;
2479	*(bus_addr_t *) arg = segs[0].ds_addr;
2480}
2481
2482static int
2483em_dma_malloc(struct adapter *adapter, bus_size_t size, struct em_dma_alloc *dma,
2484	int mapflags)
2485{
2486	int error;
2487
2488	error = bus_dma_tag_create(bus_get_dma_tag(adapter->dev), /* parent */
2489				EM_DBA_ALIGN, 0,	/* alignment, bounds */
2490				BUS_SPACE_MAXADDR,	/* lowaddr */
2491				BUS_SPACE_MAXADDR,	/* highaddr */
2492				NULL, NULL,		/* filter, filterarg */
2493				size,			/* maxsize */
2494				1,			/* nsegments */
2495				size,			/* maxsegsize */
2496				0,			/* flags */
2497				NULL,			/* lockfunc */
2498				NULL,			/* lockarg */
2499				&dma->dma_tag);
2500	if (error) {
2501		device_printf(adapter->dev, "%s: bus_dma_tag_create failed: %d\n",
2502		    __func__, error);
2503		goto fail_0;
2504	}
2505
2506	error = bus_dmamem_alloc(dma->dma_tag, (void**) &dma->dma_vaddr,
2507	    BUS_DMA_NOWAIT, &dma->dma_map);
2508	if (error) {
2509		device_printf(adapter->dev, "%s: bus_dmamem_alloc(%ju) failed: %d\n",
2510		    __func__, (uintmax_t)size, error);
2511		goto fail_2;
2512	}
2513
2514	dma->dma_paddr = 0;
2515	error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
2516	    size, em_dmamap_cb, &dma->dma_paddr, mapflags | BUS_DMA_NOWAIT);
2517	if (error || dma->dma_paddr == 0) {
2518		device_printf(adapter->dev, "%s: bus_dmamap_load failed: %d\n",
2519		    __func__, error);
2520		goto fail_3;
2521	}
2522
2523	return (0);
2524
2525fail_3:
2526	bus_dmamap_unload(dma->dma_tag, dma->dma_map);
2527fail_2:
2528	bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
2529	bus_dma_tag_destroy(dma->dma_tag);
2530fail_0:
2531	dma->dma_map = NULL;
2532	dma->dma_tag = NULL;
2533
2534	return (error);
2535}
2536
2537static void
2538em_dma_free(struct adapter *adapter, struct em_dma_alloc *dma)
2539{
2540	if (dma->dma_tag == NULL)
2541		return;
2542	if (dma->dma_map != NULL) {
2543		bus_dmamap_sync(dma->dma_tag, dma->dma_map,
2544		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
2545		bus_dmamap_unload(dma->dma_tag, dma->dma_map);
2546		bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
2547		dma->dma_map = NULL;
2548	}
2549	bus_dma_tag_destroy(dma->dma_tag);
2550	dma->dma_tag = NULL;
2551}
2552
2553
2554/*********************************************************************
2555 *
2556 *  Allocate memory for tx_buffer structures. The tx_buffer stores all
2557 *  the information needed to transmit a packet on the wire.
2558 *
2559 **********************************************************************/
2560static int
2561em_allocate_transmit_structures(struct adapter *adapter)
2562{
2563	adapter->tx_buffer_area =  malloc(sizeof(struct em_buffer) *
2564	    adapter->num_tx_desc, M_DEVBUF, M_NOWAIT);
2565	if (adapter->tx_buffer_area == NULL) {
2566		device_printf(adapter->dev, "Unable to allocate tx_buffer memory\n");
2567		return (ENOMEM);
2568	}
2569
2570	bzero(adapter->tx_buffer_area, sizeof(struct em_buffer) * adapter->num_tx_desc);
2571
2572	return (0);
2573}
2574
2575/*********************************************************************
2576 *
2577 *  Allocate and initialize transmit structures.
2578 *
2579 **********************************************************************/
2580static int
2581em_setup_transmit_structures(struct adapter *adapter)
2582{
2583	struct ifnet   *ifp = adapter->ifp;
2584	device_t dev = adapter->dev;
2585	struct em_buffer *tx_buffer;
2586	bus_size_t size, segsize;
2587	int error, i;
2588
2589	/*
2590	 * Setup DMA descriptor areas.
2591	 */
2592	segsize = size = roundup2(adapter->hw.max_frame_size, MCLBYTES);
2593
2594	/* Overrides for TSO - want large sizes */
2595	if (ifp->if_hwassist & EM_TCPSEG_FEATURES) {
2596		size = EM_TSO_SIZE;
2597		segsize = PAGE_SIZE;
2598	}
2599
2600	if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),	/* parent */
2601				1, 0,			/* alignment, bounds */
2602				BUS_SPACE_MAXADDR,	/* lowaddr */
2603				BUS_SPACE_MAXADDR,	/* highaddr */
2604				NULL, NULL,		/* filter, filterarg */
2605				size,			/* maxsize */
2606				EM_MAX_SCATTER,		/* nsegments */
2607				segsize,		/* maxsegsize */
2608				0,			/* flags */
2609				NULL,		/* lockfunc */
2610				NULL,		/* lockarg */
2611				&adapter->txtag)) != 0) {
2612		device_printf(dev, "Unable to allocate TX DMA tag\n");
2613		goto fail;
2614	}
2615
2616	if ((error = em_allocate_transmit_structures(adapter)) != 0)
2617		goto fail;
2618
2619	bzero(adapter->tx_desc_base, (sizeof(struct em_tx_desc)) * adapter->num_tx_desc);
2620	tx_buffer = adapter->tx_buffer_area;
2621	for (i = 0; i < adapter->num_tx_desc; i++) {
2622		error = bus_dmamap_create(adapter->txtag, 0, &tx_buffer->map);
2623		if (error != 0) {
2624			device_printf(dev, "Unable to create TX DMA map\n");
2625			goto fail;
2626		}
2627		tx_buffer++;
2628	}
2629
2630	adapter->next_avail_tx_desc = 0;
2631	adapter->oldest_used_tx_desc = 0;
2632
2633	/* Set number of descriptors available */
2634	adapter->num_tx_desc_avail = adapter->num_tx_desc;
2635
2636	/* Set checksum context */
2637	adapter->active_checksum_context = OFFLOAD_NONE;
2638	bus_dmamap_sync(adapter->txdma.dma_tag, adapter->txdma.dma_map,
2639	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
2640
2641	return (0);
2642
2643fail:
2644	em_free_transmit_structures(adapter);
2645	return (error);
2646}
2647
2648/*********************************************************************
2649 *
2650 *  Enable transmit unit.
2651 *
2652 **********************************************************************/
2653static void
2654em_initialize_transmit_unit(struct adapter *adapter)
2655{
2656	uint32_t	reg_tctl, reg_tarc;
2657	uint32_t	reg_tipg = 0;
2658	uint64_t	bus_addr;
2659
2660	INIT_DEBUGOUT("em_initialize_transmit_unit: begin");
2661	/* Setup the Base and Length of the Tx Descriptor Ring */
2662	bus_addr = adapter->txdma.dma_paddr;
2663	E1000_WRITE_REG(&adapter->hw, TDLEN,
2664	    adapter->num_tx_desc * sizeof(struct em_tx_desc));
2665	E1000_WRITE_REG(&adapter->hw, TDBAH, (uint32_t)(bus_addr >> 32));
2666	E1000_WRITE_REG(&adapter->hw, TDBAL, (uint32_t)bus_addr);
2667
2668	/* Setup the HW Tx Head and Tail descriptor pointers */
2669	E1000_WRITE_REG(&adapter->hw, TDT, 0);
2670	E1000_WRITE_REG(&adapter->hw, TDH, 0);
2671
2672
2673	HW_DEBUGOUT2("Base = %x, Length = %x\n", E1000_READ_REG(&adapter->hw, TDBAL),
2674	    E1000_READ_REG(&adapter->hw, TDLEN));
2675
2676	/* Set the default values for the Tx Inter Packet Gap timer */
2677	switch (adapter->hw.mac_type) {
2678	case em_82542_rev2_0:
2679	case em_82542_rev2_1:
2680		reg_tipg = DEFAULT_82542_TIPG_IPGT;
2681		reg_tipg |= DEFAULT_82542_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT;
2682		reg_tipg |= DEFAULT_82542_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT;
2683		break;
2684	case em_80003es2lan:
2685		reg_tipg = DEFAULT_82543_TIPG_IPGR1;
2686		reg_tipg |= DEFAULT_80003ES2LAN_TIPG_IPGR2 <<
2687		    E1000_TIPG_IPGR2_SHIFT;
2688		break;
2689	default:
2690		if ((adapter->hw.media_type == em_media_type_fiber) ||
2691		    (adapter->hw.media_type == em_media_type_internal_serdes))
2692			reg_tipg = DEFAULT_82543_TIPG_IPGT_FIBER;
2693		else
2694			reg_tipg = DEFAULT_82543_TIPG_IPGT_COPPER;
2695		reg_tipg |= DEFAULT_82543_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT;
2696		reg_tipg |= DEFAULT_82543_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT;
2697	}
2698
2699	E1000_WRITE_REG(&adapter->hw, TIPG, reg_tipg);
2700	E1000_WRITE_REG(&adapter->hw, TIDV, adapter->tx_int_delay.value);
2701	if (adapter->hw.mac_type >= em_82540)
2702		E1000_WRITE_REG(&adapter->hw, TADV, adapter->tx_abs_int_delay.value);
2703
2704	/* Do adapter specific tweaks before we enable the transmitter. */
2705	if (adapter->hw.mac_type == em_82571 || adapter->hw.mac_type == em_82572) {
2706		reg_tarc = E1000_READ_REG(&adapter->hw, TARC0);
2707		reg_tarc |= (1 << 25);
2708		E1000_WRITE_REG(&adapter->hw, TARC0, reg_tarc);
2709		reg_tarc = E1000_READ_REG(&adapter->hw, TARC1);
2710		reg_tarc |= (1 << 25);
2711		reg_tarc &= ~(1 << 28);
2712		E1000_WRITE_REG(&adapter->hw, TARC1, reg_tarc);
2713	} else if (adapter->hw.mac_type == em_80003es2lan) {
2714		reg_tarc = E1000_READ_REG(&adapter->hw, TARC0);
2715		reg_tarc |= 1;
2716		E1000_WRITE_REG(&adapter->hw, TARC0, reg_tarc);
2717		reg_tarc = E1000_READ_REG(&adapter->hw, TARC1);
2718		reg_tarc |= 1;
2719		E1000_WRITE_REG(&adapter->hw, TARC1, reg_tarc);
2720	}
2721
2722	/* Program the Transmit Control Register */
2723	reg_tctl = E1000_TCTL_PSP | E1000_TCTL_EN |
2724		   (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT);
2725	if (adapter->hw.mac_type >= em_82571)
2726		reg_tctl |= E1000_TCTL_MULR;
2727	if (adapter->link_duplex == FULL_DUPLEX) {
2728		reg_tctl |= E1000_FDX_COLLISION_DISTANCE << E1000_COLD_SHIFT;
2729	} else {
2730		reg_tctl |= E1000_HDX_COLLISION_DISTANCE << E1000_COLD_SHIFT;
2731	}
2732	/* This write will effectively turn on the transmit unit. */
2733	E1000_WRITE_REG(&adapter->hw, TCTL, reg_tctl);
2734
2735	/* Setup Transmit Descriptor Settings for this adapter */
2736	adapter->txd_cmd = E1000_TXD_CMD_IFCS | E1000_TXD_CMD_RS;
2737
2738	if (adapter->tx_int_delay.value > 0)
2739		adapter->txd_cmd |= E1000_TXD_CMD_IDE;
2740}
2741
2742/*********************************************************************
2743 *
2744 *  Free all transmit related data structures.
2745 *
2746 **********************************************************************/
2747static void
2748em_free_transmit_structures(struct adapter *adapter)
2749{
2750	struct em_buffer *tx_buffer;
2751	int i;
2752
2753	INIT_DEBUGOUT("free_transmit_structures: begin");
2754
2755	if (adapter->tx_buffer_area != NULL) {
2756		tx_buffer = adapter->tx_buffer_area;
2757		for (i = 0; i < adapter->num_tx_desc; i++, tx_buffer++) {
2758			if (tx_buffer->m_head != NULL) {
2759				bus_dmamap_sync(adapter->txtag, tx_buffer->map,
2760				    BUS_DMASYNC_POSTWRITE);
2761				bus_dmamap_unload(adapter->txtag,
2762				    tx_buffer->map);
2763				m_freem(tx_buffer->m_head);
2764				tx_buffer->m_head = NULL;
2765			} else if (tx_buffer->map != NULL)
2766				bus_dmamap_unload(adapter->txtag,
2767				    tx_buffer->map);
2768			if (tx_buffer->map != NULL) {
2769				bus_dmamap_destroy(adapter->txtag,
2770				    tx_buffer->map);
2771				tx_buffer->map = NULL;
2772			}
2773		}
2774	}
2775	if (adapter->tx_buffer_area != NULL) {
2776		free(adapter->tx_buffer_area, M_DEVBUF);
2777		adapter->tx_buffer_area = NULL;
2778	}
2779	if (adapter->txtag != NULL) {
2780		bus_dma_tag_destroy(adapter->txtag);
2781		adapter->txtag = NULL;
2782	}
2783}
2784
2785/*********************************************************************
2786 *
2787 *  The offload context needs to be set when we transfer the first
2788 *  packet of a particular protocol (TCP/UDP). We change the
2789 *  context only if the protocol type changes.
2790 *
2791 **********************************************************************/
2792static void
2793em_transmit_checksum_setup(struct adapter *adapter, struct mbuf *mp,
2794    uint32_t *txd_upper, uint32_t *txd_lower)
2795{
2796	struct em_context_desc *TXD;
2797	struct em_buffer *tx_buffer;
2798	struct ether_vlan_header *eh;
2799	struct ip *ip;
2800	struct ip6_hdr *ip6;
2801	struct tcphdr *th;
2802	int curr_txd, ehdrlen, hdr_len, ip_hlen;
2803	uint32_t cmd = 0;
2804	uint16_t etype;
2805	uint8_t ipproto;
2806
2807	/* Setup checksum offload context. */
2808	curr_txd = adapter->next_avail_tx_desc;
2809	tx_buffer = &adapter->tx_buffer_area[curr_txd];
2810	TXD = (struct em_context_desc *) &adapter->tx_desc_base[curr_txd];
2811
2812	*txd_lower = E1000_TXD_CMD_DEXT |	/* Extended descr type */
2813		     E1000_TXD_DTYP_D;		/* Data descr */
2814
2815	/*
2816	 * Determine where frame payload starts.
2817	 * Jump over vlan headers if already present,
2818	 * helpful for QinQ too.
2819	 */
2820	eh = mtod(mp, struct ether_vlan_header *);
2821	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
2822		etype = ntohs(eh->evl_proto);
2823		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
2824	} else {
2825		etype = ntohs(eh->evl_encap_proto);
2826		ehdrlen = ETHER_HDR_LEN;
2827	}
2828
2829	/*
2830	 * We only support TCP/UDP for IPv4 and IPv6 for the moment.
2831	 * TODO: Support SCTP too when it hits the tree.
2832	 */
2833	switch (etype) {
2834	case ETHERTYPE_IP:
2835		ip = (struct ip *)(mp->m_data + ehdrlen);
2836		ip_hlen = ip->ip_hl << 2;
2837
2838		/* Setup of IP header checksum. */
2839		if (mp->m_pkthdr.csum_flags & CSUM_IP) {
2840			/*
2841			 * Start offset for header checksum calculation.
2842			 * End offset for header checksum calculation.
2843			 * Offset of place to put the checksum.
2844			 */
2845			TXD->lower_setup.ip_fields.ipcss = ehdrlen;
2846			TXD->lower_setup.ip_fields.ipcse =
2847			    htole16(ehdrlen + ip_hlen);
2848			TXD->lower_setup.ip_fields.ipcso =
2849			    ehdrlen + offsetof(struct ip, ip_sum);
2850			cmd |= E1000_TXD_CMD_IP;
2851			*txd_upper |= E1000_TXD_POPTS_IXSM << 8;
2852		}
2853
2854		if (mp->m_len < ehdrlen + ip_hlen)
2855			return;	/* failure */
2856
2857		hdr_len = ehdrlen + ip_hlen;
2858		ipproto = ip->ip_p;
2859
2860		break;
2861	case ETHERTYPE_IPV6:
2862		ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
2863		ip_hlen = sizeof(struct ip6_hdr); /* XXX: No header stacking. */
2864
2865		if (mp->m_len < ehdrlen + ip_hlen)
2866			return;	/* failure */
2867
2868		/* IPv6 doesn't have a header checksum. */
2869
2870		hdr_len = ehdrlen + ip_hlen;
2871		ipproto = ip6->ip6_nxt;
2872
2873		break;
2874	default:
2875		*txd_upper = 0;
2876		*txd_lower = 0;
2877		return;
2878	}
2879
2880	switch (ipproto) {
2881	case IPPROTO_TCP:
2882		if (mp->m_pkthdr.csum_flags & CSUM_TCP) {
2883			/*
2884			 * Start offset for payload checksum calculation.
2885			 * End offset for payload checksum calculation.
2886			 * Offset of place to put the checksum.
2887			 */
2888			th = (struct tcphdr *)(mp->m_data + hdr_len);
2889			TXD->upper_setup.tcp_fields.tucss = hdr_len;
2890			TXD->upper_setup.tcp_fields.tucse = htole16(0);
2891			TXD->upper_setup.tcp_fields.tucso =
2892			    hdr_len + offsetof(struct tcphdr, th_sum);
2893			cmd |= E1000_TXD_CMD_TCP;
2894			*txd_upper |= E1000_TXD_POPTS_TXSM << 8;
2895		}
2896		break;
2897	case IPPROTO_UDP:
2898		if (mp->m_pkthdr.csum_flags & CSUM_UDP) {
2899			/*
2900			 * Start offset for header checksum calculation.
2901			 * End offset for header checksum calculation.
2902			 * Offset of place to put the checksum.
2903			 */
2904			TXD->upper_setup.tcp_fields.tucss = hdr_len;
2905			TXD->upper_setup.tcp_fields.tucse = htole16(0);
2906			TXD->upper_setup.tcp_fields.tucso =
2907			    hdr_len + offsetof(struct udphdr, uh_sum);
2908			*txd_upper |= E1000_TXD_POPTS_TXSM << 8;
2909		}
2910		break;
2911	default:
2912		break;
2913	}
2914
2915	TXD->tcp_seg_setup.data = htole32(0);
2916	TXD->cmd_and_length =
2917	    htole32(adapter->txd_cmd | E1000_TXD_CMD_DEXT | cmd);
2918	tx_buffer->m_head = NULL;
2919
2920	if (++curr_txd == adapter->num_tx_desc)
2921		curr_txd = 0;
2922
2923	adapter->num_tx_desc_avail--;
2924	adapter->next_avail_tx_desc = curr_txd;
2925}
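
/*
 * Worked example (editor's note, not part of the original driver): for
 * a TCP/IPv4 frame with no VLAN tag and no IP options, the context
 * descriptor set up above comes out as
 *
 *	ehdrlen = 14, ip_hlen = 20, hdr_len = 34
 *	ipcss = 14, ipcse = 34, ipcso = 14 + 10 = 24	(ip_sum)
 *	tucss = 34, tucse = 0,  tucso = 34 + 16 = 50	(th_sum)
 *
 * where 10 and 16 are offsetof(struct ip, ip_sum) and
 * offsetof(struct tcphdr, th_sum) respectively.
 */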
2926
2927/**********************************************************************
2928 *
2929 *  Setup work for hardware segmentation offload (TSO)
2930 *
2931 **********************************************************************/
2932static boolean_t
2933em_tso_setup(struct adapter *adapter, struct mbuf *mp, uint32_t *txd_upper,
2934   uint32_t *txd_lower)
2935{
2936	struct em_context_desc *TXD;
2937	struct em_buffer *tx_buffer;
2938	struct ether_vlan_header *eh;
2939	struct ip *ip;
2940	struct ip6_hdr *ip6;
2941	struct tcphdr *th;
2942	int curr_txd, ehdrlen, hdr_len, ip_hlen, isip6;
2943	uint16_t etype;
2944
2945	/*
2946	 * XXX: This is not really correct as the stack would not have
2947	 * set up all checksums.
2948	 * XXX: Returning FALSE is not sufficient as we may have to return
2949	 * in true failure cases as well.  Should do -1 (failure), 0 (no)
2950	 * and 1 (success).
2951	 */
2952	if (mp->m_pkthdr.len <= E1000_TX_BUFFER_SIZE)
2953		return FALSE;	/* 0 */
2954
2955	/*
2956	 * This function could/should be extended to support IP/IPv6
2957	 * fragmentation as well.  But as they say, one step at a time.
2958	 */
2959
2960	/*
2961	 * Determine where frame payload starts.
2962	 * Jump over vlan headers if already present,
2963	 * helpful for QinQ too.
2964	 */
2965	eh = mtod(mp, struct ether_vlan_header *);
2966	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
2967		etype = ntohs(eh->evl_proto);
2968		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
2969	} else {
2970		etype = ntohs(eh->evl_encap_proto);
2971		ehdrlen = ETHER_HDR_LEN;
2972	}
2973
2974	/* Ensure we have at least the IP+TCP header in the first mbuf. */
2975	if (mp->m_len < ehdrlen + sizeof(struct ip) + sizeof(struct tcphdr))
2976		return FALSE;	/* -1 */
2977
2978	/*
2979	 * We only support TCP for IPv4 for the moment; IPv6 is not enabled yet.
2980	 * TODO: Support SCTP too when it hits the tree.
2981	 */
2982	switch (etype) {
2983	case ETHERTYPE_IP:
2984		isip6 = 0;
2985		ip = (struct ip *)(mp->m_data + ehdrlen);
2986		if (ip->ip_p != IPPROTO_TCP)
2987			return FALSE;	/* 0 */
2988		ip->ip_len = 0;
2989		ip->ip_sum = 0;
2990		ip_hlen = ip->ip_hl << 2;
2991		if (mp->m_len < ehdrlen + ip_hlen + sizeof(struct tcphdr))
2992			return FALSE;	/* -1 */
2993		th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
2994#if 1
2995		th->th_sum = in_pseudo(ip->ip_src.s_addr,
2996		    ip->ip_dst.s_addr, htons(IPPROTO_TCP));
2997#else
2998		th->th_sum = mp->m_pkthdr.csum_data;
2999#endif
3000		break;
3001	case ETHERTYPE_IPV6:
3002		isip6 = 1;
3003		return FALSE;			/* Not supported yet. */
3004		ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
3005		if (ip6->ip6_nxt != IPPROTO_TCP)
3006			return FALSE;	/* 0 */
3007		ip6->ip6_plen = 0;
3008		ip_hlen = sizeof(struct ip6_hdr); /* XXX: no header stacking. */
3009		if (mp->m_len < ehdrlen + ip_hlen + sizeof(struct tcphdr))
3010			return FALSE;	/* -1 */
3011		th = (struct tcphdr *)((caddr_t)ip6 + ip_hlen);
3012#if 0
3013		th->th_sum = in6_pseudo(ip6->ip6_src, ip6->ip6_dst,
3014		    htons(IPPROTO_TCP));	/* XXX: function notyet. */
3015#else
3016		th->th_sum = mp->m_pkthdr.csum_data;
3017#endif
3018		break;
3019	default:
3020		return FALSE;
3021	}
3022	hdr_len = ehdrlen + ip_hlen + (th->th_off << 2);
3023
3024	*txd_lower = (E1000_TXD_CMD_DEXT |	/* Extended descr type */
3025		      E1000_TXD_DTYP_D |	/* Data descr type */
3026		      E1000_TXD_CMD_TSE);	/* Do TSE on this packet */
3027
3028	/* IP and/or TCP header checksum calculation and insertion. */
3029	*txd_upper = ((isip6 ? 0 : E1000_TXD_POPTS_IXSM) |
3030		      E1000_TXD_POPTS_TXSM) << 8;
3031
3032	curr_txd = adapter->next_avail_tx_desc;
3033	tx_buffer = &adapter->tx_buffer_area[curr_txd];
3034	TXD = (struct em_context_desc *) &adapter->tx_desc_base[curr_txd];
3035
3036	/* IPv6 doesn't have a header checksum. */
3037	if (!isip6) {
3038		/*
3039		 * Start offset for header checksum calculation.
3040		 * End offset for header checksum calculation.
3041		 * Offset of place to put the checksum.
3042		 */
3043		TXD->lower_setup.ip_fields.ipcss = ehdrlen;
3044		TXD->lower_setup.ip_fields.ipcse =
3045		    htole16(ehdrlen + ip_hlen - 1);
3046		TXD->lower_setup.ip_fields.ipcso =
3047		    ehdrlen + offsetof(struct ip, ip_sum);
3048	}
3049	/*
3050	 * Start offset for payload checksum calculation.
3051	 * End offset for payload checksum calculation.
3052	 * Offset of place to put the checksum.
3053	 */
3054	TXD->upper_setup.tcp_fields.tucss =
3055	    ehdrlen + ip_hlen;
3056	TXD->upper_setup.tcp_fields.tucse = 0;
3057	TXD->upper_setup.tcp_fields.tucso =
3058	    ehdrlen + ip_hlen + offsetof(struct tcphdr, th_sum);
3059	/*
3060	 * Payload size per packet w/o any headers.
3061	 * Length of all headers up to payload.
3062	 */
3063	TXD->tcp_seg_setup.fields.mss = htole16(mp->m_pkthdr.tso_segsz);
3064	TXD->tcp_seg_setup.fields.hdr_len = hdr_len;
3065
3066	TXD->cmd_and_length = htole32(adapter->txd_cmd |
3067				E1000_TXD_CMD_DEXT |	/* Extended descr */
3068				E1000_TXD_CMD_TSE |	/* TSE context */
3069				(isip6 ? 0 : E1000_TXD_CMD_IP) | /* Do IP csum */
3070				E1000_TXD_CMD_TCP |	/* Do TCP checksum */
3071				(mp->m_pkthdr.len - (hdr_len))); /* Total len */
3072
3073	tx_buffer->m_head = NULL;
3074
3075	if (++curr_txd == adapter->num_tx_desc)
3076		curr_txd = 0;
3077
3078	adapter->num_tx_desc_avail--;
3079	adapter->next_avail_tx_desc = curr_txd;
3080	adapter->tx_tso = TRUE;
3081
3082	return TRUE;
3083}
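
/*
 * Worked example (editor's note, not part of the original driver): for
 * a large TCP/IPv4 send with tso_segsz = 1448 and no IP or TCP options,
 * the TSO context above carries
 *
 *	hdr_len = 14 + 20 + 20 = 54
 *	mss = 1448, ipcse = 14 + 20 - 1 = 33, tucso = 34 + 16 = 50
 *	cmd_and_length payload = m_pkthdr.len - 54
 *
 * and the hardware emits one segment per 1448 payload bytes, rounding
 * up, replicating and fixing up the headers for each segment.
 */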
3084
3085/**********************************************************************
3086 *
3087 *  Examine each tx_buffer in the used queue. If the hardware is done
3088 *  processing the packet then free associated resources. The
3089 *  tx_buffer is put back on the free queue.
3090 *
3091 **********************************************************************/
3092static void
3093em_txeof(struct adapter *adapter)
3094{
3095	int i, num_avail;
3096	struct em_buffer *tx_buffer;
3097	struct em_tx_desc   *tx_desc;
3098	struct ifnet   *ifp = adapter->ifp;
3099
3100	EM_LOCK_ASSERT(adapter);
3101
3102	if (adapter->num_tx_desc_avail == adapter->num_tx_desc)
3103		return;
3104
3105	num_avail = adapter->num_tx_desc_avail;
3106	i = adapter->oldest_used_tx_desc;
3107
3108	tx_buffer = &adapter->tx_buffer_area[i];
3109	tx_desc = &adapter->tx_desc_base[i];
3110
3111	bus_dmamap_sync(adapter->txdma.dma_tag, adapter->txdma.dma_map,
3112	    BUS_DMASYNC_POSTREAD);
3113	while (tx_desc->upper.fields.status & E1000_TXD_STAT_DD) {
3114
3115		tx_desc->upper.data = 0;
3116		num_avail++;
3117
3118		if (tx_buffer->m_head) {
3119			ifp->if_opackets++;
3120			bus_dmamap_sync(adapter->txtag, tx_buffer->map,
3121			    BUS_DMASYNC_POSTWRITE);
3122			bus_dmamap_unload(adapter->txtag, tx_buffer->map);
3123
3124			m_freem(tx_buffer->m_head);
3125			tx_buffer->m_head = NULL;
3126		}
3127
3128		if (++i == adapter->num_tx_desc)
3129			i = 0;
3130
3131		tx_buffer = &adapter->tx_buffer_area[i];
3132		tx_desc = &adapter->tx_desc_base[i];
3133	}
3134	bus_dmamap_sync(adapter->txdma.dma_tag, adapter->txdma.dma_map,
3135	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3136
3137	adapter->oldest_used_tx_desc = i;
3138
3139	/*
3140	 * If we have enough room, clear IFF_DRV_OACTIVE to tell the stack
3141	 * that it is OK to send packets.
3142	 * If there are no pending descriptors, clear the timeout. Otherwise,
3143	 * if some descriptors have been freed, restart the timeout.
3144	 */
3145	if (num_avail > EM_TX_CLEANUP_THRESHOLD) {
3146		ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
3147		if (num_avail == adapter->num_tx_desc)
3148			ifp->if_timer = 0;
3149		else if (num_avail != adapter->num_tx_desc_avail)
3150			ifp->if_timer = EM_TX_TIMEOUT;
3151	}
3152	adapter->num_tx_desc_avail = num_avail;
3153}
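
/*
 * Illustrative example (editor's note, not part of the original driver):
 * with num_tx_desc = 256, oldest_used_tx_desc = 250 and six completed
 * descriptors (DD set), the loop above reclaims slots 250..255, wraps i
 * back to 0, stops at the first descriptor without DD, and leaves
 * oldest_used_tx_desc = 0 with num_tx_desc_avail increased by six.
 */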
3154
3155/*********************************************************************
3156 *
3157 *  Get a buffer from system mbuf buffer pool.
3158 *
3159 **********************************************************************/
3160static int
3161em_get_buf(struct adapter *adapter, int i)
3162{
3163	struct mbuf		*m;
3164	bus_dma_segment_t	segs[1];
3165	bus_dmamap_t		map;
3166	struct em_buffer	*rx_buffer;
3167	int			error, nsegs;
3168
3169	m = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR);
3170	if (m == NULL) {
3171		adapter->mbuf_cluster_failed++;
3172		return (ENOBUFS);
3173	}
3174	m->m_len = m->m_pkthdr.len = MCLBYTES;
3175	if (adapter->hw.max_frame_size <= (MCLBYTES - ETHER_ALIGN))
3176		m_adj(m, ETHER_ALIGN);
3177
3178	/*
3179	 * Using memory from the mbuf cluster pool, invoke the
3180	 * bus_dma machinery to arrange the memory mapping.
3181	 */
3182	error = bus_dmamap_load_mbuf_sg(adapter->rxtag, adapter->rx_sparemap,
3183	    m, segs, &nsegs, BUS_DMA_NOWAIT);
3184	if (error != 0) {
3185		m_free(m);
3186		return (error);
3187	}
3188	/* If nsegs is wrong then the stack is corrupt. */
3189	KASSERT(nsegs == 1, ("Too many segments returned!"));
3190
3191	rx_buffer = &adapter->rx_buffer_area[i];
3192	if (rx_buffer->m_head != NULL)
3193		bus_dmamap_unload(adapter->rxtag, rx_buffer->map);
3194
3195	map = rx_buffer->map;
3196	rx_buffer->map = adapter->rx_sparemap;
3197	adapter->rx_sparemap = map;
3198	bus_dmamap_sync(adapter->rxtag, rx_buffer->map, BUS_DMASYNC_PREREAD);
3199	rx_buffer->m_head = m;
3200
3201	adapter->rx_desc_base[i].buffer_addr = htole64(segs[0].ds_addr);
3202
3203	return (0);
3204}
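
/*
 * Design note (editor's note, not part of the original driver): the new
 * mbuf is loaded into rx_sparemap first, so a bus_dmamap_load_mbuf_sg()
 * failure leaves the ring slot and its existing mapping untouched; only
 * on success are the slot's map and the spare map swapped.
 */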
3205
3206/*********************************************************************
3207 *
3208 *  Allocate memory for rx_buffer structures. Since we use one
3209 *  rx_buffer per received packet, the maximum number of rx_buffer's
3210 *  rx_buffer per received packet, the maximum number of rx_buffers
3211 *  that we've allocated.
3212 *
3213 **********************************************************************/
3214static int
3215em_allocate_receive_structures(struct adapter *adapter)
3216{
3217	device_t dev = adapter->dev;
3218	struct em_buffer *rx_buffer;
3219	int i, error;
3220
3221	adapter->rx_buffer_area = malloc(sizeof(struct em_buffer) * adapter->num_rx_desc,
3222	    M_DEVBUF, M_NOWAIT);
3223	if (adapter->rx_buffer_area == NULL) {
3224		device_printf(dev, "Unable to allocate rx_buffer memory\n");
3225		return (ENOMEM);
3226	}
3227
3228	bzero(adapter->rx_buffer_area, sizeof(struct em_buffer) * adapter->num_rx_desc);
3229
3230	error = bus_dma_tag_create(bus_get_dma_tag(dev),	/* parent */
3231				1, 0,			/* alignment, bounds */
3232				BUS_SPACE_MAXADDR,	/* lowaddr */
3233				BUS_SPACE_MAXADDR,	/* highaddr */
3234				NULL, NULL,		/* filter, filterarg */
3235				MCLBYTES,		/* maxsize */
3236				1,			/* nsegments */
3237				MCLBYTES,		/* maxsegsize */
3238				0,			/* flags */
3239				NULL,			/* lockfunc */
3240				NULL,			/* lockarg */
3241				&adapter->rxtag);
3242	if (error) {
3243		device_printf(dev, "%s: bus_dma_tag_create failed %d\n",
3244		    __func__, error);
3245		goto fail;
3246	}
3247
3248	error = bus_dmamap_create(adapter->rxtag, BUS_DMA_NOWAIT,
3249	    &adapter->rx_sparemap);
3250	if (error) {
3251		device_printf(dev, "%s: bus_dmamap_create failed: %d\n",
3252		    __func__, error);
3253		goto fail;
3254	}
3255	rx_buffer = adapter->rx_buffer_area;
3256	for (i = 0; i < adapter->num_rx_desc; i++, rx_buffer++) {
3257		error = bus_dmamap_create(adapter->rxtag, BUS_DMA_NOWAIT,
3258		    &rx_buffer->map);
3259		if (error) {
3260			device_printf(dev, "%s: bus_dmamap_create failed: %d\n",
3261			    __func__, error);
3262			goto fail;
3263		}
3264	}
3265
3266	for (i = 0; i < adapter->num_rx_desc; i++) {
3267		error = em_get_buf(adapter, i);
3268		if (error)
3269			goto fail;
3270	}
3271	bus_dmamap_sync(adapter->rxdma.dma_tag, adapter->rxdma.dma_map,
3272	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3273
3274	return (0);
3275
3276fail:
3277	em_free_receive_structures(adapter);
3278	return (error);
3279}
3280
3281/*********************************************************************
3282 *
3283 *  Allocate and initialize receive structures.
3284 *
3285 **********************************************************************/
3286static int
3287em_setup_receive_structures(struct adapter *adapter)
3288{
3289	int error;
3290
3291	bzero(adapter->rx_desc_base, (sizeof(struct em_rx_desc)) * adapter->num_rx_desc);
3292
3293	if ((error = em_allocate_receive_structures(adapter)) != 0)
3294		return (error);
3295
3296	/* Setup our descriptor pointers */
3297	adapter->next_rx_desc_to_check = 0;
3298
3299	return (0);
3300}
3301
3302/*********************************************************************
3303 *
3304 *  Enable receive unit.
3305 *
3306 **********************************************************************/
3307static void
3308em_initialize_receive_unit(struct adapter *adapter)
3309{
3310	struct ifnet	*ifp = adapter->ifp;
3311	uint64_t	bus_addr;
3312	uint32_t	reg_rctl;
3313	uint32_t	reg_rxcsum;
3314
3315	INIT_DEBUGOUT("em_initialize_receive_unit: begin");
3316
3317	/*
3318	 * Make sure receives are disabled while setting
3319	 * up the descriptor ring
3320	 */
3321	E1000_WRITE_REG(&adapter->hw, RCTL, 0);
3322
3323	/* Set the Receive Delay Timer Register */
3324	E1000_WRITE_REG(&adapter->hw, RDTR, adapter->rx_int_delay.value | E1000_RDT_FPDB);
3325
3326	if (adapter->hw.mac_type >= em_82540) {
3327		E1000_WRITE_REG(&adapter->hw, RADV, adapter->rx_abs_int_delay.value);
3328
3329		/*
3330		 * Set the interrupt throttling rate. Value is calculated
3331		 * as DEFAULT_ITR = 1/(MAX_INTS_PER_SEC * 256ns)
3332		 */
3333#define MAX_INTS_PER_SEC	8000
3334	#define DEFAULT_ITR		(1000000000 / (MAX_INTS_PER_SEC * 256))
3335		E1000_WRITE_REG(&adapter->hw, ITR, DEFAULT_ITR);
3336	}
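
	/*
	 * Worked example (editor's note, not part of the original driver):
	 * ITR counts in 256ns units, so with MAX_INTS_PER_SEC = 8000
	 *
	 *	DEFAULT_ITR = 1000000000 / (8000 * 256) = 488
	 *
	 * i.e. at least 488 * 256ns ~= 125us between interrupts, capping
	 * the rate at roughly 8000 interrupts per second.
	 */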
3337
3338	/* Setup the Base and Length of the Rx Descriptor Ring */
3339	bus_addr = adapter->rxdma.dma_paddr;
3340	E1000_WRITE_REG(&adapter->hw, RDLEN, adapter->num_rx_desc *
3341			sizeof(struct em_rx_desc));
3342	E1000_WRITE_REG(&adapter->hw, RDBAH, (uint32_t)(bus_addr >> 32));
3343	E1000_WRITE_REG(&adapter->hw, RDBAL, (uint32_t)bus_addr);
3344
3345	/* Setup the HW Rx Head and Tail Descriptor Pointers */
3346	E1000_WRITE_REG(&adapter->hw, RDT, adapter->num_rx_desc - 1);
3347	E1000_WRITE_REG(&adapter->hw, RDH, 0);
3348
3349	/* Setup the Receive Control Register */
3350	reg_rctl = E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_LBM_NO |
3351		   E1000_RCTL_RDMTS_HALF |
3352		   (adapter->hw.mc_filter_type << E1000_RCTL_MO_SHIFT);
3353
3354	if (adapter->hw.tbi_compatibility_on == TRUE)
3355		reg_rctl |= E1000_RCTL_SBP;
3356
3357
3358	switch (adapter->rx_buffer_len) {
3359	default:
3360	case EM_RXBUFFER_2048:
3361		reg_rctl |= E1000_RCTL_SZ_2048;
3362		break;
3363	case EM_RXBUFFER_4096:
3364		reg_rctl |= E1000_RCTL_SZ_4096 | E1000_RCTL_BSEX | E1000_RCTL_LPE;
3365		break;
3366	case EM_RXBUFFER_8192:
3367		reg_rctl |= E1000_RCTL_SZ_8192 | E1000_RCTL_BSEX | E1000_RCTL_LPE;
3368		break;
3369	case EM_RXBUFFER_16384:
3370		reg_rctl |= E1000_RCTL_SZ_16384 | E1000_RCTL_BSEX | E1000_RCTL_LPE;
3371		break;
3372	}
3373
3374	if (ifp->if_mtu > ETHERMTU)
3375		reg_rctl |= E1000_RCTL_LPE;
3376
3377	/* Enable 82543 Receive Checksum Offload for TCP and UDP */
3378	if ((adapter->hw.mac_type >= em_82543) &&
3379	    (ifp->if_capenable & IFCAP_RXCSUM)) {
3380		reg_rxcsum = E1000_READ_REG(&adapter->hw, RXCSUM);
3381		reg_rxcsum |= (E1000_RXCSUM_IPOFL | E1000_RXCSUM_TUOFL);
3382		E1000_WRITE_REG(&adapter->hw, RXCSUM, reg_rxcsum);
3383	}
3384
3385	/* Enable Receives */
3386	E1000_WRITE_REG(&adapter->hw, RCTL, reg_rctl);
3387}
3388
3389/*********************************************************************
3390 *
3391 *  Free receive related data structures.
3392 *
3393 **********************************************************************/
3394static void
3395em_free_receive_structures(struct adapter *adapter)
3396{
3397	struct em_buffer *rx_buffer;
3398	int i;
3399
3400	INIT_DEBUGOUT("free_receive_structures: begin");
3401
3402	if (adapter->rx_sparemap) {
3403		bus_dmamap_destroy(adapter->rxtag, adapter->rx_sparemap);
3404		adapter->rx_sparemap = NULL;
3405	}
3406	if (adapter->rx_buffer_area != NULL) {
3407		rx_buffer = adapter->rx_buffer_area;
3408		for (i = 0; i < adapter->num_rx_desc; i++, rx_buffer++) {
3409			if (rx_buffer->m_head != NULL) {
3410				bus_dmamap_sync(adapter->rxtag, rx_buffer->map,
3411				    BUS_DMASYNC_POSTREAD);
3412				bus_dmamap_unload(adapter->rxtag,
3413				    rx_buffer->map);
3414				m_freem(rx_buffer->m_head);
3415				rx_buffer->m_head = NULL;
3416			} else if (rx_buffer->map != NULL)
3417				bus_dmamap_unload(adapter->rxtag,
3418				    rx_buffer->map);
3419			if (rx_buffer->map != NULL) {
3420				bus_dmamap_destroy(adapter->rxtag,
3421				    rx_buffer->map);
3422				rx_buffer->map = NULL;
3423			}
3424		}
3425	}
3426	if (adapter->rx_buffer_area != NULL) {
3427		free(adapter->rx_buffer_area, M_DEVBUF);
3428		adapter->rx_buffer_area = NULL;
3429	}
3430	if (adapter->rxtag != NULL) {
3431		bus_dma_tag_destroy(adapter->rxtag);
3432		adapter->rxtag = NULL;
3433	}
3434}
3435
3436/*********************************************************************
3437 *
3438 *  This routine executes in interrupt context. It replenishes
3439 *  the mbufs in the receive descriptor ring and passes data that
3440 *  has been DMA'ed into host memory up to the upper layers.
3441 *
3442 *  We loop at most count times if count is > 0, or until done if
3443 *  count < 0.
3444 *
3445 *********************************************************************/
3446static int
3447em_rxeof(struct adapter *adapter, int count)
3448{
3449	struct ifnet	*ifp;
3450	struct mbuf	*mp;
3451	uint8_t		accept_frame = 0;
3452	uint8_t		eop = 0;
3453	uint16_t 	len, desc_len, prev_len_adj;
3454	int		i;
3455
3456	/* Pointer to the receive descriptor being examined. */
3457	struct em_rx_desc   *current_desc;
3458	uint8_t		status;
3459
3460	ifp = adapter->ifp;
3461	i = adapter->next_rx_desc_to_check;
3462	current_desc = &adapter->rx_desc_base[i];
3463	bus_dmamap_sync(adapter->rxdma.dma_tag, adapter->rxdma.dma_map,
3464	    BUS_DMASYNC_POSTREAD);
3465
3466	if (!((current_desc->status) & E1000_RXD_STAT_DD))
3467		return (0);
3468
3469	while ((current_desc->status & E1000_RXD_STAT_DD) &&
3470	    (count != 0) &&
3471	    (ifp->if_drv_flags & IFF_DRV_RUNNING)) {
3472		struct mbuf *m = NULL;
3473
3474		mp = adapter->rx_buffer_area[i].m_head;
3475		/*
3476		 * Can't defer bus_dmamap_sync(9) because TBI_ACCEPT
3477		 * needs to access the last received byte in the mbuf.
3478		 */
3479		bus_dmamap_sync(adapter->rxtag, adapter->rx_buffer_area[i].map,
3480		    BUS_DMASYNC_POSTREAD);
3481
3482		accept_frame = 1;
3483		prev_len_adj = 0;
3484		desc_len = le16toh(current_desc->length);
3485		status = current_desc->status;
3486		if (status & E1000_RXD_STAT_EOP) {
3487			count--;
3488			eop = 1;
3489			if (desc_len < ETHER_CRC_LEN) {
3490				len = 0;
3491				prev_len_adj = ETHER_CRC_LEN - desc_len;
3492			} else
3493				len = desc_len - ETHER_CRC_LEN;
3494		} else {
3495			eop = 0;
3496			len = desc_len;
3497		}
3498
3499		if (current_desc->errors & E1000_RXD_ERR_FRAME_ERR_MASK) {
3500			uint8_t		last_byte;
3501			uint32_t	pkt_len = desc_len;
3502
3503			if (adapter->fmp != NULL)
3504				pkt_len += adapter->fmp->m_pkthdr.len;
3505
3506			last_byte = *(mtod(mp, caddr_t) + desc_len - 1);
3507			if (TBI_ACCEPT(&adapter->hw, status,
3508			    current_desc->errors, pkt_len, last_byte)) {
3509				em_tbi_adjust_stats(&adapter->hw,
3510				    &adapter->stats, pkt_len,
3511				    adapter->hw.mac_addr);
3512				if (len > 0)
3513					len--;
3514			} else
3515				accept_frame = 0;
3516		}
3517
3518		if (accept_frame) {
3519			if (em_get_buf(adapter, i) != 0) {
3520				ifp->if_iqdrops++;
3521				goto discard;
3522			}
3523
3524			/* Assign correct length to the current fragment */
3525			mp->m_len = len;
3526
3527			if (adapter->fmp == NULL) {
3528				mp->m_pkthdr.len = len;
3529				adapter->fmp = mp; /* Store the first mbuf */
3530				adapter->lmp = mp;
3531			} else {
3532				/* Chain mbuf's together */
3533				/* Chain mbufs together. */
3534				/*
3535				 * Adjust length of previous mbuf in chain if
3536				 * we received less than 4 bytes in the last
3537				 * descriptor.
3538				 */
3539				if (prev_len_adj > 0) {
3540					adapter->lmp->m_len -= prev_len_adj;
3541					adapter->fmp->m_pkthdr.len -=
3542					    prev_len_adj;
3543				}
3544				adapter->lmp->m_next = mp;
3545				adapter->lmp = adapter->lmp->m_next;
3546				adapter->fmp->m_pkthdr.len += len;
3547			}
3548
3549			if (eop) {
3550				adapter->fmp->m_pkthdr.rcvif = ifp;
3551				ifp->if_ipackets++;
3552				em_receive_checksum(adapter, current_desc,
3553				    adapter->fmp);
3554#ifndef __NO_STRICT_ALIGNMENT
3555				if (adapter->hw.max_frame_size >
3556				    (MCLBYTES - ETHER_ALIGN) &&
3557				    em_fixup_rx(adapter) != 0)
3558					goto skip;
3559#endif
3560				if (status & E1000_RXD_STAT_VP) {
3561					adapter->fmp->m_pkthdr.ether_vtag =
3562					    (le16toh(current_desc->special) &
3563					    E1000_RXD_SPC_VLAN_MASK);
3564					adapter->fmp->m_flags |= M_VLANTAG;
3565				}
3566#ifndef __NO_STRICT_ALIGNMENT
3567skip:
3568#endif
3569				m = adapter->fmp;
3570				adapter->fmp = NULL;
3571				adapter->lmp = NULL;
3572			}
3573		} else {
3574			ifp->if_ierrors++;
3575discard:
3576			/* Reuse loaded DMA map and just update mbuf chain */
3577			mp = adapter->rx_buffer_area[i].m_head;
3578			mp->m_len = mp->m_pkthdr.len = MCLBYTES;
3579			mp->m_data = mp->m_ext.ext_buf;
3580			mp->m_next = NULL;
3581			if (adapter->hw.max_frame_size <= (MCLBYTES - ETHER_ALIGN))
3582				m_adj(mp, ETHER_ALIGN);
3583			if (adapter->fmp != NULL) {
3584				m_freem(adapter->fmp);
3585				adapter->fmp = NULL;
3586				adapter->lmp = NULL;
3587			}
3588			m = NULL;
3589		}
3590
3591		/* Zero out the receive descriptor's status. */
3592		current_desc->status = 0;
3593		bus_dmamap_sync(adapter->rxdma.dma_tag, adapter->rxdma.dma_map,
3594		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3595
3596		/* Advance our pointers to the next descriptor. */
3597		if (++i == adapter->num_rx_desc)
3598			i = 0;
3599		if (m != NULL) {
3600			adapter->next_rx_desc_to_check = i;
3601#ifdef DEVICE_POLLING
3602			EM_UNLOCK(adapter);
3603			(*ifp->if_input)(ifp, m);
3604			EM_LOCK(adapter);
3605#else
3606			(*ifp->if_input)(ifp, m);
3607#endif
3608			i = adapter->next_rx_desc_to_check;
3609		}
3610		current_desc = &adapter->rx_desc_base[i];
3611	}
3612	adapter->next_rx_desc_to_check = i;
3613
3614	/* Advance the E1000's Receive Queue #0 "Tail Pointer". */
3615	if (--i < 0)
3616		i = adapter->num_rx_desc - 1;
3617	E1000_WRITE_REG(&adapter->hw, RDT, i);
3618	if (!((current_desc->status) & E1000_RXD_STAT_DD))
3619		return (0);
3620
3621	return (1);
3622}
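
/*
 * Note on the RDT update above: the tail is written one descriptor behind
 * next_rx_desc_to_check.  Keeping at least one descriptor between the
 * hardware tail and the software head is the usual guard that keeps a
 * completely full ring distinguishable from an empty one.
 */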
3623
3624#ifndef __NO_STRICT_ALIGNMENT
3625/*
3626 * When jumbo frames are enabled we must realign the entire payload on
3627 * architectures with strict alignment. This is a serious design flaw of
3628 * the 8254x, as it largely defeats the benefit of DMA: the chip only
3629 * allows RX buffer sizes of 2048/4096/8192/16384 bytes, while what we
3630 * really want is 2048 - ETHER_ALIGN so the payload comes out aligned.
3631 * Even on architectures without strict alignment the 8254x performs
3632 * unaligned memory accesses, which hurts performance too. To avoid
3633 * copying an entire frame just to realign it, we allocate a new mbuf,
3634 * copy only the ethernet header into it, and prepend it to the chain.
3635 *
3636 * Be aware that on architectures with strict alignment the 8254x
3637 * performs best when jumbo frames are not used at all.
3638 */
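/*
 * Illustrative note: ETHER_HDR_LEN is 14, so a frame starting on a 4-byte
 * boundary puts the IP header at offset 14, a 2-byte boundary.  Shifting
 * the data by ETHER_HDR_LEN (or prepending a separate header mbuf, as
 * below) moves the IP header to offset 28, restoring 4-byte alignment.
 */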
3639static int
3640em_fixup_rx(struct adapter *adapter)
3641{
3642	struct mbuf *m, *n;
3643	int error;
3644
3645	error = 0;
3646	m = adapter->fmp;
3647	if (m->m_len <= (MCLBYTES - ETHER_HDR_LEN)) {
3648		bcopy(m->m_data, m->m_data + ETHER_HDR_LEN, m->m_len);
3649		m->m_data += ETHER_HDR_LEN;
3650	} else {
3651		MGETHDR(n, M_DONTWAIT, MT_DATA);
3652		if (n != NULL) {
3653			bcopy(m->m_data, n->m_data, ETHER_HDR_LEN);
3654			m->m_data += ETHER_HDR_LEN;
3655			m->m_len -= ETHER_HDR_LEN;
3656			n->m_len = ETHER_HDR_LEN;
3657			M_MOVE_PKTHDR(n, m);
3658			n->m_next = m;
3659			adapter->fmp = n;
3660		} else {
3661			adapter->ifp->if_iqdrops++;
3662			adapter->mbuf_alloc_failed++;
3663			m_freem(adapter->fmp);
3664			adapter->fmp = NULL;
3665			adapter->lmp = NULL;
3666			error = ENOBUFS;
3667		}
3668	}
3669
3670	return (error);
3671}
3672#endif
3673
3674/*********************************************************************
3675 *
3676 *  Verify that the hardware indicated that the checksum is valid.
3677 *  Inform the stack of the checksum status so that the stack does
3678 *  not spend time verifying it again.
3679 *
3680 *********************************************************************/
3681static void
3682em_receive_checksum(struct adapter *adapter, struct em_rx_desc *rx_desc,
3683		    struct mbuf *mp)
3684{
3685	/* 82543 or newer only */
3686	if ((adapter->hw.mac_type < em_82543) ||
3687	    /* Ignore Checksum bit is set */
3688	    (rx_desc->status & E1000_RXD_STAT_IXSM)) {
3689		mp->m_pkthdr.csum_flags = 0;
3690		return;
3691	}
3692
3693	if (rx_desc->status & E1000_RXD_STAT_IPCS) {
3694		/* Did it pass? */
3695		if (!(rx_desc->errors & E1000_RXD_ERR_IPE)) {
3696			/* IP Checksum Good */
3697			mp->m_pkthdr.csum_flags = CSUM_IP_CHECKED;
3698			mp->m_pkthdr.csum_flags |= CSUM_IP_VALID;
3699
3700		} else {
3701			mp->m_pkthdr.csum_flags = 0;
3702		}
3703	}
3704
3705	if (rx_desc->status & E1000_RXD_STAT_TCPCS) {
3706		/* Did it pass? */
3707		if (!(rx_desc->errors & E1000_RXD_ERR_TCPE)) {
3708			mp->m_pkthdr.csum_flags |=
3709			(CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
3710			mp->m_pkthdr.csum_data = htons(0xffff);
3711		}
3712	}
3713}
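
/*
 * Note: CSUM_DATA_VALID | CSUM_PSEUDO_HDR with csum_data = 0xffff is the
 * conventional way to report a fully verified TCP/UDP checksum; the stack
 * XORs csum_data with 0xffff and treats the zero result as a valid
 * checksum without recomputing it.
 */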
3714
3715
3716static void
3717em_enable_vlans(struct adapter *adapter)
3718{
3719	uint32_t ctrl;
3720
3721	E1000_WRITE_REG(&adapter->hw, VET, ETHERTYPE_VLAN);
3722
3723	ctrl = E1000_READ_REG(&adapter->hw, CTRL);
3724	ctrl |= E1000_CTRL_VME;
3725	E1000_WRITE_REG(&adapter->hw, CTRL, ctrl);
3726}
3727
3728static void
3729em_disable_vlans(struct adapter *adapter)
3730{
3731	uint32_t ctrl;
3732
3733	ctrl = E1000_READ_REG(&adapter->hw, CTRL);
3734	ctrl &= ~E1000_CTRL_VME;
3735	E1000_WRITE_REG(&adapter->hw, CTRL, ctrl);
3736}
3737
3738static void
3739em_enable_intr(struct adapter *adapter)
3740{
3741	E1000_WRITE_REG(&adapter->hw, IMS, (IMS_ENABLE_MASK));
3742}
3743
3744static void
3745em_disable_intr(struct adapter *adapter)
3746{
3747	/*
3748	 * The first version of the 82542 had an erratum where, when link was
3749	 * forced, it would stay up even if the cable was disconnected.
3750	 * Sequence errors were used to detect the disconnect and then the
3751	 * driver would unforce the link. This code is in the ISR. For this
3752	 * to work correctly the Sequence error interrupt had to be enabled
3753	 * all the time.
3754	 */
3755
3756	if (adapter->hw.mac_type == em_82542_rev2_0)
3757	    E1000_WRITE_REG(&adapter->hw, IMC,
3758		(0xffffffff & ~E1000_IMC_RXSEQ));
3759	else
3760	    E1000_WRITE_REG(&adapter->hw, IMC,
3761		0xffffffff);
3762}
3763
3764static int
3765em_is_valid_ether_addr(uint8_t *addr)
3766{
3767	char zero_addr[ETHER_ADDR_LEN] = { 0, 0, 0, 0, 0, 0 };
3768
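	/* Reject group (multicast/broadcast) addresses and all-zero addresses. */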
3769	if ((addr[0] & 1) || (!bcmp(addr, zero_addr, ETHER_ADDR_LEN))) {
3770		return (FALSE);
3771	}
3772
3773	return (TRUE);
3774}
3775
3776void
3777em_write_pci_cfg(struct em_hw *hw, uint32_t reg, uint16_t *value)
3778{
3779	pci_write_config(((struct em_osdep *)hw->back)->dev, reg, *value, 2);
3780}
3781
3782void
3783em_read_pci_cfg(struct em_hw *hw, uint32_t reg, uint16_t *value)
3784{
3785	*value = pci_read_config(((struct em_osdep *)hw->back)->dev, reg, 2);
3786}
3787
3788void
3789em_pci_set_mwi(struct em_hw *hw)
3790{
3791	pci_write_config(((struct em_osdep *)hw->back)->dev, PCIR_COMMAND,
3792	    (hw->pci_cmd_word | CMD_MEM_WRT_INVALIDATE), 2);
3793}
3794
3795void
3796em_pci_clear_mwi(struct em_hw *hw)
3797{
3798	pci_write_config(((struct em_osdep *)hw->back)->dev, PCIR_COMMAND,
3799	    (hw->pci_cmd_word & ~CMD_MEM_WRT_INVALIDATE), 2);
3800}
3801
3802/*********************************************************************
3803* 82544 Coexistence issue workaround.
3804*    There are two issues:
3805*       1. Transmit hang.
3806*          To detect this issue, the following equation can be used:
3807*          SIZE[3:0] + ADDR[2:0] = SUM[3:0]
3808*          If SUM[3:0] is between 1 and 4, the hang can occur.
3809*
3810*       2. DAC (Dual Address Cycle, 64-bit addressing) issue.
3811*          To detect this issue, the following equation can be used:
3812*          SIZE[3:0] + ADDR[2:0] = SUM[3:0]
3813*          If SUM[3:0] is between 9 and 0xC, the issue can occur.
3814*
3815*    WORKAROUND:
3816*          Make sure no buffer ends at an address whose low nibble is
3817*          1-4 (hang) or 9-0xC (DAC); if it would, split off the last
3818*          4 bytes into a second descriptor.
3819**********************************************************************/
3820static uint32_t
3821em_fill_descriptors (bus_addr_t address, uint32_t length,
3822		PDESC_ARRAY desc_array)
3823{
3824	/* The issue is sensitive to both length and address. */
3825	/* Check the length first: 4 bytes or less is always safe. */
3826	uint32_t safe_terminator;
3827	if (length <= 4) {
3828		desc_array->descriptor[0].address = address;
3829		desc_array->descriptor[0].length = length;
3830		desc_array->elements = 1;
3831		return (desc_array->elements);
3832	}
3833	safe_terminator = (uint32_t)((((uint32_t)address & 0x7) + (length & 0xF)) & 0xF);
3834	/* If SUM does not fall in 0x1-0x4 (hang) or 0x9-0xC (DAC), */
3835	/* then a single descriptor covers the whole buffer safely. */
3836	if (safe_terminator == 0 ||
3837	    (safe_terminator > 4 &&
3838	     safe_terminator < 9) ||
3839	    (safe_terminator > 0xC && safe_terminator <= 0xF)) {
3840		desc_array->descriptor[0].address = address;
3841		desc_array->descriptor[0].length = length;
3842		desc_array->elements = 1;
3843		return (desc_array->elements);
3844	}
3845
3846	desc_array->descriptor[0].address = address;
3847	desc_array->descriptor[0].length = length - 4;
3848	desc_array->descriptor[1].address = address + (length - 4);
3849	desc_array->descriptor[1].length = 4;
3850	desc_array->elements = 2;
3851	return (desc_array->elements);
3852}
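
/*
 * Worked example: for a buffer whose address ends in 0x6 with length 62
 * (0x3E), safe_terminator = (6 + 0xE) & 0xF = 4, inside the 1-4 hang
 * range.  The split above then yields a 58-byte descriptor ((6 + 0xA) &
 * 0xF = 0, safe) plus a trailing 4-byte descriptor, which the
 * length <= 4 path always treats as safe.
 */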
3853
3854/**********************************************************************
3855 *
3856 *  Update the board statistics counters.
3857 *
3858 **********************************************************************/
3859static void
3860em_update_stats_counters(struct adapter *adapter)
3861{
3862	struct ifnet   *ifp;
3863
3864	if (adapter->hw.media_type == em_media_type_copper ||
3865	   (E1000_READ_REG(&adapter->hw, STATUS) & E1000_STATUS_LU)) {
3866		adapter->stats.symerrs += E1000_READ_REG(&adapter->hw, SYMERRS);
3867		adapter->stats.sec += E1000_READ_REG(&adapter->hw, SEC);
3868	}
3869	adapter->stats.crcerrs += E1000_READ_REG(&adapter->hw, CRCERRS);
3870	adapter->stats.mpc += E1000_READ_REG(&adapter->hw, MPC);
3871	adapter->stats.scc += E1000_READ_REG(&adapter->hw, SCC);
3872	adapter->stats.ecol += E1000_READ_REG(&adapter->hw, ECOL);
3873
3874	adapter->stats.mcc += E1000_READ_REG(&adapter->hw, MCC);
3875	adapter->stats.latecol += E1000_READ_REG(&adapter->hw, LATECOL);
3876	adapter->stats.colc += E1000_READ_REG(&adapter->hw, COLC);
3877	adapter->stats.dc += E1000_READ_REG(&adapter->hw, DC);
3878	adapter->stats.rlec += E1000_READ_REG(&adapter->hw, RLEC);
3879	adapter->stats.xonrxc += E1000_READ_REG(&adapter->hw, XONRXC);
3880	adapter->stats.xontxc += E1000_READ_REG(&adapter->hw, XONTXC);
3881	adapter->stats.xoffrxc += E1000_READ_REG(&adapter->hw, XOFFRXC);
3882	adapter->stats.xofftxc += E1000_READ_REG(&adapter->hw, XOFFTXC);
3883	adapter->stats.fcruc += E1000_READ_REG(&adapter->hw, FCRUC);
3884	adapter->stats.prc64 += E1000_READ_REG(&adapter->hw, PRC64);
3885	adapter->stats.prc127 += E1000_READ_REG(&adapter->hw, PRC127);
3886	adapter->stats.prc255 += E1000_READ_REG(&adapter->hw, PRC255);
3887	adapter->stats.prc511 += E1000_READ_REG(&adapter->hw, PRC511);
3888	adapter->stats.prc1023 += E1000_READ_REG(&adapter->hw, PRC1023);
3889	adapter->stats.prc1522 += E1000_READ_REG(&adapter->hw, PRC1522);
3890	adapter->stats.gprc += E1000_READ_REG(&adapter->hw, GPRC);
3891	adapter->stats.bprc += E1000_READ_REG(&adapter->hw, BPRC);
3892	adapter->stats.mprc += E1000_READ_REG(&adapter->hw, MPRC);
3893	adapter->stats.gptc += E1000_READ_REG(&adapter->hw, GPTC);
3894
3895	/* For the 64-bit byte counters the low dword must be read first; */
3896	/* both registers clear on the read of the high dword. */
3897
3898	adapter->stats.gorcl += E1000_READ_REG(&adapter->hw, GORCL);
3899	adapter->stats.gorch += E1000_READ_REG(&adapter->hw, GORCH);
3900	adapter->stats.gotcl += E1000_READ_REG(&adapter->hw, GOTCL);
3901	adapter->stats.gotch += E1000_READ_REG(&adapter->hw, GOTCH);
3902
3903	adapter->stats.rnbc += E1000_READ_REG(&adapter->hw, RNBC);
3904	adapter->stats.ruc += E1000_READ_REG(&adapter->hw, RUC);
3905	adapter->stats.rfc += E1000_READ_REG(&adapter->hw, RFC);
3906	adapter->stats.roc += E1000_READ_REG(&adapter->hw, ROC);
3907	adapter->stats.rjc += E1000_READ_REG(&adapter->hw, RJC);
3908
3909	adapter->stats.torl += E1000_READ_REG(&adapter->hw, TORL);
3910	adapter->stats.torh += E1000_READ_REG(&adapter->hw, TORH);
3911	adapter->stats.totl += E1000_READ_REG(&adapter->hw, TOTL);
3912	adapter->stats.toth += E1000_READ_REG(&adapter->hw, TOTH);
3913
3914	adapter->stats.tpr += E1000_READ_REG(&adapter->hw, TPR);
3915	adapter->stats.tpt += E1000_READ_REG(&adapter->hw, TPT);
3916	adapter->stats.ptc64 += E1000_READ_REG(&adapter->hw, PTC64);
3917	adapter->stats.ptc127 += E1000_READ_REG(&adapter->hw, PTC127);
3918	adapter->stats.ptc255 += E1000_READ_REG(&adapter->hw, PTC255);
3919	adapter->stats.ptc511 += E1000_READ_REG(&adapter->hw, PTC511);
3920	adapter->stats.ptc1023 += E1000_READ_REG(&adapter->hw, PTC1023);
3921	adapter->stats.ptc1522 += E1000_READ_REG(&adapter->hw, PTC1522);
3922	adapter->stats.mptc += E1000_READ_REG(&adapter->hw, MPTC);
3923	adapter->stats.bptc += E1000_READ_REG(&adapter->hw, BPTC);
3924
3925	if (adapter->hw.mac_type >= em_82543) {
3926		adapter->stats.algnerrc += E1000_READ_REG(&adapter->hw, ALGNERRC);
3927		adapter->stats.rxerrc += E1000_READ_REG(&adapter->hw, RXERRC);
3928		adapter->stats.tncrs += E1000_READ_REG(&adapter->hw, TNCRS);
3929		adapter->stats.cexterr += E1000_READ_REG(&adapter->hw, CEXTERR);
3930		adapter->stats.tsctc += E1000_READ_REG(&adapter->hw, TSCTC);
3931		adapter->stats.tsctfc += E1000_READ_REG(&adapter->hw, TSCTFC);
3932	}
3933	ifp = adapter->ifp;
3934
3935	ifp->if_collisions = adapter->stats.colc;
3936
3937	/* Rx Errors */
3938	ifp->if_ierrors = adapter->stats.rxerrc + adapter->stats.crcerrs +
3939	    adapter->stats.algnerrc + adapter->stats.ruc + adapter->stats.roc +
3940	    adapter->stats.mpc + adapter->stats.cexterr;
3941
3942	/* Tx Errors */
3943	ifp->if_oerrors = adapter->stats.ecol + adapter->stats.latecol +
3944	    adapter->watchdog_events;
3945}
3946
3947
3948/**********************************************************************
3949 *
3950 *  This routine is called only when em_display_debug_stats is enabled.
3951 *  This routine provides a way to take a look at important statistics
3952 *  maintained by the driver and hardware.
3953 *
3954 **********************************************************************/
3955static void
3956em_print_debug_info(struct adapter *adapter)
3957{
3958	device_t dev = adapter->dev;
3959	uint8_t *hw_addr = adapter->hw.hw_addr;
3960
3961	device_printf(dev, "Adapter hardware address = %p\n", hw_addr);
3962	device_printf(dev, "CTRL = 0x%x RCTL = 0x%x\n",
3963	    E1000_READ_REG(&adapter->hw, CTRL),
3964	    E1000_READ_REG(&adapter->hw, RCTL));
3965	device_printf(dev, "Packet buffer = Tx=%dk Rx=%dk\n",
3966	    ((E1000_READ_REG(&adapter->hw, PBA) & 0xffff0000) >> 16),
3967	    (E1000_READ_REG(&adapter->hw, PBA) & 0xffff));
3968	device_printf(dev, "Flow control watermarks high = %d low = %d\n",
3969	    adapter->hw.fc_high_water,
3970	    adapter->hw.fc_low_water);
3971	device_printf(dev, "tx_int_delay = %d, tx_abs_int_delay = %d\n",
3972	    E1000_READ_REG(&adapter->hw, TIDV),
3973	    E1000_READ_REG(&adapter->hw, TADV));
3974	device_printf(dev, "rx_int_delay = %d, rx_abs_int_delay = %d\n",
3975	    E1000_READ_REG(&adapter->hw, RDTR),
3976	    E1000_READ_REG(&adapter->hw, RADV));
3977	device_printf(dev, "fifo workaround = %lld, fifo_reset_count = %lld\n",
3978	    (long long)adapter->tx_fifo_wrk_cnt,
3979	    (long long)adapter->tx_fifo_reset_cnt);
3980	device_printf(dev, "hw tdh = %d, hw tdt = %d\n",
3981	    E1000_READ_REG(&adapter->hw, TDH),
3982	    E1000_READ_REG(&adapter->hw, TDT));
3983	device_printf(dev, "Num Tx descriptors avail = %d\n",
3984	    adapter->num_tx_desc_avail);
3985	device_printf(dev, "Tx Descriptors not avail1 = %ld\n",
3986	    adapter->no_tx_desc_avail1);
3987	device_printf(dev, "Tx Descriptors not avail2 = %ld\n",
3988	    adapter->no_tx_desc_avail2);
3989	device_printf(dev, "Std mbuf failed = %ld\n",
3990	    adapter->mbuf_alloc_failed);
3991	device_printf(dev, "Std mbuf cluster failed = %ld\n",
3992	    adapter->mbuf_cluster_failed);
3993}
3994
3995static void
3996em_print_hw_stats(struct adapter *adapter)
3997{
3998	device_t dev = adapter->dev;
3999
4000	device_printf(dev, "Excessive collisions = %lld\n",
4001	    (long long)adapter->stats.ecol);
4002	device_printf(dev, "Symbol errors = %lld\n",
4003	    (long long)adapter->stats.symerrs);
4004	device_printf(dev, "Sequence errors = %lld\n",
4005	    (long long)adapter->stats.sec);
4006	device_printf(dev, "Defer count = %lld\n", (long long)adapter->stats.dc);
4007
4008	device_printf(dev, "Missed Packets = %lld\n", (long long)adapter->stats.mpc);
4009	device_printf(dev, "Receive No Buffers = %lld\n",
4010	    (long long)adapter->stats.rnbc);
4011	/* RLEC is inaccurate on some hardware, so calculate our own. */
4012	device_printf(dev, "Receive Length Errors = %lld\n",
4013	    ((long long)adapter->stats.roc + (long long)adapter->stats.ruc));
4014	device_printf(dev, "Receive errors = %lld\n",
4015	    (long long)adapter->stats.rxerrc);
4016	device_printf(dev, "Crc errors = %lld\n", (long long)adapter->stats.crcerrs);
4017	device_printf(dev, "Alignment errors = %lld\n",
4018	    (long long)adapter->stats.algnerrc);
4019	device_printf(dev, "Carrier extension errors = %lld\n",
4020	    (long long)adapter->stats.cexterr);
4021	device_printf(dev, "RX overruns = %ld\n", adapter->rx_overruns);
4022	device_printf(dev, "watchdog timeouts = %ld\n", adapter->watchdog_events);
4023
4024	device_printf(dev, "XON Rcvd = %lld\n", (long long)adapter->stats.xonrxc);
4025	device_printf(dev, "XON Xmtd = %lld\n", (long long)adapter->stats.xontxc);
4026	device_printf(dev, "XOFF Rcvd = %lld\n", (long long)adapter->stats.xoffrxc);
4027	device_printf(dev, "XOFF Xmtd = %lld\n", (long long)adapter->stats.xofftxc);
4028
4029	device_printf(dev, "Good Packets Rcvd = %lld\n",
4030	    (long long)adapter->stats.gprc);
4031	device_printf(dev, "Good Packets Xmtd = %lld\n",
4032	    (long long)adapter->stats.gptc);
4033	device_printf(dev, "TSO Contexts Xmtd = %lld\n",
4034	    (long long)adapter->stats.tsctc);
4035	device_printf(dev, "TSO Contexts Failed = %lld\n",
4036	    (long long)adapter->stats.tsctfc);
4037}
4038
4039static int
4040em_sysctl_debug_info(SYSCTL_HANDLER_ARGS)
4041{
4042	struct adapter *adapter;
4043	int error;
4044	int result;
4045
4046	result = -1;
4047	error = sysctl_handle_int(oidp, &result, 0, req);
4048
4049	if (error || !req->newptr)
4050		return (error);
4051
4052	if (result == 1) {
4053		adapter = (struct adapter *)arg1;
4054		em_print_debug_info(adapter);
4055	}
4056
4057	return (error);
4058}
4059
4060
4061static int
4062em_sysctl_stats(SYSCTL_HANDLER_ARGS)
4063{
4064	struct adapter *adapter;
4065	int error;
4066	int result;
4067
4068	result = -1;
4069	error = sysctl_handle_int(oidp, &result, 0, req);
4070
4071	if (error || !req->newptr)
4072		return (error);
4073
4074	if (result == 1) {
4075		adapter = (struct adapter *)arg1;
4076		em_print_hw_stats(adapter);
4077	}
4078
4079	return (error);
4080}
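
/*
 * Usage sketch (assuming the OIDs are registered elsewhere in this file
 * under names such as debug_info and stats): both handlers above are
 * write-triggered dumps, e.g.
 *
 *	sysctl dev.em.0.debug_info=1
 *	sysctl dev.em.0.stats=1
 *
 * prints the corresponding report to the console, while a plain read
 * just returns -1 and has no side effects.
 */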
4081
4082static int
4083em_sysctl_int_delay(SYSCTL_HANDLER_ARGS)
4084{
4085	struct em_int_delay_info *info;
4086	struct adapter *adapter;
4087	uint32_t regval;
4088	int error;
4089	int usecs;
4090	int ticks;
4091
4092	info = (struct em_int_delay_info *)arg1;
4093	usecs = info->value;
4094	error = sysctl_handle_int(oidp, &usecs, 0, req);
4095	if (error != 0 || req->newptr == NULL)
4096		return (error);
4097	if (usecs < 0 || usecs > E1000_TICKS_TO_USECS(65535))
4098		return (EINVAL);
4099	info->value = usecs;
4100	ticks = E1000_USECS_TO_TICKS(usecs);
4101
4102	adapter = info->adapter;
4103
4104	EM_LOCK(adapter);
4105	regval = E1000_READ_OFFSET(&adapter->hw, info->offset);
4106	regval = (regval & ~0xffff) | (ticks & 0xffff);
4107	/* Handle a few special cases. */
4108	switch (info->offset) {
4109	case E1000_RDTR:
4110	case E1000_82542_RDTR:
4111		regval |= E1000_RDT_FPDB;
4112		break;
4113	case E1000_TIDV:
4114	case E1000_82542_TIDV:
4115		if (ticks == 0) {
4116			adapter->txd_cmd &= ~E1000_TXD_CMD_IDE;
4117			/* Don't write 0 into the TIDV register. */
4118			regval++;
4119		} else
4120			adapter->txd_cmd |= E1000_TXD_CMD_IDE;
4121		break;
4122	}
4123	E1000_WRITE_OFFSET(&adapter->hw, info->offset, regval);
4124	EM_UNLOCK(adapter);
4125	return (0);
4126}
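
/*
 * Worked example: writing 32 to one of the delay sysctls stores 32 usecs
 * in info->value and converts it with E1000_USECS_TO_TICKS() into the
 * hardware's delay-timer units (1.024 usec increments on this family)
 * before merging it into the low 16 bits of the register.
 */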
4127
4128static void
4129em_add_int_delay_sysctl(struct adapter *adapter, const char *name,
4130	const char *description, struct em_int_delay_info *info,
4131	int offset, int value)
4132{
4133	info->adapter = adapter;
4134	info->offset = offset;
4135	info->value = value;
4136	SYSCTL_ADD_PROC(device_get_sysctl_ctx(adapter->dev),
4137	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
4138	    OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW,
4139	    info, 0, em_sysctl_int_delay, "I", description);
4140}
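
/*
 * Example registration (a sketch; the actual calls live in the attach
 * code earlier in this file and may differ in names and defaults):
 *
 *	em_add_int_delay_sysctl(adapter, "rx_int_delay",
 *	    "receive interrupt delay in usecs", &adapter->rx_int_delay,
 *	    E1000_REG_OFFSET(&adapter->hw, RDTR), em_rx_int_delay_dflt);
 */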
4141
4142#ifndef DEVICE_POLLING
4143static void
4144em_add_int_process_limit(struct adapter *adapter, const char *name,
4145	const char *description, int *limit, int value)
4146{
4147	*limit = value;
4148	SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev),
4149	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
4150	    OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW, limit, value, description);
4151}
4152#endif
4153