if_em.c revision 163730
/**************************************************************************

Copyright (c) 2001-2006, Intel Corporation
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

 1. Redistributions of source code must retain the above copyright notice,
    this list of conditions and the following disclaimer.

 2. Redistributions in binary form must reproduce the above copyright
    notice, this list of conditions and the following disclaimer in the
    documentation and/or other materials provided with the distribution.

 3. Neither the name of the Intel Corporation nor the names of its
    contributors may be used to endorse or promote products derived from
    this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.

***************************************************************************/

/*$FreeBSD: head/sys/dev/em/if_em.c 163730 2006-10-28 08:11:07Z jfv $*/

#ifdef HAVE_KERNEL_OPTION_HEADERS
#include "opt_device_polling.h"
#endif

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/bus.h>
#include <sys/endian.h>
#include <sys/kernel.h>
#include <sys/kthread.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/module.h>
#include <sys/rman.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/sysctl.h>
#include <sys/taskqueue.h>

#include <machine/bus.h>
#include <machine/resource.h>

#include <net/bpf.h>
#include <net/ethernet.h>
#include <net/if.h>
#include <net/if_arp.h>
#include <net/if_dl.h>
#include <net/if_media.h>

#include <net/if_types.h>
#include <net/if_vlan_var.h>

#include <netinet/in_systm.h>
#include <netinet/in.h>
#include <netinet/if_ether.h>
#include <netinet/ip.h>
#include <netinet/ip6.h>
#include <netinet/tcp.h>
#include <netinet/udp.h>

#include <machine/in_cksum.h>
#include <dev/pci/pcivar.h>
#include <dev/pci/pcireg.h>
#include <dev/em/if_em_hw.h>
#include <dev/em/if_em.h>

/*********************************************************************
 *  Set this to one to display debug statistics
 *********************************************************************/
int	em_display_debug_stats = 0;

/*********************************************************************
 *  Driver version
 *********************************************************************/

char em_driver_version[] = "Version - 6.1.4 - TSO";

/*********************************************************************
 *  PCI Device ID Table
 *
 *  Used by em_probe to select which devices to attach to.
 *  The last field stores an index into em_strings.
 *  The last entry must be all zeros.
 *
 *  { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index }
 *********************************************************************/

static em_vendor_info_t em_vendor_info_array[] =
{
	/* Intel(R) PRO/1000 Network Connection */
	{ 0x8086, E1000_DEV_ID_82540EM,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82540EM_LOM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82540EP,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82540EP_LOM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82540EP_LP,	PCI_ANY_ID, PCI_ANY_ID, 0},

	{ 0x8086, E1000_DEV_ID_82541EI,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82541ER,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82541ER_LOM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82541EI_MOBILE,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82541GI,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82541GI_LF,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82541GI_MOBILE,	PCI_ANY_ID, PCI_ANY_ID, 0},

	{ 0x8086, E1000_DEV_ID_82542,		PCI_ANY_ID, PCI_ANY_ID, 0},

	{ 0x8086, E1000_DEV_ID_82543GC_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82543GC_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},

	{ 0x8086, E1000_DEV_ID_82544EI_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82544EI_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82544GC_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82544GC_LOM,	PCI_ANY_ID, PCI_ANY_ID, 0},

	{ 0x8086, E1000_DEV_ID_82545EM_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82545EM_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82545GM_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82545GM_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82545GM_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},

	{ 0x8086, E1000_DEV_ID_82546EB_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82546EB_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82546EB_QUAD_COPPER, PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82546GB_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82546GB_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82546GB_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82546GB_PCIE,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82546GB_QUAD_COPPER, PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82546GB_QUAD_COPPER_KSP3,
						PCI_ANY_ID, PCI_ANY_ID, 0},

	{ 0x8086, E1000_DEV_ID_82547EI,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82547EI_MOBILE,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82547GI,		PCI_ANY_ID, PCI_ANY_ID, 0},

	{ 0x8086, E1000_DEV_ID_82571EB_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER,
						PCI_ANY_ID, PCI_ANY_ID, 0},

	{ 0x8086, E1000_DEV_ID_82572EI_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82572EI_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82572EI_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82572EI,		PCI_ANY_ID, PCI_ANY_ID, 0},

	{ 0x8086, E1000_DEV_ID_82573E,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82573E_IAMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82573L,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_SPT,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_SPT,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_DPT,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_DPT,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IGP_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IGP_C,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IFE,	PCI_ANY_ID, PCI_ANY_ID, 0},

	/* required last entry */
	{ 0, 0, 0, 0, 0}
};

/*********************************************************************
 *  Table of branding strings for all supported NICs.
 *********************************************************************/

static char *em_strings[] = {
	"Intel(R) PRO/1000 Network Connection"
};

/*********************************************************************
 *  Function prototypes
 *********************************************************************/
static int	em_probe(device_t);
static int	em_attach(device_t);
static int	em_detach(device_t);
static int	em_shutdown(device_t);
static int	em_suspend(device_t);
static int	em_resume(device_t);
static void	em_start(struct ifnet *);
static void	em_start_locked(struct ifnet *ifp);
static int	em_ioctl(struct ifnet *, u_long, caddr_t);
static void	em_watchdog(struct ifnet *);
static void	em_init(void *);
static void	em_init_locked(struct adapter *);
static void	em_stop(void *);
static void	em_media_status(struct ifnet *, struct ifmediareq *);
static int	em_media_change(struct ifnet *);
static void	em_identify_hardware(struct adapter *);
static int	em_allocate_pci_resources(struct adapter *);
static int	em_allocate_intr(struct adapter *);
static void	em_free_intr(struct adapter *);
static void	em_free_pci_resources(struct adapter *);
static void	em_local_timer(void *);
static int	em_hardware_init(struct adapter *);
static void	em_setup_interface(device_t, struct adapter *);
static int	em_setup_transmit_structures(struct adapter *);
static void	em_initialize_transmit_unit(struct adapter *);
static int	em_setup_receive_structures(struct adapter *);
static void	em_initialize_receive_unit(struct adapter *);
static void	em_enable_intr(struct adapter *);
static void	em_disable_intr(struct adapter *);
static void	em_free_transmit_structures(struct adapter *);
static void	em_free_receive_structures(struct adapter *);
static void	em_update_stats_counters(struct adapter *);
static void	em_txeof(struct adapter *);
static int	em_allocate_receive_structures(struct adapter *);
static int	em_allocate_transmit_structures(struct adapter *);
static int	em_rxeof(struct adapter *, int);
#ifndef __NO_STRICT_ALIGNMENT
static int	em_fixup_rx(struct adapter *);
#endif
static void	em_receive_checksum(struct adapter *, struct em_rx_desc *,
		    struct mbuf *);
static void	em_transmit_checksum_setup(struct adapter *, struct mbuf *,
		    uint32_t *, uint32_t *);
static boolean_t em_tso_setup(struct adapter *, struct mbuf *,
		    uint32_t *, uint32_t *);
static void	em_set_promisc(struct adapter *);
static void	em_disable_promisc(struct adapter *);
static void	em_set_multi(struct adapter *);
static void	em_print_hw_stats(struct adapter *);
static void	em_update_link_status(struct adapter *);
static int	em_get_buf(struct adapter *, int);
static void	em_enable_vlans(struct adapter *);
static void	em_disable_vlans(struct adapter *);
static int	em_encap(struct adapter *, struct mbuf **);
static void	em_smartspeed(struct adapter *);
static int	em_82547_fifo_workaround(struct adapter *, int);
static void	em_82547_update_fifo_head(struct adapter *, int);
static int	em_82547_tx_fifo_reset(struct adapter *);
static void	em_82547_move_tail(void *arg);
static void	em_82547_move_tail_locked(struct adapter *);
static int	em_dma_malloc(struct adapter *, bus_size_t,
		    struct em_dma_alloc *, int);
static void	em_dma_free(struct adapter *, struct em_dma_alloc *);
static void	em_print_debug_info(struct adapter *);
static int	em_is_valid_ether_addr(uint8_t *);
static int	em_sysctl_stats(SYSCTL_HANDLER_ARGS);
static int	em_sysctl_debug_info(SYSCTL_HANDLER_ARGS);
static uint32_t	em_fill_descriptors(bus_addr_t address, uint32_t length,
		    PDESC_ARRAY desc_array);
static int	em_sysctl_int_delay(SYSCTL_HANDLER_ARGS);
static void	em_add_int_delay_sysctl(struct adapter *, const char *,
		    const char *, struct em_int_delay_info *, int, int);

/*
 * Fast interrupt handler and legacy ithread/polling modes are
 * mutually exclusive.
 */
#ifdef DEVICE_POLLING
static poll_handler_t em_poll;
static void	em_intr(void *);
#else
static void	em_intr_fast(void *);
static void	em_add_int_process_limit(struct adapter *, const char *,
		    const char *, int *, int);
static void	em_handle_rxtx(void *context, int pending);
static void	em_handle_link(void *context, int pending);
#endif

/*********************************************************************
 *  FreeBSD Device Interface Entry Points
 *********************************************************************/

static device_method_t em_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe, em_probe),
	DEVMETHOD(device_attach, em_attach),
	DEVMETHOD(device_detach, em_detach),
	DEVMETHOD(device_shutdown, em_shutdown),
	DEVMETHOD(device_suspend, em_suspend),
	DEVMETHOD(device_resume, em_resume),
	{0, 0}
};

static driver_t em_driver = {
	"em", em_methods, sizeof(struct adapter),
};

static devclass_t em_devclass;
DRIVER_MODULE(em, pci, em_driver, em_devclass, 0, 0);
MODULE_DEPEND(em, pci, 1, 1, 1);
MODULE_DEPEND(em, ether, 1, 1, 1);

/*********************************************************************
 *  Tunable default values.
 *********************************************************************/

#define E1000_TICKS_TO_USECS(ticks)	((1024 * (ticks) + 500) / 1000)
#define E1000_USECS_TO_TICKS(usecs)	((1000 * (usecs) + 512) / 1024)
#define M_TSO_LEN			66
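
/*
 * E1000_TICKS_TO_USECS() and E1000_USECS_TO_TICKS() above round-convert
 * between the hardware's 1.024 usec timer "ticks" and microseconds.  For
 * illustration (the input value 100 is arbitrary, not a driver default):
 * E1000_TICKS_TO_USECS(100) = (1024 * 100 + 500) / 1000 = 102 usecs, and
 * E1000_USECS_TO_TICKS(102) = (1000 * 102 + 512) / 1024 = 100 ticks, so
 * this value survives the round trip exactly.
 */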

static int em_tx_int_delay_dflt = E1000_TICKS_TO_USECS(EM_TIDV);
static int em_rx_int_delay_dflt = E1000_TICKS_TO_USECS(EM_RDTR);
static int em_tx_abs_int_delay_dflt = E1000_TICKS_TO_USECS(EM_TADV);
static int em_rx_abs_int_delay_dflt = E1000_TICKS_TO_USECS(EM_RADV);
static int em_rxd = EM_DEFAULT_RXD;
static int em_txd = EM_DEFAULT_TXD;
static int em_smart_pwr_down = FALSE;

TUNABLE_INT("hw.em.tx_int_delay", &em_tx_int_delay_dflt);
TUNABLE_INT("hw.em.rx_int_delay", &em_rx_int_delay_dflt);
TUNABLE_INT("hw.em.tx_abs_int_delay", &em_tx_abs_int_delay_dflt);
TUNABLE_INT("hw.em.rx_abs_int_delay", &em_rx_abs_int_delay_dflt);
TUNABLE_INT("hw.em.rxd", &em_rxd);
TUNABLE_INT("hw.em.txd", &em_txd);
TUNABLE_INT("hw.em.smart_pwr_down", &em_smart_pwr_down);
#ifndef DEVICE_POLLING
static int em_rx_process_limit = 100;
TUNABLE_INT("hw.em.rx_process_limit", &em_rx_process_limit);
#endif

/*********************************************************************
 *  Device identification routine
 *
 *  em_probe determines if the driver should be loaded on an
 *  adapter based on the adapter's PCI vendor/device ID.
 *
 *  return BUS_PROBE_DEFAULT on success, positive on failure
 *********************************************************************/

static int
em_probe(device_t dev)
{
	char		adapter_name[60];
	uint16_t	pci_vendor_id = 0;
	uint16_t	pci_device_id = 0;
	uint16_t	pci_subvendor_id = 0;
	uint16_t	pci_subdevice_id = 0;
	em_vendor_info_t *ent;

	INIT_DEBUGOUT("em_probe: begin");

	pci_vendor_id = pci_get_vendor(dev);
	if (pci_vendor_id != EM_VENDOR_ID)
		return (ENXIO);

	pci_device_id = pci_get_device(dev);
	pci_subvendor_id = pci_get_subvendor(dev);
	pci_subdevice_id = pci_get_subdevice(dev);

	ent = em_vendor_info_array;
	while (ent->vendor_id != 0) {
		if ((pci_vendor_id == ent->vendor_id) &&
		    (pci_device_id == ent->device_id) &&
		    ((pci_subvendor_id == ent->subvendor_id) ||
		    (ent->subvendor_id == PCI_ANY_ID)) &&
		    ((pci_subdevice_id == ent->subdevice_id) ||
		    (ent->subdevice_id == PCI_ANY_ID))) {
			sprintf(adapter_name, "%s %s",
				em_strings[ent->index],
				em_driver_version);
			device_set_desc_copy(dev, adapter_name);
			return (BUS_PROBE_DEFAULT);
		}
		ent++;
	}

	return (ENXIO);
}

/*********************************************************************
 *  Device initialization routine
 *
 *  The attach entry point is called when the driver is being loaded.
 *  This routine identifies the type of hardware, allocates all resources
 *  and initializes the hardware.
 *
 *  return 0 on success, positive on failure
 *********************************************************************/

static int
em_attach(device_t dev)
{
	struct adapter	*adapter;
	int		tsize, rsize;
	int		error = 0;

	INIT_DEBUGOUT("em_attach: begin");

	adapter = device_get_softc(dev);
	adapter->dev = adapter->osdep.dev = dev;
	EM_LOCK_INIT(adapter, device_get_nameunit(dev));

	/* SYSCTL stuff */
	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
	    OID_AUTO, "debug_info", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
	    em_sysctl_debug_info, "I", "Debug Information");

	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
	    OID_AUTO, "stats", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
	    em_sysctl_stats, "I", "Statistics");

	callout_init(&adapter->timer, CALLOUT_MPSAFE);
	callout_init(&adapter->tx_fifo_timer, CALLOUT_MPSAFE);

	/* Determine hardware revision */
	em_identify_hardware(adapter);

	/* Set up some sysctls for the tunable interrupt delays */
	em_add_int_delay_sysctl(adapter, "rx_int_delay",
	    "receive interrupt delay in usecs", &adapter->rx_int_delay,
	    E1000_REG_OFFSET(&adapter->hw, RDTR), em_rx_int_delay_dflt);
	em_add_int_delay_sysctl(adapter, "tx_int_delay",
	    "transmit interrupt delay in usecs", &adapter->tx_int_delay,
	    E1000_REG_OFFSET(&adapter->hw, TIDV), em_tx_int_delay_dflt);
	if (adapter->hw.mac_type >= em_82540) {
		em_add_int_delay_sysctl(adapter, "rx_abs_int_delay",
		    "receive interrupt delay limit in usecs",
		    &adapter->rx_abs_int_delay,
		    E1000_REG_OFFSET(&adapter->hw, RADV),
		    em_rx_abs_int_delay_dflt);
		em_add_int_delay_sysctl(adapter, "tx_abs_int_delay",
		    "transmit interrupt delay limit in usecs",
		    &adapter->tx_abs_int_delay,
		    E1000_REG_OFFSET(&adapter->hw, TADV),
		    em_tx_abs_int_delay_dflt);
	}

#ifndef DEVICE_POLLING
	/* Sysctls for limiting the amount of work done in the taskqueue */
	em_add_int_process_limit(adapter, "rx_processing_limit",
	    "max number of rx packets to process", &adapter->rx_process_limit,
	    em_rx_process_limit);
#endif

	/*
	 * Validate the number of transmit and receive descriptors.  They
	 * must not exceed the hardware maximum and must be a multiple of
	 * EM_DBA_ALIGN.
	 */
	if (((em_txd * sizeof(struct em_tx_desc)) % EM_DBA_ALIGN) != 0 ||
	    (adapter->hw.mac_type >= em_82544 && em_txd > EM_MAX_TXD) ||
	    (adapter->hw.mac_type < em_82544 && em_txd > EM_MAX_TXD_82543) ||
	    (em_txd < EM_MIN_TXD)) {
		device_printf(dev, "Using %d TX descriptors instead of %d!\n",
		    EM_DEFAULT_TXD, em_txd);
		adapter->num_tx_desc = EM_DEFAULT_TXD;
	} else
		adapter->num_tx_desc = em_txd;
	if (((em_rxd * sizeof(struct em_rx_desc)) % EM_DBA_ALIGN) != 0 ||
	    (adapter->hw.mac_type >= em_82544 && em_rxd > EM_MAX_RXD) ||
	    (adapter->hw.mac_type < em_82544 && em_rxd > EM_MAX_RXD_82543) ||
	    (em_rxd < EM_MIN_RXD)) {
		device_printf(dev, "Using %d RX descriptors instead of %d!\n",
		    EM_DEFAULT_RXD, em_rxd);
		adapter->num_rx_desc = EM_DEFAULT_RXD;
	} else
		adapter->num_rx_desc = em_rxd;
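
	/*
	 * Worked example of the alignment test above.  The legacy tx
	 * descriptor is 16 bytes; EM_DBA_ALIGN is assumed to be 128 for
	 * this illustration.  A request of em_txd = 256 gives
	 * 256 * 16 = 4096 bytes and 4096 % 128 == 0, so it is accepted;
	 * em_txd = 250 gives 4000 bytes, 4000 % 128 == 32, and the
	 * driver falls back to EM_DEFAULT_TXD.
	 */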

	adapter->hw.autoneg = DO_AUTO_NEG;
	adapter->hw.wait_autoneg_complete = WAIT_FOR_AUTO_NEG_DEFAULT;
	adapter->hw.autoneg_advertised = AUTONEG_ADV_DEFAULT;
	adapter->hw.tbi_compatibility_en = TRUE;
	adapter->rx_buffer_len = EM_RXBUFFER_2048;

	adapter->hw.phy_init_script = 1;
	adapter->hw.phy_reset_disable = FALSE;

#ifndef EM_MASTER_SLAVE
	adapter->hw.master_slave = em_ms_hw_default;
#else
	adapter->hw.master_slave = EM_MASTER_SLAVE;
#endif
	/*
	 * Set the max frame size assuming standard ethernet
	 * sized frames.
	 */
	adapter->hw.max_frame_size = ETHERMTU + ETHER_HDR_LEN + ETHER_CRC_LEN;

	adapter->hw.min_frame_size =
	    MINIMUM_ETHERNET_PACKET_SIZE + ETHER_CRC_LEN;

	/*
	 * This controls when hardware reports transmit completion
	 * status.
	 */
	adapter->hw.report_tx_early = 1;
	if (em_allocate_pci_resources(adapter)) {
		device_printf(dev, "Allocation of PCI resources failed\n");
		error = ENXIO;
		goto err_pci;
	}

	/* Initialize eeprom parameters */
	em_init_eeprom_params(&adapter->hw);

	tsize = roundup2(adapter->num_tx_desc * sizeof(struct em_tx_desc),
	    EM_DBA_ALIGN);

	/* Allocate Transmit Descriptor ring */
	if (em_dma_malloc(adapter, tsize, &adapter->txdma, BUS_DMA_NOWAIT)) {
		device_printf(dev, "Unable to allocate tx_desc memory\n");
		error = ENOMEM;
		goto err_tx_desc;
	}
	adapter->tx_desc_base = (struct em_tx_desc *)adapter->txdma.dma_vaddr;

	rsize = roundup2(adapter->num_rx_desc * sizeof(struct em_rx_desc),
	    EM_DBA_ALIGN);

	/* Allocate Receive Descriptor ring */
	if (em_dma_malloc(adapter, rsize, &adapter->rxdma, BUS_DMA_NOWAIT)) {
		device_printf(dev, "Unable to allocate rx_desc memory\n");
		error = ENOMEM;
		goto err_rx_desc;
	}
	adapter->rx_desc_base = (struct em_rx_desc *)adapter->rxdma.dma_vaddr;

	/* Initialize the hardware */
	if (em_hardware_init(adapter)) {
		device_printf(dev, "Unable to initialize the hardware\n");
		error = EIO;
		goto err_hw_init;
	}

	/* Copy the permanent MAC address out of the EEPROM */
	if (em_read_mac_addr(&adapter->hw) < 0) {
		device_printf(dev, "EEPROM read error while reading MAC"
		    " address\n");
		error = EIO;
		goto err_hw_init;
	}

	if (!em_is_valid_ether_addr(adapter->hw.mac_addr)) {
		device_printf(dev, "Invalid MAC address\n");
		error = EIO;
		goto err_hw_init;
	}

	/* Setup OS specific network interface */
	em_setup_interface(dev, adapter);

	em_allocate_intr(adapter);

	/* Initialize statistics */
	em_clear_hw_cntrs(&adapter->hw);
	em_update_stats_counters(adapter);
	adapter->hw.get_link_status = 1;
	em_update_link_status(adapter);

	/* Indicate SOL/IDER usage */
	if (em_check_phy_reset_block(&adapter->hw))
		device_printf(dev,
		    "PHY reset is blocked due to SOL/IDER session.\n");

	/* Identify 82544 on PCIX */
	em_get_bus_info(&adapter->hw);
	if (adapter->hw.bus_type == em_bus_type_pcix &&
	    adapter->hw.mac_type == em_82544)
		adapter->pcix_82544 = TRUE;
	else
		adapter->pcix_82544 = FALSE;

	INIT_DEBUGOUT("em_attach: end");

	return (0);

err_hw_init:
	em_dma_free(adapter, &adapter->rxdma);
err_rx_desc:
	em_dma_free(adapter, &adapter->txdma);
err_tx_desc:
err_pci:
	em_free_intr(adapter);
	em_free_pci_resources(adapter);
	EM_LOCK_DESTROY(adapter);

	return (error);
}

/*********************************************************************
 *  Device removal routine
 *
 *  The detach entry point is called when the driver is being removed.
 *  This routine stops the adapter and deallocates all the resources
 *  that were allocated for driver operation.
 *
 *  return 0 on success, positive on failure
 *********************************************************************/

static int
em_detach(device_t dev)
{
	struct adapter	*adapter = device_get_softc(dev);
	struct ifnet	*ifp = adapter->ifp;

	INIT_DEBUGOUT("em_detach: begin");

#ifdef DEVICE_POLLING
	if (ifp->if_capenable & IFCAP_POLLING)
		ether_poll_deregister(ifp);
#endif

	em_free_intr(adapter);
	EM_LOCK(adapter);
	adapter->in_detach = 1;
	em_stop(adapter);
	em_phy_hw_reset(&adapter->hw);
	EM_UNLOCK(adapter);
	ether_ifdetach(adapter->ifp);

	em_free_pci_resources(adapter);
	bus_generic_detach(dev);
	if_free(ifp);

	/* Free Transmit Descriptor ring */
	if (adapter->tx_desc_base) {
		em_dma_free(adapter, &adapter->txdma);
		adapter->tx_desc_base = NULL;
	}

	/* Free Receive Descriptor ring */
	if (adapter->rx_desc_base) {
		em_dma_free(adapter, &adapter->rxdma);
		adapter->rx_desc_base = NULL;
	}

	EM_LOCK_DESTROY(adapter);

	return (0);
}

/*********************************************************************
 *
 *  Shutdown entry point
 *
 **********************************************************************/

static int
em_shutdown(device_t dev)
{
	struct adapter *adapter = device_get_softc(dev);

	EM_LOCK(adapter);
	em_stop(adapter);
	EM_UNLOCK(adapter);

	return (0);
}

/*
 * Suspend/resume device methods.
 */
static int
em_suspend(device_t dev)
{
	struct adapter *adapter = device_get_softc(dev);

	EM_LOCK(adapter);
	em_stop(adapter);
	EM_UNLOCK(adapter);

	return (bus_generic_suspend(dev));
}

static int
em_resume(device_t dev)
{
	struct adapter *adapter = device_get_softc(dev);
	struct ifnet *ifp = adapter->ifp;

	EM_LOCK(adapter);
	em_init_locked(adapter);
	if ((ifp->if_flags & IFF_UP) &&
	    (ifp->if_drv_flags & IFF_DRV_RUNNING))
		em_start_locked(ifp);
	EM_UNLOCK(adapter);

	return (bus_generic_resume(dev));
}


/*********************************************************************
 *  Transmit entry point
 *
 *  em_start is called by the stack to initiate a transmit.
 *  The driver will remain in this routine as long as there are
 *  packets to transmit and transmit resources are available.
 *  In case resources are not available, the stack is notified and
 *  the packet is requeued.
 **********************************************************************/

static void
em_start_locked(struct ifnet *ifp)
{
	struct adapter	*adapter = ifp->if_softc;
	struct mbuf	*m_head;

	EM_LOCK_ASSERT(adapter);

	if ((ifp->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) !=
	    IFF_DRV_RUNNING)
		return;
	if (!adapter->link_active)
		return;

	while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {

		IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
		if (m_head == NULL)
			break;
		/*
		 * em_encap() can modify our pointer, and/or make it NULL on
		 * failure.  In that event, we can't requeue.
		 */
		if (em_encap(adapter, &m_head)) {
			if (m_head == NULL)
				break;
			ifp->if_drv_flags |= IFF_DRV_OACTIVE;
			IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
			break;
		}

		/* Send a copy of the frame to the BPF listener */
		BPF_MTAP(ifp, m_head);

		/* Set timeout in case hardware has problems transmitting. */
		ifp->if_timer = EM_TX_TIMEOUT;
	}
}

static void
em_start(struct ifnet *ifp)
{
	struct adapter *adapter = ifp->if_softc;

	EM_LOCK(adapter);
	if (ifp->if_drv_flags & IFF_DRV_RUNNING)
		em_start_locked(ifp);
	EM_UNLOCK(adapter);
}

/*********************************************************************
 *  Ioctl entry point
 *
 *  em_ioctl is called when the user wants to configure the
 *  interface.
 *
 *  return 0 on success, positive on failure
 **********************************************************************/

static int
em_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
{
	struct adapter	*adapter = ifp->if_softc;
	struct ifreq *ifr = (struct ifreq *)data;
	struct ifaddr *ifa = (struct ifaddr *)data;
	int error = 0;

	if (adapter->in_detach)
		return (error);

	switch (command) {
	case SIOCSIFADDR:
	case SIOCGIFADDR:
		if (ifa->ifa_addr->sa_family == AF_INET) {
			/*
			 * XXX
			 * Since resetting hardware takes a very long time
			 * and results in link renegotiation, we only
			 * initialize the hardware when it is absolutely
			 * required.
			 */
			ifp->if_flags |= IFF_UP;
			if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) {
				EM_LOCK(adapter);
				em_init_locked(adapter);
				EM_UNLOCK(adapter);
			}
			arp_ifinit(ifp, ifa);
		} else
			error = ether_ioctl(ifp, command, data);
		break;
	case SIOCSIFMTU:
	    {
		int max_frame_size;
		uint16_t eeprom_data = 0;

		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFMTU (Set Interface MTU)");

		EM_LOCK(adapter);
		switch (adapter->hw.mac_type) {
		case em_82573:
			/*
			 * 82573 only supports jumbo frames
			 * if ASPM is disabled.
			 */
			em_read_eeprom(&adapter->hw, EEPROM_INIT_3GIO_3, 1,
			    &eeprom_data);
			if (eeprom_data & EEPROM_WORD1A_ASPM_MASK) {
				max_frame_size = ETHER_MAX_LEN;
				break;
			}
			/* Allow Jumbo frames - fall thru */
		case em_82571:
		case em_82572:
		case em_80003es2lan:	/* Limit Jumbo Frame size */
			max_frame_size = 9234;
			break;
		case em_ich8lan:
			/* ICH8 does not support jumbo frames */
			max_frame_size = ETHER_MAX_LEN;
			break;
		default:
			max_frame_size = MAX_JUMBO_FRAME_SIZE;
		}
		if (ifr->ifr_mtu > max_frame_size - ETHER_HDR_LEN -
		    ETHER_CRC_LEN) {
			EM_UNLOCK(adapter);
			error = EINVAL;
			break;
		}

		ifp->if_mtu = ifr->ifr_mtu;
		adapter->hw.max_frame_size =
		    ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
		em_init_locked(adapter);
		EM_UNLOCK(adapter);
		break;
	    }
	case SIOCSIFFLAGS:
		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFFLAGS (Set Interface Flags)");
		EM_LOCK(adapter);
		if (ifp->if_flags & IFF_UP) {
			if ((ifp->if_drv_flags & IFF_DRV_RUNNING)) {
				if ((ifp->if_flags ^ adapter->if_flags) &
				    IFF_PROMISC) {
					em_disable_promisc(adapter);
					em_set_promisc(adapter);
				}
			} else
				em_init_locked(adapter);
		} else {
			if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
				em_stop(adapter);
			}
		}
		adapter->if_flags = ifp->if_flags;
		EM_UNLOCK(adapter);
		break;
	case SIOCADDMULTI:
	case SIOCDELMULTI:
		IOCTL_DEBUGOUT("ioctl rcv'd: SIOC(ADD|DEL)MULTI");
		if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
			EM_LOCK(adapter);
			em_disable_intr(adapter);
			em_set_multi(adapter);
			if (adapter->hw.mac_type == em_82542_rev2_0) {
				em_initialize_receive_unit(adapter);
			}
#ifdef DEVICE_POLLING
			if (!(ifp->if_capenable & IFCAP_POLLING))
#endif
				em_enable_intr(adapter);
			EM_UNLOCK(adapter);
		}
		break;
	case SIOCSIFMEDIA:
	case SIOCGIFMEDIA:
		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCxIFMEDIA (Get/Set Interface Media)");
		error = ifmedia_ioctl(ifp, ifr, &adapter->media, command);
		break;
	case SIOCSIFCAP:
	    {
		int mask, reinit;

		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFCAP (Set Capabilities)");
		reinit = 0;
		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
#ifdef DEVICE_POLLING
		if (mask & IFCAP_POLLING) {
			if (ifr->ifr_reqcap & IFCAP_POLLING) {
				error = ether_poll_register(em_poll, ifp);
				if (error)
					return (error);
				EM_LOCK(adapter);
				em_disable_intr(adapter);
				ifp->if_capenable |= IFCAP_POLLING;
				EM_UNLOCK(adapter);
			} else {
				error = ether_poll_deregister(ifp);
				/* Enable interrupt even in error case */
				EM_LOCK(adapter);
				em_enable_intr(adapter);
				ifp->if_capenable &= ~IFCAP_POLLING;
				EM_UNLOCK(adapter);
			}
		}
#endif
		if (mask & IFCAP_HWCSUM) {
			ifp->if_capenable ^= IFCAP_HWCSUM;
			reinit = 1;
		}
		if (mask & IFCAP_TSO4) {
			ifp->if_capenable ^= IFCAP_TSO4;
			reinit = 1;
		}
		if (mask & IFCAP_VLAN_HWTAGGING) {
			ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
			reinit = 1;
		}
		if (reinit && (ifp->if_drv_flags & IFF_DRV_RUNNING))
			em_init(adapter);
		VLAN_CAPABILITIES(ifp);
		break;
	    }
	default:
		error = ether_ioctl(ifp, command, data);
		break;
	}

	return (error);
}

/*********************************************************************
 *  Watchdog entry point
 *
 *  This routine is called whenever hardware quits transmitting.
 *
 **********************************************************************/

static void
em_watchdog(struct ifnet *ifp)
{
	struct adapter *adapter = ifp->if_softc;

	EM_LOCK(adapter);
	/*
	 * If we are in this routine because of pause frames, then
	 * don't reset the hardware.
	 */
	if (E1000_READ_REG(&adapter->hw, STATUS) & E1000_STATUS_TXOFF) {
		ifp->if_timer = EM_TX_TIMEOUT;
		EM_UNLOCK(adapter);
		return;
	}

	/*
	 * Reclaim first, as there is a possibility of losing Tx completion
	 * interrupts.  Missing Tx completion interrupts may be caused by the
	 * Tx interrupt moderation mechanism (delayed interrupts) or by a
	 * chipset bug.
	 */
	em_txeof(adapter);
	if (adapter->num_tx_desc_avail == adapter->num_tx_desc) {
		EM_UNLOCK(adapter);
		return;
	}

	if (em_check_for_link(&adapter->hw) == 0)
		device_printf(adapter->dev, "watchdog timeout -- resetting\n");

	ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
	adapter->watchdog_events++;

	em_init_locked(adapter);
	EM_UNLOCK(adapter);
}

/*********************************************************************
 *  Init entry point
 *
 *  This routine is used in two ways.  It is used by the stack as
 *  the init entry point in the network interface structure.  It is
 *  also used by the driver as a hw/sw initialization routine to get
 *  to a consistent state.
 *
 **********************************************************************/

static void
em_init_locked(struct adapter *adapter)
{
	struct ifnet	*ifp = adapter->ifp;
	device_t	dev = adapter->dev;
	uint32_t	pba;

	INIT_DEBUGOUT("em_init: begin");

	EM_LOCK_ASSERT(adapter);

	em_stop(adapter);

	/*
	 * Packet Buffer Allocation (PBA)
	 * Writing PBA sets the receive portion of the buffer;
	 * the remainder is used for the transmit buffer.
	 *
	 * Devices before the 82547 had a Packet Buffer of 64K.
	 *   Default allocation: PBA=48K for Rx, leaving 16K for Tx.
	 * After the 82547 the buffer was reduced to 40K.
	 *   Default allocation: PBA=30K for Rx, leaving 10K for Tx.
	 *   Note: the default does not leave enough room for a
	 *   Jumbo Frame >10k.
	 */
	switch (adapter->hw.mac_type) {
	case em_82547:
	case em_82547_rev_2: /* 82547: Total Packet Buffer is 40K */
		if (adapter->hw.max_frame_size > EM_RXBUFFER_8192)
			pba = E1000_PBA_22K; /* 22K for Rx, 18K for Tx */
		else
			pba = E1000_PBA_30K; /* 30K for Rx, 10K for Tx */
		adapter->tx_fifo_head = 0;
		adapter->tx_head_addr = pba << EM_TX_HEAD_ADDR_SHIFT;
		adapter->tx_fifo_size =
		    (E1000_PBA_40K - pba) << EM_PBA_BYTES_SHIFT;
		break;
	case em_80003es2lan: /* 80003es2lan: Total Packet Buffer is 48K */
	case em_82571: /* 82571: Total Packet Buffer is 48K */
	case em_82572: /* 82572: Total Packet Buffer is 48K */
		pba = E1000_PBA_32K; /* 32K for Rx, 16K for Tx */
		break;
	case em_82573: /* 82573: Total Packet Buffer is 32K */
		/* Jumbo frames not supported */
		pba = E1000_PBA_12K; /* 12K for Rx, 20K for Tx */
		break;
	case em_ich8lan:
		pba = E1000_PBA_8K;
		break;
	default:
		/* Devices before 82547 had a Packet Buffer of 64K. */
		if (adapter->hw.max_frame_size > EM_RXBUFFER_8192)
			pba = E1000_PBA_40K; /* 40K for Rx, 24K for Tx */
		else
			pba = E1000_PBA_48K; /* 48K for Rx, 16K for Tx */
	}
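
	/*
	 * Worked example of the 82547 FIFO split above, using assumed
	 * register encodings (E1000_PBA_30K = 0x1E, E1000_PBA_40K = 0x28
	 * and EM_PBA_BYTES_SHIFT = 10 are assumptions of this note): with
	 * pba = E1000_PBA_30K, tx_fifo_size = (0x28 - 0x1E) << 10 =
	 * 10 * 1024 = 10240 bytes, i.e. the 10K of transmit buffer that
	 * the comment block above describes.
	 */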

	INIT_DEBUGOUT1("em_init: pba=%dK", pba);
	E1000_WRITE_REG(&adapter->hw, PBA, pba);

	/* Get the latest mac address, User can use a LAA */
	bcopy(IF_LLADDR(adapter->ifp), adapter->hw.mac_addr, ETHER_ADDR_LEN);

	/* Initialize the hardware */
	if (em_hardware_init(adapter)) {
		device_printf(dev, "Unable to initialize the hardware\n");
		return;
	}
	em_update_link_status(adapter);

	if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING)
		em_enable_vlans(adapter);

	ifp->if_hwassist = 0;
	if (adapter->hw.mac_type >= em_82543) {
		if (ifp->if_capenable & IFCAP_TXCSUM)
			ifp->if_hwassist = EM_CHECKSUM_FEATURES;
		/*
		 * em_setup_transmit_structures() will behave differently
		 * based on the state of TSO.
		 */
		if (ifp->if_capenable & IFCAP_TSO)
			ifp->if_hwassist |= EM_TCPSEG_FEATURES;
	}

	/* Prepare transmit descriptors and buffers */
	if (em_setup_transmit_structures(adapter)) {
		device_printf(dev, "Could not setup transmit structures\n");
		em_stop(adapter);
		return;
	}
	em_initialize_transmit_unit(adapter);

	/* Setup Multicast table */
	em_set_multi(adapter);

	/* Prepare receive descriptors and buffers */
	if (em_setup_receive_structures(adapter)) {
		device_printf(dev, "Could not setup receive structures\n");
		em_stop(adapter);
		return;
	}
	em_initialize_receive_unit(adapter);

	/* Don't lose promiscuous settings */
	em_set_promisc(adapter);

	ifp->if_drv_flags |= IFF_DRV_RUNNING;
	ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;

	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
	em_clear_hw_cntrs(&adapter->hw);
#ifdef DEVICE_POLLING
	/*
	 * Only enable interrupts if we are not polling; make sure
	 * they are off otherwise.
	 */
	if (ifp->if_capenable & IFCAP_POLLING)
		em_disable_intr(adapter);
	else
#endif /* DEVICE_POLLING */
		em_enable_intr(adapter);

	/* Don't reset the phy next time init gets called */
	adapter->hw.phy_reset_disable = TRUE;
}

static void
em_init(void *arg)
{
	struct adapter *adapter = arg;

	EM_LOCK(adapter);
	em_init_locked(adapter);
	EM_UNLOCK(adapter);
}


#ifdef DEVICE_POLLING
/*********************************************************************
 *
 *  Legacy polling routine
 *
 *********************************************************************/
static void
em_poll(struct ifnet *ifp, enum poll_cmd cmd, int count)
{
	struct adapter *adapter = ifp->if_softc;
	uint32_t reg_icr;

	EM_LOCK(adapter);
	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
		EM_UNLOCK(adapter);
		return;
	}

	if (cmd == POLL_AND_CHECK_STATUS) {
		reg_icr = E1000_READ_REG(&adapter->hw, ICR);
		if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
			callout_stop(&adapter->timer);
			adapter->hw.get_link_status = 1;
			em_check_for_link(&adapter->hw);
			em_update_link_status(adapter);
			callout_reset(&adapter->timer, hz, em_local_timer,
			    adapter);
		}
	}
	em_rxeof(adapter, count);
	em_txeof(adapter);

	if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
		em_start_locked(ifp);
	EM_UNLOCK(adapter);
}

/*********************************************************************
 *
 *  Legacy Interrupt Service routine
 *
 *********************************************************************/
static void
em_intr(void *arg)
{
	struct adapter	*adapter = arg;
	struct ifnet	*ifp;
	uint32_t	reg_icr;

	EM_LOCK(adapter);

	ifp = adapter->ifp;

	if (ifp->if_capenable & IFCAP_POLLING) {
		EM_UNLOCK(adapter);
		return;
	}

	for (;;) {
		reg_icr = E1000_READ_REG(&adapter->hw, ICR);
		if (adapter->hw.mac_type >= em_82571 &&
		    (reg_icr & E1000_ICR_INT_ASSERTED) == 0)
			break;
		else if (reg_icr == 0)
			break;

		/*
		 * XXX: some laptops trigger several spurious interrupts
		 * on em(4) when in the resume cycle.  The ICR register
		 * reports an all-ones value in this case.  Processing such
		 * interrupts would lead to a freeze.  I don't know why.
		 */
		if (reg_icr == 0xffffffff)
			break;

		if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
			em_rxeof(adapter, -1);
			em_txeof(adapter);
		}

		/* Link status change */
		if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
			callout_stop(&adapter->timer);
			adapter->hw.get_link_status = 1;
			em_check_for_link(&adapter->hw);
			em_update_link_status(adapter);
			callout_reset(&adapter->timer, hz, em_local_timer,
			    adapter);
		}

		if (reg_icr & E1000_ICR_RXO)
			adapter->rx_overruns++;
	}

	if (ifp->if_drv_flags & IFF_DRV_RUNNING &&
	    !IFQ_DRV_IS_EMPTY(&ifp->if_snd))
		em_start_locked(ifp);

	EM_UNLOCK(adapter);
}

#else  /* if not DEVICE_POLLING, then fast interrupt routines only */

static void
em_handle_link(void *context, int pending)
{
	struct adapter	*adapter = context;
	struct ifnet *ifp;

	ifp = adapter->ifp;

	EM_LOCK(adapter);

	callout_stop(&adapter->timer);
	adapter->hw.get_link_status = 1;
	em_check_for_link(&adapter->hw);
	em_update_link_status(adapter);
	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
	EM_UNLOCK(adapter);
}

static void
em_handle_rxtx(void *context, int pending)
{
	struct adapter	*adapter = context;
	struct ifnet	*ifp;

	NET_LOCK_GIANT();
	ifp = adapter->ifp;

	/*
	 * TODO:
	 * It should be possible to run the tx clean loop without the lock.
	 */
	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
		if (em_rxeof(adapter, adapter->rx_process_limit) != 0)
			taskqueue_enqueue(adapter->tq, &adapter->rxtx_task);
		EM_LOCK(adapter);
		em_txeof(adapter);

		if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
			em_start_locked(ifp);
		EM_UNLOCK(adapter);
	}

	em_enable_intr(adapter);
	NET_UNLOCK_GIANT();
}

/*********************************************************************
 *
 *  Fast Interrupt Service routine
 *
 *********************************************************************/
static void
em_intr_fast(void *arg)
{
	struct adapter	*adapter = arg;
	struct ifnet	*ifp;
	uint32_t	reg_icr;

	ifp = adapter->ifp;

	reg_icr = E1000_READ_REG(&adapter->hw, ICR);

	/* Hot eject? */
	if (reg_icr == 0xffffffff)
		return;

	/* Definitely not our interrupt. */
	if (reg_icr == 0x0)
		return;

	/*
	 * Starting with the 82571 chip, bit 31 should be used to
	 * determine whether the interrupt belongs to us.
	 */
	if (adapter->hw.mac_type >= em_82571 &&
	    (reg_icr & E1000_ICR_INT_ASSERTED) == 0)
		return;

	/*
	 * Mask interrupts until the taskqueue is finished running.  This is
	 * cheap, just assume that it is needed.  This also works around the
	 * MSI message reordering errata on certain systems.
	 */
	em_disable_intr(adapter);
	taskqueue_enqueue(adapter->tq, &adapter->rxtx_task);

	/* Link status change */
	if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC))
		taskqueue_enqueue(taskqueue_fast, &adapter->link_task);

	if (reg_icr & E1000_ICR_RXO)
		adapter->rx_overruns++;
}
#endif /* ! DEVICE_POLLING */

/*********************************************************************
 *
 *  Media Ioctl callback
 *
 *  This routine is called whenever the user queries the status of
 *  the interface using ifconfig.
 *
 **********************************************************************/
static void
em_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
{
	struct adapter *adapter = ifp->if_softc;

	INIT_DEBUGOUT("em_media_status: begin");

	EM_LOCK(adapter);
	em_check_for_link(&adapter->hw);
	em_update_link_status(adapter);

	ifmr->ifm_status = IFM_AVALID;
	ifmr->ifm_active = IFM_ETHER;

	if (!adapter->link_active) {
		EM_UNLOCK(adapter);
		return;
	}

	ifmr->ifm_status |= IFM_ACTIVE;

	if ((adapter->hw.media_type == em_media_type_fiber) ||
	    (adapter->hw.media_type == em_media_type_internal_serdes)) {
		if (adapter->hw.mac_type == em_82545)
			ifmr->ifm_active |= IFM_1000_LX | IFM_FDX;
		else
			ifmr->ifm_active |= IFM_1000_SX | IFM_FDX;
	} else {
		switch (adapter->link_speed) {
		case 10:
			ifmr->ifm_active |= IFM_10_T;
			break;
		case 100:
			ifmr->ifm_active |= IFM_100_TX;
			break;
		case 1000:
			ifmr->ifm_active |= IFM_1000_T;
			break;
		}
		if (adapter->link_duplex == FULL_DUPLEX)
			ifmr->ifm_active |= IFM_FDX;
		else
			ifmr->ifm_active |= IFM_HDX;
	}
	EM_UNLOCK(adapter);
}

/*********************************************************************
 *
 *  Media Ioctl callback
 *
 *  This routine is called when the user changes speed/duplex using
 *  the media/mediaopt options of ifconfig.
 *
 **********************************************************************/
static int
em_media_change(struct ifnet *ifp)
{
	struct adapter *adapter = ifp->if_softc;
	struct ifmedia  *ifm = &adapter->media;

	INIT_DEBUGOUT("em_media_change: begin");

	if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
		return (EINVAL);

	EM_LOCK(adapter);
	switch (IFM_SUBTYPE(ifm->ifm_media)) {
	case IFM_AUTO:
		adapter->hw.autoneg = DO_AUTO_NEG;
		adapter->hw.autoneg_advertised = AUTONEG_ADV_DEFAULT;
		break;
	case IFM_1000_LX:
	case IFM_1000_SX:
	case IFM_1000_T:
		adapter->hw.autoneg = DO_AUTO_NEG;
		adapter->hw.autoneg_advertised = ADVERTISE_1000_FULL;
		break;
	case IFM_100_TX:
		adapter->hw.autoneg = FALSE;
		adapter->hw.autoneg_advertised = 0;
		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
			adapter->hw.forced_speed_duplex = em_100_full;
		else
			adapter->hw.forced_speed_duplex = em_100_half;
		break;
	case IFM_10_T:
		adapter->hw.autoneg = FALSE;
		adapter->hw.autoneg_advertised = 0;
		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
			adapter->hw.forced_speed_duplex = em_10_full;
		else
			adapter->hw.forced_speed_duplex = em_10_half;
		break;
	default:
		device_printf(adapter->dev, "Unsupported media type\n");
	}

	/*
	 * As the speed/duplex settings may have changed, we need to
	 * reset the PHY.
	 */
	adapter->hw.phy_reset_disable = FALSE;

	em_init_locked(adapter);
	EM_UNLOCK(adapter);

	return (0);
}

/*********************************************************************
 *
 *  This routine maps the mbufs to tx descriptors.
 *
 *  return 0 on success, positive on failure
 **********************************************************************/
static int
em_encap(struct adapter *adapter, struct mbuf **m_headp)
{
	struct ifnet		*ifp = adapter->ifp;
	bus_dma_segment_t	segs[EM_MAX_SCATTER];
	bus_dmamap_t		map;
	struct em_buffer	*tx_buffer, *tx_buffer_last;
	struct em_tx_desc	*current_tx_desc;
	struct mbuf		*m_head;
	uint32_t		txd_upper, txd_lower, txd_used, txd_saved;
	int			nsegs, i, j;
	int			error, do_tso, tso_desc = 0;

	m_head = *m_headp;
	current_tx_desc = NULL;
	txd_upper = txd_lower = txd_used = txd_saved = 0;

	do_tso = ((m_head->m_pkthdr.csum_flags & CSUM_TSO) != 0);

	/*
	 * Force a cleanup if the number of TX descriptors
	 * available hits the threshold.
	 */
	if (adapter->num_tx_desc_avail <= EM_TX_CLEANUP_THRESHOLD) {
		em_txeof(adapter);
		if (adapter->num_tx_desc_avail <= EM_TX_CLEANUP_THRESHOLD) {
			adapter->no_tx_desc_avail1++;
			return (ENOBUFS);
		}
	}

	/*
	 * When operating in promiscuous mode, hardware stripping of the
	 * VLAN tag on receive is disabled.  This should not prevent us
	 * from doing hardware insertion of the VLAN tag here as that
	 * is controlled by the DMA descriptor flags and not the receive
	 * tag strip setting.  Unfortunately this hardware switches the
	 * VLAN encapsulation type from 802.1q to ISL when stripping on
	 * receive is disabled.  This means we have to add the VLAN
	 * encapsulation here in the driver, since it will have come down
	 * from the VLAN layer with a tag instead of a VLAN header.
	 */
	if ((m_head->m_flags & M_VLANTAG) && adapter->em_insert_vlan_header) {
		struct ether_vlan_header *evl;
		struct ether_header eh;

		m_head = m_pullup(m_head, sizeof(eh));
		if (m_head == NULL) {
			*m_headp = NULL;
			return (ENOBUFS);
		}
		eh = *mtod(m_head, struct ether_header *);
		M_PREPEND(m_head, sizeof(*evl), M_DONTWAIT);
		if (m_head == NULL) {
			*m_headp = NULL;
			return (ENOBUFS);
		}
		m_head = m_pullup(m_head, sizeof(*evl));
		if (m_head == NULL) {
			*m_headp = NULL;
			return (ENOBUFS);
		}
		evl = mtod(m_head, struct ether_vlan_header *);
		bcopy(&eh, evl, sizeof(*evl));
		evl->evl_proto = evl->evl_encap_proto;
		evl->evl_encap_proto = htons(ETHERTYPE_VLAN);
		evl->evl_tag = htons(m_head->m_pkthdr.ether_vtag);
		*m_headp = m_head;
	}

	/*
	 * TSO workaround:
	 *  If an mbuf contains only the IP and TCP header we have
	 *  to pull 4 bytes of data into it.
	 */
	if (do_tso && (m_head->m_len <= M_TSO_LEN)) {
		m_head = m_pullup(m_head, M_TSO_LEN + 4);
		*m_headp = m_head;
		if (m_head == NULL)
			return (ENOBUFS);
	}

	/*
	 * Map the packet for DMA.
	 */
	tx_buffer = &adapter->tx_buffer_area[adapter->next_avail_tx_desc];
	tx_buffer_last = tx_buffer;
	map = tx_buffer->map;

	error = bus_dmamap_load_mbuf_sg(adapter->txtag, map, *m_headp, segs,
	    &nsegs, BUS_DMA_NOWAIT);

	/*
	 * There are two types of errors we can (try) to handle:
	 * - EFBIG means the mbuf chain was too long and bus_dma ran
	 *   out of segments.  Defragment the mbuf chain and try again.
	 * - ENOMEM means bus_dma could not obtain enough bounce buffers
	 *   at this point in time.  Defer sending and try again later.
	 * All other errors, in particular EINVAL, are fatal and prevent the
	 * mbuf chain from ever going through.  Drop it and report error.
	 */
	if (error == EFBIG) {
		struct mbuf *m;

		m = m_defrag(*m_headp, M_DONTWAIT);
		if (m == NULL) {
			/* Assume m_defrag(9) used only m_get(9). */
			adapter->mbuf_alloc_failed++;
			m_freem(*m_headp);
			*m_headp = NULL;
			return (ENOBUFS);
		}
		*m_headp = m;

		error = bus_dmamap_load_mbuf_sg(adapter->txtag, map, *m_headp,
		    segs, &nsegs, BUS_DMA_NOWAIT);

		if (error == ENOMEM) {
			adapter->no_tx_dma_setup++;
			return (error);
		} else if (error != 0) {
			adapter->no_tx_dma_setup++;
			m_freem(*m_headp);
			*m_headp = NULL;
			return (error);
		}
	} else if (error == ENOMEM) {
		adapter->no_tx_dma_setup++;
		return (error);
	} else if (error != 0) {
		adapter->no_tx_dma_setup++;
		m_freem(*m_headp);
		*m_headp = NULL;
		return (error);
	}

	/*
	 * TSO hardware workaround: if this packet is not TSO, is only
	 * a single descriptor long, and follows a TSO burst, then we
	 * need to add a sentinel descriptor to prevent premature
	 * writeback.
	 */
	if ((do_tso == 0) && (adapter->tx_tso == TRUE)) {
		if (nsegs == 1)
			tso_desc = TRUE;
		adapter->tx_tso = FALSE;
	}

	if (nsegs > adapter->num_tx_desc_avail - 2) {
		adapter->no_tx_desc_avail2++;
		bus_dmamap_unload(adapter->txtag, map);
		return (ENOBUFS);
	}
	m_head = *m_headp;

	/* Do hardware assists */
	if (ifp->if_hwassist) {
		if (do_tso &&
		    em_tso_setup(adapter, m_head, &txd_upper, &txd_lower)) {
			/* we need to make a final sentinel transmit desc */
			tso_desc = TRUE;
		} else
			em_transmit_checksum_setup(adapter, m_head,
			    &txd_upper, &txd_lower);
	}

	i = adapter->next_avail_tx_desc;
	if (adapter->pcix_82544)
		txd_saved = i;

	for (j = 0; j < nsegs; j++) {
		bus_size_t seg_len;
		bus_addr_t seg_addr;

		/* If adapter is 82544 and on a PCIX bus. */
		if (adapter->pcix_82544) {
			DESC_ARRAY	desc_array;
			uint32_t	array_elements, counter;

			/*
			 * Check the Address and Length combination and
			 * split the data accordingly.
			 */
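			/*
			 * For example: the 82544 on PCI-X cannot safely DMA
			 * a buffer that ends near certain address boundaries
			 * (the exact rule is encoded in
			 * em_fill_descriptors()), so a single segment that
			 * straddles such a boundary comes back as two array
			 * entries, each of which gets its own descriptor in
			 * the loop below.
			 */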
			array_elements = em_fill_descriptors(segs[j].ds_addr,
			    segs[j].ds_len, &desc_array);
			for (counter = 0; counter < array_elements; counter++) {
				if (txd_used == adapter->num_tx_desc_avail) {
					adapter->next_avail_tx_desc = txd_saved;
					adapter->no_tx_desc_avail2++;
					bus_dmamap_unload(adapter->txtag, map);
					return (ENOBUFS);
				}
				tx_buffer = &adapter->tx_buffer_area[i];
				current_tx_desc = &adapter->tx_desc_base[i];
				current_tx_desc->buffer_addr = htole64(
				    desc_array.descriptor[counter].address);
				current_tx_desc->lower.data = htole32(
				    adapter->txd_cmd | txd_lower |
				    (uint16_t)desc_array.descriptor[counter].length);
				current_tx_desc->upper.data = htole32(txd_upper);
				if (++i == adapter->num_tx_desc)
					i = 0;

				tx_buffer->m_head = NULL;
				txd_used++;
			}
		} else {
			tx_buffer = &adapter->tx_buffer_area[i];
			current_tx_desc = &adapter->tx_desc_base[i];
			seg_addr = htole64(segs[j].ds_addr);
			seg_len  = segs[j].ds_len;
			/*
			 * TSO Workaround:
			 * If this is the last descriptor, we want to
			 * split it so we have a small final sentinel.
			 */
			if (tso_desc && (j == (nsegs - 1)) && (seg_len > 8)) {
				seg_len -= 4;
				current_tx_desc->buffer_addr = seg_addr;
				current_tx_desc->lower.data = htole32(
				    adapter->txd_cmd | txd_lower | seg_len);
				current_tx_desc->upper.data =
				    htole32(txd_upper);
				if (++i == adapter->num_tx_desc)
					i = 0;
				/* Now make the sentinel */
				++txd_used; /* using an extra txd */
				current_tx_desc = &adapter->tx_desc_base[i];
				tx_buffer = &adapter->tx_buffer_area[i];
				current_tx_desc->buffer_addr =
				    seg_addr + seg_len;
				current_tx_desc->lower.data = htole32(
				    adapter->txd_cmd | txd_lower | 4);
				current_tx_desc->upper.data =
				    htole32(txd_upper);
				if (++i == adapter->num_tx_desc)
					i = 0;
			} else {
				current_tx_desc->buffer_addr = seg_addr;
				current_tx_desc->lower.data = htole32(
				    adapter->txd_cmd | txd_lower | seg_len);
				current_tx_desc->upper.data =
				    htole32(txd_upper);
				if (++i == adapter->num_tx_desc)
					i = 0;
			}
			tx_buffer->m_head = NULL;
		}
	}

	adapter->next_avail_tx_desc = i;
	if (adapter->pcix_82544)
		adapter->num_tx_desc_avail -= txd_used;
	else {
		adapter->num_tx_desc_avail -= nsegs;
		if (tso_desc) /* TSO used an extra for sentinel */
			adapter->num_tx_desc_avail -= txd_used;
	}

	if (m_head->m_flags & M_VLANTAG) {
		/* Set the vlan id. */
		current_tx_desc->upper.fields.special =
		    htole16(m_head->m_pkthdr.ether_vtag);

		/* Tell hardware to add tag. */
		current_tx_desc->lower.data |= htole32(E1000_TXD_CMD_VLE);
	}

	tx_buffer->m_head = m_head;
	tx_buffer_last->map = tx_buffer->map;
	tx_buffer->map = map;
	bus_dmamap_sync(adapter->txtag, map, BUS_DMASYNC_PREWRITE);

	/*
	 * Last Descriptor of Packet needs End Of Packet (EOP).
	 */
	current_tx_desc->lower.data |= htole32(E1000_TXD_CMD_EOP);
1723
1724	/*
1725	 * Advance the Transmit Descriptor Tail (Tdt), this tells the E1000
1726	 * that this frame is available to transmit.
1727	 */
1728	bus_dmamap_sync(adapter->txdma.dma_tag, adapter->txdma.dma_map,
1729	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1730
1731	if (adapter->hw.mac_type == em_82547 &&
1732	    adapter->link_duplex == HALF_DUPLEX)
1733		em_82547_move_tail_locked(adapter);
1734	else {
1735		E1000_WRITE_REG(&adapter->hw, TDT, i);
1736		if (adapter->hw.mac_type == em_82547)
1737			em_82547_update_fifo_head(adapter, m_head->m_pkthdr.len);
1738	}
1739
1740	return (0);
1741}
1742
1743/*********************************************************************
1744 *
1745 * 82547 workaround to avoid controller hang in a half-duplex environment.
1746 * The workaround is to avoid queuing a large packet that would span
1747 * the internal Tx FIFO ring boundary; when one is seen, the FIFO
1748 * pointers are reset, but only once the FIFO is quiescent.
1749 *
1750 **********************************************************************/
1751static void
1752em_82547_move_tail_locked(struct adapter *adapter)
1753{
1754	uint16_t hw_tdt;
1755	uint16_t sw_tdt;
1756	struct em_tx_desc *tx_desc;
1757	uint16_t length = 0;
1758	boolean_t eop = 0;
1759
1760	EM_LOCK_ASSERT(adapter);
1761
1762	hw_tdt = E1000_READ_REG(&adapter->hw, TDT);
1763	sw_tdt = adapter->next_avail_tx_desc;
1764
1765	while (hw_tdt != sw_tdt) {
1766		tx_desc = &adapter->tx_desc_base[hw_tdt];
1767		length += tx_desc->lower.flags.length;
1768		eop = tx_desc->lower.data & E1000_TXD_CMD_EOP;
1769		if (++hw_tdt == adapter->num_tx_desc)
1770			hw_tdt = 0;
1771
1772		if (eop) {
1773			if (em_82547_fifo_workaround(adapter, length)) {
1774				adapter->tx_fifo_wrk_cnt++;
1775				callout_reset(&adapter->tx_fifo_timer, 1,
1776					em_82547_move_tail, adapter);
1777				break;
1778			}
1779			E1000_WRITE_REG(&adapter->hw, TDT, hw_tdt);
1780			em_82547_update_fifo_head(adapter, length);
1781			length = 0;
1782		}
1783	}
1784}
1785
1786static void
1787em_82547_move_tail(void *arg)
1788{
1789	struct adapter *adapter = arg;
1790
1791	EM_LOCK(adapter);
1792	em_82547_move_tail_locked(adapter);
1793	EM_UNLOCK(adapter);
1794}
1795
1796static int
1797em_82547_fifo_workaround(struct adapter *adapter, int len)
1798{
1799	int fifo_space, fifo_pkt_len;
1800
1801	fifo_pkt_len = roundup2(len + EM_FIFO_HDR, EM_FIFO_HDR);
1802
1803	if (adapter->link_duplex == HALF_DUPLEX) {
1804		fifo_space = adapter->tx_fifo_size - adapter->tx_fifo_head;
1805
1806		if (fifo_pkt_len >= (EM_82547_PKT_THRESH + fifo_space)) {
1807			if (em_82547_tx_fifo_reset(adapter))
1808				return (0);
1809			else
1810				return (1);
1811		}
1812	}
1813
1814	return (0);
1815}
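
/*
 * Worked example (illustrative, assuming EM_FIFO_HDR is 16): a 1500-byte
 * frame occupies
 *
 *	fifo_pkt_len = roundup2(1500 + 16, 16) = 1520
 *
 * bytes of Tx FIFO space.  In half duplex, when the room left between
 * the FIFO head and the end of the FIFO shrinks to the point that
 * fifo_pkt_len >= EM_82547_PKT_THRESH + fifo_space, the frame would span
 * the FIFO wrap boundary, so the routine above asks for a FIFO pointer
 * reset instead of letting the packet queue.
 */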
1816
1817static void
1818em_82547_update_fifo_head(struct adapter *adapter, int len)
1819{
1820	int fifo_pkt_len = roundup2(len + EM_FIFO_HDR, EM_FIFO_HDR);
1821
1822	/* tx_fifo_head is always 16 byte aligned */
1823	adapter->tx_fifo_head += fifo_pkt_len;
1824	if (adapter->tx_fifo_head >= adapter->tx_fifo_size) {
1825		adapter->tx_fifo_head -= adapter->tx_fifo_size;
1826	}
1827}
1828
1829
1830static int
1831em_82547_tx_fifo_reset(struct adapter *adapter)
1832{
1833	uint32_t tctl;
1834
1835	if ((E1000_READ_REG(&adapter->hw, TDT) == E1000_READ_REG(&adapter->hw, TDH)) &&
1836	    (E1000_READ_REG(&adapter->hw, TDFT) == E1000_READ_REG(&adapter->hw, TDFH)) &&
1837	    (E1000_READ_REG(&adapter->hw, TDFTS) == E1000_READ_REG(&adapter->hw, TDFHS)) &&
1838	    (E1000_READ_REG(&adapter->hw, TDFPC) == 0)) {
1839
1840		/* Disable TX unit */
1841		tctl = E1000_READ_REG(&adapter->hw, TCTL);
1842		E1000_WRITE_REG(&adapter->hw, TCTL, tctl & ~E1000_TCTL_EN);
1843
1844		/* Reset FIFO pointers */
1845		E1000_WRITE_REG(&adapter->hw, TDFT,  adapter->tx_head_addr);
1846		E1000_WRITE_REG(&adapter->hw, TDFH,  adapter->tx_head_addr);
1847		E1000_WRITE_REG(&adapter->hw, TDFTS, adapter->tx_head_addr);
1848		E1000_WRITE_REG(&adapter->hw, TDFHS, adapter->tx_head_addr);
1849
1850		/* Re-enable TX unit */
1851		E1000_WRITE_REG(&adapter->hw, TCTL, tctl);
1852		E1000_WRITE_FLUSH(&adapter->hw);
1853
1854		adapter->tx_fifo_head = 0;
1855		adapter->tx_fifo_reset_cnt++;
1856
1857		return (TRUE);
1858	}
1859	else {
1860		return (FALSE);
1861	}
1862}
1863
1864static void
1865em_set_promisc(struct adapter *adapter)
1866{
1867	struct ifnet	*ifp = adapter->ifp;
1868	uint32_t	reg_rctl;
1869
1870	reg_rctl = E1000_READ_REG(&adapter->hw, RCTL);
1871
1872	if (ifp->if_flags & IFF_PROMISC) {
1873		reg_rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
1874		E1000_WRITE_REG(&adapter->hw, RCTL, reg_rctl);
1875		/*
1876		 * Disable VLAN stripping in promiscuous mode.
1877		 * This allows vlan-tagged frames to be bridged
1878		 * and makes vlan tags visible in tcpdump.
1879		 * XXX: This is a bit bogus as tcpdump may be used
1880		 * w/o promisc mode as well.
1881		 */
1882		if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING)
1883			em_disable_vlans(adapter);
1884		adapter->em_insert_vlan_header = 1;
1885	} else if (ifp->if_flags & IFF_ALLMULTI) {
1886		reg_rctl |= E1000_RCTL_MPE;
1887		reg_rctl &= ~E1000_RCTL_UPE;
1888		E1000_WRITE_REG(&adapter->hw, RCTL, reg_rctl);
1889		adapter->em_insert_vlan_header = 0;
1890	} else
1891		adapter->em_insert_vlan_header = 0;
1892}
1893
1894static void
1895em_disable_promisc(struct adapter *adapter)
1896{
1897	struct ifnet	*ifp = adapter->ifp;
1898	uint32_t	reg_rctl;
1899
1900	reg_rctl = E1000_READ_REG(&adapter->hw, RCTL);
1901
1902	reg_rctl &=  (~E1000_RCTL_UPE);
1903	reg_rctl &=  (~E1000_RCTL_MPE);
1904	E1000_WRITE_REG(&adapter->hw, RCTL, reg_rctl);
1905
1906	if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING)
1907		em_enable_vlans(adapter);
1908	adapter->em_insert_vlan_header = 0;
1909}
1910
1911
1912/*********************************************************************
1913 *  Multicast Update
1914 *
1915 *  This routine is called whenever the multicast address list is updated.
1916 *
1917 **********************************************************************/
1918
1919static void
1920em_set_multi(struct adapter *adapter)
1921{
1922	struct ifnet	*ifp = adapter->ifp;
1923	struct ifmultiaddr *ifma;
1924	uint32_t reg_rctl = 0;
1925	uint8_t  mta[MAX_NUM_MULTICAST_ADDRESSES * ETH_LENGTH_OF_ADDRESS];
1926	int mcnt = 0;
1927
1928	IOCTL_DEBUGOUT("em_set_multi: begin");
1929
1930	if (adapter->hw.mac_type == em_82542_rev2_0) {
1931		reg_rctl = E1000_READ_REG(&adapter->hw, RCTL);
1932		if (adapter->hw.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
1933			em_pci_clear_mwi(&adapter->hw);
1934		reg_rctl |= E1000_RCTL_RST;
1935		E1000_WRITE_REG(&adapter->hw, RCTL, reg_rctl);
1936		msec_delay(5);
1937	}
1938
1939	IF_ADDR_LOCK(ifp);
1940	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
1941		if (ifma->ifma_addr->sa_family != AF_LINK)
1942			continue;
1943
1944		if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
1945			break;
1946
1947		bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
1948		    &mta[mcnt*ETH_LENGTH_OF_ADDRESS], ETH_LENGTH_OF_ADDRESS);
1949		mcnt++;
1950	}
1951	IF_ADDR_UNLOCK(ifp);
1952
1953	if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES) {
1954		reg_rctl = E1000_READ_REG(&adapter->hw, RCTL);
1955		reg_rctl |= E1000_RCTL_MPE;
1956		E1000_WRITE_REG(&adapter->hw, RCTL, reg_rctl);
1957	} else
1958		em_mc_addr_list_update(&adapter->hw, mta, mcnt, 0, 1);
1959
1960	if (adapter->hw.mac_type == em_82542_rev2_0) {
1961		reg_rctl = E1000_READ_REG(&adapter->hw, RCTL);
1962		reg_rctl &= ~E1000_RCTL_RST;
1963		E1000_WRITE_REG(&adapter->hw, RCTL, reg_rctl);
1964		msec_delay(5);
1965		if (adapter->hw.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
1966			em_pci_set_mwi(&adapter->hw);
1967	}
1968}
1969
1970
1971/*********************************************************************
1972 *  Timer routine
1973 *
1974 *  This routine checks for link status and updates statistics.
1975 *
1976 **********************************************************************/
1977
1978static void
1979em_local_timer(void *arg)
1980{
1981	struct adapter	*adapter = arg;
1982	struct ifnet	*ifp = adapter->ifp;
1983
1984	EM_LOCK(adapter);
1985
1986	em_check_for_link(&adapter->hw);
1987	em_update_link_status(adapter);
1988	em_update_stats_counters(adapter);
1989	if (em_display_debug_stats && (ifp->if_drv_flags & IFF_DRV_RUNNING))
1990		em_print_hw_stats(adapter);
1991	em_smartspeed(adapter);
1992
1993	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
1994
1995	EM_UNLOCK(adapter);
1996}
1997
1998static void
1999em_update_link_status(struct adapter *adapter)
2000{
2001	struct ifnet *ifp = adapter->ifp;
2002	device_t dev = adapter->dev;
2003
2004	if (E1000_READ_REG(&adapter->hw, STATUS) & E1000_STATUS_LU) {
2005		if (adapter->link_active == 0) {
2006			em_get_speed_and_duplex(&adapter->hw, &adapter->link_speed,
2007			    &adapter->link_duplex);
2008			/* Check if we may set SPEED_MODE bit on PCI-E */
2009			if ((adapter->link_speed == SPEED_1000) &&
2010			    ((adapter->hw.mac_type == em_82571) ||
2011			    (adapter->hw.mac_type == em_82572))) {
2012				int tarc0;
2013
2014				tarc0 = E1000_READ_REG(&adapter->hw, TARC0);
2015				tarc0 |= SPEED_MODE_BIT;
2016				E1000_WRITE_REG(&adapter->hw, TARC0, tarc0);
2017			}
2018			if (bootverbose)
2019				device_printf(dev, "Link is up %d Mbps %s\n",
2020				    adapter->link_speed,
2021				    ((adapter->link_duplex == FULL_DUPLEX) ?
2022				    "Full Duplex" : "Half Duplex"));
2023			adapter->link_active = 1;
2024			adapter->smartspeed = 0;
2025			ifp->if_baudrate = adapter->link_speed * 1000000;
2026			if_link_state_change(ifp, LINK_STATE_UP);
2027		}
2028	} else {
2029		if (adapter->link_active == 1) {
2030			ifp->if_baudrate = adapter->link_speed = 0;
2031			adapter->link_duplex = 0;
2032			if (bootverbose)
2033				device_printf(dev, "Link is Down\n");
2034			adapter->link_active = 0;
2035			if_link_state_change(ifp, LINK_STATE_DOWN);
2036		}
2037	}
2038}
2039
2040/*********************************************************************
2041 *
2042 *  This routine disables all traffic on the adapter by issuing a
2043 *  global reset on the MAC and deallocating the TX/RX buffers.
2044 *
2045 **********************************************************************/
2046
2047static void
2048em_stop(void *arg)
2049{
2050	struct adapter	*adapter = arg;
2051	struct ifnet	*ifp = adapter->ifp;
2052
2053	EM_LOCK_ASSERT(adapter);
2054
2055	INIT_DEBUGOUT("em_stop: begin");
2056
2057	em_disable_intr(adapter);
2058	em_reset_hw(&adapter->hw);
2059	callout_stop(&adapter->timer);
2060	callout_stop(&adapter->tx_fifo_timer);
2061	em_free_transmit_structures(adapter);
2062	em_free_receive_structures(adapter);
2063
2064	/* Tell the stack that the interface is no longer active */
2065	ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
2066}
2067
2068
2069/********************************************************************
2070 *
2071 *  Determine hardware revision.
2072 *
2073 **********************************************************************/
2074static void
2075em_identify_hardware(struct adapter *adapter)
2076{
2077	device_t dev = adapter->dev;
2078
2079	/* Make sure our PCI config space has the necessary stuff set */
2080	pci_enable_busmaster(dev);
2081	pci_enable_io(dev, SYS_RES_MEMORY);
2082	adapter->hw.pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2);
2083
2084	/* Save off the information about this board */
2085	adapter->hw.vendor_id = pci_get_vendor(dev);
2086	adapter->hw.device_id = pci_get_device(dev);
2087	adapter->hw.revision_id = pci_get_revid(dev);
2088	adapter->hw.subsystem_vendor_id = pci_get_subvendor(dev);
2089	adapter->hw.subsystem_id = pci_get_subdevice(dev);
2090
2091	/* Identify the MAC */
2092	if (em_set_mac_type(&adapter->hw))
2093		device_printf(dev, "Unknown MAC Type\n");
2094
2095	if (adapter->hw.mac_type == em_82541 || adapter->hw.mac_type == em_82541_rev_2 ||
2096	    adapter->hw.mac_type == em_82547 || adapter->hw.mac_type == em_82547_rev_2)
2097		adapter->hw.phy_init_script = TRUE;
2098}
2099
2100static int
2101em_allocate_pci_resources(struct adapter *adapter)
2102{
2103	device_t	dev = adapter->dev;
2104	int		val, rid;
2105
2106	rid = PCIR_BAR(0);
2107	adapter->res_memory = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
2108	    &rid, RF_ACTIVE);
2109	if (adapter->res_memory == NULL) {
2110		device_printf(dev, "Unable to allocate bus resource: memory\n");
2111		return (ENXIO);
2112	}
2113	adapter->osdep.mem_bus_space_tag =
2114	    rman_get_bustag(adapter->res_memory);
2115	adapter->osdep.mem_bus_space_handle = rman_get_bushandle(adapter->res_memory);
2116	adapter->hw.hw_addr = (uint8_t *)&adapter->osdep.mem_bus_space_handle;
2117
2118	if (adapter->hw.mac_type > em_82543) {
2119		/* Figure out where our IO BAR is. */
2120		for (rid = PCIR_BAR(0); rid < PCIR_CIS;) {
2121			val = pci_read_config(dev, rid, 4);
2122			if (E1000_BAR_TYPE(val) == E1000_BAR_TYPE_IO) {
2123				adapter->io_rid = rid;
2124				break;
2125			}
2126			rid += 4;
2127			/* check for 64bit BAR */
2128			if (E1000_BAR_MEM_TYPE(val) == E1000_BAR_MEM_TYPE_64BIT)
2129				rid += 4;
2130		}
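		/*
		 * Illustrative note: a 64-bit memory BAR occupies two
		 * consecutive dwords in config space, so the scan above
		 * skips an extra 4 bytes when it sees one (e.g. a 64-bit
		 * BAR at offset 0x10 makes 0x18 the next candidate).
		 */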
2131		if (rid >= PCIR_CIS) {
2132			device_printf(dev, "Unable to locate IO BAR\n");
2133			return (ENXIO);
2134		}
2135		adapter->res_ioport = bus_alloc_resource_any(dev, SYS_RES_IOPORT,
2136		    &adapter->io_rid, RF_ACTIVE);
2137		if (adapter->res_ioport == NULL) {
2138			device_printf(dev, "Unable to allocate bus resource: "
2139			    "ioport\n");
2140			return (ENXIO);
2141		}
2142		adapter->hw.io_base = 0;
2143		adapter->osdep.io_bus_space_tag = rman_get_bustag(adapter->res_ioport);
2144		adapter->osdep.io_bus_space_handle =
2145		    rman_get_bushandle(adapter->res_ioport);
2146	}
2147
2148	/* For ICH8 we need to find the flash memory. */
2149	if (adapter->hw.mac_type == em_ich8lan) {
2150		rid = EM_FLASH;
2151
2152		adapter->flash_mem = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
2153		    &rid, RF_ACTIVE);
2154		adapter->osdep.flash_bus_space_tag = rman_get_bustag(adapter->flash_mem);
2155		adapter->osdep.flash_bus_space_handle =
2156		    rman_get_bushandle(adapter->flash_mem);
2157	}
2158
2159	rid = 0x0;
2160	adapter->res_interrupt = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid,
2161	    RF_SHAREABLE | RF_ACTIVE);
2162	if (adapter->res_interrupt == NULL) {
2163		device_printf(dev, "Unable to allocate bus resource: "
2164		    "interrupt\n");
2165		return (ENXIO);
2166	}
2167
2168	adapter->hw.back = &adapter->osdep;
2169
2170	return (0);
2171}
2172
2173int
2174em_allocate_intr(struct adapter *adapter)
2175{
2176	device_t dev = adapter->dev;
2177	int error;
2178
2179	/* Manually turn off all interrupts */
2180	E1000_WRITE_REG(&adapter->hw, IMC, 0xffffffff);
2181
2182#ifdef DEVICE_POLLING
2183	if (adapter->int_handler_tag == NULL && (error = bus_setup_intr(dev,
2184	    adapter->res_interrupt, INTR_TYPE_NET | INTR_MPSAFE, em_intr, adapter,
2185	    &adapter->int_handler_tag)) != 0) {
2186		device_printf(dev, "Failed to register interrupt handler\n");
2187		return (error);
2188	}
2189#else
2190	/*
2191	 * Try allocating a fast interrupt and the associated deferred
2192	 * processing contexts.
2193	 */
2194	TASK_INIT(&adapter->rxtx_task, 0, em_handle_rxtx, adapter);
2195	TASK_INIT(&adapter->link_task, 0, em_handle_link, adapter);
2196	adapter->tq = taskqueue_create_fast("em_taskq", M_NOWAIT,
2197	    taskqueue_thread_enqueue, &adapter->tq);
2198	taskqueue_start_threads(&adapter->tq, 1, PI_NET, "%s taskq",
2199	    device_get_nameunit(adapter->dev));
2200	if ((error = bus_setup_intr(dev, adapter->res_interrupt,
2201	    INTR_TYPE_NET | INTR_FAST, em_intr_fast, adapter,
2202	    &adapter->int_handler_tag)) != 0) {
2203		device_printf(dev, "Failed to register fast interrupt "
2204			    "handler: %d\n", error);
2205		taskqueue_free(adapter->tq);
2206		adapter->tq = NULL;
2207		return (error);
2208	}
2209#endif
2210
2211	em_enable_intr(adapter);
2212	return (0);
2213}
2214
2215static void
2216em_free_intr(struct adapter *adapter)
2217{
2218	device_t dev = adapter->dev;
2219
2220	if (adapter->int_handler_tag != NULL) {
2221		bus_teardown_intr(dev, adapter->res_interrupt, adapter->int_handler_tag);
2222		adapter->int_handler_tag = NULL;
2223	}
2224	if (adapter->tq != NULL) {
2225		taskqueue_drain(adapter->tq, &adapter->rxtx_task);
2226		taskqueue_drain(taskqueue_fast, &adapter->link_task);
2227		taskqueue_free(adapter->tq);
2228		adapter->tq = NULL;
2229	}
2230}
2231
2232static void
2233em_free_pci_resources(struct adapter *adapter)
2234{
2235	device_t dev = adapter->dev;
2236
2237	if (adapter->res_interrupt != NULL)
2238		bus_release_resource(dev, SYS_RES_IRQ, 0, adapter->res_interrupt);
2239
2240	if (adapter->res_memory != NULL)
2241		bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BAR(0),
2242		    adapter->res_memory);
2243
2244	if (adapter->flash_mem != NULL)
2245		bus_release_resource(dev, SYS_RES_MEMORY, EM_FLASH,
2246		    adapter->flash_mem);
2247
2248	if (adapter->res_ioport != NULL)
2249		bus_release_resource(dev, SYS_RES_IOPORT, adapter->io_rid,
2250		    adapter->res_ioport);
2251}
2252
2253/*********************************************************************
2254 *
2255 *  Initialize the hardware to a configuration as specified by the
2256 *  adapter structure. The controller is reset, the EEPROM is
2257 *  verified, the MAC address is set, then the shared initialization
2258 *  routines are called.
2259 *
2260 **********************************************************************/
2261static int
2262em_hardware_init(struct adapter *adapter)
2263{
2264	device_t dev = adapter->dev;
2265	uint16_t rx_buffer_size;
2266
2267	INIT_DEBUGOUT("em_hardware_init: begin");
2268	/* Issue a global reset */
2269	em_reset_hw(&adapter->hw);
2270
2271	/* When hardware is reset, fifo_head is also reset */
2272	adapter->tx_fifo_head = 0;
2273
2274	/* Make sure we have a good EEPROM before we read from it */
2275	if (em_validate_eeprom_checksum(&adapter->hw) < 0) {
2276		device_printf(dev, "The EEPROM Checksum Is Not Valid\n");
2277		return (EIO);
2278	}
2279
2280	if (em_read_part_num(&adapter->hw, &(adapter->part_num)) < 0) {
2281		device_printf(dev, "EEPROM read error while reading part "
2282		    "number\n");
2283		return (EIO);
2284	}
2285
2286	/* Set up smart power down as default off on newer adapters. */
2287	if (!em_smart_pwr_down &&
2288	    (adapter->hw.mac_type == em_82571 || adapter->hw.mac_type == em_82572)) {
2289		uint16_t phy_tmp = 0;
2290
2291		/* Speed up time to link by disabling smart power down. */
2292		em_read_phy_reg(&adapter->hw, IGP02E1000_PHY_POWER_MGMT, &phy_tmp);
2293		phy_tmp &= ~IGP02E1000_PM_SPD;
2294		em_write_phy_reg(&adapter->hw, IGP02E1000_PHY_POWER_MGMT, phy_tmp);
2295	}
2296
2297	/*
2298	 * These parameters control the automatic generation (Tx) and
2299	 * response (Rx) to Ethernet PAUSE frames.
2300	 * - High water mark should allow for at least two frames to be
2301	 *   received after sending an XOFF.
2302	 * - Low water mark works best when it is very near the high water mark.
2303	 *   This allows the receiver to restart by sending XON when it has
2304	 *   drained a bit. Here we use an arbitrary value of 1500 which will
2305	 *   restart after one full frame is pulled from the buffer. There
2306	 *   could be several smaller frames in the buffer and if so they will
2307	 *   not trigger the XON until their total number reduces the buffer
2308	 *   by 1500.
2309	 * - The pause time is fairly large at 1000 x 512ns = 512 usec.
2310	 * - The pause time is fairly large at 0x1000 x 512ns, roughly 2ms.
2311	rx_buffer_size = ((E1000_READ_REG(&adapter->hw, PBA) & 0xffff) << 10 );
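	/*
	 * Worked example (hypothetical register value): if PBA reads 0x30,
	 * the Rx allocation is 0x30 << 10 = 49152 bytes.  For a standard
	 * 1518-byte max frame, roundup2(1518, 1024) = 2048, so the code
	 * below sets
	 *
	 *	fc_high_water = 49152 - 2048 = 47104
	 *	fc_low_water  = 47104 - 1500 = 45604
	 */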
2312
2313	adapter->hw.fc_high_water = rx_buffer_size -
2314	    roundup2(adapter->hw.max_frame_size, 1024);
2315	adapter->hw.fc_low_water = adapter->hw.fc_high_water - 1500;
2316	if (adapter->hw.mac_type == em_80003es2lan)
2317		adapter->hw.fc_pause_time = 0xFFFF;
2318	else
2319		adapter->hw.fc_pause_time = 0x1000;
2320	adapter->hw.fc_send_xon = TRUE;
2321	adapter->hw.fc = em_fc_full;
2322
2323	if (em_init_hw(&adapter->hw) < 0) {
2324		device_printf(dev, "Hardware Initialization Failed\n");
2325		return (EIO);
2326	}
2327
2328	em_check_for_link(&adapter->hw);
2329
2330	return (0);
2331}
2332
2333/*********************************************************************
2334 *
2335 *  Setup networking device structure and register an interface.
2336 *
2337 **********************************************************************/
2338static void
2339em_setup_interface(device_t dev, struct adapter *adapter)
2340{
2341	struct ifnet   *ifp;
2342	INIT_DEBUGOUT("em_setup_interface: begin");
2343
2344	ifp = adapter->ifp = if_alloc(IFT_ETHER);
2345	if (ifp == NULL)
2346		panic("%s: can not if_alloc()", device_get_nameunit(dev));
2347	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
2348	ifp->if_mtu = ETHERMTU;
2349	ifp->if_init =  em_init;
2350	ifp->if_softc = adapter;
2351	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
2352	ifp->if_ioctl = em_ioctl;
2353	ifp->if_start = em_start;
2354	ifp->if_watchdog = em_watchdog;
2355	IFQ_SET_MAXLEN(&ifp->if_snd, adapter->num_tx_desc - 1);
2356	ifp->if_snd.ifq_drv_maxlen = adapter->num_tx_desc - 1;
2357	IFQ_SET_READY(&ifp->if_snd);
2358
2359	ether_ifattach(ifp, adapter->hw.mac_addr);
2360
2361	ifp->if_capabilities = ifp->if_capenable = 0;
2362
2363	if (adapter->hw.mac_type >= em_82543) {
2364		ifp->if_capabilities |= IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM;
2365		ifp->if_capenable |= IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM;
2366	}
2367
2368	/* Enable TSO if available */
2369	if ((adapter->hw.mac_type > em_82544) &&
2370	    (adapter->hw.mac_type != em_82547)) {
2371		ifp->if_capabilities |= IFCAP_TSO4;
2372		ifp->if_capenable |= IFCAP_TSO4;
2373	}
2374
2375	/*
2376	 * Tell the upper layer(s) we support long frames.
2377	 */
2378	ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header);
2379	ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU;
2380	ifp->if_capenable |= IFCAP_VLAN_MTU;
2381
2382#ifdef DEVICE_POLLING
2383	ifp->if_capabilities |= IFCAP_POLLING;
2384#endif
2385
2386	/*
2387	 * Specify the media types supported by this adapter and register
2388	 * callbacks to update media and link information
2389	 */
2390	ifmedia_init(&adapter->media, IFM_IMASK, em_media_change,
2391	    em_media_status);
2392	if ((adapter->hw.media_type == em_media_type_fiber) ||
2393	    (adapter->hw.media_type == em_media_type_internal_serdes)) {
2394		u_char fiber_type = IFM_1000_SX;	/* default type */
2395
2396		if (adapter->hw.mac_type == em_82545)
2397			fiber_type = IFM_1000_LX;
2398		ifmedia_add(&adapter->media, IFM_ETHER | fiber_type | IFM_FDX,
2399		    0, NULL);
2400		ifmedia_add(&adapter->media, IFM_ETHER | fiber_type, 0, NULL);
2401	} else {
2402		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T, 0, NULL);
2403		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T | IFM_FDX,
2404			    0, NULL);
2405		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX,
2406			    0, NULL);
2407		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX | IFM_FDX,
2408			    0, NULL);
2409		if (adapter->hw.phy_type != em_phy_ife) {
2410			ifmedia_add(&adapter->media,
2411				IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
2412			ifmedia_add(&adapter->media,
2413				IFM_ETHER | IFM_1000_T, 0, NULL);
2414		}
2415	}
2416	ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL);
2417	ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO);
2418}
2419
2420
2421/*********************************************************************
2422 *
2423 *  Workaround for SmartSpeed on 82541 and 82547 controllers
2424 *
2425 **********************************************************************/
2426static void
2427em_smartspeed(struct adapter *adapter)
2428{
2429	uint16_t phy_tmp;
2430
2431	if (adapter->link_active || (adapter->hw.phy_type != em_phy_igp) ||
2432	    adapter->hw.autoneg == 0 ||
2433	    (adapter->hw.autoneg_advertised & ADVERTISE_1000_FULL) == 0)
2434		return;
2435
2436	if (adapter->smartspeed == 0) {
2437		/* If the Master/Slave config fault is asserted twice,
2438		 * we assume back-to-back faults and intervene */
2439		em_read_phy_reg(&adapter->hw, PHY_1000T_STATUS, &phy_tmp);
2440		if (!(phy_tmp & SR_1000T_MS_CONFIG_FAULT))
2441			return;
2442		em_read_phy_reg(&adapter->hw, PHY_1000T_STATUS, &phy_tmp);
2443		if (phy_tmp & SR_1000T_MS_CONFIG_FAULT) {
2444			em_read_phy_reg(&adapter->hw, PHY_1000T_CTRL, &phy_tmp);
2445			if (phy_tmp & CR_1000T_MS_ENABLE) {
2446				phy_tmp &= ~CR_1000T_MS_ENABLE;
2447				em_write_phy_reg(&adapter->hw, PHY_1000T_CTRL,
2448				    phy_tmp);
2449				adapter->smartspeed++;
2450				if (adapter->hw.autoneg &&
2451				   !em_phy_setup_autoneg(&adapter->hw) &&
2452				   !em_read_phy_reg(&adapter->hw, PHY_CTRL,
2453				    &phy_tmp)) {
2454					phy_tmp |= (MII_CR_AUTO_NEG_EN |
2455						    MII_CR_RESTART_AUTO_NEG);
2456					em_write_phy_reg(&adapter->hw, PHY_CTRL,
2457					    phy_tmp);
2458				}
2459			}
2460		}
2461		return;
2462	} else if (adapter->smartspeed == EM_SMARTSPEED_DOWNSHIFT) {
2463		/* If still no link, we may be using a 2/3-pair cable */
2464		em_read_phy_reg(&adapter->hw, PHY_1000T_CTRL, &phy_tmp);
2465		phy_tmp |= CR_1000T_MS_ENABLE;
2466		em_write_phy_reg(&adapter->hw, PHY_1000T_CTRL, phy_tmp);
2467		if (adapter->hw.autoneg &&
2468		   !em_phy_setup_autoneg(&adapter->hw) &&
2469		   !em_read_phy_reg(&adapter->hw, PHY_CTRL, &phy_tmp)) {
2470			phy_tmp |= (MII_CR_AUTO_NEG_EN |
2471				    MII_CR_RESTART_AUTO_NEG);
2472			em_write_phy_reg(&adapter->hw, PHY_CTRL, phy_tmp);
2473		}
2474	}
2475	/* Restart process after EM_SMARTSPEED_MAX iterations */
2476	if (adapter->smartspeed++ == EM_SMARTSPEED_MAX)
2477		adapter->smartspeed = 0;
2478}
2479
2480
2481/*
2482 * Manage DMA'able memory.
2483 */
2484static void
2485em_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
2486{
2487	if (error)
2488		return;
2489	*(bus_addr_t *) arg = segs[0].ds_addr;
2490}
2491
2492static int
2493em_dma_malloc(struct adapter *adapter, bus_size_t size, struct em_dma_alloc *dma,
2494	int mapflags)
2495{
2496	int error;
2497
2498	error = bus_dma_tag_create(bus_get_dma_tag(adapter->dev), /* parent */
2499				EM_DBA_ALIGN, 0,	/* alignment, bounds */
2500				BUS_SPACE_MAXADDR,	/* lowaddr */
2501				BUS_SPACE_MAXADDR,	/* highaddr */
2502				NULL, NULL,		/* filter, filterarg */
2503				size,			/* maxsize */
2504				1,			/* nsegments */
2505				size,			/* maxsegsize */
2506				0,			/* flags */
2507				NULL,			/* lockfunc */
2508				NULL,			/* lockarg */
2509				&dma->dma_tag);
2510	if (error) {
2511		device_printf(adapter->dev, "%s: bus_dma_tag_create failed: %d\n",
2512		    __func__, error);
2513		goto fail_0;
2514	}
2515
2516	error = bus_dmamem_alloc(dma->dma_tag, (void**) &dma->dma_vaddr,
2517	    BUS_DMA_NOWAIT, &dma->dma_map);
2518	if (error) {
2519		device_printf(adapter->dev, "%s: bus_dmamem_alloc(%ju) failed: %d\n",
2520		    __func__, (uintmax_t)size, error);
2521		goto fail_2;
2522	}
2523
2524	dma->dma_paddr = 0;
2525	error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
2526	    size, em_dmamap_cb, &dma->dma_paddr, mapflags | BUS_DMA_NOWAIT);
2527	if (error || dma->dma_paddr == 0) {
2528		device_printf(adapter->dev, "%s: bus_dmamap_load failed: %d\n",
2529		    __func__, error);
2530		goto fail_3;
2531	}
2532
2533	return (0);
2534
2535fail_3:
2536	bus_dmamap_unload(dma->dma_tag, dma->dma_map);
2537fail_2:
2538	bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
2539	bus_dma_tag_destroy(dma->dma_tag);
2540fail_0:
2541	dma->dma_map = NULL;
2542	dma->dma_tag = NULL;
2543
2544	return (error);
2545}
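
/*
 * Usage sketch (illustrative, not compiled): em_dma_malloc() and
 * em_dma_free() are paired to manage a DMA'able descriptor ring; the
 * names below mirror the Tx ring setup done during attach elsewhere in
 * this file.
 */
#if 0
	if (em_dma_malloc(adapter, adapter->num_tx_desc *
	    sizeof(struct em_tx_desc), &adapter->txdma, BUS_DMA_NOWAIT)) {
		device_printf(adapter->dev,
		    "Unable to allocate tx_desc memory\n");
		return (ENOMEM);
	}
	adapter->tx_desc_base = (struct em_tx_desc *)adapter->txdma.dma_vaddr;
	/* ... and on teardown: */
	em_dma_free(adapter, &adapter->txdma);
#endif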
2546
2547static void
2548em_dma_free(struct adapter *adapter, struct em_dma_alloc *dma)
2549{
2550	if (dma->dma_tag == NULL)
2551		return;
2552	if (dma->dma_map != NULL) {
2553		bus_dmamap_sync(dma->dma_tag, dma->dma_map,
2554		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
2555		bus_dmamap_unload(dma->dma_tag, dma->dma_map);
2556		bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
2557		dma->dma_map = NULL;
2558	}
2559	bus_dma_tag_destroy(dma->dma_tag);
2560	dma->dma_tag = NULL;
2561}
2562
2563
2564/*********************************************************************
2565 *
2566 *  Allocate memory for tx_buffer structures. The tx_buffer stores all
2567 *  the information needed to transmit a packet on the wire.
2568 *
2569 **********************************************************************/
2570static int
2571em_allocate_transmit_structures(struct adapter *adapter)
2572{
2573	adapter->tx_buffer_area =  malloc(sizeof(struct em_buffer) *
2574	    adapter->num_tx_desc, M_DEVBUF, M_NOWAIT);
2575	if (adapter->tx_buffer_area == NULL) {
2576		device_printf(adapter->dev, "Unable to allocate tx_buffer memory\n");
2577		return (ENOMEM);
2578	}
2579
2580	bzero(adapter->tx_buffer_area, sizeof(struct em_buffer) * adapter->num_tx_desc);
2581
2582	return (0);
2583}
2584
2585/*********************************************************************
2586 *
2587 *  Allocate and initialize transmit structures.
2588 *
2589 **********************************************************************/
2590static int
2591em_setup_transmit_structures(struct adapter *adapter)
2592{
2593	struct ifnet   *ifp = adapter->ifp;
2594	device_t dev = adapter->dev;
2595	struct em_buffer *tx_buffer;
2596	bus_size_t size, segsize;
2597	int error, i;
2598
2599	/*
2600	 * Setup DMA descriptor areas.
2601	 */
2602	segsize = size = roundup2(adapter->hw.max_frame_size, MCLBYTES);
2603
2604	/* Overrides for TSO - want large sizes */
2605	if (ifp->if_hwassist & EM_TCPSEG_FEATURES) {
2606		size = EM_TSO_SIZE;
2607		segsize = PAGE_SIZE;
2608	}
2609
2610	if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),	/* parent */
2611				1, 0,			/* alignment, bounds */
2612				BUS_SPACE_MAXADDR,	/* lowaddr */
2613				BUS_SPACE_MAXADDR,	/* highaddr */
2614				NULL, NULL,		/* filter, filterarg */
2615				size,			/* maxsize */
2616				EM_MAX_SCATTER,		/* nsegments */
2617				segsize,		/* maxsegsize */
2618				0,			/* flags */
2619				NULL,		/* lockfunc */
2620				NULL,		/* lockarg */
2621				&adapter->txtag)) != 0) {
2622		device_printf(dev, "Unable to allocate TX DMA tag\n");
2623		goto fail;
2624	}
2625
2626	if ((error = em_allocate_transmit_structures(adapter)) != 0)
2627		goto fail;
2628
2629	bzero(adapter->tx_desc_base, (sizeof(struct em_tx_desc)) * adapter->num_tx_desc);
2630	tx_buffer = adapter->tx_buffer_area;
2631	for (i = 0; i < adapter->num_tx_desc; i++) {
2632		error = bus_dmamap_create(adapter->txtag, 0, &tx_buffer->map);
2633		if (error != 0) {
2634			device_printf(dev, "Unable to create TX DMA map\n");
2635			goto fail;
2636		}
2637		tx_buffer++;
2638	}
2639
2640	adapter->next_avail_tx_desc = 0;
2641	adapter->oldest_used_tx_desc = 0;
2642
2643	/* Set number of descriptors available */
2644	adapter->num_tx_desc_avail = adapter->num_tx_desc;
2645
2646	/* Set checksum context */
2647	adapter->active_checksum_context = OFFLOAD_NONE;
2648	bus_dmamap_sync(adapter->txdma.dma_tag, adapter->txdma.dma_map,
2649	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
2650
2651	return (0);
2652
2653fail:
2654	em_free_transmit_structures(adapter);
2655	return (error);
2656}
2657
2658/*********************************************************************
2659 *
2660 *  Enable transmit unit.
2661 *
2662 **********************************************************************/
2663static void
2664em_initialize_transmit_unit(struct adapter *adapter)
2665{
2666	uint32_t	reg_tctl, reg_tarc;
2667	uint32_t	reg_tipg = 0;
2668	uint64_t	bus_addr;
2669
2670	INIT_DEBUGOUT("em_initialize_transmit_unit: begin");
2671	/* Setup the Base and Length of the Tx Descriptor Ring */
2672	bus_addr = adapter->txdma.dma_paddr;
2673	E1000_WRITE_REG(&adapter->hw, TDLEN,
2674	    adapter->num_tx_desc * sizeof(struct em_tx_desc));
2675	E1000_WRITE_REG(&adapter->hw, TDBAH, (uint32_t)(bus_addr >> 32));
2676	E1000_WRITE_REG(&adapter->hw, TDBAL, (uint32_t)bus_addr);
2677
2678	/* Setup the HW Tx Head and Tail descriptor pointers */
2679	E1000_WRITE_REG(&adapter->hw, TDT, 0);
2680	E1000_WRITE_REG(&adapter->hw, TDH, 0);
2681
2682
2683	HW_DEBUGOUT2("Base = %x, Length = %x\n", E1000_READ_REG(&adapter->hw, TDBAL),
2684	    E1000_READ_REG(&adapter->hw, TDLEN));
2685
2686	/* Set the default values for the Tx Inter Packet Gap timer */
2687	switch (adapter->hw.mac_type) {
2688	case em_82542_rev2_0:
2689	case em_82542_rev2_1:
2690		reg_tipg = DEFAULT_82542_TIPG_IPGT;
2691		reg_tipg |= DEFAULT_82542_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT;
2692		reg_tipg |= DEFAULT_82542_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT;
2693		break;
2694	case em_80003es2lan:
2695		reg_tipg = DEFAULT_82543_TIPG_IPGR1;
2696		reg_tipg |= DEFAULT_80003ES2LAN_TIPG_IPGR2 <<
2697		    E1000_TIPG_IPGR2_SHIFT;
2698		break;
2699	default:
2700		if ((adapter->hw.media_type == em_media_type_fiber) ||
2701		    (adapter->hw.media_type == em_media_type_internal_serdes))
2702			reg_tipg = DEFAULT_82543_TIPG_IPGT_FIBER;
2703		else
2704			reg_tipg = DEFAULT_82543_TIPG_IPGT_COPPER;
2705		reg_tipg |= DEFAULT_82543_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT;
2706		reg_tipg |= DEFAULT_82543_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT;
2707	}
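	/*
	 * Illustrative note: TIPG packs three transmit inter-packet gap
	 * fields into a single register -- IPGT in the low bits, IPGR1
	 * shifted by E1000_TIPG_IPGR1_SHIFT and IPGR2 shifted by
	 * E1000_TIPG_IPGR2_SHIFT -- which is why each case above ORs the
	 * three values together before the register write below.
	 */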
2708
2709	E1000_WRITE_REG(&adapter->hw, TIPG, reg_tipg);
2710	E1000_WRITE_REG(&adapter->hw, TIDV, adapter->tx_int_delay.value);
2711	if (adapter->hw.mac_type >= em_82540)
2712		E1000_WRITE_REG(&adapter->hw, TADV, adapter->tx_abs_int_delay.value);
2713
2714	/* Do adapter specific tweaks before we enable the transmitter. */
2715	if (adapter->hw.mac_type == em_82571 || adapter->hw.mac_type == em_82572) {
2716		reg_tarc = E1000_READ_REG(&adapter->hw, TARC0);
2717		reg_tarc |= (1 << 25);
2718		E1000_WRITE_REG(&adapter->hw, TARC0, reg_tarc);
2719		reg_tarc = E1000_READ_REG(&adapter->hw, TARC1);
2720		reg_tarc |= (1 << 25);
2721		reg_tarc &= ~(1 << 28);
2722		E1000_WRITE_REG(&adapter->hw, TARC1, reg_tarc);
2723	} else if (adapter->hw.mac_type == em_80003es2lan) {
2724		reg_tarc = E1000_READ_REG(&adapter->hw, TARC0);
2725		reg_tarc |= 1;
2726		E1000_WRITE_REG(&adapter->hw, TARC0, reg_tarc);
2727		reg_tarc = E1000_READ_REG(&adapter->hw, TARC1);
2728		reg_tarc |= 1;
2729		E1000_WRITE_REG(&adapter->hw, TARC1, reg_tarc);
2730	}
2731
2732	/* Program the Transmit Control Register */
2733	reg_tctl = E1000_TCTL_PSP | E1000_TCTL_EN |
2734		   (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT);
2735	if (adapter->hw.mac_type >= em_82571)
2736		reg_tctl |= E1000_TCTL_MULR;
2737	if (adapter->link_duplex == FULL_DUPLEX) {
2738		reg_tctl |= E1000_FDX_COLLISION_DISTANCE << E1000_COLD_SHIFT;
2739	} else {
2740		reg_tctl |= E1000_HDX_COLLISION_DISTANCE << E1000_COLD_SHIFT;
2741	}
2742	/* This write will effectively turn on the transmit unit. */
2743	E1000_WRITE_REG(&adapter->hw, TCTL, reg_tctl);
2744
2745	/* Setup Transmit Descriptor Settings for this adapter */
2746	adapter->txd_cmd = E1000_TXD_CMD_IFCS | E1000_TXD_CMD_RS;
2747
2748	if (adapter->tx_int_delay.value > 0)
2749		adapter->txd_cmd |= E1000_TXD_CMD_IDE;
2750}
2751
2752/*********************************************************************
2753 *
2754 *  Free all transmit related data structures.
2755 *
2756 **********************************************************************/
2757static void
2758em_free_transmit_structures(struct adapter *adapter)
2759{
2760	struct em_buffer *tx_buffer;
2761	int i;
2762
2763	INIT_DEBUGOUT("free_transmit_structures: begin");
2764
2765	if (adapter->tx_buffer_area != NULL) {
2766		tx_buffer = adapter->tx_buffer_area;
2767		for (i = 0; i < adapter->num_tx_desc; i++, tx_buffer++) {
2768			if (tx_buffer->m_head != NULL) {
2769				bus_dmamap_sync(adapter->txtag, tx_buffer->map,
2770				    BUS_DMASYNC_POSTWRITE);
2771				bus_dmamap_unload(adapter->txtag,
2772				    tx_buffer->map);
2773				m_freem(tx_buffer->m_head);
2774				tx_buffer->m_head = NULL;
2775			} else if (tx_buffer->map != NULL)
2776				bus_dmamap_unload(adapter->txtag,
2777				    tx_buffer->map);
2778			if (tx_buffer->map != NULL) {
2779				bus_dmamap_destroy(adapter->txtag,
2780				    tx_buffer->map);
2781				tx_buffer->map = NULL;
2782			}
2783		}
2784	}
2785	if (adapter->tx_buffer_area != NULL) {
2786		free(adapter->tx_buffer_area, M_DEVBUF);
2787		adapter->tx_buffer_area = NULL;
2788	}
2789	if (adapter->txtag != NULL) {
2790		bus_dma_tag_destroy(adapter->txtag);
2791		adapter->txtag = NULL;
2792	}
2793}
2794
2795/*********************************************************************
2796 *
2797 *  The offload context needs to be set when we transfer the first
2798 *  packet of a particular protocol (TCP/UDP). We change the
2799 *  context only if the protocol type changes.
2800 *
2801 **********************************************************************/
2802static void
2803em_transmit_checksum_setup(struct adapter *adapter, struct mbuf *mp,
2804    uint32_t *txd_upper, uint32_t *txd_lower)
2805{
2806	struct em_context_desc *TXD;
2807	struct em_buffer *tx_buffer;
2808	struct ether_vlan_header *eh;
2809	struct ip *ip;
2810	struct ip6_hdr *ip6;
2811	struct tcphdr *th;
2812	int curr_txd, ehdrlen, hdr_len, ip_hlen;
2813	uint32_t cmd = 0;
2814	uint16_t etype;
2815	uint8_t ipproto;
2816
2817	/* Setup checksum offload context. */
2818	curr_txd = adapter->next_avail_tx_desc;
2819	tx_buffer = &adapter->tx_buffer_area[curr_txd];
2820	TXD = (struct em_context_desc *) &adapter->tx_desc_base[curr_txd];
2821
2822	*txd_lower = E1000_TXD_CMD_DEXT |	/* Extended descr type */
2823		     E1000_TXD_DTYP_D;		/* Data descr */
2824
2825	/*
2826	 * Determine where frame payload starts.
2827	 * Jump over vlan headers if already present,
2828	 * helpful for QinQ too.
2829	 */
2830	eh = mtod(mp, struct ether_vlan_header *);
2831	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
2832		etype = ntohs(eh->evl_proto);
2833		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
2834	} else {
2835		etype = ntohs(eh->evl_encap_proto);
2836		ehdrlen = ETHER_HDR_LEN;
2837	}
2838
2839	/*
2840	 * We only support TCP/UDP for IPv4 and IPv6 for the moment.
2841	 * TODO: Support SCTP too when it hits the tree.
2842	 */
2843	switch (etype) {
2844	case ETHERTYPE_IP:
2845		ip = (struct ip *)(mp->m_data + ehdrlen);
2846		ip_hlen = ip->ip_hl << 2;
2847
2848		/* Setup of IP header checksum. */
2849		if (mp->m_pkthdr.csum_flags & CSUM_IP) {
2850			/*
2851			 * Start offset for header checksum calculation.
2852			 * End offset for header checksum calculation.
2853			 * Offset of place to put the checksum.
2854			 */
2855			TXD->lower_setup.ip_fields.ipcss = ehdrlen;
2856			TXD->lower_setup.ip_fields.ipcse =
2857			    htole16(ehdrlen + ip_hlen);
2858			TXD->lower_setup.ip_fields.ipcso =
2859			    ehdrlen + offsetof(struct ip, ip_sum);
2860			cmd |= E1000_TXD_CMD_IP;
2861			*txd_upper |= E1000_TXD_POPTS_IXSM << 8;
2862		}
2863
2864		if (mp->m_len < ehdrlen + ip_hlen)
2865			return;	/* failure */
2866
2867		hdr_len = ehdrlen + ip_hlen;
2868		ipproto = ip->ip_p;
2869
2870		break;
2871	case ETHERTYPE_IPV6:
2872		ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
2873		ip_hlen = sizeof(struct ip6_hdr); /* XXX: No header stacking. */
2874
2875		if (mp->m_len < ehdrlen + ip_hlen)
2876			return;	/* failure */
2877
2878		/* IPv6 doesn't have a header checksum. */
2879
2880		hdr_len = ehdrlen + ip_hlen;
2881		ipproto = ip6->ip6_nxt;
2882
2883		break;
2884	default:
2885		*txd_upper = 0;
2886		*txd_lower = 0;
2887		return;
2888	}
2889
2890	switch (ipproto) {
2891	case IPPROTO_TCP:
2892		if (mp->m_pkthdr.csum_flags & CSUM_TCP) {
2893			/*
2894			 * Start offset for payload checksum calculation.
2895			 * End offset for payload checksum calculation.
2896			 * Offset of place to put the checksum.
2897			 */
2898			th = (struct tcphdr *)(mp->m_data + hdr_len);
2899			TXD->upper_setup.tcp_fields.tucss = hdr_len;
2900			TXD->upper_setup.tcp_fields.tucse = htole16(0);
2901			TXD->upper_setup.tcp_fields.tucso =
2902			    hdr_len + offsetof(struct tcphdr, th_sum);
2903			cmd |= E1000_TXD_CMD_TCP;
2904			*txd_upper |= E1000_TXD_POPTS_TXSM << 8;
2905		}
2906		break;
2907	case IPPROTO_UDP:
2908		if (mp->m_pkthdr.csum_flags & CSUM_UDP) {
2909			/*
2910			 * Start offset for header checksum calculation.
2911			 * End offset for header checksum calculation.
2912			 * Offset of place to put the checksum.
2913			 */
2914			TXD->upper_setup.tcp_fields.tucss = hdr_len;
2915			TXD->upper_setup.tcp_fields.tucse = htole16(0);
2916			TXD->upper_setup.tcp_fields.tucso =
2917			    hdr_len + offsetof(struct udphdr, uh_sum);
2918			*txd_upper |= E1000_TXD_POPTS_TXSM << 8;
2919		}
2920		break;
2921	default:
2922		break;
2923	}
2924
2925	TXD->tcp_seg_setup.data = htole32(0);
2926	TXD->cmd_and_length =
2927	    htole32(adapter->txd_cmd | E1000_TXD_CMD_DEXT | cmd);
2928	tx_buffer->m_head = NULL;
2929
2930	if (++curr_txd == adapter->num_tx_desc)
2931		curr_txd = 0;
2932
2933	adapter->num_tx_desc_avail--;
2934	adapter->next_avail_tx_desc = curr_txd;
2935}
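
/*
 * Worked example (illustrative): for an untagged IPv4/TCP frame with a
 * minimal 20-byte IP header, ehdrlen = 14 and ip_hlen = 20, so the
 * context descriptor built above carries
 *
 *	ipcss = 14, ipcse = 34, ipcso = 14 + 10 = 24
 *	tucss = 34, tucso = 34 + 16 = 50
 *
 * (10 and 16 being offsetof(struct ip, ip_sum) and
 * offsetof(struct tcphdr, th_sum)); tucse = 0 tells the hardware to
 * checksum through the end of the packet.
 */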
2936
2937/**********************************************************************
2938 *
2939 *  Setup work for hardware segmentation offload (TSO)
2940 *
2941 **********************************************************************/
2942static boolean_t
2943em_tso_setup(struct adapter *adapter, struct mbuf *mp, uint32_t *txd_upper,
2944   uint32_t *txd_lower)
2945{
2946	struct em_context_desc *TXD;
2947	struct em_buffer *tx_buffer;
2948	struct ether_vlan_header *eh;
2949	struct ip *ip;
2950	struct ip6_hdr *ip6;
2951	struct tcphdr *th;
2952	int curr_txd, ehdrlen, hdr_len, ip_hlen, isip6;
2953	uint16_t etype;
2954
2955	/*
2956	 * XXX: This is not really correct as the stack would not have
2957	 * set up all checksums.
2958	 * XXX: Returning FALSE is not sufficient, as we may also have to
2959	 * signal true failure cases.  Should return -1 (failure), 0 (no)
2960	 * and 1 (success).
2961	 */
2962	if (mp->m_pkthdr.len <= E1000_TX_BUFFER_SIZE)
2963		return FALSE;	/* 0 */
2964
2965	/*
2966	 * This function could/should be extended to support IP/IPv6
2967	 * fragmentation as well.  But as they say, one step at a time.
2968	 */
2969
2970	/*
2971	 * Determine where frame payload starts.
2972	 * Jump over vlan headers if already present,
2973	 * helpful for QinQ too.
2974	 */
2975	eh = mtod(mp, struct ether_vlan_header *);
2976	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
2977		etype = ntohs(eh->evl_proto);
2978		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
2979	} else {
2980		etype = ntohs(eh->evl_encap_proto);
2981		ehdrlen = ETHER_HDR_LEN;
2982	}
2983
2984	/* Ensure we have at least the IP+TCP header in the first mbuf. */
2985	if (mp->m_len < ehdrlen + sizeof(struct ip) + sizeof(struct tcphdr))
2986		return FALSE;	/* -1 */
2987
2988	/*
2989	 * We only support TCP for IPv4 for the moment; IPv6 is not done yet.
2990	 * TODO: Support SCTP too when it hits the tree.
2991	 */
2992	switch (etype) {
2993	case ETHERTYPE_IP:
2994		isip6 = 0;
2995		ip = (struct ip *)(mp->m_data + ehdrlen);
2996		if (ip->ip_p != IPPROTO_TCP)
2997			return FALSE;	/* 0 */
2998		ip->ip_len = 0;
2999		ip->ip_sum = 0;
3000		ip_hlen = ip->ip_hl << 2;
3001		if (mp->m_len < ehdrlen + ip_hlen + sizeof(struct tcphdr))
3002			return FALSE;	/* -1 */
3003		th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
3004#if 1
3005		th->th_sum = in_pseudo(ip->ip_src.s_addr,
3006		    ip->ip_dst.s_addr, htons(IPPROTO_TCP));
3007#else
3008		th->th_sum = mp->m_pkthdr.csum_data;
3009#endif
3010		break;
3011	case ETHERTYPE_IPV6:
3012		isip6 = 1;
3013		return FALSE;			/* Not supported yet. */
3014		ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
3015		if (ip6->ip6_nxt != IPPROTO_TCP)
3016			return FALSE;	/* 0 */
3017		ip6->ip6_plen = 0;
3018		ip_hlen = sizeof(struct ip6_hdr); /* XXX: no header stacking. */
3019		if (mp->m_len < ehdrlen + ip_hlen + sizeof(struct tcphdr))
3020			return FALSE;	/* -1 */
3021		th = (struct tcphdr *)((caddr_t)ip6 + ip_hlen);
3022#if 0
3023		th->th_sum = in6_pseudo(ip6->ip6_src, ip6->ip6_dst,
3024		    htons(IPPROTO_TCP));	/* XXX: function notyet. */
3025#else
3026		th->th_sum = mp->m_pkthdr.csum_data;
3027#endif
3028		break;
3029	default:
3030		return FALSE;
3031	}
3032	hdr_len = ehdrlen + ip_hlen + (th->th_off << 2);
3033
3034	*txd_lower = (E1000_TXD_CMD_DEXT |	/* Extended descr type */
3035		      E1000_TXD_DTYP_D |	/* Data descr type */
3036		      E1000_TXD_CMD_TSE);	/* Do TSE on this packet */
3037
3038	/* IP and/or TCP header checksum calculation and insertion. */
3039	*txd_upper = ((isip6 ? 0 : E1000_TXD_POPTS_IXSM) |
3040		      E1000_TXD_POPTS_TXSM) << 8;
3041
3042	curr_txd = adapter->next_avail_tx_desc;
3043	tx_buffer = &adapter->tx_buffer_area[curr_txd];
3044	TXD = (struct em_context_desc *) &adapter->tx_desc_base[curr_txd];
3045
3046	/* IPv6 doesn't have a header checksum. */
3047	if (!isip6) {
3048		/*
3049		 * Start offset for header checksum calculation.
3050		 * End offset for header checksum calculation.
3051		 * Offset of place to put the checksum.
3052		 */
3053		TXD->lower_setup.ip_fields.ipcss = ehdrlen;
3054		TXD->lower_setup.ip_fields.ipcse =
3055		    htole16(ehdrlen + ip_hlen - 1);
3056		TXD->lower_setup.ip_fields.ipcso =
3057		    ehdrlen + offsetof(struct ip, ip_sum);
3058	}
3059	/*
3060	 * Start offset for payload checksum calculation.
3061	 * End offset for payload checksum calculation.
3062	 * Offset of place to put the checksum.
3063	 */
3064	TXD->upper_setup.tcp_fields.tucss =
3065	    ehdrlen + ip_hlen;
3066	TXD->upper_setup.tcp_fields.tucse = 0;
3067	TXD->upper_setup.tcp_fields.tucso =
3068	    ehdrlen + ip_hlen + offsetof(struct tcphdr, th_sum);
3069	/*
3070	 * Payload size per packet w/o any headers.
3071	 * Length of all headers up to payload.
3072	 */
3073	TXD->tcp_seg_setup.fields.mss = htole16(mp->m_pkthdr.tso_segsz);
3074	TXD->tcp_seg_setup.fields.hdr_len = hdr_len;
3075
3076	TXD->cmd_and_length = htole32(adapter->txd_cmd |
3077				E1000_TXD_CMD_DEXT |	/* Extended descr */
3078				E1000_TXD_CMD_TSE |	/* TSE context */
3079				(isip6 ? 0 : E1000_TXD_CMD_IP) | /* Do IP csum */
3080				E1000_TXD_CMD_TCP |	/* Do TCP checksum */
3081				(mp->m_pkthdr.len - (hdr_len))); /* Total len */
3082
3083	tx_buffer->m_head = NULL;
3084
3085	if (++curr_txd == adapter->num_tx_desc)
3086		curr_txd = 0;
3087
3088	adapter->num_tx_desc_avail--;
3089	adapter->next_avail_tx_desc = curr_txd;
3090	adapter->tx_tso = TRUE;
3091
3092	return TRUE;
3093}
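
/*
 * Worked example (illustrative): for an untagged IPv4/TCP packet with
 * 20-byte IP and TCP headers (th_off == 5),
 *
 *	hdr_len = 14 + 20 + 20 = 54
 *
 * and with a hypothetical tso_segsz of 1460 the context descriptor
 * programs mss = 1460; the hardware then carves the remaining
 * (m_pkthdr.len - 54) payload bytes into 1460-byte segments, copying
 * and fixing up the headers for each one.
 */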
3094
3095/**********************************************************************
3096 *
3097 *  Examine each tx_buffer in the used queue. If the hardware is done
3098 *  processing the packet then free associated resources. The
3099 *  tx_buffer is put back on the free queue.
3100 *
3101 **********************************************************************/
3102static void
3103em_txeof(struct adapter *adapter)
3104{
3105	int i, num_avail;
3106	struct em_buffer *tx_buffer;
3107	struct em_tx_desc   *tx_desc;
3108	struct ifnet   *ifp = adapter->ifp;
3109
3110	EM_LOCK_ASSERT(adapter);
3111
3112	if (adapter->num_tx_desc_avail == adapter->num_tx_desc)
3113		return;
3114
3115	num_avail = adapter->num_tx_desc_avail;
3116	i = adapter->oldest_used_tx_desc;
3117
3118	tx_buffer = &adapter->tx_buffer_area[i];
3119	tx_desc = &adapter->tx_desc_base[i];
3120
3121	bus_dmamap_sync(adapter->txdma.dma_tag, adapter->txdma.dma_map,
3122	    BUS_DMASYNC_POSTREAD);
3123	while (tx_desc->upper.fields.status & E1000_TXD_STAT_DD) {
3124
3125		tx_desc->upper.data = 0;
3126		num_avail++;
3127
3128		if (tx_buffer->m_head) {
3129			ifp->if_opackets++;
3130			bus_dmamap_sync(adapter->txtag, tx_buffer->map,
3131			    BUS_DMASYNC_POSTWRITE);
3132			bus_dmamap_unload(adapter->txtag, tx_buffer->map);
3133
3134			m_freem(tx_buffer->m_head);
3135			tx_buffer->m_head = NULL;
3136		}
3137
3138		if (++i == adapter->num_tx_desc)
3139			i = 0;
3140
3141		tx_buffer = &adapter->tx_buffer_area[i];
3142		tx_desc = &adapter->tx_desc_base[i];
3143	}
3144	bus_dmamap_sync(adapter->txdma.dma_tag, adapter->txdma.dma_map,
3145	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3146
3147	adapter->oldest_used_tx_desc = i;
3148
3149	/*
3150	 * If we have enough room, clear IFF_DRV_OACTIVE to tell the stack
3151	 * that it is OK to send packets.
3152	 * If there are no pending descriptors, clear the timeout. Otherwise,
3153	 * if some descriptors have been freed, restart the timeout.
3154	 */
3155	if (num_avail > EM_TX_CLEANUP_THRESHOLD) {
3156		ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
3157		if (num_avail == adapter->num_tx_desc)
3158			ifp->if_timer = 0;
3159		else if (num_avail != adapter->num_tx_desc_avail)
3160			ifp->if_timer = EM_TX_TIMEOUT;
3161	}
3162	adapter->num_tx_desc_avail = num_avail;
3163}
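
/*
 * Illustrative model: em_txeof() is a ring consumer that walks forward
 * from oldest_used_tx_desc while the hardware has set the DD
 * (descriptor done) bit, reclaiming mbufs and DMA maps as it goes.
 * A minimal sketch of the index arithmetic over a ring of N entries:
 *
 *	while (desc[i].status & DD) {
 *		reclaim(i);
 *		if (++i == N)
 *			i = 0;
 *	}
 */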
3164
3165/*********************************************************************
3166 *
3167 *  Get a buffer from system mbuf buffer pool.
3168 *
3169 **********************************************************************/
3170static int
3171em_get_buf(struct adapter *adapter, int i)
3172{
3173	struct mbuf		*m;
3174	bus_dma_segment_t	segs[1];
3175	bus_dmamap_t		map;
3176	struct em_buffer	*rx_buffer;
3177	int			error, nsegs;
3178
3179	m = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR);
3180	if (m == NULL) {
3181		adapter->mbuf_cluster_failed++;
3182		return (ENOBUFS);
3183	}
3184	m->m_len = m->m_pkthdr.len = MCLBYTES;
3185	if (adapter->hw.max_frame_size <= (MCLBYTES - ETHER_ALIGN))
3186		m_adj(m, ETHER_ALIGN);
3187
3188	/*
3189	 * Using memory from the mbuf cluster pool, invoke the
3190	 * bus_dma machinery to arrange the memory mapping.
3191	 */
3192	error = bus_dmamap_load_mbuf_sg(adapter->rxtag, adapter->rx_sparemap,
3193	    m, segs, &nsegs, BUS_DMA_NOWAIT);
3194	if (error != 0) {
3195		m_free(m);
3196		return (error);
3197	}
3198	/* If nsegs is wrong then the stack is corrupt. */
3199	KASSERT(nsegs == 1, ("Too many segments returned!"));
3200
3201	rx_buffer = &adapter->rx_buffer_area[i];
3202	if (rx_buffer->m_head != NULL)
3203		bus_dmamap_unload(adapter->rxtag, rx_buffer->map);
3204
3205	map = rx_buffer->map;
3206	rx_buffer->map = adapter->rx_sparemap;
3207	adapter->rx_sparemap = map;
3208	bus_dmamap_sync(adapter->rxtag, rx_buffer->map, BUS_DMASYNC_PREREAD);
3209	rx_buffer->m_head = m;
3210
3211	adapter->rx_desc_base[i].buffer_addr = htole64(segs[0].ds_addr);
3212
3213	return (0);
3214}
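
/*
 * Design note: loading the replacement mbuf into rx_sparemap before
 * swapping maps means a failed bus_dmamap_load_mbuf_sg() leaves the
 * ring slot's existing mapping untouched; the caller can then simply
 * recycle the old buffer rather than lose a descriptor.
 */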
3215
3216/*********************************************************************
3217 *
3218 *  Allocate memory for rx_buffer structures. Since we use one
3219 *  rx_buffer per received packet, the maximum number of rx_buffers
3220 *  that we'll need is equal to the number of receive descriptors
3221 *  that we've allocated.
3222 *
3223 **********************************************************************/
3224static int
3225em_allocate_receive_structures(struct adapter *adapter)
3226{
3227	device_t dev = adapter->dev;
3228	struct em_buffer *rx_buffer;
3229	int i, error;
3230
3231	adapter->rx_buffer_area = malloc(sizeof(struct em_buffer) * adapter->num_rx_desc,
3232	    M_DEVBUF, M_NOWAIT);
3233	if (adapter->rx_buffer_area == NULL) {
3234		device_printf(dev, "Unable to allocate rx_buffer memory\n");
3235		return (ENOMEM);
3236	}
3237
3238	bzero(adapter->rx_buffer_area, sizeof(struct em_buffer) * adapter->num_rx_desc);
3239
3240	error = bus_dma_tag_create(bus_get_dma_tag(dev),	/* parent */
3241				1, 0,			/* alignment, bounds */
3242				BUS_SPACE_MAXADDR,	/* lowaddr */
3243				BUS_SPACE_MAXADDR,	/* highaddr */
3244				NULL, NULL,		/* filter, filterarg */
3245				MCLBYTES,		/* maxsize */
3246				1,			/* nsegments */
3247				MCLBYTES,		/* maxsegsize */
3248				0,			/* flags */
3249				NULL,			/* lockfunc */
3250				NULL,			/* lockarg */
3251				&adapter->rxtag);
3252	if (error) {
3253		device_printf(dev, "%s: bus_dma_tag_create failed %d\n",
3254		    __func__, error);
3255		goto fail;
3256	}
3257
3258	error = bus_dmamap_create(adapter->rxtag, BUS_DMA_NOWAIT,
3259	    &adapter->rx_sparemap);
3260	if (error) {
3261		device_printf(dev, "%s: bus_dmamap_create failed: %d\n",
3262		    __func__, error);
3263		goto fail;
3264	}
3265	rx_buffer = adapter->rx_buffer_area;
3266	for (i = 0; i < adapter->num_rx_desc; i++, rx_buffer++) {
3267		error = bus_dmamap_create(adapter->rxtag, BUS_DMA_NOWAIT,
3268		    &rx_buffer->map);
3269		if (error) {
3270			device_printf(dev, "%s: bus_dmamap_create failed: %d\n",
3271			    __func__, error);
3272			goto fail;
3273		}
3274	}
3275
3276	for (i = 0; i < adapter->num_rx_desc; i++) {
3277		error = em_get_buf(adapter, i);
3278		if (error)
3279			goto fail;
3280	}
3281	bus_dmamap_sync(adapter->rxdma.dma_tag, adapter->rxdma.dma_map,
3282	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3283
3284	return (0);
3285
3286fail:
3287	em_free_receive_structures(adapter);
3288	return (error);
3289}
3290
3291/*********************************************************************
3292 *
3293 *  Allocate and initialize receive structures.
3294 *
3295 **********************************************************************/
3296static int
3297em_setup_receive_structures(struct adapter *adapter)
3298{
3299	int error;
3300
3301	bzero(adapter->rx_desc_base, (sizeof(struct em_rx_desc)) * adapter->num_rx_desc);
3302
3303	if ((error = em_allocate_receive_structures(adapter)) != 0)
3304		return (error);
3305
3306	/* Setup our descriptor pointers */
3307	adapter->next_rx_desc_to_check = 0;
3308
3309	return (0);
3310}
3311
3312/*********************************************************************
3313 *
3314 *  Enable receive unit.
3315 *
3316 **********************************************************************/
3317static void
3318em_initialize_receive_unit(struct adapter *adapter)
3319{
3320	struct ifnet	*ifp = adapter->ifp;
3321	uint64_t	bus_addr;
3322	uint32_t	reg_rctl;
3323	uint32_t	reg_rxcsum;
3324
3325	INIT_DEBUGOUT("em_initialize_receive_unit: begin");
3326
3327	/*
3328	 * Make sure receives are disabled while setting
3329	 * up the descriptor ring
3330	 */
3331	E1000_WRITE_REG(&adapter->hw, RCTL, 0);
3332
3333	/* Set the Receive Delay Timer Register */
3334	E1000_WRITE_REG(&adapter->hw, RDTR, adapter->rx_int_delay.value | E1000_RDT_FPDB);
3335
3336	if (adapter->hw.mac_type >= em_82540) {
3337		E1000_WRITE_REG(&adapter->hw, RADV, adapter->rx_abs_int_delay.value);
3338
3339		/*
3340		 * Set the interrupt throttling rate. Value is calculated
3341		 * as DEFAULT_ITR = 1/(MAX_INTS_PER_SEC * 256ns)
3342		 */
3343#define MAX_INTS_PER_SEC	8000
3344	#define DEFAULT_ITR	(1000000000/(MAX_INTS_PER_SEC * 256))
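		/* e.g. 1000000000 / (8000 * 256) = 488 units of 256ns,
		 * i.e. at most one interrupt every ~125us. */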
3345		E1000_WRITE_REG(&adapter->hw, ITR, DEFAULT_ITR);
3346	}
3347
3348	/* Setup the Base and Length of the Rx Descriptor Ring */
3349	bus_addr = adapter->rxdma.dma_paddr;
3350	E1000_WRITE_REG(&adapter->hw, RDLEN, adapter->num_rx_desc *
3351			sizeof(struct em_rx_desc));
3352	E1000_WRITE_REG(&adapter->hw, RDBAH, (uint32_t)(bus_addr >> 32));
3353	E1000_WRITE_REG(&adapter->hw, RDBAL, (uint32_t)bus_addr);
3354
3355	/* Setup the HW Rx Head and Tail Descriptor Pointers */
3356	E1000_WRITE_REG(&adapter->hw, RDT, adapter->num_rx_desc - 1);
3357	E1000_WRITE_REG(&adapter->hw, RDH, 0);
3358
3359	/* Setup the Receive Control Register */
3360	reg_rctl = E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_LBM_NO |
3361		   E1000_RCTL_RDMTS_HALF |
3362		   (adapter->hw.mc_filter_type << E1000_RCTL_MO_SHIFT);
3363
3364	if (adapter->hw.tbi_compatibility_on == TRUE)
3365		reg_rctl |= E1000_RCTL_SBP;
3366
3367
3368	switch (adapter->rx_buffer_len) {
3369	default:
3370	case EM_RXBUFFER_2048:
3371		reg_rctl |= E1000_RCTL_SZ_2048;
3372		break;
3373	case EM_RXBUFFER_4096:
3374		reg_rctl |= E1000_RCTL_SZ_4096 | E1000_RCTL_BSEX | E1000_RCTL_LPE;
3375		break;
3376	case EM_RXBUFFER_8192:
3377		reg_rctl |= E1000_RCTL_SZ_8192 | E1000_RCTL_BSEX | E1000_RCTL_LPE;
3378		break;
3379	case EM_RXBUFFER_16384:
3380		reg_rctl |= E1000_RCTL_SZ_16384 | E1000_RCTL_BSEX | E1000_RCTL_LPE;
3381		break;
3382	}
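
	/*
	 * Note (illustrative): the sizes above 2048 also set
	 * E1000_RCTL_BSEX, the buffer size extension bit, which scales the
	 * base RCTL size encodings by a factor of 16, plus E1000_RCTL_LPE
	 * so long packets are accepted into the larger buffers.
	 */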
3383
3384	if (ifp->if_mtu > ETHERMTU)
3385		reg_rctl |= E1000_RCTL_LPE;
3386
3387	/* Enable 82543 Receive Checksum Offload for TCP and UDP */
3388	if ((adapter->hw.mac_type >= em_82543) &&
3389	    (ifp->if_capenable & IFCAP_RXCSUM)) {
3390		reg_rxcsum = E1000_READ_REG(&adapter->hw, RXCSUM);
3391		reg_rxcsum |= (E1000_RXCSUM_IPOFL | E1000_RXCSUM_TUOFL);
3392		E1000_WRITE_REG(&adapter->hw, RXCSUM, reg_rxcsum);
3393	}
3394
3395	/* Enable Receives */
3396	E1000_WRITE_REG(&adapter->hw, RCTL, reg_rctl);
3397}
3398
3399/*********************************************************************
3400 *
3401 *  Free receive related data structures.
3402 *
3403 **********************************************************************/
3404static void
3405em_free_receive_structures(struct adapter *adapter)
3406{
3407	struct em_buffer *rx_buffer;
3408	int i;
3409
3410	INIT_DEBUGOUT("free_receive_structures: begin");
3411
3412	if (adapter->rx_sparemap) {
3413		bus_dmamap_destroy(adapter->rxtag, adapter->rx_sparemap);
3414		adapter->rx_sparemap = NULL;
3415	}
3416	if (adapter->rx_buffer_area != NULL) {
3417		rx_buffer = adapter->rx_buffer_area;
3418		for (i = 0; i < adapter->num_rx_desc; i++, rx_buffer++) {
3419			if (rx_buffer->m_head != NULL) {
3420				bus_dmamap_sync(adapter->rxtag, rx_buffer->map,
3421				    BUS_DMASYNC_POSTREAD);
3422				bus_dmamap_unload(adapter->rxtag,
3423				    rx_buffer->map);
3424				m_freem(rx_buffer->m_head);
3425				rx_buffer->m_head = NULL;
3426			} else if (rx_buffer->map != NULL)
3427				bus_dmamap_unload(adapter->rxtag,
3428				    rx_buffer->map);
3429			if (rx_buffer->map != NULL) {
3430				bus_dmamap_destroy(adapter->rxtag,
3431				    rx_buffer->map);
3432				rx_buffer->map = NULL;
3433			}
3434		}
3435	}
3436	if (adapter->rx_buffer_area != NULL) {
3437		free(adapter->rx_buffer_area, M_DEVBUF);
3438		adapter->rx_buffer_area = NULL;
3439	}
3440	if (adapter->rxtag != NULL) {
3441		bus_dma_tag_destroy(adapter->rxtag);
3442		adapter->rxtag = NULL;
3443	}
3444}

/*********************************************************************
 *
 *  This routine executes in interrupt context. It replenishes
 *  the mbufs in the descriptor ring and passes frames that have
 *  been DMA'd into host memory up to the upper layer.
 *
 *  We loop at most count times if count is > 0, or until done if
 *  count < 0.
 *
 *********************************************************************/
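/*
 * A usage sketch (the call sites are assumptions, not quoted from this
 * file): the interrupt path would call em_rxeof(adapter, -1) to drain
 * the ring completely, while the DEVICE_POLLING path passes its budget,
 * e.g. em_rxeof(adapter, count).  Either way the loop below stops once
 * count reaches zero or no completed descriptor remains.
 */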
static int
em_rxeof(struct adapter *adapter, int count)
{
	struct ifnet	*ifp;
	struct mbuf	*mp;
	uint8_t		accept_frame = 0;
	uint8_t		eop = 0;
	uint16_t	len, desc_len, prev_len_adj;
	int		i;

	/* Pointer to the receive descriptor being examined. */
	struct em_rx_desc   *current_desc;
	uint8_t		status;

	ifp = adapter->ifp;
	i = adapter->next_rx_desc_to_check;
	current_desc = &adapter->rx_desc_base[i];
	bus_dmamap_sync(adapter->rxdma.dma_tag, adapter->rxdma.dma_map,
	    BUS_DMASYNC_POSTREAD);

	if (!((current_desc->status) & E1000_RXD_STAT_DD))
		return (0);

	while ((current_desc->status & E1000_RXD_STAT_DD) &&
	    (count != 0) &&
	    (ifp->if_drv_flags & IFF_DRV_RUNNING)) {
		struct mbuf *m = NULL;

		mp = adapter->rx_buffer_area[i].m_head;
		/*
		 * Can't defer bus_dmamap_sync(9) because TBI_ACCEPT
		 * needs to access the last received byte in the mbuf.
		 */
		bus_dmamap_sync(adapter->rxtag, adapter->rx_buffer_area[i].map,
		    BUS_DMASYNC_POSTREAD);

		accept_frame = 1;
		prev_len_adj = 0;
		desc_len = le16toh(current_desc->length);
		status = current_desc->status;
		if (status & E1000_RXD_STAT_EOP) {
			count--;
			eop = 1;
			if (desc_len < ETHER_CRC_LEN) {
				len = 0;
				prev_len_adj = ETHER_CRC_LEN - desc_len;
			} else
				len = desc_len - ETHER_CRC_LEN;
		} else {
			eop = 0;
			len = desc_len;
		}

		if (current_desc->errors & E1000_RXD_ERR_FRAME_ERR_MASK) {
			uint8_t		last_byte;
			uint32_t	pkt_len = desc_len;

			if (adapter->fmp != NULL)
				pkt_len += adapter->fmp->m_pkthdr.len;

			last_byte = *(mtod(mp, caddr_t) + desc_len - 1);
			if (TBI_ACCEPT(&adapter->hw, status,
			    current_desc->errors, pkt_len, last_byte)) {
				em_tbi_adjust_stats(&adapter->hw,
				    &adapter->stats, pkt_len,
				    adapter->hw.mac_addr);
				if (len > 0)
					len--;
			} else
				accept_frame = 0;
		}

		if (accept_frame) {
			if (em_get_buf(adapter, i) != 0) {
				ifp->if_iqdrops++;
				goto discard;
			}

			/* Assign correct length to the current fragment. */
			mp->m_len = len;

			if (adapter->fmp == NULL) {
				mp->m_pkthdr.len = len;
				adapter->fmp = mp; /* Store the first mbuf */
				adapter->lmp = mp;
			} else {
				/* Chain mbufs together. */
				mp->m_flags &= ~M_PKTHDR;
				/*
				 * Adjust length of previous mbuf in chain if
				 * we received less than 4 bytes in the last
				 * descriptor.
				 */
				if (prev_len_adj > 0) {
					adapter->lmp->m_len -= prev_len_adj;
					adapter->fmp->m_pkthdr.len -=
					    prev_len_adj;
				}
				adapter->lmp->m_next = mp;
				adapter->lmp = adapter->lmp->m_next;
				adapter->fmp->m_pkthdr.len += len;
			}

			if (eop) {
				adapter->fmp->m_pkthdr.rcvif = ifp;
				ifp->if_ipackets++;
				em_receive_checksum(adapter, current_desc,
				    adapter->fmp);
#ifndef __NO_STRICT_ALIGNMENT
				if (adapter->hw.max_frame_size >
				    (MCLBYTES - ETHER_ALIGN) &&
				    em_fixup_rx(adapter) != 0)
					goto skip;
#endif
				if (status & E1000_RXD_STAT_VP) {
					adapter->fmp->m_pkthdr.ether_vtag =
					    (le16toh(current_desc->special) &
					    E1000_RXD_SPC_VLAN_MASK);
					adapter->fmp->m_flags |= M_VLANTAG;
				}
#ifndef __NO_STRICT_ALIGNMENT
skip:
#endif
				m = adapter->fmp;
				adapter->fmp = NULL;
				adapter->lmp = NULL;
			}
		} else {
			ifp->if_ierrors++;
discard:
			/* Reuse loaded DMA map and just update mbuf chain. */
			mp = adapter->rx_buffer_area[i].m_head;
			mp->m_len = mp->m_pkthdr.len = MCLBYTES;
			mp->m_data = mp->m_ext.ext_buf;
			mp->m_next = NULL;
			if (adapter->hw.max_frame_size <= (MCLBYTES - ETHER_ALIGN))
				m_adj(mp, ETHER_ALIGN);
			if (adapter->fmp != NULL) {
				m_freem(adapter->fmp);
				adapter->fmp = NULL;
				adapter->lmp = NULL;
			}
			m = NULL;
		}

		/* Zero out the receive descriptor's status. */
		current_desc->status = 0;
		bus_dmamap_sync(adapter->rxdma.dma_tag, adapter->rxdma.dma_map,
		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);

		/* Advance our pointers to the next descriptor. */
		if (++i == adapter->num_rx_desc)
			i = 0;
		if (m != NULL) {
			adapter->next_rx_desc_to_check = i;
#ifdef DEVICE_POLLING
			EM_UNLOCK(adapter);
			(*ifp->if_input)(ifp, m);
			EM_LOCK(adapter);
#else
			(*ifp->if_input)(ifp, m);
#endif
			i = adapter->next_rx_desc_to_check;
		}
		current_desc = &adapter->rx_desc_base[i];
	}
	adapter->next_rx_desc_to_check = i;

	/* Advance the E1000's Receive Queue #0 "Tail Pointer". */
	if (--i < 0)
		i = adapter->num_rx_desc - 1;
	E1000_WRITE_REG(&adapter->hw, RDT, i);
	if (!((current_desc->status) & E1000_RXD_STAT_DD))
		return (0);

	return (1);
}

#ifndef __NO_STRICT_ALIGNMENT
/*
 * When jumbo frames are enabled we should realign the entire payload on
 * architectures with strict alignment. This is a serious design mistake
 * of the 8254x, as it nullifies much of the benefit of DMA: the 8254x
 * only allows the RX buffer size to be 2048/4096/8192/16384 bytes,
 * whereas what we really want is 2048 - ETHER_ALIGN so the payload
 * itself is aligned. On architectures without strict alignment
 * restrictions the 8254x still performs unaligned memory accesses,
 * which reduces performance too. To avoid copying an entire frame just
 * to realign it, we allocate a new mbuf, copy only the Ethernet header
 * into it, and prepend the new mbuf to the existing chain.
 *
 * Be aware that the best performance of the 8254x is achieved only
 * when jumbo frames are not used at all on architectures with strict
 * alignment.
 */
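/*
 * Illustration (assumed numbers): an mbuf cluster is 2048 bytes and at
 * least 4-byte aligned.  A frame received at offset 0 puts the 14-byte
 * Ethernet header at offset 0 and the IP header at offset 14, which is
 * only 2-byte aligned.  Shifting the payload by ETHER_HDR_LEN, or
 * moving the header into its own mbuf as below, restores the 4-byte
 * alignment of the IP header that strict-alignment CPUs require.
 */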
static int
em_fixup_rx(struct adapter *adapter)
{
	struct mbuf *m, *n;
	int error;

	error = 0;
	m = adapter->fmp;
	if (m->m_len <= (MCLBYTES - ETHER_HDR_LEN)) {
		bcopy(m->m_data, m->m_data + ETHER_HDR_LEN, m->m_len);
		m->m_data += ETHER_HDR_LEN;
	} else {
		MGETHDR(n, M_DONTWAIT, MT_DATA);
		if (n != NULL) {
			bcopy(m->m_data, n->m_data, ETHER_HDR_LEN);
			m->m_data += ETHER_HDR_LEN;
			m->m_len -= ETHER_HDR_LEN;
			n->m_len = ETHER_HDR_LEN;
			M_MOVE_PKTHDR(n, m);
			n->m_next = m;
			adapter->fmp = n;
		} else {
			adapter->ifp->if_iqdrops++;
			adapter->mbuf_alloc_failed++;
			m_freem(adapter->fmp);
			adapter->fmp = NULL;
			adapter->lmp = NULL;
			error = ENOBUFS;
		}
	}

	return (error);
}
#endif

/*********************************************************************
 *
 *  Verify that the hardware indicated that the checksum is valid.
 *  Inform the stack about the status of the checksum so that the
 *  stack doesn't spend time verifying it again.
 *
 *********************************************************************/
static void
em_receive_checksum(struct adapter *adapter, struct em_rx_desc *rx_desc,
		    struct mbuf *mp)
{
	/* 82543 or newer only */
	if ((adapter->hw.mac_type < em_82543) ||
	    /* Ignore Checksum bit is set */
	    (rx_desc->status & E1000_RXD_STAT_IXSM)) {
		mp->m_pkthdr.csum_flags = 0;
		return;
	}

	if (rx_desc->status & E1000_RXD_STAT_IPCS) {
		/* Did it pass? */
		if (!(rx_desc->errors & E1000_RXD_ERR_IPE)) {
			/* IP checksum good */
			mp->m_pkthdr.csum_flags = CSUM_IP_CHECKED |
			    CSUM_IP_VALID;
		} else {
			mp->m_pkthdr.csum_flags = 0;
		}
	}

	if (rx_desc->status & E1000_RXD_STAT_TCPCS) {
		/* Did it pass? */
		if (!(rx_desc->errors & E1000_RXD_ERR_TCPE)) {
			mp->m_pkthdr.csum_flags |=
			    (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
			mp->m_pkthdr.csum_data = htons(0xffff);
		}
	}
}
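
/*
 * Note on consumption of these flags: when CSUM_DATA_VALID and
 * CSUM_PSEUDO_HDR are set with csum_data = 0xffff, the TCP/UDP input
 * paths treat the payload checksum as already verified and skip the
 * software recomputation entirely.
 */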

static void
em_enable_vlans(struct adapter *adapter)
{
	uint32_t ctrl;

	E1000_WRITE_REG(&adapter->hw, VET, ETHERTYPE_VLAN);

	ctrl = E1000_READ_REG(&adapter->hw, CTRL);
	ctrl |= E1000_CTRL_VME;
	E1000_WRITE_REG(&adapter->hw, CTRL, ctrl);
}

static void
em_disable_vlans(struct adapter *adapter)
{
	uint32_t ctrl;

	ctrl = E1000_READ_REG(&adapter->hw, CTRL);
	ctrl &= ~E1000_CTRL_VME;
	E1000_WRITE_REG(&adapter->hw, CTRL, ctrl);
}

static void
em_enable_intr(struct adapter *adapter)
{
	E1000_WRITE_REG(&adapter->hw, IMS, (IMS_ENABLE_MASK));
}

static void
em_disable_intr(struct adapter *adapter)
{
	/*
	 * The first version of the 82542 had an erratum where, when link
	 * was forced, it would stay up even if the cable was disconnected.
	 * Sequence errors were used to detect the disconnect and the
	 * driver would then unforce the link; that code lives in the ISR.
	 * For it to work correctly, the sequence error interrupt has to
	 * remain enabled all the time.
	 */
	if (adapter->hw.mac_type == em_82542_rev2_0)
		E1000_WRITE_REG(&adapter->hw, IMC,
		    0xffffffff & ~E1000_IMC_RXSEQ);
	else
		E1000_WRITE_REG(&adapter->hw, IMC, 0xffffffff);
}

static int
em_is_valid_ether_addr(uint8_t *addr)
{
	char zero_addr[6] = { 0, 0, 0, 0, 0, 0 };

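	/*
	 * addr[0] & 1 tests the I/G bit of the first octet: a set bit
	 * marks a multicast/broadcast address, which can never be a
	 * valid station address.  The all-zero address is rejected too.
	 */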
	if ((addr[0] & 1) || (!bcmp(addr, zero_addr, ETHER_ADDR_LEN))) {
		return (FALSE);
	}

	return (TRUE);
}

void
em_write_pci_cfg(struct em_hw *hw, uint32_t reg, uint16_t *value)
{
	pci_write_config(((struct em_osdep *)hw->back)->dev, reg, *value, 2);
}

void
em_read_pci_cfg(struct em_hw *hw, uint32_t reg, uint16_t *value)
{
	*value = pci_read_config(((struct em_osdep *)hw->back)->dev, reg, 2);
}

void
em_pci_set_mwi(struct em_hw *hw)
{
	pci_write_config(((struct em_osdep *)hw->back)->dev, PCIR_COMMAND,
	    (hw->pci_cmd_word | CMD_MEM_WRT_INVALIDATE), 2);
}

void
em_pci_clear_mwi(struct em_hw *hw)
{
	pci_write_config(((struct em_osdep *)hw->back)->dev, PCIR_COMMAND,
	    (hw->pci_cmd_word & ~CMD_MEM_WRT_INVALIDATE), 2);
}

/*********************************************************************
 * 82544 coexistence issue workaround.
 *    There are two issues:
 *       1. Transmit hang.
 *          Detected with the equation SIZE[3:0] + ADDR[2:0] = SUM[3:0];
 *          if SUM[3:0] is between 1 and 4, we will have this issue.
 *
 *       2. DAC issue.
 *          Detected with the equation SIZE[3:0] + ADDR[2:0] = SUM[3:0];
 *          if SUM[3:0] is between 9 and 0xc, we will have this issue.
 *
 *    WORKAROUND:
 *          Make sure the low nibble of the ending address is never
 *          1-4 (hang) or 9-c (DAC).
 *********************************************************************/
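/*
 * Worked example (illustrative numbers, not from the original source):
 * a segment with ADDR[2:0] = 5 and length 0x3e (SIZE[3:0] = 0xe) gives
 * SUM[3:0] = (5 + 0xe) & 0xf = 0x3, which falls in the hang range 1-4,
 * so the segment must be split.  Shortening the first descriptor by 4
 * bytes moves its SUM to (5 + 0xa) & 0xf = 0xf, which is safe, and the
 * trailing 4-byte descriptor is handled by the length <= 4 fast path
 * below, which never triggers either erratum.
 */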
static uint32_t
em_fill_descriptors(bus_addr_t address, uint32_t length,
    PDESC_ARRAY desc_array)
{
	uint32_t safe_terminator;

	/*
	 * Since the issue is sensitive to both length and address,
	 * check the address first.  A transfer of 4 bytes or less
	 * always fits safely in a single descriptor.
	 */
	if (length <= 4) {
		desc_array->descriptor[0].address = address;
		desc_array->descriptor[0].length = length;
		desc_array->elements = 1;
		return (desc_array->elements);
	}
	safe_terminator =
	    (uint32_t)((((uint32_t)address & 0x7) + (length & 0xF)) & 0xF);
	/*
	 * If the terminator does not fall in 0x1-0x4 or 0x9-0xC, a
	 * single descriptor is safe to use as-is.
	 */
	if (safe_terminator == 0 ||
	    (safe_terminator > 4 && safe_terminator < 9) ||
	    (safe_terminator > 0xC && safe_terminator <= 0xF)) {
		desc_array->descriptor[0].address = address;
		desc_array->descriptor[0].length = length;
		desc_array->elements = 1;
		return (desc_array->elements);
	}

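	/*
	 * Otherwise split the transfer: the first descriptor carries all
	 * but the last 4 bytes, and a trailing 4-byte descriptor ends
	 * the transfer with a length that is always safe (see the early
	 * return above).
	 */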
	desc_array->descriptor[0].address = address;
	desc_array->descriptor[0].length = length - 4;
	desc_array->descriptor[1].address = address + (length - 4);
	desc_array->descriptor[1].length = 4;
	desc_array->elements = 2;
	return (desc_array->elements);
}

/**********************************************************************
 *
 *  Update the board statistics counters.
 *
 **********************************************************************/
static void
em_update_stats_counters(struct adapter *adapter)
{
	struct ifnet   *ifp;

	if (adapter->hw.media_type == em_media_type_copper ||
	    (E1000_READ_REG(&adapter->hw, STATUS) & E1000_STATUS_LU)) {
		adapter->stats.symerrs += E1000_READ_REG(&adapter->hw, SYMERRS);
		adapter->stats.sec += E1000_READ_REG(&adapter->hw, SEC);
	}
	adapter->stats.crcerrs += E1000_READ_REG(&adapter->hw, CRCERRS);
	adapter->stats.mpc += E1000_READ_REG(&adapter->hw, MPC);
	adapter->stats.scc += E1000_READ_REG(&adapter->hw, SCC);
	adapter->stats.ecol += E1000_READ_REG(&adapter->hw, ECOL);

	adapter->stats.mcc += E1000_READ_REG(&adapter->hw, MCC);
	adapter->stats.latecol += E1000_READ_REG(&adapter->hw, LATECOL);
	adapter->stats.colc += E1000_READ_REG(&adapter->hw, COLC);
	adapter->stats.dc += E1000_READ_REG(&adapter->hw, DC);
	adapter->stats.rlec += E1000_READ_REG(&adapter->hw, RLEC);
	adapter->stats.xonrxc += E1000_READ_REG(&adapter->hw, XONRXC);
	adapter->stats.xontxc += E1000_READ_REG(&adapter->hw, XONTXC);
	adapter->stats.xoffrxc += E1000_READ_REG(&adapter->hw, XOFFRXC);
	adapter->stats.xofftxc += E1000_READ_REG(&adapter->hw, XOFFTXC);
	adapter->stats.fcruc += E1000_READ_REG(&adapter->hw, FCRUC);
	adapter->stats.prc64 += E1000_READ_REG(&adapter->hw, PRC64);
	adapter->stats.prc127 += E1000_READ_REG(&adapter->hw, PRC127);
	adapter->stats.prc255 += E1000_READ_REG(&adapter->hw, PRC255);
	adapter->stats.prc511 += E1000_READ_REG(&adapter->hw, PRC511);
	adapter->stats.prc1023 += E1000_READ_REG(&adapter->hw, PRC1023);
	adapter->stats.prc1522 += E1000_READ_REG(&adapter->hw, PRC1522);
	adapter->stats.gprc += E1000_READ_REG(&adapter->hw, GPRC);
	adapter->stats.bprc += E1000_READ_REG(&adapter->hw, BPRC);
	adapter->stats.mprc += E1000_READ_REG(&adapter->hw, MPRC);
	adapter->stats.gptc += E1000_READ_REG(&adapter->hw, GPTC);

	/*
	 * For the 64-bit byte counters the low dword must be read first;
	 * both registers clear on the read of the high dword.
	 */
	adapter->stats.gorcl += E1000_READ_REG(&adapter->hw, GORCL);
	adapter->stats.gorch += E1000_READ_REG(&adapter->hw, GORCH);
	adapter->stats.gotcl += E1000_READ_REG(&adapter->hw, GOTCL);
	adapter->stats.gotch += E1000_READ_REG(&adapter->hw, GOTCH);

	adapter->stats.rnbc += E1000_READ_REG(&adapter->hw, RNBC);
	adapter->stats.ruc += E1000_READ_REG(&adapter->hw, RUC);
	adapter->stats.rfc += E1000_READ_REG(&adapter->hw, RFC);
	adapter->stats.roc += E1000_READ_REG(&adapter->hw, ROC);
	adapter->stats.rjc += E1000_READ_REG(&adapter->hw, RJC);

	adapter->stats.torl += E1000_READ_REG(&adapter->hw, TORL);
	adapter->stats.torh += E1000_READ_REG(&adapter->hw, TORH);
	adapter->stats.totl += E1000_READ_REG(&adapter->hw, TOTL);
	adapter->stats.toth += E1000_READ_REG(&adapter->hw, TOTH);

	adapter->stats.tpr += E1000_READ_REG(&adapter->hw, TPR);
	adapter->stats.tpt += E1000_READ_REG(&adapter->hw, TPT);
	adapter->stats.ptc64 += E1000_READ_REG(&adapter->hw, PTC64);
	adapter->stats.ptc127 += E1000_READ_REG(&adapter->hw, PTC127);
	adapter->stats.ptc255 += E1000_READ_REG(&adapter->hw, PTC255);
	adapter->stats.ptc511 += E1000_READ_REG(&adapter->hw, PTC511);
	adapter->stats.ptc1023 += E1000_READ_REG(&adapter->hw, PTC1023);
	adapter->stats.ptc1522 += E1000_READ_REG(&adapter->hw, PTC1522);
	adapter->stats.mptc += E1000_READ_REG(&adapter->hw, MPTC);
	adapter->stats.bptc += E1000_READ_REG(&adapter->hw, BPTC);

	if (adapter->hw.mac_type >= em_82543) {
		adapter->stats.algnerrc += E1000_READ_REG(&adapter->hw, ALGNERRC);
		adapter->stats.rxerrc += E1000_READ_REG(&adapter->hw, RXERRC);
		adapter->stats.tncrs += E1000_READ_REG(&adapter->hw, TNCRS);
		adapter->stats.cexterr += E1000_READ_REG(&adapter->hw, CEXTERR);
		adapter->stats.tsctc += E1000_READ_REG(&adapter->hw, TSCTC);
		adapter->stats.tsctfc += E1000_READ_REG(&adapter->hw, TSCTFC);
	}
	ifp = adapter->ifp;

	ifp->if_collisions = adapter->stats.colc;

	/* Rx Errors */
	ifp->if_ierrors = adapter->stats.rxerrc + adapter->stats.crcerrs +
	    adapter->stats.algnerrc + adapter->stats.ruc + adapter->stats.roc +
	    adapter->stats.mpc + adapter->stats.cexterr;

	/* Tx Errors */
	ifp->if_oerrors = adapter->stats.ecol + adapter->stats.latecol +
	    adapter->watchdog_events;
}

/**********************************************************************
 *
 *  This routine is called only when em_display_debug_stats is enabled.
 *  This routine provides a way to take a look at important statistics
 *  maintained by the driver and hardware.
 *
 **********************************************************************/
static void
em_print_debug_info(struct adapter *adapter)
{
	device_t dev = adapter->dev;
	uint8_t *hw_addr = adapter->hw.hw_addr;

	device_printf(dev, "Adapter hardware address = %p\n", hw_addr);
	device_printf(dev, "CTRL = 0x%x RCTL = 0x%x\n",
	    E1000_READ_REG(&adapter->hw, CTRL),
	    E1000_READ_REG(&adapter->hw, RCTL));
	device_printf(dev, "Packet buffer = Tx=%dk Rx=%dk\n",
	    ((E1000_READ_REG(&adapter->hw, PBA) & 0xffff0000) >> 16),
	    (E1000_READ_REG(&adapter->hw, PBA) & 0xffff));
	device_printf(dev, "Flow control watermarks high = %d low = %d\n",
	    adapter->hw.fc_high_water, adapter->hw.fc_low_water);
	device_printf(dev, "tx_int_delay = %d, tx_abs_int_delay = %d\n",
	    E1000_READ_REG(&adapter->hw, TIDV),
	    E1000_READ_REG(&adapter->hw, TADV));
	device_printf(dev, "rx_int_delay = %d, rx_abs_int_delay = %d\n",
	    E1000_READ_REG(&adapter->hw, RDTR),
	    E1000_READ_REG(&adapter->hw, RADV));
	device_printf(dev, "fifo workaround = %lld, fifo_reset_count = %lld\n",
	    (long long)adapter->tx_fifo_wrk_cnt,
	    (long long)adapter->tx_fifo_reset_cnt);
	device_printf(dev, "hw tdh = %d, hw tdt = %d\n",
	    E1000_READ_REG(&adapter->hw, TDH),
	    E1000_READ_REG(&adapter->hw, TDT));
	device_printf(dev, "Num Tx descriptors avail = %d\n",
	    adapter->num_tx_desc_avail);
	device_printf(dev, "Tx Descriptors not avail1 = %ld\n",
	    adapter->no_tx_desc_avail1);
	device_printf(dev, "Tx Descriptors not avail2 = %ld\n",
	    adapter->no_tx_desc_avail2);
	device_printf(dev, "Std mbuf failed = %ld\n",
	    adapter->mbuf_alloc_failed);
	device_printf(dev, "Std mbuf cluster failed = %ld\n",
	    adapter->mbuf_cluster_failed);
}

static void
em_print_hw_stats(struct adapter *adapter)
{
	device_t dev = adapter->dev;

	device_printf(dev, "Excessive collisions = %lld\n",
	    (long long)adapter->stats.ecol);
	device_printf(dev, "Symbol errors = %lld\n",
	    (long long)adapter->stats.symerrs);
	device_printf(dev, "Sequence errors = %lld\n",
	    (long long)adapter->stats.sec);
	device_printf(dev, "Defer count = %lld\n",
	    (long long)adapter->stats.dc);

	device_printf(dev, "Missed Packets = %lld\n",
	    (long long)adapter->stats.mpc);
	device_printf(dev, "Receive No Buffers = %lld\n",
	    (long long)adapter->stats.rnbc);
	/* RLEC is inaccurate on some hardware, calculate our own. */
	device_printf(dev, "Receive Length Errors = %lld\n",
	    ((long long)adapter->stats.roc + (long long)adapter->stats.ruc));
	device_printf(dev, "Receive errors = %lld\n",
	    (long long)adapter->stats.rxerrc);
	device_printf(dev, "CRC errors = %lld\n",
	    (long long)adapter->stats.crcerrs);
	device_printf(dev, "Alignment errors = %lld\n",
	    (long long)adapter->stats.algnerrc);
	device_printf(dev, "Carrier extension errors = %lld\n",
	    (long long)adapter->stats.cexterr);
	device_printf(dev, "RX overruns = %ld\n", adapter->rx_overruns);
	device_printf(dev, "Watchdog timeouts = %ld\n",
	    adapter->watchdog_events);

	device_printf(dev, "XON Rcvd = %lld\n", (long long)adapter->stats.xonrxc);
	device_printf(dev, "XON Xmtd = %lld\n", (long long)adapter->stats.xontxc);
	device_printf(dev, "XOFF Rcvd = %lld\n", (long long)adapter->stats.xoffrxc);
	device_printf(dev, "XOFF Xmtd = %lld\n", (long long)adapter->stats.xofftxc);

	device_printf(dev, "Good Packets Rcvd = %lld\n",
	    (long long)adapter->stats.gprc);
	device_printf(dev, "Good Packets Xmtd = %lld\n",
	    (long long)adapter->stats.gptc);
	device_printf(dev, "TSO Contexts Xmtd = %lld\n",
	    (long long)adapter->stats.tsctc);
	device_printf(dev, "TSO Contexts Failed = %lld\n",
	    (long long)adapter->stats.tsctfc);
}
4048
4049static int
4050em_sysctl_debug_info(SYSCTL_HANDLER_ARGS)
4051{
4052	struct adapter *adapter;
4053	int error;
4054	int result;
4055
4056	result = -1;
4057	error = sysctl_handle_int(oidp, &result, 0, req);
4058
4059	if (error || !req->newptr)
4060		return (error);
4061
4062	if (result == 1) {
4063		adapter = (struct adapter *)arg1;
4064		em_print_debug_info(adapter);
4065	}
4066
4067	return (error);
4068}
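
/*
 * Both the handler above and em_sysctl_stats() below are
 * write-triggered: writing 1 to the OID dumps the corresponding report
 * to the console.  Assuming the OIDs are attached under the device's
 * sysctl tree as "debug_info" and "stats" (an assumption, not quoted
 * from this file), something like "sysctl dev.em.0.debug_info=1" would
 * invoke em_print_debug_info().
 */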

static int
em_sysctl_stats(SYSCTL_HANDLER_ARGS)
{
	struct adapter *adapter;
	int error;
	int result;

	result = -1;
	error = sysctl_handle_int(oidp, &result, 0, req);

	if (error || !req->newptr)
		return (error);

	if (result == 1) {
		adapter = (struct adapter *)arg1;
		em_print_hw_stats(adapter);
	}

	return (error);
}

static int
em_sysctl_int_delay(SYSCTL_HANDLER_ARGS)
{
	struct em_int_delay_info *info;
	struct adapter *adapter;
	uint32_t regval;
	int error;
	int usecs;
	int ticks;

	info = (struct em_int_delay_info *)arg1;
	usecs = info->value;
	error = sysctl_handle_int(oidp, &usecs, 0, req);
	if (error != 0 || req->newptr == NULL)
		return (error);
	if (usecs < 0 || usecs > E1000_TICKS_TO_USECS(65535))
		return (EINVAL);
	info->value = usecs;
	ticks = E1000_USECS_TO_TICKS(usecs);

	adapter = info->adapter;

	EM_LOCK(adapter);
	regval = E1000_READ_OFFSET(&adapter->hw, info->offset);
	regval = (regval & ~0xffff) | (ticks & 0xffff);
	/* Handle a few special cases. */
	switch (info->offset) {
	case E1000_RDTR:
	case E1000_82542_RDTR:
		regval |= E1000_RDT_FPDB;
		break;
	case E1000_TIDV:
	case E1000_82542_TIDV:
		if (ticks == 0) {
			adapter->txd_cmd &= ~E1000_TXD_CMD_IDE;
			/* Don't write 0 into the TIDV register. */
			regval++;
		} else
			adapter->txd_cmd |= E1000_TXD_CMD_IDE;
		break;
	}
	E1000_WRITE_OFFSET(&adapter->hw, info->offset, regval);
	EM_UNLOCK(adapter);
	return (0);
}

static void
em_add_int_delay_sysctl(struct adapter *adapter, const char *name,
	const char *description, struct em_int_delay_info *info,
	int offset, int value)
{
	info->adapter = adapter;
	info->offset = offset;
	info->value = value;
	SYSCTL_ADD_PROC(device_get_sysctl_ctx(adapter->dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
	    OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW,
	    info, 0, em_sysctl_int_delay, "I", description);
}
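
/*
 * A sketch of the intended attach-time usage (the default-value name
 * and register choice here are assumptions, not quoted from this file):
 *
 *	em_add_int_delay_sysctl(adapter, "rx_int_delay",
 *	    "receive interrupt delay in usecs", &adapter->rx_int_delay,
 *	    E1000_REG_OFFSET(&adapter->hw, RDTR), em_rx_int_delay_dflt);
 *
 * em_sysctl_int_delay() then converts the user-supplied microseconds
 * into device ticks and rewrites the register under the adapter lock.
 */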

#ifndef DEVICE_POLLING
static void
em_add_int_process_limit(struct adapter *adapter, const char *name,
	const char *description, int *limit, int value)
{
	*limit = value;
	SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
	    OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW, limit, value, description);
}
#endif
