/* if_em.c revision 162186 */
/**************************************************************************

Copyright (c) 2001-2006, Intel Corporation
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

 1. Redistributions of source code must retain the above copyright notice,
    this list of conditions and the following disclaimer.

 2. Redistributions in binary form must reproduce the above copyright
    notice, this list of conditions and the following disclaimer in the
    documentation and/or other materials provided with the distribution.

 3. Neither the name of the Intel Corporation nor the names of its
    contributors may be used to endorse or promote products derived from
    this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.

***************************************************************************/

/*$FreeBSD: head/sys/dev/em/if_em.c 162186 2006-09-09 19:55:13Z pdeuskar $*/
35
#ifdef HAVE_KERNEL_OPTION_HEADERS
#include "opt_device_polling.h"
#endif

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/bus.h>
#include <sys/endian.h>
#include <sys/kernel.h>
#include <sys/kthread.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/module.h>
#include <sys/rman.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/sysctl.h>
#include <sys/taskqueue.h>

#include <machine/bus.h>
#include <machine/resource.h>
#include <machine/in_cksum.h>

#include <net/bpf.h>
#include <net/ethernet.h>
#include <net/if.h>
#include <net/if_arp.h>
#include <net/if_dl.h>
#include <net/if_media.h>

#include <net/if_types.h>
#include <net/if_vlan_var.h>

#include <netinet/in_systm.h>
#include <netinet/in.h>
#include <netinet/if_ether.h>
#include <netinet/ip.h>
#include <netinet/tcp.h>
#include <netinet/udp.h>

#include <dev/pci/pcivar.h>
#include <dev/pci/pcireg.h>
#include <dev/em/if_em_hw.h>
#include <dev/em/if_em.h>
80
/*********************************************************************
 *  Set this to one to display debug statistics
 *********************************************************************/
int	em_display_debug_stats = 0;
85
/*********************************************************************
 *  Driver version string reported in the probe description.
 *********************************************************************/

char em_driver_version[] = "Version - 6.1.4 - TSO";
91
92
93/*********************************************************************
94 *  PCI Device ID Table
95 *
96 *  Used by probe to select devices to load on
97 *  Last field stores an index into em_strings
98 *  Last entry must be all 0s
99 *
100 *  { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index }
101 *********************************************************************/
102
103static em_vendor_info_t em_vendor_info_array[] =
104{
105	/* Intel(R) PRO/1000 Network Connection */
106	{ 0x8086, E1000_DEV_ID_82540EM,		PCI_ANY_ID, PCI_ANY_ID, 0},
107	{ 0x8086, E1000_DEV_ID_82540EM_LOM,	PCI_ANY_ID, PCI_ANY_ID, 0},
108	{ 0x8086, E1000_DEV_ID_82540EP,		PCI_ANY_ID, PCI_ANY_ID, 0},
109	{ 0x8086, E1000_DEV_ID_82540EP_LOM,	PCI_ANY_ID, PCI_ANY_ID, 0},
110	{ 0x8086, E1000_DEV_ID_82540EP_LP,	PCI_ANY_ID, PCI_ANY_ID, 0},
111
112	{ 0x8086, E1000_DEV_ID_82541EI,		PCI_ANY_ID, PCI_ANY_ID, 0},
113	{ 0x8086, E1000_DEV_ID_82541ER,		PCI_ANY_ID, PCI_ANY_ID, 0},
114	{ 0x8086, E1000_DEV_ID_82541ER_LOM,	PCI_ANY_ID, PCI_ANY_ID, 0},
115	{ 0x8086, E1000_DEV_ID_82541EI_MOBILE,	PCI_ANY_ID, PCI_ANY_ID, 0},
116	{ 0x8086, E1000_DEV_ID_82541GI,		PCI_ANY_ID, PCI_ANY_ID, 0},
117	{ 0x8086, E1000_DEV_ID_82541GI_LF,	PCI_ANY_ID, PCI_ANY_ID, 0},
118	{ 0x8086, E1000_DEV_ID_82541GI_MOBILE,	PCI_ANY_ID, PCI_ANY_ID, 0},
119
120	{ 0x8086, E1000_DEV_ID_82542,		PCI_ANY_ID, PCI_ANY_ID, 0},
121
122	{ 0x8086, E1000_DEV_ID_82543GC_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
123	{ 0x8086, E1000_DEV_ID_82543GC_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
124
125	{ 0x8086, E1000_DEV_ID_82544EI_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
126	{ 0x8086, E1000_DEV_ID_82544EI_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
127	{ 0x8086, E1000_DEV_ID_82544GC_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
128	{ 0x8086, E1000_DEV_ID_82544GC_LOM,	PCI_ANY_ID, PCI_ANY_ID, 0},
129
130	{ 0x8086, E1000_DEV_ID_82545EM_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
131	{ 0x8086, E1000_DEV_ID_82545EM_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
132	{ 0x8086, E1000_DEV_ID_82545GM_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
133	{ 0x8086, E1000_DEV_ID_82545GM_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
134	{ 0x8086, E1000_DEV_ID_82545GM_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
135
136	{ 0x8086, E1000_DEV_ID_82546EB_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
137	{ 0x8086, E1000_DEV_ID_82546EB_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
138	{ 0x8086, E1000_DEV_ID_82546EB_QUAD_COPPER, PCI_ANY_ID, PCI_ANY_ID, 0},
139	{ 0x8086, E1000_DEV_ID_82546GB_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
140	{ 0x8086, E1000_DEV_ID_82546GB_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
141	{ 0x8086, E1000_DEV_ID_82546GB_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
142	{ 0x8086, E1000_DEV_ID_82546GB_PCIE,	PCI_ANY_ID, PCI_ANY_ID, 0},
143	{ 0x8086, E1000_DEV_ID_82546GB_QUAD_COPPER, PCI_ANY_ID, PCI_ANY_ID, 0},
144	{ 0x8086, E1000_DEV_ID_82546GB_QUAD_COPPER_KSP3,
145						PCI_ANY_ID, PCI_ANY_ID, 0},
146
147	{ 0x8086, E1000_DEV_ID_82547EI,		PCI_ANY_ID, PCI_ANY_ID, 0},
148	{ 0x8086, E1000_DEV_ID_82547EI_MOBILE,	PCI_ANY_ID, PCI_ANY_ID, 0},
149	{ 0x8086, E1000_DEV_ID_82547GI,		PCI_ANY_ID, PCI_ANY_ID, 0},
150
151	{ 0x8086, E1000_DEV_ID_82571EB_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
152	{ 0x8086, E1000_DEV_ID_82571EB_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
153	{ 0x8086, E1000_DEV_ID_82571EB_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
154	{ 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER,
155						PCI_ANY_ID, PCI_ANY_ID, 0},
156
157	{ 0x8086, E1000_DEV_ID_82572EI_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
158	{ 0x8086, E1000_DEV_ID_82572EI_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
159	{ 0x8086, E1000_DEV_ID_82572EI_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
160	{ 0x8086, E1000_DEV_ID_82572EI,		PCI_ANY_ID, PCI_ANY_ID, 0},
161
162	{ 0x8086, E1000_DEV_ID_82573E,		PCI_ANY_ID, PCI_ANY_ID, 0},
163	{ 0x8086, E1000_DEV_ID_82573E_IAMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
164	{ 0x8086, E1000_DEV_ID_82573L,		PCI_ANY_ID, PCI_ANY_ID, 0},
165	{ 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_SPT,
166						PCI_ANY_ID, PCI_ANY_ID, 0},
167	{ 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_SPT,
168						PCI_ANY_ID, PCI_ANY_ID, 0},
169	{ 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_DPT,
170						PCI_ANY_ID, PCI_ANY_ID, 0},
171	{ 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_DPT,
172						PCI_ANY_ID, PCI_ANY_ID, 0},
173	{ 0x8086, E1000_DEV_ID_ICH8_IGP_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
174	{ 0x8086, E1000_DEV_ID_ICH8_IGP_C,	PCI_ANY_ID, PCI_ANY_ID, 0},
175	{ 0x8086, E1000_DEV_ID_ICH8_IFE,	PCI_ANY_ID, PCI_ANY_ID, 0},
176
177	/* required last entry */
178	{ 0, 0, 0, 0, 0}
179};
180
/*********************************************************************
 *  Table of branding strings for all supported NICs.
 *  Indexed by the last field of em_vendor_info_array entries.
 *********************************************************************/

static char *em_strings[] = {
	"Intel(R) PRO/1000 Network Connection"
};
188
189/*********************************************************************
190 *  Function prototypes
191 *********************************************************************/
192static int	em_probe(device_t);
193static int	em_attach(device_t);
194static int	em_detach(device_t);
195static int	em_shutdown(device_t);
196static int	em_suspend(device_t);
197static int	em_resume(device_t);
198static void	em_start(struct ifnet *);
199static void	em_start_locked(struct ifnet *ifp);
200static int	em_ioctl(struct ifnet *, u_long, caddr_t);
201static void	em_watchdog(struct ifnet *);
202static void	em_init(void *);
203static void	em_init_locked(struct adapter *);
204static void	em_stop(void *);
205static void	em_media_status(struct ifnet *, struct ifmediareq *);
206static int	em_media_change(struct ifnet *);
207static void	em_identify_hardware(struct adapter *);
208static int	em_allocate_pci_resources(struct adapter *);
209static int	em_allocate_intr(struct adapter *);
210static void	em_free_intr(struct adapter *);
211static void	em_free_pci_resources(struct adapter *);
212static void	em_local_timer(void *);
213static int	em_hardware_init(struct adapter *);
214static void	em_setup_interface(device_t, struct adapter *);
215static int	em_setup_transmit_structures(struct adapter *);
216static void	em_initialize_transmit_unit(struct adapter *);
217static int	em_setup_receive_structures(struct adapter *);
218static void	em_initialize_receive_unit(struct adapter *);
219static void	em_enable_intr(struct adapter *);
220static void	em_disable_intr(struct adapter *);
221static void	em_free_transmit_structures(struct adapter *);
222static void	em_free_receive_structures(struct adapter *);
223static void	em_update_stats_counters(struct adapter *);
224static void	em_txeof(struct adapter *);
225static int	em_allocate_receive_structures(struct adapter *);
226static int	em_allocate_transmit_structures(struct adapter *);
227static int	em_rxeof(struct adapter *, int);
228#ifndef __NO_STRICT_ALIGNMENT
229static int	em_fixup_rx(struct adapter *);
230#endif
231static void	em_receive_checksum(struct adapter *, struct em_rx_desc *,
232		    struct mbuf *);
233static void	em_transmit_checksum_setup(struct adapter *, struct mbuf *,
234		    uint32_t *, uint32_t *);
235static boolean_t em_tso_setup(struct adapter *, struct mbuf *,
236		    uint32_t *, uint32_t *);
237static void	em_set_promisc(struct adapter *);
238static void	em_disable_promisc(struct adapter *);
239static void	em_set_multi(struct adapter *);
240static void	em_print_hw_stats(struct adapter *);
241static void	em_update_link_status(struct adapter *);
242static int	em_get_buf(struct adapter *, int);
243static void	em_enable_vlans(struct adapter *);
244static void	em_disable_vlans(struct adapter *);
245static int	em_encap(struct adapter *, struct mbuf **);
246static void	em_smartspeed(struct adapter *);
247static int	em_82547_fifo_workaround(struct adapter *, int);
248static void	em_82547_update_fifo_head(struct adapter *, int);
249static int	em_82547_tx_fifo_reset(struct adapter *);
250static void	em_82547_move_tail(void *arg);
251static void	em_82547_move_tail_locked(struct adapter *);
252static int	em_dma_malloc(struct adapter *, bus_size_t,
253		struct em_dma_alloc *, int);
254static void	em_dma_free(struct adapter *, struct em_dma_alloc *);
255static void	em_print_debug_info(struct adapter *);
256static int 	em_is_valid_ether_addr(uint8_t *);
257static int	em_sysctl_stats(SYSCTL_HANDLER_ARGS);
258static int	em_sysctl_debug_info(SYSCTL_HANDLER_ARGS);
259static uint32_t	em_fill_descriptors (bus_addr_t address, uint32_t length,
260		    PDESC_ARRAY desc_array);
261static int	em_sysctl_int_delay(SYSCTL_HANDLER_ARGS);
262static void	em_add_int_delay_sysctl(struct adapter *, const char *,
263		const char *, struct em_int_delay_info *, int, int);
264
265/*
266 * Fast interrupt handler and legacy ithread/polling modes are
267 * mutually exclusive.
268 */
269#ifdef DEVICE_POLLING
270static poll_handler_t em_poll;
271static void	em_intr(void *);
272#else
273static void	em_intr_fast(void *);
274static void	em_add_int_process_limit(struct adapter *, const char *,
275		const char *, int *, int);
276static void	em_handle_rxtx(void *context, int pending);
277static void	em_handle_link(void *context, int pending);
278#endif
279
280/*********************************************************************
281 *  FreeBSD Device Interface Entry Points
282 *********************************************************************/
283
284static device_method_t em_methods[] = {
285	/* Device interface */
286	DEVMETHOD(device_probe, em_probe),
287	DEVMETHOD(device_attach, em_attach),
288	DEVMETHOD(device_detach, em_detach),
289	DEVMETHOD(device_shutdown, em_shutdown),
290	DEVMETHOD(device_suspend, em_suspend),
291	DEVMETHOD(device_resume, em_resume),
292	{0, 0}
293};
294
295static driver_t em_driver = {
296	"em", em_methods, sizeof(struct adapter),
297};
298
299static devclass_t em_devclass;
300DRIVER_MODULE(em, pci, em_driver, em_devclass, 0, 0);
301MODULE_DEPEND(em, pci, 1, 1, 1);
302MODULE_DEPEND(em, ether, 1, 1, 1);
303
304/*********************************************************************
305 *  Tunable default values.
306 *********************************************************************/
307
308#define E1000_TICKS_TO_USECS(ticks)	((1024 * (ticks) + 500) / 1000)
309#define E1000_USECS_TO_TICKS(usecs)	((1000 * (usecs) + 512) / 1024)
310#define M_TSO_LEN			66
311
312static int em_tx_int_delay_dflt = E1000_TICKS_TO_USECS(EM_TIDV);
313static int em_rx_int_delay_dflt = E1000_TICKS_TO_USECS(EM_RDTR);
314static int em_tx_abs_int_delay_dflt = E1000_TICKS_TO_USECS(EM_TADV);
315static int em_rx_abs_int_delay_dflt = E1000_TICKS_TO_USECS(EM_RADV);
316static int em_rxd = EM_DEFAULT_RXD;
317static int em_txd = EM_DEFAULT_TXD;
318static int em_smart_pwr_down = FALSE;
319
320TUNABLE_INT("hw.em.tx_int_delay", &em_tx_int_delay_dflt);
321TUNABLE_INT("hw.em.rx_int_delay", &em_rx_int_delay_dflt);
322TUNABLE_INT("hw.em.tx_abs_int_delay", &em_tx_abs_int_delay_dflt);
323TUNABLE_INT("hw.em.rx_abs_int_delay", &em_rx_abs_int_delay_dflt);
324TUNABLE_INT("hw.em.rxd", &em_rxd);
325TUNABLE_INT("hw.em.txd", &em_txd);
326TUNABLE_INT("hw.em.smart_pwr_down", &em_smart_pwr_down);
327#ifndef DEVICE_POLLING
328static int em_rx_process_limit = 100;
329TUNABLE_INT("hw.em.rx_process_limit", &em_rx_process_limit);
330#endif
331
332/*********************************************************************
333 *  Device identification routine
334 *
335 *  em_probe determines if the driver should be loaded on
336 *  adapter based on PCI vendor/device id of the adapter.
337 *
338 *  return BUS_PROBE_DEFAULT on success, positive on failure
339 *********************************************************************/
340
341static int
342em_probe(device_t dev)
343{
344	char		adapter_name[60];
345	uint16_t	pci_vendor_id = 0;
346	uint16_t	pci_device_id = 0;
347	uint16_t	pci_subvendor_id = 0;
348	uint16_t	pci_subdevice_id = 0;
349	em_vendor_info_t *ent;
350
351	INIT_DEBUGOUT("em_probe: begin");
352
353	pci_vendor_id = pci_get_vendor(dev);
354	if (pci_vendor_id != EM_VENDOR_ID)
355		return (ENXIO);
356
357	pci_device_id = pci_get_device(dev);
358	pci_subvendor_id = pci_get_subvendor(dev);
359	pci_subdevice_id = pci_get_subdevice(dev);
360
361	ent = em_vendor_info_array;
362	while (ent->vendor_id != 0) {
363		if ((pci_vendor_id == ent->vendor_id) &&
364		    (pci_device_id == ent->device_id) &&
365
366		    ((pci_subvendor_id == ent->subvendor_id) ||
367		    (ent->subvendor_id == PCI_ANY_ID)) &&
368
369		    ((pci_subdevice_id == ent->subdevice_id) ||
370		    (ent->subdevice_id == PCI_ANY_ID))) {
371			sprintf(adapter_name, "%s %s",
372				em_strings[ent->index],
373				em_driver_version);
374			device_set_desc_copy(dev, adapter_name);
375			return (BUS_PROBE_DEFAULT);
376		}
377		ent++;
378	}
379
380	return (ENXIO);
381}
382
383/*********************************************************************
384 *  Device initialization routine
385 *
386 *  The attach entry point is called when the driver is being loaded.
387 *  This routine identifies the type of hardware, allocates all resources
388 *  and initializes the hardware.
389 *
390 *  return 0 on success, positive on failure
391 *********************************************************************/
392
393static int
394em_attach(device_t dev)
395{
396	struct adapter	*adapter;
397	int		tsize, rsize;
398	int		error = 0;
399
400	INIT_DEBUGOUT("em_attach: begin");
401
402	adapter = device_get_softc(dev);
403	adapter->dev = adapter->osdep.dev = dev;
404	EM_LOCK_INIT(adapter, device_get_nameunit(dev));
405
406	/* SYSCTL stuff */
407	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
408	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
409	    OID_AUTO, "debug_info", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
410	    em_sysctl_debug_info, "I", "Debug Information");
411
412	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
413	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
414	    OID_AUTO, "stats", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
415	    em_sysctl_stats, "I", "Statistics");
416
417	callout_init(&adapter->timer, CALLOUT_MPSAFE);
418	callout_init(&adapter->tx_fifo_timer, CALLOUT_MPSAFE);
419
420	/* Determine hardware revision */
421	em_identify_hardware(adapter);
422
423	/* Set up some sysctls for the tunable interrupt delays */
424	em_add_int_delay_sysctl(adapter, "rx_int_delay",
425	    "receive interrupt delay in usecs", &adapter->rx_int_delay,
426	    E1000_REG_OFFSET(&adapter->hw, RDTR), em_rx_int_delay_dflt);
427	em_add_int_delay_sysctl(adapter, "tx_int_delay",
428	    "transmit interrupt delay in usecs", &adapter->tx_int_delay,
429	    E1000_REG_OFFSET(&adapter->hw, TIDV), em_tx_int_delay_dflt);
430	if (adapter->hw.mac_type >= em_82540) {
431		em_add_int_delay_sysctl(adapter, "rx_abs_int_delay",
432		    "receive interrupt delay limit in usecs",
433		    &adapter->rx_abs_int_delay,
434		    E1000_REG_OFFSET(&adapter->hw, RADV),
435		    em_rx_abs_int_delay_dflt);
436		em_add_int_delay_sysctl(adapter, "tx_abs_int_delay",
437		    "transmit interrupt delay limit in usecs",
438		    &adapter->tx_abs_int_delay,
439		    E1000_REG_OFFSET(&adapter->hw, TADV),
440		    em_tx_abs_int_delay_dflt);
441	}
442
443#ifndef DEVICE_POLLING
444	/* Sysctls for limiting the amount of work done in the taskqueue */
445	em_add_int_process_limit(adapter, "rx_processing_limit",
446	    "max number of rx packets to process", &adapter->rx_process_limit,
447	    em_rx_process_limit);
448#endif
449
450	/*
451	 * Validate number of transmit and receive descriptors. It
452	 * must not exceed hardware maximum, and must be multiple
453	 * of EM_DBA_ALIGN.
454	 */
455	if (((em_txd * sizeof(struct em_tx_desc)) % EM_DBA_ALIGN) != 0 ||
456	    (adapter->hw.mac_type >= em_82544 && em_txd > EM_MAX_TXD) ||
457	    (adapter->hw.mac_type < em_82544 && em_txd > EM_MAX_TXD_82543) ||
458	    (em_txd < EM_MIN_TXD)) {
459		device_printf(dev, "Using %d TX descriptors instead of %d!\n",
460		    EM_DEFAULT_TXD, em_txd);
461		adapter->num_tx_desc = EM_DEFAULT_TXD;
462	} else
463		adapter->num_tx_desc = em_txd;
464	if (((em_rxd * sizeof(struct em_rx_desc)) % EM_DBA_ALIGN) != 0 ||
465	    (adapter->hw.mac_type >= em_82544 && em_rxd > EM_MAX_RXD) ||
466	    (adapter->hw.mac_type < em_82544 && em_rxd > EM_MAX_RXD_82543) ||
467	    (em_rxd < EM_MIN_RXD)) {
468		device_printf(dev, "Using %d RX descriptors instead of %d!\n",
469		    EM_DEFAULT_RXD, em_rxd);
470		adapter->num_rx_desc = EM_DEFAULT_RXD;
471	} else
472		adapter->num_rx_desc = em_rxd;
473
474	adapter->hw.autoneg = DO_AUTO_NEG;
475	adapter->hw.wait_autoneg_complete = WAIT_FOR_AUTO_NEG_DEFAULT;
476	adapter->hw.autoneg_advertised = AUTONEG_ADV_DEFAULT;
477	adapter->hw.tbi_compatibility_en = TRUE;
478	adapter->rx_buffer_len = EM_RXBUFFER_2048;
479
480	adapter->hw.phy_init_script = 1;
481	adapter->hw.phy_reset_disable = FALSE;
482
483#ifndef EM_MASTER_SLAVE
484	adapter->hw.master_slave = em_ms_hw_default;
485#else
486	adapter->hw.master_slave = EM_MASTER_SLAVE;
487#endif
488	/*
489	 * Set the max frame size assuming standard ethernet
490	 * sized frames.
491	 */
492	adapter->hw.max_frame_size = ETHERMTU + ETHER_HDR_LEN + ETHER_CRC_LEN;
493
494	adapter->hw.min_frame_size = MINIMUM_ETHERNET_PACKET_SIZE + ETHER_CRC_LEN;
495
496	/*
497	 * This controls when hardware reports transmit completion
498	 * status.
499	 */
500	adapter->hw.report_tx_early = 1;
501	if (em_allocate_pci_resources(adapter)) {
502		device_printf(dev, "Allocation of PCI resources failed\n");
503		error = ENXIO;
504		goto err_pci;
505	}
506
507	/* Initialize eeprom parameters */
508	em_init_eeprom_params(&adapter->hw);
509
510	tsize = roundup2(adapter->num_tx_desc * sizeof(struct em_tx_desc),
511	    EM_DBA_ALIGN);
512
513	/* Allocate Transmit Descriptor ring */
514	if (em_dma_malloc(adapter, tsize, &adapter->txdma, BUS_DMA_NOWAIT)) {
515		device_printf(dev, "Unable to allocate tx_desc memory\n");
516		error = ENOMEM;
517		goto err_tx_desc;
518	}
519	adapter->tx_desc_base = (struct em_tx_desc *)adapter->txdma.dma_vaddr;
520
521	rsize = roundup2(adapter->num_rx_desc * sizeof(struct em_rx_desc),
522	    EM_DBA_ALIGN);
523
524	/* Allocate Receive Descriptor ring */
525	if (em_dma_malloc(adapter, rsize, &adapter->rxdma, BUS_DMA_NOWAIT)) {
526		device_printf(dev, "Unable to allocate rx_desc memory\n");
527		error = ENOMEM;
528		goto err_rx_desc;
529	}
530	adapter->rx_desc_base = (struct em_rx_desc *)adapter->rxdma.dma_vaddr;
531
532	/* Initialize the hardware */
533	if (em_hardware_init(adapter)) {
534		device_printf(dev, "Unable to initialize the hardware\n");
535		error = EIO;
536		goto err_hw_init;
537	}
538
539	/* Copy the permanent MAC address out of the EEPROM */
540	if (em_read_mac_addr(&adapter->hw) < 0) {
541		device_printf(dev, "EEPROM read error while reading MAC"
542		    " address\n");
543		error = EIO;
544		goto err_hw_init;
545	}
546
547	if (!em_is_valid_ether_addr(adapter->hw.mac_addr)) {
548		device_printf(dev, "Invalid MAC address\n");
549		error = EIO;
550		goto err_hw_init;
551	}
552
553	/* Setup OS specific network interface */
554	em_setup_interface(dev, adapter);
555
556	em_allocate_intr(adapter);
557
558	/* Initialize statistics */
559	em_clear_hw_cntrs(&adapter->hw);
560	em_update_stats_counters(adapter);
561	adapter->hw.get_link_status = 1;
562	em_update_link_status(adapter);
563
564	/* Indicate SOL/IDER usage */
565	if (em_check_phy_reset_block(&adapter->hw))
566		device_printf(dev,
567		    "PHY reset is blocked due to SOL/IDER session.\n");
568
569	/* Identify 82544 on PCIX */
570	em_get_bus_info(&adapter->hw);
571	if(adapter->hw.bus_type == em_bus_type_pcix && adapter->hw.mac_type == em_82544)
572		adapter->pcix_82544 = TRUE;
573	else
574		adapter->pcix_82544 = FALSE;
575
576	INIT_DEBUGOUT("em_attach: end");
577
578	return (0);
579
580err_hw_init:
581	em_dma_free(adapter, &adapter->rxdma);
582err_rx_desc:
583	em_dma_free(adapter, &adapter->txdma);
584err_tx_desc:
585err_pci:
586	em_free_intr(adapter);
587	em_free_pci_resources(adapter);
588	EM_LOCK_DESTROY(adapter);
589
590	return (error);
591}
592
593/*********************************************************************
594 *  Device removal routine
595 *
596 *  The detach entry point is called when the driver is being removed.
597 *  This routine stops the adapter and deallocates all the resources
598 *  that were allocated for driver operation.
599 *
600 *  return 0 on success, positive on failure
601 *********************************************************************/
602
603static int
604em_detach(device_t dev)
605{
606	struct adapter	*adapter = device_get_softc(dev);
607	struct ifnet	*ifp = adapter->ifp;
608
609	INIT_DEBUGOUT("em_detach: begin");
610
611#ifdef DEVICE_POLLING
612	if (ifp->if_capenable & IFCAP_POLLING)
613		ether_poll_deregister(ifp);
614#endif
615
616	em_free_intr(adapter);
617	EM_LOCK(adapter);
618	adapter->in_detach = 1;
619	em_stop(adapter);
620	em_phy_hw_reset(&adapter->hw);
621	EM_UNLOCK(adapter);
622	ether_ifdetach(adapter->ifp);
623
624	em_free_pci_resources(adapter);
625	bus_generic_detach(dev);
626	if_free(ifp);
627
628	/* Free Transmit Descriptor ring */
629	if (adapter->tx_desc_base) {
630		em_dma_free(adapter, &adapter->txdma);
631		adapter->tx_desc_base = NULL;
632	}
633
634	/* Free Receive Descriptor ring */
635	if (adapter->rx_desc_base) {
636		em_dma_free(adapter, &adapter->rxdma);
637		adapter->rx_desc_base = NULL;
638	}
639
640	EM_LOCK_DESTROY(adapter);
641
642	return (0);
643}
644
645/*********************************************************************
646 *
647 *  Shutdown entry point
648 *
649 **********************************************************************/
650
651static int
652em_shutdown(device_t dev)
653{
654	struct adapter *adapter = device_get_softc(dev);
655	EM_LOCK(adapter);
656	em_stop(adapter);
657	EM_UNLOCK(adapter);
658	return (0);
659}
660
661/*
662 * Suspend/resume device methods.
663 */
664static int
665em_suspend(device_t dev)
666{
667	struct adapter *adapter = device_get_softc(dev);
668
669	EM_LOCK(adapter);
670	em_stop(adapter);
671	EM_UNLOCK(adapter);
672
673	return bus_generic_suspend(dev);
674}
675
676static int
677em_resume(device_t dev)
678{
679	struct adapter *adapter = device_get_softc(dev);
680	struct ifnet *ifp = adapter->ifp;
681
682	EM_LOCK(adapter);
683	em_init_locked(adapter);
684	if ((ifp->if_flags & IFF_UP) &&
685	    (ifp->if_drv_flags & IFF_DRV_RUNNING))
686		em_start_locked(ifp);
687	EM_UNLOCK(adapter);
688
689	return bus_generic_resume(dev);
690}
691
692
693/*********************************************************************
694 *  Transmit entry point
695 *
696 *  em_start is called by the stack to initiate a transmit.
697 *  The driver will remain in this routine as long as there are
698 *  packets to transmit and transmit resources are available.
699 *  In case resources are not available stack is notified and
700 *  the packet is requeued.
701 **********************************************************************/
702
703static void
704em_start_locked(struct ifnet *ifp)
705{
706	struct adapter	*adapter = ifp->if_softc;
707	struct mbuf	*m_head;
708
709	EM_LOCK_ASSERT(adapter);
710
711	if ((ifp->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) !=
712	    IFF_DRV_RUNNING)
713		return;
714	if (!adapter->link_active)
715		return;
716
717	while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
718
719		IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
720		if (m_head == NULL)
721			break;
722		/*
723		 * em_encap() can modify our pointer, and or make it NULL on
724		 * failure.  In that event, we can't requeue.
725		 */
726		if (em_encap(adapter, &m_head)) {
727			if (m_head == NULL)
728				break;
729			ifp->if_drv_flags |= IFF_DRV_OACTIVE;
730			IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
731			break;
732		}
733
734		/* Send a copy of the frame to the BPF listener */
735		BPF_MTAP(ifp, m_head);
736
737		/* Set timeout in case hardware has problems transmitting. */
738		ifp->if_timer = EM_TX_TIMEOUT;
739	}
740}
741
742static void
743em_start(struct ifnet *ifp)
744{
745	struct adapter *adapter = ifp->if_softc;
746
747	EM_LOCK(adapter);
748	if (ifp->if_drv_flags & IFF_DRV_RUNNING)
749		em_start_locked(ifp);
750	EM_UNLOCK(adapter);
751}
752
753/*********************************************************************
754 *  Ioctl entry point
755 *
756 *  em_ioctl is called when the user wants to configure the
757 *  interface.
758 *
759 *  return 0 on success, positive on failure
760 **********************************************************************/
761
762static int
763em_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
764{
765	struct adapter	*adapter = ifp->if_softc;
766	struct ifreq *ifr = (struct ifreq *)data;
767	struct ifaddr *ifa = (struct ifaddr *)data;
768	int error = 0;
769
770	if (adapter->in_detach)
771		return (error);
772
773	switch (command) {
774	case SIOCSIFADDR:
775	case SIOCGIFADDR:
776		if (ifa->ifa_addr->sa_family == AF_INET) {
777			/*
778			 * XXX
779			 * Since resetting hardware takes a very long time
780			 * and results in link renegotiation we only
781			 * initialize the hardware only when it is absolutely
782			 * required.
783			 */
784			ifp->if_flags |= IFF_UP;
785			if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) {
786				EM_LOCK(adapter);
787				em_init_locked(adapter);
788				EM_UNLOCK(adapter);
789			}
790			arp_ifinit(ifp, ifa);
791		} else
792			error = ether_ioctl(ifp, command, data);
793		break;
794	case SIOCSIFMTU:
795	    {
796		int max_frame_size;
797		uint16_t eeprom_data = 0;
798
799		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFMTU (Set Interface MTU)");
800
801		EM_LOCK(adapter);
802		switch (adapter->hw.mac_type) {
803		case em_82573:
804			/*
805			 * 82573 only supports jumbo frames
806			 * if ASPM is disabled.
807			 */
808			em_read_eeprom(&adapter->hw, EEPROM_INIT_3GIO_3, 1,
809			    &eeprom_data);
810			if (eeprom_data & EEPROM_WORD1A_ASPM_MASK) {
811				max_frame_size = ETHER_MAX_LEN;
812				break;
813			}
814			/* Allow Jumbo frames - fall thru */
815		case em_82571:
816		case em_82572:
817		case em_80003es2lan:	/* Limit Jumbo Frame size */
818			max_frame_size = 9234;
819			break;
820		case em_ich8lan:
821			/* ICH8 does not support jumbo frames */
822			max_frame_size = ETHER_MAX_LEN;
823			break;
824		default:
825			max_frame_size = MAX_JUMBO_FRAME_SIZE;
826		}
827		if (ifr->ifr_mtu > max_frame_size - ETHER_HDR_LEN -
828		    ETHER_CRC_LEN) {
829			EM_UNLOCK(adapter);
830			error = EINVAL;
831			break;
832		}
833
834		ifp->if_mtu = ifr->ifr_mtu;
835		adapter->hw.max_frame_size =
836		ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
837		em_init_locked(adapter);
838		EM_UNLOCK(adapter);
839		break;
840	    }
841	case SIOCSIFFLAGS:
842		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFFLAGS (Set Interface Flags)");
843		EM_LOCK(adapter);
844		if (ifp->if_flags & IFF_UP) {
845			if ((ifp->if_drv_flags & IFF_DRV_RUNNING)) {
846				if ((ifp->if_flags ^ adapter->if_flags) &
847				    IFF_PROMISC) {
848					em_disable_promisc(adapter);
849					em_set_promisc(adapter);
850				}
851			} else
852				em_init_locked(adapter);
853		} else {
854			if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
855				em_stop(adapter);
856			}
857		}
858		adapter->if_flags = ifp->if_flags;
859		EM_UNLOCK(adapter);
860		break;
861	case SIOCADDMULTI:
862	case SIOCDELMULTI:
863		IOCTL_DEBUGOUT("ioctl rcv'd: SIOC(ADD|DEL)MULTI");
864		if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
865			EM_LOCK(adapter);
866			em_disable_intr(adapter);
867			em_set_multi(adapter);
868			if (adapter->hw.mac_type == em_82542_rev2_0) {
869				em_initialize_receive_unit(adapter);
870			}
871#ifdef DEVICE_POLLING
872			if (!(ifp->if_capenable & IFCAP_POLLING))
873#endif
874				em_enable_intr(adapter);
875			EM_UNLOCK(adapter);
876		}
877		break;
878	case SIOCSIFMEDIA:
879	case SIOCGIFMEDIA:
880		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCxIFMEDIA (Get/Set Interface Media)");
881		error = ifmedia_ioctl(ifp, ifr, &adapter->media, command);
882		break;
883	case SIOCSIFCAP:
884	    {
885		int mask, reinit;
886
887		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFCAP (Set Capabilities)");
888		reinit = 0;
889		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
890#ifdef DEVICE_POLLING
891		if (mask & IFCAP_POLLING) {
892			if (ifr->ifr_reqcap & IFCAP_POLLING) {
893				error = ether_poll_register(em_poll, ifp);
894				if (error)
895					return (error);
896				EM_LOCK(adapter);
897				em_disable_intr(adapter);
898				ifp->if_capenable |= IFCAP_POLLING;
899				EM_UNLOCK(adapter);
900			} else {
901				error = ether_poll_deregister(ifp);
902				/* Enable interrupt even in error case */
903				EM_LOCK(adapter);
904				em_enable_intr(adapter);
905				ifp->if_capenable &= ~IFCAP_POLLING;
906				EM_UNLOCK(adapter);
907			}
908		}
909#endif
910		if (mask & IFCAP_HWCSUM) {
911			ifp->if_capenable ^= IFCAP_HWCSUM;
912			reinit = 1;
913		}
914		if (mask & IFCAP_TSO) {
915			ifp->if_capenable ^= IFCAP_TSO;
916			reinit = 1;
917		}
918		if (mask & IFCAP_VLAN_HWTAGGING) {
919			ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
920			reinit = 1;
921		}
922		if (reinit && (ifp->if_drv_flags & IFF_DRV_RUNNING))
923			em_init(adapter);
924		VLAN_CAPABILITIES(ifp);
925		break;
926	    }
927	default:
928		error = ether_ioctl(ifp, command, data);
929		break;
930	}
931
932	return (error);
933}
934
935/*********************************************************************
936 *  Watchdog entry point
937 *
938 *  This routine is called whenever hardware quits transmitting.
939 *
940 **********************************************************************/
941
static void
em_watchdog(struct ifnet *ifp)
{
	struct adapter *adapter = ifp->if_softc;

	EM_LOCK(adapter);
	/* If we are in this routine because of pause frames, then
	 * don't reset the hardware.
	 */
	if (E1000_READ_REG(&adapter->hw, STATUS) & E1000_STATUS_TXOFF) {
		/* Transmitter is paused by flow control: re-arm the
		 * watchdog and leave the hardware alone. */
		ifp->if_timer = EM_TX_TIMEOUT;
		EM_UNLOCK(adapter);
		return;
	}

	/*
	 * Reclaim first as there is a possibility of losing Tx completion
	 * interrupts. Possible cause of missing Tx completion interrupts
	 * comes from Tx interrupt moderation mechanism(delayed interrupts)
	 * or chipset bug.
	 */
	em_txeof(adapter);
	if (adapter->num_tx_desc_avail == adapter->num_tx_desc) {
		/* Everything was reclaimed; it was not a real hang. */
		EM_UNLOCK(adapter);
		return;
	}

	/* NOTE(review): the timeout message is printed only when
	 * em_check_for_link() returns 0, but the reset below is
	 * performed unconditionally. */
	if (em_check_for_link(&adapter->hw) == 0)
		device_printf(adapter->dev, "watchdog timeout -- resetting\n");

	ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
	adapter->watchdog_events++;

	/* Full reinitialization to recover the hardware. */
	em_init_locked(adapter);
	EM_UNLOCK(adapter);
}
978
979/*********************************************************************
980 *  Init entry point
981 *
982 *  This routine is used in two ways. It is used by the stack as
983 *  init entry point in network interface structure. It is also used
984 *  by the driver as a hw/sw initialization routine to get to a
985 *  consistent state.
986 *
987 *  return 0 on success, positive on failure
988 **********************************************************************/
989
static void
em_init_locked(struct adapter *adapter)
{
	struct ifnet	*ifp = adapter->ifp;
	device_t	dev = adapter->dev;
	uint32_t	pba;

	INIT_DEBUGOUT("em_init: begin");

	EM_LOCK_ASSERT(adapter);

	/* Quiesce the adapter before reprogramming it. */
	em_stop(adapter);

	/*
	 * Packet Buffer Allocation (PBA)
	 * Writing PBA sets the receive portion of the buffer
	 * the remainder is used for the transmit buffer.
	 *
	 * Devices before the 82547 had a Packet Buffer of 64K.
	 *   Default allocation: PBA=48K for Rx, leaving 16K for Tx.
	 * After the 82547 the buffer was reduced to 40K.
	 *   Default allocation: PBA=30K for Rx, leaving 10K for Tx.
	 *   Note: default does not leave enough room for Jumbo Frame >10k.
	 */
	switch (adapter->hw.mac_type) {
	case em_82547:
	case em_82547_rev_2: /* 82547: Total Packet Buffer is 40K */
		if (adapter->hw.max_frame_size > EM_RXBUFFER_8192)
			pba = E1000_PBA_22K; /* 22K for Rx, 18K for Tx */
		else
			pba = E1000_PBA_30K; /* 30K for Rx, 10K for Tx */
		/* Record TX FIFO geometry for the 82547 half-duplex
		 * hang workaround (see em_82547_move_tail_locked). */
		adapter->tx_fifo_head = 0;
		adapter->tx_head_addr = pba << EM_TX_HEAD_ADDR_SHIFT;
		adapter->tx_fifo_size = (E1000_PBA_40K - pba) << EM_PBA_BYTES_SHIFT;
		break;
	case em_80003es2lan: /* 80003es2lan: Total Packet Buffer is 48K */
	case em_82571: /* 82571: Total Packet Buffer is 48K */
	case em_82572: /* 82572: Total Packet Buffer is 48K */
			pba = E1000_PBA_32K; /* 32K for Rx, 16K for Tx */
		break;
	case em_82573: /* 82573: Total Packet Buffer is 32K */
		/* Jumbo frames not supported */
			pba = E1000_PBA_12K; /* 12K for Rx, 20K for Tx */
		break;
	case em_ich8lan:
		pba = E1000_PBA_8K;
		break;
	default:
		/* Devices before 82547 had a Packet Buffer of 64K.   */
		if(adapter->hw.max_frame_size > EM_RXBUFFER_8192)
			pba = E1000_PBA_40K; /* 40K for Rx, 24K for Tx */
		else
			pba = E1000_PBA_48K; /* 48K for Rx, 16K for Tx */
	}

	INIT_DEBUGOUT1("em_init: pba=%dK",pba);
	E1000_WRITE_REG(&adapter->hw, PBA, pba);

	/* Get the latest mac address, User can use a LAA */
	bcopy(IF_LLADDR(adapter->ifp), adapter->hw.mac_addr, ETHER_ADDR_LEN);

	/* Initialize the hardware */
	if (em_hardware_init(adapter)) {
		device_printf(dev, "Unable to initialize the hardware\n");
		return;
	}
	em_update_link_status(adapter);

	if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING)
		em_enable_vlans(adapter);

	/* Prepare transmit descriptors and buffers */
	if (em_setup_transmit_structures(adapter)) {
		device_printf(dev, "Could not setup transmit structures\n");
		em_stop(adapter);
		return;
	}
	em_initialize_transmit_unit(adapter);

	/* Setup Multicast table */
	em_set_multi(adapter);

	/* Prepare receive descriptors and buffers */
	if (em_setup_receive_structures(adapter)) {
		device_printf(dev, "Could not setup receive structures\n");
		em_stop(adapter);
		return;
	}
	em_initialize_receive_unit(adapter);

	/* Don't lose promiscuous settings */
	em_set_promisc(adapter);

	/* Mark the interface running and able to accept packets. */
	ifp->if_drv_flags |= IFF_DRV_RUNNING;
	ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;

	/* Advertise checksum/TSO offload to the stack according to
	 * the currently enabled capabilities. */
	ifp->if_hwassist = 0;
	if (adapter->hw.mac_type >= em_82543) {
		if (ifp->if_capenable & IFCAP_TXCSUM)
			ifp->if_hwassist = EM_CHECKSUM_FEATURES;
		if (ifp->if_capenable & IFCAP_TSO)
			ifp->if_hwassist |= EM_TCPSEG_FEATURES;
	}

	/* Arm the once-per-second housekeeping timer. */
	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
	em_clear_hw_cntrs(&adapter->hw);
#ifdef DEVICE_POLLING
	/*
	 * Only enable interrupts if we are not polling, make sure
	 * they are off otherwise.
	 */
	if (ifp->if_capenable & IFCAP_POLLING)
		em_disable_intr(adapter);
	else
#endif /* DEVICE_POLLING */
		em_enable_intr(adapter);

	/* Don't reset the phy next time init gets called */
	adapter->hw.phy_reset_disable = TRUE;
}
1110
static void
em_init(void *arg)
{
	struct adapter *adapter = arg;

	/* Locked wrapper around em_init_locked(); used as the ifnet
	 * if_init entry point. */
	EM_LOCK(adapter);
	em_init_locked(adapter);
	EM_UNLOCK(adapter);
}
1120
1121
1122#ifdef DEVICE_POLLING
1123/*********************************************************************
1124 *
1125 *  Legacy polling routine
1126 *
1127 *********************************************************************/
static void
em_poll(struct ifnet *ifp, enum poll_cmd cmd, int count)
{
	struct adapter *adapter = ifp->if_softc;
	uint32_t reg_icr;

	EM_LOCK(adapter);
	/* Nothing to do if the interface has been stopped. */
	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
		EM_UNLOCK(adapter);
		return;
	}

	if (cmd == POLL_AND_CHECK_STATUS) {
		/* Reading ICR also acknowledges pending causes; watch
		 * for link events and refresh the cached link state. */
		reg_icr = E1000_READ_REG(&adapter->hw, ICR);
		if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
			callout_stop(&adapter->timer);
			adapter->hw.get_link_status = 1;
			em_check_for_link(&adapter->hw);
			em_update_link_status(adapter);
			callout_reset(&adapter->timer, hz, em_local_timer, adapter);
		}
	}
	/* Process at most 'count' received frames, then reclaim
	 * completed transmit descriptors. */
	em_rxeof(adapter, count);
	em_txeof(adapter);

	/* Restart transmission if packets are waiting. */
	if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
		em_start_locked(ifp);
	EM_UNLOCK(adapter);
}
1157
1158/*********************************************************************
1159 *
1160 *  Legacy Interrupt Service routine
1161 *
1162 *********************************************************************/
static void
em_intr(void *arg)
{
	struct adapter	*adapter = arg;
	struct ifnet	*ifp;
	uint32_t	reg_icr;

	EM_LOCK(adapter);

	ifp = adapter->ifp;

	/* When polling is enabled, em_poll() services the device. */
	if (ifp->if_capenable & IFCAP_POLLING) {
		EM_UNLOCK(adapter);
		return;
	}

	for (;;) {
		/* Reading ICR acknowledges the pending causes. */
		reg_icr = E1000_READ_REG(&adapter->hw, ICR);
		/* 82571 and newer set INT_ASSERTED (bit 31) when the
		 * interrupt really came from this device. */
		if (adapter->hw.mac_type >= em_82571 &&
		    (reg_icr & E1000_ICR_INT_ASSERTED) == 0)
			break;
		else if (reg_icr == 0)
			break;

		/*
		 * XXX: some laptops trigger several spurious interrupts
		 * on em(4) when in the resume cycle. The ICR register
		 * reports all-ones value in this case. Processing such
		 * interrupts would lead to a freeze. I don't know why.
		 */
		if (reg_icr == 0xffffffff)
			break;

		if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
			/* -1 places no limit on the RX packet count. */
			em_rxeof(adapter, -1);
			em_txeof(adapter);
		}

		/* Link status change */
		if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
			callout_stop(&adapter->timer);
			adapter->hw.get_link_status = 1;
			em_check_for_link(&adapter->hw);
			em_update_link_status(adapter);
			callout_reset(&adapter->timer, hz, em_local_timer, adapter);
		}

		if (reg_icr & E1000_ICR_RXO)
			adapter->rx_overruns++;
	}

	/* Kick the transmitter if packets are queued. */
	if (ifp->if_drv_flags & IFF_DRV_RUNNING &&
	    !IFQ_DRV_IS_EMPTY(&ifp->if_snd))
		em_start_locked(ifp);

	EM_UNLOCK(adapter);
}
1220
1221#else  /* if not DEVICE_POLLING, then fast interrupt routines only */
1222
static void
em_handle_link(void *context, int pending)
{
	struct adapter	*adapter = context;
	struct ifnet *ifp;

	/* NOTE(review): 'ifp' is assigned but never used here. */
	ifp = adapter->ifp;

	EM_LOCK(adapter);

	/* Deferred link-change handler (queued from em_intr_fast):
	 * stop the timer, re-check the link, refresh the cached
	 * state, then re-arm the timer. */
	callout_stop(&adapter->timer);
	adapter->hw.get_link_status = 1;
	em_check_for_link(&adapter->hw);
	em_update_link_status(adapter);
	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
	EM_UNLOCK(adapter);
}
1240
static void
em_handle_rxtx(void *context, int pending)
{
	struct adapter	*adapter = context;
	struct ifnet	*ifp;

	NET_LOCK_GIANT();
	ifp = adapter->ifp;

	/*
	 * TODO:
	 * It should be possible to run the tx clean loop without the lock.
	 */
	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
		/* If the RX processing limit was reached, requeue
		 * ourselves to handle the remaining packets. */
		if (em_rxeof(adapter, adapter->rx_process_limit) != 0)
			taskqueue_enqueue(adapter->tq, &adapter->rxtx_task);
		EM_LOCK(adapter);
		em_txeof(adapter);

		if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
			em_start_locked(ifp);
		EM_UNLOCK(adapter);
	}

	/* Re-enable the interrupts that em_intr_fast() masked off. */
	em_enable_intr(adapter);
	NET_UNLOCK_GIANT();
}
1268
1269/*********************************************************************
1270 *
1271 *  Fast Interrupt Service routine
1272 *
1273 *********************************************************************/
static void
em_intr_fast(void *arg)
{
	struct adapter	*adapter = arg;
	struct ifnet	*ifp;
	uint32_t	reg_icr;

	/* NOTE(review): runs without taking EM_LOCK; all real work is
	 * deferred to taskqueues.  'ifp' is assigned but unused. */
	ifp = adapter->ifp;

	reg_icr = E1000_READ_REG(&adapter->hw, ICR);

	/* Hot eject?  */
	if (reg_icr == 0xffffffff)
		return;

	/* Definitely not our interrupt.  */
	if (reg_icr == 0x0)
		return;

	/*
	 * Starting with the 82571 chip, bit 31 should be used to
	 * determine whether the interrupt belongs to us.
	 */
	if (adapter->hw.mac_type >= em_82571 &&
	    (reg_icr & E1000_ICR_INT_ASSERTED) == 0)
		return;

	/*
	 * Mask interrupts until the taskqueue is finished running.  This is
	 * cheap, just assume that it is needed.  This also works around the
	 * MSI message reordering errata on certain systems.
	 */
	em_disable_intr(adapter);
	taskqueue_enqueue(adapter->tq, &adapter->rxtx_task);

	/* Link status change */
	if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC))
		taskqueue_enqueue(taskqueue_fast, &adapter->link_task);

	if (reg_icr & E1000_ICR_RXO)
		adapter->rx_overruns++;
}
1316#endif /* ! DEVICE_POLLING */
1317
1318/*********************************************************************
1319 *
1320 *  Media Ioctl callback
1321 *
1322 *  This routine is called whenever the user queries the status of
1323 *  the interface using ifconfig.
1324 *
1325 **********************************************************************/
1326static void
1327em_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
1328{
1329	struct adapter *adapter = ifp->if_softc;
1330
1331	INIT_DEBUGOUT("em_media_status: begin");
1332
1333	EM_LOCK(adapter);
1334	em_check_for_link(&adapter->hw);
1335	em_update_link_status(adapter);
1336
1337	ifmr->ifm_status = IFM_AVALID;
1338	ifmr->ifm_active = IFM_ETHER;
1339
1340	if (!adapter->link_active) {
1341		EM_UNLOCK(adapter);
1342		return;
1343	}
1344
1345	ifmr->ifm_status |= IFM_ACTIVE;
1346
1347	if ((adapter->hw.media_type == em_media_type_fiber) ||
1348	    (adapter->hw.media_type == em_media_type_internal_serdes)) {
1349		if (adapter->hw.mac_type == em_82545)
1350			ifmr->ifm_active |= IFM_1000_LX | IFM_FDX;
1351		else
1352			ifmr->ifm_active |= IFM_1000_SX | IFM_FDX;
1353	} else {
1354		switch (adapter->link_speed) {
1355		case 10:
1356			ifmr->ifm_active |= IFM_10_T;
1357			break;
1358		case 100:
1359			ifmr->ifm_active |= IFM_100_TX;
1360			break;
1361		case 1000:
1362			ifmr->ifm_active |= IFM_1000_T;
1363			break;
1364		}
1365		if (adapter->link_duplex == FULL_DUPLEX)
1366			ifmr->ifm_active |= IFM_FDX;
1367		else
1368			ifmr->ifm_active |= IFM_HDX;
1369	}
1370	EM_UNLOCK(adapter);
1371}
1372
1373/*********************************************************************
1374 *
1375 *  Media Ioctl callback
1376 *
1377 *  This routine is called when the user changes speed/duplex using
1378 *  media/mediopt option with ifconfig.
1379 *
1380 **********************************************************************/
static int
em_media_change(struct ifnet *ifp)
{
	struct adapter *adapter = ifp->if_softc;
	struct ifmedia  *ifm = &adapter->media;

	INIT_DEBUGOUT("em_media_change: begin");

	if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
		return (EINVAL);

	EM_LOCK(adapter);
	/* Translate the requested media subtype into autoneg or
	 * forced speed/duplex settings for the hardware layer. */
	switch (IFM_SUBTYPE(ifm->ifm_media)) {
	case IFM_AUTO:
		adapter->hw.autoneg = DO_AUTO_NEG;
		adapter->hw.autoneg_advertised = AUTONEG_ADV_DEFAULT;
		break;
	case IFM_1000_LX:
	case IFM_1000_SX:
	case IFM_1000_T:
		/* Gigabit is always negotiated, full duplex only. */
		adapter->hw.autoneg = DO_AUTO_NEG;
		adapter->hw.autoneg_advertised = ADVERTISE_1000_FULL;
		break;
	case IFM_100_TX:
		adapter->hw.autoneg = FALSE;
		adapter->hw.autoneg_advertised = 0;
		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
			adapter->hw.forced_speed_duplex = em_100_full;
		else
			adapter->hw.forced_speed_duplex = em_100_half;
		break;
	case IFM_10_T:
		adapter->hw.autoneg = FALSE;
		adapter->hw.autoneg_advertised = 0;
		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
			adapter->hw.forced_speed_duplex = em_10_full;
		else
			adapter->hw.forced_speed_duplex = em_10_half;
		break;
	default:
		/* NOTE(review): unsupported subtypes only log; the
		 * adapter is still reinitialized below with unchanged
		 * settings and 0 is returned. */
		device_printf(adapter->dev, "Unsupported media type\n");
	}

	/* As the speed/duplex settings may have changed we need to
	 * reset the PHY.
	 */
	adapter->hw.phy_reset_disable = FALSE;

	em_init_locked(adapter);
	EM_UNLOCK(adapter);

	return (0);
}
1434
1435/*********************************************************************
1436 *
1437 *  This routine maps the mbufs to tx descriptors.
1438 *
1439 *  return 0 on success, positive on failure
1440 **********************************************************************/
1441static int
1442em_encap(struct adapter *adapter, struct mbuf **m_headp)
1443{
1444	struct ifnet		*ifp = adapter->ifp;
1445	bus_dma_segment_t	segs[EM_MAX_SCATTER];
1446	bus_dmamap_t		map;
1447	struct em_buffer	*tx_buffer, *tx_buffer_last;
1448	struct em_tx_desc	*current_tx_desc;
1449	struct mbuf		*m_head;
1450	struct m_tag		*mtag;
1451	uint32_t		txd_upper, txd_lower, txd_used, txd_saved;
1452	int			nsegs, i, j;
1453	int			error, do_tso, tso_desc = 0;
1454
1455	m_head = *m_headp;
1456	current_tx_desc = NULL;
1457	txd_upper = txd_lower = txd_used = txd_saved = 0;
1458
1459        do_tso = ((m_head->m_pkthdr.csum_flags & CSUM_TSO) != 0);
1460
1461	/*
1462	 * Force a cleanup if number of TX descriptors
1463	 * available hits the threshold.
1464	 */
1465	if (adapter->num_tx_desc_avail <= EM_TX_CLEANUP_THRESHOLD) {
1466		em_txeof(adapter);
1467		if (adapter->num_tx_desc_avail <= EM_TX_CLEANUP_THRESHOLD) {
1468			adapter->no_tx_desc_avail1++;
1469			return (ENOBUFS);
1470		}
1471	}
1472
1473	/* Find out if we are in vlan mode. */
1474	mtag = VLAN_OUTPUT_TAG(ifp, m_head);
1475
1476	/*
1477	 * When operating in promiscuous mode, hardware encapsulation for
1478	 * packets is disabled.  This means we have to add the vlan
1479	 * encapsulation in the driver, since it will have come down from the
1480	 * VLAN layer with a tag instead of a VLAN header.
1481	 */
1482	if (mtag != NULL && adapter->em_insert_vlan_header) {
1483		struct ether_vlan_header *evl;
1484		struct ether_header eh;
1485
1486		m_head = m_pullup(m_head, sizeof(eh));
1487		if (m_head == NULL) {
1488			*m_headp = NULL;
1489			return (ENOBUFS);
1490		}
1491		eh = *mtod(m_head, struct ether_header *);
1492		M_PREPEND(m_head, sizeof(*evl), M_DONTWAIT);
1493		if (m_head == NULL) {
1494			*m_headp = NULL;
1495			return (ENOBUFS);
1496		}
1497		m_head = m_pullup(m_head, sizeof(*evl));
1498		if (m_head == NULL) {
1499			*m_headp = NULL;
1500			return (ENOBUFS);
1501		}
1502		evl = mtod(m_head, struct ether_vlan_header *);
1503		bcopy(&eh, evl, sizeof(*evl));
1504		evl->evl_proto = evl->evl_encap_proto;
1505		evl->evl_encap_proto = htons(ETHERTYPE_VLAN);
1506		evl->evl_tag = htons(VLAN_TAG_VALUE(mtag));
1507		m_tag_delete(m_head, mtag);
1508		mtag = NULL;
1509		*m_headp = m_head;
1510	}
1511
1512        /*
1513         * TSO workaround:
1514         *  If an mbuf is only header we need
1515         *     to pull 4 bytes of data into it.
1516         */
1517        if (do_tso && (m_head->m_len <= M_TSO_LEN)) {
1518                m_head = m_pullup(m_head, M_TSO_LEN + 4);
1519                if (m_head == NULL)
1520                        return (ENOBUFS);
1521        }
1522
1523	/*
1524	 * Map the packet for DMA.
1525	 */
1526	tx_buffer = &adapter->tx_buffer_area[adapter->next_avail_tx_desc];
1527	tx_buffer_last = tx_buffer;
1528	map = tx_buffer->map;
1529	error = bus_dmamap_load_mbuf_sg(adapter->txtag, map, *m_headp, segs,
1530	    &nsegs, BUS_DMA_NOWAIT);
1531	if (error == EFBIG) {
1532		struct mbuf *m;
1533
1534		m = m_defrag(*m_headp, M_DONTWAIT);
1535		if (m == NULL) {
1536			/* Assume m_defrag(9) used only m_get(9). */
1537			adapter->mbuf_alloc_failed++;
1538			m_freem(*m_headp);
1539			*m_headp = NULL;
1540			return (ENOBUFS);
1541		}
1542		*m_headp = m;
1543		error = bus_dmamap_load_mbuf_sg(adapter->txtag, map, *m_headp,
1544		    segs, &nsegs, BUS_DMA_NOWAIT);
1545		if (error != 0) {
1546			adapter->no_tx_dma_setup++;
1547			m_freem(*m_headp);
1548			*m_headp = NULL;
1549			return (error);
1550		}
1551	} else if (error != 0) {
1552		adapter->no_tx_dma_setup++;
1553		return (error);
1554	}
1555	if (nsegs == 0) {
1556		m_freem(*m_headp);
1557		*m_headp = NULL;
1558		return (EIO);
1559	}
1560
1561        /*
1562         * TSO Hardware workaround, if this packet is not
1563         * TSO, and is only a single descriptor long, and
1564         * it follows a TSO burst, then we need to add a
1565         * sentinel descriptor to prevent premature writeback.
1566         */
1567        if ((do_tso == 0) && (adapter->tx_tso == TRUE)) {
1568                if (nsegs == 1)
1569                        tso_desc = TRUE;
1570                adapter->tx_tso = FALSE;
1571        }
1572
1573	if (nsegs > adapter->num_tx_desc_avail - 2) {
1574		adapter->no_tx_desc_avail2++;
1575		bus_dmamap_unload(adapter->txtag, map);
1576		return (ENOBUFS);
1577	}
1578
1579        /* Do hardware assists */
1580	m_head = *m_headp;
1581        if ( ifp->if_hwassist > 0) {
1582                if (em_tso_setup(adapter, m_head, &txd_upper, &txd_lower)) {
1583                        /* we need to make a final sentinel transmit desc */
1584                        tso_desc = TRUE;
1585                } else
1586                        em_transmit_checksum_setup(adapter,  m_head,
1587                            &txd_upper, &txd_lower);
1588        }
1589
1590	i = adapter->next_avail_tx_desc;
1591	if (adapter->pcix_82544)
1592		txd_saved = i;
1593
1594	for (j = 0; j < nsegs; j++) {
1595                bus_size_t seg_len;
1596                bus_addr_t seg_addr;
1597		/* If adapter is 82544 and on PCIX bus. */
1598		if(adapter->pcix_82544) {
1599			DESC_ARRAY	desc_array;
1600			uint32_t	array_elements, counter;
1601
1602			/*
1603			 * Check the Address and Length combination and
1604			 * split the data accordingly
1605			 */
1606			array_elements = em_fill_descriptors(segs[j].ds_addr,
1607			    segs[j].ds_len, &desc_array);
1608			for (counter = 0; counter < array_elements; counter++) {
1609				if (txd_used == adapter->num_tx_desc_avail) {
1610					adapter->next_avail_tx_desc = txd_saved;
1611					adapter->no_tx_desc_avail2++;
1612					bus_dmamap_unload(adapter->txtag, map);
1613					return (ENOBUFS);
1614				}
1615				tx_buffer = &adapter->tx_buffer_area[i];
1616				current_tx_desc = &adapter->tx_desc_base[i];
1617				current_tx_desc->buffer_addr = htole64(
1618					desc_array.descriptor[counter].address);
1619				current_tx_desc->lower.data = htole32(
1620					(adapter->txd_cmd | txd_lower |
1621					(uint16_t)desc_array.descriptor[counter].length));
1622				current_tx_desc->upper.data = htole32((txd_upper));
1623				if (++i == adapter->num_tx_desc)
1624					i = 0;
1625
1626				tx_buffer->m_head = NULL;
1627				txd_used++;
1628			}
1629		} else {
1630                       tx_buffer = &adapter->tx_buffer_area[i];
1631                        current_tx_desc = &adapter->tx_desc_base[i];
1632                        seg_addr = htole64(segs[j].ds_addr);
1633                        seg_len  = segs[j].ds_len;
1634                        /*
1635                        ** TSO Workaround:
1636                        ** If this is the last descriptor, we want to
1637                        ** split it so we have a small final sentinel
1638                        */
1639                        if (tso_desc && (j == (nsegs -1)) && (seg_len > 8)) {
1640                                seg_len -= 4;
1641                                current_tx_desc->buffer_addr = seg_addr;
1642                                current_tx_desc->lower.data = htole32(
1643                                adapter->txd_cmd | txd_lower | seg_len);
1644                                current_tx_desc->upper.data =
1645                                    htole32(txd_upper);
1646                                if (++i == adapter->num_tx_desc)
1647                                        i = 0;
1648                                /* Now make the sentinel */
1649                                ++txd_used; /* using an extra txd */
1650                                current_tx_desc = &adapter->tx_desc_base[i];
1651                                tx_buffer = &adapter->tx_buffer_area[i];
1652                                current_tx_desc->buffer_addr =
1653                                    seg_addr + seg_len;
1654                                current_tx_desc->lower.data = htole32(
1655                                adapter->txd_cmd | txd_lower | 4);
1656                                current_tx_desc->upper.data =
1657                                    htole32(txd_upper);
1658                                if (++i == adapter->num_tx_desc)
1659                                        i = 0;
1660                        } else {
1661                                current_tx_desc->buffer_addr = seg_addr;
1662                                current_tx_desc->lower.data = htole32(
1663                                adapter->txd_cmd | txd_lower | seg_len);
1664                                current_tx_desc->upper.data =
1665                                    htole32(txd_upper);
1666                                if (++i == adapter->num_tx_desc)
1667                                        i = 0;
1668                        }
1669                        tx_buffer->m_head = NULL;
1670		}
1671	}
1672
1673	adapter->next_avail_tx_desc = i;
1674	if (adapter->pcix_82544)
1675		adapter->num_tx_desc_avail -= txd_used;
1676	else {
1677		adapter->num_tx_desc_avail -= nsegs;
1678                if (tso_desc) /* TSO used an extra for sentinel */
1679                        adapter->num_tx_desc_avail -= txd_used;
1680        }
1681
1682	if (mtag != NULL) {
1683		/* Set the vlan id. */
1684		current_tx_desc->upper.fields.special =
1685		    htole16(VLAN_TAG_VALUE(mtag));
1686
1687		/* Tell hardware to add tag. */
1688		current_tx_desc->lower.data |= htole32(E1000_TXD_CMD_VLE);
1689	}
1690
1691	tx_buffer->m_head = m_head;
1692	tx_buffer_last->map = tx_buffer->map;
1693	tx_buffer->map = map;
1694	bus_dmamap_sync(adapter->txtag, map, BUS_DMASYNC_PREWRITE);
1695
1696	/*
1697	 * Last Descriptor of Packet needs End Of Packet (EOP).
1698	 */
1699	current_tx_desc->lower.data |= htole32(E1000_TXD_CMD_EOP);
1700
1701	/*
1702	 * Advance the Transmit Descriptor Tail (Tdt), this tells the E1000
1703	 * that this frame is available to transmit.
1704	 */
1705	bus_dmamap_sync(adapter->txdma.dma_tag, adapter->txdma.dma_map,
1706	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1707	if (adapter->hw.mac_type == em_82547 && adapter->link_duplex == HALF_DUPLEX)
1708		em_82547_move_tail_locked(adapter);
1709	else {
1710		E1000_WRITE_REG(&adapter->hw, TDT, i);
1711		if (adapter->hw.mac_type == em_82547)
1712			em_82547_update_fifo_head(adapter, m_head->m_pkthdr.len);
1713	}
1714
1715	return (0);
1716}
1717
1718/*********************************************************************
1719 *
1720 * 82547 workaround to avoid controller hang in half-duplex environment.
1721 * The workaround is to avoid queuing a large packet that would span
1722 * the internal Tx FIFO ring boundary. We need to reset the FIFO pointers
1723 * in this case. We do that only when FIFO is quiescent.
1724 *
1725 **********************************************************************/
static void
em_82547_move_tail_locked(struct adapter *adapter)
{
	uint16_t hw_tdt;
	uint16_t sw_tdt;
	struct em_tx_desc *tx_desc;
	uint16_t length = 0;
	boolean_t eop = 0;

	EM_LOCK_ASSERT(adapter);

	/* Walk the descriptors between the hardware tail and the
	 * software tail, advancing the hardware tail one complete
	 * packet (EOP-terminated run) at a time. */
	hw_tdt = E1000_READ_REG(&adapter->hw, TDT);
	sw_tdt = adapter->next_avail_tx_desc;

	while (hw_tdt != sw_tdt) {
		tx_desc = &adapter->tx_desc_base[hw_tdt];
		length += tx_desc->lower.flags.length;
		eop = tx_desc->lower.data & E1000_TXD_CMD_EOP;
		if(++hw_tdt == adapter->num_tx_desc)
			hw_tdt = 0;

		if (eop) {
			/* If this packet would wrap the internal TX FIFO,
			 * back off and retry from the callout once the
			 * FIFO has drained. */
			if (em_82547_fifo_workaround(adapter, length)) {
				adapter->tx_fifo_wrk_cnt++;
				callout_reset(&adapter->tx_fifo_timer, 1,
					em_82547_move_tail, adapter);
				break;
			}
			E1000_WRITE_REG(&adapter->hw, TDT, hw_tdt);
			em_82547_update_fifo_head(adapter, length);
			length = 0;
		}
	}
}
1760
static void
em_82547_move_tail(void *arg)
{
	struct adapter *adapter = arg;

	/* Callout wrapper: retries em_82547_move_tail_locked() with
	 * the adapter lock held. */
	EM_LOCK(adapter);
	em_82547_move_tail_locked(adapter);
	EM_UNLOCK(adapter);
}
1770
1771static int
1772em_82547_fifo_workaround(struct adapter *adapter, int len)
1773{
1774	int fifo_space, fifo_pkt_len;
1775
1776	fifo_pkt_len = roundup2(len + EM_FIFO_HDR, EM_FIFO_HDR);
1777
1778	if (adapter->link_duplex == HALF_DUPLEX) {
1779		fifo_space = adapter->tx_fifo_size - adapter->tx_fifo_head;
1780
1781		if (fifo_pkt_len >= (EM_82547_PKT_THRESH + fifo_space)) {
1782			if (em_82547_tx_fifo_reset(adapter))
1783				return (0);
1784			else
1785				return (1);
1786		}
1787	}
1788
1789	return (0);
1790}
1791
1792static void
1793em_82547_update_fifo_head(struct adapter *adapter, int len)
1794{
1795	int fifo_pkt_len = roundup2(len + EM_FIFO_HDR, EM_FIFO_HDR);
1796
1797	/* tx_fifo_head is always 16 byte aligned */
1798	adapter->tx_fifo_head += fifo_pkt_len;
1799	if (adapter->tx_fifo_head >= adapter->tx_fifo_size) {
1800		adapter->tx_fifo_head -= adapter->tx_fifo_size;
1801	}
1802}
1803
1804
1805static int
1806em_82547_tx_fifo_reset(struct adapter *adapter)
1807{
1808	uint32_t tctl;
1809
1810	if ((E1000_READ_REG(&adapter->hw, TDT) == E1000_READ_REG(&adapter->hw, TDH)) &&
1811	    (E1000_READ_REG(&adapter->hw, TDFT) == E1000_READ_REG(&adapter->hw, TDFH)) &&
1812	    (E1000_READ_REG(&adapter->hw, TDFTS) == E1000_READ_REG(&adapter->hw, TDFHS))&&
1813	    (E1000_READ_REG(&adapter->hw, TDFPC) == 0)) {
1814
1815		/* Disable TX unit */
1816		tctl = E1000_READ_REG(&adapter->hw, TCTL);
1817		E1000_WRITE_REG(&adapter->hw, TCTL, tctl & ~E1000_TCTL_EN);
1818
1819		/* Reset FIFO pointers */
1820		E1000_WRITE_REG(&adapter->hw, TDFT,  adapter->tx_head_addr);
1821		E1000_WRITE_REG(&adapter->hw, TDFH,  adapter->tx_head_addr);
1822		E1000_WRITE_REG(&adapter->hw, TDFTS, adapter->tx_head_addr);
1823		E1000_WRITE_REG(&adapter->hw, TDFHS, adapter->tx_head_addr);
1824
1825		/* Re-enable TX unit */
1826		E1000_WRITE_REG(&adapter->hw, TCTL, tctl);
1827		E1000_WRITE_FLUSH(&adapter->hw);
1828
1829		adapter->tx_fifo_head = 0;
1830		adapter->tx_fifo_reset_cnt++;
1831
1832		return (TRUE);
1833	}
1834	else {
1835		return (FALSE);
1836	}
1837}
1838
static void
em_set_promisc(struct adapter *adapter)
{
	struct ifnet	*ifp = adapter->ifp;
	uint32_t	reg_rctl;

	reg_rctl = E1000_READ_REG(&adapter->hw, RCTL);

	if (ifp->if_flags & IFF_PROMISC) {
		/* Enable both unicast and multicast promiscuous. */
		reg_rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
		E1000_WRITE_REG(&adapter->hw, RCTL, reg_rctl);
		/* Disable VLAN stripping in promiscuous mode
		 * This enables bridging of vlan tagged frames to occur
		 * and also allows vlan tags to be seen in tcpdump
		 */
		if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING)
			em_disable_vlans(adapter);
		adapter->em_insert_vlan_header = 1;
	} else if (ifp->if_flags & IFF_ALLMULTI) {
		/* Multicast promiscuous only. */
		reg_rctl |= E1000_RCTL_MPE;
		reg_rctl &= ~E1000_RCTL_UPE;
		E1000_WRITE_REG(&adapter->hw, RCTL, reg_rctl);
		adapter->em_insert_vlan_header = 0;
	} else
		adapter->em_insert_vlan_header = 0;
}
1865
1866static void
1867em_disable_promisc(struct adapter *adapter)
1868{
1869	struct ifnet	*ifp = adapter->ifp;
1870	uint32_t	reg_rctl;
1871
1872	reg_rctl = E1000_READ_REG(&adapter->hw, RCTL);
1873
1874	reg_rctl &=  (~E1000_RCTL_UPE);
1875	reg_rctl &=  (~E1000_RCTL_MPE);
1876	E1000_WRITE_REG(&adapter->hw, RCTL, reg_rctl);
1877
1878	if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING)
1879		em_enable_vlans(adapter);
1880	adapter->em_insert_vlan_header = 0;
1881}
1882
1883
1884/*********************************************************************
1885 *  Multicast Update
1886 *
1887 *  This routine is called whenever multicast address list is updated.
1888 *
1889 **********************************************************************/
1890
static void
em_set_multi(struct adapter *adapter)
{
	struct ifnet	*ifp = adapter->ifp;
	struct ifmultiaddr *ifma;
	uint32_t reg_rctl = 0;
	uint8_t  mta[MAX_NUM_MULTICAST_ADDRESSES * ETH_LENGTH_OF_ADDRESS];
	int mcnt = 0;

	IOCTL_DEBUGOUT("em_set_multi: begin");

	/* 82542 rev 2.0 workaround: hold the receiver in reset (and
	 * turn MWI off) while the multicast table is rewritten. */
	if (adapter->hw.mac_type == em_82542_rev2_0) {
		reg_rctl = E1000_READ_REG(&adapter->hw, RCTL);
		if (adapter->hw.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
			em_pci_clear_mwi(&adapter->hw);
		reg_rctl |= E1000_RCTL_RST;
		E1000_WRITE_REG(&adapter->hw, RCTL, reg_rctl);
		msec_delay(5);
	}

	/* Gather up to MAX_NUM_MULTICAST_ADDRESSES link-level
	 * multicast addresses from the interface. */
	IF_ADDR_LOCK(ifp);
	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
		if (ifma->ifma_addr->sa_family != AF_LINK)
			continue;

		if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
			break;

		bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
		    &mta[mcnt*ETH_LENGTH_OF_ADDRESS], ETH_LENGTH_OF_ADDRESS);
		mcnt++;
	}
	IF_ADDR_UNLOCK(ifp);

	/* Too many groups to filter: fall back to multicast
	 * promiscuous mode instead of programming the table. */
	if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES) {
		reg_rctl = E1000_READ_REG(&adapter->hw, RCTL);
		reg_rctl |= E1000_RCTL_MPE;
		E1000_WRITE_REG(&adapter->hw, RCTL, reg_rctl);
	} else
		em_mc_addr_list_update(&adapter->hw, mta, mcnt, 0, 1);

	/* Undo the 82542 rev 2.0 receiver reset and restore MWI. */
	if (adapter->hw.mac_type == em_82542_rev2_0) {
		reg_rctl = E1000_READ_REG(&adapter->hw, RCTL);
		reg_rctl &= ~E1000_RCTL_RST;
		E1000_WRITE_REG(&adapter->hw, RCTL, reg_rctl);
		msec_delay(5);
		if (adapter->hw.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
			em_pci_set_mwi(&adapter->hw);
	}
}
1941
1942
1943/*********************************************************************
1944 *  Timer routine
1945 *
1946 *  This routine checks for link status and updates statistics.
1947 *
1948 **********************************************************************/
1949
static void
em_local_timer(void *arg)
{
	struct adapter	*adapter = arg;
	struct ifnet	*ifp = adapter->ifp;

	EM_LOCK(adapter);

	/* Poll link state and pull the hardware statistics counters. */
	em_check_for_link(&adapter->hw);
	em_update_link_status(adapter);
	em_update_stats_counters(adapter);
	if (em_display_debug_stats && ifp->if_drv_flags & IFF_DRV_RUNNING)
		em_print_hw_stats(adapter);
	/* Run the SmartSpeed workaround state machine (no-op unless IGP PHY). */
	em_smartspeed(adapter);

	/* Rearm ourselves to fire again in one second. */
	callout_reset(&adapter->timer, hz, em_local_timer, adapter);

	EM_UNLOCK(adapter);
}
1969
/*
 * Track link transitions: on up, latch speed/duplex, apply the 82571/82572
 * gigabit SPEED_MODE tweak and notify the stack; on down, clear the cached
 * values and notify the stack.  No work is done if the state is unchanged.
 */
static void
em_update_link_status(struct adapter *adapter)
{
	struct ifnet *ifp = adapter->ifp;
	device_t dev = adapter->dev;

	if (E1000_READ_REG(&adapter->hw, STATUS) & E1000_STATUS_LU) {
		if (adapter->link_active == 0) {
			em_get_speed_and_duplex(&adapter->hw, &adapter->link_speed,
			    &adapter->link_duplex);
			/* Check if we may set SPEED_MODE bit on PCI-E */
			if ((adapter->link_speed == SPEED_1000) &&
			    ((adapter->hw.mac_type == em_82571) ||
			    (adapter->hw.mac_type == em_82572))) {
				int tarc0;

				tarc0 = E1000_READ_REG(&adapter->hw, TARC0);
				tarc0 |= SPEED_MODE_BIT;
				E1000_WRITE_REG(&adapter->hw, TARC0, tarc0);
			}
			if (bootverbose)
				device_printf(dev, "Link is up %d Mbps %s\n",
				    adapter->link_speed,
				    ((adapter->link_duplex == FULL_DUPLEX) ?
				    "Full Duplex" : "Half Duplex"));
			adapter->link_active = 1;
			/* New link: restart the SmartSpeed workaround. */
			adapter->smartspeed = 0;
			/* link_speed is in Mbps; if_baudrate is in bps. */
			ifp->if_baudrate = adapter->link_speed * 1000000;
			if_link_state_change(ifp, LINK_STATE_UP);
		}
	} else {
		if (adapter->link_active == 1) {
			ifp->if_baudrate = adapter->link_speed = 0;
			adapter->link_duplex = 0;
			if (bootverbose)
				device_printf(dev, "Link is Down\n");
			adapter->link_active = 0;
			if_link_state_change(ifp, LINK_STATE_DOWN);
		}
	}
}
2011
2012/*********************************************************************
2013 *
2014 *  This routine disables all traffic on the adapter by issuing a
2015 *  global reset on the MAC and deallocates TX/RX buffers.
2016 *
2017 **********************************************************************/
2018
static void
em_stop(void *arg)
{
	struct adapter	*adapter = arg;
	struct ifnet	*ifp = adapter->ifp;

	/* Caller must hold the adapter lock. */
	EM_LOCK_ASSERT(adapter);

	INIT_DEBUGOUT("em_stop: begin");

	/* Mask interrupts, then globally reset the MAC. */
	em_disable_intr(adapter);
	em_reset_hw(&adapter->hw);
	/* Stop the periodic timer and the TX FIFO workaround timer. */
	callout_stop(&adapter->timer);
	callout_stop(&adapter->tx_fifo_timer);
	/* Release all TX/RX descriptor rings and their mbufs/DMA maps. */
	em_free_transmit_structures(adapter);
	em_free_receive_structures(adapter);

	/* Tell the stack that the interface is no longer active */
	ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
}
2039
2040
2041/********************************************************************
2042 *
2043 *  Determine hardware revision.
2044 *
2045 **********************************************************************/
2046static void
2047em_identify_hardware(struct adapter *adapter)
2048{
2049	device_t dev = adapter->dev;
2050
2051	/* Make sure our PCI config space has the necessary stuff set */
2052	pci_enable_busmaster(dev);
2053	pci_enable_io(dev, SYS_RES_MEMORY);
2054	adapter->hw.pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2);
2055
2056	/* Save off the information about this board */
2057	adapter->hw.vendor_id = pci_get_vendor(dev);
2058	adapter->hw.device_id = pci_get_device(dev);
2059	adapter->hw.revision_id = pci_get_revid(dev);
2060	adapter->hw.subsystem_vendor_id = pci_get_subvendor(dev);
2061	adapter->hw.subsystem_id = pci_get_subdevice(dev);
2062
2063	/* Identify the MAC */
2064	if (em_set_mac_type(&adapter->hw))
2065		device_printf(dev, "Unknown MAC Type\n");
2066
2067	if(adapter->hw.mac_type == em_82541 || adapter->hw.mac_type == em_82541_rev_2 ||
2068	   adapter->hw.mac_type == em_82547 || adapter->hw.mac_type == em_82547_rev_2)
2069		adapter->hw.phy_init_script = TRUE;
2070}
2071
2072static int
2073em_allocate_pci_resources(struct adapter *adapter)
2074{
2075	device_t	dev = adapter->dev;
2076	int		val, rid;
2077
2078	rid = PCIR_BAR(0);
2079	adapter->res_memory = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
2080	    &rid, RF_ACTIVE);
2081	if (adapter->res_memory == NULL) {
2082		device_printf(dev, "Unable to allocate bus resource: memory\n");
2083		return (ENXIO);
2084	}
2085	adapter->osdep.mem_bus_space_tag =
2086	rman_get_bustag(adapter->res_memory);
2087	adapter->osdep.mem_bus_space_handle = rman_get_bushandle(adapter->res_memory);
2088	adapter->hw.hw_addr = (uint8_t *)&adapter->osdep.mem_bus_space_handle;
2089
2090	if (adapter->hw.mac_type > em_82543) {
2091		/* Figure our where our IO BAR is ? */
2092		for (rid = PCIR_BAR(0); rid < PCIR_CIS;) {
2093			val = pci_read_config(dev, rid, 4);
2094			if (E1000_BAR_TYPE(val) == E1000_BAR_TYPE_IO) {
2095				adapter->io_rid = rid;
2096				break;
2097			}
2098			rid += 4;
2099			/* check for 64bit BAR */
2100			if (E1000_BAR_MEM_TYPE(val) == E1000_BAR_MEM_TYPE_64BIT)
2101				rid += 4;
2102		}
2103		if (rid >= PCIR_CIS) {
2104			device_printf(dev, "Unable to locate IO BAR\n");
2105			return (ENXIO);
2106		}
2107		adapter->res_ioport = bus_alloc_resource_any(dev, SYS_RES_IOPORT,
2108		    &adapter->io_rid, RF_ACTIVE);
2109		if (adapter->res_ioport == NULL) {
2110			device_printf(dev, "Unable to allocate bus resource: "
2111			    "ioport\n");
2112			return (ENXIO);
2113		}
2114		adapter->hw.io_base = 0;
2115		adapter->osdep.io_bus_space_tag = rman_get_bustag(adapter->res_ioport);
2116		adapter->osdep.io_bus_space_handle =
2117		    rman_get_bushandle(adapter->res_ioport);
2118	}
2119
2120	/* For ICH8 we need to find the flash memory. */
2121	if (adapter->hw.mac_type == em_ich8lan) {
2122		rid = EM_FLASH;
2123
2124		adapter->flash_mem = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
2125		    &rid, RF_ACTIVE);
2126		adapter->osdep.flash_bus_space_tag = rman_get_bustag(adapter->flash_mem);
2127		adapter->osdep.flash_bus_space_handle =
2128		    rman_get_bushandle(adapter->flash_mem);
2129	}
2130
2131	rid = 0x0;
2132	adapter->res_interrupt = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid,
2133	    RF_SHAREABLE | RF_ACTIVE);
2134	if (adapter->res_interrupt == NULL) {
2135		device_printf(dev, "Unable to allocate bus resource: "
2136		    "interrupt\n");
2137		return (ENXIO);
2138	}
2139
2140	adapter->hw.back = &adapter->osdep;
2141
2142	return (0);
2143}
2144
/*
 * Register the interrupt handler and re-enable interrupts.  With
 * DEVICE_POLLING a conventional MPSAFE ithread handler is used;
 * otherwise a fast interrupt handler is installed together with the
 * taskqueue contexts that do the deferred rx/tx and link work.
 */
int
em_allocate_intr(struct adapter *adapter)
{
	device_t dev = adapter->dev;
	int error;

	/* Manually turn off all interrupts */
	E1000_WRITE_REG(&adapter->hw, IMC, 0xffffffff);

#ifdef DEVICE_POLLING
	/* Idempotent: skip setup if a handler is already registered. */
	if (adapter->int_handler_tag == NULL && (error = bus_setup_intr(dev,
	    adapter->res_interrupt, INTR_TYPE_NET | INTR_MPSAFE, em_intr, adapter,
	    &adapter->int_handler_tag)) != 0) {
		device_printf(dev, "Failed to register interrupt handler");
		return (error);
	}
#else
	/*
	 * Try allocating a fast interrupt and the associated deferred
	 * processing contexts.
	 */
	TASK_INIT(&adapter->rxtx_task, 0, em_handle_rxtx, adapter);
	TASK_INIT(&adapter->link_task, 0, em_handle_link, adapter);
	adapter->tq = taskqueue_create_fast("em_taskq", M_NOWAIT,
	    taskqueue_thread_enqueue, &adapter->tq);
	taskqueue_start_threads(&adapter->tq, 1, PI_NET, "%s taskq",
	    device_get_nameunit(adapter->dev));
	if ((error = bus_setup_intr(dev, adapter->res_interrupt,
	    INTR_TYPE_NET | INTR_FAST, em_intr_fast, adapter,
	    &adapter->int_handler_tag)) != 0) {
		/* Tear the taskqueue back down on registration failure. */
		device_printf(dev, "Failed to register fast interrupt "
			    "handler: %d\n", error);
		taskqueue_free(adapter->tq);
		adapter->tq = NULL;
		return (error);
	}
#endif

	em_enable_intr(adapter);
	return (0);
}
2186
/*
 * Tear down the interrupt handler and drain/free the deferred-work
 * contexts created by em_allocate_intr().  Safe to call when nothing
 * was registered (both branches are guarded).
 */
static void
em_free_intr(struct adapter *adapter)
{
	device_t dev = adapter->dev;

	if (adapter->int_handler_tag != NULL) {
		bus_teardown_intr(dev, adapter->res_interrupt, adapter->int_handler_tag);
		adapter->int_handler_tag = NULL;
	}
	if (adapter->tq != NULL) {
		taskqueue_drain(adapter->tq, &adapter->rxtx_task);
		/*
		 * NOTE(review): link_task is drained on the system fast
		 * taskqueue, not adapter->tq — presumably it is enqueued
		 * there by the fast interrupt path; verify against em_intr_fast.
		 */
		taskqueue_drain(taskqueue_fast, &adapter->link_task);
		taskqueue_free(adapter->tq);
		adapter->tq = NULL;
	}
}
2203
/*
 * Release the bus resources acquired by em_allocate_pci_resources().
 * Every release is guarded by a NULL check, so this is safe to call on
 * a partially initialized adapter (e.g. from an attach failure path).
 */
static void
em_free_pci_resources(struct adapter *adapter)
{
	device_t dev = adapter->dev;

	if (adapter->res_interrupt != NULL)
		bus_release_resource(dev, SYS_RES_IRQ, 0, adapter->res_interrupt);

	if (adapter->res_memory != NULL)
		bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BAR(0),
		    adapter->res_memory);

	if (adapter->flash_mem != NULL)
		bus_release_resource(dev, SYS_RES_MEMORY, EM_FLASH,
		    adapter->flash_mem);

	if (adapter->res_ioport != NULL)
		bus_release_resource(dev, SYS_RES_IOPORT, adapter->io_rid,
		    adapter->res_ioport);
}
2224
2225/*********************************************************************
2226 *
2227 *  Initialize the hardware to a configuration as specified by the
2228 *  adapter structure. The controller is reset, the EEPROM is
2229 *  verified, the MAC address is set, then the shared initialization
2230 *  routines are called.
2231 *
2232 **********************************************************************/
2233static int
2234em_hardware_init(struct adapter *adapter)
2235{
2236	device_t dev = adapter->dev;
2237	uint16_t rx_buffer_size;
2238
2239	INIT_DEBUGOUT("em_hardware_init: begin");
2240	/* Issue a global reset */
2241	em_reset_hw(&adapter->hw);
2242
2243	/* When hardware is reset, fifo_head is also reset */
2244	adapter->tx_fifo_head = 0;
2245
2246	/* Make sure we have a good EEPROM before we read from it */
2247	if (em_validate_eeprom_checksum(&adapter->hw) < 0) {
2248		device_printf(dev, "The EEPROM Checksum Is Not Valid\n");
2249		return (EIO);
2250	}
2251
2252	if (em_read_part_num(&adapter->hw, &(adapter->part_num)) < 0) {
2253		device_printf(dev, "EEPROM read error while reading part "
2254		    "number\n");
2255		return (EIO);
2256	}
2257
2258	/* Set up smart power down as default off on newer adapters. */
2259	if (!em_smart_pwr_down &&
2260	    (adapter->hw.mac_type == em_82571 || adapter->hw.mac_type == em_82572)) {
2261		uint16_t phy_tmp = 0;
2262
2263		/* Speed up time to link by disabling smart power down. */
2264		em_read_phy_reg(&adapter->hw, IGP02E1000_PHY_POWER_MGMT, &phy_tmp);
2265		phy_tmp &= ~IGP02E1000_PM_SPD;
2266		em_write_phy_reg(&adapter->hw, IGP02E1000_PHY_POWER_MGMT, phy_tmp);
2267	}
2268
2269	/*
2270	 * These parameters control the automatic generation (Tx) and
2271	 * response (Rx) to Ethernet PAUSE frames.
2272	 * - High water mark should allow for at least two frames to be
2273	 *   received after sending an XOFF.
2274	 * - Low water mark works best when it is very near the high water mark.
2275	 *   This allows the receiver to restart by sending XON when it has
2276	 *   drained a bit. Here we use an arbitary value of 1500 which will
2277	 *   restart after one full frame is pulled from the buffer. There
2278	 *   could be several smaller frames in the buffer and if so they will
2279	 *   not trigger the XON until their total number reduces the buffer
2280	 *   by 1500.
2281	 * - The pause time is fairly large at 1000 x 512ns = 512 usec.
2282	 */
2283	rx_buffer_size = ((E1000_READ_REG(&adapter->hw, PBA) & 0xffff) << 10 );
2284
2285	adapter->hw.fc_high_water = rx_buffer_size -
2286	    roundup2(adapter->hw.max_frame_size, 1024);
2287	adapter->hw.fc_low_water = adapter->hw.fc_high_water - 1500;
2288	if (adapter->hw.mac_type == em_80003es2lan)
2289		adapter->hw.fc_pause_time = 0xFFFF;
2290	else
2291		adapter->hw.fc_pause_time = 0x1000;
2292	adapter->hw.fc_send_xon = TRUE;
2293	adapter->hw.fc = em_fc_full;
2294
2295	if (em_init_hw(&adapter->hw) < 0) {
2296		device_printf(dev, "Hardware Initialization Failed");
2297		return (EIO);
2298	}
2299
2300	em_check_for_link(&adapter->hw);
2301
2302	return (0);
2303}
2304
2305/*********************************************************************
2306 *
2307 *  Setup networking device structure and register an interface.
2308 *
2309 **********************************************************************/
static void
em_setup_interface(device_t dev, struct adapter *adapter)
{
	struct ifnet   *ifp;
	INIT_DEBUGOUT("em_setup_interface: begin");

	/* Allocate and fill in the ifnet, then attach it to the stack. */
	ifp = adapter->ifp = if_alloc(IFT_ETHER);
	if (ifp == NULL)
		panic("%s: can not if_alloc()", device_get_nameunit(dev));
	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
	ifp->if_mtu = ETHERMTU;
	ifp->if_init =  em_init;
	ifp->if_softc = adapter;
	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
	ifp->if_ioctl = em_ioctl;
	ifp->if_start = em_start;
	ifp->if_watchdog = em_watchdog;
	/* Size the software send queue to the TX descriptor ring. */
	IFQ_SET_MAXLEN(&ifp->if_snd, adapter->num_tx_desc - 1);
	ifp->if_snd.ifq_drv_maxlen = adapter->num_tx_desc - 1;
	IFQ_SET_READY(&ifp->if_snd);

	ether_ifattach(ifp, adapter->hw.mac_addr);

	ifp->if_capabilities = ifp->if_capenable = 0;

	/* Hardware checksum offload is available on 82543 and later. */
	if (adapter->hw.mac_type >= em_82543) {
		ifp->if_capabilities |= IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM;
		ifp->if_capenable |= IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM;
	}

        /* Enable TSO if available */
        if ((adapter->hw.mac_type > em_82544) &&
            (adapter->hw.mac_type != em_82547)) {
                ifp->if_capabilities |= IFCAP_TSO;
                ifp->if_capenable |= IFCAP_TSO;
        }

	/*
	 * Tell the upper layer(s) we support long frames.
	 */
	ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header);
	ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU;
	/* HWTAGGING is advertised but not enabled by default. */
	ifp->if_capenable |= IFCAP_VLAN_MTU;

#ifdef DEVICE_POLLING
	ifp->if_capabilities |= IFCAP_POLLING;
#endif

	/*
	 * Specify the media types supported by this adapter and register
	 * callbacks to update media and link information
	 */
	ifmedia_init(&adapter->media, IFM_IMASK, em_media_change,
	    em_media_status);
	if ((adapter->hw.media_type == em_media_type_fiber) ||
	    (adapter->hw.media_type == em_media_type_internal_serdes)) {
		u_char fiber_type = IFM_1000_SX;	/* default type; */

		/* 82545 fiber parts report 1000base-LX. */
		if (adapter->hw.mac_type == em_82545)
			fiber_type = IFM_1000_LX;
		ifmedia_add(&adapter->media, IFM_ETHER | fiber_type | IFM_FDX,
		    0, NULL);
		ifmedia_add(&adapter->media, IFM_ETHER | fiber_type, 0, NULL);
	} else {
		/* Copper: advertise 10/100 and, except on IFE PHYs, 1000. */
		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T, 0, NULL);
		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T | IFM_FDX,
			    0, NULL);
		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX,
			    0, NULL);
		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX | IFM_FDX,
			    0, NULL);
		if (adapter->hw.phy_type != em_phy_ife) {
			ifmedia_add(&adapter->media,
				IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
			ifmedia_add(&adapter->media,
				IFM_ETHER | IFM_1000_T, 0, NULL);
		}
	}
	ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL);
	ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO);
}
2391
2392
2393/*********************************************************************
2394 *
2395 *  Workaround for SmartSpeed on 82541 and 82547 controllers
2396 *
2397 **********************************************************************/
static void
em_smartspeed(struct adapter *adapter)
{
	uint16_t phy_tmp;

	/*
	 * Only relevant while the link is down, on IGP PHYs, with
	 * autonegotiation advertising 1000FDX.
	 */
	if (adapter->link_active || (adapter->hw.phy_type != em_phy_igp) ||
	    adapter->hw.autoneg == 0 ||
	    (adapter->hw.autoneg_advertised & ADVERTISE_1000_FULL) == 0)
		return;

	if (adapter->smartspeed == 0) {
		/* If Master/Slave config fault is asserted twice,
		 * we assume back-to-back */
		em_read_phy_reg(&adapter->hw, PHY_1000T_STATUS, &phy_tmp);
		if (!(phy_tmp & SR_1000T_MS_CONFIG_FAULT))
			return;
		em_read_phy_reg(&adapter->hw, PHY_1000T_STATUS, &phy_tmp);
		if (phy_tmp & SR_1000T_MS_CONFIG_FAULT) {
			em_read_phy_reg(&adapter->hw, PHY_1000T_CTRL, &phy_tmp);
			if(phy_tmp & CR_1000T_MS_ENABLE) {
				/* Drop manual master/slave and restart autoneg. */
				phy_tmp &= ~CR_1000T_MS_ENABLE;
				em_write_phy_reg(&adapter->hw, PHY_1000T_CTRL,
				    phy_tmp);
				adapter->smartspeed++;
				if(adapter->hw.autoneg &&
				   !em_phy_setup_autoneg(&adapter->hw) &&
				   !em_read_phy_reg(&adapter->hw, PHY_CTRL,
				    &phy_tmp)) {
					phy_tmp |= (MII_CR_AUTO_NEG_EN |
						    MII_CR_RESTART_AUTO_NEG);
					em_write_phy_reg(&adapter->hw, PHY_CTRL,
					    phy_tmp);
				}
			}
		}
		return;
	} else if(adapter->smartspeed == EM_SMARTSPEED_DOWNSHIFT) {
		/* If still no link, perhaps using 2/3 pair cable */
		em_read_phy_reg(&adapter->hw, PHY_1000T_CTRL, &phy_tmp);
		phy_tmp |= CR_1000T_MS_ENABLE;
		em_write_phy_reg(&adapter->hw, PHY_1000T_CTRL, phy_tmp);
		if(adapter->hw.autoneg &&
		   !em_phy_setup_autoneg(&adapter->hw) &&
		   !em_read_phy_reg(&adapter->hw, PHY_CTRL, &phy_tmp)) {
			phy_tmp |= (MII_CR_AUTO_NEG_EN |
				    MII_CR_RESTART_AUTO_NEG);
			em_write_phy_reg(&adapter->hw, PHY_CTRL, phy_tmp);
		}
	}
	/* Restart process after EM_SMARTSPEED_MAX iterations */
	if(adapter->smartspeed++ == EM_SMARTSPEED_MAX)
		adapter->smartspeed = 0;
}
2451
2452
2453/*
2454 * Manage DMA'able memory.
2455 */
2456static void
2457em_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
2458{
2459	if (error)
2460		return;
2461	*(bus_addr_t *) arg = segs[0].ds_addr;
2462}
2463
2464static int
2465em_dma_malloc(struct adapter *adapter, bus_size_t size, struct em_dma_alloc *dma,
2466	int mapflags)
2467{
2468	int error;
2469
2470	error = bus_dma_tag_create(bus_get_dma_tag(adapter->dev), /* parent */
2471				EM_DBA_ALIGN, 0,	/* alignment, bounds */
2472				BUS_SPACE_MAXADDR,	/* lowaddr */
2473				BUS_SPACE_MAXADDR,	/* highaddr */
2474				NULL, NULL,		/* filter, filterarg */
2475				size,			/* maxsize */
2476				1,			/* nsegments */
2477				size,			/* maxsegsize */
2478				0,			/* flags */
2479				NULL,			/* lockfunc */
2480				NULL,			/* lockarg */
2481				&dma->dma_tag);
2482	if (error) {
2483		device_printf(adapter->dev, "%s: bus_dma_tag_create failed: %d\n",
2484		    __func__, error);
2485		goto fail_0;
2486	}
2487
2488	error = bus_dmamem_alloc(dma->dma_tag, (void**) &dma->dma_vaddr,
2489	    BUS_DMA_NOWAIT, &dma->dma_map);
2490	if (error) {
2491		device_printf(adapter->dev, "%s: bus_dmamem_alloc(%ju) failed: %d\n",
2492		    __func__, (uintmax_t)size, error);
2493		goto fail_2;
2494	}
2495
2496	dma->dma_paddr = 0;
2497	error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
2498	    size, em_dmamap_cb, &dma->dma_paddr, mapflags | BUS_DMA_NOWAIT);
2499	if (error || dma->dma_paddr == 0) {
2500		device_printf(adapter->dev, "%s: bus_dmamap_load failed: %d\n",
2501		    __func__, error);
2502		goto fail_3;
2503	}
2504
2505	return (0);
2506
2507fail_3:
2508	bus_dmamap_unload(dma->dma_tag, dma->dma_map);
2509fail_2:
2510	bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
2511	bus_dma_tag_destroy(dma->dma_tag);
2512fail_0:
2513	dma->dma_map = NULL;
2514	dma->dma_tag = NULL;
2515
2516	return (error);
2517}
2518
/*
 * Release a region allocated by em_dma_malloc().  Safe to call more
 * than once or on a never-allocated structure: the tag check short-
 * circuits, and the map pointer is cleared after the resources go.
 */
static void
em_dma_free(struct adapter *adapter, struct em_dma_alloc *dma)
{
	if (dma->dma_tag == NULL)
		return;
	if (dma->dma_map != NULL) {
		/* Complete outstanding DMA before unmapping and freeing. */
		bus_dmamap_sync(dma->dma_tag, dma->dma_map,
		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
		bus_dmamap_unload(dma->dma_tag, dma->dma_map);
		bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
		dma->dma_map = NULL;
	}
	bus_dma_tag_destroy(dma->dma_tag);
	dma->dma_tag = NULL;
}
2534
2535
2536/*********************************************************************
2537 *
2538 *  Allocate memory for tx_buffer structures. The tx_buffer stores all
2539 *  the information needed to transmit a packet on the wire.
2540 *
2541 **********************************************************************/
2542static int
2543em_allocate_transmit_structures(struct adapter *adapter)
2544{
2545	adapter->tx_buffer_area =  malloc(sizeof(struct em_buffer) *
2546	    adapter->num_tx_desc, M_DEVBUF, M_NOWAIT);
2547	if (adapter->tx_buffer_area == NULL) {
2548		device_printf(adapter->dev, "Unable to allocate tx_buffer memory\n");
2549		return (ENOMEM);
2550	}
2551
2552	bzero(adapter->tx_buffer_area, sizeof(struct em_buffer) * adapter->num_tx_desc);
2553
2554	return (0);
2555}
2556
2557/*********************************************************************
2558 *
2559 *  Allocate and initialize transmit structures.
2560 *
2561 **********************************************************************/
static int
em_setup_transmit_structures(struct adapter *adapter)
{
        struct ifnet   *ifp = adapter->ifp;
	device_t dev = adapter->dev;
	struct em_buffer *tx_buffer;
	bus_size_t size, segsize;
	int error, i;

	/*
	 * Setup DMA descriptor areas.
	 */
	segsize = size = roundup2(adapter->hw.max_frame_size, MCLBYTES);

        /* Overrides for TSO - want large sizes */
        if (ifp->if_hwassist & EM_TCPSEG_FEATURES) {
                size = EM_TSO_SIZE;
                segsize = PAGE_SIZE;
        }

	/* One tag for all TX packet maps; up to EM_MAX_SCATTER segments. */
	if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),	/* parent */
				1, 0,			/* alignment, bounds */
				BUS_SPACE_MAXADDR,	/* lowaddr */
				BUS_SPACE_MAXADDR,	/* highaddr */
				NULL, NULL,		/* filter, filterarg */
				size,			/* maxsize */
				EM_MAX_SCATTER,		/* nsegments */
				segsize,		/* maxsegsize */
				0,			/* flags */
				NULL,		/* lockfunc */
				NULL,		/* lockarg */
				&adapter->txtag)) != 0) {
		device_printf(dev, "Unable to allocate TX DMA tag\n");
		goto fail;
	}

	if ((error = em_allocate_transmit_structures(adapter)) != 0)
		goto fail;

	/* Clear the descriptor ring and create one DMA map per slot. */
	bzero(adapter->tx_desc_base, (sizeof(struct em_tx_desc)) * adapter->num_tx_desc);
	tx_buffer = adapter->tx_buffer_area;
	for (i = 0; i < adapter->num_tx_desc; i++) {
		error = bus_dmamap_create(adapter->txtag, 0, &tx_buffer->map);
		if (error != 0) {
			device_printf(dev, "Unable to create TX DMA map\n");
			goto fail;
		}
		tx_buffer++;
	}

	/* Reset the ring indices to an empty ring. */
	adapter->next_avail_tx_desc = 0;
	adapter->oldest_used_tx_desc = 0;

	/* Set number of descriptors available */
	adapter->num_tx_desc_avail = adapter->num_tx_desc;

	/* Set checksum context */
	adapter->active_checksum_context = OFFLOAD_NONE;
	bus_dmamap_sync(adapter->txdma.dma_tag, adapter->txdma.dma_map,
	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);

	return (0);

fail:
	/* Unwinds everything created above, including partial map sets. */
	em_free_transmit_structures(adapter);
	return (error);
}
2629
2630/*********************************************************************
2631 *
2632 *  Enable transmit unit.
2633 *
2634 **********************************************************************/
static void
em_initialize_transmit_unit(struct adapter *adapter)
{
	uint32_t	reg_tctl, reg_tarc;
	uint32_t	reg_tipg = 0;
	uint64_t	bus_addr;

	 INIT_DEBUGOUT("em_initialize_transmit_unit: begin");
	/* Setup the Base and Length of the Tx Descriptor Ring */
	bus_addr = adapter->txdma.dma_paddr;
	E1000_WRITE_REG(&adapter->hw, TDLEN,
	    adapter->num_tx_desc * sizeof(struct em_tx_desc));
	E1000_WRITE_REG(&adapter->hw, TDBAH, (uint32_t)(bus_addr >> 32));
	E1000_WRITE_REG(&adapter->hw, TDBAL, (uint32_t)bus_addr);

	/* Setup the HW Tx Head and Tail descriptor pointers */
	E1000_WRITE_REG(&adapter->hw, TDT, 0);
	E1000_WRITE_REG(&adapter->hw, TDH, 0);


	HW_DEBUGOUT2("Base = %x, Length = %x\n", E1000_READ_REG(&adapter->hw, TDBAL),
	    E1000_READ_REG(&adapter->hw, TDLEN));

	/* Set the default values for the Tx Inter Packet Gap timer */
	switch (adapter->hw.mac_type) {
	case em_82542_rev2_0:
	case em_82542_rev2_1:
		reg_tipg = DEFAULT_82542_TIPG_IPGT;
		reg_tipg |= DEFAULT_82542_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT;
		reg_tipg |= DEFAULT_82542_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT;
		break;
	case em_80003es2lan:
		reg_tipg = DEFAULT_82543_TIPG_IPGR1;
		reg_tipg |= DEFAULT_80003ES2LAN_TIPG_IPGR2 <<
		    E1000_TIPG_IPGR2_SHIFT;
		break;
	default:
		/* IPGT differs between fiber/serdes and copper media. */
		if ((adapter->hw.media_type == em_media_type_fiber) ||
		    (adapter->hw.media_type == em_media_type_internal_serdes))
			reg_tipg = DEFAULT_82543_TIPG_IPGT_FIBER;
		else
			reg_tipg = DEFAULT_82543_TIPG_IPGT_COPPER;
		reg_tipg |= DEFAULT_82543_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT;
		reg_tipg |= DEFAULT_82543_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT;
	}

	/* Program interrupt-delay timers (TADV exists on 82540 and later). */
	E1000_WRITE_REG(&adapter->hw, TIPG, reg_tipg);
	E1000_WRITE_REG(&adapter->hw, TIDV, adapter->tx_int_delay.value);
	if(adapter->hw.mac_type >= em_82540)
		E1000_WRITE_REG(&adapter->hw, TADV, adapter->tx_abs_int_delay.value);

	/* Do adapter specific tweaks before we enable the transmitter. */
	if (adapter->hw.mac_type == em_82571 || adapter->hw.mac_type == em_82572) {
		reg_tarc = E1000_READ_REG(&adapter->hw, TARC0);
		reg_tarc |= (1 << 25);
		E1000_WRITE_REG(&adapter->hw, TARC0, reg_tarc);
		reg_tarc = E1000_READ_REG(&adapter->hw, TARC1);
		reg_tarc |= (1 << 25);
		reg_tarc &= ~(1 << 28);
		E1000_WRITE_REG(&adapter->hw, TARC1, reg_tarc);
	} else if (adapter->hw.mac_type == em_80003es2lan) {
		reg_tarc = E1000_READ_REG(&adapter->hw, TARC0);
		reg_tarc |= 1;
		E1000_WRITE_REG(&adapter->hw, TARC0, reg_tarc);
		reg_tarc = E1000_READ_REG(&adapter->hw, TARC1);
		reg_tarc |= 1;
		E1000_WRITE_REG(&adapter->hw, TARC1, reg_tarc);
	}

	/* Program the Transmit Control Register */
	reg_tctl = E1000_TCTL_PSP | E1000_TCTL_EN |
		   (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT);
	/* Multiple-request support exists on 82571 and later. */
	if (adapter->hw.mac_type >= em_82571)
		reg_tctl |= E1000_TCTL_MULR;
	if (adapter->link_duplex == FULL_DUPLEX) {
		reg_tctl |= E1000_FDX_COLLISION_DISTANCE << E1000_COLD_SHIFT;
	} else {
		reg_tctl |= E1000_HDX_COLLISION_DISTANCE << E1000_COLD_SHIFT;
	}
	/* This write will effectively turn on the transmit unit. */
	E1000_WRITE_REG(&adapter->hw, TCTL, reg_tctl);

	/* Setup Transmit Descriptor Settings for this adapter */
	adapter->txd_cmd = E1000_TXD_CMD_IFCS | E1000_TXD_CMD_RS;

	/* Request interrupt-delay reporting only if a delay is configured. */
	if (adapter->tx_int_delay.value > 0)
		adapter->txd_cmd |= E1000_TXD_CMD_IDE;
}
2723
2724/*********************************************************************
2725 *
2726 *  Free all transmit related data structures.
2727 *
2728 **********************************************************************/
2729static void
2730em_free_transmit_structures(struct adapter *adapter)
2731{
2732	struct em_buffer *tx_buffer;
2733	int i;
2734
2735	INIT_DEBUGOUT("free_transmit_structures: begin");
2736
2737	if (adapter->tx_buffer_area != NULL) {
2738		tx_buffer = adapter->tx_buffer_area;
2739		for (i = 0; i < adapter->num_tx_desc; i++, tx_buffer++) {
2740			if (tx_buffer->m_head != NULL) {
2741				bus_dmamap_sync(adapter->txtag, tx_buffer->map,
2742				    BUS_DMASYNC_POSTWRITE);
2743				bus_dmamap_unload(adapter->txtag,
2744				    tx_buffer->map);
2745				m_freem(tx_buffer->m_head);
2746				tx_buffer->m_head = NULL;
2747			} else if (tx_buffer->map != NULL)
2748				bus_dmamap_unload(adapter->txtag,
2749				    tx_buffer->map);
2750			if (tx_buffer->map != NULL) {
2751				bus_dmamap_destroy(adapter->txtag,
2752				    tx_buffer->map);
2753				tx_buffer->map = NULL;
2754			}
2755		}
2756	}
2757	if (adapter->tx_buffer_area != NULL) {
2758		free(adapter->tx_buffer_area, M_DEVBUF);
2759		adapter->tx_buffer_area = NULL;
2760	}
2761	if (adapter->txtag != NULL) {
2762		bus_dma_tag_destroy(adapter->txtag);
2763		adapter->txtag = NULL;
2764	}
2765}
2766
2767/*********************************************************************
2768 *
2769 *  The offload context needs to be set when we transfer the first
2770 *  packet of a particular protocol (TCP/UDP). We change the
2771 *  context only if the protocol type changes.
2772 *
2773 **********************************************************************/
static void
em_transmit_checksum_setup(struct adapter *adapter, struct mbuf *mp,
    uint32_t *txd_upper, uint32_t *txd_lower)
{
	struct em_context_desc *TXD;
	struct em_buffer *tx_buffer;
	int curr_txd;

	if (mp->m_pkthdr.csum_flags) {

		if (mp->m_pkthdr.csum_flags & CSUM_TCP) {
			*txd_upper = E1000_TXD_POPTS_TXSM << 8;
			*txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
			/* Context already loaded for TCP: nothing to emit. */
			if (adapter->active_checksum_context == OFFLOAD_TCP_IP)
				return;
			else
				adapter->active_checksum_context = OFFLOAD_TCP_IP;

		} else if (mp->m_pkthdr.csum_flags & CSUM_UDP) {
			*txd_upper = E1000_TXD_POPTS_TXSM << 8;
			*txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
			/* Context already loaded for UDP: nothing to emit. */
			if (adapter->active_checksum_context == OFFLOAD_UDP_IP)
				return;
			else
				adapter->active_checksum_context = OFFLOAD_UDP_IP;
		} else {
			/* Unsupported offload request: send without offload. */
			*txd_upper = 0;
			*txd_lower = 0;
			return;
		}
	} else {
		*txd_upper = 0;
		*txd_lower = 0;
		return;
	}

	/* If we reach this point, the checksum offload context
	 * needs to be reset.
	 */
	curr_txd = adapter->next_avail_tx_desc;
	tx_buffer = &adapter->tx_buffer_area[curr_txd];
	TXD = (struct em_context_desc *) &adapter->tx_desc_base[curr_txd];

	/* IP header fields assume an untagged Ethernet frame with no options. */
	TXD->lower_setup.ip_fields.ipcss = ETHER_HDR_LEN;
	TXD->lower_setup.ip_fields.ipcso =
		ETHER_HDR_LEN + offsetof(struct ip, ip_sum);
	TXD->lower_setup.ip_fields.ipcse =
		htole16(ETHER_HDR_LEN + sizeof(struct ip) - 1);

	TXD->upper_setup.tcp_fields.tucss =
		ETHER_HDR_LEN + sizeof(struct ip);
	/* tucse == 0 means "checksum to end of packet". */
	TXD->upper_setup.tcp_fields.tucse = htole16(0);

	/* Point tucso at the TCP or UDP checksum field respectively. */
	if (adapter->active_checksum_context == OFFLOAD_TCP_IP) {
		TXD->upper_setup.tcp_fields.tucso =
			ETHER_HDR_LEN + sizeof(struct ip) +
			offsetof(struct tcphdr, th_sum);
	} else if (adapter->active_checksum_context == OFFLOAD_UDP_IP) {
		TXD->upper_setup.tcp_fields.tucso =
			ETHER_HDR_LEN + sizeof(struct ip) +
			offsetof(struct udphdr, uh_sum);
	}

	TXD->tcp_seg_setup.data = htole32(0);
	TXD->cmd_and_length = htole32(adapter->txd_cmd | E1000_TXD_CMD_DEXT);

	/* Context descriptors carry no mbuf. */
	tx_buffer->m_head = NULL;

	if (++curr_txd == adapter->num_tx_desc)
		curr_txd = 0;

	/* The context descriptor consumes one ring slot. */
	adapter->num_tx_desc_avail--;
	adapter->next_avail_tx_desc = curr_txd;
}
2848
2849/**********************************************************************
2850 *
2851 *  Setup work for hardware segmentation offload (TSO)
2852 *
2853 **********************************************************************/
2854static boolean_t
2855em_tso_setup(struct adapter *adapter,
2856             struct mbuf *mp,
2857             uint32_t *txd_upper,
2858             uint32_t *txd_lower)
2859{
2860        struct em_context_desc *TXD;
2861        struct em_buffer *tx_buffer;
2862        struct ip *ip;
2863        struct tcphdr *th;
2864        int curr_txd, hdr_len, ip_hlen, tcp_hlen;
2865
2866        if (((mp->m_pkthdr.csum_flags & CSUM_TSO) == 0) ||
2867            (mp->m_pkthdr.len <= E1000_TX_BUFFER_SIZE)) {
2868                return FALSE;
2869        }
2870
2871        *txd_lower = (E1000_TXD_CMD_DEXT |
2872                      E1000_TXD_DTYP_D |
2873                      E1000_TXD_CMD_TSE);
2874
2875        *txd_upper = (E1000_TXD_POPTS_IXSM |
2876                      E1000_TXD_POPTS_TXSM) << 8;
2877
2878        curr_txd = adapter->next_avail_tx_desc;
2879        tx_buffer = &adapter->tx_buffer_area[curr_txd];
2880        TXD = (struct em_context_desc *) &adapter->tx_desc_base[curr_txd];
2881
2882        mp->m_data += sizeof(struct ether_header);
2883        ip = mtod(mp, struct ip *);
2884        ip->ip_len = 0;
2885        ip->ip_sum = 0;
2886        ip_hlen = ip->ip_hl << 2 ;
2887        th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
2888        tcp_hlen = th->th_off << 2;
2889
2890        hdr_len = ETHER_HDR_LEN + ip_hlen + tcp_hlen;
2891	th->th_sum = in_pseudo(ip->ip_src.s_addr,
2892	    ip->ip_dst.s_addr, htons(IPPROTO_TCP));
2893
2894        mp->m_data -= sizeof(struct ether_header);
2895        TXD->lower_setup.ip_fields.ipcss = ETHER_HDR_LEN;
2896        TXD->lower_setup.ip_fields.ipcso =
2897                ETHER_HDR_LEN + offsetof(struct ip, ip_sum);
2898        TXD->lower_setup.ip_fields.ipcse =
2899                htole16(ETHER_HDR_LEN + ip_hlen - 1);
2900
2901        TXD->upper_setup.tcp_fields.tucss =
2902                ETHER_HDR_LEN + ip_hlen;
2903        TXD->upper_setup.tcp_fields.tucse = 0;
2904        TXD->upper_setup.tcp_fields.tucso =
2905                ETHER_HDR_LEN + ip_hlen +
2906                offsetof(struct tcphdr, th_sum);
2907        TXD->tcp_seg_setup.fields.mss = htole16(mp->m_pkthdr.tso_segsz);
2908        TXD->tcp_seg_setup.fields.hdr_len = hdr_len;
2909        TXD->cmd_and_length = htole32(adapter->txd_cmd |
2910                                E1000_TXD_CMD_DEXT |
2911                                E1000_TXD_CMD_TSE |
2912                                E1000_TXD_CMD_IP | E1000_TXD_CMD_TCP |
2913                                (mp->m_pkthdr.len - (hdr_len)));
2914
2915        tx_buffer->m_head = NULL;
2916
2917        if (++curr_txd == adapter->num_tx_desc)
2918                curr_txd = 0;
2919
2920        adapter->num_tx_desc_avail--;
2921        adapter->next_avail_tx_desc = curr_txd;
2922        adapter->tx_tso = TRUE;
2923
2924        return TRUE;
2925}
2926
2927/**********************************************************************
2928 *
2929 *  Examine each tx_buffer in the used queue. If the hardware is done
2930 *  processing the packet then free associated resources. The
2931 *  tx_buffer is put back on the free queue.
2932 *
2933 **********************************************************************/
static void
em_txeof(struct adapter *adapter)
{
	int i, num_avail;
	struct em_buffer *tx_buffer;
	struct em_tx_desc   *tx_desc;
	struct ifnet   *ifp = adapter->ifp;

	EM_LOCK_ASSERT(adapter);

	/* Nothing to reclaim if every descriptor is already free. */
	if (adapter->num_tx_desc_avail == adapter->num_tx_desc)
		return;

	num_avail = adapter->num_tx_desc_avail;
	i = adapter->oldest_used_tx_desc;

	tx_buffer = &adapter->tx_buffer_area[i];
	tx_desc = &adapter->tx_desc_base[i];

	/* Pick up the DD (descriptor done) write-backs from the NIC. */
	bus_dmamap_sync(adapter->txdma.dma_tag, adapter->txdma.dma_map,
	    BUS_DMASYNC_POSTREAD);
	while (tx_desc->upper.fields.status & E1000_TXD_STAT_DD) {

		tx_desc->upper.data = 0;
		num_avail++;

		/*
		 * Context-setup slots have no mbuf attached (m_head is
		 * NULL); only real packet slots carry one to free.
		 */
		if (tx_buffer->m_head) {
			ifp->if_opackets++;
			bus_dmamap_sync(adapter->txtag, tx_buffer->map,
			    BUS_DMASYNC_POSTWRITE);
			bus_dmamap_unload(adapter->txtag, tx_buffer->map);

			m_freem(tx_buffer->m_head);
			tx_buffer->m_head = NULL;
		}

		if (++i == adapter->num_tx_desc)
			i = 0;

		tx_buffer = &adapter->tx_buffer_area[i];
		tx_desc = &adapter->tx_desc_base[i];
	}
	/* Push the cleared status words back before the NIC reuses them. */
	bus_dmamap_sync(adapter->txdma.dma_tag, adapter->txdma.dma_map,
	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);

	adapter->oldest_used_tx_desc = i;

	/*
	 * If we have enough room, clear IFF_DRV_OACTIVE to tell the stack
	 * that it is OK to send packets.
	 * If there are no pending descriptors, clear the timeout. Otherwise,
	 * if some descriptors have been freed, restart the timeout.
	 */
	if (num_avail > EM_TX_CLEANUP_THRESHOLD) {
		ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
		if (num_avail == adapter->num_tx_desc)
			ifp->if_timer = 0;
		else if (num_avail != adapter->num_tx_desc_avail)
			ifp->if_timer = EM_TX_TIMEOUT;
	}
	adapter->num_tx_desc_avail = num_avail;
}
2996
2997/*********************************************************************
2998 *
2999 *  Get a buffer from system mbuf buffer pool.
3000 *
3001 **********************************************************************/
static int
em_get_buf(struct adapter *adapter, int i)
{
	struct mbuf		*m;
	bus_dma_segment_t	segs[1];
	bus_dmamap_t		map;
	struct em_buffer	*rx_buffer;
	int			error, nsegs;

	m = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR);
	if (m == NULL) {
		adapter->mbuf_cluster_failed++;
		return (ENOBUFS);
	}
	m->m_len = m->m_pkthdr.len = MCLBYTES;
	/* Align the payload only if the max frame still fits afterwards. */
	if (adapter->hw.max_frame_size <= (MCLBYTES - ETHER_ALIGN))
		m_adj(m, ETHER_ALIGN);

	/*
	 * Using memory from the mbuf cluster pool, invoke the
	 * bus_dma machinery to arrange the memory mapping.
	 * Load into the spare map first so the slot's current mapping
	 * stays intact if the load fails.
	 */
	error = bus_dmamap_load_mbuf_sg(adapter->rxtag, adapter->rx_sparemap,
	    m, segs, &nsegs, BUS_DMA_NOWAIT);
	if (error != 0) {
		m_free(m);
		return (error);
	}
	/* If nsegs is wrong then the stack is corrupt. */
	KASSERT(nsegs == 1, ("Too many segments returned!"));

	rx_buffer = &adapter->rx_buffer_area[i];
	if (rx_buffer->m_head != NULL)
		bus_dmamap_unload(adapter->rxtag, rx_buffer->map);

	/* Swap the freshly loaded spare map into the slot. */
	map = rx_buffer->map;
	rx_buffer->map = adapter->rx_sparemap;
	adapter->rx_sparemap = map;
	bus_dmamap_sync(adapter->rxtag, rx_buffer->map, BUS_DMASYNC_PREREAD);
	rx_buffer->m_head = m;

	/* Hand the cluster's bus address to the RX descriptor. */
	adapter->rx_desc_base[i].buffer_addr = htole64(segs[0].ds_addr);

	return (0);
}
3047
3048/*********************************************************************
3049 *
3050 *  Allocate memory for rx_buffer structures. Since we use one
3051 *  rx_buffer per received packet, the maximum number of rx_buffer's
3052 *  that we'll need is equal to the number of receive descriptors
3053 *  that we've allocated.
3054 *
3055 **********************************************************************/
3056static int
3057em_allocate_receive_structures(struct adapter *adapter)
3058{
3059	device_t dev = adapter->dev;
3060	struct em_buffer *rx_buffer;
3061	int i, error;
3062
3063	adapter->rx_buffer_area = malloc(sizeof(struct em_buffer) * adapter->num_rx_desc,
3064	    M_DEVBUF, M_NOWAIT);
3065	if (adapter->rx_buffer_area == NULL) {
3066		device_printf(dev, "Unable to allocate rx_buffer memory\n");
3067		return (ENOMEM);
3068	}
3069
3070	bzero(adapter->rx_buffer_area, sizeof(struct em_buffer) * adapter->num_rx_desc);
3071
3072	error = bus_dma_tag_create(bus_get_dma_tag(dev),	/* parent */
3073				1, 0,			/* alignment, bounds */
3074				BUS_SPACE_MAXADDR,	/* lowaddr */
3075				BUS_SPACE_MAXADDR,	/* highaddr */
3076				NULL, NULL,		/* filter, filterarg */
3077				MCLBYTES,		/* maxsize */
3078				1,			/* nsegments */
3079				MCLBYTES,		/* maxsegsize */
3080				0,			/* flags */
3081				NULL,			/* lockfunc */
3082				NULL,			/* lockarg */
3083				&adapter->rxtag);
3084	if (error) {
3085		device_printf(dev, "%s: bus_dma_tag_create failed %d\n",
3086		    __func__, error);
3087		goto fail;
3088	}
3089
3090	error = bus_dmamap_create(adapter->rxtag, BUS_DMA_NOWAIT,
3091	    &adapter->rx_sparemap);
3092	if (error) {
3093		device_printf(dev, "%s: bus_dmamap_create failed: %d\n",
3094		    __func__, error);
3095		goto fail;
3096	}
3097	rx_buffer = adapter->rx_buffer_area;
3098	for (i = 0; i < adapter->num_rx_desc; i++, rx_buffer++) {
3099		error = bus_dmamap_create(adapter->rxtag, BUS_DMA_NOWAIT,
3100		    &rx_buffer->map);
3101		if (error) {
3102			device_printf(dev, "%s: bus_dmamap_create failed: %d\n",
3103			    __func__, error);
3104			goto fail;
3105		}
3106	}
3107
3108	for (i = 0; i < adapter->num_rx_desc; i++) {
3109		error = em_get_buf(adapter, i);
3110		if (error)
3111			goto fail;
3112	}
3113	bus_dmamap_sync(adapter->rxdma.dma_tag, adapter->rxdma.dma_map,
3114	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3115
3116	return (0);
3117
3118fail:
3119	em_free_receive_structures(adapter);
3120	return (error);
3121}
3122
3123/*********************************************************************
3124 *
3125 *  Allocate and initialize receive structures.
3126 *
3127 **********************************************************************/
3128static int
3129em_setup_receive_structures(struct adapter *adapter)
3130{
3131	int error;
3132
3133	bzero(adapter->rx_desc_base, (sizeof(struct em_rx_desc)) * adapter->num_rx_desc);
3134
3135	if ((error = em_allocate_receive_structures(adapter)) != 0)
3136		return (error);
3137
3138	/* Setup our descriptor pointers */
3139	adapter->next_rx_desc_to_check = 0;
3140
3141	return (0);
3142}
3143
3144/*********************************************************************
3145 *
3146 *  Enable receive unit.
3147 *
3148 **********************************************************************/
static void
em_initialize_receive_unit(struct adapter *adapter)
{
	struct ifnet	*ifp = adapter->ifp;
	uint64_t	bus_addr;
	uint32_t	reg_rctl;
	uint32_t	reg_rxcsum;

	INIT_DEBUGOUT("em_initialize_receive_unit: begin");

	/*
	 * Make sure receives are disabled while setting
	 * up the descriptor ring
	 */
	E1000_WRITE_REG(&adapter->hw, RCTL, 0);

	/* Set the Receive Delay Timer Register */
	E1000_WRITE_REG(&adapter->hw, RDTR, adapter->rx_int_delay.value | E1000_RDT_FPDB);

	/* 82540 and later also have an absolute delay and ITR register. */
	if(adapter->hw.mac_type >= em_82540) {
		E1000_WRITE_REG(&adapter->hw, RADV, adapter->rx_abs_int_delay.value);

		/*
		 * Set the interrupt throttling rate. Value is calculated
		 * as DEFAULT_ITR = 1/(MAX_INTS_PER_SEC * 256ns)
		 */
#define MAX_INTS_PER_SEC	8000
#define DEFAULT_ITR	     1000000000/(MAX_INTS_PER_SEC * 256)
		E1000_WRITE_REG(&adapter->hw, ITR, DEFAULT_ITR);
	}

	/* Setup the Base and Length of the Rx Descriptor Ring */
	bus_addr = adapter->rxdma.dma_paddr;
	E1000_WRITE_REG(&adapter->hw, RDLEN, adapter->num_rx_desc *
			sizeof(struct em_rx_desc));
	E1000_WRITE_REG(&adapter->hw, RDBAH, (uint32_t)(bus_addr >> 32));
	E1000_WRITE_REG(&adapter->hw, RDBAL, (uint32_t)bus_addr);

	/* Setup the HW Rx Head and Tail Descriptor Pointers */
	/* Tail = num_rx_desc - 1: the hardware owns the whole ring. */
	E1000_WRITE_REG(&adapter->hw, RDT, adapter->num_rx_desc - 1);
	E1000_WRITE_REG(&adapter->hw, RDH, 0);

	/* Setup the Receive Control Register */
	reg_rctl = E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_LBM_NO |
		   E1000_RCTL_RDMTS_HALF |
		   (adapter->hw.mc_filter_type << E1000_RCTL_MO_SHIFT);

	/* TBI compatibility requires accepting bad packets (SBP). */
	if (adapter->hw.tbi_compatibility_on == TRUE)
		reg_rctl |= E1000_RCTL_SBP;


	/* Buffer sizes above 2048 need the BSEX (size extension) bit. */
	switch (adapter->rx_buffer_len) {
	default:
	case EM_RXBUFFER_2048:
		reg_rctl |= E1000_RCTL_SZ_2048;
		break;
	case EM_RXBUFFER_4096:
		reg_rctl |= E1000_RCTL_SZ_4096 | E1000_RCTL_BSEX | E1000_RCTL_LPE;
		break;
	case EM_RXBUFFER_8192:
		reg_rctl |= E1000_RCTL_SZ_8192 | E1000_RCTL_BSEX | E1000_RCTL_LPE;
		break;
	case EM_RXBUFFER_16384:
		reg_rctl |= E1000_RCTL_SZ_16384 | E1000_RCTL_BSEX | E1000_RCTL_LPE;
		break;
	}

	/* Long-packet enable for jumbo MTUs. */
	if (ifp->if_mtu > ETHERMTU)
		reg_rctl |= E1000_RCTL_LPE;

	/* Enable 82543 Receive Checksum Offload for TCP and UDP */
	if ((adapter->hw.mac_type >= em_82543) &&
	    (ifp->if_capenable & IFCAP_RXCSUM)) {
		reg_rxcsum = E1000_READ_REG(&adapter->hw, RXCSUM);
		reg_rxcsum |= (E1000_RXCSUM_IPOFL | E1000_RXCSUM_TUOFL);
		E1000_WRITE_REG(&adapter->hw, RXCSUM, reg_rxcsum);
	}

	/* Enable Receives */
	E1000_WRITE_REG(&adapter->hw, RCTL, reg_rctl);
}
3230
3231/*********************************************************************
3232 *
3233 *  Free receive related data structures.
3234 *
3235 **********************************************************************/
3236static void
3237em_free_receive_structures(struct adapter *adapter)
3238{
3239	struct em_buffer *rx_buffer;
3240	int i;
3241
3242	INIT_DEBUGOUT("free_receive_structures: begin");
3243
3244	if (adapter->rx_sparemap) {
3245		bus_dmamap_destroy(adapter->rxtag, adapter->rx_sparemap);
3246		adapter->rx_sparemap = NULL;
3247	}
3248	if (adapter->rx_buffer_area != NULL) {
3249		rx_buffer = adapter->rx_buffer_area;
3250		for (i = 0; i < adapter->num_rx_desc; i++, rx_buffer++) {
3251			if (rx_buffer->m_head != NULL) {
3252				bus_dmamap_sync(adapter->rxtag, rx_buffer->map,
3253				    BUS_DMASYNC_POSTREAD);
3254				bus_dmamap_unload(adapter->rxtag,
3255				    rx_buffer->map);
3256				m_freem(rx_buffer->m_head);
3257				rx_buffer->m_head = NULL;
3258			} else if (rx_buffer->map != NULL)
3259				bus_dmamap_unload(adapter->rxtag,
3260				    rx_buffer->map);
3261			if (rx_buffer->map != NULL) {
3262				bus_dmamap_destroy(adapter->rxtag,
3263				    rx_buffer->map);
3264				rx_buffer->map = NULL;
3265			}
3266		}
3267	}
3268	if (adapter->rx_buffer_area != NULL) {
3269		free(adapter->rx_buffer_area, M_DEVBUF);
3270		adapter->rx_buffer_area = NULL;
3271	}
3272	if (adapter->rxtag != NULL) {
3273		bus_dma_tag_destroy(adapter->rxtag);
3274		adapter->rxtag = NULL;
3275	}
3276}
3277
3278/*********************************************************************
3279 *
3280 *  This routine executes in interrupt context. It replenishes
3281 *  the mbufs in the descriptor and sends data which has been
3282 *  dma'ed into host memory to upper layer.
3283 *
3284 *  We loop at most count times if count is > 0, or until done if
3285 *  count < 0.
3286 *
3287 *********************************************************************/
static int
em_rxeof(struct adapter *adapter, int count)
{
	struct ifnet	*ifp;
	struct mbuf	*mp;
	uint8_t		accept_frame = 0;
	uint8_t		eop = 0;
	uint16_t 	len, desc_len, prev_len_adj;
	int		i;

	/* Pointer to the receive descriptor being examined. */
	struct em_rx_desc   *current_desc;
	uint8_t		status;

	ifp = adapter->ifp;
	i = adapter->next_rx_desc_to_check;
	current_desc = &adapter->rx_desc_base[i];
	bus_dmamap_sync(adapter->rxdma.dma_tag, adapter->rxdma.dma_map,
	    BUS_DMASYNC_POSTREAD);

	if (!((current_desc->status) & E1000_RXD_STAT_DD))
		return (0);

	/* Process completed descriptors; count < 0 means "until done". */
	while ((current_desc->status & E1000_RXD_STAT_DD) &&
	    (count != 0) &&
	    (ifp->if_drv_flags & IFF_DRV_RUNNING)) {
		struct mbuf *m = NULL;

		mp = adapter->rx_buffer_area[i].m_head;
		/*
		 * Can't defer bus_dmamap_sync(9) because TBI_ACCEPT
		 * needs to access the last received byte in the mbuf.
		 */
		bus_dmamap_sync(adapter->rxtag, adapter->rx_buffer_area[i].map,
		    BUS_DMASYNC_POSTREAD);

		accept_frame = 1;
		prev_len_adj = 0;
		desc_len = le16toh(current_desc->length);
		status = current_desc->status;
		/* On EOP strip the CRC; count only full frames. */
		if (status & E1000_RXD_STAT_EOP) {
			count--;
			eop = 1;
			if (desc_len < ETHER_CRC_LEN) {
				len = 0;
				prev_len_adj = ETHER_CRC_LEN - desc_len;
			} else
				len = desc_len - ETHER_CRC_LEN;
		} else {
			eop = 0;
			len = desc_len;
		}

		/* TBI (fiber) workaround: some "bad" frames are accepted. */
		if (current_desc->errors & E1000_RXD_ERR_FRAME_ERR_MASK) {
			uint8_t		last_byte;
			uint32_t	pkt_len = desc_len;

			if (adapter->fmp != NULL)
				pkt_len += adapter->fmp->m_pkthdr.len;

			last_byte = *(mtod(mp, caddr_t) + desc_len - 1);
			if (TBI_ACCEPT(&adapter->hw, status,
			    current_desc->errors, pkt_len, last_byte)) {
				em_tbi_adjust_stats(&adapter->hw,
				    &adapter->stats, pkt_len,
				    adapter->hw.mac_addr);
				if (len > 0)
					len--;
			} else
				accept_frame = 0;
		}

		if (accept_frame) {
			/* Replenish the slot; on failure recycle it below. */
			if (em_get_buf(adapter, i) != 0) {
				ifp->if_iqdrops++;
				goto discard;
			}

			/* Assign correct length to the current fragment */
			mp->m_len = len;

			if (adapter->fmp == NULL) {
				mp->m_pkthdr.len = len;
				adapter->fmp = mp; /* Store the first mbuf */
				adapter->lmp = mp;
			} else {
				/* Chain mbuf's together */
				mp->m_flags &= ~M_PKTHDR;
				/*
				 * Adjust length of previous mbuf in chain if
				 * we received less than 4 bytes in the last
				 * descriptor.
				 */
				if (prev_len_adj > 0) {
					adapter->lmp->m_len -= prev_len_adj;
					adapter->fmp->m_pkthdr.len -=
					    prev_len_adj;
				}
				adapter->lmp->m_next = mp;
				adapter->lmp = adapter->lmp->m_next;
				adapter->fmp->m_pkthdr.len += len;
			}

			/* Frame complete: hand the chain up the stack. */
			if (eop) {
				adapter->fmp->m_pkthdr.rcvif = ifp;
				ifp->if_ipackets++;
				em_receive_checksum(adapter, current_desc,
				    adapter->fmp);
#ifndef __NO_STRICT_ALIGNMENT
				if (adapter->hw.max_frame_size >
				    (MCLBYTES - ETHER_ALIGN) &&
				    em_fixup_rx(adapter) != 0)
					goto skip;
#endif
				if (status & E1000_RXD_STAT_VP)
					VLAN_INPUT_TAG(ifp, adapter->fmp,
					    (le16toh(current_desc->special) &
					    E1000_RXD_SPC_VLAN_MASK));
#ifndef __NO_STRICT_ALIGNMENT
skip:
#endif
				m = adapter->fmp;
				adapter->fmp = NULL;
				adapter->lmp = NULL;
			}
		} else {
			ifp->if_ierrors++;
discard:
			/* Reuse loaded DMA map and just update mbuf chain */
			mp = adapter->rx_buffer_area[i].m_head;
			mp->m_len = mp->m_pkthdr.len = MCLBYTES;
			mp->m_data = mp->m_ext.ext_buf;
			mp->m_next = NULL;
			if (adapter->hw.max_frame_size <= (MCLBYTES - ETHER_ALIGN))
				m_adj(mp, ETHER_ALIGN);
			if (adapter->fmp != NULL) {
				m_freem(adapter->fmp);
				adapter->fmp = NULL;
				adapter->lmp = NULL;
			}
			m = NULL;
		}

		/* Zero out the receive descriptors status. */
		current_desc->status = 0;
		bus_dmamap_sync(adapter->rxdma.dma_tag, adapter->rxdma.dma_map,
		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);

		/* Advance our pointers to the next descriptor. */
		if (++i == adapter->num_rx_desc)
			i = 0;
		if (m != NULL) {
			/*
			 * Publish our position, then re-read it after
			 * if_input: under DEVICE_POLLING the lock is
			 * dropped and the index may have moved.
			 */
			adapter->next_rx_desc_to_check = i;
#ifdef DEVICE_POLLING
			EM_UNLOCK(adapter);
			(*ifp->if_input)(ifp, m);
			EM_LOCK(adapter);
#else
			(*ifp->if_input)(ifp, m);
#endif
			i = adapter->next_rx_desc_to_check;
		}
		current_desc = &adapter->rx_desc_base[i];
	}
	adapter->next_rx_desc_to_check = i;

	/* Advance the E1000's Receive Queue #0  "Tail Pointer". */
	if (--i < 0)
		i = adapter->num_rx_desc - 1;
	E1000_WRITE_REG(&adapter->hw, RDT, i);
	if (!((current_desc->status) & E1000_RXD_STAT_DD))
		return (0);

	return (1);
}
3463
3464#ifndef __NO_STRICT_ALIGNMENT
3465/*
3466 * When jumbo frames are enabled we should realign entire payload on
3467 * architecures with strict alignment. This is serious design mistake of 8254x
3468 * as it nullifies DMA operations. 8254x just allows RX buffer size to be
3469 * 2048/4096/8192/16384. What we really want is 2048 - ETHER_ALIGN to align its
3470 * payload. On architecures without strict alignment restrictions 8254x still
3471 * performs unaligned memory access which would reduce the performance too.
3472 * To avoid copying over an entire frame to align, we allocate a new mbuf and
3473 * copy ethernet header to the new mbuf. The new mbuf is prepended into the
3474 * existing mbuf chain.
3475 *
3476 * Be aware, best performance of the 8254x is achived only when jumbo frame is
3477 * not used at all on architectures with strict alignment.
3478 */
3479static int
3480em_fixup_rx(struct adapter *adapter)
3481{
3482	struct mbuf *m, *n;
3483	int error;
3484
3485	error = 0;
3486	m = adapter->fmp;
3487	if (m->m_len <= (MCLBYTES - ETHER_HDR_LEN)) {
3488		bcopy(m->m_data, m->m_data + ETHER_HDR_LEN, m->m_len);
3489		m->m_data += ETHER_HDR_LEN;
3490	} else {
3491		MGETHDR(n, M_DONTWAIT, MT_DATA);
3492		if (n != NULL) {
3493			bcopy(m->m_data, n->m_data, ETHER_HDR_LEN);
3494			m->m_data += ETHER_HDR_LEN;
3495			m->m_len -= ETHER_HDR_LEN;
3496			n->m_len = ETHER_HDR_LEN;
3497			M_MOVE_PKTHDR(n, m);
3498			n->m_next = m;
3499			adapter->fmp = n;
3500		} else {
3501			adapter->ifp->if_iqdrops++;
3502			adapter->mbuf_alloc_failed++;
3503			m_freem(adapter->fmp);
3504			adapter->fmp = NULL;
3505			adapter->lmp = NULL;
3506			error = ENOBUFS;
3507		}
3508	}
3509
3510	return (error);
3511}
3512#endif
3513
3514/*********************************************************************
3515 *
3516 *  Verify that the hardware indicated that the checksum is valid.
3517 *  Inform the stack about the status of checksum so that stack
3518 *  doesn't spend time verifying the checksum.
3519 *
3520 *********************************************************************/
3521static void
3522em_receive_checksum(struct adapter *adapter, struct em_rx_desc *rx_desc,
3523		    struct mbuf *mp)
3524{
3525	/* 82543 or newer only */
3526	if ((adapter->hw.mac_type < em_82543) ||
3527	    /* Ignore Checksum bit is set */
3528	    (rx_desc->status & E1000_RXD_STAT_IXSM)) {
3529		mp->m_pkthdr.csum_flags = 0;
3530		return;
3531	}
3532
3533	if (rx_desc->status & E1000_RXD_STAT_IPCS) {
3534		/* Did it pass? */
3535		if (!(rx_desc->errors & E1000_RXD_ERR_IPE)) {
3536			/* IP Checksum Good */
3537			mp->m_pkthdr.csum_flags = CSUM_IP_CHECKED;
3538			mp->m_pkthdr.csum_flags |= CSUM_IP_VALID;
3539
3540		} else {
3541			mp->m_pkthdr.csum_flags = 0;
3542		}
3543	}
3544
3545	if (rx_desc->status & E1000_RXD_STAT_TCPCS) {
3546		/* Did it pass? */
3547		if (!(rx_desc->errors & E1000_RXD_ERR_TCPE)) {
3548			mp->m_pkthdr.csum_flags |=
3549			(CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
3550			mp->m_pkthdr.csum_data = htons(0xffff);
3551		}
3552	}
3553}
3554
3555
3556static void
3557em_enable_vlans(struct adapter *adapter)
3558{
3559	uint32_t ctrl;
3560
3561	E1000_WRITE_REG(&adapter->hw, VET, ETHERTYPE_VLAN);
3562
3563	ctrl = E1000_READ_REG(&adapter->hw, CTRL);
3564	ctrl |= E1000_CTRL_VME;
3565	E1000_WRITE_REG(&adapter->hw, CTRL, ctrl);
3566}
3567
3568static void
3569em_disable_vlans(struct adapter *adapter)
3570{
3571	uint32_t ctrl;
3572
3573	ctrl = E1000_READ_REG(&adapter->hw, CTRL);
3574	ctrl &= ~E1000_CTRL_VME;
3575	E1000_WRITE_REG(&adapter->hw, CTRL, ctrl);
3576}
3577
static void
em_enable_intr(struct adapter *adapter)
{
	/* Unmask every interrupt cause the driver services. */
	E1000_WRITE_REG(&adapter->hw, IMS, (IMS_ENABLE_MASK));
}
3583
3584static void
3585em_disable_intr(struct adapter *adapter)
3586{
3587	/*
3588	 * The first version of 82542 had an errata where when link was forced
3589	 * it would stay up even up even if the cable was disconnected.
3590	 * Sequence errors were used to detect the disconnect and then the
3591	 * driver would unforce the link. This code in the in the ISR. For this
3592	 * to work correctly the Sequence error interrupt had to be enabled
3593	 * all the time.
3594	 */
3595
3596	if (adapter->hw.mac_type == em_82542_rev2_0)
3597	    E1000_WRITE_REG(&adapter->hw, IMC,
3598		(0xffffffff & ~E1000_IMC_RXSEQ));
3599	else
3600	    E1000_WRITE_REG(&adapter->hw, IMC,
3601		0xffffffff);
3602}
3603
3604static int
3605em_is_valid_ether_addr(uint8_t *addr)
3606{
3607	char zero_addr[6] = { 0, 0, 0, 0, 0, 0 };
3608
3609	if ((addr[0] & 1) || (!bcmp(addr, zero_addr, ETHER_ADDR_LEN))) {
3610		return (FALSE);
3611	}
3612
3613	return (TRUE);
3614}
3615
/*
 * Write a 16-bit value to PCI config space.  Callback for the shared
 * em_hw code; hw->back carries the FreeBSD device handle.
 */
void
em_write_pci_cfg(struct em_hw *hw, uint32_t reg, uint16_t *value)
{
	pci_write_config(((struct em_osdep *)hw->back)->dev, reg, *value, 2);
}
3621
/*
 * Read a 16-bit value from PCI config space into *value.  Callback for
 * the shared em_hw code; hw->back carries the FreeBSD device handle.
 */
void
em_read_pci_cfg(struct em_hw *hw, uint32_t reg, uint16_t *value)
{
	*value = pci_read_config(((struct em_osdep *)hw->back)->dev, reg, 2);
}
3627
/*
 * Set the Memory-Write-Invalidate bit in the PCI command register,
 * restoring the rest of the command word from the cached copy.
 */
void
em_pci_set_mwi(struct em_hw *hw)
{
	pci_write_config(((struct em_osdep *)hw->back)->dev, PCIR_COMMAND,
	    (hw->pci_cmd_word | CMD_MEM_WRT_INVALIDATE), 2);
}
3634
/*
 * Clear the Memory-Write-Invalidate bit in the PCI command register,
 * restoring the rest of the command word from the cached copy.
 */
void
em_pci_clear_mwi(struct em_hw *hw)
{
	pci_write_config(((struct em_osdep *)hw->back)->dev, PCIR_COMMAND,
	    (hw->pci_cmd_word & ~CMD_MEM_WRT_INVALIDATE), 2);
}
3641
3642/*********************************************************************
3643* 82544 Coexistence issue workaround.
3644*    There are 2 issues.
3645*       1. Transmit Hang issue.
3646*    To detect this issue, following equation can be used...
3647*	  SIZE[3:0] + ADDR[2:0] = SUM[3:0].
3648*	  If SUM[3:0] is in between 1 to 4, we will have this issue.
3649*
3650*       2. DAC issue.
3651*    To detect this issue, following equation can be used...
3652*	  SIZE[3:0] + ADDR[2:0] = SUM[3:0].
3653*	  If SUM[3:0] is in between 9 to c, we will have this issue.
3654*
3655*
3656*    WORKAROUND:
3657*	  Make sure we do not have ending address as 1,2,3,4(Hang) or 9,a,b,c (DAC)
3658*
3659*** *********************************************************************/
3660static uint32_t
3661em_fill_descriptors (bus_addr_t address, uint32_t length,
3662		PDESC_ARRAY desc_array)
3663{
3664	/* Since issue is sensitive to length and address.*/
3665	/* Let us first check the address...*/
3666	uint32_t safe_terminator;
3667	if (length <= 4) {
3668		desc_array->descriptor[0].address = address;
3669		desc_array->descriptor[0].length = length;
3670		desc_array->elements = 1;
3671		return (desc_array->elements);
3672	}
3673	safe_terminator = (uint32_t)((((uint32_t)address & 0x7) + (length & 0xF)) & 0xF);
3674	/* if it does not fall between 0x1 to 0x4 and 0x9 to 0xC then return */
3675	if (safe_terminator == 0   ||
3676	(safe_terminator > 4   &&
3677	safe_terminator < 9)   ||
3678	(safe_terminator > 0xC &&
3679	safe_terminator <= 0xF)) {
3680		desc_array->descriptor[0].address = address;
3681		desc_array->descriptor[0].length = length;
3682		desc_array->elements = 1;
3683		return (desc_array->elements);
3684	}
3685
3686	desc_array->descriptor[0].address = address;
3687	desc_array->descriptor[0].length = length - 4;
3688	desc_array->descriptor[1].address = address + (length - 4);
3689	desc_array->descriptor[1].length = 4;
3690	desc_array->elements = 2;
3691	return (desc_array->elements);
3692}
3693
3694/**********************************************************************
3695 *
3696 *  Update the board statistics counters.
3697 *
3698 **********************************************************************/
static void
em_update_stats_counters(struct adapter *adapter)
{
	struct ifnet   *ifp;

	/* Symbol/sequence errors are only meaningful with link on fiber. */
	if(adapter->hw.media_type == em_media_type_copper ||
	   (E1000_READ_REG(&adapter->hw, STATUS) & E1000_STATUS_LU)) {
		adapter->stats.symerrs += E1000_READ_REG(&adapter->hw, SYMERRS);
		adapter->stats.sec += E1000_READ_REG(&adapter->hw, SEC);
	}
	adapter->stats.crcerrs += E1000_READ_REG(&adapter->hw, CRCERRS);
	adapter->stats.mpc += E1000_READ_REG(&adapter->hw, MPC);
	adapter->stats.scc += E1000_READ_REG(&adapter->hw, SCC);
	adapter->stats.ecol += E1000_READ_REG(&adapter->hw, ECOL);

	adapter->stats.mcc += E1000_READ_REG(&adapter->hw, MCC);
	adapter->stats.latecol += E1000_READ_REG(&adapter->hw, LATECOL);
	adapter->stats.colc += E1000_READ_REG(&adapter->hw, COLC);
	adapter->stats.dc += E1000_READ_REG(&adapter->hw, DC);
	adapter->stats.rlec += E1000_READ_REG(&adapter->hw, RLEC);
	adapter->stats.xonrxc += E1000_READ_REG(&adapter->hw, XONRXC);
	adapter->stats.xontxc += E1000_READ_REG(&adapter->hw, XONTXC);
	adapter->stats.xoffrxc += E1000_READ_REG(&adapter->hw, XOFFRXC);
	adapter->stats.xofftxc += E1000_READ_REG(&adapter->hw, XOFFTXC);
	adapter->stats.fcruc += E1000_READ_REG(&adapter->hw, FCRUC);
	adapter->stats.prc64 += E1000_READ_REG(&adapter->hw, PRC64);
	adapter->stats.prc127 += E1000_READ_REG(&adapter->hw, PRC127);
	adapter->stats.prc255 += E1000_READ_REG(&adapter->hw, PRC255);
	adapter->stats.prc511 += E1000_READ_REG(&adapter->hw, PRC511);
	adapter->stats.prc1023 += E1000_READ_REG(&adapter->hw, PRC1023);
	adapter->stats.prc1522 += E1000_READ_REG(&adapter->hw, PRC1522);
	adapter->stats.gprc += E1000_READ_REG(&adapter->hw, GPRC);
	adapter->stats.bprc += E1000_READ_REG(&adapter->hw, BPRC);
	adapter->stats.mprc += E1000_READ_REG(&adapter->hw, MPRC);
	adapter->stats.gptc += E1000_READ_REG(&adapter->hw, GPTC);

	/* For the 64-bit byte counters the low dword must be read first. */
	/* Both registers clear on the read of the high dword */

	adapter->stats.gorcl += E1000_READ_REG(&adapter->hw, GORCL);
	adapter->stats.gorch += E1000_READ_REG(&adapter->hw, GORCH);
	adapter->stats.gotcl += E1000_READ_REG(&adapter->hw, GOTCL);
	adapter->stats.gotch += E1000_READ_REG(&adapter->hw, GOTCH);

	adapter->stats.rnbc += E1000_READ_REG(&adapter->hw, RNBC);
	adapter->stats.ruc += E1000_READ_REG(&adapter->hw, RUC);
	adapter->stats.rfc += E1000_READ_REG(&adapter->hw, RFC);
	adapter->stats.roc += E1000_READ_REG(&adapter->hw, ROC);
	adapter->stats.rjc += E1000_READ_REG(&adapter->hw, RJC);

	/* 64-bit total-octets counters: same low-dword-first rule. */
	adapter->stats.torl += E1000_READ_REG(&adapter->hw, TORL);
	adapter->stats.torh += E1000_READ_REG(&adapter->hw, TORH);
	adapter->stats.totl += E1000_READ_REG(&adapter->hw, TOTL);
	adapter->stats.toth += E1000_READ_REG(&adapter->hw, TOTH);

	adapter->stats.tpr += E1000_READ_REG(&adapter->hw, TPR);
	adapter->stats.tpt += E1000_READ_REG(&adapter->hw, TPT);
	adapter->stats.ptc64 += E1000_READ_REG(&adapter->hw, PTC64);
	adapter->stats.ptc127 += E1000_READ_REG(&adapter->hw, PTC127);
	adapter->stats.ptc255 += E1000_READ_REG(&adapter->hw, PTC255);
	adapter->stats.ptc511 += E1000_READ_REG(&adapter->hw, PTC511);
	adapter->stats.ptc1023 += E1000_READ_REG(&adapter->hw, PTC1023);
	adapter->stats.ptc1522 += E1000_READ_REG(&adapter->hw, PTC1522);
	adapter->stats.mptc += E1000_READ_REG(&adapter->hw, MPTC);
	adapter->stats.bptc += E1000_READ_REG(&adapter->hw, BPTC);

	/* These counters exist only on 82543 and newer parts. */
	if (adapter->hw.mac_type >= em_82543) {
		adapter->stats.algnerrc += E1000_READ_REG(&adapter->hw, ALGNERRC);
		adapter->stats.rxerrc += E1000_READ_REG(&adapter->hw, RXERRC);
		adapter->stats.tncrs += E1000_READ_REG(&adapter->hw, TNCRS);
		adapter->stats.cexterr += E1000_READ_REG(&adapter->hw, CEXTERR);
		adapter->stats.tsctc += E1000_READ_REG(&adapter->hw, TSCTC);
		adapter->stats.tsctfc += E1000_READ_REG(&adapter->hw, TSCTFC);
	}
	ifp = adapter->ifp;

	ifp->if_collisions = adapter->stats.colc;

	/* Rx Errors */
	ifp->if_ierrors = adapter->stats.rxerrc + adapter->stats.crcerrs +
	    adapter->stats.algnerrc + adapter->stats.ruc + adapter->stats.roc +
	    adapter->stats.mpc + adapter->stats.cexterr;

	/* Tx Errors */
	ifp->if_oerrors = adapter->stats.ecol + adapter->stats.latecol +
	    adapter->watchdog_events;
}
3786
3787
3788/**********************************************************************
3789 *
3790 *  This routine is called only when em_display_debug_stats is enabled.
3791 *  This routine provides a way to take a look at important statistics
3792 *  maintained by the driver and hardware.
3793 *
3794 **********************************************************************/
3795static void
3796em_print_debug_info(struct adapter *adapter)
3797{
3798	device_t dev = adapter->dev;
3799	uint8_t *hw_addr = adapter->hw.hw_addr;
3800
3801	device_printf(dev, "Adapter hardware address = %p \n", hw_addr);
3802	device_printf(dev, "CTRL = 0x%x RCTL = 0x%x \n",
3803	    E1000_READ_REG(&adapter->hw, CTRL),
3804	    E1000_READ_REG(&adapter->hw, RCTL));
3805	device_printf(dev, "Packet buffer = Tx=%dk Rx=%dk \n",
3806	    ((E1000_READ_REG(&adapter->hw, PBA) & 0xffff0000) >> 16),\
3807	    (E1000_READ_REG(&adapter->hw, PBA) & 0xffff) );
3808	device_printf(dev, "Flow control watermarks high = %d low = %d\n",
3809	    adapter->hw.fc_high_water,
3810	    adapter->hw.fc_low_water);
3811	device_printf(dev, "tx_int_delay = %d, tx_abs_int_delay = %d\n",
3812	    E1000_READ_REG(&adapter->hw, TIDV),
3813	    E1000_READ_REG(&adapter->hw, TADV));
3814	device_printf(dev, "rx_int_delay = %d, rx_abs_int_delay = %d\n",
3815	    E1000_READ_REG(&adapter->hw, RDTR),
3816	    E1000_READ_REG(&adapter->hw, RADV));
3817	device_printf(dev, "fifo workaround = %lld, fifo_reset_count = %lld\n",
3818	    (long long)adapter->tx_fifo_wrk_cnt,
3819	    (long long)adapter->tx_fifo_reset_cnt);
3820	device_printf(dev, "hw tdh = %d, hw tdt = %d\n",
3821	    E1000_READ_REG(&adapter->hw, TDH),
3822	    E1000_READ_REG(&adapter->hw, TDT));
3823	device_printf(dev, "Num Tx descriptors avail = %d\n",
3824	    adapter->num_tx_desc_avail);
3825	device_printf(dev, "Tx Descriptors not avail1 = %ld\n",
3826	    adapter->no_tx_desc_avail1);
3827	device_printf(dev, "Tx Descriptors not avail2 = %ld\n",
3828	    adapter->no_tx_desc_avail2);
3829	device_printf(dev, "Std mbuf failed = %ld\n",
3830	    adapter->mbuf_alloc_failed);
3831	device_printf(dev, "Std mbuf cluster failed = %ld\n",
3832	    adapter->mbuf_cluster_failed);
3833}
3834
3835static void
3836em_print_hw_stats(struct adapter *adapter)
3837{
3838	device_t dev = adapter->dev;
3839
3840	device_printf(dev, "Excessive collisions = %lld\n",
3841	    (long long)adapter->stats.ecol);
3842	device_printf(dev, "Symbol errors = %lld\n",
3843	    (long long)adapter->stats.symerrs);
3844	device_printf(dev, "Sequence errors = %lld\n",
3845	    (long long)adapter->stats.sec);
3846	device_printf(dev, "Defer count = %lld\n", (long long)adapter->stats.dc);
3847
3848	device_printf(dev, "Missed Packets = %lld\n", (long long)adapter->stats.mpc);
3849	device_printf(dev, "Receive No Buffers = %lld\n",
3850	    (long long)adapter->stats.rnbc);
3851	/* RLEC is inaccurate on some hardware, calculate our own. */
3852	device_printf(dev, "Receive Length Errors = %lld\n",
3853	    ((long long)adapter->stats.roc + (long long)adapter->stats.ruc));
3854	device_printf(dev, "Receive errors = %lld\n",
3855	    (long long)adapter->stats.rxerrc);
3856	device_printf(dev, "Crc errors = %lld\n", (long long)adapter->stats.crcerrs);
3857	device_printf(dev, "Alignment errors = %lld\n",
3858	    (long long)adapter->stats.algnerrc);
3859	device_printf(dev, "Carrier extension errors = %lld\n",
3860	    (long long)adapter->stats.cexterr);
3861	device_printf(dev, "RX overruns = %ld\n", adapter->rx_overruns);
3862	device_printf(dev, "watchdog timeouts = %ld\n", adapter->watchdog_events);
3863
3864	device_printf(dev, "XON Rcvd = %lld\n", (long long)adapter->stats.xonrxc);
3865	device_printf(dev, "XON Xmtd = %lld\n", (long long)adapter->stats.xontxc);
3866	device_printf(dev, "XOFF Rcvd = %lld\n", (long long)adapter->stats.xoffrxc);
3867	device_printf(dev, "XOFF Xmtd = %lld\n", (long long)adapter->stats.xofftxc);
3868
3869	device_printf(dev, "Good Packets Rcvd = %lld\n",
3870	    (long long)adapter->stats.gprc);
3871	device_printf(dev, "Good Packets Xmtd = %lld\n",
3872	    (long long)adapter->stats.gptc);
3873        device_printf(dev, "TSO Contexts Xmtd = %lld\n",
3874            (long long)adapter->stats.tsctc);
3875        device_printf(dev, "TSO Contexts Failed = %lld\n",
3876            (long long)adapter->stats.tsctfc);
3877}
3878
3879static int
3880em_sysctl_debug_info(SYSCTL_HANDLER_ARGS)
3881{
3882	struct adapter *adapter;
3883	int error;
3884	int result;
3885
3886	result = -1;
3887	error = sysctl_handle_int(oidp, &result, 0, req);
3888
3889	if (error || !req->newptr)
3890		return (error);
3891
3892	if (result == 1) {
3893		adapter = (struct adapter *)arg1;
3894		em_print_debug_info(adapter);
3895	}
3896
3897	return (error);
3898}
3899
3900
3901static int
3902em_sysctl_stats(SYSCTL_HANDLER_ARGS)
3903{
3904	struct adapter *adapter;
3905	int error;
3906	int result;
3907
3908	result = -1;
3909	error = sysctl_handle_int(oidp, &result, 0, req);
3910
3911	if (error || !req->newptr)
3912		return (error);
3913
3914	if (result == 1) {
3915		adapter = (struct adapter *)arg1;
3916		em_print_hw_stats(adapter);
3917	}
3918
3919	return (error);
3920}
3921
3922static int
3923em_sysctl_int_delay(SYSCTL_HANDLER_ARGS)
3924{
3925	struct em_int_delay_info *info;
3926	struct adapter *adapter;
3927	uint32_t regval;
3928	int error;
3929	int usecs;
3930	int ticks;
3931
3932	info = (struct em_int_delay_info *)arg1;
3933	usecs = info->value;
3934	error = sysctl_handle_int(oidp, &usecs, 0, req);
3935	if (error != 0 || req->newptr == NULL)
3936		return (error);
3937	if (usecs < 0 || usecs > E1000_TICKS_TO_USECS(65535))
3938		return (EINVAL);
3939	info->value = usecs;
3940	ticks = E1000_USECS_TO_TICKS(usecs);
3941
3942	adapter = info->adapter;
3943
3944	EM_LOCK(adapter);
3945	regval = E1000_READ_OFFSET(&adapter->hw, info->offset);
3946	regval = (regval & ~0xffff) | (ticks & 0xffff);
3947	/* Handle a few special cases. */
3948	switch (info->offset) {
3949	case E1000_RDTR:
3950	case E1000_82542_RDTR:
3951		regval |= E1000_RDT_FPDB;
3952		break;
3953	case E1000_TIDV:
3954	case E1000_82542_TIDV:
3955		if (ticks == 0) {
3956			adapter->txd_cmd &= ~E1000_TXD_CMD_IDE;
3957			/* Don't write 0 into the TIDV register. */
3958			regval++;
3959		} else
3960			adapter->txd_cmd |= E1000_TXD_CMD_IDE;
3961		break;
3962	}
3963	E1000_WRITE_OFFSET(&adapter->hw, info->offset, regval);
3964	EM_UNLOCK(adapter);
3965	return (0);
3966}
3967
3968static void
3969em_add_int_delay_sysctl(struct adapter *adapter, const char *name,
3970	const char *description, struct em_int_delay_info *info,
3971	int offset, int value)
3972{
3973	info->adapter = adapter;
3974	info->offset = offset;
3975	info->value = value;
3976	SYSCTL_ADD_PROC(device_get_sysctl_ctx(adapter->dev),
3977	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
3978	    OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW,
3979	    info, 0, em_sysctl_int_delay, "I", description);
3980}
3981
3982#ifndef DEVICE_POLLING
3983static void
3984em_add_int_process_limit(struct adapter *adapter, const char *name,
3985	const char *description, int *limit, int value)
3986{
3987	*limit = value;
3988	SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev),
3989	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
3990	    OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW, limit, value, description);
3991}
3992#endif
3993