/**************************************************************************

Copyright (c) 2001-2006, Intel Corporation
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

 1. Redistributions of source code must retain the above copyright notice,
    this list of conditions and the following disclaimer.

 2. Redistributions in binary form must reproduce the above copyright
    notice, this list of conditions and the following disclaimer in the
    documentation and/or other materials provided with the distribution.

 3. Neither the name of the Intel Corporation nor the names of its
    contributors may be used to endorse or promote products derived from
    this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.

***************************************************************************/

/*$FreeBSD: head/sys/dev/em/if_em.c 162235 2006-09-11 20:59:01Z pdeuskar $*/

#ifdef HAVE_KERNEL_OPTION_HEADERS
#include "opt_device_polling.h"
#endif

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/bus.h>
#include <sys/endian.h>
#include <sys/kernel.h>
#include <sys/kthread.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/module.h>
#include <sys/rman.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/sysctl.h>
#include <sys/taskqueue.h>

#include <machine/bus.h>
#include <machine/resource.h>

#include <net/bpf.h>
#include <net/ethernet.h>
#include <net/if.h>
#include <net/if_arp.h>
#include <net/if_dl.h>
#include <net/if_media.h>

#include <net/if_types.h>
#include <net/if_vlan_var.h>

#include <netinet/in_systm.h>
#include <netinet/in.h>
#include <netinet/if_ether.h>
#include <netinet/ip.h>
#include <netinet/tcp.h>
#include <netinet/udp.h>

#include <machine/in_cksum.h>
#include <dev/pci/pcivar.h>
#include <dev/pci/pcireg.h>
#include <dev/em/if_em_hw.h>
#include <dev/em/if_em.h>

/*********************************************************************
 *  Set this to one to display debug statistics
 *********************************************************************/
int	em_display_debug_stats = 0;

/*********************************************************************
 *  Driver version
 *********************************************************************/

char em_driver_version[] = "Version - 6.1.4 - TSO";


/*********************************************************************
 *  PCI Device ID Table
 *
 *  Used by probe to select devices to load on
 *  Last field stores an index into em_strings
 *  Last entry must be all 0s
 *
 *  { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index }
 *********************************************************************/

static em_vendor_info_t em_vendor_info_array[] =
{
	/* Intel(R) PRO/1000 Network Connection */
	{ 0x8086, E1000_DEV_ID_82540EM,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82540EM_LOM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82540EP,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82540EP_LOM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82540EP_LP,	PCI_ANY_ID, PCI_ANY_ID, 0},

	{ 0x8086, E1000_DEV_ID_82541EI,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82541ER,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82541ER_LOM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82541EI_MOBILE,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82541GI,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82541GI_LF,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82541GI_MOBILE,	PCI_ANY_ID, PCI_ANY_ID, 0},

	{ 0x8086, E1000_DEV_ID_82542,		PCI_ANY_ID, PCI_ANY_ID, 0},

	{ 0x8086, E1000_DEV_ID_82543GC_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82543GC_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},

	{ 0x8086, E1000_DEV_ID_82544EI_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82544EI_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82544GC_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82544GC_LOM,	PCI_ANY_ID, PCI_ANY_ID, 0},

	{ 0x8086, E1000_DEV_ID_82545EM_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82545EM_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82545GM_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82545GM_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82545GM_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},

	{ 0x8086, E1000_DEV_ID_82546EB_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82546EB_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82546EB_QUAD_COPPER, PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82546GB_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82546GB_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82546GB_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82546GB_PCIE,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82546GB_QUAD_COPPER, PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82546GB_QUAD_COPPER_KSP3,
						PCI_ANY_ID, PCI_ANY_ID, 0},

	{ 0x8086, E1000_DEV_ID_82547EI,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82547EI_MOBILE,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82547GI,		PCI_ANY_ID, PCI_ANY_ID, 0},

	{ 0x8086, E1000_DEV_ID_82571EB_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER,
						PCI_ANY_ID, PCI_ANY_ID, 0},

	{ 0x8086, E1000_DEV_ID_82572EI_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82572EI_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82572EI_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82572EI,		PCI_ANY_ID, PCI_ANY_ID, 0},

	{ 0x8086, E1000_DEV_ID_82573E,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82573E_IAMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82573L,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_SPT,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_SPT,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_DPT,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_DPT,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IGP_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IGP_C,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IFE,	PCI_ANY_ID, PCI_ANY_ID, 0},

	/* required last entry */
	{ 0, 0, 0, 0, 0}
};

/*********************************************************************
 *  Table of branding strings for all supported NICs.
 *********************************************************************/

static char *em_strings[] = {
	"Intel(R) PRO/1000 Network Connection"
};

/*********************************************************************
 *  Function prototypes
 *********************************************************************/
static int	em_probe(device_t);
static int	em_attach(device_t);
static int	em_detach(device_t);
static int	em_shutdown(device_t);
static int	em_suspend(device_t);
static int	em_resume(device_t);
static void	em_start(struct ifnet *);
static void	em_start_locked(struct ifnet *ifp);
static int	em_ioctl(struct ifnet *, u_long, caddr_t);
static void	em_watchdog(struct ifnet *);
static void	em_init(void *);
static void	em_init_locked(struct adapter *);
static void	em_stop(void *);
static void	em_media_status(struct ifnet *, struct ifmediareq *);
static int	em_media_change(struct ifnet *);
static void	em_identify_hardware(struct adapter *);
static int	em_allocate_pci_resources(struct adapter *);
static int	em_allocate_intr(struct adapter *);
static void	em_free_intr(struct adapter *);
static void	em_free_pci_resources(struct adapter *);
static void	em_local_timer(void *);
static int	em_hardware_init(struct adapter *);
static void	em_setup_interface(device_t, struct adapter *);
static int	em_setup_transmit_structures(struct adapter *);
static void	em_initialize_transmit_unit(struct adapter *);
static int	em_setup_receive_structures(struct adapter *);
static void	em_initialize_receive_unit(struct adapter *);
static void	em_enable_intr(struct adapter *);
static void	em_disable_intr(struct adapter *);
static void	em_free_transmit_structures(struct adapter *);
static void	em_free_receive_structures(struct adapter *);
static void	em_update_stats_counters(struct adapter *);
static void	em_txeof(struct adapter *);
static int	em_allocate_receive_structures(struct adapter *);
static int	em_allocate_transmit_structures(struct adapter *);
static int	em_rxeof(struct adapter *, int);
#ifndef __NO_STRICT_ALIGNMENT
static int	em_fixup_rx(struct adapter *);
#endif
static void	em_receive_checksum(struct adapter *, struct em_rx_desc *,
		    struct mbuf *);
static void	em_transmit_checksum_setup(struct adapter *, struct mbuf *,
		    uint32_t *, uint32_t *);
static boolean_t em_tso_setup(struct adapter *, struct mbuf *,
		    uint32_t *, uint32_t *);
static void	em_set_promisc(struct adapter *);
static void	em_disable_promisc(struct adapter *);
static void	em_set_multi(struct adapter *);
static void	em_print_hw_stats(struct adapter *);
static void	em_update_link_status(struct adapter *);
static int	em_get_buf(struct adapter *, int);
static void	em_enable_vlans(struct adapter *);
static void	em_disable_vlans(struct adapter *);
static int	em_encap(struct adapter *, struct mbuf **);
static void	em_smartspeed(struct adapter *);
static int	em_82547_fifo_workaround(struct adapter *, int);
static void	em_82547_update_fifo_head(struct adapter *, int);
static int	em_82547_tx_fifo_reset(struct adapter *);
static void	em_82547_move_tail(void *arg);
static void	em_82547_move_tail_locked(struct adapter *);
static int	em_dma_malloc(struct adapter *, bus_size_t,
		struct em_dma_alloc *, int);
static void	em_dma_free(struct adapter *, struct em_dma_alloc *);
static void	em_print_debug_info(struct adapter *);
static int	em_is_valid_ether_addr(uint8_t *);
static int	em_sysctl_stats(SYSCTL_HANDLER_ARGS);
static int	em_sysctl_debug_info(SYSCTL_HANDLER_ARGS);
static uint32_t	em_fill_descriptors(bus_addr_t address, uint32_t length,
		    PDESC_ARRAY desc_array);
static int	em_sysctl_int_delay(SYSCTL_HANDLER_ARGS);
static void	em_add_int_delay_sysctl(struct adapter *, const char *,
		const char *, struct em_int_delay_info *, int, int);

/*
 * Fast interrupt handler and legacy ithread/polling modes are
 * mutually exclusive.
 */
#ifdef DEVICE_POLLING
static poll_handler_t em_poll;
static void	em_intr(void *);
#else
static void	em_intr_fast(void *);
static void	em_add_int_process_limit(struct adapter *, const char *,
		const char *, int *, int);
static void	em_handle_rxtx(void *context, int pending);
static void	em_handle_link(void *context, int pending);
#endif

/*********************************************************************
 *  FreeBSD Device Interface Entry Points
 *********************************************************************/

static device_method_t em_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe, em_probe),
	DEVMETHOD(device_attach, em_attach),
	DEVMETHOD(device_detach, em_detach),
	DEVMETHOD(device_shutdown, em_shutdown),
	DEVMETHOD(device_suspend, em_suspend),
	DEVMETHOD(device_resume, em_resume),
	{0, 0}
};

static driver_t em_driver = {
	"em", em_methods, sizeof(struct adapter),
};

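/*
 * Register the driver with the pci bus and declare its module
 * dependencies (interface version 1 of the pci and ether modules).
 */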
static devclass_t em_devclass;
DRIVER_MODULE(em, pci, em_driver, em_devclass, 0, 0);
MODULE_DEPEND(em, pci, 1, 1, 1);
MODULE_DEPEND(em, ether, 1, 1, 1);

/*********************************************************************
 *  Tunable default values.
 *********************************************************************/

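/*
 * The chip's interrupt delay registers count in units of 1.024 usec,
 * so these macros convert between microseconds and register ticks with
 * rounding; e.g. E1000_USECS_TO_TICKS(128) == (1000 * 128 + 512) / 1024
 * == 125 ticks.
 */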
#define E1000_TICKS_TO_USECS(ticks)	((1024 * (ticks) + 500) / 1000)
#define E1000_USECS_TO_TICKS(usecs)	((1000 * (usecs) + 512) / 1024)
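/* Header-only mbuf threshold used by the TSO pullup workaround in em_encap(). */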
#define M_TSO_LEN			66

static int em_tx_int_delay_dflt = E1000_TICKS_TO_USECS(EM_TIDV);
static int em_rx_int_delay_dflt = E1000_TICKS_TO_USECS(EM_RDTR);
static int em_tx_abs_int_delay_dflt = E1000_TICKS_TO_USECS(EM_TADV);
static int em_rx_abs_int_delay_dflt = E1000_TICKS_TO_USECS(EM_RADV);
static int em_rxd = EM_DEFAULT_RXD;
static int em_txd = EM_DEFAULT_TXD;
static int em_smart_pwr_down = FALSE;

TUNABLE_INT("hw.em.tx_int_delay", &em_tx_int_delay_dflt);
TUNABLE_INT("hw.em.rx_int_delay", &em_rx_int_delay_dflt);
TUNABLE_INT("hw.em.tx_abs_int_delay", &em_tx_abs_int_delay_dflt);
TUNABLE_INT("hw.em.rx_abs_int_delay", &em_rx_abs_int_delay_dflt);
TUNABLE_INT("hw.em.rxd", &em_rxd);
TUNABLE_INT("hw.em.txd", &em_txd);
TUNABLE_INT("hw.em.smart_pwr_down", &em_smart_pwr_down);
#ifndef DEVICE_POLLING
static int em_rx_process_limit = 100;
TUNABLE_INT("hw.em.rx_process_limit", &em_rx_process_limit);
#endif
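/*
 * All of the tunables above can be overridden at boot time via
 * loader.conf, e.g. (values shown are only examples):
 *
 *	hw.em.txd="1024"
 *	hw.em.rx_int_delay="0"
 */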

/*********************************************************************
 *  Device identification routine
 *
 *  em_probe determines whether the driver should be loaded on an
 *  adapter based on the adapter's PCI vendor/device ID.
 *
 *  return BUS_PROBE_DEFAULT on success, positive on failure
 *********************************************************************/

static int
em_probe(device_t dev)
{
	char		adapter_name[60];
	uint16_t	pci_vendor_id = 0;
	uint16_t	pci_device_id = 0;
	uint16_t	pci_subvendor_id = 0;
	uint16_t	pci_subdevice_id = 0;
	em_vendor_info_t *ent;

	INIT_DEBUGOUT("em_probe: begin");

	pci_vendor_id = pci_get_vendor(dev);
	if (pci_vendor_id != EM_VENDOR_ID)
		return (ENXIO);

	pci_device_id = pci_get_device(dev);
	pci_subvendor_id = pci_get_subvendor(dev);
	pci_subdevice_id = pci_get_subdevice(dev);

	ent = em_vendor_info_array;
	while (ent->vendor_id != 0) {
		if ((pci_vendor_id == ent->vendor_id) &&
		    (pci_device_id == ent->device_id) &&
		    ((pci_subvendor_id == ent->subvendor_id) ||
		    (ent->subvendor_id == PCI_ANY_ID)) &&
		    ((pci_subdevice_id == ent->subdevice_id) ||
		    (ent->subdevice_id == PCI_ANY_ID))) {
			sprintf(adapter_name, "%s %s",
				em_strings[ent->index],
				em_driver_version);
			device_set_desc_copy(dev, adapter_name);
			return (BUS_PROBE_DEFAULT);
		}
		ent++;
	}

	return (ENXIO);
}

/*********************************************************************
 *  Device initialization routine
 *
 *  The attach entry point is called when the driver is being loaded.
 *  This routine identifies the type of hardware, allocates all
 *  resources and initializes the hardware.
 *
 *  return 0 on success, positive on failure
 *********************************************************************/

static int
em_attach(device_t dev)
{
	struct adapter	*adapter;
	int		tsize, rsize;
	int		error = 0;

	INIT_DEBUGOUT("em_attach: begin");

	adapter = device_get_softc(dev);
	adapter->dev = adapter->osdep.dev = dev;
	EM_LOCK_INIT(adapter, device_get_nameunit(dev));

	/* SYSCTL stuff */
	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
	    OID_AUTO, "debug_info", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
	    em_sysctl_debug_info, "I", "Debug Information");

	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
	    OID_AUTO, "stats", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
	    em_sysctl_stats, "I", "Statistics");

	callout_init(&adapter->timer, CALLOUT_MPSAFE);
	callout_init(&adapter->tx_fifo_timer, CALLOUT_MPSAFE);

	/* Determine hardware revision */
	em_identify_hardware(adapter);

	/* Set up some sysctls for the tunable interrupt delays */
	em_add_int_delay_sysctl(adapter, "rx_int_delay",
	    "receive interrupt delay in usecs", &adapter->rx_int_delay,
	    E1000_REG_OFFSET(&adapter->hw, RDTR), em_rx_int_delay_dflt);
	em_add_int_delay_sysctl(adapter, "tx_int_delay",
	    "transmit interrupt delay in usecs", &adapter->tx_int_delay,
	    E1000_REG_OFFSET(&adapter->hw, TIDV), em_tx_int_delay_dflt);
	if (adapter->hw.mac_type >= em_82540) {
		em_add_int_delay_sysctl(adapter, "rx_abs_int_delay",
		    "receive interrupt delay limit in usecs",
		    &adapter->rx_abs_int_delay,
		    E1000_REG_OFFSET(&adapter->hw, RADV),
		    em_rx_abs_int_delay_dflt);
		em_add_int_delay_sysctl(adapter, "tx_abs_int_delay",
		    "transmit interrupt delay limit in usecs",
		    &adapter->tx_abs_int_delay,
		    E1000_REG_OFFSET(&adapter->hw, TADV),
		    em_tx_abs_int_delay_dflt);
	}

#ifndef DEVICE_POLLING
	/* Sysctls for limiting the amount of work done in the taskqueue */
	em_add_int_process_limit(adapter, "rx_processing_limit",
	    "max number of rx packets to process", &adapter->rx_process_limit,
	    em_rx_process_limit);
#endif

	/*
	 * Validate the number of transmit and receive descriptors.  It
	 * must not exceed the hardware maximum and must be a multiple
	 * of EM_DBA_ALIGN.
	 */
	if (((em_txd * sizeof(struct em_tx_desc)) % EM_DBA_ALIGN) != 0 ||
	    (adapter->hw.mac_type >= em_82544 && em_txd > EM_MAX_TXD) ||
	    (adapter->hw.mac_type < em_82544 && em_txd > EM_MAX_TXD_82543) ||
	    (em_txd < EM_MIN_TXD)) {
		device_printf(dev, "Using %d TX descriptors instead of %d!\n",
		    EM_DEFAULT_TXD, em_txd);
		adapter->num_tx_desc = EM_DEFAULT_TXD;
	} else
		adapter->num_tx_desc = em_txd;
	if (((em_rxd * sizeof(struct em_rx_desc)) % EM_DBA_ALIGN) != 0 ||
	    (adapter->hw.mac_type >= em_82544 && em_rxd > EM_MAX_RXD) ||
	    (adapter->hw.mac_type < em_82544 && em_rxd > EM_MAX_RXD_82543) ||
	    (em_rxd < EM_MIN_RXD)) {
		device_printf(dev, "Using %d RX descriptors instead of %d!\n",
		    EM_DEFAULT_RXD, em_rxd);
		adapter->num_rx_desc = EM_DEFAULT_RXD;
	} else
		adapter->num_rx_desc = em_rxd;

	adapter->hw.autoneg = DO_AUTO_NEG;
	adapter->hw.wait_autoneg_complete = WAIT_FOR_AUTO_NEG_DEFAULT;
	adapter->hw.autoneg_advertised = AUTONEG_ADV_DEFAULT;
	adapter->hw.tbi_compatibility_en = TRUE;
	adapter->rx_buffer_len = EM_RXBUFFER_2048;

	adapter->hw.phy_init_script = 1;
	adapter->hw.phy_reset_disable = FALSE;

#ifndef EM_MASTER_SLAVE
	adapter->hw.master_slave = em_ms_hw_default;
#else
	adapter->hw.master_slave = EM_MASTER_SLAVE;
#endif
	/*
	 * Set the max frame size assuming standard Ethernet-sized
	 * frames.
	 */
	adapter->hw.max_frame_size = ETHERMTU + ETHER_HDR_LEN + ETHER_CRC_LEN;

	adapter->hw.min_frame_size =
	    MINIMUM_ETHERNET_PACKET_SIZE + ETHER_CRC_LEN;

	/*
	 * This controls when hardware reports transmit completion
	 * status.
	 */
	adapter->hw.report_tx_early = 1;

	if (em_allocate_pci_resources(adapter)) {
		device_printf(dev, "Allocation of PCI resources failed\n");
		error = ENXIO;
		goto err_pci;
	}

	/* Initialize eeprom parameters */
	em_init_eeprom_params(&adapter->hw);

	tsize = roundup2(adapter->num_tx_desc * sizeof(struct em_tx_desc),
	    EM_DBA_ALIGN);

	/* Allocate Transmit Descriptor ring */
	if (em_dma_malloc(adapter, tsize, &adapter->txdma, BUS_DMA_NOWAIT)) {
		device_printf(dev, "Unable to allocate tx_desc memory\n");
		error = ENOMEM;
		goto err_tx_desc;
	}
	adapter->tx_desc_base = (struct em_tx_desc *)adapter->txdma.dma_vaddr;

	rsize = roundup2(adapter->num_rx_desc * sizeof(struct em_rx_desc),
	    EM_DBA_ALIGN);

	/* Allocate Receive Descriptor ring */
	if (em_dma_malloc(adapter, rsize, &adapter->rxdma, BUS_DMA_NOWAIT)) {
		device_printf(dev, "Unable to allocate rx_desc memory\n");
		error = ENOMEM;
		goto err_rx_desc;
	}
	adapter->rx_desc_base = (struct em_rx_desc *)adapter->rxdma.dma_vaddr;

	/* Initialize the hardware */
	if (em_hardware_init(adapter)) {
		device_printf(dev, "Unable to initialize the hardware\n");
		error = EIO;
		goto err_hw_init;
	}

	/* Copy the permanent MAC address out of the EEPROM */
	if (em_read_mac_addr(&adapter->hw) < 0) {
		device_printf(dev, "EEPROM read error while reading MAC"
		    " address\n");
		error = EIO;
		goto err_hw_init;
	}

	if (!em_is_valid_ether_addr(adapter->hw.mac_addr)) {
		device_printf(dev, "Invalid MAC address\n");
		error = EIO;
		goto err_hw_init;
	}

	/* Setup OS specific network interface */
	em_setup_interface(dev, adapter);

	em_allocate_intr(adapter);

	/* Initialize statistics */
	em_clear_hw_cntrs(&adapter->hw);
	em_update_stats_counters(adapter);
	adapter->hw.get_link_status = 1;
	em_update_link_status(adapter);

	/* Indicate SOL/IDER usage */
	if (em_check_phy_reset_block(&adapter->hw))
		device_printf(dev,
		    "PHY reset is blocked due to SOL/IDER session.\n");

	/* Identify 82544 on PCI-X */
	em_get_bus_info(&adapter->hw);
	if (adapter->hw.bus_type == em_bus_type_pcix &&
	    adapter->hw.mac_type == em_82544)
		adapter->pcix_82544 = TRUE;
	else
		adapter->pcix_82544 = FALSE;

	INIT_DEBUGOUT("em_attach: end");

	return (0);

err_hw_init:
	em_dma_free(adapter, &adapter->rxdma);
err_rx_desc:
	em_dma_free(adapter, &adapter->txdma);
err_tx_desc:
err_pci:
	em_free_intr(adapter);
	em_free_pci_resources(adapter);
	EM_LOCK_DESTROY(adapter);

	return (error);
}

/*********************************************************************
 *  Device removal routine
 *
 *  The detach entry point is called when the driver is being removed.
 *  This routine stops the adapter and deallocates all the resources
 *  that were allocated for driver operation.
 *
 *  return 0 on success, positive on failure
 *********************************************************************/

static int
em_detach(device_t dev)
{
	struct adapter	*adapter = device_get_softc(dev);
	struct ifnet	*ifp = adapter->ifp;

	INIT_DEBUGOUT("em_detach: begin");

#ifdef DEVICE_POLLING
	if (ifp->if_capenable & IFCAP_POLLING)
		ether_poll_deregister(ifp);
#endif

	em_free_intr(adapter);
	EM_LOCK(adapter);
	adapter->in_detach = 1;
	em_stop(adapter);
	em_phy_hw_reset(&adapter->hw);
	EM_UNLOCK(adapter);
	ether_ifdetach(adapter->ifp);

	em_free_pci_resources(adapter);
	bus_generic_detach(dev);
	if_free(ifp);

	/* Free Transmit Descriptor ring */
	if (adapter->tx_desc_base) {
		em_dma_free(adapter, &adapter->txdma);
		adapter->tx_desc_base = NULL;
	}

	/* Free Receive Descriptor ring */
	if (adapter->rx_desc_base) {
		em_dma_free(adapter, &adapter->rxdma);
		adapter->rx_desc_base = NULL;
	}

	EM_LOCK_DESTROY(adapter);

	return (0);
}

/*********************************************************************
 *
 *  Shutdown entry point
 *
 **********************************************************************/

static int
em_shutdown(device_t dev)
{
	struct adapter *adapter = device_get_softc(dev);

	EM_LOCK(adapter);
	em_stop(adapter);
	EM_UNLOCK(adapter);

	return (0);
}

/*
 * Suspend/resume device methods.
 */
static int
em_suspend(device_t dev)
{
	struct adapter *adapter = device_get_softc(dev);

	EM_LOCK(adapter);
	em_stop(adapter);
	EM_UNLOCK(adapter);

	return bus_generic_suspend(dev);
}

static int
em_resume(device_t dev)
{
	struct adapter *adapter = device_get_softc(dev);
	struct ifnet *ifp = adapter->ifp;

	EM_LOCK(adapter);
	em_init_locked(adapter);
	if ((ifp->if_flags & IFF_UP) &&
	    (ifp->if_drv_flags & IFF_DRV_RUNNING))
		em_start_locked(ifp);
	EM_UNLOCK(adapter);

	return bus_generic_resume(dev);
}

/*********************************************************************
 *  Transmit entry point
 *
 *  em_start is called by the stack to initiate a transmit.
 *  The driver will remain in this routine as long as there are
 *  packets to transmit and transmit resources are available.
 *  In case resources are not available, the stack is notified
 *  and the packet is requeued.
 **********************************************************************/

static void
em_start_locked(struct ifnet *ifp)
{
	struct adapter	*adapter = ifp->if_softc;
	struct mbuf	*m_head;

	EM_LOCK_ASSERT(adapter);

	if ((ifp->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) !=
	    IFF_DRV_RUNNING)
		return;
	if (!adapter->link_active)
		return;

	while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
		IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
		if (m_head == NULL)
			break;
		/*
		 * em_encap() can modify our pointer, and/or make it
		 * NULL on failure.  In that event, we can't requeue.
		 */
		if (em_encap(adapter, &m_head)) {
			if (m_head == NULL)
				break;
			ifp->if_drv_flags |= IFF_DRV_OACTIVE;
			IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
			break;
		}

		/* Send a copy of the frame to the BPF listener */
		BPF_MTAP(ifp, m_head);

		/* Set timeout in case hardware has problems transmitting. */
		ifp->if_timer = EM_TX_TIMEOUT;
	}
}

static void
em_start(struct ifnet *ifp)
{
	struct adapter *adapter = ifp->if_softc;

	EM_LOCK(adapter);
	if (ifp->if_drv_flags & IFF_DRV_RUNNING)
		em_start_locked(ifp);
	EM_UNLOCK(adapter);
}

/*********************************************************************
 *  Ioctl entry point
 *
 *  em_ioctl is called when the user wants to configure the
 *  interface.
 *
 *  return 0 on success, positive on failure
 **********************************************************************/

static int
em_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
{
	struct adapter	*adapter = ifp->if_softc;
	struct ifreq	*ifr = (struct ifreq *)data;
	struct ifaddr	*ifa = (struct ifaddr *)data;
	int		error = 0;

	if (adapter->in_detach)
		return (error);

	switch (command) {
	case SIOCSIFADDR:
	case SIOCGIFADDR:
		if (ifa->ifa_addr->sa_family == AF_INET) {
			/*
			 * XXX
			 * Since resetting hardware takes a very long time
			 * and results in link renegotiation we initialize
			 * the hardware only when it is absolutely required.
			 */
			ifp->if_flags |= IFF_UP;
			if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) {
				EM_LOCK(adapter);
				em_init_locked(adapter);
				EM_UNLOCK(adapter);
			}
			arp_ifinit(ifp, ifa);
		} else
			error = ether_ioctl(ifp, command, data);
		break;
	case SIOCSIFMTU:
	    {
		int max_frame_size;
		uint16_t eeprom_data = 0;

		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFMTU (Set Interface MTU)");

		EM_LOCK(adapter);
		switch (adapter->hw.mac_type) {
		case em_82573:
			/*
			 * 82573 only supports jumbo frames
			 * if ASPM is disabled.
			 */
			em_read_eeprom(&adapter->hw, EEPROM_INIT_3GIO_3, 1,
			    &eeprom_data);
			if (eeprom_data & EEPROM_WORD1A_ASPM_MASK) {
				max_frame_size = ETHER_MAX_LEN;
				break;
			}
			/* Allow Jumbo frames - fall thru */
		case em_82571:
		case em_82572:
		case em_80003es2lan:	/* Limit Jumbo Frame size */
			max_frame_size = 9234;
			break;
		case em_ich8lan:
			/* ICH8 does not support jumbo frames */
			max_frame_size = ETHER_MAX_LEN;
			break;
		default:
			max_frame_size = MAX_JUMBO_FRAME_SIZE;
		}
		if (ifr->ifr_mtu > max_frame_size - ETHER_HDR_LEN -
		    ETHER_CRC_LEN) {
			EM_UNLOCK(adapter);
			error = EINVAL;
			break;
		}

		ifp->if_mtu = ifr->ifr_mtu;
		adapter->hw.max_frame_size =
		    ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
		em_init_locked(adapter);
		EM_UNLOCK(adapter);
		break;
	    }
	case SIOCSIFFLAGS:
		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFFLAGS (Set Interface Flags)");
		EM_LOCK(adapter);
		if (ifp->if_flags & IFF_UP) {
			if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
				if ((ifp->if_flags ^ adapter->if_flags) &
				    IFF_PROMISC) {
					em_disable_promisc(adapter);
					em_set_promisc(adapter);
				}
			} else
				em_init_locked(adapter);
		} else {
			if (ifp->if_drv_flags & IFF_DRV_RUNNING)
				em_stop(adapter);
		}
		adapter->if_flags = ifp->if_flags;
		EM_UNLOCK(adapter);
		break;
	case SIOCADDMULTI:
	case SIOCDELMULTI:
		IOCTL_DEBUGOUT("ioctl rcv'd: SIOC(ADD|DEL)MULTI");
		if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
			EM_LOCK(adapter);
			em_disable_intr(adapter);
			em_set_multi(adapter);
			if (adapter->hw.mac_type == em_82542_rev2_0)
				em_initialize_receive_unit(adapter);
#ifdef DEVICE_POLLING
			if (!(ifp->if_capenable & IFCAP_POLLING))
#endif
				em_enable_intr(adapter);
			EM_UNLOCK(adapter);
		}
		break;
	case SIOCSIFMEDIA:
	case SIOCGIFMEDIA:
		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCxIFMEDIA (Get/Set Interface Media)");
		error = ifmedia_ioctl(ifp, ifr, &adapter->media, command);
		break;
	case SIOCSIFCAP:
	    {
		int mask, reinit;

		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFCAP (Set Capabilities)");
		reinit = 0;
		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
#ifdef DEVICE_POLLING
		if (mask & IFCAP_POLLING) {
			if (ifr->ifr_reqcap & IFCAP_POLLING) {
				error = ether_poll_register(em_poll, ifp);
				if (error)
					return (error);
				EM_LOCK(adapter);
				em_disable_intr(adapter);
				ifp->if_capenable |= IFCAP_POLLING;
				EM_UNLOCK(adapter);
			} else {
				error = ether_poll_deregister(ifp);
				/* Enable interrupt even in error case */
				EM_LOCK(adapter);
				em_enable_intr(adapter);
				ifp->if_capenable &= ~IFCAP_POLLING;
				EM_UNLOCK(adapter);
			}
		}
#endif
		if (mask & IFCAP_HWCSUM) {
			ifp->if_capenable ^= IFCAP_HWCSUM;
			reinit = 1;
		}
		if (mask & IFCAP_TSO) {
			ifp->if_capenable ^= IFCAP_TSO;
			reinit = 1;
		}
		if (mask & IFCAP_VLAN_HWTAGGING) {
			ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
			reinit = 1;
		}
		if (reinit && (ifp->if_drv_flags & IFF_DRV_RUNNING))
			em_init(adapter);
		VLAN_CAPABILITIES(ifp);
		break;
	    }
	default:
		error = ether_ioctl(ifp, command, data);
		break;
	}

	return (error);
}

/*********************************************************************
 *  Watchdog entry point
 *
 *  This routine is called whenever the hardware quits transmitting.
 *
 **********************************************************************/

static void
em_watchdog(struct ifnet *ifp)
{
	struct adapter *adapter = ifp->if_softc;

	EM_LOCK(adapter);
	/*
	 * If we are in this routine because of pause frames, then
	 * don't reset the hardware.
	 */
	if (E1000_READ_REG(&adapter->hw, STATUS) & E1000_STATUS_TXOFF) {
		ifp->if_timer = EM_TX_TIMEOUT;
		EM_UNLOCK(adapter);
		return;
	}

	/*
	 * Reclaim first as there is a possibility of losing Tx completion
	 * interrupts.  Missing Tx completion interrupts may come from the
	 * Tx interrupt moderation mechanism (delayed interrupts) or from
	 * a chipset bug.
	 */
	em_txeof(adapter);
	if (adapter->num_tx_desc_avail == adapter->num_tx_desc) {
		EM_UNLOCK(adapter);
		return;
	}

	if (em_check_for_link(&adapter->hw) == 0)
		device_printf(adapter->dev, "watchdog timeout -- resetting\n");

	ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
	adapter->watchdog_events++;

	em_init_locked(adapter);
	EM_UNLOCK(adapter);
}

/*********************************************************************
 *  Init entry point
 *
 *  This routine is used in two ways.  It is used by the stack as an
 *  init entry point in the network interface structure.  It is also
 *  used by the driver as a hw/sw initialization routine to get to a
 *  consistent state.
 *
 **********************************************************************/

static void
em_init_locked(struct adapter *adapter)
{
	struct ifnet	*ifp = adapter->ifp;
	device_t	dev = adapter->dev;
	uint32_t	pba;

	INIT_DEBUGOUT("em_init: begin");

	EM_LOCK_ASSERT(adapter);

	em_stop(adapter);

	/*
	 * Packet Buffer Allocation (PBA)
	 * Writing PBA sets the receive portion of the buffer;
	 * the remainder is used for the transmit buffer.
	 *
	 * Devices before the 82547 had a Packet Buffer of 64K.
	 *   Default allocation: PBA=48K for Rx, leaving 16K for Tx.
	 * After the 82547 the buffer was reduced to 40K.
	 *   Default allocation: PBA=30K for Rx, leaving 10K for Tx.
	 *   Note: default does not leave enough room for Jumbo Frame >10k.
	 */
	switch (adapter->hw.mac_type) {
	case em_82547:
	case em_82547_rev_2: /* 82547: Total Packet Buffer is 40K */
		if (adapter->hw.max_frame_size > EM_RXBUFFER_8192)
			pba = E1000_PBA_22K; /* 22K for Rx, 18K for Tx */
		else
			pba = E1000_PBA_30K; /* 30K for Rx, 10K for Tx */
		adapter->tx_fifo_head = 0;
		adapter->tx_head_addr = pba << EM_TX_HEAD_ADDR_SHIFT;
		adapter->tx_fifo_size =
		    (E1000_PBA_40K - pba) << EM_PBA_BYTES_SHIFT;
		break;
	case em_80003es2lan: /* 80003es2lan: Total Packet Buffer is 48K */
	case em_82571: /* 82571: Total Packet Buffer is 48K */
	case em_82572: /* 82572: Total Packet Buffer is 48K */
		pba = E1000_PBA_32K; /* 32K for Rx, 16K for Tx */
		break;
	case em_82573: /* 82573: Total Packet Buffer is 32K */
		/* Jumbo frames not supported */
		pba = E1000_PBA_12K; /* 12K for Rx, 20K for Tx */
		break;
	case em_ich8lan:
		pba = E1000_PBA_8K;
		break;
	default:
		/* Devices before 82547 had a Packet Buffer of 64K. */
		if (adapter->hw.max_frame_size > EM_RXBUFFER_8192)
			pba = E1000_PBA_40K; /* 40K for Rx, 24K for Tx */
		else
			pba = E1000_PBA_48K; /* 48K for Rx, 16K for Tx */
	}

	INIT_DEBUGOUT1("em_init: pba=%dK", pba);
	E1000_WRITE_REG(&adapter->hw, PBA, pba);

	/* Get the latest mac address, user can use a LAA */
	bcopy(IF_LLADDR(adapter->ifp), adapter->hw.mac_addr, ETHER_ADDR_LEN);

	/* Initialize the hardware */
	if (em_hardware_init(adapter)) {
		device_printf(dev, "Unable to initialize the hardware\n");
		return;
	}
	em_update_link_status(adapter);

	if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING)
		em_enable_vlans(adapter);

	/* Prepare transmit descriptors and buffers */
	if (em_setup_transmit_structures(adapter)) {
		device_printf(dev, "Could not setup transmit structures\n");
		em_stop(adapter);
		return;
	}
	em_initialize_transmit_unit(adapter);

	/* Setup Multicast table */
	em_set_multi(adapter);

	/* Prepare receive descriptors and buffers */
	if (em_setup_receive_structures(adapter)) {
		device_printf(dev, "Could not setup receive structures\n");
		em_stop(adapter);
		return;
	}
	em_initialize_receive_unit(adapter);

	/* Don't lose promiscuous settings */
	em_set_promisc(adapter);

	ifp->if_drv_flags |= IFF_DRV_RUNNING;
	ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;

	ifp->if_hwassist = 0;
	if (adapter->hw.mac_type >= em_82543) {
		if (ifp->if_capenable & IFCAP_TXCSUM)
			ifp->if_hwassist = EM_CHECKSUM_FEATURES;
		if (ifp->if_capenable & IFCAP_TSO)
			ifp->if_hwassist |= EM_TCPSEG_FEATURES;
	}

	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
	em_clear_hw_cntrs(&adapter->hw);
#ifdef DEVICE_POLLING
	/*
	 * Only enable interrupts if we are not polling; make sure
	 * they are off otherwise.
	 */
	if (ifp->if_capenable & IFCAP_POLLING)
		em_disable_intr(adapter);
	else
#endif /* DEVICE_POLLING */
		em_enable_intr(adapter);

	/* Don't reset the phy next time init gets called */
	adapter->hw.phy_reset_disable = TRUE;
}

static void
em_init(void *arg)
{
	struct adapter *adapter = arg;

	EM_LOCK(adapter);
	em_init_locked(adapter);
	EM_UNLOCK(adapter);
}


#ifdef DEVICE_POLLING
/*********************************************************************
 *
 *  Legacy polling routine
 *
 *********************************************************************/
static void
em_poll(struct ifnet *ifp, enum poll_cmd cmd, int count)
{
	struct adapter *adapter = ifp->if_softc;
	uint32_t reg_icr;

	EM_LOCK(adapter);
	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
		EM_UNLOCK(adapter);
		return;
	}

	if (cmd == POLL_AND_CHECK_STATUS) {
		reg_icr = E1000_READ_REG(&adapter->hw, ICR);
		if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
			callout_stop(&adapter->timer);
			adapter->hw.get_link_status = 1;
			em_check_for_link(&adapter->hw);
			em_update_link_status(adapter);
			callout_reset(&adapter->timer, hz, em_local_timer,
			    adapter);
		}
	}
	em_rxeof(adapter, count);
	em_txeof(adapter);

	if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
		em_start_locked(ifp);
	EM_UNLOCK(adapter);
}

/*********************************************************************
 *
 *  Legacy Interrupt Service routine
 *
 *********************************************************************/
static void
em_intr(void *arg)
{
	struct adapter	*adapter = arg;
	struct ifnet	*ifp;
	uint32_t	reg_icr;

	EM_LOCK(adapter);

	ifp = adapter->ifp;

	if (ifp->if_capenable & IFCAP_POLLING) {
		EM_UNLOCK(adapter);
		return;
	}

	for (;;) {
		reg_icr = E1000_READ_REG(&adapter->hw, ICR);
		if (adapter->hw.mac_type >= em_82571 &&
		    (reg_icr & E1000_ICR_INT_ASSERTED) == 0)
			break;
		else if (reg_icr == 0)
			break;

		/*
		 * XXX: some laptops trigger several spurious interrupts
		 * on em(4) when in the resume cycle.  The ICR register
		 * reports an all-ones value in this case.  Processing
		 * such interrupts would lead to a freeze.  I don't know
		 * why.
		 */
		if (reg_icr == 0xffffffff)
			break;

		if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
			em_rxeof(adapter, -1);
			em_txeof(adapter);
		}

		/* Link status change */
		if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
			callout_stop(&adapter->timer);
			adapter->hw.get_link_status = 1;
			em_check_for_link(&adapter->hw);
			em_update_link_status(adapter);
			callout_reset(&adapter->timer, hz, em_local_timer,
			    adapter);
		}

		if (reg_icr & E1000_ICR_RXO)
			adapter->rx_overruns++;
	}

	if (ifp->if_drv_flags & IFF_DRV_RUNNING &&
	    !IFQ_DRV_IS_EMPTY(&ifp->if_snd))
		em_start_locked(ifp);

	EM_UNLOCK(adapter);
}

#else  /* if not DEVICE_POLLING, then fast interrupt routines only */

static void
em_handle_link(void *context, int pending)
{
	struct adapter	*adapter = context;
	struct ifnet *ifp;

	ifp = adapter->ifp;

	EM_LOCK(adapter);

	callout_stop(&adapter->timer);
	adapter->hw.get_link_status = 1;
	em_check_for_link(&adapter->hw);
	em_update_link_status(adapter);
	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
	EM_UNLOCK(adapter);
}

static void
em_handle_rxtx(void *context, int pending)
{
	struct adapter	*adapter = context;
	struct ifnet	*ifp;

	NET_LOCK_GIANT();
	ifp = adapter->ifp;

	/*
	 * TODO:
	 * It should be possible to run the tx clean loop without the lock.
	 */
	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
		if (em_rxeof(adapter, adapter->rx_process_limit) != 0)
			taskqueue_enqueue(adapter->tq, &adapter->rxtx_task);
		EM_LOCK(adapter);
		em_txeof(adapter);

		if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
			em_start_locked(ifp);
		EM_UNLOCK(adapter);
	}

	em_enable_intr(adapter);
	NET_UNLOCK_GIANT();
}

/*********************************************************************
 *
 *  Fast Interrupt Service routine
 *
 *********************************************************************/
static void
em_intr_fast(void *arg)
{
	struct adapter	*adapter = arg;
	struct ifnet	*ifp;
	uint32_t	reg_icr;

	ifp = adapter->ifp;

	reg_icr = E1000_READ_REG(&adapter->hw, ICR);

	/* Hot eject? */
	if (reg_icr == 0xffffffff)
		return;

	/* Definitely not our interrupt. */
	if (reg_icr == 0x0)
		return;

	/*
	 * Starting with the 82571 chip, bit 31 should be used to
	 * determine whether the interrupt belongs to us.
	 */
	if (adapter->hw.mac_type >= em_82571 &&
	    (reg_icr & E1000_ICR_INT_ASSERTED) == 0)
		return;

	/*
	 * Mask interrupts until the taskqueue is finished running.  This is
	 * cheap, just assume that it is needed.  This also works around the
	 * MSI message reordering errata on certain systems.
	 */
	em_disable_intr(adapter);
	taskqueue_enqueue(adapter->tq, &adapter->rxtx_task);

	/* Link status change */
	if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC))
		taskqueue_enqueue(taskqueue_fast, &adapter->link_task);

	if (reg_icr & E1000_ICR_RXO)
		adapter->rx_overruns++;
}
#endif /* ! DEVICE_POLLING */

/*********************************************************************
 *
 *  Media Ioctl callback
 *
 *  This routine is called whenever the user queries the status of
 *  the interface using ifconfig.
 *
 **********************************************************************/
static void
em_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
{
	struct adapter *adapter = ifp->if_softc;

	INIT_DEBUGOUT("em_media_status: begin");

	EM_LOCK(adapter);
	em_check_for_link(&adapter->hw);
	em_update_link_status(adapter);

	ifmr->ifm_status = IFM_AVALID;
	ifmr->ifm_active = IFM_ETHER;

	if (!adapter->link_active) {
		EM_UNLOCK(adapter);
		return;
	}

	ifmr->ifm_status |= IFM_ACTIVE;

	if ((adapter->hw.media_type == em_media_type_fiber) ||
	    (adapter->hw.media_type == em_media_type_internal_serdes)) {
		if (adapter->hw.mac_type == em_82545)
			ifmr->ifm_active |= IFM_1000_LX | IFM_FDX;
		else
			ifmr->ifm_active |= IFM_1000_SX | IFM_FDX;
	} else {
		switch (adapter->link_speed) {
		case 10:
			ifmr->ifm_active |= IFM_10_T;
			break;
		case 100:
			ifmr->ifm_active |= IFM_100_TX;
			break;
		case 1000:
			ifmr->ifm_active |= IFM_1000_T;
			break;
		}
		if (adapter->link_duplex == FULL_DUPLEX)
			ifmr->ifm_active |= IFM_FDX;
		else
			ifmr->ifm_active |= IFM_HDX;
	}
	EM_UNLOCK(adapter);
}

/*********************************************************************
 *
 *  Media Ioctl callback
 *
 *  This routine is called when the user changes speed/duplex using
 *  the media/mediaopt options with ifconfig.
 *
 **********************************************************************/
static int
em_media_change(struct ifnet *ifp)
{
	struct adapter *adapter = ifp->if_softc;
	struct ifmedia  *ifm = &adapter->media;

	INIT_DEBUGOUT("em_media_change: begin");

	if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
		return (EINVAL);

	EM_LOCK(adapter);
	switch (IFM_SUBTYPE(ifm->ifm_media)) {
	case IFM_AUTO:
		adapter->hw.autoneg = DO_AUTO_NEG;
		adapter->hw.autoneg_advertised = AUTONEG_ADV_DEFAULT;
		break;
	case IFM_1000_LX:
	case IFM_1000_SX:
	case IFM_1000_T:
		adapter->hw.autoneg = DO_AUTO_NEG;
		adapter->hw.autoneg_advertised = ADVERTISE_1000_FULL;
		break;
	case IFM_100_TX:
		adapter->hw.autoneg = FALSE;
		adapter->hw.autoneg_advertised = 0;
		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
			adapter->hw.forced_speed_duplex = em_100_full;
		else
			adapter->hw.forced_speed_duplex = em_100_half;
		break;
	case IFM_10_T:
		adapter->hw.autoneg = FALSE;
		adapter->hw.autoneg_advertised = 0;
		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
			adapter->hw.forced_speed_duplex = em_10_full;
		else
			adapter->hw.forced_speed_duplex = em_10_half;
		break;
	default:
		device_printf(adapter->dev, "Unsupported media type\n");
	}

	/*
	 * As the speed/duplex settings may have changed we need to
	 * reset the PHY.
	 */
	adapter->hw.phy_reset_disable = FALSE;

	em_init_locked(adapter);
	EM_UNLOCK(adapter);

	return (0);
}

/*********************************************************************
 *
 *  This routine maps the mbufs to tx descriptors.
 *
 *  return 0 on success, positive on failure
 **********************************************************************/
static int
em_encap(struct adapter *adapter, struct mbuf **m_headp)
{
	struct ifnet		*ifp = adapter->ifp;
	bus_dma_segment_t	segs[EM_MAX_SCATTER];
	bus_dmamap_t		map;
	struct em_buffer	*tx_buffer, *tx_buffer_last;
	struct em_tx_desc	*current_tx_desc;
	struct mbuf		*m_head;
	struct m_tag		*mtag;
	uint32_t		txd_upper, txd_lower, txd_used, txd_saved;
	int			nsegs, i, j;
	int			error, do_tso, tso_desc = 0;

	m_head = *m_headp;
	current_tx_desc = NULL;
	txd_upper = txd_lower = txd_used = txd_saved = 0;

	do_tso = ((m_head->m_pkthdr.csum_flags & CSUM_TSO) != 0);

	/*
	 * Force a cleanup if the number of TX descriptors
	 * available hits the threshold.
	 */
	if (adapter->num_tx_desc_avail <= EM_TX_CLEANUP_THRESHOLD) {
		em_txeof(adapter);
		if (adapter->num_tx_desc_avail <= EM_TX_CLEANUP_THRESHOLD) {
			adapter->no_tx_desc_avail1++;
			return (ENOBUFS);
		}
	}

	/* Find out if we are in vlan mode. */
	mtag = VLAN_OUTPUT_TAG(ifp, m_head);

	/*
	 * When operating in promiscuous mode, hardware encapsulation for
	 * packets is disabled.  This means we have to add the vlan
	 * encapsulation in the driver, since it will have come down from the
	 * VLAN layer with a tag instead of a VLAN header.
	 */
	if (mtag != NULL && adapter->em_insert_vlan_header) {
		struct ether_vlan_header *evl;
		struct ether_header eh;

		m_head = m_pullup(m_head, sizeof(eh));
		if (m_head == NULL) {
			*m_headp = NULL;
			return (ENOBUFS);
		}
		eh = *mtod(m_head, struct ether_header *);
		M_PREPEND(m_head, sizeof(*evl), M_DONTWAIT);
		if (m_head == NULL) {
			*m_headp = NULL;
			return (ENOBUFS);
		}
		m_head = m_pullup(m_head, sizeof(*evl));
		if (m_head == NULL) {
			*m_headp = NULL;
			return (ENOBUFS);
		}
		evl = mtod(m_head, struct ether_vlan_header *);
		bcopy(&eh, evl, sizeof(*evl));
		evl->evl_proto = evl->evl_encap_proto;
		evl->evl_encap_proto = htons(ETHERTYPE_VLAN);
		evl->evl_tag = htons(VLAN_TAG_VALUE(mtag));
		m_tag_delete(m_head, mtag);
		mtag = NULL;
		*m_headp = m_head;
	}

	/*
	 * TSO workaround:
	 *  If an mbuf contains only the header we need
	 *  to pull 4 bytes of data into it.
	 */
	if (do_tso && (m_head->m_len <= M_TSO_LEN)) {
		m_head = m_pullup(m_head, M_TSO_LEN + 4);
		*m_headp = m_head;
		if (m_head == NULL)
			return (ENOBUFS);
	}

	/*
	 * Map the packet for DMA.
	 */
	tx_buffer = &adapter->tx_buffer_area[adapter->next_avail_tx_desc];
	tx_buffer_last = tx_buffer;
	map = tx_buffer->map;
	error = bus_dmamap_load_mbuf_sg(adapter->txtag, map, *m_headp, segs,
	    &nsegs, BUS_DMA_NOWAIT);
	if (error == EFBIG) {
		struct mbuf *m;

		m = m_defrag(*m_headp, M_DONTWAIT);
		if (m == NULL) {
			/* Assume m_defrag(9) used only m_get(9). */
			adapter->mbuf_alloc_failed++;
			m_freem(*m_headp);
			*m_headp = NULL;
			return (ENOBUFS);
		}
		*m_headp = m;
		error = bus_dmamap_load_mbuf_sg(adapter->txtag, map, *m_headp,
		    segs, &nsegs, BUS_DMA_NOWAIT);
		if (error != 0) {
			adapter->no_tx_dma_setup++;
			m_freem(*m_headp);
			*m_headp = NULL;
			return (error);
		}
	} else if (error != 0) {
		adapter->no_tx_dma_setup++;
		return (error);
	}
	if (nsegs == 0) {
		m_freem(*m_headp);
		*m_headp = NULL;
		return (EIO);
	}

	/*
	 * TSO Hardware workaround: if this packet is not TSO, is only
	 * a single descriptor long, and follows a TSO burst, then we
	 * need to add a sentinel descriptor to prevent premature
	 * writeback.
	 */
	if ((do_tso == 0) && (adapter->tx_tso == TRUE)) {
		if (nsegs == 1)
			tso_desc = TRUE;
		adapter->tx_tso = FALSE;
	}

	if (nsegs > adapter->num_tx_desc_avail - 2) {
		adapter->no_tx_desc_avail2++;
		bus_dmamap_unload(adapter->txtag, map);
		return (ENOBUFS);
	}

	/* Do hardware assists */
	m_head = *m_headp;
	if (ifp->if_hwassist > 0) {
		if (em_tso_setup(adapter, m_head, &txd_upper, &txd_lower)) {
			/* we need to make a final sentinel transmit desc */
			tso_desc = TRUE;
		} else
			em_transmit_checksum_setup(adapter, m_head,
			    &txd_upper, &txd_lower);
	}

	i = adapter->next_avail_tx_desc;
	if (adapter->pcix_82544)
		txd_saved = i;

	for (j = 0; j < nsegs; j++) {
		bus_size_t seg_len;
		bus_addr_t seg_addr;

		/* If the adapter is an 82544 on a PCI-X bus. */
		if (adapter->pcix_82544) {
			DESC_ARRAY	desc_array;
			uint32_t	array_elements, counter;

			/*
			 * Check the address and length combination and
			 * split the data accordingly.
			 */
			array_elements = em_fill_descriptors(segs[j].ds_addr,
			    segs[j].ds_len, &desc_array);
			for (counter = 0; counter < array_elements; counter++) {
				if (txd_used == adapter->num_tx_desc_avail) {
					adapter->next_avail_tx_desc = txd_saved;
					adapter->no_tx_desc_avail2++;
					bus_dmamap_unload(adapter->txtag, map);
					return (ENOBUFS);
				}
				tx_buffer = &adapter->tx_buffer_area[i];
				current_tx_desc = &adapter->tx_desc_base[i];
				current_tx_desc->buffer_addr = htole64(
				    desc_array.descriptor[counter].address);
				current_tx_desc->lower.data = htole32(
				    (adapter->txd_cmd | txd_lower |
				    (uint16_t)desc_array.descriptor[counter].length));
				current_tx_desc->upper.data = htole32(txd_upper);
				if (++i == adapter->num_tx_desc)
					i = 0;

				tx_buffer->m_head = NULL;
				txd_used++;
			}
		} else {
			tx_buffer = &adapter->tx_buffer_area[i];
			current_tx_desc = &adapter->tx_desc_base[i];
			seg_addr = htole64(segs[j].ds_addr);
			seg_len  = segs[j].ds_len;
			/*
			 * TSO Workaround:
			 * If this is the last descriptor, we want to
			 * split it so we have a small final sentinel.
			 */
			if (tso_desc && (j == (nsegs - 1)) && (seg_len > 8)) {
				seg_len -= 4;
				current_tx_desc->buffer_addr = seg_addr;
				current_tx_desc->lower.data = htole32(
				    adapter->txd_cmd | txd_lower | seg_len);
				current_tx_desc->upper.data =
				    htole32(txd_upper);
				if (++i == adapter->num_tx_desc)
					i = 0;
				/* Now make the sentinel */
				++txd_used; /* using an extra txd */
				current_tx_desc = &adapter->tx_desc_base[i];
				tx_buffer = &adapter->tx_buffer_area[i];
				current_tx_desc->buffer_addr =
				    seg_addr + seg_len;
				current_tx_desc->lower.data = htole32(
				    adapter->txd_cmd | txd_lower | 4);
				current_tx_desc->upper.data =
				    htole32(txd_upper);
				if (++i == adapter->num_tx_desc)
					i = 0;
			} else {
				current_tx_desc->buffer_addr = seg_addr;
				current_tx_desc->lower.data = htole32(
				    adapter->txd_cmd | txd_lower | seg_len);
				current_tx_desc->upper.data =
				    htole32(txd_upper);
				if (++i == adapter->num_tx_desc)
					i = 0;
			}
			tx_buffer->m_head = NULL;
		}
	}

	adapter->next_avail_tx_desc = i;
	if (adapter->pcix_82544)
		adapter->num_tx_desc_avail -= txd_used;
	else {
		adapter->num_tx_desc_avail -= nsegs;
		if (tso_desc) /* TSO used an extra for sentinel */
			adapter->num_tx_desc_avail -= txd_used;
	}

	if (mtag != NULL) {
		/* Set the vlan id. */
		current_tx_desc->upper.fields.special =
		    htole16(VLAN_TAG_VALUE(mtag));

		/* Tell hardware to add tag. */
		current_tx_desc->lower.data |= htole32(E1000_TXD_CMD_VLE);
	}

	tx_buffer->m_head = m_head;
	tx_buffer_last->map = tx_buffer->map;
	tx_buffer->map = map;
	bus_dmamap_sync(adapter->txtag, map, BUS_DMASYNC_PREWRITE);

	/*
	 * Last descriptor of the packet needs End Of Packet (EOP).
	 */
	current_tx_desc->lower.data |= htole32(E1000_TXD_CMD_EOP);

	/*
	 * Advance the Transmit Descriptor Tail (TDT); this tells the E1000
	 * that this frame is available to transmit.
	 */
	bus_dmamap_sync(adapter->txdma.dma_tag, adapter->txdma.dma_map,
	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
	if (adapter->hw.mac_type == em_82547 &&
	    adapter->link_duplex == HALF_DUPLEX)
		em_82547_move_tail_locked(adapter);
	else {
		E1000_WRITE_REG(&adapter->hw, TDT, i);
		if (adapter->hw.mac_type == em_82547)
			em_82547_update_fifo_head(adapter,
			    m_head->m_pkthdr.len);
	}

	return (0);
}

/*********************************************************************
 *
 * 82547 workaround to avoid controller hang in a half-duplex
 * environment.  The workaround is to avoid queuing a large packet
 * that would span the internal Tx FIFO ring boundary.  We need to
 * reset the FIFO pointers in this case, and we do that only when
 * the FIFO is quiescent.
 *
 **********************************************************************/
static void
em_82547_move_tail_locked(struct adapter *adapter)
{
	uint16_t hw_tdt;
	uint16_t sw_tdt;
	struct em_tx_desc *tx_desc;
	uint16_t length = 0;
	boolean_t eop = 0;

	EM_LOCK_ASSERT(adapter);

	hw_tdt = E1000_READ_REG(&adapter->hw, TDT);
	sw_tdt = adapter->next_avail_tx_desc;

	while (hw_tdt != sw_tdt) {
		tx_desc = &adapter->tx_desc_base[hw_tdt];
		length += tx_desc->lower.flags.length;
		eop = tx_desc->lower.data & E1000_TXD_CMD_EOP;
		if (++hw_tdt == adapter->num_tx_desc)
			hw_tdt = 0;

		if (eop) {
			if (em_82547_fifo_workaround(adapter, length)) {
				adapter->tx_fifo_wrk_cnt++;
				callout_reset(&adapter->tx_fifo_timer, 1,
				    em_82547_move_tail, adapter);
				break;
			}
			E1000_WRITE_REG(&adapter->hw, TDT, hw_tdt);
			em_82547_update_fifo_head(adapter, length);
			length = 0;
		}
	}
}

static void
em_82547_move_tail(void *arg)
{
	struct adapter *adapter = arg;

	EM_LOCK(adapter);
	em_82547_move_tail_locked(adapter);
	EM_UNLOCK(adapter);
}
1773static int
1774em_82547_fifo_workaround(struct adapter *adapter, int len)
1775{
1776	int fifo_space, fifo_pkt_len;
1777
1778	fifo_pkt_len = roundup2(len + EM_FIFO_HDR, EM_FIFO_HDR);
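	/*
	 * Worked example (assuming EM_FIFO_HDR is 16 bytes, matching the
	 * 16-byte alignment of tx_fifo_head noted below): a 1514-byte
	 * frame consumes roundup2(1514 + 16, 16) = 1536 bytes of FIFO.
	 */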
1779
1780	if (adapter->link_duplex == HALF_DUPLEX) {
1781		fifo_space = adapter->tx_fifo_size - adapter->tx_fifo_head;
1782
1783		if (fifo_pkt_len >= (EM_82547_PKT_THRESH + fifo_space)) {
1784			if (em_82547_tx_fifo_reset(adapter))
1785				return (0);
1786			else
1787				return (1);
1788		}
1789	}
1790
1791	return (0);
1792}
1793
1794static void
1795em_82547_update_fifo_head(struct adapter *adapter, int len)
1796{
1797	int fifo_pkt_len = roundup2(len + EM_FIFO_HDR, EM_FIFO_HDR);
1798
1799	/* tx_fifo_head is always 16 byte aligned */
1800	adapter->tx_fifo_head += fifo_pkt_len;
1801	if (adapter->tx_fifo_head >= adapter->tx_fifo_size) {
1802		adapter->tx_fifo_head -= adapter->tx_fifo_size;
1803	}
1804}
1805
1806
1807static int
1808em_82547_tx_fifo_reset(struct adapter *adapter)
1809{
1810	uint32_t tctl;
1811
1812	if ((E1000_READ_REG(&adapter->hw, TDT) == E1000_READ_REG(&adapter->hw, TDH)) &&
1813	    (E1000_READ_REG(&adapter->hw, TDFT) == E1000_READ_REG(&adapter->hw, TDFH)) &&
1814	    (E1000_READ_REG(&adapter->hw, TDFTS) == E1000_READ_REG(&adapter->hw, TDFHS))&&
1815	    (E1000_READ_REG(&adapter->hw, TDFPC) == 0)) {
1816
1817		/* Disable TX unit */
1818		tctl = E1000_READ_REG(&adapter->hw, TCTL);
1819		E1000_WRITE_REG(&adapter->hw, TCTL, tctl & ~E1000_TCTL_EN);
1820
1821		/* Reset FIFO pointers */
1822		E1000_WRITE_REG(&adapter->hw, TDFT,  adapter->tx_head_addr);
1823		E1000_WRITE_REG(&adapter->hw, TDFH,  adapter->tx_head_addr);
1824		E1000_WRITE_REG(&adapter->hw, TDFTS, adapter->tx_head_addr);
1825		E1000_WRITE_REG(&adapter->hw, TDFHS, adapter->tx_head_addr);
1826
1827		/* Re-enable TX unit */
1828		E1000_WRITE_REG(&adapter->hw, TCTL, tctl);
1829		E1000_WRITE_FLUSH(&adapter->hw);
1830
1831		adapter->tx_fifo_head = 0;
1832		adapter->tx_fifo_reset_cnt++;
1833
1834		return (TRUE);
1835	}
1836	else {
1837		return (FALSE);
1838	}
1839}
1840
1841static void
1842em_set_promisc(struct adapter *adapter)
1843{
1844	struct ifnet	*ifp = adapter->ifp;
1845	uint32_t	reg_rctl;
1846
1847	reg_rctl = E1000_READ_REG(&adapter->hw, RCTL);
1848
1849	if (ifp->if_flags & IFF_PROMISC) {
1850		reg_rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
1851		E1000_WRITE_REG(&adapter->hw, RCTL, reg_rctl);
1852		/* Disable VLAN stripping in promiscuous mode.
1853		 * This allows bridging of VLAN-tagged frames and also
1854		 * allows VLAN tags to be seen in tcpdump.
1855		 */
1856		if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING)
1857			em_disable_vlans(adapter);
1858		adapter->em_insert_vlan_header = 1;
1859	} else if (ifp->if_flags & IFF_ALLMULTI) {
1860		reg_rctl |= E1000_RCTL_MPE;
1861		reg_rctl &= ~E1000_RCTL_UPE;
1862		E1000_WRITE_REG(&adapter->hw, RCTL, reg_rctl);
1863		adapter->em_insert_vlan_header = 0;
1864	} else
1865		adapter->em_insert_vlan_header = 0;
1866}
1867
1868static void
1869em_disable_promisc(struct adapter *adapter)
1870{
1871	struct ifnet	*ifp = adapter->ifp;
1872	uint32_t	reg_rctl;
1873
1874	reg_rctl = E1000_READ_REG(&adapter->hw, RCTL);
1875
1876	reg_rctl &=  (~E1000_RCTL_UPE);
1877	reg_rctl &=  (~E1000_RCTL_MPE);
1878	E1000_WRITE_REG(&adapter->hw, RCTL, reg_rctl);
1879
1880	if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING)
1881		em_enable_vlans(adapter);
1882	adapter->em_insert_vlan_header = 0;
1883}
1884
1885
1886/*********************************************************************
1887 *  Multicast Update
1888 *
1889 *  This routine is called whenever the multicast address list is updated.
1890 *
1891 **********************************************************************/
1892
1893static void
1894em_set_multi(struct adapter *adapter)
1895{
1896	struct ifnet	*ifp = adapter->ifp;
1897	struct ifmultiaddr *ifma;
1898	uint32_t reg_rctl = 0;
1899	uint8_t  mta[MAX_NUM_MULTICAST_ADDRESSES * ETH_LENGTH_OF_ADDRESS];
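	/* Multicast addresses are packed back-to-back into mta, one
	 * ETH_LENGTH_OF_ADDRESS (6-byte) entry per address, in the flat
	 * layout that em_mc_addr_list_update() below consumes. */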
1900	int mcnt = 0;
1901
1902	IOCTL_DEBUGOUT("em_set_multi: begin");
1903
1904	if (adapter->hw.mac_type == em_82542_rev2_0) {
1905		reg_rctl = E1000_READ_REG(&adapter->hw, RCTL);
1906		if (adapter->hw.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
1907			em_pci_clear_mwi(&adapter->hw);
1908		reg_rctl |= E1000_RCTL_RST;
1909		E1000_WRITE_REG(&adapter->hw, RCTL, reg_rctl);
1910		msec_delay(5);
1911	}
1912
1913	IF_ADDR_LOCK(ifp);
1914	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
1915		if (ifma->ifma_addr->sa_family != AF_LINK)
1916			continue;
1917
1918		if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
1919			break;
1920
1921		bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
1922		    &mta[mcnt*ETH_LENGTH_OF_ADDRESS], ETH_LENGTH_OF_ADDRESS);
1923		mcnt++;
1924	}
1925	IF_ADDR_UNLOCK(ifp);
1926
1927	if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES) {
1928		reg_rctl = E1000_READ_REG(&adapter->hw, RCTL);
1929		reg_rctl |= E1000_RCTL_MPE;
1930		E1000_WRITE_REG(&adapter->hw, RCTL, reg_rctl);
1931	} else
1932		em_mc_addr_list_update(&adapter->hw, mta, mcnt, 0, 1);
1933
1934	if (adapter->hw.mac_type == em_82542_rev2_0) {
1935		reg_rctl = E1000_READ_REG(&adapter->hw, RCTL);
1936		reg_rctl &= ~E1000_RCTL_RST;
1937		E1000_WRITE_REG(&adapter->hw, RCTL, reg_rctl);
1938		msec_delay(5);
1939		if (adapter->hw.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
1940			em_pci_set_mwi(&adapter->hw);
1941	}
1942}
1943
1944
1945/*********************************************************************
1946 *  Timer routine
1947 *
1948 *  This routine checks for link status and updates statistics.
1949 *
1950 **********************************************************************/
1951
1952static void
1953em_local_timer(void *arg)
1954{
1955	struct adapter	*adapter = arg;
1956	struct ifnet	*ifp = adapter->ifp;
1957
1958	EM_LOCK(adapter);
1959
1960	em_check_for_link(&adapter->hw);
1961	em_update_link_status(adapter);
1962	em_update_stats_counters(adapter);
1963	if (em_display_debug_stats && (ifp->if_drv_flags & IFF_DRV_RUNNING))
1964		em_print_hw_stats(adapter);
1965	em_smartspeed(adapter);
1966
1967	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
1968
1969	EM_UNLOCK(adapter);
1970}
1971
1972static void
1973em_update_link_status(struct adapter *adapter)
1974{
1975	struct ifnet *ifp = adapter->ifp;
1976	device_t dev = adapter->dev;
1977
1978	if (E1000_READ_REG(&adapter->hw, STATUS) & E1000_STATUS_LU) {
1979		if (adapter->link_active == 0) {
1980			em_get_speed_and_duplex(&adapter->hw, &adapter->link_speed,
1981			    &adapter->link_duplex);
1982			/* Check if we may set SPEED_MODE bit on PCI-E */
1983			if ((adapter->link_speed == SPEED_1000) &&
1984			    ((adapter->hw.mac_type == em_82571) ||
1985			    (adapter->hw.mac_type == em_82572))) {
1986				int tarc0;
1987
1988				tarc0 = E1000_READ_REG(&adapter->hw, TARC0);
1989				tarc0 |= SPEED_MODE_BIT;
1990				E1000_WRITE_REG(&adapter->hw, TARC0, tarc0);
1991			}
1992			if (bootverbose)
1993				device_printf(dev, "Link is up %d Mbps %s\n",
1994				    adapter->link_speed,
1995				    ((adapter->link_duplex == FULL_DUPLEX) ?
1996				    "Full Duplex" : "Half Duplex"));
1997			adapter->link_active = 1;
1998			adapter->smartspeed = 0;
1999			ifp->if_baudrate = adapter->link_speed * 1000000;
2000			if_link_state_change(ifp, LINK_STATE_UP);
2001		}
2002	} else {
2003		if (adapter->link_active == 1) {
2004			ifp->if_baudrate = adapter->link_speed = 0;
2005			adapter->link_duplex = 0;
2006			if (bootverbose)
2007				device_printf(dev, "Link is Down\n");
2008			adapter->link_active = 0;
2009			if_link_state_change(ifp, LINK_STATE_DOWN);
2010		}
2011	}
2012}
2013
2014/*********************************************************************
2015 *
2016 *  This routine disables all traffic on the adapter by issuing a
2017 *  global reset on the MAC and deallocating the TX/RX buffers.
2018 *
2019 **********************************************************************/
2020
2021static void
2022em_stop(void *arg)
2023{
2024	struct adapter	*adapter = arg;
2025	struct ifnet	*ifp = adapter->ifp;
2026
2027	EM_LOCK_ASSERT(adapter);
2028
2029	INIT_DEBUGOUT("em_stop: begin");
2030
2031	em_disable_intr(adapter);
2032	em_reset_hw(&adapter->hw);
2033	callout_stop(&adapter->timer);
2034	callout_stop(&adapter->tx_fifo_timer);
2035	em_free_transmit_structures(adapter);
2036	em_free_receive_structures(adapter);
2037
2038	/* Tell the stack that the interface is no longer active */
2039	ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
2040}
2041
2042
2043/********************************************************************
2044 *
2045 *  Determine hardware revision.
2046 *
2047 **********************************************************************/
2048static void
2049em_identify_hardware(struct adapter *adapter)
2050{
2051	device_t dev = adapter->dev;
2052
2053	/* Make sure our PCI config space has the necessary stuff set */
2054	pci_enable_busmaster(dev);
2055	pci_enable_io(dev, SYS_RES_MEMORY);
2056	adapter->hw.pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2);
2057
2058	/* Save off the information about this board */
2059	adapter->hw.vendor_id = pci_get_vendor(dev);
2060	adapter->hw.device_id = pci_get_device(dev);
2061	adapter->hw.revision_id = pci_get_revid(dev);
2062	adapter->hw.subsystem_vendor_id = pci_get_subvendor(dev);
2063	adapter->hw.subsystem_id = pci_get_subdevice(dev);
2064
2065	/* Identify the MAC */
2066	if (em_set_mac_type(&adapter->hw))
2067		device_printf(dev, "Unknown MAC Type\n");
2068
2069	if(adapter->hw.mac_type == em_82541 || adapter->hw.mac_type == em_82541_rev_2 ||
2070	   adapter->hw.mac_type == em_82547 || adapter->hw.mac_type == em_82547_rev_2)
2071		adapter->hw.phy_init_script = TRUE;
2072}
2073
2074static int
2075em_allocate_pci_resources(struct adapter *adapter)
2076{
2077	device_t	dev = adapter->dev;
2078	int		val, rid;
2079
2080	rid = PCIR_BAR(0);
2081	adapter->res_memory = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
2082	    &rid, RF_ACTIVE);
2083	if (adapter->res_memory == NULL) {
2084		device_printf(dev, "Unable to allocate bus resource: memory\n");
2085		return (ENXIO);
2086	}
2087	adapter->osdep.mem_bus_space_tag =
2088	rman_get_bustag(adapter->res_memory);
2089	adapter->osdep.mem_bus_space_handle = rman_get_bushandle(adapter->res_memory);
2090	adapter->hw.hw_addr = (uint8_t *)&adapter->osdep.mem_bus_space_handle;
2091
2092	if (adapter->hw.mac_type > em_82543) {
2093		/* Figure out where our I/O BAR is. */
2094		for (rid = PCIR_BAR(0); rid < PCIR_CIS;) {
2095			val = pci_read_config(dev, rid, 4);
2096			if (E1000_BAR_TYPE(val) == E1000_BAR_TYPE_IO) {
2097				adapter->io_rid = rid;
2098				break;
2099			}
2100			rid += 4;
2101			/* check for 64bit BAR */
2102			if (E1000_BAR_MEM_TYPE(val) == E1000_BAR_MEM_TYPE_64BIT)
2103				rid += 4;
2104		}
2105		if (rid >= PCIR_CIS) {
2106			device_printf(dev, "Unable to locate IO BAR\n");
2107			return (ENXIO);
2108		}
2109		adapter->res_ioport = bus_alloc_resource_any(dev, SYS_RES_IOPORT,
2110		    &adapter->io_rid, RF_ACTIVE);
2111		if (adapter->res_ioport == NULL) {
2112			device_printf(dev, "Unable to allocate bus resource: "
2113			    "ioport\n");
2114			return (ENXIO);
2115		}
2116		adapter->hw.io_base = 0;
2117		adapter->osdep.io_bus_space_tag = rman_get_bustag(adapter->res_ioport);
2118		adapter->osdep.io_bus_space_handle =
2119		    rman_get_bushandle(adapter->res_ioport);
2120	}
2121
2122	/* For ICH8 we need to find the flash memory. */
2123	if (adapter->hw.mac_type == em_ich8lan) {
2124		rid = EM_FLASH;
2125
2126		adapter->flash_mem = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
2127		    &rid, RF_ACTIVE);
2128		adapter->osdep.flash_bus_space_tag = rman_get_bustag(adapter->flash_mem);
2129		adapter->osdep.flash_bus_space_handle =
2130		    rman_get_bushandle(adapter->flash_mem);
2131	}
2132
2133	rid = 0x0;
2134	adapter->res_interrupt = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid,
2135	    RF_SHAREABLE | RF_ACTIVE);
2136	if (adapter->res_interrupt == NULL) {
2137		device_printf(dev, "Unable to allocate bus resource: "
2138		    "interrupt\n");
2139		return (ENXIO);
2140	}
2141
2142	adapter->hw.back = &adapter->osdep;
2143
2144	return (0);
2145}
2146
2147int
2148em_allocate_intr(struct adapter *adapter)
2149{
2150	device_t dev = adapter->dev;
2151	int error;
2152
2153	/* Manually turn off all interrupts */
2154	E1000_WRITE_REG(&adapter->hw, IMC, 0xffffffff);
2155
2156#ifdef DEVICE_POLLING
2157	if (adapter->int_handler_tag == NULL && (error = bus_setup_intr(dev,
2158	    adapter->res_interrupt, INTR_TYPE_NET | INTR_MPSAFE, em_intr, adapter,
2159	    &adapter->int_handler_tag)) != 0) {
2160		device_printf(dev, "Failed to register interrupt handler\n");
2161		return (error);
2162	}
2163#else
2164	/*
2165	 * Try allocating a fast interrupt and the associated deferred
2166	 * processing contexts.
2167	 */
2168	TASK_INIT(&adapter->rxtx_task, 0, em_handle_rxtx, adapter);
2169	TASK_INIT(&adapter->link_task, 0, em_handle_link, adapter);
2170	adapter->tq = taskqueue_create_fast("em_taskq", M_NOWAIT,
2171	    taskqueue_thread_enqueue, &adapter->tq);
2172	taskqueue_start_threads(&adapter->tq, 1, PI_NET, "%s taskq",
2173	    device_get_nameunit(adapter->dev));
2174	if ((error = bus_setup_intr(dev, adapter->res_interrupt,
2175	    INTR_TYPE_NET | INTR_FAST, em_intr_fast, adapter,
2176	    &adapter->int_handler_tag)) != 0) {
2177		device_printf(dev, "Failed to register fast interrupt "
2178			    "handler: %d\n", error);
2179		taskqueue_free(adapter->tq);
2180		adapter->tq = NULL;
2181		return (error);
2182	}
2183#endif
2184
2185	em_enable_intr(adapter);
2186	return (0);
2187}
2188
2189static void
2190em_free_intr(struct adapter *adapter)
2191{
2192	device_t dev = adapter->dev;
2193
2194	if (adapter->int_handler_tag != NULL) {
2195		bus_teardown_intr(dev, adapter->res_interrupt, adapter->int_handler_tag);
2196		adapter->int_handler_tag = NULL;
2197	}
2198	if (adapter->tq != NULL) {
2199		taskqueue_drain(adapter->tq, &adapter->rxtx_task);
2200		taskqueue_drain(taskqueue_fast, &adapter->link_task);
2201		taskqueue_free(adapter->tq);
2202		adapter->tq = NULL;
2203	}
2204}
2205
2206static void
2207em_free_pci_resources(struct adapter *adapter)
2208{
2209	device_t dev = adapter->dev;
2210
2211	if (adapter->res_interrupt != NULL)
2212		bus_release_resource(dev, SYS_RES_IRQ, 0, adapter->res_interrupt);
2213
2214	if (adapter->res_memory != NULL)
2215		bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BAR(0),
2216		    adapter->res_memory);
2217
2218	if (adapter->flash_mem != NULL)
2219		bus_release_resource(dev, SYS_RES_MEMORY, EM_FLASH,
2220		    adapter->flash_mem);
2221
2222	if (adapter->res_ioport != NULL)
2223		bus_release_resource(dev, SYS_RES_IOPORT, adapter->io_rid,
2224		    adapter->res_ioport);
2225}
2226
2227/*********************************************************************
2228 *
2229 *  Initialize the hardware to a configuration as specified by the
2230 *  adapter structure. The controller is reset, the EEPROM is
2231 *  verified, the MAC address is set, then the shared initialization
2232 *  routines are called.
2233 *
2234 **********************************************************************/
2235static int
2236em_hardware_init(struct adapter *adapter)
2237{
2238	device_t dev = adapter->dev;
2239	uint16_t rx_buffer_size;
2240
2241	INIT_DEBUGOUT("em_hardware_init: begin");
2242	/* Issue a global reset */
2243	em_reset_hw(&adapter->hw);
2244
2245	/* When hardware is reset, fifo_head is also reset */
2246	adapter->tx_fifo_head = 0;
2247
2248	/* Make sure we have a good EEPROM before we read from it */
2249	if (em_validate_eeprom_checksum(&adapter->hw) < 0) {
2250		device_printf(dev, "The EEPROM Checksum Is Not Valid\n");
2251		return (EIO);
2252	}
2253
2254	if (em_read_part_num(&adapter->hw, &(adapter->part_num)) < 0) {
2255		device_printf(dev, "EEPROM read error while reading part "
2256		    "number\n");
2257		return (EIO);
2258	}
2259
2260	/* Set up smart power down as default off on newer adapters. */
2261	if (!em_smart_pwr_down &&
2262	    (adapter->hw.mac_type == em_82571 || adapter->hw.mac_type == em_82572)) {
2263		uint16_t phy_tmp = 0;
2264
2265		/* Speed up time to link by disabling smart power down. */
2266		em_read_phy_reg(&adapter->hw, IGP02E1000_PHY_POWER_MGMT, &phy_tmp);
2267		phy_tmp &= ~IGP02E1000_PM_SPD;
2268		em_write_phy_reg(&adapter->hw, IGP02E1000_PHY_POWER_MGMT, phy_tmp);
2269	}
2270
2271	/*
2272	 * These parameters control the automatic generation (Tx) and
2273	 * response (Rx) to Ethernet PAUSE frames.
2274	 * - High water mark should allow for at least two frames to be
2275	 *   received after sending an XOFF.
2276	 * - Low water mark works best when it is very near the high water mark.
2277	 *   This allows the receiver to restart by sending XON when it has
2278	 *   drained a bit. Here we use an arbitrary value of 1500 which will
2279	 *   restart after one full frame is pulled from the buffer. There
2280	 *   could be several smaller frames in the buffer and if so they will
2281	 *   not trigger the XON until their total number reduces the buffer
2282	 *   by 1500.
2283	 * - The pause time is fairly large at 1000 x 512ns = 512 usec.
2284	 */
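	/*
	 * Worked example with illustrative numbers: a 48KB RX allocation
	 * (PBA & 0xffff == 48, shifted to 49152 bytes) and a 1518-byte
	 * max frame give fc_high_water = 49152 - roundup2(1518, 1024) =
	 * 47104 and fc_low_water = 47104 - 1500 = 45604.
	 */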
2285	rx_buffer_size = ((E1000_READ_REG(&adapter->hw, PBA) & 0xffff) << 10 );
2286
2287	adapter->hw.fc_high_water = rx_buffer_size -
2288	    roundup2(adapter->hw.max_frame_size, 1024);
2289	adapter->hw.fc_low_water = adapter->hw.fc_high_water - 1500;
2290	if (adapter->hw.mac_type == em_80003es2lan)
2291		adapter->hw.fc_pause_time = 0xFFFF;
2292	else
2293		adapter->hw.fc_pause_time = 0x1000;
2294	adapter->hw.fc_send_xon = TRUE;
2295	adapter->hw.fc = em_fc_full;
2296
2297	if (em_init_hw(&adapter->hw) < 0) {
2298		device_printf(dev, "Hardware Initialization Failed\n");
2299		return (EIO);
2300	}
2301
2302	em_check_for_link(&adapter->hw);
2303
2304	return (0);
2305}
2306
2307/*********************************************************************
2308 *
2309 *  Setup networking device structure and register an interface.
2310 *
2311 **********************************************************************/
2312static void
2313em_setup_interface(device_t dev, struct adapter *adapter)
2314{
2315	struct ifnet   *ifp;
2316	INIT_DEBUGOUT("em_setup_interface: begin");
2317
2318	ifp = adapter->ifp = if_alloc(IFT_ETHER);
2319	if (ifp == NULL)
2320		panic("%s: can not if_alloc()", device_get_nameunit(dev));
2321	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
2322	ifp->if_mtu = ETHERMTU;
2323	ifp->if_init =  em_init;
2324	ifp->if_softc = adapter;
2325	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
2326	ifp->if_ioctl = em_ioctl;
2327	ifp->if_start = em_start;
2328	ifp->if_watchdog = em_watchdog;
2329	IFQ_SET_MAXLEN(&ifp->if_snd, adapter->num_tx_desc - 1);
2330	ifp->if_snd.ifq_drv_maxlen = adapter->num_tx_desc - 1;
2331	IFQ_SET_READY(&ifp->if_snd);
2332
2333	ether_ifattach(ifp, adapter->hw.mac_addr);
2334
2335	ifp->if_capabilities = ifp->if_capenable = 0;
2336
2337	if (adapter->hw.mac_type >= em_82543) {
2338		ifp->if_capabilities |= IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM;
2339		ifp->if_capenable |= IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM;
2340	}
2341
2342	/* Enable TSO if available */
2343	if ((adapter->hw.mac_type > em_82544) &&
2344	    (adapter->hw.mac_type != em_82547)) {
2345		ifp->if_capabilities |= IFCAP_TSO;
2346		ifp->if_capenable |= IFCAP_TSO;
2347	}
2348
2349	/*
2350	 * Tell the upper layer(s) we support long frames.
2351	 */
2352	ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header);
2353	ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU;
2354	ifp->if_capenable |= IFCAP_VLAN_MTU;
2355
2356#ifdef DEVICE_POLLING
2357	ifp->if_capabilities |= IFCAP_POLLING;
2358#endif
2359
2360	/*
2361	 * Specify the media types supported by this adapter and register
2362	 * callbacks to update media and link information
2363	 */
2364	ifmedia_init(&adapter->media, IFM_IMASK, em_media_change,
2365	    em_media_status);
2366	if ((adapter->hw.media_type == em_media_type_fiber) ||
2367	    (adapter->hw.media_type == em_media_type_internal_serdes)) {
2368		u_char fiber_type = IFM_1000_SX;	/* default type */
2369
2370		if (adapter->hw.mac_type == em_82545)
2371			fiber_type = IFM_1000_LX;
2372		ifmedia_add(&adapter->media, IFM_ETHER | fiber_type | IFM_FDX,
2373		    0, NULL);
2374		ifmedia_add(&adapter->media, IFM_ETHER | fiber_type, 0, NULL);
2375	} else {
2376		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T, 0, NULL);
2377		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T | IFM_FDX,
2378			    0, NULL);
2379		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX,
2380			    0, NULL);
2381		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX | IFM_FDX,
2382			    0, NULL);
2383		if (adapter->hw.phy_type != em_phy_ife) {
2384			ifmedia_add(&adapter->media,
2385				IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
2386			ifmedia_add(&adapter->media,
2387				IFM_ETHER | IFM_1000_T, 0, NULL);
2388		}
2389	}
2390	ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL);
2391	ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO);
2392}
2393
2394
2395/*********************************************************************
2396 *
2397 *  Workaround for SmartSpeed on 82541 and 82547 controllers
2398 *
2399 **********************************************************************/
2400static void
2401em_smartspeed(struct adapter *adapter)
2402{
2403	uint16_t phy_tmp;
2404
2405	if (adapter->link_active || (adapter->hw.phy_type != em_phy_igp) ||
2406	    adapter->hw.autoneg == 0 ||
2407	    (adapter->hw.autoneg_advertised & ADVERTISE_1000_FULL) == 0)
2408		return;
2409
2410	if (adapter->smartspeed == 0) {
2411		/* If Master/Slave config fault is asserted twice,
2412		 * we assume back-to-back */
2413		em_read_phy_reg(&adapter->hw, PHY_1000T_STATUS, &phy_tmp);
2414		if (!(phy_tmp & SR_1000T_MS_CONFIG_FAULT))
2415			return;
2416		em_read_phy_reg(&adapter->hw, PHY_1000T_STATUS, &phy_tmp);
2417		if (phy_tmp & SR_1000T_MS_CONFIG_FAULT) {
2418			em_read_phy_reg(&adapter->hw, PHY_1000T_CTRL, &phy_tmp);
2419			if(phy_tmp & CR_1000T_MS_ENABLE) {
2420				phy_tmp &= ~CR_1000T_MS_ENABLE;
2421				em_write_phy_reg(&adapter->hw, PHY_1000T_CTRL,
2422				    phy_tmp);
2423				adapter->smartspeed++;
2424				if(adapter->hw.autoneg &&
2425				   !em_phy_setup_autoneg(&adapter->hw) &&
2426				   !em_read_phy_reg(&adapter->hw, PHY_CTRL,
2427				    &phy_tmp)) {
2428					phy_tmp |= (MII_CR_AUTO_NEG_EN |
2429						    MII_CR_RESTART_AUTO_NEG);
2430					em_write_phy_reg(&adapter->hw, PHY_CTRL,
2431					    phy_tmp);
2432				}
2433			}
2434		}
2435		return;
2436	} else if(adapter->smartspeed == EM_SMARTSPEED_DOWNSHIFT) {
2437		/* If still no link, perhaps using 2/3 pair cable */
2438		em_read_phy_reg(&adapter->hw, PHY_1000T_CTRL, &phy_tmp);
2439		phy_tmp |= CR_1000T_MS_ENABLE;
2440		em_write_phy_reg(&adapter->hw, PHY_1000T_CTRL, phy_tmp);
2441		if(adapter->hw.autoneg &&
2442		   !em_phy_setup_autoneg(&adapter->hw) &&
2443		   !em_read_phy_reg(&adapter->hw, PHY_CTRL, &phy_tmp)) {
2444			phy_tmp |= (MII_CR_AUTO_NEG_EN |
2445				    MII_CR_RESTART_AUTO_NEG);
2446			em_write_phy_reg(&adapter->hw, PHY_CTRL, phy_tmp);
2447		}
2448	}
2449	/* Restart process after EM_SMARTSPEED_MAX iterations */
2450	if(adapter->smartspeed++ == EM_SMARTSPEED_MAX)
2451		adapter->smartspeed = 0;
2452}
2453
2454
2455/*
2456 * Manage DMA'able memory.
2457 */
2458static void
2459em_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
2460{
2461	if (error)
2462		return;
2463	*(bus_addr_t *) arg = segs[0].ds_addr;
2464}
2465
2466static int
2467em_dma_malloc(struct adapter *adapter, bus_size_t size, struct em_dma_alloc *dma,
2468	int mapflags)
2469{
2470	int error;
2471
2472	error = bus_dma_tag_create(bus_get_dma_tag(adapter->dev), /* parent */
2473				EM_DBA_ALIGN, 0,	/* alignment, bounds */
2474				BUS_SPACE_MAXADDR,	/* lowaddr */
2475				BUS_SPACE_MAXADDR,	/* highaddr */
2476				NULL, NULL,		/* filter, filterarg */
2477				size,			/* maxsize */
2478				1,			/* nsegments */
2479				size,			/* maxsegsize */
2480				0,			/* flags */
2481				NULL,			/* lockfunc */
2482				NULL,			/* lockarg */
2483				&dma->dma_tag);
2484	if (error) {
2485		device_printf(adapter->dev, "%s: bus_dma_tag_create failed: %d\n",
2486		    __func__, error);
2487		goto fail_0;
2488	}
2489
2490	error = bus_dmamem_alloc(dma->dma_tag, (void**) &dma->dma_vaddr,
2491	    BUS_DMA_NOWAIT, &dma->dma_map);
2492	if (error) {
2493		device_printf(adapter->dev, "%s: bus_dmamem_alloc(%ju) failed: %d\n",
2494		    __func__, (uintmax_t)size, error);
2495		goto fail_2;
2496	}
2497
2498	dma->dma_paddr = 0;
2499	error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
2500	    size, em_dmamap_cb, &dma->dma_paddr, mapflags | BUS_DMA_NOWAIT);
2501	if (error || dma->dma_paddr == 0) {
2502		device_printf(adapter->dev, "%s: bus_dmamap_load failed: %d\n",
2503		    __func__, error);
2504		goto fail_3;
2505	}
2506
2507	return (0);
2508
2509fail_3:
2510	bus_dmamap_unload(dma->dma_tag, dma->dma_map);
2511fail_2:
2512	bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
2513	bus_dma_tag_destroy(dma->dma_tag);
2514fail_0:
2515	dma->dma_map = NULL;
2516	dma->dma_tag = NULL;
2517
2518	return (error);
2519}
2520
2521static void
2522em_dma_free(struct adapter *adapter, struct em_dma_alloc *dma)
2523{
2524	if (dma->dma_tag == NULL)
2525		return;
2526	if (dma->dma_map != NULL) {
2527		bus_dmamap_sync(dma->dma_tag, dma->dma_map,
2528		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
2529		bus_dmamap_unload(dma->dma_tag, dma->dma_map);
2530		bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
2531		dma->dma_map = NULL;
2532	}
2533	bus_dma_tag_destroy(dma->dma_tag);
2534	dma->dma_tag = NULL;
2535}
2536
2537
2538/*********************************************************************
2539 *
2540 *  Allocate memory for tx_buffer structures. The tx_buffer stores all
2541 *  the information needed to transmit a packet on the wire.
2542 *
2543 **********************************************************************/
2544static int
2545em_allocate_transmit_structures(struct adapter *adapter)
2546{
2547	adapter->tx_buffer_area =  malloc(sizeof(struct em_buffer) *
2548	    adapter->num_tx_desc, M_DEVBUF, M_NOWAIT);
2549	if (adapter->tx_buffer_area == NULL) {
2550		device_printf(adapter->dev, "Unable to allocate tx_buffer memory\n");
2551		return (ENOMEM);
2552	}
2553
2554	bzero(adapter->tx_buffer_area, sizeof(struct em_buffer) * adapter->num_tx_desc);
2555
2556	return (0);
2557}
2558
2559/*********************************************************************
2560 *
2561 *  Allocate and initialize transmit structures.
2562 *
2563 **********************************************************************/
2564static int
2565em_setup_transmit_structures(struct adapter *adapter)
2566{
2567	struct ifnet   *ifp = adapter->ifp;
2568	device_t dev = adapter->dev;
2569	struct em_buffer *tx_buffer;
2570	bus_size_t size, segsize;
2571	int error, i;
2572
2573	/*
2574	 * Setup DMA descriptor areas.
2575	 */
2576	segsize = size = roundup2(adapter->hw.max_frame_size, MCLBYTES);
2577
2578	/* Overrides for TSO - want large sizes */
2579	if (ifp->if_hwassist & EM_TCPSEG_FEATURES) {
2580		size = EM_TSO_SIZE;
2581		segsize = PAGE_SIZE;
2582	}
2583
2584	if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),	/* parent */
2585				1, 0,			/* alignment, bounds */
2586				BUS_SPACE_MAXADDR,	/* lowaddr */
2587				BUS_SPACE_MAXADDR,	/* highaddr */
2588				NULL, NULL,		/* filter, filterarg */
2589				size,			/* maxsize */
2590				EM_MAX_SCATTER,		/* nsegments */
2591				segsize,		/* maxsegsize */
2592				0,			/* flags */
2593				NULL,		/* lockfunc */
2594				NULL,		/* lockarg */
2595				&adapter->txtag)) != 0) {
2596		device_printf(dev, "Unable to allocate TX DMA tag\n");
2597		goto fail;
2598	}
2599
2600	if ((error = em_allocate_transmit_structures(adapter)) != 0)
2601		goto fail;
2602
2603	bzero(adapter->tx_desc_base, (sizeof(struct em_tx_desc)) * adapter->num_tx_desc);
2604	tx_buffer = adapter->tx_buffer_area;
2605	for (i = 0; i < adapter->num_tx_desc; i++) {
2606		error = bus_dmamap_create(adapter->txtag, 0, &tx_buffer->map);
2607		if (error != 0) {
2608			device_printf(dev, "Unable to create TX DMA map\n");
2609			goto fail;
2610		}
2611		tx_buffer++;
2612	}
2613
2614	adapter->next_avail_tx_desc = 0;
2615	adapter->oldest_used_tx_desc = 0;
2616
2617	/* Set number of descriptors available */
2618	adapter->num_tx_desc_avail = adapter->num_tx_desc;
2619
2620	/* Set checksum context */
2621	adapter->active_checksum_context = OFFLOAD_NONE;
2622	bus_dmamap_sync(adapter->txdma.dma_tag, adapter->txdma.dma_map,
2623	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
2624
2625	return (0);
2626
2627fail:
2628	em_free_transmit_structures(adapter);
2629	return (error);
2630}
2631
2632/*********************************************************************
2633 *
2634 *  Enable transmit unit.
2635 *
2636 **********************************************************************/
2637static void
2638em_initialize_transmit_unit(struct adapter *adapter)
2639{
2640	uint32_t	reg_tctl, reg_tarc;
2641	uint32_t	reg_tipg = 0;
2642	uint64_t	bus_addr;
2643
2644	INIT_DEBUGOUT("em_initialize_transmit_unit: begin");
2645	/* Setup the Base and Length of the Tx Descriptor Ring */
2646	bus_addr = adapter->txdma.dma_paddr;
2647	E1000_WRITE_REG(&adapter->hw, TDLEN,
2648	    adapter->num_tx_desc * sizeof(struct em_tx_desc));
2649	E1000_WRITE_REG(&adapter->hw, TDBAH, (uint32_t)(bus_addr >> 32));
2650	E1000_WRITE_REG(&adapter->hw, TDBAL, (uint32_t)bus_addr);
2651
2652	/* Setup the HW Tx Head and Tail descriptor pointers */
2653	E1000_WRITE_REG(&adapter->hw, TDT, 0);
2654	E1000_WRITE_REG(&adapter->hw, TDH, 0);
2655
2656
2657	HW_DEBUGOUT2("Base = %x, Length = %x\n", E1000_READ_REG(&adapter->hw, TDBAL),
2658	    E1000_READ_REG(&adapter->hw, TDLEN));
2659
2660	/* Set the default values for the Tx Inter Packet Gap timer */
2661	switch (adapter->hw.mac_type) {
2662	case em_82542_rev2_0:
2663	case em_82542_rev2_1:
2664		reg_tipg = DEFAULT_82542_TIPG_IPGT;
2665		reg_tipg |= DEFAULT_82542_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT;
2666		reg_tipg |= DEFAULT_82542_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT;
2667		break;
2668	case em_80003es2lan:
2669		reg_tipg = DEFAULT_82543_TIPG_IPGR1;
2670		reg_tipg |= DEFAULT_80003ES2LAN_TIPG_IPGR2 <<
2671		    E1000_TIPG_IPGR2_SHIFT;
2672		break;
2673	default:
2674		if ((adapter->hw.media_type == em_media_type_fiber) ||
2675		    (adapter->hw.media_type == em_media_type_internal_serdes))
2676			reg_tipg = DEFAULT_82543_TIPG_IPGT_FIBER;
2677		else
2678			reg_tipg = DEFAULT_82543_TIPG_IPGT_COPPER;
2679		reg_tipg |= DEFAULT_82543_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT;
2680		reg_tipg |= DEFAULT_82543_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT;
2681	}
2682
2683	E1000_WRITE_REG(&adapter->hw, TIPG, reg_tipg);
2684	E1000_WRITE_REG(&adapter->hw, TIDV, adapter->tx_int_delay.value);
2685	if(adapter->hw.mac_type >= em_82540)
2686		E1000_WRITE_REG(&adapter->hw, TADV, adapter->tx_abs_int_delay.value);
2687
2688	/* Do adapter specific tweaks before we enable the transmitter. */
2689	if (adapter->hw.mac_type == em_82571 || adapter->hw.mac_type == em_82572) {
2690		reg_tarc = E1000_READ_REG(&adapter->hw, TARC0);
2691		reg_tarc |= (1 << 25);
2692		E1000_WRITE_REG(&adapter->hw, TARC0, reg_tarc);
2693		reg_tarc = E1000_READ_REG(&adapter->hw, TARC1);
2694		reg_tarc |= (1 << 25);
2695		reg_tarc &= ~(1 << 28);
2696		E1000_WRITE_REG(&adapter->hw, TARC1, reg_tarc);
2697	} else if (adapter->hw.mac_type == em_80003es2lan) {
2698		reg_tarc = E1000_READ_REG(&adapter->hw, TARC0);
2699		reg_tarc |= 1;
2700		E1000_WRITE_REG(&adapter->hw, TARC0, reg_tarc);
2701		reg_tarc = E1000_READ_REG(&adapter->hw, TARC1);
2702		reg_tarc |= 1;
2703		E1000_WRITE_REG(&adapter->hw, TARC1, reg_tarc);
2704	}
2705
2706	/* Program the Transmit Control Register */
2707	reg_tctl = E1000_TCTL_PSP | E1000_TCTL_EN |
2708		   (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT);
2709	if (adapter->hw.mac_type >= em_82571)
2710		reg_tctl |= E1000_TCTL_MULR;
2711	if (adapter->link_duplex == FULL_DUPLEX) {
2712		reg_tctl |= E1000_FDX_COLLISION_DISTANCE << E1000_COLD_SHIFT;
2713	} else {
2714		reg_tctl |= E1000_HDX_COLLISION_DISTANCE << E1000_COLD_SHIFT;
2715	}
2716	/* This write will effectively turn on the transmit unit. */
2717	E1000_WRITE_REG(&adapter->hw, TCTL, reg_tctl);
2718
2719	/* Setup Transmit Descriptor Settings for this adapter */
2720	adapter->txd_cmd = E1000_TXD_CMD_IFCS | E1000_TXD_CMD_RS;
2721
2722	if (adapter->tx_int_delay.value > 0)
2723		adapter->txd_cmd |= E1000_TXD_CMD_IDE;
2724}
2725
2726/*********************************************************************
2727 *
2728 *  Free all transmit related data structures.
2729 *
2730 **********************************************************************/
2731static void
2732em_free_transmit_structures(struct adapter *adapter)
2733{
2734	struct em_buffer *tx_buffer;
2735	int i;
2736
2737	INIT_DEBUGOUT("free_transmit_structures: begin");
2738
2739	if (adapter->tx_buffer_area != NULL) {
2740		tx_buffer = adapter->tx_buffer_area;
2741		for (i = 0; i < adapter->num_tx_desc; i++, tx_buffer++) {
2742			if (tx_buffer->m_head != NULL) {
2743				bus_dmamap_sync(adapter->txtag, tx_buffer->map,
2744				    BUS_DMASYNC_POSTWRITE);
2745				bus_dmamap_unload(adapter->txtag,
2746				    tx_buffer->map);
2747				m_freem(tx_buffer->m_head);
2748				tx_buffer->m_head = NULL;
2749			} else if (tx_buffer->map != NULL)
2750				bus_dmamap_unload(adapter->txtag,
2751				    tx_buffer->map);
2752			if (tx_buffer->map != NULL) {
2753				bus_dmamap_destroy(adapter->txtag,
2754				    tx_buffer->map);
2755				tx_buffer->map = NULL;
2756			}
2757		}
2758	}
2759	if (adapter->tx_buffer_area != NULL) {
2760		free(adapter->tx_buffer_area, M_DEVBUF);
2761		adapter->tx_buffer_area = NULL;
2762	}
2763	if (adapter->txtag != NULL) {
2764		bus_dma_tag_destroy(adapter->txtag);
2765		adapter->txtag = NULL;
2766	}
2767}
2768
2769/*********************************************************************
2770 *
2771 *  The offload context needs to be set when we transfer the first
2772 *  packet of a particular protocol (TCP/UDP). We change the
2773 *  context only if the protocol type changes.
2774 *
2775 **********************************************************************/
2776static void
2777em_transmit_checksum_setup(struct adapter *adapter, struct mbuf *mp,
2778    uint32_t *txd_upper, uint32_t *txd_lower)
2779{
2780	struct em_context_desc *TXD;
2781	struct em_buffer *tx_buffer;
2782	int curr_txd;
2783
2784	if (mp->m_pkthdr.csum_flags) {
2785
2786		if (mp->m_pkthdr.csum_flags & CSUM_TCP) {
2787			*txd_upper = E1000_TXD_POPTS_TXSM << 8;
2788			*txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
2789			if (adapter->active_checksum_context == OFFLOAD_TCP_IP)
2790				return;
2791			else
2792				adapter->active_checksum_context = OFFLOAD_TCP_IP;
2793
2794		} else if (mp->m_pkthdr.csum_flags & CSUM_UDP) {
2795			*txd_upper = E1000_TXD_POPTS_TXSM << 8;
2796			*txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
2797			if (adapter->active_checksum_context == OFFLOAD_UDP_IP)
2798				return;
2799			else
2800				adapter->active_checksum_context = OFFLOAD_UDP_IP;
2801		} else {
2802			*txd_upper = 0;
2803			*txd_lower = 0;
2804			return;
2805		}
2806	} else {
2807		*txd_upper = 0;
2808		*txd_lower = 0;
2809		return;
2810	}
2811
2812	/* If we reach this point, the checksum offload context
2813	 * needs to be reset.
2814	 */
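	/*
	 * For a plain Ethernet/IPv4 frame the fields below work out to:
	 * ipcss = 14 (start of the IP header), ipcso = 24 (ip_sum),
	 * ipcse = 33 (last IP header byte, assuming no IP options),
	 * tucss = 34, and tucso = 50 for TCP (th_sum) or 40 for UDP
	 * (uh_sum).
	 */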
2815	curr_txd = adapter->next_avail_tx_desc;
2816	tx_buffer = &adapter->tx_buffer_area[curr_txd];
2817	TXD = (struct em_context_desc *) &adapter->tx_desc_base[curr_txd];
2818
2819	TXD->lower_setup.ip_fields.ipcss = ETHER_HDR_LEN;
2820	TXD->lower_setup.ip_fields.ipcso =
2821		ETHER_HDR_LEN + offsetof(struct ip, ip_sum);
2822	TXD->lower_setup.ip_fields.ipcse =
2823		htole16(ETHER_HDR_LEN + sizeof(struct ip) - 1);
2824
2825	TXD->upper_setup.tcp_fields.tucss =
2826		ETHER_HDR_LEN + sizeof(struct ip);
2827	TXD->upper_setup.tcp_fields.tucse = htole16(0);
2828
2829	if (adapter->active_checksum_context == OFFLOAD_TCP_IP) {
2830		TXD->upper_setup.tcp_fields.tucso =
2831			ETHER_HDR_LEN + sizeof(struct ip) +
2832			offsetof(struct tcphdr, th_sum);
2833	} else if (adapter->active_checksum_context == OFFLOAD_UDP_IP) {
2834		TXD->upper_setup.tcp_fields.tucso =
2835			ETHER_HDR_LEN + sizeof(struct ip) +
2836			offsetof(struct udphdr, uh_sum);
2837	}
2838
2839	TXD->tcp_seg_setup.data = htole32(0);
2840	TXD->cmd_and_length = htole32(adapter->txd_cmd | E1000_TXD_CMD_DEXT);
2841
2842	tx_buffer->m_head = NULL;
2843
2844	if (++curr_txd == adapter->num_tx_desc)
2845		curr_txd = 0;
2846
2847	adapter->num_tx_desc_avail--;
2848	adapter->next_avail_tx_desc = curr_txd;
2849}
2850
2851/**********************************************************************
2852 *
2853 *  Setup work for hardware segmentation offload (TSO)
2854 *
2855 **********************************************************************/
2856static boolean_t
2857em_tso_setup(struct adapter *adapter, struct mbuf *mp, uint32_t *txd_upper,
2858   uint32_t *txd_lower)
2859{
2860	struct em_context_desc *TXD;
2861	struct em_buffer *tx_buffer;
2862	struct ip *ip;
2863	struct tcphdr *th;
2864	int curr_txd, hdr_len, ip_hlen, tcp_hlen;
2865
2866	if (((mp->m_pkthdr.csum_flags & CSUM_TSO) == 0) ||
2867	    (mp->m_pkthdr.len <= E1000_TX_BUFFER_SIZE)) {
2868		return FALSE;
2869	}
2870
2871	*txd_lower = (E1000_TXD_CMD_DEXT |
2872		      E1000_TXD_DTYP_D |
2873		      E1000_TXD_CMD_TSE);
2874
2875	*txd_upper = (E1000_TXD_POPTS_IXSM |
2876		      E1000_TXD_POPTS_TXSM) << 8;
2877
2878	curr_txd = adapter->next_avail_tx_desc;
2879	tx_buffer = &adapter->tx_buffer_area[curr_txd];
2880	TXD = (struct em_context_desc *) &adapter->tx_desc_base[curr_txd];
2881
2882	mp->m_data += sizeof(struct ether_header);
2883	ip = mtod(mp, struct ip *);
2884	ip->ip_len = 0;
2885	ip->ip_sum = 0;
2886	ip_hlen = ip->ip_hl << 2;
2887	th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
2888	tcp_hlen = th->th_off << 2;
2889
2890	hdr_len = ETHER_HDR_LEN + ip_hlen + tcp_hlen;
2891	th->th_sum = in_pseudo(ip->ip_src.s_addr,
2892	    ip->ip_dst.s_addr, htons(IPPROTO_TCP));
2893
2894	mp->m_data -= sizeof(struct ether_header);
2895	TXD->lower_setup.ip_fields.ipcss = ETHER_HDR_LEN;
2896	TXD->lower_setup.ip_fields.ipcso =
2897		ETHER_HDR_LEN + offsetof(struct ip, ip_sum);
2898	TXD->lower_setup.ip_fields.ipcse =
2899		htole16(ETHER_HDR_LEN + ip_hlen - 1);
2900
2901	TXD->upper_setup.tcp_fields.tucss =
2902		ETHER_HDR_LEN + ip_hlen;
2903	TXD->upper_setup.tcp_fields.tucse = 0;
2904	TXD->upper_setup.tcp_fields.tucso =
2905		ETHER_HDR_LEN + ip_hlen +
2906		offsetof(struct tcphdr, th_sum);
2907	TXD->tcp_seg_setup.fields.mss = htole16(mp->m_pkthdr.tso_segsz);
2908	TXD->tcp_seg_setup.fields.hdr_len = hdr_len;
2909	TXD->cmd_and_length = htole32(adapter->txd_cmd |
2910				E1000_TXD_CMD_DEXT |
2911				E1000_TXD_CMD_TSE |
2912				E1000_TXD_CMD_IP | E1000_TXD_CMD_TCP |
2913				(mp->m_pkthdr.len - (hdr_len)));
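	/*
	 * Example: with 20-byte IP and TCP headers, hdr_len is
	 * 14 + 20 + 20 = 54, so cmd_and_length carries only the TCP
	 * payload length; the hardware replays the 54-byte header on
	 * each mss-sized segment it emits.
	 */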
2914
2915	tx_buffer->m_head = NULL;
2916
2917	if (++curr_txd == adapter->num_tx_desc)
2918		curr_txd = 0;
2919
2920	adapter->num_tx_desc_avail--;
2921	adapter->next_avail_tx_desc = curr_txd;
2922	adapter->tx_tso = TRUE;
2923
2924	return TRUE;
2925}
2926
2927/**********************************************************************
2928 *
2929 *  Examine each tx_buffer in the used queue. If the hardware is done
2930 *  processing the packet then free the associated resources. The
2931 *  tx_buffer is put back on the free queue.
2932 *
2933 **********************************************************************/
2934static void
2935em_txeof(struct adapter *adapter)
2936{
2937	int i, num_avail;
2938	struct em_buffer *tx_buffer;
2939	struct em_tx_desc   *tx_desc;
2940	struct ifnet   *ifp = adapter->ifp;
2941
2942	EM_LOCK_ASSERT(adapter);
2943
2944	if (adapter->num_tx_desc_avail == adapter->num_tx_desc)
2945		return;
2946
2947	num_avail = adapter->num_tx_desc_avail;
2948	i = adapter->oldest_used_tx_desc;
2949
2950	tx_buffer = &adapter->tx_buffer_area[i];
2951	tx_desc = &adapter->tx_desc_base[i];
2952
2953	bus_dmamap_sync(adapter->txdma.dma_tag, adapter->txdma.dma_map,
2954	    BUS_DMASYNC_POSTREAD);
2955	while (tx_desc->upper.fields.status & E1000_TXD_STAT_DD) {
2956
2957		tx_desc->upper.data = 0;
2958		num_avail++;
2959
2960		if (tx_buffer->m_head) {
2961			ifp->if_opackets++;
2962			bus_dmamap_sync(adapter->txtag, tx_buffer->map,
2963			    BUS_DMASYNC_POSTWRITE);
2964			bus_dmamap_unload(adapter->txtag, tx_buffer->map);
2965
2966			m_freem(tx_buffer->m_head);
2967			tx_buffer->m_head = NULL;
2968		}
2969
2970		if (++i == adapter->num_tx_desc)
2971			i = 0;
2972
2973		tx_buffer = &adapter->tx_buffer_area[i];
2974		tx_desc = &adapter->tx_desc_base[i];
2975	}
2976	bus_dmamap_sync(adapter->txdma.dma_tag, adapter->txdma.dma_map,
2977	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
2978
2979	adapter->oldest_used_tx_desc = i;
2980
2981	/*
2982	 * If we have enough room, clear IFF_DRV_OACTIVE to tell the stack
2983	 * that it is OK to send packets.
2984	 * If there are no pending descriptors, clear the timeout. Otherwise,
2985	 * if some descriptors have been freed, restart the timeout.
2986	 */
2987	if (num_avail > EM_TX_CLEANUP_THRESHOLD) {
2988		ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
2989		if (num_avail == adapter->num_tx_desc)
2990			ifp->if_timer = 0;
2991		else if (num_avail != adapter->num_tx_desc_avail)
2992			ifp->if_timer = EM_TX_TIMEOUT;
2993	}
2994	adapter->num_tx_desc_avail = num_avail;
2995}
2996
2997/*********************************************************************
2998 *
2999 *  Get a buffer from the system mbuf cluster pool.
3000 *
3001 **********************************************************************/
3002static int
3003em_get_buf(struct adapter *adapter, int i)
3004{
3005	struct mbuf		*m;
3006	bus_dma_segment_t	segs[1];
3007	bus_dmamap_t		map;
3008	struct em_buffer	*rx_buffer;
3009	int			error, nsegs;
3010
3011	m = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR);
3012	if (m == NULL) {
3013		adapter->mbuf_cluster_failed++;
3014		return (ENOBUFS);
3015	}
3016	m->m_len = m->m_pkthdr.len = MCLBYTES;
3017	if (adapter->hw.max_frame_size <= (MCLBYTES - ETHER_ALIGN))
3018		m_adj(m, ETHER_ALIGN);
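	/*
	 * ETHER_ALIGN (2 bytes) offsets the 14-byte Ethernet header so
	 * the IP header lands on a 4-byte boundary; it is skipped for
	 * jumbo frames because the full cluster size is needed then.
	 */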
3019
3020	/*
3021	 * Using memory from the mbuf cluster pool, invoke the
3022	 * bus_dma machinery to arrange the memory mapping.
3023	 */
3024	error = bus_dmamap_load_mbuf_sg(adapter->rxtag, adapter->rx_sparemap,
3025	    m, segs, &nsegs, BUS_DMA_NOWAIT);
3026	if (error != 0) {
3027		m_free(m);
3028		return (error);
3029	}
3030	/* If nsegs is wrong then the stack is corrupt. */
3031	KASSERT(nsegs == 1, ("Too many segments returned!"));
3032
3033	rx_buffer = &adapter->rx_buffer_area[i];
3034	if (rx_buffer->m_head != NULL)
3035		bus_dmamap_unload(adapter->rxtag, rx_buffer->map);
3036
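	/*
	 * The new mbuf was loaded on the spare map above; swap the spare
	 * with this ring slot's map so the slot always owns a loaded map
	 * and the old map becomes the spare for the next replenish.
	 */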
3037	map = rx_buffer->map;
3038	rx_buffer->map = adapter->rx_sparemap;
3039	adapter->rx_sparemap = map;
3040	bus_dmamap_sync(adapter->rxtag, rx_buffer->map, BUS_DMASYNC_PREREAD);
3041	rx_buffer->m_head = m;
3042
3043	adapter->rx_desc_base[i].buffer_addr = htole64(segs[0].ds_addr);
3044
3045	return (0);
3046}
3047
3048/*********************************************************************
3049 *
3050 *  Allocate memory for rx_buffer structures. Since we use one
3051 *  rx_buffer per received packet, the maximum number of rx_buffers
3052 *  that we'll need is equal to the number of receive descriptors
3053 *  that we've allocated.
3054 *
3055 **********************************************************************/
3056static int
3057em_allocate_receive_structures(struct adapter *adapter)
3058{
3059	device_t dev = adapter->dev;
3060	struct em_buffer *rx_buffer;
3061	int i, error;
3062
3063	adapter->rx_buffer_area = malloc(sizeof(struct em_buffer) * adapter->num_rx_desc,
3064	    M_DEVBUF, M_NOWAIT);
3065	if (adapter->rx_buffer_area == NULL) {
3066		device_printf(dev, "Unable to allocate rx_buffer memory\n");
3067		return (ENOMEM);
3068	}
3069
3070	bzero(adapter->rx_buffer_area, sizeof(struct em_buffer) * adapter->num_rx_desc);
3071
3072	error = bus_dma_tag_create(bus_get_dma_tag(dev),	/* parent */
3073				1, 0,			/* alignment, bounds */
3074				BUS_SPACE_MAXADDR,	/* lowaddr */
3075				BUS_SPACE_MAXADDR,	/* highaddr */
3076				NULL, NULL,		/* filter, filterarg */
3077				MCLBYTES,		/* maxsize */
3078				1,			/* nsegments */
3079				MCLBYTES,		/* maxsegsize */
3080				0,			/* flags */
3081				NULL,			/* lockfunc */
3082				NULL,			/* lockarg */
3083				&adapter->rxtag);
3084	if (error) {
3085		device_printf(dev, "%s: bus_dma_tag_create failed %d\n",
3086		    __func__, error);
3087		goto fail;
3088	}
3089
3090	error = bus_dmamap_create(adapter->rxtag, BUS_DMA_NOWAIT,
3091	    &adapter->rx_sparemap);
3092	if (error) {
3093		device_printf(dev, "%s: bus_dmamap_create failed: %d\n",
3094		    __func__, error);
3095		goto fail;
3096	}
3097	rx_buffer = adapter->rx_buffer_area;
3098	for (i = 0; i < adapter->num_rx_desc; i++, rx_buffer++) {
3099		error = bus_dmamap_create(adapter->rxtag, BUS_DMA_NOWAIT,
3100		    &rx_buffer->map);
3101		if (error) {
3102			device_printf(dev, "%s: bus_dmamap_create failed: %d\n",
3103			    __func__, error);
3104			goto fail;
3105		}
3106	}
3107
3108	for (i = 0; i < adapter->num_rx_desc; i++) {
3109		error = em_get_buf(adapter, i);
3110		if (error)
3111			goto fail;
3112	}
3113	bus_dmamap_sync(adapter->rxdma.dma_tag, adapter->rxdma.dma_map,
3114	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3115
3116	return (0);
3117
3118fail:
3119	em_free_receive_structures(adapter);
3120	return (error);
3121}
3122
3123/*********************************************************************
3124 *
3125 *  Allocate and initialize receive structures.
3126 *
3127 **********************************************************************/
3128static int
3129em_setup_receive_structures(struct adapter *adapter)
3130{
3131	int error;
3132
3133	bzero(adapter->rx_desc_base, (sizeof(struct em_rx_desc)) * adapter->num_rx_desc);
3134
3135	if ((error = em_allocate_receive_structures(adapter)) != 0)
3136		return (error);
3137
3138	/* Setup our descriptor pointers */
3139	adapter->next_rx_desc_to_check = 0;
3140
3141	return (0);
3142}
3143
3144/*********************************************************************
3145 *
3146 *  Enable receive unit.
3147 *
3148 **********************************************************************/
3149static void
3150em_initialize_receive_unit(struct adapter *adapter)
3151{
3152	struct ifnet	*ifp = adapter->ifp;
3153	uint64_t	bus_addr;
3154	uint32_t	reg_rctl;
3155	uint32_t	reg_rxcsum;
3156
3157	INIT_DEBUGOUT("em_initialize_receive_unit: begin");
3158
3159	/*
3160	 * Make sure receives are disabled while setting
3161	 * up the descriptor ring
3162	 */
3163	E1000_WRITE_REG(&adapter->hw, RCTL, 0);
3164
3165	/* Set the Receive Delay Timer Register */
3166	E1000_WRITE_REG(&adapter->hw, RDTR, adapter->rx_int_delay.value | E1000_RDT_FPDB);
3167
3168	if(adapter->hw.mac_type >= em_82540) {
3169		E1000_WRITE_REG(&adapter->hw, RADV, adapter->rx_abs_int_delay.value);
3170
3171		/*
3172		 * Set the interrupt throttling rate. Value is calculated
3173		 * as DEFAULT_ITR = 1/(MAX_INTS_PER_SEC * 256ns)
3174		 */
3175#define MAX_INTS_PER_SEC	8000
3176#define DEFAULT_ITR	     1000000000/(MAX_INTS_PER_SEC * 256)
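		/*
		 * With MAX_INTS_PER_SEC = 8000 this evaluates to
		 * 1000000000 / (8000 * 256) = 488 (in units of 256ns),
		 * i.e. roughly 125us between interrupts.
		 */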
3177		E1000_WRITE_REG(&adapter->hw, ITR, DEFAULT_ITR);
3178	}
3179
3180	/* Setup the Base and Length of the Rx Descriptor Ring */
3181	bus_addr = adapter->rxdma.dma_paddr;
3182	E1000_WRITE_REG(&adapter->hw, RDLEN, adapter->num_rx_desc *
3183			sizeof(struct em_rx_desc));
3184	E1000_WRITE_REG(&adapter->hw, RDBAH, (uint32_t)(bus_addr >> 32));
3185	E1000_WRITE_REG(&adapter->hw, RDBAL, (uint32_t)bus_addr);
3186
3187	/* Setup the HW Rx Head and Tail Descriptor Pointers */
3188	E1000_WRITE_REG(&adapter->hw, RDT, adapter->num_rx_desc - 1);
3189	E1000_WRITE_REG(&adapter->hw, RDH, 0);
3190
3191	/* Setup the Receive Control Register */
3192	reg_rctl = E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_LBM_NO |
3193		   E1000_RCTL_RDMTS_HALF |
3194		   (adapter->hw.mc_filter_type << E1000_RCTL_MO_SHIFT);
3195
3196	if (adapter->hw.tbi_compatibility_on == TRUE)
3197		reg_rctl |= E1000_RCTL_SBP;
3198
3199
3200	switch (adapter->rx_buffer_len) {
3201	default:
3202	case EM_RXBUFFER_2048:
3203		reg_rctl |= E1000_RCTL_SZ_2048;
3204		break;
3205	case EM_RXBUFFER_4096:
3206		reg_rctl |= E1000_RCTL_SZ_4096 | E1000_RCTL_BSEX | E1000_RCTL_LPE;
3207		break;
3208	case EM_RXBUFFER_8192:
3209		reg_rctl |= E1000_RCTL_SZ_8192 | E1000_RCTL_BSEX | E1000_RCTL_LPE;
3210		break;
3211	case EM_RXBUFFER_16384:
3212		reg_rctl |= E1000_RCTL_SZ_16384 | E1000_RCTL_BSEX | E1000_RCTL_LPE;
3213		break;
3214	}
3215
3216	if (ifp->if_mtu > ETHERMTU)
3217		reg_rctl |= E1000_RCTL_LPE;
3218
3219	/* Enable 82543 Receive Checksum Offload for TCP and UDP */
3220	if ((adapter->hw.mac_type >= em_82543) &&
3221	    (ifp->if_capenable & IFCAP_RXCSUM)) {
3222		reg_rxcsum = E1000_READ_REG(&adapter->hw, RXCSUM);
3223		reg_rxcsum |= (E1000_RXCSUM_IPOFL | E1000_RXCSUM_TUOFL);
3224		E1000_WRITE_REG(&adapter->hw, RXCSUM, reg_rxcsum);
3225	}
3226
3227	/* Enable Receives */
3228	E1000_WRITE_REG(&adapter->hw, RCTL, reg_rctl);
3229}
3230
3231/*********************************************************************
3232 *
3233 *  Free receive related data structures.
3234 *
3235 **********************************************************************/
3236static void
3237em_free_receive_structures(struct adapter *adapter)
3238{
3239	struct em_buffer *rx_buffer;
3240	int i;
3241
3242	INIT_DEBUGOUT("free_receive_structures: begin");
3243
3244	if (adapter->rx_sparemap) {
3245		bus_dmamap_destroy(adapter->rxtag, adapter->rx_sparemap);
3246		adapter->rx_sparemap = NULL;
3247	}
3248	if (adapter->rx_buffer_area != NULL) {
3249		rx_buffer = adapter->rx_buffer_area;
3250		for (i = 0; i < adapter->num_rx_desc; i++, rx_buffer++) {
3251			if (rx_buffer->m_head != NULL) {
3252				bus_dmamap_sync(adapter->rxtag, rx_buffer->map,
3253				    BUS_DMASYNC_POSTREAD);
3254				bus_dmamap_unload(adapter->rxtag,
3255				    rx_buffer->map);
3256				m_freem(rx_buffer->m_head);
3257				rx_buffer->m_head = NULL;
3258			} else if (rx_buffer->map != NULL)
3259				bus_dmamap_unload(adapter->rxtag,
3260				    rx_buffer->map);
3261			if (rx_buffer->map != NULL) {
3262				bus_dmamap_destroy(adapter->rxtag,
3263				    rx_buffer->map);
3264				rx_buffer->map = NULL;
3265			}
3266		}
3267	}
3268	if (adapter->rx_buffer_area != NULL) {
3269		free(adapter->rx_buffer_area, M_DEVBUF);
3270		adapter->rx_buffer_area = NULL;
3271	}
3272	if (adapter->rxtag != NULL) {
3273		bus_dma_tag_destroy(adapter->rxtag);
3274		adapter->rxtag = NULL;
3275	}
3276}
3277
3278/*********************************************************************
3279 *
3280 *  This routine executes in interrupt context. It replenishes
3281 *  the mbufs in the descriptor ring and passes data which has been
3282 *  DMA'd into host memory up to the upper layer.
3283 *
3284 *  We loop at most count times if count is > 0, or until done if
3285 *  count < 0.
3286 *
3287 *********************************************************************/
3288static int
3289em_rxeof(struct adapter *adapter, int count)
3290{
3291	struct ifnet	*ifp;
3292	struct mbuf	*mp;
3293	uint8_t		accept_frame = 0;
3294	uint8_t		eop = 0;
3295	uint16_t 	len, desc_len, prev_len_adj;
3296	int		i;
3297
3298	/* Pointer to the receive descriptor being examined. */
3299	struct em_rx_desc   *current_desc;
3300	uint8_t		status;
3301
3302	ifp = adapter->ifp;
3303	i = adapter->next_rx_desc_to_check;
3304	current_desc = &adapter->rx_desc_base[i];
3305	bus_dmamap_sync(adapter->rxdma.dma_tag, adapter->rxdma.dma_map,
3306	    BUS_DMASYNC_POSTREAD);
3307
3308	if (!((current_desc->status) & E1000_RXD_STAT_DD))
3309		return (0);
3310
3311	while ((current_desc->status & E1000_RXD_STAT_DD) &&
3312	    (count != 0) &&
3313	    (ifp->if_drv_flags & IFF_DRV_RUNNING)) {
3314		struct mbuf *m = NULL;
3315
3316		mp = adapter->rx_buffer_area[i].m_head;
3317		/*
3318		 * Can't defer bus_dmamap_sync(9) because TBI_ACCEPT
3319		 * needs to access the last received byte in the mbuf.
3320		 */
3321		bus_dmamap_sync(adapter->rxtag, adapter->rx_buffer_area[i].map,
3322		    BUS_DMASYNC_POSTREAD);
3323
3324		accept_frame = 1;
3325		prev_len_adj = 0;
3326		desc_len = le16toh(current_desc->length);
3327		status = current_desc->status;
3328		if (status & E1000_RXD_STAT_EOP) {
3329			count--;
3330			eop = 1;
3331			if (desc_len < ETHER_CRC_LEN) {
3332				len = 0;
3333				prev_len_adj = ETHER_CRC_LEN - desc_len;
3334			} else
3335				len = desc_len - ETHER_CRC_LEN;
3336		} else {
3337			eop = 0;
3338			len = desc_len;
3339		}
3340
3341		if (current_desc->errors & E1000_RXD_ERR_FRAME_ERR_MASK) {
3342			uint8_t		last_byte;
3343			uint32_t	pkt_len = desc_len;
3344
3345			if (adapter->fmp != NULL)
3346				pkt_len += adapter->fmp->m_pkthdr.len;
3347
3348			last_byte = *(mtod(mp, caddr_t) + desc_len - 1);
3349			if (TBI_ACCEPT(&adapter->hw, status,
3350			    current_desc->errors, pkt_len, last_byte)) {
3351				em_tbi_adjust_stats(&adapter->hw,
3352				    &adapter->stats, pkt_len,
3353				    adapter->hw.mac_addr);
3354				if (len > 0)
3355					len--;
3356			} else
3357				accept_frame = 0;
3358		}
3359
3360		if (accept_frame) {
3361			if (em_get_buf(adapter, i) != 0) {
3362				ifp->if_iqdrops++;
3363				goto discard;
3364			}
3365
3366			/* Assign correct length to the current fragment */
3367			mp->m_len = len;
3368
3369			if (adapter->fmp == NULL) {
3370				mp->m_pkthdr.len = len;
3371				adapter->fmp = mp; /* Store the first mbuf */
3372				adapter->lmp = mp;
3373			} else {
3374				/* Chain mbuf's together */
3375				/* Chain mbufs together */
3376				/*
3377				 * Adjust length of previous mbuf in chain if
3378				 * we received less than 4 bytes in the last
3379				 * descriptor.
3380				 */
3381				if (prev_len_adj > 0) {
3382					adapter->lmp->m_len -= prev_len_adj;
3383					adapter->fmp->m_pkthdr.len -=
3384					    prev_len_adj;
3385				}
3386				adapter->lmp->m_next = mp;
3387				adapter->lmp = adapter->lmp->m_next;
3388				adapter->fmp->m_pkthdr.len += len;
3389			}
3390
3391			if (eop) {
3392				adapter->fmp->m_pkthdr.rcvif = ifp;
3393				ifp->if_ipackets++;
3394				em_receive_checksum(adapter, current_desc,
3395				    adapter->fmp);
3396#ifndef __NO_STRICT_ALIGNMENT
3397				if (adapter->hw.max_frame_size >
3398				    (MCLBYTES - ETHER_ALIGN) &&
3399				    em_fixup_rx(adapter) != 0)
3400					goto skip;
3401#endif
3402				if (status & E1000_RXD_STAT_VP)
3403					VLAN_INPUT_TAG(ifp, adapter->fmp,
3404					    (le16toh(current_desc->special) &
3405					    E1000_RXD_SPC_VLAN_MASK));
3406#ifndef __NO_STRICT_ALIGNMENT
3407skip:
3408#endif
3409				m = adapter->fmp;
3410				adapter->fmp = NULL;
3411				adapter->lmp = NULL;
3412			}
3413		} else {
3414			ifp->if_ierrors++;
3415discard:
3416			/* Reuse loaded DMA map and just update mbuf chain */
3417			mp = adapter->rx_buffer_area[i].m_head;
3418			mp->m_len = mp->m_pkthdr.len = MCLBYTES;
3419			mp->m_data = mp->m_ext.ext_buf;
3420			mp->m_next = NULL;
3421			if (adapter->hw.max_frame_size <= (MCLBYTES - ETHER_ALIGN))
3422				m_adj(mp, ETHER_ALIGN);
3423			if (adapter->fmp != NULL) {
3424				m_freem(adapter->fmp);
3425				adapter->fmp = NULL;
3426				adapter->lmp = NULL;
3427			}
3428			m = NULL;
3429		}
3430
3431		/* Zero out the receive descriptors status. */
3432		/* Zero out the receive descriptor's status. */
3433		bus_dmamap_sync(adapter->rxdma.dma_tag, adapter->rxdma.dma_map,
3434		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3435
3436		/* Advance our pointers to the next descriptor. */
3437		if (++i == adapter->num_rx_desc)
3438			i = 0;
3439		if (m != NULL) {
3440			adapter->next_rx_desc_to_check = i;
3441#ifdef DEVICE_POLLING
3442			EM_UNLOCK(adapter);
3443			(*ifp->if_input)(ifp, m);
3444			EM_LOCK(adapter);
3445#else
3446			(*ifp->if_input)(ifp, m);
3447#endif
3448			i = adapter->next_rx_desc_to_check;
3449		}
3450		current_desc = &adapter->rx_desc_base[i];
3451	}
3452	adapter->next_rx_desc_to_check = i;
3453
3454	/* Advance the E1000's Receive Queue #0  "Tail Pointer". */
3455	if (--i < 0)
3456		i = adapter->num_rx_desc - 1;
3457	E1000_WRITE_REG(&adapter->hw, RDT, i);
3458	if (!((current_desc->status) & E1000_RXD_STAT_DD))
3459		return (0);
3460
3461	return (1);
3462}
3463
3464#ifndef __NO_STRICT_ALIGNMENT
/*
 * When jumbo frames are enabled we should realign the entire payload on
 * architectures with strict alignment. This is a serious design flaw of the
 * 8254x, as it nullifies the benefit of DMA: the 8254x only allows RX buffer
 * sizes of 2048/4096/8192/16384, whereas what we really want is
 * 2048 - ETHER_ALIGN so the payload lands aligned. Even on architectures
 * without strict alignment restrictions, the resulting unaligned memory
 * accesses reduce performance. To avoid copying an entire frame just to
 * align it, we allocate a new mbuf, copy only the ethernet header into it,
 * and prepend the new mbuf to the existing mbuf chain.
 *
 * Be aware that the best performance of the 8254x is achieved only when
 * jumbo frames are not used at all on architectures with strict alignment.
 */
static int
em_fixup_rx(struct adapter *adapter)
{
	struct mbuf *m, *n;
	int error;

	error = 0;
	m = adapter->fmp;
	if (m->m_len <= (MCLBYTES - ETHER_HDR_LEN)) {
		bcopy(m->m_data, m->m_data + ETHER_HDR_LEN, m->m_len);
		m->m_data += ETHER_HDR_LEN;
	} else {
		MGETHDR(n, M_DONTWAIT, MT_DATA);
		if (n != NULL) {
			bcopy(m->m_data, n->m_data, ETHER_HDR_LEN);
			m->m_data += ETHER_HDR_LEN;
			m->m_len -= ETHER_HDR_LEN;
			n->m_len = ETHER_HDR_LEN;
			M_MOVE_PKTHDR(n, m);
			n->m_next = m;
			adapter->fmp = n;
		} else {
			adapter->ifp->if_iqdrops++;
			adapter->mbuf_alloc_failed++;
			m_freem(adapter->fmp);
			adapter->fmp = NULL;
			adapter->lmp = NULL;
			error = ENOBUFS;
		}
	}

	return (error);
}
#endif

/*********************************************************************
 *
 *  Verify that the hardware indicated that the checksum is valid.
 *  Inform the stack about the status of the checksum so that it
 *  doesn't spend time verifying it again.
 *
 *********************************************************************/
static void
em_receive_checksum(struct adapter *adapter, struct em_rx_desc *rx_desc,
		    struct mbuf *mp)
{
	/* 82543 or newer only */
	if ((adapter->hw.mac_type < em_82543) ||
	    /* Ignore Checksum bit is set */
	    (rx_desc->status & E1000_RXD_STAT_IXSM)) {
		mp->m_pkthdr.csum_flags = 0;
		return;
	}

	if (rx_desc->status & E1000_RXD_STAT_IPCS) {
		/* Did it pass? */
		if (!(rx_desc->errors & E1000_RXD_ERR_IPE)) {
			/* IP checksum good */
			mp->m_pkthdr.csum_flags = CSUM_IP_CHECKED;
			mp->m_pkthdr.csum_flags |= CSUM_IP_VALID;
		} else {
			mp->m_pkthdr.csum_flags = 0;
		}
	}

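	/*
	 * For TCP, CSUM_DATA_VALID together with CSUM_PSEUDO_HDR and a
	 * csum_data of 0xffff is the stack's convention for "payload
	 * checksum already verified in full", so the stack skips its own
	 * software verification of the packet.
	 */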
	if (rx_desc->status & E1000_RXD_STAT_TCPCS) {
		/* Did it pass? */
		if (!(rx_desc->errors & E1000_RXD_ERR_TCPE)) {
			mp->m_pkthdr.csum_flags |=
			    (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
			mp->m_pkthdr.csum_data = htons(0xffff);
		}
	}
}

static void
em_enable_vlans(struct adapter *adapter)
{
	uint32_t ctrl;

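	/*
	 * VET holds the 802.1Q Ether Type the hardware matches on, and
	 * CTRL.VME turns on VLAN mode so tagged frames are recognized.
	 */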
	E1000_WRITE_REG(&adapter->hw, VET, ETHERTYPE_VLAN);

	ctrl = E1000_READ_REG(&adapter->hw, CTRL);
	ctrl |= E1000_CTRL_VME;
	E1000_WRITE_REG(&adapter->hw, CTRL, ctrl);
}

static void
em_disable_vlans(struct adapter *adapter)
{
	uint32_t ctrl;

	ctrl = E1000_READ_REG(&adapter->hw, CTRL);
	ctrl &= ~E1000_CTRL_VME;
	E1000_WRITE_REG(&adapter->hw, CTRL, ctrl);
}

static void
em_enable_intr(struct adapter *adapter)
{
	E1000_WRITE_REG(&adapter->hw, IMS, (IMS_ENABLE_MASK));
}

static void
em_disable_intr(struct adapter *adapter)
{
	/*
	 * The first version of the 82542 had an erratum where, when link
	 * was forced, it would stay up even if the cable was disconnected.
	 * Sequence errors were used to detect the disconnect, after which
	 * the driver would unforce the link. This code is in the ISR; for
	 * it to work correctly the Sequence error interrupt had to be
	 * enabled all the time.
	 */

	if (adapter->hw.mac_type == em_82542_rev2_0)
		E1000_WRITE_REG(&adapter->hw, IMC,
		    (0xffffffff & ~E1000_IMC_RXSEQ));
	else
		E1000_WRITE_REG(&adapter->hw, IMC, 0xffffffff);
}

static int
em_is_valid_ether_addr(uint8_t *addr)
{
	char zero_addr[6] = { 0, 0, 0, 0, 0, 0 };

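	/* Reject group (multicast/broadcast) addresses and all-zeros. */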
	if ((addr[0] & 1) || (!bcmp(addr, zero_addr, ETHER_ADDR_LEN))) {
		return (FALSE);
	}

	return (TRUE);
}

void
em_write_pci_cfg(struct em_hw *hw, uint32_t reg, uint16_t *value)
{
	pci_write_config(((struct em_osdep *)hw->back)->dev, reg, *value, 2);
}

void
em_read_pci_cfg(struct em_hw *hw, uint32_t reg, uint16_t *value)
{
	*value = pci_read_config(((struct em_osdep *)hw->back)->dev, reg, 2);
}

void
em_pci_set_mwi(struct em_hw *hw)
{
	pci_write_config(((struct em_osdep *)hw->back)->dev, PCIR_COMMAND,
	    (hw->pci_cmd_word | CMD_MEM_WRT_INVALIDATE), 2);
}

void
em_pci_clear_mwi(struct em_hw *hw)
{
	pci_write_config(((struct em_osdep *)hw->back)->dev, PCIR_COMMAND,
	    (hw->pci_cmd_word & ~CMD_MEM_WRT_INVALIDATE), 2);
}

/*********************************************************************
 * 82544 coexistence issue workaround.
 *    There are two issues:
 *       1. The transmit hang issue.
 *          To detect it, use the equation
 *          SIZE[3:0] + ADDR[2:0] = SUM[3:0].
 *          If SUM[3:0] is between 1 and 4, the issue will occur.
 *
 *       2. The DAC issue.
 *          To detect it, use the same equation
 *          SIZE[3:0] + ADDR[2:0] = SUM[3:0].
 *          If SUM[3:0] is between 9 and 0xC, the issue will occur.
 *
 *    WORKAROUND:
 *          Make sure the buffer does not end at 1,2,3,4 (hang) or
 *          9,a,b,c (DAC).
 *********************************************************************/
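/*
 * Illustrative example: a buffer whose address ends in 0x2 with a length
 * of 0x10 gives SUM = (0x2 + 0x0) & 0xF = 0x2, which falls in the hang
 * range 1-4, so em_fill_descriptors() below carves the last 4 bytes into
 * a second descriptor instead of sending a single descriptor.
 */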
static uint32_t
em_fill_descriptors(bus_addr_t address, uint32_t length,
    PDESC_ARRAY desc_array)
{
	uint32_t safe_terminator;

	/* The issue is sensitive to both length and address. */
	/* A transfer of 4 bytes or less cannot be split; send it as is. */
	if (length <= 4) {
		desc_array->descriptor[0].address = address;
		desc_array->descriptor[0].length = length;
		desc_array->elements = 1;
		return (desc_array->elements);
	}
	safe_terminator =
	    (uint32_t)((((uint32_t)address & 0x7) + (length & 0xF)) & 0xF);
	/* If the terminator does not fall in 0x1-0x4 or 0x9-0xC, it is safe. */
	if (safe_terminator == 0 ||
	    (safe_terminator > 4 && safe_terminator < 9) ||
	    (safe_terminator > 0xC && safe_terminator <= 0xF)) {
		desc_array->descriptor[0].address = address;
		desc_array->descriptor[0].length = length;
		desc_array->elements = 1;
		return (desc_array->elements);
	}

	desc_array->descriptor[0].address = address;
	desc_array->descriptor[0].length = length - 4;
	desc_array->descriptor[1].address = address + (length - 4);
	desc_array->descriptor[1].length = 4;
	desc_array->elements = 2;
	return (desc_array->elements);
}

/**********************************************************************
 *
 *  Update the board statistics counters.
 *
 **********************************************************************/
static void
em_update_stats_counters(struct adapter *adapter)
{
	struct ifnet   *ifp;

	if (adapter->hw.media_type == em_media_type_copper ||
	    (E1000_READ_REG(&adapter->hw, STATUS) & E1000_STATUS_LU)) {
		adapter->stats.symerrs += E1000_READ_REG(&adapter->hw, SYMERRS);
		adapter->stats.sec += E1000_READ_REG(&adapter->hw, SEC);
	}
	adapter->stats.crcerrs += E1000_READ_REG(&adapter->hw, CRCERRS);
	adapter->stats.mpc += E1000_READ_REG(&adapter->hw, MPC);
	adapter->stats.scc += E1000_READ_REG(&adapter->hw, SCC);
	adapter->stats.ecol += E1000_READ_REG(&adapter->hw, ECOL);

	adapter->stats.mcc += E1000_READ_REG(&adapter->hw, MCC);
	adapter->stats.latecol += E1000_READ_REG(&adapter->hw, LATECOL);
	adapter->stats.colc += E1000_READ_REG(&adapter->hw, COLC);
	adapter->stats.dc += E1000_READ_REG(&adapter->hw, DC);
	adapter->stats.rlec += E1000_READ_REG(&adapter->hw, RLEC);
	adapter->stats.xonrxc += E1000_READ_REG(&adapter->hw, XONRXC);
	adapter->stats.xontxc += E1000_READ_REG(&adapter->hw, XONTXC);
	adapter->stats.xoffrxc += E1000_READ_REG(&adapter->hw, XOFFRXC);
	adapter->stats.xofftxc += E1000_READ_REG(&adapter->hw, XOFFTXC);
	adapter->stats.fcruc += E1000_READ_REG(&adapter->hw, FCRUC);
	adapter->stats.prc64 += E1000_READ_REG(&adapter->hw, PRC64);
	adapter->stats.prc127 += E1000_READ_REG(&adapter->hw, PRC127);
	adapter->stats.prc255 += E1000_READ_REG(&adapter->hw, PRC255);
	adapter->stats.prc511 += E1000_READ_REG(&adapter->hw, PRC511);
	adapter->stats.prc1023 += E1000_READ_REG(&adapter->hw, PRC1023);
	adapter->stats.prc1522 += E1000_READ_REG(&adapter->hw, PRC1522);
	adapter->stats.gprc += E1000_READ_REG(&adapter->hw, GPRC);
	adapter->stats.bprc += E1000_READ_REG(&adapter->hw, BPRC);
	adapter->stats.mprc += E1000_READ_REG(&adapter->hw, MPRC);
	adapter->stats.gptc += E1000_READ_REG(&adapter->hw, GPTC);

	/*
	 * For the 64-bit byte counters the low dword must be read first;
	 * both registers clear on the read of the high dword.
	 */

	adapter->stats.gorcl += E1000_READ_REG(&adapter->hw, GORCL);
	adapter->stats.gorch += E1000_READ_REG(&adapter->hw, GORCH);
	adapter->stats.gotcl += E1000_READ_REG(&adapter->hw, GOTCL);
	adapter->stats.gotch += E1000_READ_REG(&adapter->hw, GOTCH);

	adapter->stats.rnbc += E1000_READ_REG(&adapter->hw, RNBC);
	adapter->stats.ruc += E1000_READ_REG(&adapter->hw, RUC);
	adapter->stats.rfc += E1000_READ_REG(&adapter->hw, RFC);
	adapter->stats.roc += E1000_READ_REG(&adapter->hw, ROC);
	adapter->stats.rjc += E1000_READ_REG(&adapter->hw, RJC);

	adapter->stats.torl += E1000_READ_REG(&adapter->hw, TORL);
	adapter->stats.torh += E1000_READ_REG(&adapter->hw, TORH);
	adapter->stats.totl += E1000_READ_REG(&adapter->hw, TOTL);
	adapter->stats.toth += E1000_READ_REG(&adapter->hw, TOTH);

	adapter->stats.tpr += E1000_READ_REG(&adapter->hw, TPR);
	adapter->stats.tpt += E1000_READ_REG(&adapter->hw, TPT);
	adapter->stats.ptc64 += E1000_READ_REG(&adapter->hw, PTC64);
	adapter->stats.ptc127 += E1000_READ_REG(&adapter->hw, PTC127);
	adapter->stats.ptc255 += E1000_READ_REG(&adapter->hw, PTC255);
	adapter->stats.ptc511 += E1000_READ_REG(&adapter->hw, PTC511);
	adapter->stats.ptc1023 += E1000_READ_REG(&adapter->hw, PTC1023);
	adapter->stats.ptc1522 += E1000_READ_REG(&adapter->hw, PTC1522);
	adapter->stats.mptc += E1000_READ_REG(&adapter->hw, MPTC);
	adapter->stats.bptc += E1000_READ_REG(&adapter->hw, BPTC);

	if (adapter->hw.mac_type >= em_82543) {
		adapter->stats.algnerrc += E1000_READ_REG(&adapter->hw, ALGNERRC);
		adapter->stats.rxerrc += E1000_READ_REG(&adapter->hw, RXERRC);
		adapter->stats.tncrs += E1000_READ_REG(&adapter->hw, TNCRS);
		adapter->stats.cexterr += E1000_READ_REG(&adapter->hw, CEXTERR);
		adapter->stats.tsctc += E1000_READ_REG(&adapter->hw, TSCTC);
		adapter->stats.tsctfc += E1000_READ_REG(&adapter->hw, TSCTFC);
	}
	ifp = adapter->ifp;

	ifp->if_collisions = adapter->stats.colc;

	/* Rx Errors */
	ifp->if_ierrors = adapter->stats.rxerrc + adapter->stats.crcerrs +
	    adapter->stats.algnerrc + adapter->stats.ruc + adapter->stats.roc +
	    adapter->stats.mpc + adapter->stats.cexterr;

	/* Tx Errors */
	ifp->if_oerrors = adapter->stats.ecol + adapter->stats.latecol +
	    adapter->watchdog_events;
}

/**********************************************************************
 *
 *  This routine is called only when em_display_debug_stats is enabled.
 *  This routine provides a way to take a look at important statistics
 *  maintained by the driver and hardware.
 *
 **********************************************************************/
static void
em_print_debug_info(struct adapter *adapter)
{
	device_t dev = adapter->dev;
	uint8_t *hw_addr = adapter->hw.hw_addr;

	device_printf(dev, "Adapter hardware address = %p \n", hw_addr);
	device_printf(dev, "CTRL = 0x%x RCTL = 0x%x \n",
	    E1000_READ_REG(&adapter->hw, CTRL),
	    E1000_READ_REG(&adapter->hw, RCTL));
	device_printf(dev, "Packet buffer = Tx=%dk Rx=%dk \n",
	    ((E1000_READ_REG(&adapter->hw, PBA) & 0xffff0000) >> 16),
	    (E1000_READ_REG(&adapter->hw, PBA) & 0xffff));
	device_printf(dev, "Flow control watermarks high = %d low = %d\n",
	    adapter->hw.fc_high_water,
	    adapter->hw.fc_low_water);
	device_printf(dev, "tx_int_delay = %d, tx_abs_int_delay = %d\n",
	    E1000_READ_REG(&adapter->hw, TIDV),
	    E1000_READ_REG(&adapter->hw, TADV));
	device_printf(dev, "rx_int_delay = %d, rx_abs_int_delay = %d\n",
	    E1000_READ_REG(&adapter->hw, RDTR),
	    E1000_READ_REG(&adapter->hw, RADV));
	device_printf(dev, "fifo workaround = %lld, fifo_reset_count = %lld\n",
	    (long long)adapter->tx_fifo_wrk_cnt,
	    (long long)adapter->tx_fifo_reset_cnt);
	device_printf(dev, "hw tdh = %d, hw tdt = %d\n",
	    E1000_READ_REG(&adapter->hw, TDH),
	    E1000_READ_REG(&adapter->hw, TDT));
	device_printf(dev, "Num Tx descriptors avail = %d\n",
	    adapter->num_tx_desc_avail);
	device_printf(dev, "Tx Descriptors not avail1 = %ld\n",
	    adapter->no_tx_desc_avail1);
	device_printf(dev, "Tx Descriptors not avail2 = %ld\n",
	    adapter->no_tx_desc_avail2);
	device_printf(dev, "Std mbuf failed = %ld\n",
	    adapter->mbuf_alloc_failed);
	device_printf(dev, "Std mbuf cluster failed = %ld\n",
	    adapter->mbuf_cluster_failed);
}

static void
em_print_hw_stats(struct adapter *adapter)
{
	device_t dev = adapter->dev;

	device_printf(dev, "Excessive collisions = %lld\n",
	    (long long)adapter->stats.ecol);
	device_printf(dev, "Symbol errors = %lld\n",
	    (long long)adapter->stats.symerrs);
	device_printf(dev, "Sequence errors = %lld\n",
	    (long long)adapter->stats.sec);
	device_printf(dev, "Defer count = %lld\n", (long long)adapter->stats.dc);

	device_printf(dev, "Missed Packets = %lld\n", (long long)adapter->stats.mpc);
	device_printf(dev, "Receive No Buffers = %lld\n",
	    (long long)adapter->stats.rnbc);
	/* RLEC is inaccurate on some hardware, so calculate our own. */
	device_printf(dev, "Receive Length Errors = %lld\n",
	    ((long long)adapter->stats.roc + (long long)adapter->stats.ruc));
	device_printf(dev, "Receive errors = %lld\n",
	    (long long)adapter->stats.rxerrc);
	device_printf(dev, "Crc errors = %lld\n", (long long)adapter->stats.crcerrs);
	device_printf(dev, "Alignment errors = %lld\n",
	    (long long)adapter->stats.algnerrc);
	device_printf(dev, "Carrier extension errors = %lld\n",
	    (long long)adapter->stats.cexterr);
	device_printf(dev, "RX overruns = %ld\n", adapter->rx_overruns);
	device_printf(dev, "watchdog timeouts = %ld\n", adapter->watchdog_events);

	device_printf(dev, "XON Rcvd = %lld\n", (long long)adapter->stats.xonrxc);
	device_printf(dev, "XON Xmtd = %lld\n", (long long)adapter->stats.xontxc);
	device_printf(dev, "XOFF Rcvd = %lld\n", (long long)adapter->stats.xoffrxc);
	device_printf(dev, "XOFF Xmtd = %lld\n", (long long)adapter->stats.xofftxc);

	device_printf(dev, "Good Packets Rcvd = %lld\n",
	    (long long)adapter->stats.gprc);
	device_printf(dev, "Good Packets Xmtd = %lld\n",
	    (long long)adapter->stats.gptc);
	device_printf(dev, "TSO Contexts Xmtd = %lld\n",
	    (long long)adapter->stats.tsctc);
	device_printf(dev, "TSO Contexts Failed = %lld\n",
	    (long long)adapter->stats.tsctfc);
}

static int
em_sysctl_debug_info(SYSCTL_HANDLER_ARGS)
{
	struct adapter *adapter;
	int error;
	int result;

	result = -1;
	error = sysctl_handle_int(oidp, &result, 0, req);

	if (error || !req->newptr)
		return (error);

	if (result == 1) {
		adapter = (struct adapter *)arg1;
		em_print_debug_info(adapter);
	}

	return (error);
}

static int
em_sysctl_stats(SYSCTL_HANDLER_ARGS)
{
	struct adapter *adapter;
	int error;
	int result;

	result = -1;
	error = sysctl_handle_int(oidp, &result, 0, req);

	if (error || !req->newptr)
		return (error);

	if (result == 1) {
		adapter = (struct adapter *)arg1;
		em_print_hw_stats(adapter);
	}

	return (error);
}

static int
em_sysctl_int_delay(SYSCTL_HANDLER_ARGS)
{
	struct em_int_delay_info *info;
	struct adapter *adapter;
	uint32_t regval;
	int error;
	int usecs;
	int ticks;

	info = (struct em_int_delay_info *)arg1;
	usecs = info->value;
	error = sysctl_handle_int(oidp, &usecs, 0, req);
	if (error != 0 || req->newptr == NULL)
		return (error);
	if (usecs < 0 || usecs > E1000_TICKS_TO_USECS(65535))
		return (EINVAL);
	info->value = usecs;
	ticks = E1000_USECS_TO_TICKS(usecs);
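	/*
	 * Note: the delay registers count in hardware ticks (1.024 us
	 * units on these parts), hence the usecs-to-ticks conversion
	 * above before the value is merged into the register below.
	 */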

	adapter = info->adapter;

	EM_LOCK(adapter);
	regval = E1000_READ_OFFSET(&adapter->hw, info->offset);
	regval = (regval & ~0xffff) | (ticks & 0xffff);
	/* Handle a few special cases. */
	switch (info->offset) {
	case E1000_RDTR:
	case E1000_82542_RDTR:
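		/*
		 * Clarifying note: setting the flush-partial-descriptor
		 * bit along with the new delay makes any pending receive
		 * descriptor write-back complete promptly.
		 */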
		regval |= E1000_RDT_FPDB;
		break;
	case E1000_TIDV:
	case E1000_82542_TIDV:
		if (ticks == 0) {
			adapter->txd_cmd &= ~E1000_TXD_CMD_IDE;
			/* Don't write 0 into the TIDV register. */
			regval++;
		} else
			adapter->txd_cmd |= E1000_TXD_CMD_IDE;
		break;
	}
	E1000_WRITE_OFFSET(&adapter->hw, info->offset, regval);
	EM_UNLOCK(adapter);
	return (0);
}

static void
em_add_int_delay_sysctl(struct adapter *adapter, const char *name,
    const char *description, struct em_int_delay_info *info,
    int offset, int value)
{
	info->adapter = adapter;
	info->offset = offset;
	info->value = value;
	SYSCTL_ADD_PROC(device_get_sysctl_ctx(adapter->dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
	    OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW,
	    info, 0, em_sysctl_int_delay, "I", description);
}

#ifndef DEVICE_POLLING
static void
em_add_int_process_limit(struct adapter *adapter, const char *name,
    const char *description, int *limit, int value)
{
	*limit = value;
	SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
	    OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW, limit, value, description);
}
#endif
