/**************************************************************************

Copyright (c) 2001-2006, Intel Corporation
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

 1. Redistributions of source code must retain the above copyright notice,
    this list of conditions and the following disclaimer.

 2. Redistributions in binary form must reproduce the above copyright
    notice, this list of conditions and the following disclaimer in the
    documentation and/or other materials provided with the distribution.

 3. Neither the name of the Intel Corporation nor the names of its
    contributors may be used to endorse or promote products derived from
    this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.

***************************************************************************/

/*$FreeBSD: head/sys/dev/em/if_em.c 162206 2006-09-10 19:23:27Z pdeuskar $*/

#ifdef HAVE_KERNEL_OPTION_HEADERS
#include "opt_device_polling.h"
#endif

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/bus.h>
#include <sys/endian.h>
#include <sys/kernel.h>
#include <sys/kthread.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/module.h>
#include <sys/rman.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/sysctl.h>
#include <sys/taskqueue.h>

#include <machine/bus.h>
#include <machine/resource.h>

#include <net/bpf.h>
#include <net/ethernet.h>
#include <net/if.h>
#include <net/if_arp.h>
#include <net/if_dl.h>
#include <net/if_media.h>

#include <net/if_types.h>
#include <net/if_vlan_var.h>

#include <netinet/in_systm.h>
#include <netinet/in.h>
#include <netinet/if_ether.h>
#include <netinet/ip.h>
#include <netinet/tcp.h>
#include <netinet/udp.h>

#include <machine/in_cksum.h>
#include <dev/pci/pcivar.h>
#include <dev/pci/pcireg.h>
#include <dev/em/if_em_hw.h>
#include <dev/em/if_em.h>

/*********************************************************************
 *  Set this to one to display debug statistics
 *********************************************************************/
int	em_display_debug_stats = 0;

/*********************************************************************
 *  Driver version
 *********************************************************************/

char em_driver_version[] = "Version - 6.1.4 - TSO";


/*********************************************************************
 *  PCI Device ID Table
 *
 *  Used by probe to select devices to load on
 *  Last field stores an index into em_strings
 *  Last entry must be all 0s
 *
 *  { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index }
 *********************************************************************/

static em_vendor_info_t em_vendor_info_array[] =
{
	/* Intel(R) PRO/1000 Network Connection */
	{ 0x8086, E1000_DEV_ID_82540EM,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82540EM_LOM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82540EP,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82540EP_LOM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82540EP_LP,	PCI_ANY_ID, PCI_ANY_ID, 0},

	{ 0x8086, E1000_DEV_ID_82541EI,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82541ER,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82541ER_LOM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82541EI_MOBILE,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82541GI,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82541GI_LF,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82541GI_MOBILE,	PCI_ANY_ID, PCI_ANY_ID, 0},

	{ 0x8086, E1000_DEV_ID_82542,		PCI_ANY_ID, PCI_ANY_ID, 0},

	{ 0x8086, E1000_DEV_ID_82543GC_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82543GC_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},

	{ 0x8086, E1000_DEV_ID_82544EI_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82544EI_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82544GC_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82544GC_LOM,	PCI_ANY_ID, PCI_ANY_ID, 0},

	{ 0x8086, E1000_DEV_ID_82545EM_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82545EM_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82545GM_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82545GM_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82545GM_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},

	{ 0x8086, E1000_DEV_ID_82546EB_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82546EB_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82546EB_QUAD_COPPER, PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82546GB_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82546GB_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82546GB_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82546GB_PCIE,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82546GB_QUAD_COPPER, PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82546GB_QUAD_COPPER_KSP3,
						PCI_ANY_ID, PCI_ANY_ID, 0},

	{ 0x8086, E1000_DEV_ID_82547EI,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82547EI_MOBILE,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82547GI,		PCI_ANY_ID, PCI_ANY_ID, 0},

	{ 0x8086, E1000_DEV_ID_82571EB_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER,
						PCI_ANY_ID, PCI_ANY_ID, 0},

	{ 0x8086, E1000_DEV_ID_82572EI_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82572EI_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82572EI_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82572EI,		PCI_ANY_ID, PCI_ANY_ID, 0},

	{ 0x8086, E1000_DEV_ID_82573E,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82573E_IAMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82573L,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_SPT,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_SPT,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_DPT,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_DPT,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IGP_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IGP_C,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IFE,	PCI_ANY_ID, PCI_ANY_ID, 0},

	/* required last entry */
	{ 0, 0, 0, 0, 0}
};

/*********************************************************************
 *  Table of branding strings for all supported NICs.
 *********************************************************************/

static char *em_strings[] = {
	"Intel(R) PRO/1000 Network Connection"
};

/*********************************************************************
 *  Function prototypes
 *********************************************************************/
static int	em_probe(device_t);
static int	em_attach(device_t);
static int	em_detach(device_t);
static int	em_shutdown(device_t);
static int	em_suspend(device_t);
static int	em_resume(device_t);
static void	em_start(struct ifnet *);
static void	em_start_locked(struct ifnet *ifp);
static int	em_ioctl(struct ifnet *, u_long, caddr_t);
static void	em_watchdog(struct ifnet *);
static void	em_init(void *);
static void	em_init_locked(struct adapter *);
static void	em_stop(void *);
static void	em_media_status(struct ifnet *, struct ifmediareq *);
static int	em_media_change(struct ifnet *);
static void	em_identify_hardware(struct adapter *);
static int	em_allocate_pci_resources(struct adapter *);
static int	em_allocate_intr(struct adapter *);
static void	em_free_intr(struct adapter *);
static void	em_free_pci_resources(struct adapter *);
static void	em_local_timer(void *);
static int	em_hardware_init(struct adapter *);
static void	em_setup_interface(device_t, struct adapter *);
static int	em_setup_transmit_structures(struct adapter *);
static void	em_initialize_transmit_unit(struct adapter *);
static int	em_setup_receive_structures(struct adapter *);
static void	em_initialize_receive_unit(struct adapter *);
static void	em_enable_intr(struct adapter *);
static void	em_disable_intr(struct adapter *);
static void	em_free_transmit_structures(struct adapter *);
static void	em_free_receive_structures(struct adapter *);
static void	em_update_stats_counters(struct adapter *);
static void	em_txeof(struct adapter *);
static int	em_allocate_receive_structures(struct adapter *);
static int	em_allocate_transmit_structures(struct adapter *);
static int	em_rxeof(struct adapter *, int);
#ifndef __NO_STRICT_ALIGNMENT
static int	em_fixup_rx(struct adapter *);
#endif
static void	em_receive_checksum(struct adapter *, struct em_rx_desc *,
		    struct mbuf *);
static void	em_transmit_checksum_setup(struct adapter *, struct mbuf *,
		    uint32_t *, uint32_t *);
static boolean_t em_tso_setup(struct adapter *, struct mbuf *,
		    uint32_t *, uint32_t *);
static void	em_set_promisc(struct adapter *);
static void	em_disable_promisc(struct adapter *);
static void	em_set_multi(struct adapter *);
static void	em_print_hw_stats(struct adapter *);
static void	em_update_link_status(struct adapter *);
static int	em_get_buf(struct adapter *, int);
static void	em_enable_vlans(struct adapter *);
static void	em_disable_vlans(struct adapter *);
static int	em_encap(struct adapter *, struct mbuf **);
static void	em_smartspeed(struct adapter *);
static int	em_82547_fifo_workaround(struct adapter *, int);
static void	em_82547_update_fifo_head(struct adapter *, int);
static int	em_82547_tx_fifo_reset(struct adapter *);
static void	em_82547_move_tail(void *arg);
static void	em_82547_move_tail_locked(struct adapter *);
static int	em_dma_malloc(struct adapter *, bus_size_t,
		struct em_dma_alloc *, int);
static void	em_dma_free(struct adapter *, struct em_dma_alloc *);
static void	em_print_debug_info(struct adapter *);
static int	em_is_valid_ether_addr(uint8_t *);
static int	em_sysctl_stats(SYSCTL_HANDLER_ARGS);
static int	em_sysctl_debug_info(SYSCTL_HANDLER_ARGS);
static uint32_t	em_fill_descriptors(bus_addr_t address, uint32_t length,
		    PDESC_ARRAY desc_array);
static int	em_sysctl_int_delay(SYSCTL_HANDLER_ARGS);
static void	em_add_int_delay_sysctl(struct adapter *, const char *,
		const char *, struct em_int_delay_info *, int, int);

/*
 * Fast interrupt handler and legacy ithread/polling modes are
 * mutually exclusive.
 */
#ifdef DEVICE_POLLING
static poll_handler_t em_poll;
static void	em_intr(void *);
#else
static void	em_intr_fast(void *);
static void	em_add_int_process_limit(struct adapter *, const char *,
		const char *, int *, int);
static void	em_handle_rxtx(void *context, int pending);
static void	em_handle_link(void *context, int pending);
#endif

/*********************************************************************
 *  FreeBSD Device Interface Entry Points
 *********************************************************************/

static device_method_t em_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe, em_probe),
	DEVMETHOD(device_attach, em_attach),
	DEVMETHOD(device_detach, em_detach),
	DEVMETHOD(device_shutdown, em_shutdown),
	DEVMETHOD(device_suspend, em_suspend),
	DEVMETHOD(device_resume, em_resume),
	{0, 0}
};

static driver_t em_driver = {
	"em", em_methods, sizeof(struct adapter),
};

static devclass_t em_devclass;
DRIVER_MODULE(em, pci, em_driver, em_devclass, 0, 0);
MODULE_DEPEND(em, pci, 1, 1, 1);
MODULE_DEPEND(em, ether, 1, 1, 1);

/*********************************************************************
 *  Tunable default values.
 *********************************************************************/

#define E1000_TICKS_TO_USECS(ticks)	((1024 * (ticks) + 500) / 1000)
#define E1000_USECS_TO_TICKS(usecs)	((1000 * (usecs) + 512) / 1024)
#define M_TSO_LEN			66
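
/*
 * The interrupt delay registers count in 1.024 usec ticks, hence the
 * 1024/1000 scaling in the conversions above; for example,
 * E1000_TICKS_TO_USECS(64) == 66 and E1000_USECS_TO_TICKS(66) == 64.
 */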

static int em_tx_int_delay_dflt = E1000_TICKS_TO_USECS(EM_TIDV);
static int em_rx_int_delay_dflt = E1000_TICKS_TO_USECS(EM_RDTR);
static int em_tx_abs_int_delay_dflt = E1000_TICKS_TO_USECS(EM_TADV);
static int em_rx_abs_int_delay_dflt = E1000_TICKS_TO_USECS(EM_RADV);
static int em_rxd = EM_DEFAULT_RXD;
static int em_txd = EM_DEFAULT_TXD;
static int em_smart_pwr_down = FALSE;

TUNABLE_INT("hw.em.tx_int_delay", &em_tx_int_delay_dflt);
TUNABLE_INT("hw.em.rx_int_delay", &em_rx_int_delay_dflt);
TUNABLE_INT("hw.em.tx_abs_int_delay", &em_tx_abs_int_delay_dflt);
TUNABLE_INT("hw.em.rx_abs_int_delay", &em_rx_abs_int_delay_dflt);
TUNABLE_INT("hw.em.rxd", &em_rxd);
TUNABLE_INT("hw.em.txd", &em_txd);
TUNABLE_INT("hw.em.smart_pwr_down", &em_smart_pwr_down);
#ifndef DEVICE_POLLING
static int em_rx_process_limit = 100;
TUNABLE_INT("hw.em.rx_process_limit", &em_rx_process_limit);
#endif

/*********************************************************************
 *  Device identification routine
 *
 *  em_probe determines whether the driver should be loaded on an
 *  adapter, based on the PCI vendor/device ID of the adapter.
 *
 *  return BUS_PROBE_DEFAULT on success, positive on failure
 *********************************************************************/

static int
em_probe(device_t dev)
{
	char		adapter_name[60];
	uint16_t	pci_vendor_id = 0;
	uint16_t	pci_device_id = 0;
	uint16_t	pci_subvendor_id = 0;
	uint16_t	pci_subdevice_id = 0;
	em_vendor_info_t *ent;

	INIT_DEBUGOUT("em_probe: begin");

	pci_vendor_id = pci_get_vendor(dev);
	if (pci_vendor_id != EM_VENDOR_ID)
		return (ENXIO);

	pci_device_id = pci_get_device(dev);
	pci_subvendor_id = pci_get_subvendor(dev);
	pci_subdevice_id = pci_get_subdevice(dev);

	ent = em_vendor_info_array;
	while (ent->vendor_id != 0) {
		if ((pci_vendor_id == ent->vendor_id) &&
		    (pci_device_id == ent->device_id) &&

		    ((pci_subvendor_id == ent->subvendor_id) ||
		    (ent->subvendor_id == PCI_ANY_ID)) &&

		    ((pci_subdevice_id == ent->subdevice_id) ||
		    (ent->subdevice_id == PCI_ANY_ID))) {
			sprintf(adapter_name, "%s %s",
				em_strings[ent->index],
				em_driver_version);
			device_set_desc_copy(dev, adapter_name);
			return (BUS_PROBE_DEFAULT);
		}
		ent++;
	}

	return (ENXIO);
}

/*********************************************************************
 *  Device initialization routine
 *
 *  The attach entry point is called when the driver is being loaded.
 *  This routine identifies the type of hardware, allocates all resources
 *  and initializes the hardware.
 *
 *  return 0 on success, positive on failure
 *********************************************************************/

static int
em_attach(device_t dev)
{
	struct adapter	*adapter;
	int		tsize, rsize;
	int		error = 0;

	INIT_DEBUGOUT("em_attach: begin");

	adapter = device_get_softc(dev);
	adapter->dev = adapter->osdep.dev = dev;
	EM_LOCK_INIT(adapter, device_get_nameunit(dev));

	/* SYSCTL stuff */
	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
	    OID_AUTO, "debug_info", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
	    em_sysctl_debug_info, "I", "Debug Information");

	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
	    OID_AUTO, "stats", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
	    em_sysctl_stats, "I", "Statistics");

	callout_init(&adapter->timer, CALLOUT_MPSAFE);
	callout_init(&adapter->tx_fifo_timer, CALLOUT_MPSAFE);

	/* Determine hardware revision */
	em_identify_hardware(adapter);

	/* Set up some sysctls for the tunable interrupt delays */
	em_add_int_delay_sysctl(adapter, "rx_int_delay",
	    "receive interrupt delay in usecs", &adapter->rx_int_delay,
	    E1000_REG_OFFSET(&adapter->hw, RDTR), em_rx_int_delay_dflt);
	em_add_int_delay_sysctl(adapter, "tx_int_delay",
	    "transmit interrupt delay in usecs", &adapter->tx_int_delay,
	    E1000_REG_OFFSET(&adapter->hw, TIDV), em_tx_int_delay_dflt);
	if (adapter->hw.mac_type >= em_82540) {
		em_add_int_delay_sysctl(adapter, "rx_abs_int_delay",
		    "receive interrupt delay limit in usecs",
		    &adapter->rx_abs_int_delay,
		    E1000_REG_OFFSET(&adapter->hw, RADV),
		    em_rx_abs_int_delay_dflt);
		em_add_int_delay_sysctl(adapter, "tx_abs_int_delay",
		    "transmit interrupt delay limit in usecs",
		    &adapter->tx_abs_int_delay,
		    E1000_REG_OFFSET(&adapter->hw, TADV),
		    em_tx_abs_int_delay_dflt);
	}

#ifndef DEVICE_POLLING
	/* Sysctls for limiting the amount of work done in the taskqueue */
	em_add_int_process_limit(adapter, "rx_processing_limit",
	    "max number of rx packets to process", &adapter->rx_process_limit,
	    em_rx_process_limit);
#endif

	/*
	 * Validate the number of transmit and receive descriptors. It
	 * must not exceed the hardware maximum, and the ring size must
	 * be a multiple of EM_DBA_ALIGN.
	 */
	if (((em_txd * sizeof(struct em_tx_desc)) % EM_DBA_ALIGN) != 0 ||
	    (adapter->hw.mac_type >= em_82544 && em_txd > EM_MAX_TXD) ||
	    (adapter->hw.mac_type < em_82544 && em_txd > EM_MAX_TXD_82543) ||
	    (em_txd < EM_MIN_TXD)) {
		device_printf(dev, "Using %d TX descriptors instead of %d!\n",
		    EM_DEFAULT_TXD, em_txd);
		adapter->num_tx_desc = EM_DEFAULT_TXD;
	} else
		adapter->num_tx_desc = em_txd;
	if (((em_rxd * sizeof(struct em_rx_desc)) % EM_DBA_ALIGN) != 0 ||
	    (adapter->hw.mac_type >= em_82544 && em_rxd > EM_MAX_RXD) ||
	    (adapter->hw.mac_type < em_82544 && em_rxd > EM_MAX_RXD_82543) ||
	    (em_rxd < EM_MIN_RXD)) {
		device_printf(dev, "Using %d RX descriptors instead of %d!\n",
		    EM_DEFAULT_RXD, em_rxd);
		adapter->num_rx_desc = EM_DEFAULT_RXD;
	} else
		adapter->num_rx_desc = em_rxd;

	adapter->hw.autoneg = DO_AUTO_NEG;
	adapter->hw.wait_autoneg_complete = WAIT_FOR_AUTO_NEG_DEFAULT;
	adapter->hw.autoneg_advertised = AUTONEG_ADV_DEFAULT;
	adapter->hw.tbi_compatibility_en = TRUE;
	adapter->rx_buffer_len = EM_RXBUFFER_2048;

	adapter->hw.phy_init_script = 1;
	adapter->hw.phy_reset_disable = FALSE;

#ifndef EM_MASTER_SLAVE
	adapter->hw.master_slave = em_ms_hw_default;
#else
	adapter->hw.master_slave = EM_MASTER_SLAVE;
#endif
	/*
	 * Set the max frame size assuming standard Ethernet-sized
	 * frames.
	 */
	adapter->hw.max_frame_size = ETHERMTU + ETHER_HDR_LEN + ETHER_CRC_LEN;

	adapter->hw.min_frame_size =
	    MINIMUM_ETHERNET_PACKET_SIZE + ETHER_CRC_LEN;

	/*
	 * This controls when hardware reports transmit completion
	 * status.
	 */
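	/*
	 * With report_tx_early set, status is presumably written back as
	 * soon as the hardware has processed the descriptor rather than
	 * once the frame is actually on the wire, which keeps descriptor
	 * reclamation prompt.
	 */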
	adapter->hw.report_tx_early = 1;
	if (em_allocate_pci_resources(adapter)) {
		device_printf(dev, "Allocation of PCI resources failed\n");
		error = ENXIO;
		goto err_pci;
	}

	/* Initialize eeprom parameters */
	em_init_eeprom_params(&adapter->hw);

	tsize = roundup2(adapter->num_tx_desc * sizeof(struct em_tx_desc),
	    EM_DBA_ALIGN);

	/* Allocate Transmit Descriptor ring */
	if (em_dma_malloc(adapter, tsize, &adapter->txdma, BUS_DMA_NOWAIT)) {
		device_printf(dev, "Unable to allocate tx_desc memory\n");
		error = ENOMEM;
		goto err_tx_desc;
	}
	adapter->tx_desc_base = (struct em_tx_desc *)adapter->txdma.dma_vaddr;

	rsize = roundup2(adapter->num_rx_desc * sizeof(struct em_rx_desc),
	    EM_DBA_ALIGN);

	/* Allocate Receive Descriptor ring */
	if (em_dma_malloc(adapter, rsize, &adapter->rxdma, BUS_DMA_NOWAIT)) {
		device_printf(dev, "Unable to allocate rx_desc memory\n");
		error = ENOMEM;
		goto err_rx_desc;
	}
	adapter->rx_desc_base = (struct em_rx_desc *)adapter->rxdma.dma_vaddr;

	/* Initialize the hardware */
	if (em_hardware_init(adapter)) {
		device_printf(dev, "Unable to initialize the hardware\n");
		error = EIO;
		goto err_hw_init;
	}

	/* Copy the permanent MAC address out of the EEPROM */
	if (em_read_mac_addr(&adapter->hw) < 0) {
		device_printf(dev, "EEPROM read error while reading MAC"
		    " address\n");
		error = EIO;
		goto err_hw_init;
	}

	if (!em_is_valid_ether_addr(adapter->hw.mac_addr)) {
		device_printf(dev, "Invalid MAC address\n");
		error = EIO;
		goto err_hw_init;
	}

	/* Setup OS specific network interface */
	em_setup_interface(dev, adapter);

	em_allocate_intr(adapter);

	/* Initialize statistics */
	em_clear_hw_cntrs(&adapter->hw);
	em_update_stats_counters(adapter);
	adapter->hw.get_link_status = 1;
	em_update_link_status(adapter);

	/* Indicate SOL/IDER usage */
	if (em_check_phy_reset_block(&adapter->hw))
		device_printf(dev,
		    "PHY reset is blocked due to SOL/IDER session.\n");

	/* Identify 82544 on PCIX */
	em_get_bus_info(&adapter->hw);
	if (adapter->hw.bus_type == em_bus_type_pcix &&
	    adapter->hw.mac_type == em_82544)
		adapter->pcix_82544 = TRUE;
	else
		adapter->pcix_82544 = FALSE;

	INIT_DEBUGOUT("em_attach: end");

	return (0);

err_hw_init:
	em_dma_free(adapter, &adapter->rxdma);
err_rx_desc:
	em_dma_free(adapter, &adapter->txdma);
err_tx_desc:
err_pci:
	em_free_intr(adapter);
	em_free_pci_resources(adapter);
	EM_LOCK_DESTROY(adapter);

	return (error);
}
592
593/*********************************************************************
594 *  Device removal routine
595 *
596 *  The detach entry point is called when the driver is being removed.
597 *  This routine stops the adapter and deallocates all the resources
598 *  that were allocated for driver operation.
599 *
600 *  return 0 on success, positive on failure
601 *********************************************************************/
602
603static int
604em_detach(device_t dev)
605{
606	struct adapter	*adapter = device_get_softc(dev);
607	struct ifnet	*ifp = adapter->ifp;
608
609	INIT_DEBUGOUT("em_detach: begin");
610
611#ifdef DEVICE_POLLING
612	if (ifp->if_capenable & IFCAP_POLLING)
613		ether_poll_deregister(ifp);
614#endif
615
616	em_free_intr(adapter);
617	EM_LOCK(adapter);
618	adapter->in_detach = 1;
619	em_stop(adapter);
620	em_phy_hw_reset(&adapter->hw);
621	EM_UNLOCK(adapter);
622	ether_ifdetach(adapter->ifp);
623
624	em_free_pci_resources(adapter);
625	bus_generic_detach(dev);
626	if_free(ifp);
627
628	/* Free Transmit Descriptor ring */
629	if (adapter->tx_desc_base) {
630		em_dma_free(adapter, &adapter->txdma);
631		adapter->tx_desc_base = NULL;
632	}
633
634	/* Free Receive Descriptor ring */
635	if (adapter->rx_desc_base) {
636		em_dma_free(adapter, &adapter->rxdma);
637		adapter->rx_desc_base = NULL;
638	}
639
640	EM_LOCK_DESTROY(adapter);
641
642	return (0);
643}
644
645/*********************************************************************
646 *
647 *  Shutdown entry point
648 *
649 **********************************************************************/
650
651static int
652em_shutdown(device_t dev)
653{
654	struct adapter *adapter = device_get_softc(dev);
655	EM_LOCK(adapter);
656	em_stop(adapter);
657	EM_UNLOCK(adapter);
658	return (0);
659}
660
661/*
662 * Suspend/resume device methods.
663 */
664static int
665em_suspend(device_t dev)
666{
667	struct adapter *adapter = device_get_softc(dev);
668
669	EM_LOCK(adapter);
670	em_stop(adapter);
671	EM_UNLOCK(adapter);
672
673	return bus_generic_suspend(dev);
674}
675
676static int
677em_resume(device_t dev)
678{
679	struct adapter *adapter = device_get_softc(dev);
680	struct ifnet *ifp = adapter->ifp;
681
682	EM_LOCK(adapter);
683	em_init_locked(adapter);
684	if ((ifp->if_flags & IFF_UP) &&
685	    (ifp->if_drv_flags & IFF_DRV_RUNNING))
686		em_start_locked(ifp);
687	EM_UNLOCK(adapter);
688
689	return bus_generic_resume(dev);
690}
691
692
693/*********************************************************************
694 *  Transmit entry point
695 *
696 *  em_start is called by the stack to initiate a transmit.
697 *  The driver will remain in this routine as long as there are
698 *  packets to transmit and transmit resources are available.
699 *  In case resources are not available stack is notified and
700 *  the packet is requeued.
701 **********************************************************************/
702
703static void
704em_start_locked(struct ifnet *ifp)
705{
706	struct adapter	*adapter = ifp->if_softc;
707	struct mbuf	*m_head;
708
709	EM_LOCK_ASSERT(adapter);
710
711	if ((ifp->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) !=
712	    IFF_DRV_RUNNING)
713		return;
714	if (!adapter->link_active)
715		return;
716
717	while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
718
719		IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
720		if (m_head == NULL)
721			break;
722		/*
723		 * em_encap() can modify our pointer, and or make it NULL on
724		 * failure.  In that event, we can't requeue.
725		 */
726		if (em_encap(adapter, &m_head)) {
727			if (m_head == NULL)
728				break;
729			ifp->if_drv_flags |= IFF_DRV_OACTIVE;
730			IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
731			break;
732		}
733
734		/* Send a copy of the frame to the BPF listener */
735		BPF_MTAP(ifp, m_head);
736
737		/* Set timeout in case hardware has problems transmitting. */
738		ifp->if_timer = EM_TX_TIMEOUT;
739	}
740}
741
742static void
743em_start(struct ifnet *ifp)
744{
745	struct adapter *adapter = ifp->if_softc;
746
747	EM_LOCK(adapter);
748	if (ifp->if_drv_flags & IFF_DRV_RUNNING)
749		em_start_locked(ifp);
750	EM_UNLOCK(adapter);
751}
752
753/*********************************************************************
754 *  Ioctl entry point
755 *
756 *  em_ioctl is called when the user wants to configure the
757 *  interface.
758 *
759 *  return 0 on success, positive on failure
760 **********************************************************************/
761
762static int
763em_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
764{
765	struct adapter	*adapter = ifp->if_softc;
766	struct ifreq *ifr = (struct ifreq *)data;
767	struct ifaddr *ifa = (struct ifaddr *)data;
768	int error = 0;
769
770	if (adapter->in_detach)
771		return (error);
772
773	switch (command) {
774	case SIOCSIFADDR:
775	case SIOCGIFADDR:
776		if (ifa->ifa_addr->sa_family == AF_INET) {
777			/*
778			 * XXX
779			 * Since resetting hardware takes a very long time
780			 * and results in link renegotiation we only
781			 * initialize the hardware only when it is absolutely
782			 * required.
783			 */
784			ifp->if_flags |= IFF_UP;
785			if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) {
786				EM_LOCK(adapter);
787				em_init_locked(adapter);
788				EM_UNLOCK(adapter);
789			}
790			arp_ifinit(ifp, ifa);
791		} else
792			error = ether_ioctl(ifp, command, data);
793		break;
794	case SIOCSIFMTU:
795	    {
796		int max_frame_size;
797		uint16_t eeprom_data = 0;
798
799		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFMTU (Set Interface MTU)");
800
801		EM_LOCK(adapter);
802		switch (adapter->hw.mac_type) {
803		case em_82573:
804			/*
805			 * 82573 only supports jumbo frames
806			 * if ASPM is disabled.
807			 */
808			em_read_eeprom(&adapter->hw, EEPROM_INIT_3GIO_3, 1,
809			    &eeprom_data);
810			if (eeprom_data & EEPROM_WORD1A_ASPM_MASK) {
811				max_frame_size = ETHER_MAX_LEN;
812				break;
813			}
814			/* Allow Jumbo frames - fall thru */
815		case em_82571:
816		case em_82572:
817		case em_80003es2lan:	/* Limit Jumbo Frame size */
818			max_frame_size = 9234;
819			break;
820		case em_ich8lan:
821			/* ICH8 does not support jumbo frames */
822			max_frame_size = ETHER_MAX_LEN;
823			break;
824		default:
825			max_frame_size = MAX_JUMBO_FRAME_SIZE;
826		}
827		if (ifr->ifr_mtu > max_frame_size - ETHER_HDR_LEN -
828		    ETHER_CRC_LEN) {
829			EM_UNLOCK(adapter);
830			error = EINVAL;
831			break;
832		}
833
834		ifp->if_mtu = ifr->ifr_mtu;
835		adapter->hw.max_frame_size =
836		ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
837		em_init_locked(adapter);
838		EM_UNLOCK(adapter);
839		break;
840	    }
841	case SIOCSIFFLAGS:
842		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFFLAGS (Set Interface Flags)");
843		EM_LOCK(adapter);
844		if (ifp->if_flags & IFF_UP) {
845			if ((ifp->if_drv_flags & IFF_DRV_RUNNING)) {
846				if ((ifp->if_flags ^ adapter->if_flags) &
847				    IFF_PROMISC) {
848					em_disable_promisc(adapter);
849					em_set_promisc(adapter);
850				}
851			} else
852				em_init_locked(adapter);
853		} else {
854			if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
855				em_stop(adapter);
856			}
857		}
858		adapter->if_flags = ifp->if_flags;
859		EM_UNLOCK(adapter);
860		break;
861	case SIOCADDMULTI:
862	case SIOCDELMULTI:
863		IOCTL_DEBUGOUT("ioctl rcv'd: SIOC(ADD|DEL)MULTI");
864		if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
865			EM_LOCK(adapter);
866			em_disable_intr(adapter);
867			em_set_multi(adapter);
868			if (adapter->hw.mac_type == em_82542_rev2_0) {
869				em_initialize_receive_unit(adapter);
870			}
871#ifdef DEVICE_POLLING
872			if (!(ifp->if_capenable & IFCAP_POLLING))
873#endif
874				em_enable_intr(adapter);
875			EM_UNLOCK(adapter);
876		}
877		break;
878	case SIOCSIFMEDIA:
879	case SIOCGIFMEDIA:
880		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCxIFMEDIA (Get/Set Interface Media)");
881		error = ifmedia_ioctl(ifp, ifr, &adapter->media, command);
882		break;
883	case SIOCSIFCAP:
884	    {
885		int mask, reinit;
886
887		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFCAP (Set Capabilities)");
888		reinit = 0;
889		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
890#ifdef DEVICE_POLLING
891		if (mask & IFCAP_POLLING) {
892			if (ifr->ifr_reqcap & IFCAP_POLLING) {
893				error = ether_poll_register(em_poll, ifp);
894				if (error)
895					return (error);
896				EM_LOCK(adapter);
897				em_disable_intr(adapter);
898				ifp->if_capenable |= IFCAP_POLLING;
899				EM_UNLOCK(adapter);
900			} else {
901				error = ether_poll_deregister(ifp);
902				/* Enable interrupt even in error case */
903				EM_LOCK(adapter);
904				em_enable_intr(adapter);
905				ifp->if_capenable &= ~IFCAP_POLLING;
906				EM_UNLOCK(adapter);
907			}
908		}
909#endif
910		if (mask & IFCAP_HWCSUM) {
911			ifp->if_capenable ^= IFCAP_HWCSUM;
912			reinit = 1;
913		}
914		if (mask & IFCAP_TSO) {
915			ifp->if_capenable ^= IFCAP_TSO;
916			reinit = 1;
917		}
918		if (mask & IFCAP_VLAN_HWTAGGING) {
919			ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
920			reinit = 1;
921		}
922		if (reinit && (ifp->if_drv_flags & IFF_DRV_RUNNING))
923			em_init(adapter);
924		VLAN_CAPABILITIES(ifp);
925		break;
926	    }
927	default:
928		error = ether_ioctl(ifp, command, data);
929		break;
930	}
931
932	return (error);
933}
934
935/*********************************************************************
936 *  Watchdog entry point
937 *
938 *  This routine is called whenever hardware quits transmitting.
939 *
940 **********************************************************************/
941
942static void
943em_watchdog(struct ifnet *ifp)
944{
945	struct adapter *adapter = ifp->if_softc;
946
947	EM_LOCK(adapter);
948	/* If we are in this routine because of pause frames, then
949	 * don't reset the hardware.
950	 */
951	if (E1000_READ_REG(&adapter->hw, STATUS) & E1000_STATUS_TXOFF) {
952		ifp->if_timer = EM_TX_TIMEOUT;
953		EM_UNLOCK(adapter);
954		return;
955	}
956
957	/*
958	 * Reclaim first as there is a possibility of losing Tx completion
959	 * interrupts. Possible cause of missing Tx completion interrupts
960	 * comes from Tx interrupt moderation mechanism(delayed interrupts)
961	 * or chipset bug.
962	 */
963	em_txeof(adapter);
964	if (adapter->num_tx_desc_avail == adapter->num_tx_desc) {
965		EM_UNLOCK(adapter);
966		return;
967	}
968
969	if (em_check_for_link(&adapter->hw) == 0)
970		device_printf(adapter->dev, "watchdog timeout -- resetting\n");
971
972	ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
973	adapter->watchdog_events++;
974
975	em_init_locked(adapter);
976	EM_UNLOCK(adapter);
977}
978
979/*********************************************************************
980 *  Init entry point
981 *
982 *  This routine is used in two ways. It is used by the stack as
983 *  init entry point in network interface structure. It is also used
984 *  by the driver as a hw/sw initialization routine to get to a
985 *  consistent state.
986 *
987 *  return 0 on success, positive on failure
988 **********************************************************************/
989
990static void
991em_init_locked(struct adapter *adapter)
992{
993	struct ifnet	*ifp = adapter->ifp;
994	device_t	dev = adapter->dev;
995	uint32_t	pba;
996
997	INIT_DEBUGOUT("em_init: begin");
998
999	EM_LOCK_ASSERT(adapter);
1000
1001	em_stop(adapter);
1002
1003	/*
1004	 * Packet Buffer Allocation (PBA)
1005	 * Writing PBA sets the receive portion of the buffer
1006	 * the remainder is used for the transmit buffer.
1007	 *
1008	 * Devices before the 82547 had a Packet Buffer of 64K.
1009	 *   Default allocation: PBA=48K for Rx, leaving 16K for Tx.
1010	 * After the 82547 the buffer was reduced to 40K.
1011	 *   Default allocation: PBA=30K for Rx, leaving 10K for Tx.
1012	 *   Note: default does not leave enough room for Jumbo Frame >10k.
1013	 */
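	/*
	 * The PBA value is the Rx allocation in KB. For example, on an
	 * 82547 with standard frames, pba below is 30, so tx_fifo_size
	 * works out to (E1000_PBA_40K - pba) << EM_PBA_BYTES_SHIFT,
	 * i.e. (40 - 30)KB = 10KB of Tx FIFO.
	 */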
	switch (adapter->hw.mac_type) {
	case em_82547:
	case em_82547_rev_2: /* 82547: Total Packet Buffer is 40K */
		if (adapter->hw.max_frame_size > EM_RXBUFFER_8192)
			pba = E1000_PBA_22K; /* 22K for Rx, 18K for Tx */
		else
			pba = E1000_PBA_30K; /* 30K for Rx, 10K for Tx */
		adapter->tx_fifo_head = 0;
		adapter->tx_head_addr = pba << EM_TX_HEAD_ADDR_SHIFT;
		adapter->tx_fifo_size =
		    (E1000_PBA_40K - pba) << EM_PBA_BYTES_SHIFT;
		break;
	case em_80003es2lan: /* 80003es2lan: Total Packet Buffer is 48K */
	case em_82571: /* 82571: Total Packet Buffer is 48K */
	case em_82572: /* 82572: Total Packet Buffer is 48K */
		pba = E1000_PBA_32K; /* 32K for Rx, 16K for Tx */
		break;
	case em_82573: /* 82573: Total Packet Buffer is 32K */
		/* Jumbo frames not supported */
		pba = E1000_PBA_12K; /* 12K for Rx, 20K for Tx */
		break;
	case em_ich8lan:
		pba = E1000_PBA_8K;
		break;
	default:
		/* Devices before 82547 had a Packet Buffer of 64K. */
		if (adapter->hw.max_frame_size > EM_RXBUFFER_8192)
			pba = E1000_PBA_40K; /* 40K for Rx, 24K for Tx */
		else
			pba = E1000_PBA_48K; /* 48K for Rx, 16K for Tx */
	}

	INIT_DEBUGOUT1("em_init: pba=%dK", pba);
	E1000_WRITE_REG(&adapter->hw, PBA, pba);

	/* Get the latest mac address; the user can use a LAA */
	bcopy(IF_LLADDR(adapter->ifp), adapter->hw.mac_addr, ETHER_ADDR_LEN);

	/* Initialize the hardware */
	if (em_hardware_init(adapter)) {
		device_printf(dev, "Unable to initialize the hardware\n");
		return;
	}
	em_update_link_status(adapter);

	if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING)
		em_enable_vlans(adapter);

	/* Prepare transmit descriptors and buffers */
	if (em_setup_transmit_structures(adapter)) {
		device_printf(dev, "Could not setup transmit structures\n");
		em_stop(adapter);
		return;
	}
	em_initialize_transmit_unit(adapter);

	/* Setup Multicast table */
	em_set_multi(adapter);

	/* Prepare receive descriptors and buffers */
	if (em_setup_receive_structures(adapter)) {
		device_printf(dev, "Could not setup receive structures\n");
		em_stop(adapter);
		return;
	}
	em_initialize_receive_unit(adapter);

	/* Don't lose promiscuous settings */
	em_set_promisc(adapter);

	ifp->if_drv_flags |= IFF_DRV_RUNNING;
	ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;

	ifp->if_hwassist = 0;
	if (adapter->hw.mac_type >= em_82543) {
		if (ifp->if_capenable & IFCAP_TXCSUM)
			ifp->if_hwassist = EM_CHECKSUM_FEATURES;
		if (ifp->if_capenable & IFCAP_TSO)
			ifp->if_hwassist |= EM_TCPSEG_FEATURES;
	}

	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
	em_clear_hw_cntrs(&adapter->hw);
#ifdef DEVICE_POLLING
	/*
	 * Only enable interrupts if we are not polling; make sure
	 * they are off otherwise.
	 */
	if (ifp->if_capenable & IFCAP_POLLING)
		em_disable_intr(adapter);
	else
#endif /* DEVICE_POLLING */
		em_enable_intr(adapter);

	/* Don't reset the phy next time init gets called */
	adapter->hw.phy_reset_disable = TRUE;
}

static void
em_init(void *arg)
{
	struct adapter *adapter = arg;

	EM_LOCK(adapter);
	em_init_locked(adapter);
	EM_UNLOCK(adapter);
}


#ifdef DEVICE_POLLING
/*********************************************************************
 *
 *  Legacy polling routine
 *
 *********************************************************************/
static void
em_poll(struct ifnet *ifp, enum poll_cmd cmd, int count)
{
	struct adapter *adapter = ifp->if_softc;
	uint32_t reg_icr;

	EM_LOCK(adapter);
	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
		EM_UNLOCK(adapter);
		return;
	}

	if (cmd == POLL_AND_CHECK_STATUS) {
		reg_icr = E1000_READ_REG(&adapter->hw, ICR);
		if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
			callout_stop(&adapter->timer);
			adapter->hw.get_link_status = 1;
			em_check_for_link(&adapter->hw);
			em_update_link_status(adapter);
			callout_reset(&adapter->timer, hz, em_local_timer,
			    adapter);
		}
	}
	em_rxeof(adapter, count);
	em_txeof(adapter);

	if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
		em_start_locked(ifp);
	EM_UNLOCK(adapter);
}

/*********************************************************************
 *
 *  Legacy Interrupt Service routine
 *
 *********************************************************************/
static void
em_intr(void *arg)
{
	struct adapter	*adapter = arg;
	struct ifnet	*ifp;
	uint32_t	reg_icr;

	EM_LOCK(adapter);

	ifp = adapter->ifp;

	if (ifp->if_capenable & IFCAP_POLLING) {
		EM_UNLOCK(adapter);
		return;
	}

	for (;;) {
		reg_icr = E1000_READ_REG(&adapter->hw, ICR);
		if (adapter->hw.mac_type >= em_82571 &&
		    (reg_icr & E1000_ICR_INT_ASSERTED) == 0)
			break;
		else if (reg_icr == 0)
			break;

		/*
		 * XXX: some laptops trigger several spurious interrupts
		 * on em(4) when in the resume cycle. The ICR register
		 * reports an all-ones value in this case. Processing such
		 * interrupts would lead to a freeze. I don't know why.
		 */
		if (reg_icr == 0xffffffff)
			break;

		if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
			em_rxeof(adapter, -1);
			em_txeof(adapter);
		}

		/* Link status change */
		if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
			callout_stop(&adapter->timer);
			adapter->hw.get_link_status = 1;
			em_check_for_link(&adapter->hw);
			em_update_link_status(adapter);
			callout_reset(&adapter->timer, hz, em_local_timer,
			    adapter);
		}

		if (reg_icr & E1000_ICR_RXO)
			adapter->rx_overruns++;
	}

	if (ifp->if_drv_flags & IFF_DRV_RUNNING &&
	    !IFQ_DRV_IS_EMPTY(&ifp->if_snd))
		em_start_locked(ifp);

	EM_UNLOCK(adapter);
}

#else  /* if not DEVICE_POLLING, then fast interrupt routines only */

static void
em_handle_link(void *context, int pending)
{
	struct adapter	*adapter = context;
	struct ifnet *ifp;

	ifp = adapter->ifp;

	EM_LOCK(adapter);

	callout_stop(&adapter->timer);
	adapter->hw.get_link_status = 1;
	em_check_for_link(&adapter->hw);
	em_update_link_status(adapter);
	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
	EM_UNLOCK(adapter);
}

static void
em_handle_rxtx(void *context, int pending)
{
	struct adapter	*adapter = context;
	struct ifnet	*ifp;

	NET_LOCK_GIANT();
	ifp = adapter->ifp;

	/*
	 * TODO:
	 * It should be possible to run the tx clean loop without the lock.
	 */
	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
		if (em_rxeof(adapter, adapter->rx_process_limit) != 0)
			taskqueue_enqueue(adapter->tq, &adapter->rxtx_task);
		EM_LOCK(adapter);
		em_txeof(adapter);

		if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
			em_start_locked(ifp);
		EM_UNLOCK(adapter);
	}

	em_enable_intr(adapter);
	NET_UNLOCK_GIANT();
}

/*********************************************************************
 *
 *  Fast Interrupt Service routine
 *
 *********************************************************************/
static void
em_intr_fast(void *arg)
{
	struct adapter	*adapter = arg;
	struct ifnet	*ifp;
	uint32_t	reg_icr;

	ifp = adapter->ifp;

	reg_icr = E1000_READ_REG(&adapter->hw, ICR);

	/* Hot eject? */
	if (reg_icr == 0xffffffff)
		return;

	/* Definitely not our interrupt. */
	if (reg_icr == 0x0)
		return;

	/*
	 * Starting with the 82571 chip, bit 31 should be used to
	 * determine whether the interrupt belongs to us.
	 */
	if (adapter->hw.mac_type >= em_82571 &&
	    (reg_icr & E1000_ICR_INT_ASSERTED) == 0)
		return;

	/*
	 * Mask interrupts until the taskqueue is finished running.  This is
	 * cheap, just assume that it is needed.  This also works around the
	 * MSI message reordering errata on certain systems.
	 */
	em_disable_intr(adapter);
	taskqueue_enqueue(adapter->tq, &adapter->rxtx_task);

	/* Link status change */
	if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC))
		taskqueue_enqueue(taskqueue_fast, &adapter->link_task);

	if (reg_icr & E1000_ICR_RXO)
		adapter->rx_overruns++;
}
#endif /* ! DEVICE_POLLING */

/*********************************************************************
 *
 *  Media Ioctl callback
 *
 *  This routine is called whenever the user queries the status of
 *  the interface using ifconfig.
 *
 **********************************************************************/
static void
em_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
{
	struct adapter *adapter = ifp->if_softc;

	INIT_DEBUGOUT("em_media_status: begin");

	EM_LOCK(adapter);
	em_check_for_link(&adapter->hw);
	em_update_link_status(adapter);

	ifmr->ifm_status = IFM_AVALID;
	ifmr->ifm_active = IFM_ETHER;

	if (!adapter->link_active) {
		EM_UNLOCK(adapter);
		return;
	}

	ifmr->ifm_status |= IFM_ACTIVE;

	if ((adapter->hw.media_type == em_media_type_fiber) ||
	    (adapter->hw.media_type == em_media_type_internal_serdes)) {
		if (adapter->hw.mac_type == em_82545)
			ifmr->ifm_active |= IFM_1000_LX | IFM_FDX;
		else
			ifmr->ifm_active |= IFM_1000_SX | IFM_FDX;
	} else {
		switch (adapter->link_speed) {
		case 10:
			ifmr->ifm_active |= IFM_10_T;
			break;
		case 100:
			ifmr->ifm_active |= IFM_100_TX;
			break;
		case 1000:
			ifmr->ifm_active |= IFM_1000_T;
			break;
		}
		if (adapter->link_duplex == FULL_DUPLEX)
			ifmr->ifm_active |= IFM_FDX;
		else
			ifmr->ifm_active |= IFM_HDX;
	}
	EM_UNLOCK(adapter);
}

/*********************************************************************
 *
 *  Media Ioctl callback
 *
 *  This routine is called when the user changes speed/duplex using
 *  the media/mediaopt options of ifconfig.
 *
 **********************************************************************/
static int
em_media_change(struct ifnet *ifp)
{
	struct adapter *adapter = ifp->if_softc;
	struct ifmedia  *ifm = &adapter->media;

	INIT_DEBUGOUT("em_media_change: begin");

	if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
		return (EINVAL);

	EM_LOCK(adapter);
	switch (IFM_SUBTYPE(ifm->ifm_media)) {
	case IFM_AUTO:
		adapter->hw.autoneg = DO_AUTO_NEG;
		adapter->hw.autoneg_advertised = AUTONEG_ADV_DEFAULT;
		break;
	case IFM_1000_LX:
	case IFM_1000_SX:
	case IFM_1000_T:
		adapter->hw.autoneg = DO_AUTO_NEG;
		adapter->hw.autoneg_advertised = ADVERTISE_1000_FULL;
		break;
	case IFM_100_TX:
		adapter->hw.autoneg = FALSE;
		adapter->hw.autoneg_advertised = 0;
		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
			adapter->hw.forced_speed_duplex = em_100_full;
		else
			adapter->hw.forced_speed_duplex = em_100_half;
		break;
	case IFM_10_T:
		adapter->hw.autoneg = FALSE;
		adapter->hw.autoneg_advertised = 0;
		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
			adapter->hw.forced_speed_duplex = em_10_full;
		else
			adapter->hw.forced_speed_duplex = em_10_half;
		break;
	default:
		device_printf(adapter->dev, "Unsupported media type\n");
	}

	/*
	 * As the speed/duplex settings may have changed, we need to
	 * reset the PHY.
	 */
	adapter->hw.phy_reset_disable = FALSE;

	em_init_locked(adapter);
	EM_UNLOCK(adapter);

	return (0);
}

/*********************************************************************
 *
 *  This routine maps the mbufs to tx descriptors.
 *
 *  return 0 on success, positive on failure
 **********************************************************************/
static int
em_encap(struct adapter *adapter, struct mbuf **m_headp)
{
	struct ifnet		*ifp = adapter->ifp;
	bus_dma_segment_t	segs[EM_MAX_SCATTER];
	bus_dmamap_t		map;
	struct em_buffer	*tx_buffer, *tx_buffer_last;
	struct em_tx_desc	*current_tx_desc;
	struct mbuf		*m_head;
	struct m_tag		*mtag;
	uint32_t		txd_upper, txd_lower, txd_used, txd_saved;
	int			nsegs, i, j;
	int			error, do_tso, tso_desc = 0;

	m_head = *m_headp;
	current_tx_desc = NULL;
	txd_upper = txd_lower = txd_used = txd_saved = 0;

	do_tso = ((m_head->m_pkthdr.csum_flags & CSUM_TSO) != 0);

	/*
	 * Force a cleanup if the number of TX descriptors
	 * available hits the threshold.
	 */
	if (adapter->num_tx_desc_avail <= EM_TX_CLEANUP_THRESHOLD) {
		em_txeof(adapter);
		if (adapter->num_tx_desc_avail <= EM_TX_CLEANUP_THRESHOLD) {
			adapter->no_tx_desc_avail1++;
			return (ENOBUFS);
		}
	}

	/* Find out if we are in vlan mode. */
	mtag = VLAN_OUTPUT_TAG(ifp, m_head);

	/*
	 * When operating in promiscuous mode, hardware encapsulation for
	 * packets is disabled.  This means we have to add the vlan
	 * encapsulation in the driver, since it will have come down from the
	 * VLAN layer with a tag instead of a VLAN header.
	 */
	if (mtag != NULL && adapter->em_insert_vlan_header) {
		struct ether_vlan_header *evl;
		struct ether_header eh;

		m_head = m_pullup(m_head, sizeof(eh));
		if (m_head == NULL) {
			*m_headp = NULL;
			return (ENOBUFS);
		}
		eh = *mtod(m_head, struct ether_header *);
		M_PREPEND(m_head, sizeof(*evl), M_DONTWAIT);
		if (m_head == NULL) {
			*m_headp = NULL;
			return (ENOBUFS);
		}
		m_head = m_pullup(m_head, sizeof(*evl));
		if (m_head == NULL) {
			*m_headp = NULL;
			return (ENOBUFS);
		}
		evl = mtod(m_head, struct ether_vlan_header *);
		bcopy(&eh, evl, sizeof(eh));
		evl->evl_proto = evl->evl_encap_proto;
		evl->evl_encap_proto = htons(ETHERTYPE_VLAN);
		evl->evl_tag = htons(VLAN_TAG_VALUE(mtag));
		m_tag_delete(m_head, mtag);
		mtag = NULL;
		*m_headp = m_head;
	}

	/*
	 * TSO workaround:
	 *  If an mbuf contains only headers, we need
	 *  to pull 4 bytes of data into it.
	 */
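	/*
	 * M_TSO_LEN (66 bytes) approximates a full Ethernet + IP + TCP
	 * header (presumably 14 + 20 + 32 with TCP options), so pulling
	 * up M_TSO_LEN + 4 should leave the headers plus at least 4
	 * bytes of payload contiguous in the first mbuf.
	 */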
1517	if (do_tso && (m_head->m_len <= M_TSO_LEN)) {
1518		m_head = m_pullup(m_head, M_TSO_LEN + 4);
1519		if (m_head == NULL)
1520			return (ENOBUFS);
1521	}
1522
1523	/*
1524	 * Map the packet for DMA.
1525	 */
1526	tx_buffer = &adapter->tx_buffer_area[adapter->next_avail_tx_desc];
1527	tx_buffer_last = tx_buffer;
1528	map = tx_buffer->map;
1529	error = bus_dmamap_load_mbuf_sg(adapter->txtag, map, *m_headp, segs,
1530	    &nsegs, BUS_DMA_NOWAIT);
1531	if (error == EFBIG) {
1532		struct mbuf *m;
1533
1534		m = m_defrag(*m_headp, M_DONTWAIT);
1535		if (m == NULL) {
1536			/* Assume m_defrag(9) used only m_get(9). */
1537			adapter->mbuf_alloc_failed++;
1538			m_freem(*m_headp);
1539			*m_headp = NULL;
1540			return (ENOBUFS);
1541		}
1542		*m_headp = m;
1543		error = bus_dmamap_load_mbuf_sg(adapter->txtag, map, *m_headp,
1544		    segs, &nsegs, BUS_DMA_NOWAIT);
1545		if (error != 0) {
1546			adapter->no_tx_dma_setup++;
1547			m_freem(*m_headp);
1548			*m_headp = NULL;
1549			return (error);
1550		}
1551	} else if (error != 0) {
1552		adapter->no_tx_dma_setup++;
1553		return (error);
1554	}
1555	if (nsegs == 0) {
1556		m_freem(*m_headp);
1557		*m_headp = NULL;
1558		return (EIO);
1559	}
1560
1561	/*
1562	 * TSO Hardware workaround, if this packet is not
1563	 * TSO, and is only a single descriptor long, and
1564	 * it follows a TSO burst, then we need to add a
1565	 * sentinel descriptor to prevent premature writeback.
1566	 */
1567	if ((do_tso == 0) && (adapter->tx_tso == TRUE)) {
1568		if (nsegs == 1)
1569			tso_desc = TRUE;
1570		adapter->tx_tso = FALSE;
1571	}
1572
1573	if (nsegs > adapter->num_tx_desc_avail - 2) {
1574		adapter->no_tx_desc_avail2++;
1575		bus_dmamap_unload(adapter->txtag, map);
1576		return (ENOBUFS);
1577	}
1578
1579	/* Do hardware assists */
1580	m_head = *m_headp;
1581	if ( ifp->if_hwassist > 0) {
1582		if (em_tso_setup(adapter, m_head, &txd_upper, &txd_lower)) {
1583			/* we need to make a final sentinel transmit desc */
1584			tso_desc = TRUE;
1585		} else
1586			em_transmit_checksum_setup(adapter,  m_head,
1587			    &txd_upper, &txd_lower);
1588	}
1589
1590	i = adapter->next_avail_tx_desc;
1591	if (adapter->pcix_82544)
1592		txd_saved = i;
1593
1594	for (j = 0; j < nsegs; j++) {
1595		bus_size_t seg_len;
1596		bus_addr_t seg_addr;
1597		/* If adapter is 82544 and on PCIX bus. */
1598		if(adapter->pcix_82544) {
1599			DESC_ARRAY	desc_array;
1600			uint32_t	array_elements, counter;
1601
1602			/*
1603			 * Check the Address and Length combination and
1604			 * split the data accordingly
1605			 */
1606			array_elements = em_fill_descriptors(segs[j].ds_addr,
1607			    segs[j].ds_len, &desc_array);
1608			for (counter = 0; counter < array_elements; counter++) {
1609				if (txd_used == adapter->num_tx_desc_avail) {
1610					adapter->next_avail_tx_desc = txd_saved;
1611					adapter->no_tx_desc_avail2++;
1612					bus_dmamap_unload(adapter->txtag, map);
1613					return (ENOBUFS);
1614				}
1615				tx_buffer = &adapter->tx_buffer_area[i];
1616				current_tx_desc = &adapter->tx_desc_base[i];
1617				current_tx_desc->buffer_addr = htole64(
1618					desc_array.descriptor[counter].address);
1619				current_tx_desc->lower.data = htole32(
1620					(adapter->txd_cmd | txd_lower |
1621					(uint16_t)desc_array.descriptor[counter].length));
1622				current_tx_desc->upper.data = htole32((txd_upper));
1623				if (++i == adapter->num_tx_desc)
1624					i = 0;
1625
1626				tx_buffer->m_head = NULL;
1627				txd_used++;
1628			}
1629		} else {
1630			tx_buffer = &adapter->tx_buffer_area[i];
1631			current_tx_desc = &adapter->tx_desc_base[i];
1632			seg_addr = htole64(segs[j].ds_addr);
1633			seg_len  = segs[j].ds_len;
1634			/*
1635			** TSO Workaround:
1636			** If this is the last descriptor, we want to
1637			** split it so we have a small final sentinel
1638			*/
1639			if (tso_desc && (j == (nsegs -1)) && (seg_len > 8)) {
1640				seg_len -= 4;
1641				current_tx_desc->buffer_addr = seg_addr;
1642				current_tx_desc->lower.data = htole32(
1643				adapter->txd_cmd | txd_lower | seg_len);
1644				current_tx_desc->upper.data =
1645				    htole32(txd_upper);
1646				if (++i == adapter->num_tx_desc)
1647					i = 0;
1648				/* Now make the sentinel */
1649				++txd_used; /* using an extra txd */
1650				current_tx_desc = &adapter->tx_desc_base[i];
1651				tx_buffer = &adapter->tx_buffer_area[i];
1652				current_tx_desc->buffer_addr =
1653				    seg_addr + seg_len;
1654				current_tx_desc->lower.data = htole32(
1655				adapter->txd_cmd | txd_lower | 4);
1656				current_tx_desc->upper.data =
1657				    htole32(txd_upper);
1658				if (++i == adapter->num_tx_desc)
1659					i = 0;
1660			} else {
1661				current_tx_desc->buffer_addr = seg_addr;
1662				current_tx_desc->lower.data = htole32(
1663				adapter->txd_cmd | txd_lower | seg_len);
1664				current_tx_desc->upper.data =
1665				    htole32(txd_upper);
1666				if (++i == adapter->num_tx_desc)
1667					i = 0;
1668			}
1669			tx_buffer->m_head = NULL;
1670		}
1671	}
1672
1673	adapter->next_avail_tx_desc = i;
1674	if (adapter->pcix_82544)
1675		adapter->num_tx_desc_avail -= txd_used;
1676	else {
1677		adapter->num_tx_desc_avail -= nsegs;
1678		if (tso_desc) /* TSO used an extra for sentinel */
1679			adapter->num_tx_desc_avail -= txd_used;
1680	}
1681
1682	if (mtag != NULL) {
1683		/* Set the vlan id. */
1684		current_tx_desc->upper.fields.special =
1685		    htole16(VLAN_TAG_VALUE(mtag));
1686
1687		/* Tell hardware to add tag. */
1688		current_tx_desc->lower.data |= htole32(E1000_TXD_CMD_VLE);
1689	}
1690
1691	tx_buffer->m_head = m_head;
1692	tx_buffer_last->map = tx_buffer->map;
1693	tx_buffer->map = map;
1694	bus_dmamap_sync(adapter->txtag, map, BUS_DMASYNC_PREWRITE);
1695
1696	/*
1697	 * Last Descriptor of Packet needs End Of Packet (EOP).
1698	 */
1699	current_tx_desc->lower.data |= htole32(E1000_TXD_CMD_EOP);
1700
1701	/*
1702	 * Advance the Transmit Descriptor Tail (Tdt), this tells the E1000
1703	 * that this frame is available to transmit.
1704	 */
1705	bus_dmamap_sync(adapter->txdma.dma_tag, adapter->txdma.dma_map,
1706	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1707	if (adapter->hw.mac_type == em_82547 && adapter->link_duplex == HALF_DUPLEX)
1708		em_82547_move_tail_locked(adapter);
1709	else {
1710		E1000_WRITE_REG(&adapter->hw, TDT, i);
1711		if (adapter->hw.mac_type == em_82547)
1712			em_82547_update_fifo_head(adapter, m_head->m_pkthdr.len);
1713	}
1714
1715	return (0);
1716}
1717
1718/*********************************************************************
1719 *
1720 * 82547 workaround to avoid controller hang in half-duplex environment.
1721 * The workaround is to avoid queuing a large packet that would span
1722 * the internal Tx FIFO ring boundary. We need to reset the FIFO pointers
1723 * in this case. We do that only when FIFO is quiescent.
1724 *
1725 **********************************************************************/
1726static void
1727em_82547_move_tail_locked(struct adapter *adapter)
1728{
1729	uint16_t hw_tdt;
1730	uint16_t sw_tdt;
1731	struct em_tx_desc *tx_desc;
1732	uint16_t length = 0;
1733	boolean_t eop = 0;
1734
1735	EM_LOCK_ASSERT(adapter);
1736
1737	hw_tdt = E1000_READ_REG(&adapter->hw, TDT);
1738	sw_tdt = adapter->next_avail_tx_desc;
1739
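	/*
	 * Walk the descriptors the stack has queued but the hardware has
	 * not yet been told about, advancing TDT one complete packet (EOP)
	 * at a time; if a packet would trip the FIFO workaround, retry
	 * from a one-tick callout instead of pushing it out now.
	 */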
1740	while (hw_tdt != sw_tdt) {
1741		tx_desc = &adapter->tx_desc_base[hw_tdt];
1742		length += tx_desc->lower.flags.length;
1743		eop = tx_desc->lower.data & E1000_TXD_CMD_EOP;
1744		if (++hw_tdt == adapter->num_tx_desc)
1745			hw_tdt = 0;
1746
1747		if (eop) {
1748			if (em_82547_fifo_workaround(adapter, length)) {
1749				adapter->tx_fifo_wrk_cnt++;
1750				callout_reset(&adapter->tx_fifo_timer, 1,
1751					em_82547_move_tail, adapter);
1752				break;
1753			}
1754			E1000_WRITE_REG(&adapter->hw, TDT, hw_tdt);
1755			em_82547_update_fifo_head(adapter, length);
1756			length = 0;
1757		}
1758	}
1759}
1760
1761static void
1762em_82547_move_tail(void *arg)
1763{
1764	struct adapter *adapter = arg;
1765
1766	EM_LOCK(adapter);
1767	em_82547_move_tail_locked(adapter);
1768	EM_UNLOCK(adapter);
1769}
1770
1771static int
1772em_82547_fifo_workaround(struct adapter *adapter, int len)
1773{
1774	int fifo_space, fifo_pkt_len;
1775
1776	fifo_pkt_len = roundup2(len + EM_FIFO_HDR, EM_FIFO_HDR);
1777
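	/*
	 * In half duplex the controller can hang when a packet, padded to
	 * FIFO-header granularity above, would overrun the space left
	 * before the Tx FIFO wrap point by EM_82547_PKT_THRESH bytes or
	 * more.  Try to reset the FIFO pointers; if that fails (FIFO not
	 * quiescent), return 1 so the caller defers the packet.
	 */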
1778	if (adapter->link_duplex == HALF_DUPLEX) {
1779		fifo_space = adapter->tx_fifo_size - adapter->tx_fifo_head;
1780
1781		if (fifo_pkt_len >= (EM_82547_PKT_THRESH + fifo_space)) {
1782			if (em_82547_tx_fifo_reset(adapter))
1783				return (0);
1784			else
1785				return (1);
1786		}
1787	}
1788
1789	return (0);
1790}
1791
1792static void
1793em_82547_update_fifo_head(struct adapter *adapter, int len)
1794{
1795	int fifo_pkt_len = roundup2(len + EM_FIFO_HDR, EM_FIFO_HDR);
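	/*
	 * Example, assuming the usual 16-byte EM_FIFO_HDR: a 60-byte runt
	 * frame consumes roundup2(60 + 16, 16) = 80 bytes of FIFO space.
	 */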
1796
1797	/* tx_fifo_head is always 16 byte aligned */
1798	adapter->tx_fifo_head += fifo_pkt_len;
1799	if (adapter->tx_fifo_head >= adapter->tx_fifo_size) {
1800		adapter->tx_fifo_head -= adapter->tx_fifo_size;
1801	}
1802}
1803
1804
1805static int
1806em_82547_tx_fifo_reset(struct adapter *adapter)
1807{
1808	uint32_t tctl;
1809
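	/*
	 * The FIFO pointers may only be rewritten while the transmitter is
	 * quiescent: descriptor head and tail match, the FIFO head/tail
	 * and their saved copies match, and no packet data remains in the
	 * FIFO (TDFPC == 0).
	 */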
1810	if ((E1000_READ_REG(&adapter->hw, TDT) == E1000_READ_REG(&adapter->hw, TDH)) &&
1811	    (E1000_READ_REG(&adapter->hw, TDFT) == E1000_READ_REG(&adapter->hw, TDFH)) &&
1812	    (E1000_READ_REG(&adapter->hw, TDFTS) == E1000_READ_REG(&adapter->hw, TDFHS)) &&
1813	    (E1000_READ_REG(&adapter->hw, TDFPC) == 0)) {
1814
1815		/* Disable TX unit */
1816		tctl = E1000_READ_REG(&adapter->hw, TCTL);
1817		E1000_WRITE_REG(&adapter->hw, TCTL, tctl & ~E1000_TCTL_EN);
1818
1819		/* Reset FIFO pointers */
1820		E1000_WRITE_REG(&adapter->hw, TDFT,  adapter->tx_head_addr);
1821		E1000_WRITE_REG(&adapter->hw, TDFH,  adapter->tx_head_addr);
1822		E1000_WRITE_REG(&adapter->hw, TDFTS, adapter->tx_head_addr);
1823		E1000_WRITE_REG(&adapter->hw, TDFHS, adapter->tx_head_addr);
1824
1825		/* Re-enable TX unit */
1826		E1000_WRITE_REG(&adapter->hw, TCTL, tctl);
1827		E1000_WRITE_FLUSH(&adapter->hw);
1828
1829		adapter->tx_fifo_head = 0;
1830		adapter->tx_fifo_reset_cnt++;
1831
1832		return (TRUE);
1833	} else {
1835		return (FALSE);
1836	}
1837}
1838
1839static void
1840em_set_promisc(struct adapter *adapter)
1841{
1842	struct ifnet	*ifp = adapter->ifp;
1843	uint32_t	reg_rctl;
1844
1845	reg_rctl = E1000_READ_REG(&adapter->hw, RCTL);
1846
1847	if (ifp->if_flags & IFF_PROMISC) {
1848		reg_rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
1849		E1000_WRITE_REG(&adapter->hw, RCTL, reg_rctl);
1850		/* Disable VLAN stripping in promiscuous mode.
1851		 * This allows VLAN-tagged frames to be bridged
1852		 * and their tags to be seen in tcpdump.
1853		 */
1854		if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING)
1855			em_disable_vlans(adapter);
1856		adapter->em_insert_vlan_header = 1;
1857	} else if (ifp->if_flags & IFF_ALLMULTI) {
1858		reg_rctl |= E1000_RCTL_MPE;
1859		reg_rctl &= ~E1000_RCTL_UPE;
1860		E1000_WRITE_REG(&adapter->hw, RCTL, reg_rctl);
1861		adapter->em_insert_vlan_header = 0;
1862	} else
1863		adapter->em_insert_vlan_header = 0;
1864}
1865
1866static void
1867em_disable_promisc(struct adapter *adapter)
1868{
1869	struct ifnet	*ifp = adapter->ifp;
1870	uint32_t	reg_rctl;
1871
1872	reg_rctl = E1000_READ_REG(&adapter->hw, RCTL);
1873
1874	reg_rctl &= ~E1000_RCTL_UPE;
1875	reg_rctl &= ~E1000_RCTL_MPE;
1876	E1000_WRITE_REG(&adapter->hw, RCTL, reg_rctl);
1877
1878	if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING)
1879		em_enable_vlans(adapter);
1880	adapter->em_insert_vlan_header = 0;
1881}
1882
1883
1884/*********************************************************************
1885 *  Multicast Update
1886 *
1887 *  This routine is called whenever the multicast address list is updated.
1888 *
1889 **********************************************************************/
1890
1891static void
1892em_set_multi(struct adapter *adapter)
1893{
1894	struct ifnet	*ifp = adapter->ifp;
1895	struct ifmultiaddr *ifma;
1896	uint32_t reg_rctl = 0;
1897	uint8_t  mta[MAX_NUM_MULTICAST_ADDRESSES * ETH_LENGTH_OF_ADDRESS];
1898	int mcnt = 0;
1899
1900	IOCTL_DEBUGOUT("em_set_multi: begin");
1901
1902	if (adapter->hw.mac_type == em_82542_rev2_0) {
1903		reg_rctl = E1000_READ_REG(&adapter->hw, RCTL);
1904		if (adapter->hw.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
1905			em_pci_clear_mwi(&adapter->hw);
1906		reg_rctl |= E1000_RCTL_RST;
1907		E1000_WRITE_REG(&adapter->hw, RCTL, reg_rctl);
1908		msec_delay(5);
1909	}
1910
1911	IF_ADDR_LOCK(ifp);
1912	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
1913		if (ifma->ifma_addr->sa_family != AF_LINK)
1914			continue;
1915
1916		if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
1917			break;
1918
1919		bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
1920		    &mta[mcnt*ETH_LENGTH_OF_ADDRESS], ETH_LENGTH_OF_ADDRESS);
1921		mcnt++;
1922	}
1923	IF_ADDR_UNLOCK(ifp);
1924
1925	if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES) {
1926		reg_rctl = E1000_READ_REG(&adapter->hw, RCTL);
1927		reg_rctl |= E1000_RCTL_MPE;
1928		E1000_WRITE_REG(&adapter->hw, RCTL, reg_rctl);
1929	} else
1930		em_mc_addr_list_update(&adapter->hw, mta, mcnt, 0, 1);
1931
1932	if (adapter->hw.mac_type == em_82542_rev2_0) {
1933		reg_rctl = E1000_READ_REG(&adapter->hw, RCTL);
1934		reg_rctl &= ~E1000_RCTL_RST;
1935		E1000_WRITE_REG(&adapter->hw, RCTL, reg_rctl);
1936		msec_delay(5);
1937		if (adapter->hw.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
1938			em_pci_set_mwi(&adapter->hw);
1939	}
1940}
1941
1942
1943/*********************************************************************
1944 *  Timer routine
1945 *
1946 *  This routine checks for link status and updates statistics.
1947 *
1948 **********************************************************************/
1949
1950static void
1951em_local_timer(void *arg)
1952{
1953	struct adapter	*adapter = arg;
1954	struct ifnet	*ifp = adapter->ifp;
1955
1956	EM_LOCK(adapter);
1957
1958	em_check_for_link(&adapter->hw);
1959	em_update_link_status(adapter);
1960	em_update_stats_counters(adapter);
1961	if (em_display_debug_stats && (ifp->if_drv_flags & IFF_DRV_RUNNING))
1962		em_print_hw_stats(adapter);
1963	em_smartspeed(adapter);
1964
1965	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
1966
1967	EM_UNLOCK(adapter);
1968}
1969
1970static void
1971em_update_link_status(struct adapter *adapter)
1972{
1973	struct ifnet *ifp = adapter->ifp;
1974	device_t dev = adapter->dev;
1975
1976	if (E1000_READ_REG(&adapter->hw, STATUS) & E1000_STATUS_LU) {
1977		if (adapter->link_active == 0) {
1978			em_get_speed_and_duplex(&adapter->hw, &adapter->link_speed,
1979			    &adapter->link_duplex);
1980			/* Check if we may set SPEED_MODE bit on PCI-E */
1981			if ((adapter->link_speed == SPEED_1000) &&
1982			    ((adapter->hw.mac_type == em_82571) ||
1983			    (adapter->hw.mac_type == em_82572))) {
1984				uint32_t tarc0;
1985
1986				tarc0 = E1000_READ_REG(&adapter->hw, TARC0);
1987				tarc0 |= SPEED_MODE_BIT;
1988				E1000_WRITE_REG(&adapter->hw, TARC0, tarc0);
1989			}
1990			if (bootverbose)
1991				device_printf(dev, "Link is up %d Mbps %s\n",
1992				    adapter->link_speed,
1993				    ((adapter->link_duplex == FULL_DUPLEX) ?
1994				    "Full Duplex" : "Half Duplex"));
1995			adapter->link_active = 1;
1996			adapter->smartspeed = 0;
1997			ifp->if_baudrate = adapter->link_speed * 1000000;
1998			if_link_state_change(ifp, LINK_STATE_UP);
1999		}
2000	} else {
2001		if (adapter->link_active == 1) {
2002			ifp->if_baudrate = adapter->link_speed = 0;
2003			adapter->link_duplex = 0;
2004			if (bootverbose)
2005				device_printf(dev, "Link is Down\n");
2006			adapter->link_active = 0;
2007			if_link_state_change(ifp, LINK_STATE_DOWN);
2008		}
2009	}
2010}
2011
2012/*********************************************************************
2013 *
2014 *  This routine disables all traffic on the adapter by issuing a
2015 *  global reset on the MAC and deallocates TX/RX buffers.
2016 *
2017 **********************************************************************/
2018
2019static void
2020em_stop(void *arg)
2021{
2022	struct adapter	*adapter = arg;
2023	struct ifnet	*ifp = adapter->ifp;
2024
2025	EM_LOCK_ASSERT(adapter);
2026
2027	INIT_DEBUGOUT("em_stop: begin");
2028
2029	em_disable_intr(adapter);
2030	em_reset_hw(&adapter->hw);
2031	callout_stop(&adapter->timer);
2032	callout_stop(&adapter->tx_fifo_timer);
2033	em_free_transmit_structures(adapter);
2034	em_free_receive_structures(adapter);
2035
2036	/* Tell the stack that the interface is no longer active */
2037	ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
2038}
2039
2040
2041/********************************************************************
2042 *
2043 *  Determine hardware revision.
2044 *
2045 **********************************************************************/
2046static void
2047em_identify_hardware(struct adapter *adapter)
2048{
2049	device_t dev = adapter->dev;
2050
2051	/* Make sure our PCI config space has the necessary stuff set */
2052	pci_enable_busmaster(dev);
2053	pci_enable_io(dev, SYS_RES_MEMORY);
2054	adapter->hw.pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2);
2055
2056	/* Save off the information about this board */
2057	adapter->hw.vendor_id = pci_get_vendor(dev);
2058	adapter->hw.device_id = pci_get_device(dev);
2059	adapter->hw.revision_id = pci_get_revid(dev);
2060	adapter->hw.subsystem_vendor_id = pci_get_subvendor(dev);
2061	adapter->hw.subsystem_id = pci_get_subdevice(dev);
2062
2063	/* Identify the MAC */
2064	if (em_set_mac_type(&adapter->hw))
2065		device_printf(dev, "Unknown MAC Type\n");
2066
2067	if (adapter->hw.mac_type == em_82541 || adapter->hw.mac_type == em_82541_rev_2 ||
2068	    adapter->hw.mac_type == em_82547 || adapter->hw.mac_type == em_82547_rev_2)
2069		adapter->hw.phy_init_script = TRUE;
2070}
2071
2072static int
2073em_allocate_pci_resources(struct adapter *adapter)
2074{
2075	device_t	dev = adapter->dev;
2076	int		val, rid;
2077
2078	rid = PCIR_BAR(0);
2079	adapter->res_memory = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
2080	    &rid, RF_ACTIVE);
2081	if (adapter->res_memory == NULL) {
2082		device_printf(dev, "Unable to allocate bus resource: memory\n");
2083		return (ENXIO);
2084	}
2085	adapter->osdep.mem_bus_space_tag =
2086	    rman_get_bustag(adapter->res_memory);
2087	adapter->osdep.mem_bus_space_handle =
	    rman_get_bushandle(adapter->res_memory);
2088	adapter->hw.hw_addr = (uint8_t *)&adapter->osdep.mem_bus_space_handle;
2089
2090	if (adapter->hw.mac_type > em_82543) {
2091		/* Figure out where our I/O BAR is. */
2092		for (rid = PCIR_BAR(0); rid < PCIR_CIS;) {
2093			val = pci_read_config(dev, rid, 4);
2094			if (E1000_BAR_TYPE(val) == E1000_BAR_TYPE_IO) {
2095				adapter->io_rid = rid;
2096				break;
2097			}
2098			rid += 4;
2099			/* check for 64bit BAR */
2100			if (E1000_BAR_MEM_TYPE(val) == E1000_BAR_MEM_TYPE_64BIT)
2101				rid += 4;
2102		}
2103		if (rid >= PCIR_CIS) {
2104			device_printf(dev, "Unable to locate IO BAR\n");
2105			return (ENXIO);
2106		}
2107		adapter->res_ioport = bus_alloc_resource_any(dev, SYS_RES_IOPORT,
2108		    &adapter->io_rid, RF_ACTIVE);
2109		if (adapter->res_ioport == NULL) {
2110			device_printf(dev, "Unable to allocate bus resource: "
2111			    "ioport\n");
2112			return (ENXIO);
2113		}
2114		adapter->hw.io_base = 0;
2115		adapter->osdep.io_bus_space_tag = rman_get_bustag(adapter->res_ioport);
2116		adapter->osdep.io_bus_space_handle =
2117		    rman_get_bushandle(adapter->res_ioport);
2118	}
2119
2120	/* For ICH8 we need to find the flash memory. */
2121	if (adapter->hw.mac_type == em_ich8lan) {
2122		rid = EM_FLASH;
2123
2124		adapter->flash_mem = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
2125		    &rid, RF_ACTIVE);
		if (adapter->flash_mem == NULL) {
			device_printf(dev, "Unable to allocate bus resource: "
			    "flash memory\n");
			return (ENXIO);
		}
2126		adapter->osdep.flash_bus_space_tag = rman_get_bustag(adapter->flash_mem);
2127		adapter->osdep.flash_bus_space_handle =
2128		    rman_get_bushandle(adapter->flash_mem);
2129	}
2130
2131	rid = 0x0;
2132	adapter->res_interrupt = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid,
2133	    RF_SHAREABLE | RF_ACTIVE);
2134	if (adapter->res_interrupt == NULL) {
2135		device_printf(dev, "Unable to allocate bus resource: "
2136		    "interrupt\n");
2137		return (ENXIO);
2138	}
2139
2140	adapter->hw.back = &adapter->osdep;
2141
2142	return (0);
2143}
2144
2145int
2146em_allocate_intr(struct adapter *adapter)
2147{
2148	device_t dev = adapter->dev;
2149	int error;
2150
2151	/* Manually turn off all interrupts */
2152	E1000_WRITE_REG(&adapter->hw, IMC, 0xffffffff);
2153
2154#ifdef DEVICE_POLLING
2155	if (adapter->int_handler_tag == NULL && (error = bus_setup_intr(dev,
2156	    adapter->res_interrupt, INTR_TYPE_NET | INTR_MPSAFE, em_intr, adapter,
2157	    &adapter->int_handler_tag)) != 0) {
2158		device_printf(dev, "Failed to register interrupt handler");
2159		return (error);
2160	}
2161#else
2162	/*
2163	 * Try allocating a fast interrupt and the associated deferred
2164	 * processing contexts.
2165	 */
2166	TASK_INIT(&adapter->rxtx_task, 0, em_handle_rxtx, adapter);
2167	TASK_INIT(&adapter->link_task, 0, em_handle_link, adapter);
2168	adapter->tq = taskqueue_create_fast("em_taskq", M_NOWAIT,
2169	    taskqueue_thread_enqueue, &adapter->tq);
2170	taskqueue_start_threads(&adapter->tq, 1, PI_NET, "%s taskq",
2171	    device_get_nameunit(adapter->dev));
2172	if ((error = bus_setup_intr(dev, adapter->res_interrupt,
2173	    INTR_TYPE_NET | INTR_FAST, em_intr_fast, adapter,
2174	    &adapter->int_handler_tag)) != 0) {
2175		device_printf(dev, "Failed to register fast interrupt "
2176			    "handler: %d\n", error);
2177		taskqueue_free(adapter->tq);
2178		adapter->tq = NULL;
2179		return (error);
2180	}
2181#endif
2182
2183	em_enable_intr(adapter);
2184	return (0);
2185}
2186
2187static void
2188em_free_intr(struct adapter *adapter)
2189{
2190	device_t dev = adapter->dev;
2191
2192	if (adapter->int_handler_tag != NULL) {
2193		bus_teardown_intr(dev, adapter->res_interrupt, adapter->int_handler_tag);
2194		adapter->int_handler_tag = NULL;
2195	}
2196	if (adapter->tq != NULL) {
2197		taskqueue_drain(adapter->tq, &adapter->rxtx_task);
2198		taskqueue_drain(taskqueue_fast, &adapter->link_task);
2199		taskqueue_free(adapter->tq);
2200		adapter->tq = NULL;
2201	}
2202}
2203
2204static void
2205em_free_pci_resources(struct adapter *adapter)
2206{
2207	device_t dev = adapter->dev;
2208
2209	if (adapter->res_interrupt != NULL)
2210		bus_release_resource(dev, SYS_RES_IRQ, 0, adapter->res_interrupt);
2211
2212	if (adapter->res_memory != NULL)
2213		bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BAR(0),
2214		    adapter->res_memory);
2215
2216	if (adapter->flash_mem != NULL)
2217		bus_release_resource(dev, SYS_RES_MEMORY, EM_FLASH,
2218		    adapter->flash_mem);
2219
2220	if (adapter->res_ioport != NULL)
2221		bus_release_resource(dev, SYS_RES_IOPORT, adapter->io_rid,
2222		    adapter->res_ioport);
2223}
2224
2225/*********************************************************************
2226 *
2227 *  Initialize the hardware to a configuration as specified by the
2228 *  adapter structure. The controller is reset, the EEPROM is
2229 *  verified, the MAC address is set, then the shared initialization
2230 *  routines are called.
2231 *
2232 **********************************************************************/
2233static int
2234em_hardware_init(struct adapter *adapter)
2235{
2236	device_t dev = adapter->dev;
2237	uint16_t rx_buffer_size;
2238
2239	INIT_DEBUGOUT("em_hardware_init: begin");
2240	/* Issue a global reset */
2241	em_reset_hw(&adapter->hw);
2242
2243	/* When hardware is reset, fifo_head is also reset */
2244	adapter->tx_fifo_head = 0;
2245
2246	/* Make sure we have a good EEPROM before we read from it */
2247	if (em_validate_eeprom_checksum(&adapter->hw) < 0) {
2248		device_printf(dev, "The EEPROM Checksum Is Not Valid\n");
2249		return (EIO);
2250	}
2251
2252	if (em_read_part_num(&adapter->hw, &(adapter->part_num)) < 0) {
2253		device_printf(dev, "EEPROM read error while reading part "
2254		    "number\n");
2255		return (EIO);
2256	}
2257
2258	/* Set up smart power down as default off on newer adapters. */
2259	if (!em_smart_pwr_down &&
2260	    (adapter->hw.mac_type == em_82571 || adapter->hw.mac_type == em_82572)) {
2261		uint16_t phy_tmp = 0;
2262
2263		/* Speed up time to link by disabling smart power down. */
2264		em_read_phy_reg(&adapter->hw, IGP02E1000_PHY_POWER_MGMT, &phy_tmp);
2265		phy_tmp &= ~IGP02E1000_PM_SPD;
2266		em_write_phy_reg(&adapter->hw, IGP02E1000_PHY_POWER_MGMT, phy_tmp);
2267	}
2268
2269	/*
2270	 * These parameters control the automatic generation (Tx) and
2271	 * response (Rx) to Ethernet PAUSE frames.
2272	 * - High water mark should allow for at least two frames to be
2273	 *   received after sending an XOFF.
2274	 * - Low water mark works best when it is very near the high water mark.
2275	 *   This allows the receiver to restart by sending XON when it has
2276	 *   drained a bit. Here we use an arbitrary value of 1500 which will
2277	 *   restart after one full frame is pulled from the buffer. There
2278	 *   could be several smaller frames in the buffer and if so they will
2279	 *   not trigger the XON until their total number reduces the buffer
2280	 *   by 1500.
2281	 * - The pause time is fairly large: the default 0x1000 (4096) is in
	 *   units of 512 bit times (512ns at gigabit speed), roughly 2ms.
2282	 */
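	/*
	 * Worked example (the actual numbers depend on how the PBA register
	 * partitions the packet buffer): with a 48KB Rx allocation,
	 * rx_buffer_size = 48 << 10 = 49152 bytes; for a standard 1518-byte
	 * max frame, roundup2(1518, 1024) = 2048, so fc_high_water =
	 * 49152 - 2048 = 47104 and fc_low_water = 47104 - 1500 = 45604.
	 */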
2283	rx_buffer_size = ((E1000_READ_REG(&adapter->hw, PBA) & 0xffff) << 10);
2284
2285	adapter->hw.fc_high_water = rx_buffer_size -
2286	    roundup2(adapter->hw.max_frame_size, 1024);
2287	adapter->hw.fc_low_water = adapter->hw.fc_high_water - 1500;
2288	if (adapter->hw.mac_type == em_80003es2lan)
2289		adapter->hw.fc_pause_time = 0xFFFF;
2290	else
2291		adapter->hw.fc_pause_time = 0x1000;
2292	adapter->hw.fc_send_xon = TRUE;
2293	adapter->hw.fc = em_fc_full;
2294
2295	if (em_init_hw(&adapter->hw) < 0) {
2296		device_printf(dev, "Hardware Initialization Failed\n");
2297		return (EIO);
2298	}
2299
2300	em_check_for_link(&adapter->hw);
2301
2302	return (0);
2303}
2304
2305/*********************************************************************
2306 *
2307 *  Setup networking device structure and register an interface.
2308 *
2309 **********************************************************************/
2310static void
2311em_setup_interface(device_t dev, struct adapter *adapter)
2312{
2313	struct ifnet   *ifp;

2314	INIT_DEBUGOUT("em_setup_interface: begin");
2315
2316	ifp = adapter->ifp = if_alloc(IFT_ETHER);
2317	if (ifp == NULL)
2318		panic("%s: can not if_alloc()", device_get_nameunit(dev));
2319	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
2320	ifp->if_mtu = ETHERMTU;
2321	ifp->if_init = em_init;
2322	ifp->if_softc = adapter;
2323	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
2324	ifp->if_ioctl = em_ioctl;
2325	ifp->if_start = em_start;
2326	ifp->if_watchdog = em_watchdog;
2327	IFQ_SET_MAXLEN(&ifp->if_snd, adapter->num_tx_desc - 1);
2328	ifp->if_snd.ifq_drv_maxlen = adapter->num_tx_desc - 1;
2329	IFQ_SET_READY(&ifp->if_snd);
2330
2331	ether_ifattach(ifp, adapter->hw.mac_addr);
2332
2333	ifp->if_capabilities = ifp->if_capenable = 0;
2334
2335	if (adapter->hw.mac_type >= em_82543) {
2336		ifp->if_capabilities |= IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM;
2337		ifp->if_capenable |= IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM;
2338	}
2339
2340	/* Enable TSO if available */
2341	if ((adapter->hw.mac_type > em_82544) &&
2342	    (adapter->hw.mac_type != em_82547)) {
2343		ifp->if_capabilities |= IFCAP_TSO;
2344		ifp->if_capenable |= IFCAP_TSO;
2345	}
2346
2347	/*
2348	 * Tell the upper layer(s) we support long frames.
2349	 */
2350	ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header);
2351	ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU;
2352	ifp->if_capenable |= IFCAP_VLAN_MTU;
2353
2354#ifdef DEVICE_POLLING
2355	ifp->if_capabilities |= IFCAP_POLLING;
2356#endif
2357
2358	/*
2359	 * Specify the media types supported by this adapter and register
2360	 * callbacks to update media and link information
2361	 */
2362	ifmedia_init(&adapter->media, IFM_IMASK, em_media_change,
2363	    em_media_status);
2364	if ((adapter->hw.media_type == em_media_type_fiber) ||
2365	    (adapter->hw.media_type == em_media_type_internal_serdes)) {
2366		u_char fiber_type = IFM_1000_SX;	/* default type */
2367
2368		if (adapter->hw.mac_type == em_82545)
2369			fiber_type = IFM_1000_LX;
2370		ifmedia_add(&adapter->media, IFM_ETHER | fiber_type | IFM_FDX,
2371		    0, NULL);
2372		ifmedia_add(&adapter->media, IFM_ETHER | fiber_type, 0, NULL);
2373	} else {
2374		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T, 0, NULL);
2375		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T | IFM_FDX,
2376			    0, NULL);
2377		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX,
2378			    0, NULL);
2379		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX | IFM_FDX,
2380			    0, NULL);
2381		if (adapter->hw.phy_type != em_phy_ife) {
2382			ifmedia_add(&adapter->media,
2383				IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
2384			ifmedia_add(&adapter->media,
2385				IFM_ETHER | IFM_1000_T, 0, NULL);
2386		}
2387	}
2388	ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL);
2389	ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO);
2390}
2391
2392
2393/*********************************************************************
2394 *
2395 *  Workaround for SmartSpeed on 82541 and 82547 controllers: when
 *  1000BASE-T master/slave negotiation repeatedly faults, toggle the
 *  manual master/slave setting and restart autonegotiation so that
 *  the link can eventually come up.
2396 *
2397 **********************************************************************/
2398static void
2399em_smartspeed(struct adapter *adapter)
2400{
2401	uint16_t phy_tmp;
2402
2403	if (adapter->link_active || (adapter->hw.phy_type != em_phy_igp) ||
2404	    adapter->hw.autoneg == 0 ||
2405	    (adapter->hw.autoneg_advertised & ADVERTISE_1000_FULL) == 0)
2406		return;
2407
2408	if (adapter->smartspeed == 0) {
2409		/*
2410		 * If the Master/Slave config fault is asserted twice,
		 * we assume back-to-back faults.
		 */
2411		em_read_phy_reg(&adapter->hw, PHY_1000T_STATUS, &phy_tmp);
2412		if (!(phy_tmp & SR_1000T_MS_CONFIG_FAULT))
2413			return;
2414		em_read_phy_reg(&adapter->hw, PHY_1000T_STATUS, &phy_tmp);
2415		if (phy_tmp & SR_1000T_MS_CONFIG_FAULT) {
2416			em_read_phy_reg(&adapter->hw, PHY_1000T_CTRL, &phy_tmp);
2417			if (phy_tmp & CR_1000T_MS_ENABLE) {
2418				phy_tmp &= ~CR_1000T_MS_ENABLE;
2419				em_write_phy_reg(&adapter->hw, PHY_1000T_CTRL,
2420				    phy_tmp);
2421				adapter->smartspeed++;
2422				if (adapter->hw.autoneg &&
2423				   !em_phy_setup_autoneg(&adapter->hw) &&
2424				   !em_read_phy_reg(&adapter->hw, PHY_CTRL,
2425				    &phy_tmp)) {
2426					phy_tmp |= (MII_CR_AUTO_NEG_EN |
2427						    MII_CR_RESTART_AUTO_NEG);
2428					em_write_phy_reg(&adapter->hw, PHY_CTRL,
2429					    phy_tmp);
2430				}
2431			}
2432		}
2433		return;
2434	} else if (adapter->smartspeed == EM_SMARTSPEED_DOWNSHIFT) {
2435		/* If still no link, perhaps using 2/3 pair cable */
2436		em_read_phy_reg(&adapter->hw, PHY_1000T_CTRL, &phy_tmp);
2437		phy_tmp |= CR_1000T_MS_ENABLE;
2438		em_write_phy_reg(&adapter->hw, PHY_1000T_CTRL, phy_tmp);
2439		if (adapter->hw.autoneg &&
2440		   !em_phy_setup_autoneg(&adapter->hw) &&
2441		   !em_read_phy_reg(&adapter->hw, PHY_CTRL, &phy_tmp)) {
2442			phy_tmp |= (MII_CR_AUTO_NEG_EN |
2443				    MII_CR_RESTART_AUTO_NEG);
2444			em_write_phy_reg(&adapter->hw, PHY_CTRL, phy_tmp);
2445		}
2446	}
2447	/* Restart process after EM_SMARTSPEED_MAX iterations */
2448	if (adapter->smartspeed++ == EM_SMARTSPEED_MAX)
2449		adapter->smartspeed = 0;
2450}
2451
2452
2453/*
2454 * Manage DMA'able memory.
2455 */
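/*
 * Callback used by bus_dmamap_load(9) below; the allocation is a single
 * contiguous segment, so simply record its bus address for the caller.
 */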
2456static void
2457em_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
2458{
2459	if (error)
2460		return;
2461	*(bus_addr_t *) arg = segs[0].ds_addr;
2462}
2463
2464static int
2465em_dma_malloc(struct adapter *adapter, bus_size_t size, struct em_dma_alloc *dma,
2466	int mapflags)
2467{
2468	int error;
2469
2470	error = bus_dma_tag_create(bus_get_dma_tag(adapter->dev), /* parent */
2471				EM_DBA_ALIGN, 0,	/* alignment, bounds */
2472				BUS_SPACE_MAXADDR,	/* lowaddr */
2473				BUS_SPACE_MAXADDR,	/* highaddr */
2474				NULL, NULL,		/* filter, filterarg */
2475				size,			/* maxsize */
2476				1,			/* nsegments */
2477				size,			/* maxsegsize */
2478				0,			/* flags */
2479				NULL,			/* lockfunc */
2480				NULL,			/* lockarg */
2481				&dma->dma_tag);
2482	if (error) {
2483		device_printf(adapter->dev, "%s: bus_dma_tag_create failed: %d\n",
2484		    __func__, error);
2485		goto fail_0;
2486	}
2487
2488	error = bus_dmamem_alloc(dma->dma_tag, (void **)&dma->dma_vaddr,
2489	    BUS_DMA_NOWAIT, &dma->dma_map);
2490	if (error) {
2491		device_printf(adapter->dev, "%s: bus_dmamem_alloc(%ju) failed: %d\n",
2492		    __func__, (uintmax_t)size, error);
2493		goto fail_2;
2494	}
2495
2496	dma->dma_paddr = 0;
2497	error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
2498	    size, em_dmamap_cb, &dma->dma_paddr, mapflags | BUS_DMA_NOWAIT);
2499	if (error || dma->dma_paddr == 0) {
2500		device_printf(adapter->dev, "%s: bus_dmamap_load failed: %d\n",
2501		    __func__, error);
2502		goto fail_3;
2503	}
2504
2505	return (0);
2506
2507fail_3:
2508	bus_dmamap_unload(dma->dma_tag, dma->dma_map);
2509fail_2:
2510	bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
2511	bus_dma_tag_destroy(dma->dma_tag);
2512fail_0:
2513	dma->dma_map = NULL;
2514	dma->dma_tag = NULL;
2515
2516	return (error);
2517}
2518
2519static void
2520em_dma_free(struct adapter *adapter, struct em_dma_alloc *dma)
2521{
2522	if (dma->dma_tag == NULL)
2523		return;
2524	if (dma->dma_map != NULL) {
2525		bus_dmamap_sync(dma->dma_tag, dma->dma_map,
2526		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
2527		bus_dmamap_unload(dma->dma_tag, dma->dma_map);
2528		bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
2529		dma->dma_map = NULL;
2530	}
2531	bus_dma_tag_destroy(dma->dma_tag);
2532	dma->dma_tag = NULL;
2533}
2534
2535
2536/*********************************************************************
2537 *
2538 *  Allocate memory for tx_buffer structures. The tx_buffer stores all
2539 *  the information needed to transmit a packet on the wire.
2540 *
2541 **********************************************************************/
2542static int
2543em_allocate_transmit_structures(struct adapter *adapter)
2544{
2545	adapter->tx_buffer_area = malloc(sizeof(struct em_buffer) *
2546	    adapter->num_tx_desc, M_DEVBUF, M_NOWAIT);
2547	if (adapter->tx_buffer_area == NULL) {
2548		device_printf(adapter->dev, "Unable to allocate tx_buffer memory\n");
2549		return (ENOMEM);
2550	}
2551
2552	bzero(adapter->tx_buffer_area, sizeof(struct em_buffer) * adapter->num_tx_desc);
2553
2554	return (0);
2555}
2556
2557/*********************************************************************
2558 *
2559 *  Allocate and initialize transmit structures.
2560 *
2561 **********************************************************************/
2562static int
2563em_setup_transmit_structures(struct adapter *adapter)
2564{
2565	struct ifnet   *ifp = adapter->ifp;
2566	device_t dev = adapter->dev;
2567	struct em_buffer *tx_buffer;
2568	bus_size_t size, segsize;
2569	int error, i;
2570
2571	/*
2572	 * Setup DMA descriptor areas.
2573	 */
2574	segsize = size = roundup2(adapter->hw.max_frame_size, MCLBYTES);
2575
2576	/* Overrides for TSO - want large sizes */
2577	if (ifp->if_hwassist & EM_TCPSEG_FEATURES) {
2578		size = EM_TSO_SIZE;
2579		segsize = PAGE_SIZE;
2580	}
2581
2582	if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),	/* parent */
2583				1, 0,			/* alignment, bounds */
2584				BUS_SPACE_MAXADDR,	/* lowaddr */
2585				BUS_SPACE_MAXADDR,	/* highaddr */
2586				NULL, NULL,		/* filter, filterarg */
2587				size,			/* maxsize */
2588				EM_MAX_SCATTER,		/* nsegments */
2589				segsize,		/* maxsegsize */
2590				0,			/* flags */
2591				NULL,		/* lockfunc */
2592				NULL,		/* lockarg */
2593				&adapter->txtag)) != 0) {
2594		device_printf(dev, "Unable to allocate TX DMA tag\n");
2595		goto fail;
2596	}
2597
2598	if ((error = em_allocate_transmit_structures(adapter)) != 0)
2599		goto fail;
2600
2601	bzero(adapter->tx_desc_base, (sizeof(struct em_tx_desc)) * adapter->num_tx_desc);
2602	tx_buffer = adapter->tx_buffer_area;
2603	for (i = 0; i < adapter->num_tx_desc; i++) {
2604		error = bus_dmamap_create(adapter->txtag, 0, &tx_buffer->map);
2605		if (error != 0) {
2606			device_printf(dev, "Unable to create TX DMA map\n");
2607			goto fail;
2608		}
2609		tx_buffer++;
2610	}
2611
2612	adapter->next_avail_tx_desc = 0;
2613	adapter->oldest_used_tx_desc = 0;
2614
2615	/* Set number of descriptors available */
2616	adapter->num_tx_desc_avail = adapter->num_tx_desc;
2617
2618	/* Set checksum context */
2619	adapter->active_checksum_context = OFFLOAD_NONE;
2620	bus_dmamap_sync(adapter->txdma.dma_tag, adapter->txdma.dma_map,
2621	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
2622
2623	return (0);
2624
2625fail:
2626	em_free_transmit_structures(adapter);
2627	return (error);
2628}
2629
2630/*********************************************************************
2631 *
2632 *  Enable transmit unit.
2633 *
2634 **********************************************************************/
2635static void
2636em_initialize_transmit_unit(struct adapter *adapter)
2637{
2638	uint32_t	reg_tctl, reg_tarc;
2639	uint32_t	reg_tipg = 0;
2640	uint64_t	bus_addr;
2641
2642	INIT_DEBUGOUT("em_initialize_transmit_unit: begin");
2643	/* Setup the Base and Length of the Tx Descriptor Ring */
2644	bus_addr = adapter->txdma.dma_paddr;
2645	E1000_WRITE_REG(&adapter->hw, TDLEN,
2646	    adapter->num_tx_desc * sizeof(struct em_tx_desc));
2647	E1000_WRITE_REG(&adapter->hw, TDBAH, (uint32_t)(bus_addr >> 32));
2648	E1000_WRITE_REG(&adapter->hw, TDBAL, (uint32_t)bus_addr);
2649
2650	/* Setup the HW Tx Head and Tail descriptor pointers */
2651	E1000_WRITE_REG(&adapter->hw, TDT, 0);
2652	E1000_WRITE_REG(&adapter->hw, TDH, 0);
2653
2655	HW_DEBUGOUT2("Base = %x, Length = %x\n", E1000_READ_REG(&adapter->hw, TDBAL),
2656	    E1000_READ_REG(&adapter->hw, TDLEN));
2657
2658	/* Set the default values for the Tx Inter Packet Gap timer */
2659	switch (adapter->hw.mac_type) {
2660	case em_82542_rev2_0:
2661	case em_82542_rev2_1:
2662		reg_tipg = DEFAULT_82542_TIPG_IPGT;
2663		reg_tipg |= DEFAULT_82542_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT;
2664		reg_tipg |= DEFAULT_82542_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT;
2665		break;
2666	case em_80003es2lan:
2667		reg_tipg = DEFAULT_82543_TIPG_IPGR1;
2668		reg_tipg |= DEFAULT_80003ES2LAN_TIPG_IPGR2 <<
2669		    E1000_TIPG_IPGR2_SHIFT;
2670		break;
2671	default:
2672		if ((adapter->hw.media_type == em_media_type_fiber) ||
2673		    (adapter->hw.media_type == em_media_type_internal_serdes))
2674			reg_tipg = DEFAULT_82543_TIPG_IPGT_FIBER;
2675		else
2676			reg_tipg = DEFAULT_82543_TIPG_IPGT_COPPER;
2677		reg_tipg |= DEFAULT_82543_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT;
2678		reg_tipg |= DEFAULT_82543_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT;
2679	}
2680
2681	E1000_WRITE_REG(&adapter->hw, TIPG, reg_tipg);
2682	E1000_WRITE_REG(&adapter->hw, TIDV, adapter->tx_int_delay.value);
2683	if (adapter->hw.mac_type >= em_82540)
2684		E1000_WRITE_REG(&adapter->hw, TADV, adapter->tx_abs_int_delay.value);
2685
2686	/* Do adapter specific tweaks before we enable the transmitter. */
2687	if (adapter->hw.mac_type == em_82571 || adapter->hw.mac_type == em_82572) {
2688		reg_tarc = E1000_READ_REG(&adapter->hw, TARC0);
2689		reg_tarc |= (1 << 25);
2690		E1000_WRITE_REG(&adapter->hw, TARC0, reg_tarc);
2691		reg_tarc = E1000_READ_REG(&adapter->hw, TARC1);
2692		reg_tarc |= (1 << 25);
2693		reg_tarc &= ~(1 << 28);
2694		E1000_WRITE_REG(&adapter->hw, TARC1, reg_tarc);
2695	} else if (adapter->hw.mac_type == em_80003es2lan) {
2696		reg_tarc = E1000_READ_REG(&adapter->hw, TARC0);
2697		reg_tarc |= 1;
2698		E1000_WRITE_REG(&adapter->hw, TARC0, reg_tarc);
2699		reg_tarc = E1000_READ_REG(&adapter->hw, TARC1);
2700		reg_tarc |= 1;
2701		E1000_WRITE_REG(&adapter->hw, TARC1, reg_tarc);
2702	}
2703
2704	/* Program the Transmit Control Register */
2705	reg_tctl = E1000_TCTL_PSP | E1000_TCTL_EN |
2706		   (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT);
2707	if (adapter->hw.mac_type >= em_82571)
2708		reg_tctl |= E1000_TCTL_MULR;
2709	if (adapter->link_duplex == FULL_DUPLEX) {
2710		reg_tctl |= E1000_FDX_COLLISION_DISTANCE << E1000_COLD_SHIFT;
2711	} else {
2712		reg_tctl |= E1000_HDX_COLLISION_DISTANCE << E1000_COLD_SHIFT;
2713	}
2714	/* This write will effectively turn on the transmit unit. */
2715	E1000_WRITE_REG(&adapter->hw, TCTL, reg_tctl);
2716
2717	/* Setup Transmit Descriptor Settings for this adapter */
2718	adapter->txd_cmd = E1000_TXD_CMD_IFCS | E1000_TXD_CMD_RS;
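	/*
	 * IFCS has the hardware append the ethernet FCS/CRC to every
	 * packet; RS requests the descriptor-done (DD) status writeback
	 * that em_txeof() relies on to reclaim descriptors.
	 */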
2719
2720	if (adapter->tx_int_delay.value > 0)
2721		adapter->txd_cmd |= E1000_TXD_CMD_IDE;
2722}
2723
2724/*********************************************************************
2725 *
2726 *  Free all transmit related data structures.
2727 *
2728 **********************************************************************/
2729static void
2730em_free_transmit_structures(struct adapter *adapter)
2731{
2732	struct em_buffer *tx_buffer;
2733	int i;
2734
2735	INIT_DEBUGOUT("free_transmit_structures: begin");
2736
2737	if (adapter->tx_buffer_area != NULL) {
2738		tx_buffer = adapter->tx_buffer_area;
2739		for (i = 0; i < adapter->num_tx_desc; i++, tx_buffer++) {
2740			if (tx_buffer->m_head != NULL) {
2741				bus_dmamap_sync(adapter->txtag, tx_buffer->map,
2742				    BUS_DMASYNC_POSTWRITE);
2743				bus_dmamap_unload(adapter->txtag,
2744				    tx_buffer->map);
2745				m_freem(tx_buffer->m_head);
2746				tx_buffer->m_head = NULL;
2747			} else if (tx_buffer->map != NULL)
2748				bus_dmamap_unload(adapter->txtag,
2749				    tx_buffer->map);
2750			if (tx_buffer->map != NULL) {
2751				bus_dmamap_destroy(adapter->txtag,
2752				    tx_buffer->map);
2753				tx_buffer->map = NULL;
2754			}
2755		}
2756	}
2757	if (adapter->tx_buffer_area != NULL) {
2758		free(adapter->tx_buffer_area, M_DEVBUF);
2759		adapter->tx_buffer_area = NULL;
2760	}
2761	if (adapter->txtag != NULL) {
2762		bus_dma_tag_destroy(adapter->txtag);
2763		adapter->txtag = NULL;
2764	}
2765}
2766
2767/*********************************************************************
2768 *
2769 *  The offload context needs to be set when we transfer the first
2770 *  packet of a particular protocol (TCP/UDP). We change the
2771 *  context only if the protocol type changes.
2772 *
2773 **********************************************************************/
2774static void
2775em_transmit_checksum_setup(struct adapter *adapter, struct mbuf *mp,
2776    uint32_t *txd_upper, uint32_t *txd_lower)
2777{
2778	struct em_context_desc *TXD;
2779	struct em_buffer *tx_buffer;
2780	int curr_txd;
2781
2782	if (mp->m_pkthdr.csum_flags) {
2784		if (mp->m_pkthdr.csum_flags & CSUM_TCP) {
2785			*txd_upper = E1000_TXD_POPTS_TXSM << 8;
2786			*txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
2787			if (adapter->active_checksum_context == OFFLOAD_TCP_IP)
2788				return;
2789			else
2790				adapter->active_checksum_context = OFFLOAD_TCP_IP;
2791
2792		} else if (mp->m_pkthdr.csum_flags & CSUM_UDP) {
2793			*txd_upper = E1000_TXD_POPTS_TXSM << 8;
2794			*txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
2795			if (adapter->active_checksum_context == OFFLOAD_UDP_IP)
2796				return;
2797			else
2798				adapter->active_checksum_context = OFFLOAD_UDP_IP;
2799		} else {
2800			*txd_upper = 0;
2801			*txd_lower = 0;
2802			return;
2803		}
2804	} else {
2805		*txd_upper = 0;
2806		*txd_lower = 0;
2807		return;
2808	}
2809
2810	/* If we reach this point, the checksum offload context
2811	 * needs to be reset.
2812	 */
2813	curr_txd = adapter->next_avail_tx_desc;
2814	tx_buffer = &adapter->tx_buffer_area[curr_txd];
2815	TXD = (struct em_context_desc *) &adapter->tx_desc_base[curr_txd];
2816
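	/*
	 * The context descriptor describes the checksum layout to the
	 * hardware: css is the offset where checksumming starts, cso the
	 * offset of the checksum field itself, and cse the offset where
	 * checksumming ends (0 meaning through the end of the packet).
	 */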
2817	TXD->lower_setup.ip_fields.ipcss = ETHER_HDR_LEN;
2818	TXD->lower_setup.ip_fields.ipcso =
2819		ETHER_HDR_LEN + offsetof(struct ip, ip_sum);
2820	TXD->lower_setup.ip_fields.ipcse =
2821		htole16(ETHER_HDR_LEN + sizeof(struct ip) - 1);
2822
2823	TXD->upper_setup.tcp_fields.tucss =
2824		ETHER_HDR_LEN + sizeof(struct ip);
2825	TXD->upper_setup.tcp_fields.tucse = htole16(0);
2826
2827	if (adapter->active_checksum_context == OFFLOAD_TCP_IP) {
2828		TXD->upper_setup.tcp_fields.tucso =
2829			ETHER_HDR_LEN + sizeof(struct ip) +
2830			offsetof(struct tcphdr, th_sum);
2831	} else if (adapter->active_checksum_context == OFFLOAD_UDP_IP) {
2832		TXD->upper_setup.tcp_fields.tucso =
2833			ETHER_HDR_LEN + sizeof(struct ip) +
2834			offsetof(struct udphdr, uh_sum);
2835	}
2836
2837	TXD->tcp_seg_setup.data = htole32(0);
2838	TXD->cmd_and_length = htole32(adapter->txd_cmd | E1000_TXD_CMD_DEXT);
2839
2840	tx_buffer->m_head = NULL;
2841
2842	if (++curr_txd == adapter->num_tx_desc)
2843		curr_txd = 0;
2844
2845	adapter->num_tx_desc_avail--;
2846	adapter->next_avail_tx_desc = curr_txd;
2847}
2848
2849/**********************************************************************
2850 *
2851 *  Setup work for hardware segmentation offload (TSO)
2852 *
2853 **********************************************************************/
2854static boolean_t
2855em_tso_setup(struct adapter *adapter, struct mbuf *mp, uint32_t *txd_upper,
2856   uint32_t *txd_lower)
2857{
2858	struct em_context_desc *TXD;
2859	struct em_buffer *tx_buffer;
2860	struct ip *ip;
2861	struct tcphdr *th;
2862	int curr_txd, hdr_len, ip_hlen, tcp_hlen;
2863
2864	if (((mp->m_pkthdr.csum_flags & CSUM_TSO) == 0) ||
2865	    (mp->m_pkthdr.len <= E1000_TX_BUFFER_SIZE))
2866		return (FALSE);
2868
2869	*txd_lower = (E1000_TXD_CMD_DEXT |
2870		      E1000_TXD_DTYP_D |
2871		      E1000_TXD_CMD_TSE);
2872
2873	*txd_upper = (E1000_TXD_POPTS_IXSM |
2874		      E1000_TXD_POPTS_TXSM) << 8;
2875
2876	curr_txd = adapter->next_avail_tx_desc;
2877	tx_buffer = &adapter->tx_buffer_area[curr_txd];
2878	TXD = (struct em_context_desc *) &adapter->tx_desc_base[curr_txd];
2879
2880	mp->m_data += sizeof(struct ether_header);
2881	ip = mtod(mp, struct ip *);
2882	ip->ip_len = 0;
2883	ip->ip_sum = 0;
2884	ip_hlen = ip->ip_hl << 2;
2885	th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
2886	tcp_hlen = th->th_off << 2;
2887
2888	hdr_len = ETHER_HDR_LEN + ip_hlen + tcp_hlen;
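	/*
	 * Seed th_sum with a pseudo-header checksum that deliberately
	 * omits the length; the hardware adds the proper length for each
	 * segment it carves out of the payload.
	 */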
2889	th->th_sum = in_pseudo(ip->ip_src.s_addr,
2890	    ip->ip_dst.s_addr, htons(IPPROTO_TCP));
2891
2892	mp->m_data -= sizeof(struct ether_header);
2893	TXD->lower_setup.ip_fields.ipcss = ETHER_HDR_LEN;
2894	TXD->lower_setup.ip_fields.ipcso =
2895		ETHER_HDR_LEN + offsetof(struct ip, ip_sum);
2896	TXD->lower_setup.ip_fields.ipcse =
2897		htole16(ETHER_HDR_LEN + ip_hlen - 1);
2898
2899	TXD->upper_setup.tcp_fields.tucss =
2900		ETHER_HDR_LEN + ip_hlen;
2901	TXD->upper_setup.tcp_fields.tucse = 0;
2902	TXD->upper_setup.tcp_fields.tucso =
2903		ETHER_HDR_LEN + ip_hlen +
2904		offsetof(struct tcphdr, th_sum);
2905	TXD->tcp_seg_setup.fields.mss = htole16(mp->m_pkthdr.tso_segsz);
2906	TXD->tcp_seg_setup.fields.hdr_len = hdr_len;
2907	TXD->cmd_and_length = htole32(adapter->txd_cmd |
2908				E1000_TXD_CMD_DEXT |
2909				E1000_TXD_CMD_TSE |
2910				E1000_TXD_CMD_IP | E1000_TXD_CMD_TCP |
2911				(mp->m_pkthdr.len - (hdr_len)));
2912
2913	tx_buffer->m_head = NULL;
2914
2915	if (++curr_txd == adapter->num_tx_desc)
2916		curr_txd = 0;
2917
2918	adapter->num_tx_desc_avail--;
2919	adapter->next_avail_tx_desc = curr_txd;
2920	adapter->tx_tso = TRUE;
2921
2922	return (TRUE);
2923}
2924
2925/**********************************************************************
2926 *
2927 *  Examine each tx_buffer in the used queue. If the hardware is done
2928 *  processing the packet then free associated resources. The
2929 *  tx_buffer is put back on the free queue.
2930 *
2931 **********************************************************************/
2932static void
2933em_txeof(struct adapter *adapter)
2934{
2935	int i, num_avail;
2936	struct em_buffer *tx_buffer;
2937	struct em_tx_desc   *tx_desc;
2938	struct ifnet   *ifp = adapter->ifp;
2939
2940	EM_LOCK_ASSERT(adapter);
2941
2942	if (adapter->num_tx_desc_avail == adapter->num_tx_desc)
2943		return;
2944
2945	num_avail = adapter->num_tx_desc_avail;
2946	i = adapter->oldest_used_tx_desc;
2947
2948	tx_buffer = &adapter->tx_buffer_area[i];
2949	tx_desc = &adapter->tx_desc_base[i];
2950
2951	bus_dmamap_sync(adapter->txdma.dma_tag, adapter->txdma.dma_map,
2952	    BUS_DMASYNC_POSTREAD);
2953	while (tx_desc->upper.fields.status & E1000_TXD_STAT_DD) {
2955		tx_desc->upper.data = 0;
2956		num_avail++;
2957
2958		if (tx_buffer->m_head) {
2959			ifp->if_opackets++;
2960			bus_dmamap_sync(adapter->txtag, tx_buffer->map,
2961			    BUS_DMASYNC_POSTWRITE);
2962			bus_dmamap_unload(adapter->txtag, tx_buffer->map);
2963
2964			m_freem(tx_buffer->m_head);
2965			tx_buffer->m_head = NULL;
2966		}
2967
2968		if (++i == adapter->num_tx_desc)
2969			i = 0;
2970
2971		tx_buffer = &adapter->tx_buffer_area[i];
2972		tx_desc = &adapter->tx_desc_base[i];
2973	}
2974	bus_dmamap_sync(adapter->txdma.dma_tag, adapter->txdma.dma_map,
2975	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
2976
2977	adapter->oldest_used_tx_desc = i;
2978
2979	/*
2980	 * If we have enough room, clear IFF_DRV_OACTIVE to tell the stack
2981	 * that it is OK to send packets.
2982	 * If there are no pending descriptors, clear the timeout. Otherwise,
2983	 * if some descriptors have been freed, restart the timeout.
2984	 */
2985	if (num_avail > EM_TX_CLEANUP_THRESHOLD) {
2986		ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
2987		if (num_avail == adapter->num_tx_desc)
2988			ifp->if_timer = 0;
2989		else if (num_avail != adapter->num_tx_desc_avail)
2990			ifp->if_timer = EM_TX_TIMEOUT;
2991	}
2992	adapter->num_tx_desc_avail = num_avail;
2993}
2994
2995/*********************************************************************
2996 *
2997 *  Get a buffer from system mbuf buffer pool.
2998 *
2999 **********************************************************************/
3000static int
3001em_get_buf(struct adapter *adapter, int i)
3002{
3003	struct mbuf		*m;
3004	bus_dma_segment_t	segs[1];
3005	bus_dmamap_t		map;
3006	struct em_buffer	*rx_buffer;
3007	int			error, nsegs;
3008
3009	m = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR);
3010	if (m == NULL) {
3011		adapter->mbuf_cluster_failed++;
3012		return (ENOBUFS);
3013	}
3014	m->m_len = m->m_pkthdr.len = MCLBYTES;
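	/*
	 * When the frame fits in the cluster, reserve ETHER_ALIGN (2)
	 * leading bytes so the IP header behind the 14-byte ethernet
	 * header lands on a 4-byte boundary for the stack.
	 */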
3015	if (adapter->hw.max_frame_size <= (MCLBYTES - ETHER_ALIGN))
3016		m_adj(m, ETHER_ALIGN);
3017
3018	/*
3019	 * Using memory from the mbuf cluster pool, invoke the
3020	 * bus_dma machinery to arrange the memory mapping.
3021	 */
3022	error = bus_dmamap_load_mbuf_sg(adapter->rxtag, adapter->rx_sparemap,
3023	    m, segs, &nsegs, BUS_DMA_NOWAIT);
3024	if (error != 0) {
3025		m_free(m);
3026		return (error);
3027	}
3028	/* If nsegs is wrong then the stack is corrupt. */
3029	KASSERT(nsegs == 1, ("Too many segments returned!"));
3030
3031	rx_buffer = &adapter->rx_buffer_area[i];
3032	if (rx_buffer->m_head != NULL)
3033		bus_dmamap_unload(adapter->rxtag, rx_buffer->map);
3034
3035	map = rx_buffer->map;
3036	rx_buffer->map = adapter->rx_sparemap;
3037	adapter->rx_sparemap = map;
3038	bus_dmamap_sync(adapter->rxtag, rx_buffer->map, BUS_DMASYNC_PREREAD);
3039	rx_buffer->m_head = m;
3040
3041	adapter->rx_desc_base[i].buffer_addr = htole64(segs[0].ds_addr);
3042
3043	return (0);
3044}
3045
3046/*********************************************************************
3047 *
3048 *  Allocate memory for rx_buffer structures. Since we use one
3049 *  rx_buffer per received packet, the maximum number of rx_buffers
3050 *  that we'll need is equal to the number of receive descriptors
3051 *  that we've allocated.
3052 *
3053 **********************************************************************/
3054static int
3055em_allocate_receive_structures(struct adapter *adapter)
3056{
3057	device_t dev = adapter->dev;
3058	struct em_buffer *rx_buffer;
3059	int i, error;
3060
3061	adapter->rx_buffer_area = malloc(sizeof(struct em_buffer) * adapter->num_rx_desc,
3062	    M_DEVBUF, M_NOWAIT);
3063	if (adapter->rx_buffer_area == NULL) {
3064		device_printf(dev, "Unable to allocate rx_buffer memory\n");
3065		return (ENOMEM);
3066	}
3067
3068	bzero(adapter->rx_buffer_area, sizeof(struct em_buffer) * adapter->num_rx_desc);
3069
3070	error = bus_dma_tag_create(bus_get_dma_tag(dev),	/* parent */
3071				1, 0,			/* alignment, bounds */
3072				BUS_SPACE_MAXADDR,	/* lowaddr */
3073				BUS_SPACE_MAXADDR,	/* highaddr */
3074				NULL, NULL,		/* filter, filterarg */
3075				MCLBYTES,		/* maxsize */
3076				1,			/* nsegments */
3077				MCLBYTES,		/* maxsegsize */
3078				0,			/* flags */
3079				NULL,			/* lockfunc */
3080				NULL,			/* lockarg */
3081				&adapter->rxtag);
3082	if (error) {
3083		device_printf(dev, "%s: bus_dma_tag_create failed %d\n",
3084		    __func__, error);
3085		goto fail;
3086	}
3087
3088	error = bus_dmamap_create(adapter->rxtag, BUS_DMA_NOWAIT,
3089	    &adapter->rx_sparemap);
3090	if (error) {
3091		device_printf(dev, "%s: bus_dmamap_create failed: %d\n",
3092		    __func__, error);
3093		goto fail;
3094	}
3095	rx_buffer = adapter->rx_buffer_area;
3096	for (i = 0; i < adapter->num_rx_desc; i++, rx_buffer++) {
3097		error = bus_dmamap_create(adapter->rxtag, BUS_DMA_NOWAIT,
3098		    &rx_buffer->map);
3099		if (error) {
3100			device_printf(dev, "%s: bus_dmamap_create failed: %d\n",
3101			    __func__, error);
3102			goto fail;
3103		}
3104	}
3105
3106	for (i = 0; i < adapter->num_rx_desc; i++) {
3107		error = em_get_buf(adapter, i);
3108		if (error)
3109			goto fail;
3110	}
3111	bus_dmamap_sync(adapter->rxdma.dma_tag, adapter->rxdma.dma_map,
3112	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3113
3114	return (0);
3115
3116fail:
3117	em_free_receive_structures(adapter);
3118	return (error);
3119}
3120
3121/*********************************************************************
3122 *
3123 *  Allocate and initialize receive structures.
3124 *
3125 **********************************************************************/
3126static int
3127em_setup_receive_structures(struct adapter *adapter)
3128{
3129	int error;
3130
3131	bzero(adapter->rx_desc_base, (sizeof(struct em_rx_desc)) * adapter->num_rx_desc);
3132
3133	if ((error = em_allocate_receive_structures(adapter)) != 0)
3134		return (error);
3135
3136	/* Setup our descriptor pointers */
3137	adapter->next_rx_desc_to_check = 0;
3138
3139	return (0);
3140}
3141
3142/*********************************************************************
3143 *
3144 *  Enable receive unit.
3145 *
3146 **********************************************************************/
3147static void
3148em_initialize_receive_unit(struct adapter *adapter)
3149{
3150	struct ifnet	*ifp = adapter->ifp;
3151	uint64_t	bus_addr;
3152	uint32_t	reg_rctl;
3153	uint32_t	reg_rxcsum;
3154
3155	INIT_DEBUGOUT("em_initialize_receive_unit: begin");
3156
3157	/*
3158	 * Make sure receives are disabled while setting
3159	 * up the descriptor ring
3160	 */
3161	E1000_WRITE_REG(&adapter->hw, RCTL, 0);
3162
3163	/* Set the Receive Delay Timer Register */
3164	E1000_WRITE_REG(&adapter->hw, RDTR, adapter->rx_int_delay.value | E1000_RDT_FPDB);
3165
3166	if(adapter->hw.mac_type >= em_82540) {
3167		E1000_WRITE_REG(&adapter->hw, RADV, adapter->rx_abs_int_delay.value);
3168
3169		/*
3170		 * Set the interrupt throttling rate. Value is calculated
3171		 * as DEFAULT_ITR = 1/(MAX_INTS_PER_SEC * 256ns)
3172		 */
3173#define MAX_INTS_PER_SEC	8000
3174#define DEFAULT_ITR		(1000000000 / (MAX_INTS_PER_SEC * 256))
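		/*
		 * With MAX_INTS_PER_SEC at 8000 this works out to
		 * 1000000000 / 2048000 = 488 (in 256ns units).
		 */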
3175		E1000_WRITE_REG(&adapter->hw, ITR, DEFAULT_ITR);
3176	}
3177
3178	/* Setup the Base and Length of the Rx Descriptor Ring */
3179	bus_addr = adapter->rxdma.dma_paddr;
3180	E1000_WRITE_REG(&adapter->hw, RDLEN, adapter->num_rx_desc *
3181			sizeof(struct em_rx_desc));
3182	E1000_WRITE_REG(&adapter->hw, RDBAH, (uint32_t)(bus_addr >> 32));
3183	E1000_WRITE_REG(&adapter->hw, RDBAL, (uint32_t)bus_addr);
3184
3185	/* Setup the HW Rx Head and Tail Descriptor Pointers */
3186	E1000_WRITE_REG(&adapter->hw, RDT, adapter->num_rx_desc - 1);
3187	E1000_WRITE_REG(&adapter->hw, RDH, 0);
3188
3189	/* Setup the Receive Control Register */
3190	reg_rctl = E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_LBM_NO |
3191		   E1000_RCTL_RDMTS_HALF |
3192		   (adapter->hw.mc_filter_type << E1000_RCTL_MO_SHIFT);
3193
3194	if (adapter->hw.tbi_compatibility_on == TRUE)
3195		reg_rctl |= E1000_RCTL_SBP;
3196
3198	switch (adapter->rx_buffer_len) {
3199	default:
3200	case EM_RXBUFFER_2048:
3201		reg_rctl |= E1000_RCTL_SZ_2048;
3202		break;
3203	case EM_RXBUFFER_4096:
3204		reg_rctl |= E1000_RCTL_SZ_4096 | E1000_RCTL_BSEX | E1000_RCTL_LPE;
3205		break;
3206	case EM_RXBUFFER_8192:
3207		reg_rctl |= E1000_RCTL_SZ_8192 | E1000_RCTL_BSEX | E1000_RCTL_LPE;
3208		break;
3209	case EM_RXBUFFER_16384:
3210		reg_rctl |= E1000_RCTL_SZ_16384 | E1000_RCTL_BSEX | E1000_RCTL_LPE;
3211		break;
3212	}
3213
3214	if (ifp->if_mtu > ETHERMTU)
3215		reg_rctl |= E1000_RCTL_LPE;
3216
3217	/* Enable 82543 Receive Checksum Offload for TCP and UDP */
3218	if ((adapter->hw.mac_type >= em_82543) &&
3219	    (ifp->if_capenable & IFCAP_RXCSUM)) {
3220		reg_rxcsum = E1000_READ_REG(&adapter->hw, RXCSUM);
3221		reg_rxcsum |= (E1000_RXCSUM_IPOFL | E1000_RXCSUM_TUOFL);
3222		E1000_WRITE_REG(&adapter->hw, RXCSUM, reg_rxcsum);
3223	}
3224
3225	/* Enable Receives */
3226	E1000_WRITE_REG(&adapter->hw, RCTL, reg_rctl);
3227}
3228
3229/*********************************************************************
3230 *
3231 *  Free receive related data structures.
3232 *
3233 **********************************************************************/
3234static void
3235em_free_receive_structures(struct adapter *adapter)
3236{
3237	struct em_buffer *rx_buffer;
3238	int i;
3239
3240	INIT_DEBUGOUT("free_receive_structures: begin");
3241
3242	if (adapter->rx_sparemap) {
3243		bus_dmamap_destroy(adapter->rxtag, adapter->rx_sparemap);
3244		adapter->rx_sparemap = NULL;
3245	}
3246	if (adapter->rx_buffer_area != NULL) {
3247		rx_buffer = adapter->rx_buffer_area;
3248		for (i = 0; i < adapter->num_rx_desc; i++, rx_buffer++) {
3249			if (rx_buffer->m_head != NULL) {
3250				bus_dmamap_sync(adapter->rxtag, rx_buffer->map,
3251				    BUS_DMASYNC_POSTREAD);
3252				bus_dmamap_unload(adapter->rxtag,
3253				    rx_buffer->map);
3254				m_freem(rx_buffer->m_head);
3255				rx_buffer->m_head = NULL;
3256			} else if (rx_buffer->map != NULL)
3257				bus_dmamap_unload(adapter->rxtag,
3258				    rx_buffer->map);
3259			if (rx_buffer->map != NULL) {
3260				bus_dmamap_destroy(adapter->rxtag,
3261				    rx_buffer->map);
3262				rx_buffer->map = NULL;
3263			}
3264		}
3265	}
3266	if (adapter->rx_buffer_area != NULL) {
3267		free(adapter->rx_buffer_area, M_DEVBUF);
3268		adapter->rx_buffer_area = NULL;
3269	}
3270	if (adapter->rxtag != NULL) {
3271		bus_dma_tag_destroy(adapter->rxtag);
3272		adapter->rxtag = NULL;
3273	}
3274}
3275
3276/*********************************************************************
3277 *
3278 *  This routine executes in interrupt context. It replenishes
3279 *  the mbufs in the descriptor ring and passes data that has been
3280 *  DMA'ed into host memory up to the upper layer.
3281 *
3282 *  We loop at most count times if count is > 0, or until done if
3283 *  count < 0.
3284 *
3285 *********************************************************************/
3286static int
3287em_rxeof(struct adapter *adapter, int count)
3288{
3289	struct ifnet	*ifp;
3290	struct mbuf	*mp;
3291	uint8_t		accept_frame = 0;
3292	uint8_t		eop = 0;
3293	uint16_t 	len, desc_len, prev_len_adj;
3294	int		i;
3295
3296	/* Pointer to the receive descriptor being examined. */
3297	struct em_rx_desc   *current_desc;
3298	uint8_t		status;
3299
3300	ifp = adapter->ifp;
3301	i = adapter->next_rx_desc_to_check;
3302	current_desc = &adapter->rx_desc_base[i];
3303	bus_dmamap_sync(adapter->rxdma.dma_tag, adapter->rxdma.dma_map,
3304	    BUS_DMASYNC_POSTREAD);
3305
3306	if (!((current_desc->status) & E1000_RXD_STAT_DD))
3307		return (0);
3308
3309	while ((current_desc->status & E1000_RXD_STAT_DD) &&
3310	    (count != 0) &&
3311	    (ifp->if_drv_flags & IFF_DRV_RUNNING)) {
3312		struct mbuf *m = NULL;
3313
3314		mp = adapter->rx_buffer_area[i].m_head;
3315		/*
3316		 * Can't defer bus_dmamap_sync(9) because TBI_ACCEPT
3317		 * needs to access the last received byte in the mbuf.
3318		 */
3319		bus_dmamap_sync(adapter->rxtag, adapter->rx_buffer_area[i].map,
3320		    BUS_DMASYNC_POSTREAD);
3321
3322		accept_frame = 1;
3323		prev_len_adj = 0;
3324		desc_len = le16toh(current_desc->length);
3325		status = current_desc->status;
3326		if (status & E1000_RXD_STAT_EOP) {
3327			count--;
3328			eop = 1;
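			/*
			 * Strip the 4-byte ethernet CRC that trails the
			 * frame; if this final descriptor holds fewer than
			 * 4 bytes, part of the CRC already landed in the
			 * previous descriptor and prev_len_adj records how
			 * much to trim from it later.
			 */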
3329			if (desc_len < ETHER_CRC_LEN) {
3330				len = 0;
3331				prev_len_adj = ETHER_CRC_LEN - desc_len;
3332			} else
3333				len = desc_len - ETHER_CRC_LEN;
3334		} else {
3335			eop = 0;
3336			len = desc_len;
3337		}
3338
3339		if (current_desc->errors & E1000_RXD_ERR_FRAME_ERR_MASK) {
3340			uint8_t		last_byte;
3341			uint32_t	pkt_len = desc_len;
3342
3343			if (adapter->fmp != NULL)
3344				pkt_len += adapter->fmp->m_pkthdr.len;
3345
3346			last_byte = *(mtod(mp, caddr_t) + desc_len - 1);
3347			if (TBI_ACCEPT(&adapter->hw, status,
3348			    current_desc->errors, pkt_len, last_byte)) {
3349				em_tbi_adjust_stats(&adapter->hw,
3350				    &adapter->stats, pkt_len,
3351				    adapter->hw.mac_addr);
3352				if (len > 0)
3353					len--;
3354			} else
3355				accept_frame = 0;
3356		}
3357
3358		if (accept_frame) {
3359			if (em_get_buf(adapter, i) != 0) {
3360				ifp->if_iqdrops++;
3361				goto discard;
3362			}
3363
3364			/* Assign correct length to the current fragment */
3365			mp->m_len = len;
3366
3367			if (adapter->fmp == NULL) {
3368				mp->m_pkthdr.len = len;
3369				adapter->fmp = mp; /* Store the first mbuf */
3370				adapter->lmp = mp;
3371			} else {
3372				/* Chain mbuf's together */
3373				mp->m_flags &= ~M_PKTHDR;
3374				/*
3375				 * Adjust length of previous mbuf in chain if
3376				 * we received less than 4 bytes in the last
3377				 * descriptor.
3378				 */
3379				if (prev_len_adj > 0) {
3380					adapter->lmp->m_len -= prev_len_adj;
3381					adapter->fmp->m_pkthdr.len -=
3382					    prev_len_adj;
3383				}
3384				adapter->lmp->m_next = mp;
3385				adapter->lmp = adapter->lmp->m_next;
3386				adapter->fmp->m_pkthdr.len += len;
3387			}
3388
3389			if (eop) {
3390				adapter->fmp->m_pkthdr.rcvif = ifp;
3391				ifp->if_ipackets++;
3392				em_receive_checksum(adapter, current_desc,
3393				    adapter->fmp);
3394#ifndef __NO_STRICT_ALIGNMENT
3395				if (adapter->hw.max_frame_size >
3396				    (MCLBYTES - ETHER_ALIGN) &&
3397				    em_fixup_rx(adapter) != 0)
3398					goto skip;
3399#endif
3400				if (status & E1000_RXD_STAT_VP)
3401					VLAN_INPUT_TAG(ifp, adapter->fmp,
3402					    (le16toh(current_desc->special) &
3403					    E1000_RXD_SPC_VLAN_MASK));
3404#ifndef __NO_STRICT_ALIGNMENT
3405skip:
3406#endif
3407				m = adapter->fmp;
3408				adapter->fmp = NULL;
3409				adapter->lmp = NULL;
3410			}
3411		} else {
3412			ifp->if_ierrors++;
3413discard:
3414			/* Reuse loaded DMA map and just update mbuf chain */
3415			mp = adapter->rx_buffer_area[i].m_head;
3416			mp->m_len = mp->m_pkthdr.len = MCLBYTES;
3417			mp->m_data = mp->m_ext.ext_buf;
3418			mp->m_next = NULL;
3419			if (adapter->hw.max_frame_size <= (MCLBYTES - ETHER_ALIGN))
3420				m_adj(mp, ETHER_ALIGN);
3421			if (adapter->fmp != NULL) {
3422				m_freem(adapter->fmp);
3423				adapter->fmp = NULL;
3424				adapter->lmp = NULL;
3425			}
3426			m = NULL;
3427		}
3428
3429		/* Zero out the receive descriptors status. */
3430		current_desc->status = 0;
3431		bus_dmamap_sync(adapter->rxdma.dma_tag, adapter->rxdma.dma_map,
3432		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3433
3434		/* Advance our pointers to the next descriptor. */
3435		if (++i == adapter->num_rx_desc)
3436			i = 0;
3437		if (m != NULL) {
3438			adapter->next_rx_desc_to_check = i;
3439#ifdef DEVICE_POLLING
3440			EM_UNLOCK(adapter);
3441			(*ifp->if_input)(ifp, m);
3442			EM_LOCK(adapter);
3443#else
3444			(*ifp->if_input)(ifp, m);
3445#endif
3446			i = adapter->next_rx_desc_to_check;
3447		}
3448		current_desc = &adapter->rx_desc_base[i];
3449	}
3450	adapter->next_rx_desc_to_check = i;
3451
3452	/* Advance the E1000's Receive Queue #0 "Tail Pointer". */
3453	if (--i < 0)
3454		i = adapter->num_rx_desc - 1;
3455	E1000_WRITE_REG(&adapter->hw, RDT, i);
3456	if (!((current_desc->status) & E1000_RXD_STAT_DD))
3457		return (0);
3458
3459	return (1);
3460}
3461
3462#ifndef __NO_STRICT_ALIGNMENT
3463/*
3464 * When jumbo frames are enabled we have to realign the entire payload on
3465 * architectures with strict alignment.  This is a serious design mistake
3466 * of the 8254x, as it nullifies the benefit of DMA: the RX buffer size
3467 * may only be 2048/4096/8192/16384 bytes, while what we really want is
3468 * 2048 - ETHER_ALIGN so the payload comes out aligned.  On architectures
3469 * without strict alignment restrictions the 8254x still performs
3470 * unaligned memory accesses, which reduce performance as well.  To avoid
3471 * copying an entire frame just to align it, we allocate a new mbuf, copy
3472 * the Ethernet header into it and prepend it to the existing mbuf chain.
3473 *
3474 * Be aware that the best performance of the 8254x is achieved only when
3475 * jumbo frames are not used at all on architectures with strict alignment.
3476 */
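/*
 * Illustrative example of the in-place case below: a frame DMA'd to
 * cluster offset 0 has its IP header at offset ETHER_HDR_LEN (14),
 * which is not 4-byte aligned; shifting the whole frame forward by
 * ETHER_HDR_LEN moves the IP header to offset 28, a 4-byte boundary.
 */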
3477static int
3478em_fixup_rx(struct adapter *adapter)
3479{
3480	struct mbuf *m, *n;
3481	int error;
3482
3483	error = 0;
3484	m = adapter->fmp;
3485	if (m->m_len <= (MCLBYTES - ETHER_HDR_LEN)) {
3486		bcopy(m->m_data, m->m_data + ETHER_HDR_LEN, m->m_len);
3487		m->m_data += ETHER_HDR_LEN;
3488	} else {
3489		MGETHDR(n, M_DONTWAIT, MT_DATA);
3490		if (n != NULL) {
3491			bcopy(m->m_data, n->m_data, ETHER_HDR_LEN);
3492			m->m_data += ETHER_HDR_LEN;
3493			m->m_len -= ETHER_HDR_LEN;
3494			n->m_len = ETHER_HDR_LEN;
3495			M_MOVE_PKTHDR(n, m);
3496			n->m_next = m;
3497			adapter->fmp = n;
3498		} else {
3499			adapter->ifp->if_iqdrops++;
3500			adapter->mbuf_alloc_failed++;
3501			m_freem(adapter->fmp);
3502			adapter->fmp = NULL;
3503			adapter->lmp = NULL;
3504			error = ENOBUFS;
3505		}
3506	}
3507
3508	return (error);
3509}
3510#endif
3511
3512/*********************************************************************
3513 *
3514 *  Verify that the hardware indicated that the checksum is valid.
3515 *  Inform the stack about the status of the checksum so that the
3516 *  stack does not spend time re-verifying it.
3517 *
3518 *********************************************************************/
3519static void
3520em_receive_checksum(struct adapter *adapter, struct em_rx_desc *rx_desc,
3521		    struct mbuf *mp)
3522{
3523	/* Checksum offload is available on 82543 and newer only, */
3524	if ((adapter->hw.mac_type < em_82543) ||
3525	    /* and is skipped when the Ignore Checksum (IXSM) bit is set. */
3526	    (rx_desc->status & E1000_RXD_STAT_IXSM)) {
3527		mp->m_pkthdr.csum_flags = 0;
3528		return;
3529	}
3530
3531	if (rx_desc->status & E1000_RXD_STAT_IPCS) {
3532		/* Did it pass? */
3533		if (!(rx_desc->errors & E1000_RXD_ERR_IPE)) {
3534			/* IP Checksum Good */
3535			mp->m_pkthdr.csum_flags = CSUM_IP_CHECKED;
3536			mp->m_pkthdr.csum_flags |= CSUM_IP_VALID;
3537
3538		} else {
3539			mp->m_pkthdr.csum_flags = 0;
3540		}
3541	}
3542
3543	if (rx_desc->status & E1000_RXD_STAT_TCPCS) {
3544		/* Did it pass? */
3545		if (!(rx_desc->errors & E1000_RXD_ERR_TCPE)) {
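			/*
			 * csum_data = 0xffff together with
			 * CSUM_DATA_VALID | CSUM_PSEUDO_HDR tells the
			 * stack the TCP/UDP checksum, pseudo-header
			 * included, has already been verified.
			 */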
3546			mp->m_pkthdr.csum_flags |=
3547			    (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
3548			mp->m_pkthdr.csum_data = htons(0xffff);
3549		}
3550	}
3551}
3552
3553
3554static void
3555em_enable_vlans(struct adapter *adapter)
3556{
3557	uint32_t ctrl;
3558
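	/*
	 * Program the ethertype the MAC recognizes as a VLAN tag and
	 * enable hardware tag handling (CTRL.VME).
	 */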
3559	E1000_WRITE_REG(&adapter->hw, VET, ETHERTYPE_VLAN);
3560
3561	ctrl = E1000_READ_REG(&adapter->hw, CTRL);
3562	ctrl |= E1000_CTRL_VME;
3563	E1000_WRITE_REG(&adapter->hw, CTRL, ctrl);
3564}
3565
3566static void
3567em_disable_vlans(struct adapter *adapter)
3568{
3569	uint32_t ctrl;
3570
3571	ctrl = E1000_READ_REG(&adapter->hw, CTRL);
3572	ctrl &= ~E1000_CTRL_VME;
3573	E1000_WRITE_REG(&adapter->hw, CTRL, ctrl);
3574}
3575
3576static void
3577em_enable_intr(struct adapter *adapter)
3578{
3579	E1000_WRITE_REG(&adapter->hw, IMS, (IMS_ENABLE_MASK));
3580}
3581
3582static void
3583em_disable_intr(struct adapter *adapter)
3584{
3585	/*
3586	 * The first version of the 82542 had an erratum where, with link
3587	 * forced, it would stay up even if the cable was disconnected.
3588	 * Sequence errors were used to detect the disconnect, after which
3589	 * the driver would unforce the link.  That code is in the ISR; for
3590	 * it to work correctly the sequence error interrupt had to be
3591	 * enabled all the time.
3592	 */
3593
3594	if (adapter->hw.mac_type == em_82542_rev2_0)
3595		E1000_WRITE_REG(&adapter->hw, IMC,
3596		    0xffffffff & ~E1000_IMC_RXSEQ);
3597	else
3598		E1000_WRITE_REG(&adapter->hw, IMC,
3599		    0xffffffff);
3600}
3601
3602static int
3603em_is_valid_ether_addr(uint8_t *addr)
3604{
3605	char zero_addr[6] = { 0, 0, 0, 0, 0, 0 };
3606
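	/* Reject group (multicast/broadcast) and all-zero addresses. */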
3607	if ((addr[0] & 1) || (!bcmp(addr, zero_addr, ETHER_ADDR_LEN))) {
3608		return (FALSE);
3609	}
3610
3611	return (TRUE);
3612}
3613
3614void
3615em_write_pci_cfg(struct em_hw *hw, uint32_t reg, uint16_t *value)
3616{
3617	pci_write_config(((struct em_osdep *)hw->back)->dev, reg, *value, 2);
3618}
3619
3620void
3621em_read_pci_cfg(struct em_hw *hw, uint32_t reg, uint16_t *value)
3622{
3623	*value = pci_read_config(((struct em_osdep *)hw->back)->dev, reg, 2);
3624}
3625
3626void
3627em_pci_set_mwi(struct em_hw *hw)
3628{
3629	pci_write_config(((struct em_osdep *)hw->back)->dev, PCIR_COMMAND,
3630	    (hw->pci_cmd_word | CMD_MEM_WRT_INVALIDATE), 2);
3631}
3632
3633void
3634em_pci_clear_mwi(struct em_hw *hw)
3635{
3636	pci_write_config(((struct em_osdep *)hw->back)->dev, PCIR_COMMAND,
3637	    (hw->pci_cmd_word & ~CMD_MEM_WRT_INVALIDATE), 2);
3638}
3639
3640/*********************************************************************
3641* 82544 coexistence issue workaround.
3642*    There are two issues:
3643*       1. Transmit hang.
3644*    To detect this issue the following equation can be used:
3645*	  SIZE[3:0] + ADDR[2:0] = SUM[3:0].
3646*	  If SUM[3:0] is between 1 and 4, we will hit this issue.
3647*
3648*       2. DAC (PCI dual address cycle) issue.
3649*    To detect this issue the following equation can be used:
3650*	  SIZE[3:0] + ADDR[2:0] = SUM[3:0].
3651*	  If SUM[3:0] is between 9 and c (hex), we will hit this issue.
3652*
3653*    WORKAROUND:
3654*	  Make sure the ending address is never 1, 2, 3, 4 (hang) or
3655*	  9, a, b, c (DAC).
3656*
3657*********************************************************************/
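/*
 * Worked example: an address ending in 0x2 with length 0x12 gives
 * ((0x2 & 0x7) + (0x12 & 0xF)) & 0xF = 0x4, inside the hang range, so
 * the transfer is split into (length - 4) bytes plus a trailing 4-byte
 * descriptor.  An address ending in 0x5 with length 0x10 gives 0x5,
 * which is safe, so a single descriptor suffices.
 */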
3658static uint32_t
3659em_fill_descriptors(bus_addr_t address, uint32_t length,
3660		PDESC_ARRAY desc_array)
3661{
3662	/* The issue is sensitive to both the length and the address, */
3663	/* so handle the trivial short-transfer case first. */
3664	uint32_t safe_terminator;
3665	if (length <= 4) {
3666		desc_array->descriptor[0].address = address;
3667		desc_array->descriptor[0].length = length;
3668		desc_array->elements = 1;
3669		return (desc_array->elements);
3670	}
3671	safe_terminator = (uint32_t)((((uint32_t)address & 0x7) + (length & 0xF)) & 0xF);
3672	/* If it does not fall in 0x1-0x4 or 0x9-0xC, one descriptor is safe. */
3673	if (safe_terminator == 0 ||
3674	    (safe_terminator > 4 &&
3675	    safe_terminator < 9) ||
3676	    (safe_terminator > 0xC &&
3677	    safe_terminator <= 0xF)) {
3678		desc_array->descriptor[0].address = address;
3679		desc_array->descriptor[0].length = length;
3680		desc_array->elements = 1;
3681		return (desc_array->elements);
3682	}
3683
3684	desc_array->descriptor[0].address = address;
3685	desc_array->descriptor[0].length = length - 4;
3686	desc_array->descriptor[1].address = address + (length - 4);
3687	desc_array->descriptor[1].length = 4;
3688	desc_array->elements = 2;
3689	return (desc_array->elements);
3690}
3691
3692/**********************************************************************
3693 *
3694 *  Update the board statistics counters.
3695 *
3696 **********************************************************************/
3697static void
3698em_update_stats_counters(struct adapter *adapter)
3699{
3700	struct ifnet   *ifp;
3701
3702	if (adapter->hw.media_type == em_media_type_copper ||
3703	   (E1000_READ_REG(&adapter->hw, STATUS) & E1000_STATUS_LU)) {
3704		adapter->stats.symerrs += E1000_READ_REG(&adapter->hw, SYMERRS);
3705		adapter->stats.sec += E1000_READ_REG(&adapter->hw, SEC);
3706	}
3707	adapter->stats.crcerrs += E1000_READ_REG(&adapter->hw, CRCERRS);
3708	adapter->stats.mpc += E1000_READ_REG(&adapter->hw, MPC);
3709	adapter->stats.scc += E1000_READ_REG(&adapter->hw, SCC);
3710	adapter->stats.ecol += E1000_READ_REG(&adapter->hw, ECOL);
3711
3712	adapter->stats.mcc += E1000_READ_REG(&adapter->hw, MCC);
3713	adapter->stats.latecol += E1000_READ_REG(&adapter->hw, LATECOL);
3714	adapter->stats.colc += E1000_READ_REG(&adapter->hw, COLC);
3715	adapter->stats.dc += E1000_READ_REG(&adapter->hw, DC);
3716	adapter->stats.rlec += E1000_READ_REG(&adapter->hw, RLEC);
3717	adapter->stats.xonrxc += E1000_READ_REG(&adapter->hw, XONRXC);
3718	adapter->stats.xontxc += E1000_READ_REG(&adapter->hw, XONTXC);
3719	adapter->stats.xoffrxc += E1000_READ_REG(&adapter->hw, XOFFRXC);
3720	adapter->stats.xofftxc += E1000_READ_REG(&adapter->hw, XOFFTXC);
3721	adapter->stats.fcruc += E1000_READ_REG(&adapter->hw, FCRUC);
3722	adapter->stats.prc64 += E1000_READ_REG(&adapter->hw, PRC64);
3723	adapter->stats.prc127 += E1000_READ_REG(&adapter->hw, PRC127);
3724	adapter->stats.prc255 += E1000_READ_REG(&adapter->hw, PRC255);
3725	adapter->stats.prc511 += E1000_READ_REG(&adapter->hw, PRC511);
3726	adapter->stats.prc1023 += E1000_READ_REG(&adapter->hw, PRC1023);
3727	adapter->stats.prc1522 += E1000_READ_REG(&adapter->hw, PRC1522);
3728	adapter->stats.gprc += E1000_READ_REG(&adapter->hw, GPRC);
3729	adapter->stats.bprc += E1000_READ_REG(&adapter->hw, BPRC);
3730	adapter->stats.mprc += E1000_READ_REG(&adapter->hw, MPRC);
3731	adapter->stats.gptc += E1000_READ_REG(&adapter->hw, GPTC);
3732
3733	/* For the 64-bit byte counters the low dword must be read first; */
3734	/* both halves clear on the read of the high dword. */
3735
3736	adapter->stats.gorcl += E1000_READ_REG(&adapter->hw, GORCL);
3737	adapter->stats.gorch += E1000_READ_REG(&adapter->hw, GORCH);
3738	adapter->stats.gotcl += E1000_READ_REG(&adapter->hw, GOTCL);
3739	adapter->stats.gotch += E1000_READ_REG(&adapter->hw, GOTCH);
3740
3741	adapter->stats.rnbc += E1000_READ_REG(&adapter->hw, RNBC);
3742	adapter->stats.ruc += E1000_READ_REG(&adapter->hw, RUC);
3743	adapter->stats.rfc += E1000_READ_REG(&adapter->hw, RFC);
3744	adapter->stats.roc += E1000_READ_REG(&adapter->hw, ROC);
3745	adapter->stats.rjc += E1000_READ_REG(&adapter->hw, RJC);
3746
3747	adapter->stats.torl += E1000_READ_REG(&adapter->hw, TORL);
3748	adapter->stats.torh += E1000_READ_REG(&adapter->hw, TORH);
3749	adapter->stats.totl += E1000_READ_REG(&adapter->hw, TOTL);
3750	adapter->stats.toth += E1000_READ_REG(&adapter->hw, TOTH);
3751
3752	adapter->stats.tpr += E1000_READ_REG(&adapter->hw, TPR);
3753	adapter->stats.tpt += E1000_READ_REG(&adapter->hw, TPT);
3754	adapter->stats.ptc64 += E1000_READ_REG(&adapter->hw, PTC64);
3755	adapter->stats.ptc127 += E1000_READ_REG(&adapter->hw, PTC127);
3756	adapter->stats.ptc255 += E1000_READ_REG(&adapter->hw, PTC255);
3757	adapter->stats.ptc511 += E1000_READ_REG(&adapter->hw, PTC511);
3758	adapter->stats.ptc1023 += E1000_READ_REG(&adapter->hw, PTC1023);
3759	adapter->stats.ptc1522 += E1000_READ_REG(&adapter->hw, PTC1522);
3760	adapter->stats.mptc += E1000_READ_REG(&adapter->hw, MPTC);
3761	adapter->stats.bptc += E1000_READ_REG(&adapter->hw, BPTC);
3762
3763	if (adapter->hw.mac_type >= em_82543) {
3764		adapter->stats.algnerrc += E1000_READ_REG(&adapter->hw, ALGNERRC);
3765		adapter->stats.rxerrc += E1000_READ_REG(&adapter->hw, RXERRC);
3766		adapter->stats.tncrs += E1000_READ_REG(&adapter->hw, TNCRS);
3767		adapter->stats.cexterr += E1000_READ_REG(&adapter->hw, CEXTERR);
3768		adapter->stats.tsctc += E1000_READ_REG(&adapter->hw, TSCTC);
3769		adapter->stats.tsctfc += E1000_READ_REG(&adapter->hw, TSCTFC);
3770	}
3771	ifp = adapter->ifp;
3772
3773	ifp->if_collisions = adapter->stats.colc;
3774
3775	/* Rx Errors */
3776	ifp->if_ierrors = adapter->stats.rxerrc + adapter->stats.crcerrs +
3777	    adapter->stats.algnerrc + adapter->stats.ruc + adapter->stats.roc +
3778	    adapter->stats.mpc + adapter->stats.cexterr;
3779
3780	/* Tx Errors */
3781	ifp->if_oerrors = adapter->stats.ecol + adapter->stats.latecol +
3782	    adapter->watchdog_events;
3783}
3784
3785
3786/**********************************************************************
3787 *
3788 *  This routine is called only when em_display_debug_stats is enabled.
3789 *  This routine provides a way to take a look at important statistics
3790 *  maintained by the driver and hardware.
3791 *
3792 **********************************************************************/
3793static void
3794em_print_debug_info(struct adapter *adapter)
3795{
3796	device_t dev = adapter->dev;
3797	uint8_t *hw_addr = adapter->hw.hw_addr;
3798
3799	device_printf(dev, "Adapter hardware address = %p \n", hw_addr);
3800	device_printf(dev, "CTRL = 0x%x RCTL = 0x%x \n",
3801	    E1000_READ_REG(&adapter->hw, CTRL),
3802	    E1000_READ_REG(&adapter->hw, RCTL));
3803	device_printf(dev, "Packet buffer = Tx=%dk Rx=%dk \n",
3804	    ((E1000_READ_REG(&adapter->hw, PBA) & 0xffff0000) >> 16),
3805	    (E1000_READ_REG(&adapter->hw, PBA) & 0xffff));
3806	device_printf(dev, "Flow control watermarks high = %d low = %d\n",
3807	    adapter->hw.fc_high_water,
3808	    adapter->hw.fc_low_water);
3809	device_printf(dev, "tx_int_delay = %d, tx_abs_int_delay = %d\n",
3810	    E1000_READ_REG(&adapter->hw, TIDV),
3811	    E1000_READ_REG(&adapter->hw, TADV));
3812	device_printf(dev, "rx_int_delay = %d, rx_abs_int_delay = %d\n",
3813	    E1000_READ_REG(&adapter->hw, RDTR),
3814	    E1000_READ_REG(&adapter->hw, RADV));
3815	device_printf(dev, "fifo workaround = %lld, fifo_reset_count = %lld\n",
3816	    (long long)adapter->tx_fifo_wrk_cnt,
3817	    (long long)adapter->tx_fifo_reset_cnt);
3818	device_printf(dev, "hw tdh = %d, hw tdt = %d\n",
3819	    E1000_READ_REG(&adapter->hw, TDH),
3820	    E1000_READ_REG(&adapter->hw, TDT));
3821	device_printf(dev, "Num Tx descriptors avail = %d\n",
3822	    adapter->num_tx_desc_avail);
3823	device_printf(dev, "Tx Descriptors not avail1 = %ld\n",
3824	    adapter->no_tx_desc_avail1);
3825	device_printf(dev, "Tx Descriptors not avail2 = %ld\n",
3826	    adapter->no_tx_desc_avail2);
3827	device_printf(dev, "Std mbuf failed = %ld\n",
3828	    adapter->mbuf_alloc_failed);
3829	device_printf(dev, "Std mbuf cluster failed = %ld\n",
3830	    adapter->mbuf_cluster_failed);
3831}
3832
3833static void
3834em_print_hw_stats(struct adapter *adapter)
3835{
3836	device_t dev = adapter->dev;
3837
3838	device_printf(dev, "Excessive collisions = %lld\n",
3839	    (long long)adapter->stats.ecol);
3840	device_printf(dev, "Symbol errors = %lld\n",
3841	    (long long)adapter->stats.symerrs);
3842	device_printf(dev, "Sequence errors = %lld\n",
3843	    (long long)adapter->stats.sec);
3844	device_printf(dev, "Defer count = %lld\n", (long long)adapter->stats.dc);
3845
3846	device_printf(dev, "Missed Packets = %lld\n", (long long)adapter->stats.mpc);
3847	device_printf(dev, "Receive No Buffers = %lld\n",
3848	    (long long)adapter->stats.rnbc);
3849	/* RLEC is inaccurate on some hardware, so calculate our own. */
3850	device_printf(dev, "Receive Length Errors = %lld\n",
3851	    ((long long)adapter->stats.roc + (long long)adapter->stats.ruc));
3852	device_printf(dev, "Receive errors = %lld\n",
3853	    (long long)adapter->stats.rxerrc);
3854	device_printf(dev, "CRC errors = %lld\n", (long long)adapter->stats.crcerrs);
3855	device_printf(dev, "Alignment errors = %lld\n",
3856	    (long long)adapter->stats.algnerrc);
3857	device_printf(dev, "Carrier extension errors = %lld\n",
3858	    (long long)adapter->stats.cexterr);
3859	device_printf(dev, "RX overruns = %ld\n", adapter->rx_overruns);
3860	device_printf(dev, "watchdog timeouts = %ld\n", adapter->watchdog_events);
3861
3862	device_printf(dev, "XON Rcvd = %lld\n", (long long)adapter->stats.xonrxc);
3863	device_printf(dev, "XON Xmtd = %lld\n", (long long)adapter->stats.xontxc);
3864	device_printf(dev, "XOFF Rcvd = %lld\n", (long long)adapter->stats.xoffrxc);
3865	device_printf(dev, "XOFF Xmtd = %lld\n", (long long)adapter->stats.xofftxc);
3866
3867	device_printf(dev, "Good Packets Rcvd = %lld\n",
3868	    (long long)adapter->stats.gprc);
3869	device_printf(dev, "Good Packets Xmtd = %lld\n",
3870	    (long long)adapter->stats.gptc);
3871	device_printf(dev, "TSO Contexts Xmtd = %lld\n",
3872	    (long long)adapter->stats.tsctc);
3873	device_printf(dev, "TSO Contexts Failed = %lld\n",
3874	    (long long)adapter->stats.tsctfc);
3875}
3876
3877static int
3878em_sysctl_debug_info(SYSCTL_HANDLER_ARGS)
3879{
3880	struct adapter *adapter;
3881	int error;
3882	int result;
3883
3884	result = -1;
3885	error = sysctl_handle_int(oidp, &result, 0, req);
3886
3887	if (error || !req->newptr)
3888		return (error);
3889
3890	if (result == 1) {
3891		adapter = (struct adapter *)arg1;
3892		em_print_debug_info(adapter);
3893	}
3894
3895	return (error);
3896}
3897
3898
3899static int
3900em_sysctl_stats(SYSCTL_HANDLER_ARGS)
3901{
3902	struct adapter *adapter;
3903	int error;
3904	int result;
3905
3906	result = -1;
3907	error = sysctl_handle_int(oidp, &result, 0, req);
3908
3909	if (error || !req->newptr)
3910		return (error);
3911
3912	if (result == 1) {
3913		adapter = (struct adapter *)arg1;
3914		em_print_hw_stats(adapter);
3915	}
3916
3917	return (error);
3918}
3919
3920static int
3921em_sysctl_int_delay(SYSCTL_HANDLER_ARGS)
3922{
3923	struct em_int_delay_info *info;
3924	struct adapter *adapter;
3925	uint32_t regval;
3926	int error;
3927	int usecs;
3928	int ticks;
3929
3930	info = (struct em_int_delay_info *)arg1;
3931	usecs = info->value;
3932	error = sysctl_handle_int(oidp, &usecs, 0, req);
3933	if (error != 0 || req->newptr == NULL)
3934		return (error);
3935	if (usecs < 0 || usecs > E1000_TICKS_TO_USECS(65535))
3936		return (EINVAL);
3937	info->value = usecs;
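	/* The 8254x delay timers count in units of ~1.024 microseconds. */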
3938	ticks = E1000_USECS_TO_TICKS(usecs);
3939
3940	adapter = info->adapter;
3941
3942	EM_LOCK(adapter);
3943	regval = E1000_READ_OFFSET(&adapter->hw, info->offset);
3944	regval = (regval & ~0xffff) | (ticks & 0xffff);
3945	/* Handle a few special cases. */
3946	switch (info->offset) {
3947	case E1000_RDTR:
3948	case E1000_82542_RDTR:
3949		regval |= E1000_RDT_FPDB;
3950		break;
3951	case E1000_TIDV:
3952	case E1000_82542_TIDV:
3953		if (ticks == 0) {
3954			adapter->txd_cmd &= ~E1000_TXD_CMD_IDE;
3955			/* Don't write 0 into the TIDV register. */
3956			regval++;
3957		} else
3958			adapter->txd_cmd |= E1000_TXD_CMD_IDE;
3959		break;
3960	}
3961	E1000_WRITE_OFFSET(&adapter->hw, info->offset, regval);
3962	EM_UNLOCK(adapter);
3963	return (0);
3964}
3965
3966static void
3967em_add_int_delay_sysctl(struct adapter *adapter, const char *name,
3968	const char *description, struct em_int_delay_info *info,
3969	int offset, int value)
3970{
3971	info->adapter = adapter;
3972	info->offset = offset;
3973	info->value = value;
3974	SYSCTL_ADD_PROC(device_get_sysctl_ctx(adapter->dev),
3975	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
3976	    OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW,
3977	    info, 0, em_sysctl_int_delay, "I", description);
3978}
3979
3980#ifndef DEVICE_POLLING
3981static void
3982em_add_int_process_limit(struct adapter *adapter, const char *name,
3983	const char *description, int *limit, int value)
3984{
3985	*limit = value;
3986	SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev),
3987	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
3988	    OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW, limit, value, description);
3989}
3990#endif
3991