/**************************************************************************

Copyright (c) 2001-2006, Intel Corporation
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

 1. Redistributions of source code must retain the above copyright notice,
    this list of conditions and the following disclaimer.

 2. Redistributions in binary form must reproduce the above copyright
    notice, this list of conditions and the following disclaimer in the
    documentation and/or other materials provided with the distribution.

 3. Neither the name of the Intel Corporation nor the names of its
    contributors may be used to endorse or promote products derived from
    this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.

***************************************************************************/

/*$FreeBSD: head/sys/dev/em/if_em.c 164534 2006-11-23 00:52:52Z kmacy $*/

#ifdef HAVE_KERNEL_OPTION_HEADERS
#include "opt_device_polling.h"
#endif

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/bus.h>
#include <sys/endian.h>
#include <sys/kernel.h>
#include <sys/kthread.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/module.h>
#include <sys/rman.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/sysctl.h>
#include <sys/taskqueue.h>

#include <machine/bus.h>
#include <machine/resource.h>

#include <net/bpf.h>
#include <net/ethernet.h>
#include <net/if.h>
#include <net/if_arp.h>
#include <net/if_dl.h>
#include <net/if_media.h>

#include <net/if_types.h>
#include <net/if_vlan_var.h>

#include <netinet/in_systm.h>
#include <netinet/in.h>
#include <netinet/if_ether.h>
#include <netinet/ip.h>
#include <netinet/ip6.h>
#include <netinet/tcp.h>
#include <netinet/udp.h>

#include <machine/in_cksum.h>
#include <dev/pci/pcivar.h>
#include <dev/pci/pcireg.h>
#include <dev/em/if_em_hw.h>
#include <dev/em/if_em.h>

/*********************************************************************
 *  Set this to one to display debug statistics
 *********************************************************************/
int	em_display_debug_stats = 0;

/*********************************************************************
 *  Driver version
 *********************************************************************/

char em_driver_version[] = "Version - 6.2.9";


/*********************************************************************
 *  PCI Device ID Table
 *
 *  Used by probe to select devices to load on
 *  Last field stores an index into em_strings
 *  Last entry must be all 0s
 *
 *  { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index }
 *********************************************************************/

static em_vendor_info_t em_vendor_info_array[] =
{
	/* Intel(R) PRO/1000 Network Connection */
	{ 0x8086, E1000_DEV_ID_82540EM,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82540EM_LOM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82540EP,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82540EP_LOM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82540EP_LP,	PCI_ANY_ID, PCI_ANY_ID, 0},

	{ 0x8086, E1000_DEV_ID_82541EI,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82541ER,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82541ER_LOM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82541EI_MOBILE,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82541GI,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82541GI_LF,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82541GI_MOBILE,	PCI_ANY_ID, PCI_ANY_ID, 0},

	{ 0x8086, E1000_DEV_ID_82542,		PCI_ANY_ID, PCI_ANY_ID, 0},

	{ 0x8086, E1000_DEV_ID_82543GC_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82543GC_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},

	{ 0x8086, E1000_DEV_ID_82544EI_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82544EI_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82544GC_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82544GC_LOM,	PCI_ANY_ID, PCI_ANY_ID, 0},

	{ 0x8086, E1000_DEV_ID_82545EM_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82545EM_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82545GM_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82545GM_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82545GM_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},

	{ 0x8086, E1000_DEV_ID_82546EB_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82546EB_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82546EB_QUAD_COPPER, PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82546GB_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82546GB_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82546GB_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82546GB_PCIE,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82546GB_QUAD_COPPER, PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82546GB_QUAD_COPPER_KSP3,
						PCI_ANY_ID, PCI_ANY_ID, 0},

	{ 0x8086, E1000_DEV_ID_82547EI,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82547EI_MOBILE,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82547GI,		PCI_ANY_ID, PCI_ANY_ID, 0},

	{ 0x8086, E1000_DEV_ID_82571EB_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER_LOWPROFILE,
						PCI_ANY_ID, PCI_ANY_ID, 0},

	{ 0x8086, E1000_DEV_ID_82572EI_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82572EI_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82572EI_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82572EI,		PCI_ANY_ID, PCI_ANY_ID, 0},

	{ 0x8086, E1000_DEV_ID_82573E,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82573E_IAMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82573L,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_SPT,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_SPT,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_DPT,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_DPT,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IGP_M_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IGP_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IGP_C,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IFE,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IFE_GT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IFE_G,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IGP_M,	PCI_ANY_ID, PCI_ANY_ID, 0},

	/* required last entry */
	{ 0, 0, 0, 0, 0}
};

/*********************************************************************
 *  Table of branding strings for all supported NICs.
 *********************************************************************/

static char *em_strings[] = {
	"Intel(R) PRO/1000 Network Connection"
};

/*********************************************************************
 *  Function prototypes
 *********************************************************************/
static int	em_probe(device_t);
static int	em_attach(device_t);
static int	em_detach(device_t);
static int	em_shutdown(device_t);
static int	em_suspend(device_t);
static int	em_resume(device_t);
static void	em_start(struct ifnet *);
static void	em_start_locked(struct ifnet *ifp);
static int	em_ioctl(struct ifnet *, u_long, caddr_t);
static void	em_watchdog(struct adapter *);
static void	em_init(void *);
static void	em_init_locked(struct adapter *);
static void	em_stop(void *);
static void	em_media_status(struct ifnet *, struct ifmediareq *);
static int	em_media_change(struct ifnet *);
static void	em_identify_hardware(struct adapter *);
static int	em_allocate_pci_resources(struct adapter *);
static int	em_allocate_intr(struct adapter *);
static void	em_free_intr(struct adapter *);
static void	em_free_pci_resources(struct adapter *);
static void	em_local_timer(void *);
static int	em_hardware_init(struct adapter *);
static void	em_setup_interface(device_t, struct adapter *);
static void	em_setup_transmit_structures(struct adapter *);
static void	em_initialize_transmit_unit(struct adapter *);
static int	em_setup_receive_structures(struct adapter *);
static void	em_initialize_receive_unit(struct adapter *);
static void	em_enable_intr(struct adapter *);
static void	em_disable_intr(struct adapter *);
static void	em_free_transmit_structures(struct adapter *);
static void	em_free_receive_structures(struct adapter *);
static void	em_update_stats_counters(struct adapter *);
static void	em_txeof(struct adapter *);
static int	em_allocate_receive_structures(struct adapter *);
static int	em_allocate_transmit_structures(struct adapter *);
static int	em_rxeof(struct adapter *, int);
#ifndef __NO_STRICT_ALIGNMENT
static int	em_fixup_rx(struct adapter *);
#endif
static void	em_receive_checksum(struct adapter *, struct em_rx_desc *,
		    struct mbuf *);
static void	em_transmit_checksum_setup(struct adapter *, struct mbuf *,
		    uint32_t *, uint32_t *);
static boolean_t em_tso_setup(struct adapter *, struct mbuf *,
		    uint32_t *, uint32_t *);
static void	em_set_promisc(struct adapter *);
static void	em_disable_promisc(struct adapter *);
static void	em_set_multi(struct adapter *);
static void	em_print_hw_stats(struct adapter *);
static void	em_update_link_status(struct adapter *);
static int	em_get_buf(struct adapter *, int);
static void	em_enable_vlans(struct adapter *);
static int	em_encap(struct adapter *, struct mbuf **);
static void	em_smartspeed(struct adapter *);
static int	em_82547_fifo_workaround(struct adapter *, int);
static void	em_82547_update_fifo_head(struct adapter *, int);
static int	em_82547_tx_fifo_reset(struct adapter *);
static void	em_82547_move_tail(void *arg);
static int	em_dma_malloc(struct adapter *, bus_size_t,
		struct em_dma_alloc *, int);
static void	em_dma_free(struct adapter *, struct em_dma_alloc *);
static void	em_print_debug_info(struct adapter *);
static int	em_is_valid_ether_addr(uint8_t *);
static int	em_sysctl_stats(SYSCTL_HANDLER_ARGS);
static int	em_sysctl_debug_info(SYSCTL_HANDLER_ARGS);
static uint32_t	em_fill_descriptors(bus_addr_t address, uint32_t length,
		    PDESC_ARRAY desc_array);
static int	em_sysctl_int_delay(SYSCTL_HANDLER_ARGS);
static void	em_add_int_delay_sysctl(struct adapter *, const char *,
		const char *, struct em_int_delay_info *, int, int);

/*
 * Fast interrupt handler and legacy ithread/polling modes are
 * mutually exclusive.
 */
#ifdef DEVICE_POLLING
static poll_handler_t em_poll;
static void	em_intr(void *);
#else
static void	em_intr_fast(void *);
static void	em_add_int_process_limit(struct adapter *, const char *,
		const char *, int *, int);
static void	em_handle_rxtx(void *context, int pending);
static void	em_handle_link(void *context, int pending);
#endif

/*********************************************************************
 *  FreeBSD Device Interface Entry Points
 *********************************************************************/

static device_method_t em_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe, em_probe),
	DEVMETHOD(device_attach, em_attach),
	DEVMETHOD(device_detach, em_detach),
	DEVMETHOD(device_shutdown, em_shutdown),
	DEVMETHOD(device_suspend, em_suspend),
	DEVMETHOD(device_resume, em_resume),
	{0, 0}
};

static driver_t em_driver = {
	"em", em_methods, sizeof(struct adapter),
};

static devclass_t em_devclass;
DRIVER_MODULE(em, pci, em_driver, em_devclass, 0, 0);
MODULE_DEPEND(em, pci, 1, 1, 1);
MODULE_DEPEND(em, ether, 1, 1, 1);

/*********************************************************************
 *  Tunable default values.
 *********************************************************************/

#define E1000_TICKS_TO_USECS(ticks)	((1024 * (ticks) + 500) / 1000)
#define E1000_USECS_TO_TICKS(usecs)	((1000 * (usecs) + 512) / 1024)
#define M_TSO_LEN			66
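
/*
 * The conversion macros above assume the hardware interrupt-delay
 * timers tick in 1.024 usec units, with rounding to the nearest
 * unit.  A worked example, for illustration only:
 *   E1000_USECS_TO_TICKS(128) = (1000 * 128 + 512) / 1024 = 125
 *   E1000_TICKS_TO_USECS(125) = (1024 * 125 + 500) / 1000 = 128
 */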

static int em_tx_int_delay_dflt = E1000_TICKS_TO_USECS(EM_TIDV);
static int em_rx_int_delay_dflt = E1000_TICKS_TO_USECS(EM_RDTR);
static int em_tx_abs_int_delay_dflt = E1000_TICKS_TO_USECS(EM_TADV);
static int em_rx_abs_int_delay_dflt = E1000_TICKS_TO_USECS(EM_RADV);
static int em_rxd = EM_DEFAULT_RXD;
static int em_txd = EM_DEFAULT_TXD;
static int em_smart_pwr_down = FALSE;

TUNABLE_INT("hw.em.tx_int_delay", &em_tx_int_delay_dflt);
TUNABLE_INT("hw.em.rx_int_delay", &em_rx_int_delay_dflt);
TUNABLE_INT("hw.em.tx_abs_int_delay", &em_tx_abs_int_delay_dflt);
TUNABLE_INT("hw.em.rx_abs_int_delay", &em_rx_abs_int_delay_dflt);
TUNABLE_INT("hw.em.rxd", &em_rxd);
TUNABLE_INT("hw.em.txd", &em_txd);
TUNABLE_INT("hw.em.smart_pwr_down", &em_smart_pwr_down);
#ifndef DEVICE_POLLING
static int em_rx_process_limit = 100;
TUNABLE_INT("hw.em.rx_process_limit", &em_rx_process_limit);
#endif
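
/*
 * These tunables are read once, when the module is loaded.  As a
 * usage sketch (the values below are examples only), they may be set
 * from /boot/loader.conf:
 *   hw.em.rxd=1024
 *   hw.em.rx_int_delay=32
 */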

/*********************************************************************
 *  Device identification routine
 *
 *  em_probe determines whether the driver should be loaded on a
 *  given adapter, based on that adapter's PCI vendor/device ID.
 *
 *  return BUS_PROBE_DEFAULT on success, positive on failure
 *********************************************************************/

static int
em_probe(device_t dev)
{
	char		adapter_name[60];
	uint16_t	pci_vendor_id = 0;
	uint16_t	pci_device_id = 0;
	uint16_t	pci_subvendor_id = 0;
	uint16_t	pci_subdevice_id = 0;
	em_vendor_info_t *ent;

	INIT_DEBUGOUT("em_probe: begin");

	pci_vendor_id = pci_get_vendor(dev);
	if (pci_vendor_id != EM_VENDOR_ID)
		return (ENXIO);

	pci_device_id = pci_get_device(dev);
	pci_subvendor_id = pci_get_subvendor(dev);
	pci_subdevice_id = pci_get_subdevice(dev);

	ent = em_vendor_info_array;
	while (ent->vendor_id != 0) {
		if ((pci_vendor_id == ent->vendor_id) &&
		    (pci_device_id == ent->device_id) &&

		    ((pci_subvendor_id == ent->subvendor_id) ||
		    (ent->subvendor_id == PCI_ANY_ID)) &&

		    ((pci_subdevice_id == ent->subdevice_id) ||
		    (ent->subdevice_id == PCI_ANY_ID))) {
			sprintf(adapter_name, "%s %s",
				em_strings[ent->index],
				em_driver_version);
			device_set_desc_copy(dev, adapter_name);
			return (BUS_PROBE_DEFAULT);
		}
		ent++;
	}

	return (ENXIO);
}

/*********************************************************************
 *  Device initialization routine
 *
 *  The attach entry point is called when the driver is being loaded.
 *  This routine identifies the type of hardware, allocates all resources
 *  and initializes the hardware.
 *
 *  return 0 on success, positive on failure
 *********************************************************************/

static int
em_attach(device_t dev)
{
	struct adapter	*adapter;
	int		tsize, rsize;
	int		error = 0;

	INIT_DEBUGOUT("em_attach: begin");

	adapter = device_get_softc(dev);
	adapter->dev = adapter->osdep.dev = dev;
	EM_LOCK_INIT(adapter, device_get_nameunit(dev));

	/* SYSCTL stuff */
	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
	    OID_AUTO, "debug_info", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
	    em_sysctl_debug_info, "I", "Debug Information");

	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
	    OID_AUTO, "stats", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
	    em_sysctl_stats, "I", "Statistics");

	callout_init_mtx(&adapter->timer, &adapter->mtx, 0);
	callout_init_mtx(&adapter->tx_fifo_timer, &adapter->mtx, 0);

	/* Determine hardware revision */
	em_identify_hardware(adapter);

	/* Set up some sysctls for the tunable interrupt delays */
	em_add_int_delay_sysctl(adapter, "rx_int_delay",
	    "receive interrupt delay in usecs", &adapter->rx_int_delay,
	    E1000_REG_OFFSET(&adapter->hw, RDTR), em_rx_int_delay_dflt);
	em_add_int_delay_sysctl(adapter, "tx_int_delay",
	    "transmit interrupt delay in usecs", &adapter->tx_int_delay,
	    E1000_REG_OFFSET(&adapter->hw, TIDV), em_tx_int_delay_dflt);
	if (adapter->hw.mac_type >= em_82540) {
		em_add_int_delay_sysctl(adapter, "rx_abs_int_delay",
		    "receive interrupt delay limit in usecs",
		    &adapter->rx_abs_int_delay,
		    E1000_REG_OFFSET(&adapter->hw, RADV),
		    em_rx_abs_int_delay_dflt);
		em_add_int_delay_sysctl(adapter, "tx_abs_int_delay",
		    "transmit interrupt delay limit in usecs",
		    &adapter->tx_abs_int_delay,
		    E1000_REG_OFFSET(&adapter->hw, TADV),
		    em_tx_abs_int_delay_dflt);
	}

#ifndef DEVICE_POLLING
	/* Sysctls for limiting the amount of work done in the taskqueue */
	em_add_int_process_limit(adapter, "rx_processing_limit",
	    "max number of rx packets to process", &adapter->rx_process_limit,
	    em_rx_process_limit);
#endif

	/*
	 * Validate the number of transmit and receive descriptors.
	 * They must not exceed the hardware maximum, and must be a
	 * multiple of EM_DBA_ALIGN.
	 */
	if (((em_txd * sizeof(struct em_tx_desc)) % EM_DBA_ALIGN) != 0 ||
	    (adapter->hw.mac_type >= em_82544 && em_txd > EM_MAX_TXD) ||
	    (adapter->hw.mac_type < em_82544 && em_txd > EM_MAX_TXD_82543) ||
	    (em_txd < EM_MIN_TXD)) {
		device_printf(dev, "Using %d TX descriptors instead of %d!\n",
		    EM_DEFAULT_TXD, em_txd);
		adapter->num_tx_desc = EM_DEFAULT_TXD;
	} else
		adapter->num_tx_desc = em_txd;
	if (((em_rxd * sizeof(struct em_rx_desc)) % EM_DBA_ALIGN) != 0 ||
	    (adapter->hw.mac_type >= em_82544 && em_rxd > EM_MAX_RXD) ||
	    (adapter->hw.mac_type < em_82544 && em_rxd > EM_MAX_RXD_82543) ||
	    (em_rxd < EM_MIN_RXD)) {
		device_printf(dev, "Using %d RX descriptors instead of %d!\n",
		    EM_DEFAULT_RXD, em_rxd);
		adapter->num_rx_desc = EM_DEFAULT_RXD;
	} else
		adapter->num_rx_desc = em_rxd;

	adapter->hw.autoneg = DO_AUTO_NEG;
	adapter->hw.wait_autoneg_complete = WAIT_FOR_AUTO_NEG_DEFAULT;
	adapter->hw.autoneg_advertised = AUTONEG_ADV_DEFAULT;
	adapter->hw.tbi_compatibility_en = TRUE;
	adapter->rx_buffer_len = EM_RXBUFFER_2048;

	adapter->hw.phy_init_script = 1;
	adapter->hw.phy_reset_disable = FALSE;

#ifndef EM_MASTER_SLAVE
	adapter->hw.master_slave = em_ms_hw_default;
#else
	adapter->hw.master_slave = EM_MASTER_SLAVE;
#endif
	/*
	 * Set the max frame size assuming standard ethernet
	 * sized frames.
	 */
	adapter->hw.max_frame_size = ETHERMTU + ETHER_HDR_LEN + ETHER_CRC_LEN;

	adapter->hw.min_frame_size =
	    MINIMUM_ETHERNET_PACKET_SIZE + ETHER_CRC_LEN;

	/*
	 * This controls when hardware reports transmit completion
	 * status.
	 */
	adapter->hw.report_tx_early = 1;
	if (em_allocate_pci_resources(adapter)) {
		device_printf(dev, "Allocation of PCI resources failed\n");
		error = ENXIO;
		goto err_pci;
	}

	/* Initialize eeprom parameters */
	em_init_eeprom_params(&adapter->hw);

	tsize = roundup2(adapter->num_tx_desc * sizeof(struct em_tx_desc),
	    EM_DBA_ALIGN);

	/* Allocate Transmit Descriptor ring */
	if (em_dma_malloc(adapter, tsize, &adapter->txdma, BUS_DMA_NOWAIT)) {
		device_printf(dev, "Unable to allocate tx_desc memory\n");
		error = ENOMEM;
		goto err_tx_desc;
	}
	adapter->tx_desc_base = (struct em_tx_desc *)adapter->txdma.dma_vaddr;

	rsize = roundup2(adapter->num_rx_desc * sizeof(struct em_rx_desc),
	    EM_DBA_ALIGN);

	/* Allocate Receive Descriptor ring */
	if (em_dma_malloc(adapter, rsize, &adapter->rxdma, BUS_DMA_NOWAIT)) {
		device_printf(dev, "Unable to allocate rx_desc memory\n");
		error = ENOMEM;
		goto err_rx_desc;
	}
	adapter->rx_desc_base = (struct em_rx_desc *)adapter->rxdma.dma_vaddr;

	/* Initialize the hardware */
	if (em_hardware_init(adapter)) {
		device_printf(dev, "Unable to initialize the hardware\n");
		error = EIO;
		goto err_hw_init;
	}

	/* Copy the permanent MAC address out of the EEPROM */
	if (em_read_mac_addr(&adapter->hw) < 0) {
		device_printf(dev, "EEPROM read error while reading MAC"
		    " address\n");
		error = EIO;
		goto err_hw_init;
	}

	if (!em_is_valid_ether_addr(adapter->hw.mac_addr)) {
		device_printf(dev, "Invalid MAC address\n");
		error = EIO;
		goto err_hw_init;
	}

	/* Allocate transmit descriptors and buffers */
	if (em_allocate_transmit_structures(adapter)) {
		device_printf(dev, "Could not setup transmit structures\n");
		error = ENOMEM;
		goto err_tx_struct;
	}

	/* Allocate receive descriptors and buffers */
	if (em_allocate_receive_structures(adapter)) {
		device_printf(dev, "Could not setup receive structures\n");
		error = ENOMEM;
		goto err_rx_struct;
	}

	/* Setup OS specific network interface */
	em_setup_interface(dev, adapter);

	em_allocate_intr(adapter);

	/* Initialize statistics */
	em_clear_hw_cntrs(&adapter->hw);
	em_update_stats_counters(adapter);
	adapter->hw.get_link_status = 1;
	em_update_link_status(adapter);

	/* Indicate SOL/IDER usage */
	if (em_check_phy_reset_block(&adapter->hw))
		device_printf(dev,
		    "PHY reset is blocked due to SOL/IDER session.\n");

	/* Identify 82544 on PCIX */
	em_get_bus_info(&adapter->hw);
	if (adapter->hw.bus_type == em_bus_type_pcix &&
	    adapter->hw.mac_type == em_82544)
		adapter->pcix_82544 = TRUE;
	else
		adapter->pcix_82544 = FALSE;

	INIT_DEBUGOUT("em_attach: end");

	return (0);

err_rx_struct:
	em_free_transmit_structures(adapter);
err_tx_struct:
err_hw_init:
	em_dma_free(adapter, &adapter->rxdma);
err_rx_desc:
	em_dma_free(adapter, &adapter->txdma);
err_tx_desc:
err_pci:
	em_free_intr(adapter);
	em_free_pci_resources(adapter);
	EM_LOCK_DESTROY(adapter);

	return (error);
}

/*********************************************************************
 *  Device removal routine
 *
 *  The detach entry point is called when the driver is being removed.
 *  This routine stops the adapter and deallocates all the resources
 *  that were allocated for driver operation.
 *
 *  return 0 on success, positive on failure
 *********************************************************************/

static int
em_detach(device_t dev)
{
	struct adapter	*adapter = device_get_softc(dev);
	struct ifnet	*ifp = adapter->ifp;

	INIT_DEBUGOUT("em_detach: begin");

#ifdef DEVICE_POLLING
	if (ifp->if_capenable & IFCAP_POLLING)
		ether_poll_deregister(ifp);
#endif

	em_free_intr(adapter);
	EM_LOCK(adapter);
	adapter->in_detach = 1;
	em_stop(adapter);
	em_phy_hw_reset(&adapter->hw);
	EM_UNLOCK(adapter);
	ether_ifdetach(adapter->ifp);

	callout_drain(&adapter->timer);
	callout_drain(&adapter->tx_fifo_timer);

	em_free_pci_resources(adapter);
	bus_generic_detach(dev);
	if_free(ifp);

	em_free_transmit_structures(adapter);
	em_free_receive_structures(adapter);

	/* Free Transmit Descriptor ring */
	if (adapter->tx_desc_base) {
		em_dma_free(adapter, &adapter->txdma);
		adapter->tx_desc_base = NULL;
	}

	/* Free Receive Descriptor ring */
	if (adapter->rx_desc_base) {
		em_dma_free(adapter, &adapter->rxdma);
		adapter->rx_desc_base = NULL;
	}

	EM_LOCK_DESTROY(adapter);

	return (0);
}

/*********************************************************************
 *
 *  Shutdown entry point
 *
 **********************************************************************/

static int
em_shutdown(device_t dev)
{
	struct adapter *adapter = device_get_softc(dev);
	EM_LOCK(adapter);
	em_stop(adapter);
	EM_UNLOCK(adapter);
	return (0);
}

/*
 * Suspend/resume device methods.
 */
static int
em_suspend(device_t dev)
{
	struct adapter *adapter = device_get_softc(dev);

	EM_LOCK(adapter);
	em_stop(adapter);
	EM_UNLOCK(adapter);

	return bus_generic_suspend(dev);
}

static int
em_resume(device_t dev)
{
	struct adapter *adapter = device_get_softc(dev);
	struct ifnet *ifp = adapter->ifp;

	EM_LOCK(adapter);
	em_init_locked(adapter);
	if ((ifp->if_flags & IFF_UP) &&
	    (ifp->if_drv_flags & IFF_DRV_RUNNING))
		em_start_locked(ifp);
	EM_UNLOCK(adapter);

	return bus_generic_resume(dev);
}


/*********************************************************************
 *  Transmit entry point
 *
 *  em_start is called by the stack to initiate a transmit.
 *  The driver will remain in this routine as long as there are
 *  packets to transmit and transmit resources are available.
 *  If resources are not available, the stack is notified and
 *  the packet is requeued.
 **********************************************************************/

static void
em_start_locked(struct ifnet *ifp)
{
	struct adapter	*adapter = ifp->if_softc;
	struct mbuf	*m_head;

	EM_LOCK_ASSERT(adapter);

	if ((ifp->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) !=
	    IFF_DRV_RUNNING)
		return;
	if (!adapter->link_active)
		return;

	while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {

		IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
		if (m_head == NULL)
			break;
		/*
		 * em_encap() can modify our pointer and/or make it NULL on
		 * failure.  In that event, we can't requeue.
		 */
		if (em_encap(adapter, &m_head)) {
			if (m_head == NULL)
				break;
			ifp->if_drv_flags |= IFF_DRV_OACTIVE;
			IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
			break;
		}

		/* Send a copy of the frame to the BPF listener */
		ETHER_BPF_MTAP(ifp, m_head);

		/* Set timeout in case hardware has problems transmitting. */
		adapter->watchdog_timer = EM_TX_TIMEOUT;
	}
}

static void
em_start(struct ifnet *ifp)
{
	struct adapter *adapter = ifp->if_softc;

	EM_LOCK(adapter);
	if (ifp->if_drv_flags & IFF_DRV_RUNNING)
		em_start_locked(ifp);
	EM_UNLOCK(adapter);
}

/*********************************************************************
 *  Ioctl entry point
 *
 *  em_ioctl is called when the user wants to configure the
 *  interface.
 *
 *  return 0 on success, positive on failure
 **********************************************************************/

static int
em_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
{
	struct adapter	*adapter = ifp->if_softc;
	struct ifreq *ifr = (struct ifreq *)data;
	struct ifaddr *ifa = (struct ifaddr *)data;
	int error = 0;

	if (adapter->in_detach)
		return (error);

	switch (command) {
	case SIOCSIFADDR:
	case SIOCGIFADDR:
		if (ifa->ifa_addr->sa_family == AF_INET) {
			/*
			 * XXX
			 * Since resetting the hardware takes a very long
			 * time and results in link renegotiation, we
			 * initialize the hardware only when it is
			 * absolutely required.
			 */
			ifp->if_flags |= IFF_UP;
			if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) {
				EM_LOCK(adapter);
				em_init_locked(adapter);
				EM_UNLOCK(adapter);
			}
			arp_ifinit(ifp, ifa);
		} else
			error = ether_ioctl(ifp, command, data);
		break;
	case SIOCSIFMTU:
	    {
		int max_frame_size;
		uint16_t eeprom_data = 0;

		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFMTU (Set Interface MTU)");

		EM_LOCK(adapter);
		switch (adapter->hw.mac_type) {
		case em_82573:
			/*
			 * 82573 only supports jumbo frames
			 * if ASPM is disabled.
			 */
			em_read_eeprom(&adapter->hw, EEPROM_INIT_3GIO_3, 1,
			    &eeprom_data);
			if (eeprom_data & EEPROM_WORD1A_ASPM_MASK) {
				max_frame_size = ETHER_MAX_LEN;
				break;
			}
			/* Allow Jumbo frames - fall thru */
		case em_82571:
		case em_82572:
		case em_80003es2lan:	/* Limit Jumbo Frame size */
			max_frame_size = 9234;
			break;
		case em_ich8lan:
			/* ICH8 does not support jumbo frames */
			max_frame_size = ETHER_MAX_LEN;
			break;
		default:
			max_frame_size = MAX_JUMBO_FRAME_SIZE;
		}
		if (ifr->ifr_mtu > max_frame_size - ETHER_HDR_LEN -
		    ETHER_CRC_LEN) {
			EM_UNLOCK(adapter);
			error = EINVAL;
			break;
		}

		ifp->if_mtu = ifr->ifr_mtu;
		adapter->hw.max_frame_size =
		    ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
		em_init_locked(adapter);
		EM_UNLOCK(adapter);
		break;
	    }
	case SIOCSIFFLAGS:
		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFFLAGS (Set Interface Flags)");
		EM_LOCK(adapter);
		if (ifp->if_flags & IFF_UP) {
			if ((ifp->if_drv_flags & IFF_DRV_RUNNING)) {
				if ((ifp->if_flags ^ adapter->if_flags) &
				    IFF_PROMISC) {
					em_disable_promisc(adapter);
					em_set_promisc(adapter);
				}
			} else
				em_init_locked(adapter);
		} else {
			if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
				em_stop(adapter);
			}
		}
		adapter->if_flags = ifp->if_flags;
		EM_UNLOCK(adapter);
		break;
	case SIOCADDMULTI:
	case SIOCDELMULTI:
		IOCTL_DEBUGOUT("ioctl rcv'd: SIOC(ADD|DEL)MULTI");
		if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
			EM_LOCK(adapter);
			em_disable_intr(adapter);
			em_set_multi(adapter);
			if (adapter->hw.mac_type == em_82542_rev2_0) {
				em_initialize_receive_unit(adapter);
			}
#ifdef DEVICE_POLLING
			if (!(ifp->if_capenable & IFCAP_POLLING))
#endif
				em_enable_intr(adapter);
			EM_UNLOCK(adapter);
		}
		break;
	case SIOCSIFMEDIA:
		/* Check SOL/IDER usage */
		EM_LOCK(adapter);
		if (em_check_phy_reset_block(&adapter->hw)) {
			EM_UNLOCK(adapter);
			device_printf(adapter->dev, "Media change is "
			    "blocked due to SOL/IDER session.\n");
			break;
		}
		EM_UNLOCK(adapter);
	case SIOCGIFMEDIA:
		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCxIFMEDIA (Get/Set Interface Media)");
		error = ifmedia_ioctl(ifp, ifr, &adapter->media, command);
		break;
	case SIOCSIFCAP:
	    {
		int mask, reinit;

		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFCAP (Set Capabilities)");
		reinit = 0;
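		/* Each bit set in mask is a capability this request toggles. */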
		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
#ifdef DEVICE_POLLING
		if (mask & IFCAP_POLLING) {
			if (ifr->ifr_reqcap & IFCAP_POLLING) {
				error = ether_poll_register(em_poll, ifp);
				if (error)
					return (error);
				EM_LOCK(adapter);
				em_disable_intr(adapter);
				ifp->if_capenable |= IFCAP_POLLING;
				EM_UNLOCK(adapter);
			} else {
				error = ether_poll_deregister(ifp);
				/* Enable interrupt even in error case */
				EM_LOCK(adapter);
				em_enable_intr(adapter);
				ifp->if_capenable &= ~IFCAP_POLLING;
				EM_UNLOCK(adapter);
			}
		}
#endif
		if (mask & IFCAP_HWCSUM) {
			ifp->if_capenable ^= IFCAP_HWCSUM;
			reinit = 1;
		}
		if (mask & IFCAP_TSO4) {
			ifp->if_capenable ^= IFCAP_TSO4;
			reinit = 1;
		}
		if (mask & IFCAP_VLAN_HWTAGGING) {
			ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
			reinit = 1;
		}
		if (reinit && (ifp->if_drv_flags & IFF_DRV_RUNNING))
			em_init(adapter);
		VLAN_CAPABILITIES(ifp);
		break;
	    }
	default:
		error = ether_ioctl(ifp, command, data);
		break;
	}

	return (error);
}

/*********************************************************************
 *  Watchdog timer:
 *
 *  This routine is called from the local timer every second.
 *  As long as transmit descriptors are being cleaned the value
 *  is non-zero and we do nothing. Reaching 0 indicates a tx hang
 *  and we then reset the device.
 *
 **********************************************************************/

static void
em_watchdog(struct adapter *adapter)
{

	EM_LOCK_ASSERT(adapter);

	/*
	 * The timer is set to 5 every time em_start() queues a packet.
	 * Then em_txeof() keeps resetting to 5 as long as it cleans at
	 * least one descriptor.
	 * Finally, anytime all descriptors are clean the timer is
	 * set to 0.
	 */
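	/* Timer is idle (0) or still counting down: no hang yet. */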
	if (adapter->watchdog_timer == 0 || --adapter->watchdog_timer)
		return;

	/* If we are in this routine because of pause frames, then
	 * don't reset the hardware.
	 */
	if (E1000_READ_REG(&adapter->hw, STATUS) & E1000_STATUS_TXOFF) {
		adapter->watchdog_timer = EM_TX_TIMEOUT;
		return;
	}

	if (em_check_for_link(&adapter->hw) == 0)
		device_printf(adapter->dev, "watchdog timeout -- resetting\n");

	adapter->ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
	adapter->watchdog_events++;

	em_init_locked(adapter);
}

/*********************************************************************
 *  Init entry point
 *
 *  This routine is used in two ways. It is used by the stack as
 *  the init entry point in the network interface structure. It is
 *  also used by the driver as a hw/sw initialization routine to
 *  get to a consistent state.
 *
 *  return 0 on success, positive on failure
 **********************************************************************/

static void
em_init_locked(struct adapter *adapter)
{
	struct ifnet	*ifp = adapter->ifp;
	device_t	dev = adapter->dev;
	uint32_t	pba;

	INIT_DEBUGOUT("em_init: begin");

	EM_LOCK_ASSERT(adapter);

	em_stop(adapter);

	/*
	 * Packet Buffer Allocation (PBA)
	 * Writing PBA sets the receive portion of the buffer;
	 * the remainder is used for the transmit buffer.
	 *
	 * Devices before the 82547 had a Packet Buffer of 64K.
	 *   Default allocation: PBA=48K for Rx, leaving 16K for Tx.
	 * After the 82547 the buffer was reduced to 40K.
	 *   Default allocation: PBA=30K for Rx, leaving 10K for Tx.
	 *   Note: default does not leave enough room for Jumbo Frame >10k.
	 */
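	/*
	 * A worked example of the split, assuming the E1000_PBA_*
	 * constants count 1KB blocks and EM_PBA_BYTES_SHIFT is 10 (as
	 * in if_em.h): an 82547 with standard frames gets pba = 30,
	 * i.e. 30K for Rx, and tx_fifo_size = (40 - 30) << 10 = 10240
	 * bytes for Tx.
	 */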
	switch (adapter->hw.mac_type) {
	case em_82547:
	case em_82547_rev_2: /* 82547: Total Packet Buffer is 40K */
		if (adapter->hw.max_frame_size > EM_RXBUFFER_8192)
			pba = E1000_PBA_22K; /* 22K for Rx, 18K for Tx */
		else
			pba = E1000_PBA_30K; /* 30K for Rx, 10K for Tx */
		adapter->tx_fifo_head = 0;
		adapter->tx_head_addr = pba << EM_TX_HEAD_ADDR_SHIFT;
		adapter->tx_fifo_size = (E1000_PBA_40K - pba) << EM_PBA_BYTES_SHIFT;
		break;
	case em_80003es2lan: /* 80003es2lan: Total Packet Buffer is 48K */
	case em_82571: /* 82571: Total Packet Buffer is 48K */
	case em_82572: /* 82572: Total Packet Buffer is 48K */
		pba = E1000_PBA_32K; /* 32K for Rx, 16K for Tx */
		break;
	case em_82573: /* 82573: Total Packet Buffer is 32K */
		/* Jumbo frames not supported */
		pba = E1000_PBA_12K; /* 12K for Rx, 20K for Tx */
		break;
	case em_ich8lan:
		pba = E1000_PBA_8K;
		break;
	default:
		/* Devices before 82547 had a Packet Buffer of 64K. */
		if (adapter->hw.max_frame_size > EM_RXBUFFER_8192)
			pba = E1000_PBA_40K; /* 40K for Rx, 24K for Tx */
		else
			pba = E1000_PBA_48K; /* 48K for Rx, 16K for Tx */
	}

	INIT_DEBUGOUT1("em_init: pba=%dK", pba);
	E1000_WRITE_REG(&adapter->hw, PBA, pba);

	/* Get the latest mac address, User can use a LAA */
	bcopy(IF_LLADDR(adapter->ifp), adapter->hw.mac_addr, ETHER_ADDR_LEN);

	/* Initialize the hardware */
	if (em_hardware_init(adapter)) {
		device_printf(dev, "Unable to initialize the hardware\n");
		return;
	}
	em_update_link_status(adapter);

	if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING)
		em_enable_vlans(adapter);

	ifp->if_hwassist = 0;
	if (adapter->hw.mac_type >= em_82543) {
		if (ifp->if_capenable & IFCAP_TXCSUM)
			ifp->if_hwassist = EM_CHECKSUM_FEATURES;
		if (ifp->if_capenable & IFCAP_TSO)
			ifp->if_hwassist |= EM_TCPSEG_FEATURES;
	}

	/* Prepare transmit descriptors and buffers */
	em_setup_transmit_structures(adapter);
	em_initialize_transmit_unit(adapter);

	/* Setup Multicast table */
	em_set_multi(adapter);

	/* Prepare receive descriptors and buffers */
	if (em_setup_receive_structures(adapter)) {
		device_printf(dev, "Could not setup receive structures\n");
		em_stop(adapter);
		return;
	}
	em_initialize_receive_unit(adapter);

	/* Don't lose promiscuous settings */
	em_set_promisc(adapter);

	ifp->if_drv_flags |= IFF_DRV_RUNNING;
	ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;

	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
	em_clear_hw_cntrs(&adapter->hw);
#ifdef DEVICE_POLLING
	/*
	 * Only enable interrupts if we are not polling; otherwise,
	 * make sure they are off.
	 */
	if (ifp->if_capenable & IFCAP_POLLING)
		em_disable_intr(adapter);
	else
#endif /* DEVICE_POLLING */
		em_enable_intr(adapter);

	/* Don't reset the phy next time init gets called */
	adapter->hw.phy_reset_disable = TRUE;
}

static void
em_init(void *arg)
{
	struct adapter *adapter = arg;

	EM_LOCK(adapter);
	em_init_locked(adapter);
	EM_UNLOCK(adapter);
}


#ifdef DEVICE_POLLING
/*********************************************************************
 *
 *  Legacy polling routine
 *
 *********************************************************************/
static void
em_poll(struct ifnet *ifp, enum poll_cmd cmd, int count)
{
	struct adapter *adapter = ifp->if_softc;
	uint32_t reg_icr;

	EM_LOCK(adapter);
	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
		EM_UNLOCK(adapter);
		return;
	}

	if (cmd == POLL_AND_CHECK_STATUS) {
		reg_icr = E1000_READ_REG(&adapter->hw, ICR);
		if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
			callout_stop(&adapter->timer);
			adapter->hw.get_link_status = 1;
			em_check_for_link(&adapter->hw);
			em_update_link_status(adapter);
			callout_reset(&adapter->timer, hz, em_local_timer, adapter);
		}
	}
	em_rxeof(adapter, count);
	em_txeof(adapter);

	if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
		em_start_locked(ifp);
	EM_UNLOCK(adapter);
}

/*********************************************************************
 *
 *  Legacy Interrupt Service routine
 *
 *********************************************************************/
static void
em_intr(void *arg)
{
	struct adapter	*adapter = arg;
	struct ifnet	*ifp;
	uint32_t	reg_icr;

	EM_LOCK(adapter);

	ifp = adapter->ifp;

	if (ifp->if_capenable & IFCAP_POLLING) {
		EM_UNLOCK(adapter);
		return;
	}

	for (;;) {
		reg_icr = E1000_READ_REG(&adapter->hw, ICR);
		if (adapter->hw.mac_type >= em_82571 &&
		    (reg_icr & E1000_ICR_INT_ASSERTED) == 0)
			break;
		else if (reg_icr == 0)
			break;

		/*
		 * XXX: some laptops trigger several spurious interrupts
		 * on em(4) when in the resume cycle. The ICR register
		 * reports an all-ones value in this case. Processing such
		 * interrupts would lead to a freeze. I don't know why.
		 */
		if (reg_icr == 0xffffffff)
			break;

		if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
			em_rxeof(adapter, -1);
			em_txeof(adapter);
		}

		/* Link status change */
		if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
			callout_stop(&adapter->timer);
			adapter->hw.get_link_status = 1;
			em_check_for_link(&adapter->hw);
			em_update_link_status(adapter);
			callout_reset(&adapter->timer, hz, em_local_timer, adapter);
		}

		if (reg_icr & E1000_ICR_RXO)
			adapter->rx_overruns++;
	}

	if (ifp->if_drv_flags & IFF_DRV_RUNNING &&
	    !IFQ_DRV_IS_EMPTY(&ifp->if_snd))
		em_start_locked(ifp);

	EM_UNLOCK(adapter);
}

#else  /* if not DEVICE_POLLING, then fast interrupt routines only */

static void
em_handle_link(void *context, int pending)
{
	struct adapter	*adapter = context;
	struct ifnet *ifp;

	ifp = adapter->ifp;

	EM_LOCK(adapter);
	if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) {
		EM_UNLOCK(adapter);
		return;
	}

	callout_stop(&adapter->timer);
	adapter->hw.get_link_status = 1;
	em_check_for_link(&adapter->hw);
	em_update_link_status(adapter);
	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
	EM_UNLOCK(adapter);
}

static void
em_handle_rxtx(void *context, int pending)
{
	struct adapter	*adapter = context;
	struct ifnet	*ifp;

	NET_LOCK_GIANT();
	ifp = adapter->ifp;

	/*
	 * TODO:
	 * It should be possible to run the tx clean loop without the lock.
	 */
	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
		if (em_rxeof(adapter, adapter->rx_process_limit) != 0)
			taskqueue_enqueue(adapter->tq, &adapter->rxtx_task);
		EM_LOCK(adapter);
		em_txeof(adapter);

		if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
			em_start_locked(ifp);
		EM_UNLOCK(adapter);
	}

	em_enable_intr(adapter);
	NET_UNLOCK_GIANT();
}

/*********************************************************************
 *
 *  Fast Interrupt Service routine
 *
 *********************************************************************/
static void
em_intr_fast(void *arg)
{
	struct adapter	*adapter = arg;
	struct ifnet	*ifp;
	uint32_t	reg_icr;

	ifp = adapter->ifp;

	reg_icr = E1000_READ_REG(&adapter->hw, ICR);

	/* Hot eject?  */
	if (reg_icr == 0xffffffff)
		return;

	/* Definitely not our interrupt.  */
	if (reg_icr == 0x0)
		return;

	/*
	 * Starting with the 82571 chip, bit 31 should be used to
	 * determine whether the interrupt belongs to us.
	 */
	if (adapter->hw.mac_type >= em_82571 &&
	    (reg_icr & E1000_ICR_INT_ASSERTED) == 0)
		return;

	/*
	 * Mask interrupts until the taskqueue is finished running.  This is
	 * cheap, just assume that it is needed.  This also works around the
	 * MSI message reordering errata on certain systems.
	 */
	em_disable_intr(adapter);
	taskqueue_enqueue(adapter->tq, &adapter->rxtx_task);

	/* Link status change */
	if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC))
		taskqueue_enqueue(taskqueue_fast, &adapter->link_task);

	if (reg_icr & E1000_ICR_RXO)
		adapter->rx_overruns++;
}
#endif /* ! DEVICE_POLLING */

/*********************************************************************
 *
 *  Media Ioctl callback
 *
 *  This routine is called whenever the user queries the status of
 *  the interface using ifconfig.
 *
 **********************************************************************/
static void
em_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
{
	struct adapter *adapter = ifp->if_softc;

	INIT_DEBUGOUT("em_media_status: begin");

	EM_LOCK(adapter);
	em_check_for_link(&adapter->hw);
	em_update_link_status(adapter);

	ifmr->ifm_status = IFM_AVALID;
	ifmr->ifm_active = IFM_ETHER;

	if (!adapter->link_active) {
		EM_UNLOCK(adapter);
		return;
	}

	ifmr->ifm_status |= IFM_ACTIVE;

	if ((adapter->hw.media_type == em_media_type_fiber) ||
	    (adapter->hw.media_type == em_media_type_internal_serdes)) {
		if (adapter->hw.mac_type == em_82545)
			ifmr->ifm_active |= IFM_1000_LX | IFM_FDX;
		else
			ifmr->ifm_active |= IFM_1000_SX | IFM_FDX;
	} else {
		switch (adapter->link_speed) {
		case 10:
			ifmr->ifm_active |= IFM_10_T;
			break;
		case 100:
			ifmr->ifm_active |= IFM_100_TX;
			break;
		case 1000:
			ifmr->ifm_active |= IFM_1000_T;
			break;
		}
		if (adapter->link_duplex == FULL_DUPLEX)
			ifmr->ifm_active |= IFM_FDX;
		else
			ifmr->ifm_active |= IFM_HDX;
	}
	EM_UNLOCK(adapter);
}

/*********************************************************************
 *
 *  Media Ioctl callback
 *
 *  This routine is called when the user changes speed/duplex using
 *  media/mediaopt options with ifconfig.
 *
 **********************************************************************/
static int
em_media_change(struct ifnet *ifp)
{
	struct adapter *adapter = ifp->if_softc;
	struct ifmedia  *ifm = &adapter->media;

	INIT_DEBUGOUT("em_media_change: begin");

	if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
		return (EINVAL);

	EM_LOCK(adapter);
	switch (IFM_SUBTYPE(ifm->ifm_media)) {
	case IFM_AUTO:
		adapter->hw.autoneg = DO_AUTO_NEG;
		adapter->hw.autoneg_advertised = AUTONEG_ADV_DEFAULT;
		break;
	case IFM_1000_LX:
	case IFM_1000_SX:
	case IFM_1000_T:
		adapter->hw.autoneg = DO_AUTO_NEG;
		adapter->hw.autoneg_advertised = ADVERTISE_1000_FULL;
		break;
	case IFM_100_TX:
		adapter->hw.autoneg = FALSE;
		adapter->hw.autoneg_advertised = 0;
		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
			adapter->hw.forced_speed_duplex = em_100_full;
		else
			adapter->hw.forced_speed_duplex = em_100_half;
		break;
	case IFM_10_T:
		adapter->hw.autoneg = FALSE;
		adapter->hw.autoneg_advertised = 0;
		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
			adapter->hw.forced_speed_duplex = em_10_full;
		else
			adapter->hw.forced_speed_duplex = em_10_half;
		break;
	default:
		device_printf(adapter->dev, "Unsupported media type\n");
	}

	/*
	 * As the speed/duplex settings may have changed, we need to
	 * reset the PHY.
	 */
	adapter->hw.phy_reset_disable = FALSE;

	em_init_locked(adapter);
	EM_UNLOCK(adapter);

	return (0);
}

/*********************************************************************
 *
 *  This routine maps the mbufs to tx descriptors.
 *
 *  return 0 on success, positive on failure
 **********************************************************************/
static int
em_encap(struct adapter *adapter, struct mbuf **m_headp)
{
	struct ifnet		*ifp = adapter->ifp;
	bus_dma_segment_t	segs[EM_MAX_SCATTER];
	bus_dmamap_t		map;
	struct em_buffer	*tx_buffer, *tx_buffer_mapped;
	struct em_tx_desc	*current_tx_desc;
	struct mbuf		*m_head;
	uint32_t		txd_upper, txd_lower, txd_used, txd_saved;
	int			nsegs, i, j, first, last = 0;
	int			error, do_tso, tso_desc = 0;

	m_head = *m_headp;
	current_tx_desc = NULL;
	txd_upper = txd_lower = txd_used = txd_saved = 0;

	do_tso = ((m_head->m_pkthdr.csum_flags & CSUM_TSO) != 0);

	/*
	 * Force a cleanup if the number of available TX
	 * descriptors hits the threshold.
	 */
	if (adapter->num_tx_desc_avail <= EM_TX_CLEANUP_THRESHOLD) {
		em_txeof(adapter);
		if (adapter->num_tx_desc_avail <= EM_TX_CLEANUP_THRESHOLD) {
			adapter->no_tx_desc_avail1++;
			return (ENOBUFS);
		}
	}

	/*
	 * TSO workaround:
	 *  If an mbuf contains only the IP and TCP header we have
	 *  to pull 4 bytes of data into it.
	 */
	if (do_tso && (m_head->m_len <= M_TSO_LEN)) {
		m_head = m_pullup(m_head, M_TSO_LEN + 4);
		*m_headp = m_head;
		if (m_head == NULL) {
			return (ENOBUFS);
		}
	}

	/*
	 * Map the packet for DMA.
	 *
	 * Capture the first descriptor index; this buffer will
	 * later record the index of the EOP descriptor, which is
	 * the only one that gets a DONE bit writeback.
	 */
	first = adapter->next_avail_tx_desc;
	tx_buffer = &adapter->tx_buffer_area[first];
	tx_buffer_mapped = tx_buffer;
	map = tx_buffer->map;

	error = bus_dmamap_load_mbuf_sg(adapter->txtag, map, *m_headp, segs,
	    &nsegs, BUS_DMA_NOWAIT);

	/*
	 * There are two types of errors we can (try) to handle:
	 * - EFBIG means the mbuf chain was too long and bus_dma ran
	 *   out of segments.  Defragment the mbuf chain and try again.
	 * - ENOMEM means bus_dma could not obtain enough bounce buffers
	 *   at this point in time.  Defer sending and try again later.
	 * All other errors, in particular EINVAL, are fatal and prevent the
	 * mbuf chain from ever going through.  Drop it and report error.
	 */
	if (error == EFBIG) {
		struct mbuf *m;

		m = m_defrag(*m_headp, M_DONTWAIT);
		if (m == NULL) {
			/* Assume m_defrag(9) used only m_get(9). */
			adapter->mbuf_alloc_failed++;
			m_freem(*m_headp);
			*m_headp = NULL;
			return (ENOBUFS);
		}
		*m_headp = m;

		error = bus_dmamap_load_mbuf_sg(adapter->txtag, map, *m_headp,
		    segs, &nsegs, BUS_DMA_NOWAIT);

		if (error == ENOMEM) {
			adapter->no_tx_dma_setup++;
			return (error);
		} else if (error != 0) {
			adapter->no_tx_dma_setup++;
			m_freem(*m_headp);
			*m_headp = NULL;
			return (error);
		}
	} else if (error == ENOMEM) {
		adapter->no_tx_dma_setup++;
		return (error);
	} else if (error != 0) {
		adapter->no_tx_dma_setup++;
		m_freem(*m_headp);
		*m_headp = NULL;
		return (error);
	}

	/*
	 * TSO Hardware workaround, if this packet is not
	 * TSO, and is only a single descriptor long, and
	 * it follows a TSO burst, then we need to add a
	 * sentinel descriptor to prevent premature writeback.
	 */
	if ((do_tso == 0) && (adapter->tx_tso == TRUE)) {
		if (nsegs == 1)
			tso_desc = TRUE;
		adapter->tx_tso = FALSE;
	}

	if (nsegs > adapter->num_tx_desc_avail - 2) {
		adapter->no_tx_desc_avail2++;
		bus_dmamap_unload(adapter->txtag, map);
		return (ENOBUFS);
	}
	m_head = *m_headp;

	/* Do hardware assists */
	if (ifp->if_hwassist) {
		if (do_tso &&
		    em_tso_setup(adapter, m_head, &txd_upper, &txd_lower)) {
			/* we need to make a final sentinel transmit desc */
			tso_desc = TRUE;
		} else
			em_transmit_checksum_setup(adapter, m_head,
			    &txd_upper, &txd_lower);
	}

	i = adapter->next_avail_tx_desc;
	if (adapter->pcix_82544)
		txd_saved = i;

	for (j = 0; j < nsegs; j++) {
		bus_size_t seg_len;
		bus_addr_t seg_addr;
		/* If the adapter is an 82544 on a PCI-X bus. */
		if (adapter->pcix_82544) {
			DESC_ARRAY	desc_array;
			uint32_t	array_elements, counter;

			/*
			 * Check the Address and Length combination and
			 * split the data accordingly
			 */
			array_elements = em_fill_descriptors(segs[j].ds_addr,
			    segs[j].ds_len, &desc_array);
			for (counter = 0; counter < array_elements; counter++) {
				if (txd_used == adapter->num_tx_desc_avail) {
					adapter->next_avail_tx_desc = txd_saved;
					adapter->no_tx_desc_avail2++;
					bus_dmamap_unload(adapter->txtag, map);
					return (ENOBUFS);
				}
				tx_buffer = &adapter->tx_buffer_area[i];
				current_tx_desc = &adapter->tx_desc_base[i];
				current_tx_desc->buffer_addr = htole64(
					desc_array.descriptor[counter].address);
				current_tx_desc->lower.data = htole32(
					(adapter->txd_cmd | txd_lower |
					(uint16_t)desc_array.descriptor[counter].length));
				current_tx_desc->upper.data = htole32((txd_upper));
				last = i;
				if (++i == adapter->num_tx_desc)
					i = 0;

				tx_buffer->m_head = NULL;
				tx_buffer->next_eop = -1;
				txd_used++;
			}
		} else {
			tx_buffer = &adapter->tx_buffer_area[i];
			current_tx_desc = &adapter->tx_desc_base[i];
			seg_addr = segs[j].ds_addr;
			seg_len  = segs[j].ds_len;
			/*
			** TSO Workaround:
			** If this is the last descriptor, we want to
			** split it so we have a small final sentinel
			*/
			if (tso_desc && (j == (nsegs - 1)) && (seg_len > 8)) {
				seg_len -= 4;
				current_tx_desc->buffer_addr = htole64(seg_addr);
				current_tx_desc->lower.data = htole32(
				adapter->txd_cmd | txd_lower | seg_len);
				current_tx_desc->upper.data =
				    htole32(txd_upper);
				if (++i == adapter->num_tx_desc)
					i = 0;
				/* Now make the sentinel */
				++txd_used; /* using an extra txd */
				current_tx_desc = &adapter->tx_desc_base[i];
				tx_buffer = &adapter->tx_buffer_area[i];
				current_tx_desc->buffer_addr =
					htole64(seg_addr + seg_len);
				current_tx_desc->lower.data = htole32(
				adapter->txd_cmd | txd_lower | 4);
				current_tx_desc->upper.data =
				    htole32(txd_upper);
				last = i;
				if (++i == adapter->num_tx_desc)
					i = 0;
			} else {
				current_tx_desc->buffer_addr = htole64(seg_addr);
				current_tx_desc->lower.data = htole32(
				adapter->txd_cmd | txd_lower | seg_len);
				current_tx_desc->upper.data =
				    htole32(txd_upper);
				last = i;
				if (++i == adapter->num_tx_desc)
					i = 0;
			}
			tx_buffer->m_head = NULL;
			tx_buffer->next_eop = -1;
		}
	}

	adapter->next_avail_tx_desc = i;
	if (adapter->pcix_82544)
		adapter->num_tx_desc_avail -= txd_used;
	else {
		adapter->num_tx_desc_avail -= nsegs;
		if (tso_desc) /* TSO used an extra for sentinel */
			adapter->num_tx_desc_avail -= txd_used;
	}

	if (m_head->m_flags & M_VLANTAG) {
		/* Set the vlan id. */
		current_tx_desc->upper.fields.special =
		    htole16(m_head->m_pkthdr.ether_vtag);

		/* Tell hardware to add tag. */
		current_tx_desc->lower.data |= htole32(E1000_TXD_CMD_VLE);
	}

	tx_buffer->m_head = m_head;
	tx_buffer_mapped->map = tx_buffer->map;
	tx_buffer->map = map;
	bus_dmamap_sync(adapter->txtag, map, BUS_DMASYNC_PREWRITE);

	/*
	 * Last Descriptor of Packet
	 * needs End Of Packet (EOP)
	 * and Report Status (RS)
	 */
	current_tx_desc->lower.data |=
	    htole32(E1000_TXD_CMD_EOP | E1000_TXD_CMD_RS);
	/*
	 * Keep track in the first buffer which
	 * descriptor will be written back
	 */
	tx_buffer = &adapter->tx_buffer_area[first];
	tx_buffer->next_eop = last;

	/*
1737	 * Advance the Transmit Descriptor Tail (Tdt), this tells the E1000
1738	 * that this frame is available to transmit.
1739	 */
1740	bus_dmamap_sync(adapter->txdma.dma_tag, adapter->txdma.dma_map,
1741	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1742
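	/*
	 * On the 82547 in half duplex the tail update is funneled through
	 * em_82547_move_tail() so the Tx FIFO workaround can hold back
	 * packets that would wrap the internal FIFO.
	 */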
1743	if (adapter->hw.mac_type == em_82547 &&
1744	    adapter->link_duplex == HALF_DUPLEX)
1745		em_82547_move_tail(adapter);
1746	else {
1747		E1000_WRITE_REG(&adapter->hw, TDT, i);
1748		if (adapter->hw.mac_type == em_82547)
1749			em_82547_update_fifo_head(adapter, m_head->m_pkthdr.len);
1750	}
1751
1752	return (0);
1753}
1754
1755/*********************************************************************
1756 *
 * 82547 workaround to avoid controller hang in a half-duplex environment.
 * The workaround is to avoid queuing a large packet that would span
 * the internal Tx FIFO ring boundary; when such a packet is pending we
 * reset the FIFO pointers, but only once the FIFO is quiescent.
1761 *
1762 **********************************************************************/
1763static void
1764em_82547_move_tail(void *arg)
1765{
1766	struct adapter *adapter = arg;
1767	uint16_t hw_tdt;
1768	uint16_t sw_tdt;
1769	struct em_tx_desc *tx_desc;
1770	uint16_t length = 0;
1771	boolean_t eop = 0;
1772
1773	EM_LOCK_ASSERT(adapter);
1774
1775	hw_tdt = E1000_READ_REG(&adapter->hw, TDT);
1776	sw_tdt = adapter->next_avail_tx_desc;
1777
1778	while (hw_tdt != sw_tdt) {
1779		tx_desc = &adapter->tx_desc_base[hw_tdt];
1780		length += tx_desc->lower.flags.length;
1781		eop = tx_desc->lower.data & E1000_TXD_CMD_EOP;
1782		if(++hw_tdt == adapter->num_tx_desc)
1783			hw_tdt = 0;
1784
1785		if (eop) {
1786			if (em_82547_fifo_workaround(adapter, length)) {
1787				adapter->tx_fifo_wrk_cnt++;
1788				callout_reset(&adapter->tx_fifo_timer, 1,
1789					em_82547_move_tail, adapter);
1790				break;
1791			}
1792			E1000_WRITE_REG(&adapter->hw, TDT, hw_tdt);
1793			em_82547_update_fifo_head(adapter, length);
1794			length = 0;
1795		}
1796	}
1797}
1798
1799static int
1800em_82547_fifo_workaround(struct adapter *adapter, int len)
1801{
1802	int fifo_space, fifo_pkt_len;
1803
1804	fifo_pkt_len = roundup2(len + EM_FIFO_HDR, EM_FIFO_HDR);
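	/*
	 * Illustrative: assuming the usual 16-byte EM_FIFO_HDR, a
	 * 1514-byte frame yields roundup2(1530, 16) = 1536 FIFO bytes.
	 */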
1805
1806	if (adapter->link_duplex == HALF_DUPLEX) {
1807		fifo_space = adapter->tx_fifo_size - adapter->tx_fifo_head;
1808
1809		if (fifo_pkt_len >= (EM_82547_PKT_THRESH + fifo_space)) {
1810			if (em_82547_tx_fifo_reset(adapter))
1811				return (0);
1812			else
1813				return (1);
1814		}
1815	}
1816
1817	return (0);
1818}
1819
1820static void
1821em_82547_update_fifo_head(struct adapter *adapter, int len)
1822{
1823	int fifo_pkt_len = roundup2(len + EM_FIFO_HDR, EM_FIFO_HDR);
1824
1825	/* tx_fifo_head is always 16 byte aligned */
1826	adapter->tx_fifo_head += fifo_pkt_len;
1827	if (adapter->tx_fifo_head >= adapter->tx_fifo_size) {
1828		adapter->tx_fifo_head -= adapter->tx_fifo_size;
1829	}
1830}
1831
1832
1833static int
1834em_82547_tx_fifo_reset(struct adapter *adapter)
1835{
1836	uint32_t tctl;
1837
	if ((E1000_READ_REG(&adapter->hw, TDT) ==
	     E1000_READ_REG(&adapter->hw, TDH)) &&
	    (E1000_READ_REG(&adapter->hw, TDFT) ==
	     E1000_READ_REG(&adapter->hw, TDFH)) &&
	    (E1000_READ_REG(&adapter->hw, TDFTS) ==
	     E1000_READ_REG(&adapter->hw, TDFHS)) &&
	    (E1000_READ_REG(&adapter->hw, TDFPC) == 0)) {
1842
1843		/* Disable TX unit */
1844		tctl = E1000_READ_REG(&adapter->hw, TCTL);
1845		E1000_WRITE_REG(&adapter->hw, TCTL, tctl & ~E1000_TCTL_EN);
1846
1847		/* Reset FIFO pointers */
1848		E1000_WRITE_REG(&adapter->hw, TDFT,  adapter->tx_head_addr);
1849		E1000_WRITE_REG(&adapter->hw, TDFH,  adapter->tx_head_addr);
1850		E1000_WRITE_REG(&adapter->hw, TDFTS, adapter->tx_head_addr);
1851		E1000_WRITE_REG(&adapter->hw, TDFHS, adapter->tx_head_addr);
1852
1853		/* Re-enable TX unit */
1854		E1000_WRITE_REG(&adapter->hw, TCTL, tctl);
1855		E1000_WRITE_FLUSH(&adapter->hw);
1856
1857		adapter->tx_fifo_head = 0;
1858		adapter->tx_fifo_reset_cnt++;
1859
1860		return (TRUE);
	} else
		return (FALSE);
1865}
1866
1867static void
1868em_set_promisc(struct adapter *adapter)
1869{
1870	struct ifnet	*ifp = adapter->ifp;
1871	uint32_t	reg_rctl;
1872
1873	reg_rctl = E1000_READ_REG(&adapter->hw, RCTL);
1874
1875	if (ifp->if_flags & IFF_PROMISC) {
1876		reg_rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
1877		E1000_WRITE_REG(&adapter->hw, RCTL, reg_rctl);
1878	} else if (ifp->if_flags & IFF_ALLMULTI) {
1879		reg_rctl |= E1000_RCTL_MPE;
1880		reg_rctl &= ~E1000_RCTL_UPE;
1881		E1000_WRITE_REG(&adapter->hw, RCTL, reg_rctl);
1882	}
1883}
1884
1885static void
1886em_disable_promisc(struct adapter *adapter)
1887{
1888	uint32_t	reg_rctl;
1889
1890	reg_rctl = E1000_READ_REG(&adapter->hw, RCTL);
1891
1892	reg_rctl &=  (~E1000_RCTL_UPE);
1893	reg_rctl &=  (~E1000_RCTL_MPE);
1894	E1000_WRITE_REG(&adapter->hw, RCTL, reg_rctl);
1895}
1896
1897
1898/*********************************************************************
1899 *  Multicast Update
1900 *
1901 *  This routine is called whenever multicast address list is updated.
 *  This routine is called whenever the multicast address list is updated.
1903 **********************************************************************/
1904
1905static void
1906em_set_multi(struct adapter *adapter)
1907{
1908	struct ifnet	*ifp = adapter->ifp;
1909	struct ifmultiaddr *ifma;
1910	uint32_t reg_rctl = 0;
1911	uint8_t  mta[MAX_NUM_MULTICAST_ADDRESSES * ETH_LENGTH_OF_ADDRESS];
1912	int mcnt = 0;
1913
1914	IOCTL_DEBUGOUT("em_set_multi: begin");
1915
1916	if (adapter->hw.mac_type == em_82542_rev2_0) {
1917		reg_rctl = E1000_READ_REG(&adapter->hw, RCTL);
1918		if (adapter->hw.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
1919			em_pci_clear_mwi(&adapter->hw);
1920		reg_rctl |= E1000_RCTL_RST;
1921		E1000_WRITE_REG(&adapter->hw, RCTL, reg_rctl);
1922		msec_delay(5);
1923	}
1924
1925	IF_ADDR_LOCK(ifp);
1926	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
1927		if (ifma->ifma_addr->sa_family != AF_LINK)
1928			continue;
1929
1930		if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
1931			break;
1932
1933		bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
1934		    &mta[mcnt*ETH_LENGTH_OF_ADDRESS], ETH_LENGTH_OF_ADDRESS);
1935		mcnt++;
1936	}
1937	IF_ADDR_UNLOCK(ifp);
1938
1939	if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES) {
1940		reg_rctl = E1000_READ_REG(&adapter->hw, RCTL);
1941		reg_rctl |= E1000_RCTL_MPE;
1942		E1000_WRITE_REG(&adapter->hw, RCTL, reg_rctl);
1943	} else
1944		em_mc_addr_list_update(&adapter->hw, mta, mcnt, 0, 1);
1945
1946	if (adapter->hw.mac_type == em_82542_rev2_0) {
1947		reg_rctl = E1000_READ_REG(&adapter->hw, RCTL);
1948		reg_rctl &= ~E1000_RCTL_RST;
1949		E1000_WRITE_REG(&adapter->hw, RCTL, reg_rctl);
1950		msec_delay(5);
1951		if (adapter->hw.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
1952			em_pci_set_mwi(&adapter->hw);
1953	}
1954}
1955
1956
1957/*********************************************************************
1958 *  Timer routine
1959 *
1960 *  This routine checks for link status and updates statistics.
1961 *
1962 **********************************************************************/
1963
1964static void
1965em_local_timer(void *arg)
1966{
1967	struct adapter	*adapter = arg;
1968	struct ifnet	*ifp = adapter->ifp;
1969
1970	EM_LOCK_ASSERT(adapter);
1971
1972	em_check_for_link(&adapter->hw);
1973	em_update_link_status(adapter);
1974	em_update_stats_counters(adapter);
	if (em_display_debug_stats && (ifp->if_drv_flags & IFF_DRV_RUNNING))
1976		em_print_hw_stats(adapter);
1977	em_smartspeed(adapter);
1978	/*
1979	 * Each second we check the watchdog to
1980	 * protect against hardware hangs.
1981	 */
1982	em_watchdog(adapter);
1983
1984	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
1985}
1986
1987static void
1988em_update_link_status(struct adapter *adapter)
1989{
1990	struct ifnet *ifp = adapter->ifp;
1991	device_t dev = adapter->dev;
1992
1993	if (E1000_READ_REG(&adapter->hw, STATUS) & E1000_STATUS_LU) {
1994		if (adapter->link_active == 0) {
1995			em_get_speed_and_duplex(&adapter->hw, &adapter->link_speed,
1996			    &adapter->link_duplex);
1997			/* Check if we may set SPEED_MODE bit on PCI-E */
1998			if ((adapter->link_speed == SPEED_1000) &&
1999			    ((adapter->hw.mac_type == em_82571) ||
2000			    (adapter->hw.mac_type == em_82572))) {
2001				int tarc0;
2002
2003				tarc0 = E1000_READ_REG(&adapter->hw, TARC0);
2004				tarc0 |= SPEED_MODE_BIT;
2005				E1000_WRITE_REG(&adapter->hw, TARC0, tarc0);
2006			}
2007			if (bootverbose)
2008				device_printf(dev, "Link is up %d Mbps %s\n",
2009				    adapter->link_speed,
2010				    ((adapter->link_duplex == FULL_DUPLEX) ?
2011				    "Full Duplex" : "Half Duplex"));
2012			adapter->link_active = 1;
2013			adapter->smartspeed = 0;
2014			ifp->if_baudrate = adapter->link_speed * 1000000;
2015			if_link_state_change(ifp, LINK_STATE_UP);
2016		}
2017	} else {
2018		if (adapter->link_active == 1) {
2019			ifp->if_baudrate = adapter->link_speed = 0;
2020			adapter->link_duplex = 0;
2021			if (bootverbose)
2022				device_printf(dev, "Link is Down\n");
2023			adapter->link_active = 0;
2024			if_link_state_change(ifp, LINK_STATE_DOWN);
2025		}
2026	}
2027}
2028
2029/*********************************************************************
2030 *
2031 *  This routine disables all traffic on the adapter by issuing a
2032 *  global reset on the MAC and deallocates TX/RX buffers.
2033 *
2034 **********************************************************************/
2035
2036static void
2037em_stop(void *arg)
2038{
2039	struct adapter	*adapter = arg;
2040	struct ifnet	*ifp = adapter->ifp;
2041
2042	EM_LOCK_ASSERT(adapter);
2043
2044	INIT_DEBUGOUT("em_stop: begin");
2045
2046	em_disable_intr(adapter);
2047	em_reset_hw(&adapter->hw);
2048	callout_stop(&adapter->timer);
2049	callout_stop(&adapter->tx_fifo_timer);
2050
2051	/* Tell the stack that the interface is no longer active */
2052	ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
2053}
2054
2055
2056/********************************************************************
2057 *
2058 *  Determine hardware revision.
2059 *
2060 **********************************************************************/
2061static void
2062em_identify_hardware(struct adapter *adapter)
2063{
2064	device_t dev = adapter->dev;
2065
2066	/* Make sure our PCI config space has the necessary stuff set */
2067	pci_enable_busmaster(dev);
2068	pci_enable_io(dev, SYS_RES_MEMORY);
2069	adapter->hw.pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2);
2070
2071	/* Save off the information about this board */
2072	adapter->hw.vendor_id = pci_get_vendor(dev);
2073	adapter->hw.device_id = pci_get_device(dev);
2074	adapter->hw.revision_id = pci_get_revid(dev);
2075	adapter->hw.subsystem_vendor_id = pci_get_subvendor(dev);
2076	adapter->hw.subsystem_id = pci_get_subdevice(dev);
2077
2078	/* Identify the MAC */
2079	if (em_set_mac_type(&adapter->hw))
2080		device_printf(dev, "Unknown MAC Type\n");
2081
	if (adapter->hw.mac_type == em_82541 ||
	    adapter->hw.mac_type == em_82541_rev_2 ||
	    adapter->hw.mac_type == em_82547 ||
	    adapter->hw.mac_type == em_82547_rev_2)
		adapter->hw.phy_init_script = TRUE;
2085}
2086
2087static int
2088em_allocate_pci_resources(struct adapter *adapter)
2089{
2090	device_t	dev = adapter->dev;
2091	int		val, rid;
2092
2093	rid = PCIR_BAR(0);
2094	adapter->res_memory = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
2095	    &rid, RF_ACTIVE);
2096	if (adapter->res_memory == NULL) {
2097		device_printf(dev, "Unable to allocate bus resource: memory\n");
2098		return (ENXIO);
2099	}
	adapter->osdep.mem_bus_space_tag = rman_get_bustag(adapter->res_memory);
	adapter->osdep.mem_bus_space_handle =
	    rman_get_bushandle(adapter->res_memory);
2103	adapter->hw.hw_addr = (uint8_t *)&adapter->osdep.mem_bus_space_handle;
2104
2105	if (adapter->hw.mac_type > em_82543) {
		/* Figure out where our IO BAR is. */
2107		for (rid = PCIR_BAR(0); rid < PCIR_CIS;) {
2108			val = pci_read_config(dev, rid, 4);
2109			if (EM_BAR_TYPE(val) == EM_BAR_TYPE_IO) {
2110				adapter->io_rid = rid;
2111				break;
2112			}
2113			rid += 4;
2114			/* check for 64bit BAR */
2115			if (EM_BAR_MEM_TYPE(val) == EM_BAR_MEM_TYPE_64BIT)
2116				rid += 4;
2117		}
2118		if (rid >= PCIR_CIS) {
2119			device_printf(dev, "Unable to locate IO BAR\n");
2120			return (ENXIO);
2121		}
2122		adapter->res_ioport = bus_alloc_resource_any(dev, SYS_RES_IOPORT,
2123		    &adapter->io_rid, RF_ACTIVE);
2124		if (adapter->res_ioport == NULL) {
2125			device_printf(dev, "Unable to allocate bus resource: "
2126			    "ioport\n");
2127			return (ENXIO);
2128		}
2129		adapter->hw.io_base = 0;
2130		adapter->osdep.io_bus_space_tag = rman_get_bustag(adapter->res_ioport);
2131		adapter->osdep.io_bus_space_handle =
2132		    rman_get_bushandle(adapter->res_ioport);
2133	}
2134
2135	/* For ICH8 we need to find the flash memory. */
2136	if (adapter->hw.mac_type == em_ich8lan) {
2137		rid = EM_FLASH;
2138
2139		adapter->flash_mem = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
2140		    &rid, RF_ACTIVE);
2141		adapter->osdep.flash_bus_space_tag = rman_get_bustag(adapter->flash_mem);
2142		adapter->osdep.flash_bus_space_handle =
2143		    rman_get_bushandle(adapter->flash_mem);
2144	}
2145
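	/*
	 * Use MSI when the device advertises exactly one message; the MSI
	 * vector lives at rid 1 while a legacy INTx interrupt uses rid 0.
	 */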
2146	val = pci_msi_count(dev);
2147	if (val == 1 && pci_alloc_msi(dev, &val) == 0) {
2148		rid = 1;
2149		adapter->msi = 1;
2150	} else
2151		rid = 0;
2152	adapter->res_interrupt = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid,
2153	    RF_SHAREABLE | RF_ACTIVE);
2154	if (adapter->res_interrupt == NULL) {
2155		device_printf(dev, "Unable to allocate bus resource: "
2156		    "interrupt\n");
2157		return (ENXIO);
2158	}
2159
2160	adapter->hw.back = &adapter->osdep;
2161
2162	return (0);
2163}
2164
2165int
2166em_allocate_intr(struct adapter *adapter)
2167{
2168	device_t dev = adapter->dev;
2169	int error;
2170
2171	/* Manually turn off all interrupts */
2172	E1000_WRITE_REG(&adapter->hw, IMC, 0xffffffff);
2173
2174#ifdef DEVICE_POLLING
2175	if (adapter->int_handler_tag == NULL && (error = bus_setup_intr(dev,
2176	    adapter->res_interrupt, INTR_TYPE_NET | INTR_MPSAFE, em_intr, adapter,
2177	    &adapter->int_handler_tag)) != 0) {
2178		device_printf(dev, "Failed to register interrupt handler");
2179		return (error);
2180	}
2181#else
2182	/*
2183	 * Try allocating a fast interrupt and the associated deferred
2184	 * processing contexts.
2185	 */
2186	TASK_INIT(&adapter->rxtx_task, 0, em_handle_rxtx, adapter);
2187	TASK_INIT(&adapter->link_task, 0, em_handle_link, adapter);
2188	adapter->tq = taskqueue_create_fast("em_taskq", M_NOWAIT,
2189	    taskqueue_thread_enqueue, &adapter->tq);
2190	taskqueue_start_threads(&adapter->tq, 1, PI_NET, "%s taskq",
2191	    device_get_nameunit(adapter->dev));
2192	if ((error = bus_setup_intr(dev, adapter->res_interrupt,
2193	    INTR_TYPE_NET | INTR_FAST, em_intr_fast, adapter,
2194	    &adapter->int_handler_tag)) != 0) {
2195		device_printf(dev, "Failed to register fast interrupt "
2196			    "handler: %d\n", error);
2197		taskqueue_free(adapter->tq);
2198		adapter->tq = NULL;
2199		return (error);
2200	}
2201#endif
2202
2203	em_enable_intr(adapter);
2204	return (0);
2205}
2206
2207static void
2208em_free_intr(struct adapter *adapter)
2209{
2210	device_t dev = adapter->dev;
2211
2212	if (adapter->int_handler_tag != NULL) {
2213		bus_teardown_intr(dev, adapter->res_interrupt, adapter->int_handler_tag);
2214		adapter->int_handler_tag = NULL;
2215	}
2216	if (adapter->tq != NULL) {
2217		taskqueue_drain(adapter->tq, &adapter->rxtx_task);
2218		taskqueue_drain(taskqueue_fast, &adapter->link_task);
2219		taskqueue_free(adapter->tq);
2220		adapter->tq = NULL;
2221	}
2222}
2223
2224static void
2225em_free_pci_resources(struct adapter *adapter)
2226{
2227	device_t dev = adapter->dev;
2228
2229	if (adapter->res_interrupt != NULL)
2230		bus_release_resource(dev, SYS_RES_IRQ, adapter->msi ? 1 : 0,
2231		    adapter->res_interrupt);
2232
2233	if (adapter->msi)
2234		pci_release_msi(dev);
2235
2236	if (adapter->res_memory != NULL)
2237		bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BAR(0),
2238		    adapter->res_memory);
2239
2240	if (adapter->flash_mem != NULL)
2241		bus_release_resource(dev, SYS_RES_MEMORY, EM_FLASH,
2242		    adapter->flash_mem);
2243
2244	if (adapter->res_ioport != NULL)
2245		bus_release_resource(dev, SYS_RES_IOPORT, adapter->io_rid,
2246		    adapter->res_ioport);
2247}
2248
2249/*********************************************************************
2250 *
2251 *  Initialize the hardware to a configuration as specified by the
2252 *  adapter structure. The controller is reset, the EEPROM is
2253 *  verified, the MAC address is set, then the shared initialization
2254 *  routines are called.
2255 *
2256 **********************************************************************/
2257static int
2258em_hardware_init(struct adapter *adapter)
2259{
2260	device_t dev = adapter->dev;
2261	uint16_t rx_buffer_size;
2262
2263	INIT_DEBUGOUT("em_hardware_init: begin");
2264	/* Issue a global reset */
2265	em_reset_hw(&adapter->hw);
2266
2267	/* When hardware is reset, fifo_head is also reset */
2268	adapter->tx_fifo_head = 0;
2269
2270	/* Make sure we have a good EEPROM before we read from it */
2271	if (em_validate_eeprom_checksum(&adapter->hw) < 0) {
2272		device_printf(dev, "The EEPROM Checksum Is Not Valid\n");
2273		return (EIO);
2274	}
2275
2276	if (em_read_part_num(&adapter->hw, &(adapter->part_num)) < 0) {
2277		device_printf(dev, "EEPROM read error while reading part "
2278		    "number\n");
2279		return (EIO);
2280	}
2281
2282	/* Set up smart power down as default off on newer adapters. */
2283	if (!em_smart_pwr_down &&
2284	    (adapter->hw.mac_type == em_82571 || adapter->hw.mac_type == em_82572)) {
2285		uint16_t phy_tmp = 0;
2286
2287		/* Speed up time to link by disabling smart power down. */
2288		em_read_phy_reg(&adapter->hw, IGP02E1000_PHY_POWER_MGMT, &phy_tmp);
2289		phy_tmp &= ~IGP02E1000_PM_SPD;
2290		em_write_phy_reg(&adapter->hw, IGP02E1000_PHY_POWER_MGMT, phy_tmp);
2291	}
2292
2293	/*
2294	 * These parameters control the automatic generation (Tx) and
2295	 * response (Rx) to Ethernet PAUSE frames.
2296	 * - High water mark should allow for at least two frames to be
2297	 *   received after sending an XOFF.
2298	 * - Low water mark works best when it is very near the high water mark.
2299	 *   This allows the receiver to restart by sending XON when it has
 *   drained a bit. Here we use an arbitrary value of 1500 which will
2301	 *   restart after one full frame is pulled from the buffer. There
2302	 *   could be several smaller frames in the buffer and if so they will
2303	 *   not trigger the XON until their total number reduces the buffer
2304	 *   by 1500.
2305	 * - The pause time is fairly large at 1000 x 512ns = 512 usec.
 *   - The pause time is fairly large, 0x1000 quanta of 512 bit times
 *     (about 2ms at gigabit speed).
2307	rx_buffer_size = ((E1000_READ_REG(&adapter->hw, PBA) & 0xffff) << 10 );
2308
2309	adapter->hw.fc_high_water = rx_buffer_size -
2310	    roundup2(adapter->hw.max_frame_size, 1024);
2311	adapter->hw.fc_low_water = adapter->hw.fc_high_water - 1500;
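	/*
	 * Illustrative numbers: assuming a 48KB packet buffer and a
	 * 1518-byte max frame, rx_buffer_size = 49152, fc_high_water =
	 * 49152 - roundup2(1518, 1024) = 47104 and fc_low_water = 45604.
	 */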
2312	if (adapter->hw.mac_type == em_80003es2lan)
2313		adapter->hw.fc_pause_time = 0xFFFF;
2314	else
2315		adapter->hw.fc_pause_time = 0x1000;
2316	adapter->hw.fc_send_xon = TRUE;
2317	adapter->hw.fc = E1000_FC_FULL;
2318
2319	if (em_init_hw(&adapter->hw) < 0) {
2320		device_printf(dev, "Hardware Initialization Failed");
2321		return (EIO);
2322	}
2323
2324	em_check_for_link(&adapter->hw);
2325
2326	return (0);
2327}
2328
2329/*********************************************************************
2330 *
2331 *  Setup networking device structure and register an interface.
2332 *
2333 **********************************************************************/
2334static void
2335em_setup_interface(device_t dev, struct adapter *adapter)
2336{
2337	struct ifnet   *ifp;
2338	INIT_DEBUGOUT("em_setup_interface: begin");
2339
2340	ifp = adapter->ifp = if_alloc(IFT_ETHER);
2341	if (ifp == NULL)
2342		panic("%s: can not if_alloc()", device_get_nameunit(dev));
2343	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
2344	ifp->if_mtu = ETHERMTU;
2345	ifp->if_init =  em_init;
2346	ifp->if_softc = adapter;
2347	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
2348	ifp->if_ioctl = em_ioctl;
2349	ifp->if_start = em_start;
2350	IFQ_SET_MAXLEN(&ifp->if_snd, adapter->num_tx_desc - 1);
2351	ifp->if_snd.ifq_drv_maxlen = adapter->num_tx_desc - 1;
2352	IFQ_SET_READY(&ifp->if_snd);
2353
2354	ether_ifattach(ifp, adapter->hw.mac_addr);
2355
2356	ifp->if_capabilities = ifp->if_capenable = 0;
2357
2358	if (adapter->hw.mac_type >= em_82543) {
2359		ifp->if_capabilities |= IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM;
2360		ifp->if_capenable |= IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM;
2361	}
2362
2363	/* Enable TSO if available */
2364	if ((adapter->hw.mac_type > em_82544) &&
2365	    (adapter->hw.mac_type != em_82547)) {
2366		ifp->if_capabilities |= IFCAP_TSO4;
2367		ifp->if_capenable |= IFCAP_TSO4;
2368	}
2369
2370	/*
2371	 * Tell the upper layer(s) we support long frames.
2372	 */
2373	ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header);
2374	ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU;
2375	ifp->if_capenable |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU;
2376
2377#ifdef DEVICE_POLLING
2378	ifp->if_capabilities |= IFCAP_POLLING;
2379#endif
2380
2381	/*
2382	 * Specify the media types supported by this adapter and register
2383	 * callbacks to update media and link information
2384	 */
2385	ifmedia_init(&adapter->media, IFM_IMASK, em_media_change,
2386	    em_media_status);
2387	if ((adapter->hw.media_type == em_media_type_fiber) ||
2388	    (adapter->hw.media_type == em_media_type_internal_serdes)) {
		u_char fiber_type = IFM_1000_SX;	/* default type */
2390
2391		if (adapter->hw.mac_type == em_82545)
2392			fiber_type = IFM_1000_LX;
2393		ifmedia_add(&adapter->media, IFM_ETHER | fiber_type | IFM_FDX,
2394		    0, NULL);
2395		ifmedia_add(&adapter->media, IFM_ETHER | fiber_type, 0, NULL);
2396	} else {
2397		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T, 0, NULL);
2398		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T | IFM_FDX,
2399			    0, NULL);
2400		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX,
2401			    0, NULL);
2402		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX | IFM_FDX,
2403			    0, NULL);
2404		if (adapter->hw.phy_type != em_phy_ife) {
2405			ifmedia_add(&adapter->media,
2406				IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
2407			ifmedia_add(&adapter->media,
2408				IFM_ETHER | IFM_1000_T, 0, NULL);
2409		}
2410	}
2411	ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL);
2412	ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO);
2413}
2414
2415
2416/*********************************************************************
2417 *
2418 *  Workaround for SmartSpeed on 82541 and 82547 controllers
2419 *
2420 **********************************************************************/
2421static void
2422em_smartspeed(struct adapter *adapter)
2423{
2424	uint16_t phy_tmp;
2425
2426	if (adapter->link_active || (adapter->hw.phy_type != em_phy_igp) ||
2427	    adapter->hw.autoneg == 0 ||
2428	    (adapter->hw.autoneg_advertised & ADVERTISE_1000_FULL) == 0)
2429		return;
2430
2431	if (adapter->smartspeed == 0) {
		/*
		 * If the Master/Slave config fault is asserted in two
		 * consecutive reads, assume it is a real fault.
		 */
2434		em_read_phy_reg(&adapter->hw, PHY_1000T_STATUS, &phy_tmp);
2435		if (!(phy_tmp & SR_1000T_MS_CONFIG_FAULT))
2436			return;
2437		em_read_phy_reg(&adapter->hw, PHY_1000T_STATUS, &phy_tmp);
2438		if (phy_tmp & SR_1000T_MS_CONFIG_FAULT) {
2439			em_read_phy_reg(&adapter->hw, PHY_1000T_CTRL, &phy_tmp);
2440			if(phy_tmp & CR_1000T_MS_ENABLE) {
2441				phy_tmp &= ~CR_1000T_MS_ENABLE;
2442				em_write_phy_reg(&adapter->hw, PHY_1000T_CTRL,
2443				    phy_tmp);
2444				adapter->smartspeed++;
2445				if(adapter->hw.autoneg &&
2446				   !em_phy_setup_autoneg(&adapter->hw) &&
2447				   !em_read_phy_reg(&adapter->hw, PHY_CTRL,
2448				    &phy_tmp)) {
2449					phy_tmp |= (MII_CR_AUTO_NEG_EN |
2450						    MII_CR_RESTART_AUTO_NEG);
2451					em_write_phy_reg(&adapter->hw, PHY_CTRL,
2452					    phy_tmp);
2453				}
2454			}
2455		}
2456		return;
2457	} else if(adapter->smartspeed == EM_SMARTSPEED_DOWNSHIFT) {
		/* If still no link, perhaps we are using a 2/3-pair cable. */
2459		em_read_phy_reg(&adapter->hw, PHY_1000T_CTRL, &phy_tmp);
2460		phy_tmp |= CR_1000T_MS_ENABLE;
2461		em_write_phy_reg(&adapter->hw, PHY_1000T_CTRL, phy_tmp);
2462		if(adapter->hw.autoneg &&
2463		   !em_phy_setup_autoneg(&adapter->hw) &&
2464		   !em_read_phy_reg(&adapter->hw, PHY_CTRL, &phy_tmp)) {
2465			phy_tmp |= (MII_CR_AUTO_NEG_EN |
2466				    MII_CR_RESTART_AUTO_NEG);
2467			em_write_phy_reg(&adapter->hw, PHY_CTRL, phy_tmp);
2468		}
2469	}
2470	/* Restart process after EM_SMARTSPEED_MAX iterations */
2471	if(adapter->smartspeed++ == EM_SMARTSPEED_MAX)
2472		adapter->smartspeed = 0;
2473}
2474
2475
2476/*
2477 * Manage DMA'able memory.
2478 */
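/*
 * bus_dmamap_load() callback: the tag guarantees a single segment, so
 * just record its bus address for em_dma_malloc() to return.
 */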
2479static void
2480em_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
2481{
2482	if (error)
2483		return;
2484	*(bus_addr_t *) arg = segs[0].ds_addr;
2485}
2486
2487static int
2488em_dma_malloc(struct adapter *adapter, bus_size_t size, struct em_dma_alloc *dma,
2489	int mapflags)
2490{
2491	int error;
2492
2493	error = bus_dma_tag_create(bus_get_dma_tag(adapter->dev), /* parent */
2494				EM_DBA_ALIGN, 0,	/* alignment, bounds */
2495				BUS_SPACE_MAXADDR,	/* lowaddr */
2496				BUS_SPACE_MAXADDR,	/* highaddr */
2497				NULL, NULL,		/* filter, filterarg */
2498				size,			/* maxsize */
2499				1,			/* nsegments */
2500				size,			/* maxsegsize */
2501				0,			/* flags */
2502				NULL,			/* lockfunc */
2503				NULL,			/* lockarg */
2504				&dma->dma_tag);
2505	if (error) {
2506		device_printf(adapter->dev, "%s: bus_dma_tag_create failed: %d\n",
2507		    __func__, error);
2508		goto fail_0;
2509	}
2510
2511	error = bus_dmamem_alloc(dma->dma_tag, (void**) &dma->dma_vaddr,
2512	    BUS_DMA_NOWAIT, &dma->dma_map);
2513	if (error) {
2514		device_printf(adapter->dev, "%s: bus_dmamem_alloc(%ju) failed: %d\n",
2515		    __func__, (uintmax_t)size, error);
2516		goto fail_2;
2517	}
2518
2519	dma->dma_paddr = 0;
2520	error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
2521	    size, em_dmamap_cb, &dma->dma_paddr, mapflags | BUS_DMA_NOWAIT);
2522	if (error || dma->dma_paddr == 0) {
2523		device_printf(adapter->dev, "%s: bus_dmamap_load failed: %d\n",
2524		    __func__, error);
2525		goto fail_3;
2526	}
2527
2528	return (0);
2529
2530fail_3:
2531	bus_dmamap_unload(dma->dma_tag, dma->dma_map);
2532fail_2:
2533	bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
2534	bus_dma_tag_destroy(dma->dma_tag);
2535fail_0:
2536	dma->dma_map = NULL;
2537	dma->dma_tag = NULL;
2538
2539	return (error);
2540}
2541
2542static void
2543em_dma_free(struct adapter *adapter, struct em_dma_alloc *dma)
2544{
2545	if (dma->dma_tag == NULL)
2546		return;
2547	if (dma->dma_map != NULL) {
2548		bus_dmamap_sync(dma->dma_tag, dma->dma_map,
2549		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
2550		bus_dmamap_unload(dma->dma_tag, dma->dma_map);
2551		bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
2552		dma->dma_map = NULL;
2553	}
2554	bus_dma_tag_destroy(dma->dma_tag);
2555	dma->dma_tag = NULL;
2556}
2557
2558
2559/*********************************************************************
2560 *
2561 *  Allocate memory for tx_buffer structures. The tx_buffer stores all
2562 *  the information needed to transmit a packet on the wire.
2563 *
2564 **********************************************************************/
2565static int
2566em_allocate_transmit_structures(struct adapter *adapter)
2567{
2568	device_t dev = adapter->dev;
2569	struct em_buffer *tx_buffer;
2570	bus_size_t size, segsize;
2571	int error, i;
2572
2573	/*
2574	 * Setup DMA descriptor areas.
2575	 */
2576	segsize = size = roundup2(adapter->hw.max_frame_size, MCLBYTES);
2577
2578	/* Overrides for TSO - want large sizes */
2579	if ((adapter->hw.mac_type > em_82544) &&
2580	    (adapter->hw.mac_type != em_82547)) {
2581		size = EM_TSO_SIZE;
		segsize = 4096; /* fixed 4KB segments; the page size may differ */
2583	}
2584
2585	if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),	/* parent */
2586				1, 0,			/* alignment, bounds */
2587				BUS_SPACE_MAXADDR,	/* lowaddr */
2588				BUS_SPACE_MAXADDR,	/* highaddr */
2589				NULL, NULL,		/* filter, filterarg */
2590				size,			/* maxsize */
2591				EM_MAX_SCATTER,		/* nsegments */
2592				segsize,		/* maxsegsize */
2593				0,			/* flags */
2594				NULL,		/* lockfunc */
2595				NULL,		/* lockarg */
2596				&adapter->txtag)) != 0) {
2597		device_printf(dev, "Unable to allocate TX DMA tag\n");
2598		goto fail;
2599	}
2600
2601	adapter->tx_buffer_area = malloc(sizeof(struct em_buffer) *
2602	    adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO);
2603	if (adapter->tx_buffer_area == NULL) {
2604		device_printf(dev, "Unable to allocate tx_buffer memory\n");
2605		error = ENOMEM;
2606		goto fail;
2607	}
2608
2609	tx_buffer = adapter->tx_buffer_area;
2610	for (i = 0; i < adapter->num_tx_desc; i++) {
2611		error = bus_dmamap_create(adapter->txtag, 0, &tx_buffer->map);
2612		if (error != 0) {
2613			device_printf(dev, "Unable to create TX DMA map\n");
2614			goto fail;
2615		}
2616		tx_buffer++;
2617	}
2618
2619	return (0);
2620
2621fail:
2622	em_free_transmit_structures(adapter);
2623	return (error);
2624}
2625
2626/*********************************************************************
2627 *
2628 *  Initialize transmit structures.
2629 *
2630 **********************************************************************/
2631static void
2632em_setup_transmit_structures(struct adapter *adapter)
2633{
2634	struct em_buffer *tx_buffer;
2635	int i;
2636
2637	bzero(adapter->tx_desc_base, (sizeof(struct em_tx_desc)) * adapter->num_tx_desc);
2638
2639	adapter->next_avail_tx_desc = 0;
2640	adapter->next_tx_to_clean = 0;
2641
2642	/* Free any existing tx buffers. */
2643	tx_buffer = adapter->tx_buffer_area;
2644	for (i = 0; i < adapter->num_tx_desc; i++, tx_buffer++) {
2645		if (tx_buffer->m_head != NULL) {
2646			bus_dmamap_sync(adapter->txtag, tx_buffer->map,
2647			    BUS_DMASYNC_POSTWRITE);
2648			bus_dmamap_unload(adapter->txtag, tx_buffer->map);
2649			m_freem(tx_buffer->m_head);
2650			tx_buffer->m_head = NULL;
2651		}
2652	}
2653
2654	/* Set number of descriptors available */
2655	adapter->num_tx_desc_avail = adapter->num_tx_desc;
2656
2657	/* Set checksum context */
2658	adapter->active_checksum_context = OFFLOAD_NONE;
2659	bus_dmamap_sync(adapter->txdma.dma_tag, adapter->txdma.dma_map,
2660	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
2661}
2662
2663/*********************************************************************
2664 *
2665 *  Enable transmit unit.
2666 *
2667 **********************************************************************/
2668static void
2669em_initialize_transmit_unit(struct adapter *adapter)
2670{
2671	uint32_t	reg_tctl;
2672	uint32_t	reg_tipg = 0;
2673	uint64_t	bus_addr;
2674
	INIT_DEBUGOUT("em_initialize_transmit_unit: begin");
2676	/* Setup the Base and Length of the Tx Descriptor Ring */
2677	bus_addr = adapter->txdma.dma_paddr;
2678	E1000_WRITE_REG(&adapter->hw, TDLEN,
2679	    adapter->num_tx_desc * sizeof(struct em_tx_desc));
2680	E1000_WRITE_REG(&adapter->hw, TDBAH, (uint32_t)(bus_addr >> 32));
2681	E1000_WRITE_REG(&adapter->hw, TDBAL, (uint32_t)bus_addr);
2682
2683	/* Setup the HW Tx Head and Tail descriptor pointers */
2684	E1000_WRITE_REG(&adapter->hw, TDT, 0);
2685	E1000_WRITE_REG(&adapter->hw, TDH, 0);
2686
2687
2688	HW_DEBUGOUT2("Base = %x, Length = %x\n", E1000_READ_REG(&adapter->hw, TDBAL),
2689	    E1000_READ_REG(&adapter->hw, TDLEN));
2690
2691	/* Set the default values for the Tx Inter Packet Gap timer */
2692	switch (adapter->hw.mac_type) {
2693	case em_82542_rev2_0:
2694	case em_82542_rev2_1:
2695		reg_tipg = DEFAULT_82542_TIPG_IPGT;
2696		reg_tipg |= DEFAULT_82542_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT;
2697		reg_tipg |= DEFAULT_82542_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT;
2698		break;
2699	case em_80003es2lan:
2700		reg_tipg = DEFAULT_82543_TIPG_IPGR1;
2701		reg_tipg |= DEFAULT_80003ES2LAN_TIPG_IPGR2 <<
2702		    E1000_TIPG_IPGR2_SHIFT;
2703		break;
2704	default:
2705		if ((adapter->hw.media_type == em_media_type_fiber) ||
2706		    (adapter->hw.media_type == em_media_type_internal_serdes))
2707			reg_tipg = DEFAULT_82543_TIPG_IPGT_FIBER;
2708		else
2709			reg_tipg = DEFAULT_82543_TIPG_IPGT_COPPER;
2710		reg_tipg |= DEFAULT_82543_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT;
2711		reg_tipg |= DEFAULT_82543_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT;
2712	}
2713
2714	E1000_WRITE_REG(&adapter->hw, TIPG, reg_tipg);
2715	E1000_WRITE_REG(&adapter->hw, TIDV, adapter->tx_int_delay.value);
2716	if(adapter->hw.mac_type >= em_82540)
2717		E1000_WRITE_REG(&adapter->hw, TADV, adapter->tx_abs_int_delay.value);
2718
2719	/* Program the Transmit Control Register */
2720	reg_tctl = E1000_TCTL_PSP | E1000_TCTL_EN |
2721		   (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT);
2722	if (adapter->hw.mac_type >= em_82571)
2723		reg_tctl |= E1000_TCTL_MULR;
2724	if (adapter->link_duplex == FULL_DUPLEX) {
2725		reg_tctl |= E1000_FDX_COLLISION_DISTANCE << E1000_COLD_SHIFT;
2726	} else {
2727		reg_tctl |= E1000_HDX_COLLISION_DISTANCE << E1000_COLD_SHIFT;
2728	}
2729	/* This write will effectively turn on the transmit unit. */
2730	E1000_WRITE_REG(&adapter->hw, TCTL, reg_tctl);
2731
2732	/* Setup Transmit Descriptor Base Settings */
2733	adapter->txd_cmd = E1000_TXD_CMD_IFCS;
2734
2735	if (adapter->tx_int_delay.value > 0)
2736		adapter->txd_cmd |= E1000_TXD_CMD_IDE;
2737}
2738
2739/*********************************************************************
2740 *
2741 *  Free all transmit related data structures.
2742 *
2743 **********************************************************************/
2744static void
2745em_free_transmit_structures(struct adapter *adapter)
2746{
2747	struct em_buffer *tx_buffer;
2748	int i;
2749
2750	INIT_DEBUGOUT("free_transmit_structures: begin");
2751
2752	if (adapter->tx_buffer_area != NULL) {
2753		tx_buffer = adapter->tx_buffer_area;
2754		for (i = 0; i < adapter->num_tx_desc; i++, tx_buffer++) {
2755			if (tx_buffer->m_head != NULL) {
2756				bus_dmamap_sync(adapter->txtag, tx_buffer->map,
2757				    BUS_DMASYNC_POSTWRITE);
2758				bus_dmamap_unload(adapter->txtag,
2759				    tx_buffer->map);
2760				m_freem(tx_buffer->m_head);
2761				tx_buffer->m_head = NULL;
2762			} else if (tx_buffer->map != NULL)
2763				bus_dmamap_unload(adapter->txtag,
2764				    tx_buffer->map);
2765			if (tx_buffer->map != NULL) {
2766				bus_dmamap_destroy(adapter->txtag,
2767				    tx_buffer->map);
2768				tx_buffer->map = NULL;
2769			}
2770		}
2771	}
2772	if (adapter->tx_buffer_area != NULL) {
2773		free(adapter->tx_buffer_area, M_DEVBUF);
2774		adapter->tx_buffer_area = NULL;
2775	}
2776	if (adapter->txtag != NULL) {
2777		bus_dma_tag_destroy(adapter->txtag);
2778		adapter->txtag = NULL;
2779	}
2780}
2781
2782/*********************************************************************
2783 *
2784 *  The offload context needs to be set when we transfer the first
2785 *  packet of a particular protocol (TCP/UDP). We change the
2786 *  context only if the protocol type changes.
2787 *
2788 **********************************************************************/
2789static void
2790em_transmit_checksum_setup(struct adapter *adapter, struct mbuf *mp,
2791    uint32_t *txd_upper, uint32_t *txd_lower)
2792{
2793	struct em_context_desc *TXD;
2794	struct em_buffer *tx_buffer;
2795	struct ether_vlan_header *eh;
2796	struct ip *ip;
2797	struct ip6_hdr *ip6;
	struct tcphdr *th;
2799	int curr_txd, ehdrlen, hdr_len, ip_hlen;
2800	uint32_t cmd = 0;
2801	uint16_t etype;
2802	uint8_t ipproto;
2803
2804	/* Setup checksum offload context. */
2805	curr_txd = adapter->next_avail_tx_desc;
2806	tx_buffer = &adapter->tx_buffer_area[curr_txd];
2807	TXD = (struct em_context_desc *) &adapter->tx_desc_base[curr_txd];
2808
2809	*txd_lower = E1000_TXD_CMD_DEXT |	/* Extended descr type */
2810		     E1000_TXD_DTYP_D;		/* Data descr */
2811
2812	/*
2813	 * Determine where frame payload starts.
2814	 * Jump over vlan headers if already present,
2815	 * helpful for QinQ too.
2816	 */
2817	eh = mtod(mp, struct ether_vlan_header *);
2818	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
2819		etype = ntohs(eh->evl_proto);
2820		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
2821	} else {
2822		etype = ntohs(eh->evl_encap_proto);
2823		ehdrlen = ETHER_HDR_LEN;
2824	}
2825
2826	/*
2827	 * We only support TCP/UDP for IPv4 and IPv6 for the moment.
2828	 * TODO: Support SCTP too when it hits the tree.
2829	 */
2830	switch (etype) {
2831	case ETHERTYPE_IP:
2832		ip = (struct ip *)(mp->m_data + ehdrlen);
2833		ip_hlen = ip->ip_hl << 2;
2834
2835		/* Setup of IP header checksum. */
2836		if (mp->m_pkthdr.csum_flags & CSUM_IP) {
2837			/*
2838			 * Start offset for header checksum calculation.
2839			 * End offset for header checksum calculation.
2840			 * Offset of place to put the checksum.
2841			 */
2842			TXD->lower_setup.ip_fields.ipcss = ehdrlen;
2843			TXD->lower_setup.ip_fields.ipcse =
2844			    htole16(ehdrlen + ip_hlen);
2845			TXD->lower_setup.ip_fields.ipcso =
2846			    ehdrlen + offsetof(struct ip, ip_sum);
2847			cmd |= E1000_TXD_CMD_IP;
2848			*txd_upper |= E1000_TXD_POPTS_IXSM << 8;
2849		}
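		/*
		 * Illustrative offsets: for an untagged frame with a
		 * 20-byte IPv4 header, ehdrlen = 14 and ip_hlen = 20,
		 * so ipcss = 14 and ipcso = 14 + offsetof(struct ip,
		 * ip_sum) = 24.
		 */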
2850
2851		if (mp->m_len < ehdrlen + ip_hlen)
2852			return;	/* failure */
2853
2854		hdr_len = ehdrlen + ip_hlen;
2855		ipproto = ip->ip_p;
2856
2857		break;
2858	case ETHERTYPE_IPV6:
2859		ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
2860		ip_hlen = sizeof(struct ip6_hdr); /* XXX: No header stacking. */
2861
2862		if (mp->m_len < ehdrlen + ip_hlen)
2863			return;	/* failure */
2864
2865		/* IPv6 doesn't have a header checksum. */
2866
2867		hdr_len = ehdrlen + ip_hlen;
2868		ipproto = ip6->ip6_nxt;
2869
2870		break;
2871	default:
2872		*txd_upper = 0;
2873		*txd_lower = 0;
2874		return;
2875	}
2876
2877	switch (ipproto) {
2878	case IPPROTO_TCP:
2879		if (mp->m_pkthdr.csum_flags & CSUM_TCP) {
2880			/*
2881			 * Start offset for payload checksum calculation.
2882			 * End offset for payload checksum calculation.
2883			 * Offset of place to put the checksum.
2884			 */
			th = (struct tcphdr *)(mp->m_data + hdr_len);
2886			TXD->upper_setup.tcp_fields.tucss = hdr_len;
2887			TXD->upper_setup.tcp_fields.tucse = htole16(0);
2888			TXD->upper_setup.tcp_fields.tucso =
2889			    hdr_len + offsetof(struct tcphdr, th_sum);
2890			cmd |= E1000_TXD_CMD_TCP;
2891			*txd_upper |= E1000_TXD_POPTS_TXSM << 8;
2892		}
2893		break;
2894	case IPPROTO_UDP:
2895		if (mp->m_pkthdr.csum_flags & CSUM_UDP) {
2896			/*
2897			 * Start offset for header checksum calculation.
2898			 * End offset for header checksum calculation.
2899			 * Offset of place to put the checksum.
2900			 */
2901			TXD->upper_setup.tcp_fields.tucss = hdr_len;
2902			TXD->upper_setup.tcp_fields.tucse = htole16(0);
2903			TXD->upper_setup.tcp_fields.tucso =
2904			    hdr_len + offsetof(struct udphdr, uh_sum);
2905			*txd_upper |= E1000_TXD_POPTS_TXSM << 8;
2906		}
2907		break;
2908	default:
2909		break;
2910	}
2911
2912	TXD->tcp_seg_setup.data = htole32(0);
2913	TXD->cmd_and_length =
2914	    htole32(adapter->txd_cmd | E1000_TXD_CMD_DEXT | cmd);
2915	tx_buffer->m_head = NULL;
2916	tx_buffer->next_eop = -1;
2917
2918	if (++curr_txd == adapter->num_tx_desc)
2919		curr_txd = 0;
2920
2921	adapter->num_tx_desc_avail--;
2922	adapter->next_avail_tx_desc = curr_txd;
2923}
2924
2925/**********************************************************************
2926 *
2927 *  Setup work for hardware segmentation offload (TSO)
2928 *
2929 **********************************************************************/
2930static boolean_t
2931em_tso_setup(struct adapter *adapter, struct mbuf *mp, uint32_t *txd_upper,
2932   uint32_t *txd_lower)
2933{
2934	struct em_context_desc *TXD;
2935	struct em_buffer *tx_buffer;
2936	struct ether_vlan_header *eh;
2937	struct ip *ip;
2938	struct ip6_hdr *ip6;
2939	struct tcphdr *th;
2940	int curr_txd, ehdrlen, hdr_len, ip_hlen, isip6;
2941	uint16_t etype;
2942
2943	/*
2944	 * XXX: This is not really correct as the stack would not have
2945	 * set up all checksums.
	 * XXX: Returning FALSE is not sufficient as we may have to return
2947	 * in true failure cases as well.  Should do -1 (failure), 0 (no)
2948	 * and 1 (success).
2949	 */
2950	if (mp->m_pkthdr.len <= E1000_TX_BUFFER_SIZE)
2951		return FALSE;	/* 0 */
2952
2953	/*
2954	 * This function could/should be extended to support IP/IPv6
2955	 * fragmentation as well.  But as they say, one step at a time.
2956	 */
2957
2958	/*
2959	 * Determine where frame payload starts.
2960	 * Jump over vlan headers if already present,
2961	 * helpful for QinQ too.
2962	 */
2963	eh = mtod(mp, struct ether_vlan_header *);
2964	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
2965		etype = ntohs(eh->evl_proto);
2966		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
2967	} else {
2968		etype = ntohs(eh->evl_encap_proto);
2969		ehdrlen = ETHER_HDR_LEN;
2970	}
2971
2972	/* Ensure we have at least the IP+TCP header in the first mbuf. */
2973	if (mp->m_len < ehdrlen + sizeof(struct ip) + sizeof(struct tcphdr))
2974		return FALSE;	/* -1 */
2975
2976	/*
	 * We only support TCP for IPv4; IPv6 is not supported yet.
2978	 * TODO: Support SCTP too when it hits the tree.
2979	 */
2980	switch (etype) {
2981	case ETHERTYPE_IP:
2982		isip6 = 0;
2983		ip = (struct ip *)(mp->m_data + ehdrlen);
2984		if (ip->ip_p != IPPROTO_TCP)
2985			return FALSE;	/* 0 */
2986		ip->ip_len = 0;
2987		ip->ip_sum = 0;
2988		ip_hlen = ip->ip_hl << 2;
2989		if (mp->m_len < ehdrlen + ip_hlen + sizeof(struct tcphdr))
2990			return FALSE;	/* -1 */
2991		th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
2992#if 1
2993		th->th_sum = in_pseudo(ip->ip_src.s_addr,
2994		    ip->ip_dst.s_addr, htons(IPPROTO_TCP));
2995#else
2996		th->th_sum = mp->m_pkthdr.csum_data;
2997#endif
2998		break;
2999	case ETHERTYPE_IPV6:
3000		isip6 = 1;
3001		return FALSE;			/* Not supported yet. */
3002		ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
3003		if (ip6->ip6_nxt != IPPROTO_TCP)
3004			return FALSE;	/* 0 */
3005		ip6->ip6_plen = 0;
3006		ip_hlen = sizeof(struct ip6_hdr); /* XXX: no header stacking. */
3007		if (mp->m_len < ehdrlen + ip_hlen + sizeof(struct tcphdr))
3008			return FALSE;	/* -1 */
3009		th = (struct tcphdr *)((caddr_t)ip6 + ip_hlen);
3010#if 0
		th->th_sum = in6_pseudo(ip6->ip6_src, ip6->ip6_dst,
3012		    htons(IPPROTO_TCP));	/* XXX: function notyet. */
3013#else
3014		th->th_sum = mp->m_pkthdr.csum_data;
3015#endif
3016		break;
3017	default:
3018		return FALSE;
3019	}
3020	hdr_len = ehdrlen + ip_hlen + (th->th_off << 2);
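	/*
	 * Illustrative: with an untagged frame, a 20-byte IPv4 header
	 * and a 20-byte TCP header, hdr_len = 14 + 20 + 20 = 54.
	 */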
3021
3022	*txd_lower = (E1000_TXD_CMD_DEXT |	/* Extended descr type */
3023		      E1000_TXD_DTYP_D |	/* Data descr type */
3024		      E1000_TXD_CMD_TSE);	/* Do TSE on this packet */
3025
3026	/* IP and/or TCP header checksum calculation and insertion. */
3027	*txd_upper = ((isip6 ? 0 : E1000_TXD_POPTS_IXSM) |
3028		      E1000_TXD_POPTS_TXSM) << 8;
3029
3030	curr_txd = adapter->next_avail_tx_desc;
3031	tx_buffer = &adapter->tx_buffer_area[curr_txd];
3032	TXD = (struct em_context_desc *) &adapter->tx_desc_base[curr_txd];
3033
3034	/* IPv6 doesn't have a header checksum. */
3035	if (!isip6) {
3036		/*
3037		 * Start offset for header checksum calculation.
3038		 * End offset for header checksum calculation.
		 * Offset of place to put the checksum.
3040		 */
3041		TXD->lower_setup.ip_fields.ipcss = ehdrlen;
3042		TXD->lower_setup.ip_fields.ipcse =
3043		    htole16(ehdrlen + ip_hlen - 1);
3044		TXD->lower_setup.ip_fields.ipcso =
3045		    ehdrlen + offsetof(struct ip, ip_sum);
3046	}
3047	/*
3048	 * Start offset for payload checksum calculation.
3049	 * End offset for payload checksum calculation.
3050	 * Offset of place to put the checksum.
3051	 */
3052	TXD->upper_setup.tcp_fields.tucss =
3053	    ehdrlen + ip_hlen;
3054	TXD->upper_setup.tcp_fields.tucse = 0;
3055	TXD->upper_setup.tcp_fields.tucso =
3056	    ehdrlen + ip_hlen + offsetof(struct tcphdr, th_sum);
3057	/*
3058	 * Payload size per packet w/o any headers.
3059	 * Length of all headers up to payload.
3060	 */
3061	TXD->tcp_seg_setup.fields.mss = htole16(mp->m_pkthdr.tso_segsz);
3062	TXD->tcp_seg_setup.fields.hdr_len = hdr_len;
3063
3064	TXD->cmd_and_length = htole32(adapter->txd_cmd |
3065				E1000_TXD_CMD_DEXT |	/* Extended descr */
3066				E1000_TXD_CMD_TSE |	/* TSE context */
3067				(isip6 ? 0 : E1000_TXD_CMD_IP) | /* Do IP csum */
3068				E1000_TXD_CMD_TCP |	/* Do TCP checksum */
3069				(mp->m_pkthdr.len - (hdr_len))); /* Total len */
3070
3071	tx_buffer->m_head = NULL;
3072
3073	if (++curr_txd == adapter->num_tx_desc)
3074		curr_txd = 0;
3075
3076	adapter->num_tx_desc_avail--;
3077	adapter->next_avail_tx_desc = curr_txd;
3078	adapter->tx_tso = TRUE;
3079
3080	return TRUE;
3081}
3082
3083/**********************************************************************
3084 *
3085 *  Examine each tx_buffer in the used queue. If the hardware is done
3086 *  processing the packet then free associated resources. The
3087 *  tx_buffer is put back on the free queue.
3088 *
3089 **********************************************************************/
3090static void
3091em_txeof(struct adapter *adapter)
3092{
3093	int first, last, done, num_avail;
3094	struct em_buffer *tx_buffer;
3095	struct em_tx_desc   *tx_desc, *eop_desc;
3096	struct ifnet   *ifp = adapter->ifp;
3097
3098	EM_LOCK_ASSERT(adapter);
3099
3100	if (adapter->num_tx_desc_avail == adapter->num_tx_desc)
3101		return;
3102
3103	num_avail = adapter->num_tx_desc_avail;
3104	first = adapter->next_tx_to_clean;
3105	tx_desc = &adapter->tx_desc_base[first];
3106	tx_buffer = &adapter->tx_buffer_area[first];
3107	last = tx_buffer->next_eop;
3108	eop_desc = &adapter->tx_desc_base[last];
3109
3110	/*
3111	 * Now calculate the terminating index
3112	 * for the cleanup loop below.
3113	 */
3114	if (++last == adapter->num_tx_desc)
3115		last = 0;
3116	done = last;
3117
3118	bus_dmamap_sync(adapter->txdma.dma_tag, adapter->txdma.dma_map,
3119	    BUS_DMASYNC_POSTREAD);
3120	while (eop_desc->upper.fields.status & E1000_TXD_STAT_DD) {
3121		/* We clean the range of the packet */
3122		while (first != done) {
3123			tx_desc->upper.data = 0;
3124			tx_desc->lower.data = 0;
3125			num_avail++;
3126
3127			if (tx_buffer->m_head) {
3128				ifp->if_opackets++;
3129				bus_dmamap_sync(adapter->txtag, tx_buffer->map,
3130				    BUS_DMASYNC_POSTWRITE);
3131				bus_dmamap_unload(adapter->txtag,
3132				    tx_buffer->map);
3133
3134				m_freem(tx_buffer->m_head);
3135				tx_buffer->m_head = NULL;
3136			}
3137			tx_buffer->next_eop = -1;
3138
3139			if (++first == adapter->num_tx_desc)
3140				first = 0;
3141
3142			tx_buffer = &adapter->tx_buffer_area[first];
3143			tx_desc = &adapter->tx_desc_base[first];
3144		}
3145		/* See if we can continue to the next packet */
3146		last = tx_buffer->next_eop;
3147		if (last != -1) {
3148			eop_desc = &adapter->tx_desc_base[last];
3149			/* Get new done point */
3150			if (++last == adapter->num_tx_desc)
3151				last = 0;
3152			done = last;
3153		} else
3154			break;
3155	}
3156	bus_dmamap_sync(adapter->txdma.dma_tag, adapter->txdma.dma_map,
3157	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3158
3159	adapter->next_tx_to_clean = first;
3160
3161	/*
3162	 * If we have enough room, clear IFF_DRV_OACTIVE to tell the stack
3163	 * that it is OK to send packets.
3164	 * If there are no pending descriptors, clear the timeout. Otherwise,
3165	 * if some descriptors have been freed, restart the timeout.
3166	 */
3167	if (num_avail > EM_TX_CLEANUP_THRESHOLD) {
3168		ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
3169		if (num_avail == adapter->num_tx_desc)
3170			adapter->watchdog_timer = 0;
3171		else if (num_avail != adapter->num_tx_desc_avail)
3172			adapter->watchdog_timer = EM_TX_TIMEOUT;
3173	}
3174	adapter->num_tx_desc_avail = num_avail;
3175}
3176
3177/*********************************************************************
3178 *
 *  Get a buffer from the system mbuf cluster pool.
3180 *
3181 **********************************************************************/
3182static int
3183em_get_buf(struct adapter *adapter, int i)
3184{
3185	struct mbuf		*m;
3186	bus_dma_segment_t	segs[1];
3187	bus_dmamap_t		map;
3188	struct em_buffer	*rx_buffer;
3189	int			error, nsegs;
3190
3191	m = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR);
3192	if (m == NULL) {
3193		adapter->mbuf_cluster_failed++;
3194		return (ENOBUFS);
3195	}
3196	m->m_len = m->m_pkthdr.len = MCLBYTES;
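	/*
	 * If the frame fits, reserve 2 bytes (ETHER_ALIGN) at the front so
	 * the IP header following the 14-byte Ethernet header lands on a
	 * 4-byte boundary.
	 */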
3197	if (adapter->hw.max_frame_size <= (MCLBYTES - ETHER_ALIGN))
3198		m_adj(m, ETHER_ALIGN);
3199
3200	/*
3201	 * Using memory from the mbuf cluster pool, invoke the
3202	 * bus_dma machinery to arrange the memory mapping.
3203	 */
3204	error = bus_dmamap_load_mbuf_sg(adapter->rxtag, adapter->rx_sparemap,
3205	    m, segs, &nsegs, BUS_DMA_NOWAIT);
3206	if (error != 0) {
3207		m_free(m);
3208		return (error);
3209	}
	/* The tag allows only one segment; more would have overrun segs[]. */
3211	KASSERT(nsegs == 1, ("Too many segments returned!"));
3212
3213	rx_buffer = &adapter->rx_buffer_area[i];
3214	if (rx_buffer->m_head != NULL)
3215		bus_dmamap_unload(adapter->rxtag, rx_buffer->map);
3216
3217	map = rx_buffer->map;
3218	rx_buffer->map = adapter->rx_sparemap;
3219	adapter->rx_sparemap = map;
3220	bus_dmamap_sync(adapter->rxtag, rx_buffer->map, BUS_DMASYNC_PREREAD);
3221	rx_buffer->m_head = m;
3222
3223	adapter->rx_desc_base[i].buffer_addr = htole64(segs[0].ds_addr);
3224
3225	return (0);
3226}
3227
3228/*********************************************************************
3229 *
3230 *  Allocate memory for rx_buffer structures. Since we use one
 *  rx_buffer per received packet, the maximum number of rx_buffers
3232 *  that we'll need is equal to the number of receive descriptors
3233 *  that we've allocated.
3234 *
3235 **********************************************************************/
3236static int
3237em_allocate_receive_structures(struct adapter *adapter)
3238{
3239	device_t dev = adapter->dev;
3240	struct em_buffer *rx_buffer;
3241	int i, error;
3242
3243	adapter->rx_buffer_area = malloc(sizeof(struct em_buffer) * adapter->num_rx_desc,
3244	    M_DEVBUF, M_NOWAIT);
3245	if (adapter->rx_buffer_area == NULL) {
3246		device_printf(dev, "Unable to allocate rx_buffer memory\n");
3247		return (ENOMEM);
3248	}
3249
3250	bzero(adapter->rx_buffer_area, sizeof(struct em_buffer) * adapter->num_rx_desc);
3251
3252	error = bus_dma_tag_create(bus_get_dma_tag(dev),	/* parent */
3253				1, 0,			/* alignment, bounds */
3254				BUS_SPACE_MAXADDR,	/* lowaddr */
3255				BUS_SPACE_MAXADDR,	/* highaddr */
3256				NULL, NULL,		/* filter, filterarg */
3257				MCLBYTES,	        /* maxsize */
3258				1,			/* nsegments */
3259				MCLBYTES,	        /* maxsegsize */
3260				0,			/* flags */
3261				NULL,			/* lockfunc */
3262				NULL,			/* lockarg */
3263				&adapter->rxtag);
3264	if (error) {
3265		device_printf(dev, "%s: bus_dma_tag_create failed %d\n",
3266		    __func__, error);
3267		goto fail;
3268	}
3269
3270	error = bus_dmamap_create(adapter->rxtag, BUS_DMA_NOWAIT,
3271	    &adapter->rx_sparemap);
3272	if (error) {
3273		device_printf(dev, "%s: bus_dmamap_create failed: %d\n",
3274		    __func__, error);
3275		goto fail;
3276	}
3277	rx_buffer = adapter->rx_buffer_area;
3278	for (i = 0; i < adapter->num_rx_desc; i++, rx_buffer++) {
3279		error = bus_dmamap_create(adapter->rxtag, BUS_DMA_NOWAIT,
3280		    &rx_buffer->map);
3281		if (error) {
3282			device_printf(dev, "%s: bus_dmamap_create failed: %d\n",
3283			    __func__, error);
3284			goto fail;
3285		}
3286	}
3287
3288	return (0);
3289
3290fail:
3291	em_free_receive_structures(adapter);
3292	return (error);
3293}
3294
3295/*********************************************************************
3296 *
3297 *  Allocate and initialize receive structures.
3298 *
3299 **********************************************************************/
3300static int
3301em_setup_receive_structures(struct adapter *adapter)
3302{
3303	struct em_buffer *rx_buffer;
3304	int i, error;
3305
3306	bzero(adapter->rx_desc_base, (sizeof(struct em_rx_desc)) * adapter->num_rx_desc);
3307
3308	/* Free current RX buffers. */
3309	rx_buffer = adapter->rx_buffer_area;
3310	for (i = 0; i < adapter->num_rx_desc; i++, rx_buffer++) {
3311		if (rx_buffer->m_head != NULL) {
3312			bus_dmamap_sync(adapter->rxtag, rx_buffer->map,
3313			    BUS_DMASYNC_POSTREAD);
3314			bus_dmamap_unload(adapter->rxtag, rx_buffer->map);
3315			m_freem(rx_buffer->m_head);
3316			rx_buffer->m_head = NULL;
3317		}
3318	}
3319
3320	/* Allocate new ones. */
3321	for (i = 0; i < adapter->num_rx_desc; i++) {
3322		error = em_get_buf(adapter, i);
3323		if (error)
3324			return (error);
3325	}
3326
3327	/* Setup our descriptor pointers */
3328	adapter->next_rx_desc_to_check = 0;
3329	bus_dmamap_sync(adapter->rxdma.dma_tag, adapter->rxdma.dma_map,
3330	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3331
3332	return (0);
3333}
3334
3335/*********************************************************************
3336 *
3337 *  Enable receive unit.
3338 *
3339 **********************************************************************/
3340static void
3341em_initialize_receive_unit(struct adapter *adapter)
3342{
3343	struct ifnet	*ifp = adapter->ifp;
3344	uint64_t	bus_addr;
3345	uint32_t	reg_rctl;
3346	uint32_t	reg_rxcsum;
3347
3348	INIT_DEBUGOUT("em_initialize_receive_unit: begin");
3349
3350	/*
3351	 * Make sure receives are disabled while setting
3352	 * up the descriptor ring
3353	 */
3354	E1000_WRITE_REG(&adapter->hw, RCTL, 0);
3355
3356	/* Set the Receive Delay Timer Register */
3357	E1000_WRITE_REG(&adapter->hw, RDTR, adapter->rx_int_delay.value | E1000_RDT_FPDB);
3358
3359	if(adapter->hw.mac_type >= em_82540) {
3360		E1000_WRITE_REG(&adapter->hw, RADV, adapter->rx_abs_int_delay.value);
3361
3362		/*
3363		 * Set the interrupt throttling rate. Value is calculated
3364		 * as DEFAULT_ITR = 1/(MAX_INTS_PER_SEC * 256ns)
3365		 */
3366#define MAX_INTS_PER_SEC	8000
#define DEFAULT_ITR	(1000000000 / (MAX_INTS_PER_SEC * 256))
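		/*
		 * With MAX_INTS_PER_SEC = 8000 this evaluates to
		 * 1000000000 / (8000 * 256) = 488 ITR units, i.e. at
		 * least 488 * 256ns ~= 125us between interrupts.
		 */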
3368		E1000_WRITE_REG(&adapter->hw, ITR, DEFAULT_ITR);
3369	}
3370
3371	/* Setup the Base and Length of the Rx Descriptor Ring */
3372	bus_addr = adapter->rxdma.dma_paddr;
3373	E1000_WRITE_REG(&adapter->hw, RDLEN, adapter->num_rx_desc *
3374			sizeof(struct em_rx_desc));
3375	E1000_WRITE_REG(&adapter->hw, RDBAH, (uint32_t)(bus_addr >> 32));
3376	E1000_WRITE_REG(&adapter->hw, RDBAL, (uint32_t)bus_addr);
3377
3378	/* Setup the Receive Control Register */
3379	reg_rctl = E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_LBM_NO |
3380		   E1000_RCTL_RDMTS_HALF |
3381		   (adapter->hw.mc_filter_type << E1000_RCTL_MO_SHIFT);
3382
3383	if (adapter->hw.tbi_compatibility_on == TRUE)
3384		reg_rctl |= E1000_RCTL_SBP;
3385
3386
3387	switch (adapter->rx_buffer_len) {
3388	default:
3389	case EM_RXBUFFER_2048:
3390		reg_rctl |= E1000_RCTL_SZ_2048;
3391		break;
3392	case EM_RXBUFFER_4096:
3393		reg_rctl |= E1000_RCTL_SZ_4096 | E1000_RCTL_BSEX | E1000_RCTL_LPE;
3394		break;
3395	case EM_RXBUFFER_8192:
3396		reg_rctl |= E1000_RCTL_SZ_8192 | E1000_RCTL_BSEX | E1000_RCTL_LPE;
3397		break;
3398	case EM_RXBUFFER_16384:
3399		reg_rctl |= E1000_RCTL_SZ_16384 | E1000_RCTL_BSEX | E1000_RCTL_LPE;
3400		break;
3401	}
3402
3403	if (ifp->if_mtu > ETHERMTU)
3404		reg_rctl |= E1000_RCTL_LPE;
3405
3406	/* Enable 82543 Receive Checksum Offload for TCP and UDP */
3407	if ((adapter->hw.mac_type >= em_82543) &&
3408	    (ifp->if_capenable & IFCAP_RXCSUM)) {
3409		reg_rxcsum = E1000_READ_REG(&adapter->hw, RXCSUM);
3410		reg_rxcsum |= (E1000_RXCSUM_IPOFL | E1000_RXCSUM_TUOFL);
3411		E1000_WRITE_REG(&adapter->hw, RXCSUM, reg_rxcsum);
3412	}
3413
3414	/* Enable Receives */
3415	E1000_WRITE_REG(&adapter->hw, RCTL, reg_rctl);
3416
	/*
	 * Setup the HW Rx Head and Tail descriptor pointers: head at
	 * entry 0, tail at the last descriptor so the whole ring is
	 * available to the hardware.
	 */
3418	E1000_WRITE_REG(&adapter->hw, RDH, 0);
3419	E1000_WRITE_REG(&adapter->hw, RDT, adapter->num_rx_desc - 1);
3420}
3421
3422/*********************************************************************
3423 *
3424 *  Free receive related data structures.
3425 *
3426 **********************************************************************/
3427static void
3428em_free_receive_structures(struct adapter *adapter)
3429{
3430	struct em_buffer *rx_buffer;
3431	int i;
3432
3433	INIT_DEBUGOUT("em_free_receive_structures: begin");
3434
3435	if (adapter->rx_sparemap) {
3436		bus_dmamap_destroy(adapter->rxtag, adapter->rx_sparemap);
3437		adapter->rx_sparemap = NULL;
3438	}
3439	if (adapter->rx_buffer_area != NULL) {
3440		rx_buffer = adapter->rx_buffer_area;
3441		for (i = 0; i < adapter->num_rx_desc; i++, rx_buffer++) {
3442			if (rx_buffer->m_head != NULL) {
3443				bus_dmamap_sync(adapter->rxtag, rx_buffer->map,
3444				    BUS_DMASYNC_POSTREAD);
3445				bus_dmamap_unload(adapter->rxtag,
3446				    rx_buffer->map);
3447				m_freem(rx_buffer->m_head);
3448				rx_buffer->m_head = NULL;
3449			} else if (rx_buffer->map != NULL)
3450				bus_dmamap_unload(adapter->rxtag,
3451				    rx_buffer->map);
3452			if (rx_buffer->map != NULL) {
3453				bus_dmamap_destroy(adapter->rxtag,
3454				    rx_buffer->map);
3455				rx_buffer->map = NULL;
3456			}
3457		}
3458		/* All per-buffer DMA maps are gone; free the array itself. */
3459		free(adapter->rx_buffer_area, M_DEVBUF);
3460		adapter->rx_buffer_area = NULL;
3461	}
3462
3463	if (adapter->rxtag != NULL) {
3464		bus_dma_tag_destroy(adapter->rxtag);
3465		adapter->rxtag = NULL;
3466	}
3467}
3468
3469/*********************************************************************
3470 *
3471 *  This routine executes in interrupt context.  It replenishes
3472 *  the mbufs in the descriptor ring and passes frames that have
3473 *  been DMA'd into host memory up to the network stack.
3474 *
3475 *  We process at most "count" frames if count > 0, or run until
3476 *  the ring is drained if count < 0.
3477 *
3478 *********************************************************************/
3479static int
3480em_rxeof(struct adapter *adapter, int count)
3481{
3482	struct ifnet	*ifp;
3483	struct mbuf	*mp;
3484	uint8_t		accept_frame = 0;
3485	uint8_t		eop = 0;
3486	uint16_t 	len, desc_len, prev_len_adj;
3487	int		i;
3488
3489	/* Pointer to the receive descriptor being examined. */
3490	struct em_rx_desc   *current_desc;
3491	uint8_t		status;
3492
3493	ifp = adapter->ifp;
3494	i = adapter->next_rx_desc_to_check;
3495	current_desc = &adapter->rx_desc_base[i];
3496	bus_dmamap_sync(adapter->rxdma.dma_tag, adapter->rxdma.dma_map,
3497	    BUS_DMASYNC_POSTREAD);
3498
3499	if (!((current_desc->status) & E1000_RXD_STAT_DD))
3500		return (0);
3501
3502	while ((current_desc->status & E1000_RXD_STAT_DD) &&
3503	    (count != 0) &&
3504	    (ifp->if_drv_flags & IFF_DRV_RUNNING)) {
3505		struct mbuf *m = NULL;
3506
3507		mp = adapter->rx_buffer_area[i].m_head;
3508		/*
3509		 * Can't defer bus_dmamap_sync(9) because TBI_ACCEPT
3510		 * needs to access the last received byte in the mbuf.
3511		 */
3512		bus_dmamap_sync(adapter->rxtag, adapter->rx_buffer_area[i].map,
3513		    BUS_DMASYNC_POSTREAD);
3514
3515		accept_frame = 1;
3516		prev_len_adj = 0;
3517		desc_len = le16toh(current_desc->length);
3518		status = current_desc->status;
3519		if (status & E1000_RXD_STAT_EOP) {
3520			count--;
3521			eop = 1;
3522			if (desc_len < ETHER_CRC_LEN) {
3523				len = 0;
3524				prev_len_adj = ETHER_CRC_LEN - desc_len;
3525			} else
3526				len = desc_len - ETHER_CRC_LEN;
3527		} else {
3528			eop = 0;
3529			len = desc_len;
3530		}
3531
3532		if (current_desc->errors & E1000_RXD_ERR_FRAME_ERR_MASK) {
3533			uint8_t		last_byte;
3534			uint32_t	pkt_len = desc_len;
3535
3536			if (adapter->fmp != NULL)
3537				pkt_len += adapter->fmp->m_pkthdr.len;
3538
3539			last_byte = *(mtod(mp, caddr_t) + desc_len - 1);
3540			if (TBI_ACCEPT(&adapter->hw, status,
3541			    current_desc->errors, pkt_len, last_byte)) {
3542				em_tbi_adjust_stats(&adapter->hw,
3543				    &adapter->stats, pkt_len,
3544				    adapter->hw.mac_addr);
3545				if (len > 0)
3546					len--;
3547			} else
3548				accept_frame = 0;
3549		}
3550
3551		if (accept_frame) {
3552			if (em_get_buf(adapter, i) != 0) {
3553				ifp->if_iqdrops++;
3554				goto discard;
3555			}
3556
3557			/* Assign correct length to the current fragment */
3558			mp->m_len = len;
3559
3560			if (adapter->fmp == NULL) {
3561				mp->m_pkthdr.len = len;
3562				adapter->fmp = mp; /* Store the first mbuf */
3563				adapter->lmp = mp;
3564			} else {
3565				/* Chain mbuf's together */
3566				mp->m_flags &= ~M_PKTHDR;
3567				/*
3568				 * Adjust length of previous mbuf in chain if
3569				 * we received less than 4 bytes in the last
3570				 * descriptor.
3571				 */
3572				if (prev_len_adj > 0) {
3573					adapter->lmp->m_len -= prev_len_adj;
3574					adapter->fmp->m_pkthdr.len -=
3575					    prev_len_adj;
3576				}
3577				adapter->lmp->m_next = mp;
3578				adapter->lmp = adapter->lmp->m_next;
3579				adapter->fmp->m_pkthdr.len += len;
3580			}
3581
3582			if (eop) {
3583				adapter->fmp->m_pkthdr.rcvif = ifp;
3584				ifp->if_ipackets++;
3585				em_receive_checksum(adapter, current_desc,
3586				    adapter->fmp);
3587#ifndef __NO_STRICT_ALIGNMENT
3588				if (adapter->hw.max_frame_size >
3589				    (MCLBYTES - ETHER_ALIGN) &&
3590				    em_fixup_rx(adapter) != 0)
3591					goto skip;
3592#endif
3593				if (status & E1000_RXD_STAT_VP) {
3594					adapter->fmp->m_pkthdr.ether_vtag =
3595					    (le16toh(current_desc->special) &
3596					    E1000_RXD_SPC_VLAN_MASK);
3597					adapter->fmp->m_flags |= M_VLANTAG;
3598				}
3599#ifndef __NO_STRICT_ALIGNMENT
3600skip:
3601#endif
3602				m = adapter->fmp;
3603				adapter->fmp = NULL;
3604				adapter->lmp = NULL;
3605			}
3606		} else {
3607			ifp->if_ierrors++;
3608discard:
3609			/* Reuse loaded DMA map and just update mbuf chain */
3610			mp = adapter->rx_buffer_area[i].m_head;
3611			mp->m_len = mp->m_pkthdr.len = MCLBYTES;
3612			mp->m_data = mp->m_ext.ext_buf;
3613			mp->m_next = NULL;
3614			if (adapter->hw.max_frame_size <= (MCLBYTES - ETHER_ALIGN))
3615				m_adj(mp, ETHER_ALIGN);
3616			if (adapter->fmp != NULL) {
3617				m_freem(adapter->fmp);
3618				adapter->fmp = NULL;
3619				adapter->lmp = NULL;
3620			}
3621			m = NULL;
3622		}
3623
3624		/* Zero out the receive descriptor's status. */
3625		current_desc->status = 0;
3626		bus_dmamap_sync(adapter->rxdma.dma_tag, adapter->rxdma.dma_map,
3627		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3628
3629		/* Advance our pointers to the next descriptor. */
3630		if (++i == adapter->num_rx_desc)
3631			i = 0;
3632		if (m != NULL) {
3633			adapter->next_rx_desc_to_check = i;
3634#ifdef DEVICE_POLLING
3635			EM_UNLOCK(adapter);
3636			(*ifp->if_input)(ifp, m);
3637			EM_LOCK(adapter);
3638#else
3639			(*ifp->if_input)(ifp, m);
3640#endif
3641			i = adapter->next_rx_desc_to_check;
3642		}
3643		current_desc = &adapter->rx_desc_base[i];
3644	}
3645	adapter->next_rx_desc_to_check = i;
3646
3647	/* Advance the E1000's receive queue #0 "Tail Pointer". */
3648	if (--i < 0)
3649		i = adapter->num_rx_desc - 1;
3650	E1000_WRITE_REG(&adapter->hw, RDT, i);
3651	if (!((current_desc->status) & E1000_RXD_STAT_DD))
3652		return (0);
3653
3654	return (1);
3655}
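
#if 0
/*
 * Illustrative caller sketch, not part of this driver: a minimal loop
 * that drains the receive ring in bounded batches.  The helper name and
 * the batch size of 100 are assumptions for the example only; the real
 * callers pass their own limits (a negative count runs until the ring
 * is drained).
 */
static void
em_rxeof_usage_example(struct adapter *adapter)
{

	EM_LOCK(adapter);
	/* Keep taking up to 100 frames per pass while work remains. */
	while (em_rxeof(adapter, 100) != 0)
		continue;
	EM_UNLOCK(adapter);
}
#endif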
3656
3657#ifndef __NO_STRICT_ALIGNMENT
3658/*
3659 * When jumbo frames are enabled we must realign the entire payload on
3660 * architectures with strict alignment requirements. This is a serious
3661 * design flaw of the 8254x, as it largely defeats the point of DMA: the
3662 * chip only allows RX buffer sizes of 2048/4096/8192/16384 bytes, while
3663 * what we really want is 2048 - ETHER_ALIGN so the payload lands aligned.
3664 * Even without strict alignment restrictions the 8254x performs
3665 * unaligned accesses, which also reduces performance.  To avoid copying
3666 * an entire frame just to realign it, we allocate a new mbuf, copy the
3667 * Ethernet header into it, and prepend that mbuf to the existing chain.
3668 *
3669 * Be aware that the best performance of the 8254x is achieved only when
3670 * jumbo frames are not used at all on strict-alignment architectures.
3671 */
3672static int
3673em_fixup_rx(struct adapter *adapter)
3674{
3675	struct mbuf *m, *n;
3676	int error;
3677
3678	error = 0;
3679	m = adapter->fmp;
3680	if (m->m_len <= (MCLBYTES - ETHER_HDR_LEN)) {
3681		bcopy(m->m_data, m->m_data + ETHER_HDR_LEN, m->m_len);
3682		m->m_data += ETHER_HDR_LEN;
3683	} else {
3684		MGETHDR(n, M_DONTWAIT, MT_DATA);
3685		if (n != NULL) {
3686			bcopy(m->m_data, n->m_data, ETHER_HDR_LEN);
3687			m->m_data += ETHER_HDR_LEN;
3688			m->m_len -= ETHER_HDR_LEN;
3689			n->m_len = ETHER_HDR_LEN;
3690			M_MOVE_PKTHDR(n, m);
3691			n->m_next = m;
3692			adapter->fmp = n;
3693		} else {
3694			adapter->ifp->if_iqdrops++;
3695			adapter->mbuf_alloc_failed++;
3696			m_freem(adapter->fmp);
3697			adapter->fmp = NULL;
3698			adapter->lmp = NULL;
3699			error = ENOBUFS;
3700		}
3701	}
3702
3703	return (error);
3704}
3705#endif
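
/*
 * Worked example for em_fixup_rx() above: jumbo receive buffers start at
 * the cluster base, so the 14-byte Ethernet header leaves the IP header
 * at offset 14, two bytes short of 4-byte alignment.  Sliding the frame
 * forward by ETHER_HDR_LEN moves the payload to offset 28, which is
 * 4-byte aligned; when the leading mbuf is too full to slide in place,
 * the header is instead split off into a freshly allocated mbuf.
 */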
3706
3707/*********************************************************************
3708 *
3709 *  Verify that the hardware indicated that the checksum is valid.
3710 *  Inform the stack about the checksum status so that it does not
3711 *  spend time re-verifying the checksum in software.
3712 *
3713 *********************************************************************/
3714static void
3715em_receive_checksum(struct adapter *adapter, struct em_rx_desc *rx_desc,
3716		    struct mbuf *mp)
3717{
3718	/* 82543 or newer only */
3719	if ((adapter->hw.mac_type < em_82543) ||
3720	    /* Ignore Checksum bit is set */
3721	    (rx_desc->status & E1000_RXD_STAT_IXSM)) {
3722		mp->m_pkthdr.csum_flags = 0;
3723		return;
3724	}
3725
3726	if (rx_desc->status & E1000_RXD_STAT_IPCS) {
3727		/* Did it pass? */
3728		if (!(rx_desc->errors & E1000_RXD_ERR_IPE)) {
3729			/* IP Checksum Good */
3730			mp->m_pkthdr.csum_flags = CSUM_IP_CHECKED;
3731			mp->m_pkthdr.csum_flags |= CSUM_IP_VALID;
3732
3733		} else {
3734			mp->m_pkthdr.csum_flags = 0;
3735		}
3736	}
3737
3738	if (rx_desc->status & E1000_RXD_STAT_TCPCS) {
3739		/* Did it pass? */
3740		if (!(rx_desc->errors & E1000_RXD_ERR_TCPE)) {
3741			mp->m_pkthdr.csum_flags |=
3742			(CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
3743			mp->m_pkthdr.csum_data = htons(0xffff);
3744		}
3745	}
3746}
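
#if 0
/*
 * Illustrative consumer sketch, not part of this driver: how a caller
 * higher up the stack could test the flags set above.  The helper name
 * is hypothetical; the CSUM_* bits are the standard mbuf(9) flags.
 */
static int
em_rx_csum_ok_example(struct mbuf *mp)
{

	/* Hardware verified the L4 checksum, pseudo header included. */
	if ((mp->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) ==
	    (CSUM_DATA_VALID | CSUM_PSEUDO_HDR))
		return (1);
	return (0);
}
#endif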
3747
3749static void
3750em_enable_vlans(struct adapter *adapter)
3751{
3752	uint32_t ctrl;
3753
3754	E1000_WRITE_REG(&adapter->hw, VET, ETHERTYPE_VLAN);
3755
3756	ctrl = E1000_READ_REG(&adapter->hw, CTRL);
3757	ctrl |= E1000_CTRL_VME;
3758	E1000_WRITE_REG(&adapter->hw, CTRL, ctrl);
3759}
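
/*
 * Note on the VLAN enable above: VET is loaded with ETHERTYPE_VLAN
 * (0x8100) so the hardware recognizes 802.1Q tags, and CTRL_VME turns on
 * VLAN tag processing; em_rxeof() then recovers the stripped tag from
 * the descriptor's "special" field whenever E1000_RXD_STAT_VP is set.
 */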
3760
3761static void
3762em_enable_intr(struct adapter *adapter)
3763{
3764	E1000_WRITE_REG(&adapter->hw, IMS, (IMS_ENABLE_MASK));
3765}
3766
3767static void
3768em_disable_intr(struct adapter *adapter)
3769{
3770	/*
3771	 * The first version of the 82542 had an erratum where, when link
3772	 * was forced, it would stay up even if the cable was disconnected.
3773	 * Sequence errors were used to detect the disconnect, after which
3774	 * the driver would unforce the link; that code lives in the ISR.
3775	 * For it to work correctly, the sequence error interrupt has to
3776	 * stay enabled at all times.
3777	 */
3778
3779	if (adapter->hw.mac_type == em_82542_rev2_0)
3780		E1000_WRITE_REG(&adapter->hw, IMC,
3781		    0xffffffff & ~E1000_IMC_RXSEQ);
3782	else
3783		E1000_WRITE_REG(&adapter->hw, IMC, 0xffffffff);
3784}
3786
3787static int
3788em_is_valid_ether_addr(uint8_t *addr)
3789{
3790	static const char zero_addr[ETHER_ADDR_LEN] = { 0, 0, 0, 0, 0, 0 };
3791
3792	if ((addr[0] & 1) || (!bcmp(addr, zero_addr, ETHER_ADDR_LEN))) {
3793		return (FALSE);
3794	}
3795
3796	return (TRUE);
3797}
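
/*
 * Illustrative examples for the check above: 01:00:5e:00:00:01 is
 * rejected because the low bit of the first octet marks a multicast (or
 * broadcast) address, and 00:00:00:00:00:00 is rejected by the all-zero
 * comparison; any ordinary unicast address passes.
 */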
3798
3799void
3800em_write_pci_cfg(struct em_hw *hw, uint32_t reg, uint16_t *value)
3801{
3802	pci_write_config(((struct em_osdep *)hw->back)->dev, reg, *value, 2);
3803}
3804
3805void
3806em_read_pci_cfg(struct em_hw *hw, uint32_t reg, uint16_t *value)
3807{
3808	*value = pci_read_config(((struct em_osdep *)hw->back)->dev, reg, 2);
3809}
3810
3811void
3812em_pci_set_mwi(struct em_hw *hw)
3813{
3814	pci_write_config(((struct em_osdep *)hw->back)->dev, PCIR_COMMAND,
3815	    (hw->pci_cmd_word | CMD_MEM_WRT_INVALIDATE), 2);
3816}
3817
3818void
3819em_pci_clear_mwi(struct em_hw *hw)
3820{
3821	pci_write_config(((struct em_osdep *)hw->back)->dev, PCIR_COMMAND,
3822	    (hw->pci_cmd_word & ~CMD_MEM_WRT_INVALIDATE), 2);
3823}
3824
3825/*
3826 * Find the PCIe capability and return the Link Status register through
3827 * *value; as a side effect, raise the max read request size to 4KB.
3828 */
3829int32_t
3830em_read_pcie_cap_reg(struct em_hw *hw, uint32_t reg, uint16_t *value)
3831{
3832	int32_t  rc;
3833	uint16_t pectl;
3834	device_t dev;
3835
3836	dev = ((struct em_osdep *)hw->back)->dev;
3837
3838	/* Read the PCIe link status and raise the max read request size to 4KB. */
3839	if (pci_find_extcap(dev, PCIY_EXPRESS, &reg) == 0) {
3840		em_read_pci_cfg(hw, reg + 0x12, value);
3841
3842		em_read_pci_cfg(hw, reg + 0x8, &pectl);
3843		pectl = (pectl & ~0x7000) | (5 << 12);
3844		em_write_pci_cfg(hw, reg + 0x8, &pectl);
3845		rc = 0;
3846	} else
3847		rc = -1;
3848
3849	return (rc);
3850}
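
/*
 * Worked example of the Device Control update above, with offsets taken
 * relative to the PCIe capability located by pci_find_extcap(): offset
 * 0x8 is the Device Control register, whose bits 14:12 encode the max
 * read request size as 128 << n bytes.  Masking off 0x7000 and OR-ing
 * in (5 << 12) selects 128 << 5 = 4096 bytes; offset 0x12 is the Link
 * Status register handed back through *value.
 */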
3851
3852/*********************************************************************
3853* 82544 coexistence issue workaround.
3854*    There are two issues:
3855*       1. Transmit hang issue.
3856*    To detect this issue, the following equation can be used:
3857*	  SIZE[3:0] + ADDR[2:0] = SUM[3:0].
3858*	  If SUM[3:0] is between 1 and 4, we will have this issue.
3859*
3860*       2. DAC issue.
3861*    To detect this issue, the following equation can be used:
3862*	  SIZE[3:0] + ADDR[2:0] = SUM[3:0].
3863*	  If SUM[3:0] is between 9 and 0xC, we will have this issue.
3864*
3865*    WORKAROUND:
3866*	  Make sure the buffer's ending address nibble is not 1-4 (hang)
3867*	  or 9-0xC (DAC).
3868**********************************************************************/
3870static uint32_t
3871em_fill_descriptors(bus_addr_t address, uint32_t length,
3872		PDESC_ARRAY desc_array)
3873{
3874	/* The issue is sensitive to both length and address. */
3875	/* Check the length first: transfers of 4 bytes or less are safe. */
3876	uint32_t safe_terminator;
3877	if (length <= 4) {
3878		desc_array->descriptor[0].address = address;
3879		desc_array->descriptor[0].length = length;
3880		desc_array->elements = 1;
3881		return (desc_array->elements);
3882	}
3883	safe_terminator = (uint32_t)((((uint32_t)address & 0x7) + (length & 0xF)) & 0xF);
3884	/* If it does not fall in 0x1-0x4 or 0x9-0xC, one descriptor is safe. */
3885	if (safe_terminator == 0 ||
3886	    (safe_terminator > 4 && safe_terminator < 9) ||
3887	    (safe_terminator > 0xC && safe_terminator <= 0xF)) {
3890		desc_array->descriptor[0].address = address;
3891		desc_array->descriptor[0].length = length;
3892		desc_array->elements = 1;
3893		return (desc_array->elements);
3894	}
3895
3896	desc_array->descriptor[0].address = address;
3897	desc_array->descriptor[0].length = length - 4;
3898	desc_array->descriptor[1].address = address + (length - 4);
3899	desc_array->descriptor[1].length = 4;
3900	desc_array->elements = 2;
3901	return (desc_array->elements);
3902}
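
/*
 * Worked example for the split above: low address bits of 0x6 and a
 * length ending in 0xB give safe_terminator = (0x6 + 0xB) & 0xF = 0x1,
 * inside the 1-4 hang range.  After the split, the leading descriptor's
 * terminator becomes (0x6 + 0x7) & 0xF = 0xD, a safe value, and the
 * trailing 4-byte descriptor is exempt by the same length <= 4 rule the
 * function applies on entry.
 */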
3903
3904/**********************************************************************
3905 *
3906 *  Update the board statistics counters.
3907 *
3908 **********************************************************************/
3909static void
3910em_update_stats_counters(struct adapter *adapter)
3911{
3912	struct ifnet   *ifp;
3913
3914	if (adapter->hw.media_type == em_media_type_copper ||
3915	   (E1000_READ_REG(&adapter->hw, STATUS) & E1000_STATUS_LU)) {
3916		adapter->stats.symerrs += E1000_READ_REG(&adapter->hw, SYMERRS);
3917		adapter->stats.sec += E1000_READ_REG(&adapter->hw, SEC);
3918	}
3919	adapter->stats.crcerrs += E1000_READ_REG(&adapter->hw, CRCERRS);
3920	adapter->stats.mpc += E1000_READ_REG(&adapter->hw, MPC);
3921	adapter->stats.scc += E1000_READ_REG(&adapter->hw, SCC);
3922	adapter->stats.ecol += E1000_READ_REG(&adapter->hw, ECOL);
3923
3924	adapter->stats.mcc += E1000_READ_REG(&adapter->hw, MCC);
3925	adapter->stats.latecol += E1000_READ_REG(&adapter->hw, LATECOL);
3926	adapter->stats.colc += E1000_READ_REG(&adapter->hw, COLC);
3927	adapter->stats.dc += E1000_READ_REG(&adapter->hw, DC);
3928	adapter->stats.rlec += E1000_READ_REG(&adapter->hw, RLEC);
3929	adapter->stats.xonrxc += E1000_READ_REG(&adapter->hw, XONRXC);
3930	adapter->stats.xontxc += E1000_READ_REG(&adapter->hw, XONTXC);
3931	adapter->stats.xoffrxc += E1000_READ_REG(&adapter->hw, XOFFRXC);
3932	adapter->stats.xofftxc += E1000_READ_REG(&adapter->hw, XOFFTXC);
3933	adapter->stats.fcruc += E1000_READ_REG(&adapter->hw, FCRUC);
3934	adapter->stats.prc64 += E1000_READ_REG(&adapter->hw, PRC64);
3935	adapter->stats.prc127 += E1000_READ_REG(&adapter->hw, PRC127);
3936	adapter->stats.prc255 += E1000_READ_REG(&adapter->hw, PRC255);
3937	adapter->stats.prc511 += E1000_READ_REG(&adapter->hw, PRC511);
3938	adapter->stats.prc1023 += E1000_READ_REG(&adapter->hw, PRC1023);
3939	adapter->stats.prc1522 += E1000_READ_REG(&adapter->hw, PRC1522);
3940	adapter->stats.gprc += E1000_READ_REG(&adapter->hw, GPRC);
3941	adapter->stats.bprc += E1000_READ_REG(&adapter->hw, BPRC);
3942	adapter->stats.mprc += E1000_READ_REG(&adapter->hw, MPRC);
3943	adapter->stats.gptc += E1000_READ_REG(&adapter->hw, GPTC);
3944
3945	/* For the 64-bit byte counters the low dword must be read first; */
3946	/* both registers clear on the read of the high dword. */
3947
3948	adapter->stats.gorcl += E1000_READ_REG(&adapter->hw, GORCL);
3949	adapter->stats.gorch += E1000_READ_REG(&adapter->hw, GORCH);
3950	adapter->stats.gotcl += E1000_READ_REG(&adapter->hw, GOTCL);
3951	adapter->stats.gotch += E1000_READ_REG(&adapter->hw, GOTCH);
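	/*
	 * Illustrative note: a consumer wanting the full 64-bit total from
	 * such a pair would combine the dwords after reading low first,
	 * e.g. total = (uint64_t)lo | ((uint64_t)hi << 32).
	 */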
3952
3953	adapter->stats.rnbc += E1000_READ_REG(&adapter->hw, RNBC);
3954	adapter->stats.ruc += E1000_READ_REG(&adapter->hw, RUC);
3955	adapter->stats.rfc += E1000_READ_REG(&adapter->hw, RFC);
3956	adapter->stats.roc += E1000_READ_REG(&adapter->hw, ROC);
3957	adapter->stats.rjc += E1000_READ_REG(&adapter->hw, RJC);
3958
3959	adapter->stats.torl += E1000_READ_REG(&adapter->hw, TORL);
3960	adapter->stats.torh += E1000_READ_REG(&adapter->hw, TORH);
3961	adapter->stats.totl += E1000_READ_REG(&adapter->hw, TOTL);
3962	adapter->stats.toth += E1000_READ_REG(&adapter->hw, TOTH);
3963
3964	adapter->stats.tpr += E1000_READ_REG(&adapter->hw, TPR);
3965	adapter->stats.tpt += E1000_READ_REG(&adapter->hw, TPT);
3966	adapter->stats.ptc64 += E1000_READ_REG(&adapter->hw, PTC64);
3967	adapter->stats.ptc127 += E1000_READ_REG(&adapter->hw, PTC127);
3968	adapter->stats.ptc255 += E1000_READ_REG(&adapter->hw, PTC255);
3969	adapter->stats.ptc511 += E1000_READ_REG(&adapter->hw, PTC511);
3970	adapter->stats.ptc1023 += E1000_READ_REG(&adapter->hw, PTC1023);
3971	adapter->stats.ptc1522 += E1000_READ_REG(&adapter->hw, PTC1522);
3972	adapter->stats.mptc += E1000_READ_REG(&adapter->hw, MPTC);
3973	adapter->stats.bptc += E1000_READ_REG(&adapter->hw, BPTC);
3974
3975	if (adapter->hw.mac_type >= em_82543) {
3976		adapter->stats.algnerrc += E1000_READ_REG(&adapter->hw, ALGNERRC);
3977		adapter->stats.rxerrc += E1000_READ_REG(&adapter->hw, RXERRC);
3978		adapter->stats.tncrs += E1000_READ_REG(&adapter->hw, TNCRS);
3979		adapter->stats.cexterr += E1000_READ_REG(&adapter->hw, CEXTERR);
3980		adapter->stats.tsctc += E1000_READ_REG(&adapter->hw, TSCTC);
3981		adapter->stats.tsctfc += E1000_READ_REG(&adapter->hw, TSCTFC);
3982	}
3983	ifp = adapter->ifp;
3984
3985	ifp->if_collisions = adapter->stats.colc;
3986
3987	/* Rx Errors */
3988	ifp->if_ierrors = adapter->stats.rxerrc + adapter->stats.crcerrs +
3989	    adapter->stats.algnerrc + adapter->stats.ruc + adapter->stats.roc +
3990	    adapter->stats.mpc + adapter->stats.cexterr;
3991
3992	/* Tx Errors */
3993	ifp->if_oerrors = adapter->stats.ecol + adapter->stats.latecol +
3994	    adapter->watchdog_events;
3995}
3996
3998/**********************************************************************
3999 *
4000 *  This routine is called only when em_display_debug_stats is enabled.
4001 *  This routine provides a way to take a look at important statistics
4002 *  maintained by the driver and hardware.
4003 *
4004 **********************************************************************/
4005static void
4006em_print_debug_info(struct adapter *adapter)
4007{
4008	device_t dev = adapter->dev;
4009	uint8_t *hw_addr = adapter->hw.hw_addr;
4010
4011	device_printf(dev, "Adapter hardware address = %p\n", hw_addr);
4012	device_printf(dev, "CTRL = 0x%x RCTL = 0x%x\n",
4013	    E1000_READ_REG(&adapter->hw, CTRL),
4014	    E1000_READ_REG(&adapter->hw, RCTL));
4015	device_printf(dev, "Packet buffer = Tx=%dk Rx=%dk\n",
4016	    ((E1000_READ_REG(&adapter->hw, PBA) & 0xffff0000) >> 16),
4017	    (E1000_READ_REG(&adapter->hw, PBA) & 0xffff));
4018	device_printf(dev, "Flow control watermarks high = %d low = %d\n",
4019	    adapter->hw.fc_high_water,
4020	    adapter->hw.fc_low_water);
4021	device_printf(dev, "tx_int_delay = %d, tx_abs_int_delay = %d\n",
4022	    E1000_READ_REG(&adapter->hw, TIDV),
4023	    E1000_READ_REG(&adapter->hw, TADV));
4024	device_printf(dev, "rx_int_delay = %d, rx_abs_int_delay = %d\n",
4025	    E1000_READ_REG(&adapter->hw, RDTR),
4026	    E1000_READ_REG(&adapter->hw, RADV));
4027	device_printf(dev, "fifo workaround = %lld, fifo_reset_count = %lld\n",
4028	    (long long)adapter->tx_fifo_wrk_cnt,
4029	    (long long)adapter->tx_fifo_reset_cnt);
4030	device_printf(dev, "hw tdh = %d, hw tdt = %d\n",
4031	    E1000_READ_REG(&adapter->hw, TDH),
4032	    E1000_READ_REG(&adapter->hw, TDT));
4033	device_printf(dev, "Num Tx descriptors avail = %d\n",
4034	    adapter->num_tx_desc_avail);
4035	device_printf(dev, "Tx Descriptors not avail1 = %ld\n",
4036	    adapter->no_tx_desc_avail1);
4037	device_printf(dev, "Tx Descriptors not avail2 = %ld\n",
4038	    adapter->no_tx_desc_avail2);
4039	device_printf(dev, "Std mbuf failed = %ld\n",
4040	    adapter->mbuf_alloc_failed);
4041	device_printf(dev, "Std mbuf cluster failed = %ld\n",
4042	    adapter->mbuf_cluster_failed);
4043}
4044
4045static void
4046em_print_hw_stats(struct adapter *adapter)
4047{
4048	device_t dev = adapter->dev;
4049
4050	device_printf(dev, "Excessive collisions = %lld\n",
4051	    (long long)adapter->stats.ecol);
4052	device_printf(dev, "Symbol errors = %lld\n",
4053	    (long long)adapter->stats.symerrs);
4054	device_printf(dev, "Sequence errors = %lld\n",
4055	    (long long)adapter->stats.sec);
4056	device_printf(dev, "Defer count = %lld\n", (long long)adapter->stats.dc);
4057
4058	device_printf(dev, "Missed Packets = %lld\n", (long long)adapter->stats.mpc);
4059	device_printf(dev, "Receive No Buffers = %lld\n",
4060	    (long long)adapter->stats.rnbc);
4061	/* RLEC is inaccurate on some hardware, calculate our own. */
4062	device_printf(dev, "Receive Length Errors = %lld\n",
4063	    ((long long)adapter->stats.roc + (long long)adapter->stats.ruc));
4064	device_printf(dev, "Receive errors = %lld\n",
4065	    (long long)adapter->stats.rxerrc);
4066	device_printf(dev, "Crc errors = %lld\n", (long long)adapter->stats.crcerrs);
4067	device_printf(dev, "Alignment errors = %lld\n",
4068	    (long long)adapter->stats.algnerrc);
4069	device_printf(dev, "Carrier extension errors = %lld\n",
4070	    (long long)adapter->stats.cexterr);
4071	device_printf(dev, "RX overruns = %ld\n", adapter->rx_overruns);
4072	device_printf(dev, "watchdog timeouts = %ld\n", adapter->watchdog_events);
4073
4074	device_printf(dev, "XON Rcvd = %lld\n", (long long)adapter->stats.xonrxc);
4075	device_printf(dev, "XON Xmtd = %lld\n", (long long)adapter->stats.xontxc);
4076	device_printf(dev, "XOFF Rcvd = %lld\n", (long long)adapter->stats.xoffrxc);
4077	device_printf(dev, "XOFF Xmtd = %lld\n", (long long)adapter->stats.xofftxc);
4078
4079	device_printf(dev, "Good Packets Rcvd = %lld\n",
4080	    (long long)adapter->stats.gprc);
4081	device_printf(dev, "Good Packets Xmtd = %lld\n",
4082	    (long long)adapter->stats.gptc);
4083	device_printf(dev, "TSO Contexts Xmtd = %lld\n",
4084	    (long long)adapter->stats.tsctc);
4085	device_printf(dev, "TSO Contexts Failed = %lld\n",
4086	    (long long)adapter->stats.tsctfc);
4087}
4088
4089static int
4090em_sysctl_debug_info(SYSCTL_HANDLER_ARGS)
4091{
4092	struct adapter *adapter;
4093	int error;
4094	int result;
4095
4096	result = -1;
4097	error = sysctl_handle_int(oidp, &result, 0, req);
4098
4099	if (error || !req->newptr)
4100		return (error);
4101
4102	if (result == 1) {
4103		adapter = (struct adapter *)arg1;
4104		em_print_debug_info(adapter);
4105	}
4106
4107	return (error);
4108}
4109
4111static int
4112em_sysctl_stats(SYSCTL_HANDLER_ARGS)
4113{
4114	struct adapter *adapter;
4115	int error;
4116	int result;
4117
4118	result = -1;
4119	error = sysctl_handle_int(oidp, &result, 0, req);
4120
4121	if (error || !req->newptr)
4122		return (error);
4123
4124	if (result == 1) {
4125		adapter = (struct adapter *)arg1;
4126		em_print_hw_stats(adapter);
4127	}
4128
4129	return (error);
4130}
4131
4132static int
4133em_sysctl_int_delay(SYSCTL_HANDLER_ARGS)
4134{
4135	struct em_int_delay_info *info;
4136	struct adapter *adapter;
4137	uint32_t regval;
4138	int error;
4139	int usecs;
4140	int ticks;
4141
4142	info = (struct em_int_delay_info *)arg1;
4143	usecs = info->value;
4144	error = sysctl_handle_int(oidp, &usecs, 0, req);
4145	if (error != 0 || req->newptr == NULL)
4146		return (error);
4147	if (usecs < 0 || usecs > E1000_TICKS_TO_USECS(65535))
4148		return (EINVAL);
4149	info->value = usecs;
4150	ticks = E1000_USECS_TO_TICKS(usecs);
4151
4152	adapter = info->adapter;
4153
4154	EM_LOCK(adapter);
4155	regval = E1000_READ_OFFSET(&adapter->hw, info->offset);
4156	regval = (regval & ~0xffff) | (ticks & 0xffff);
4157	/* Handle a few special cases. */
4158	switch (info->offset) {
4159	case E1000_RDTR:
4160	case E1000_82542_RDTR:
4161		regval |= E1000_RDT_FPDB;
4162		break;
4163	case E1000_TIDV:
4164	case E1000_82542_TIDV:
4165		if (ticks == 0) {
4166			adapter->txd_cmd &= ~E1000_TXD_CMD_IDE;
4167			/* Don't write 0 into the TIDV register. */
4168			regval++;
4169		} else
4170			adapter->txd_cmd |= E1000_TXD_CMD_IDE;
4171		break;
4172	}
4173	E1000_WRITE_OFFSET(&adapter->hw, info->offset, regval);
4174	EM_UNLOCK(adapter);
4175	return (0);
4176}
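
/*
 * Usage sketch (the unit number and node name below are hypothetical):
 * the handler above backs the read/write sysctl nodes created by
 * em_add_int_delay_sysctl(), so an administrator could tune, e.g.:
 *
 *	sysctl dev.em.0.rx_int_delay=64
 *
 * The value is taken in microseconds, range checked, converted to
 * hardware ticks, and written to the register under the adapter lock.
 */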
4177
4178static void
4179em_add_int_delay_sysctl(struct adapter *adapter, const char *name,
4180	const char *description, struct em_int_delay_info *info,
4181	int offset, int value)
4182{
4183	info->adapter = adapter;
4184	info->offset = offset;
4185	info->value = value;
4186	SYSCTL_ADD_PROC(device_get_sysctl_ctx(adapter->dev),
4187	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
4188	    OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW,
4189	    info, 0, em_sysctl_int_delay, "I", description);
4190}
4191
4192#ifndef DEVICE_POLLING
4193static void
4194em_add_int_process_limit(struct adapter *adapter, const char *name,
4195	const char *description, int *limit, int value)
4196{
4197	*limit = value;
4198	SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev),
4199	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
4200	    OID_AUTO, name, CTLFLAG_RW, limit, value, description);
4201}
4202#endif
4203