/**************************************************************************

Copyright (c) 2001-2006, Intel Corporation
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

 1. Redistributions of source code must retain the above copyright notice,
    this list of conditions and the following disclaimer.

 2. Redistributions in binary form must reproduce the above copyright
    notice, this list of conditions and the following disclaimer in the
    documentation and/or other materials provided with the distribution.

 3. Neither the name of the Intel Corporation nor the names of its
    contributors may be used to endorse or promote products derived from
    this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.

***************************************************************************/

/*$FreeBSD: head/sys/dev/em/if_em.c 163826 2006-10-31 16:19:21Z glebius $*/

#ifdef HAVE_KERNEL_OPTION_HEADERS
#include "opt_device_polling.h"
#endif

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/bus.h>
#include <sys/endian.h>
#include <sys/kernel.h>
#include <sys/kthread.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/module.h>
#include <sys/rman.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/sysctl.h>
#include <sys/taskqueue.h>

#include <machine/bus.h>
#include <machine/resource.h>

#include <net/bpf.h>
#include <net/ethernet.h>
#include <net/if.h>
#include <net/if_arp.h>
#include <net/if_dl.h>
#include <net/if_media.h>

#include <net/if_types.h>
#include <net/if_vlan_var.h>

#include <netinet/in_systm.h>
#include <netinet/in.h>
#include <netinet/if_ether.h>
#include <netinet/ip.h>
#include <netinet/ip6.h>
#include <netinet/tcp.h>
#include <netinet/udp.h>

#include <machine/in_cksum.h>
#include <dev/pci/pcivar.h>
#include <dev/pci/pcireg.h>
#include <dev/em/if_em_hw.h>
#include <dev/em/if_em.h>

/*********************************************************************
 *  Set this to one to display debug statistics
 *********************************************************************/
int	em_display_debug_stats = 0;

/*********************************************************************
 *  Driver version
 *********************************************************************/

char em_driver_version[] = "Version - 6.2.9";


/*********************************************************************
 *  PCI Device ID Table
 *
 *  Used by probe to select devices to load on
 *  Last field stores an index into em_strings
 *  Last entry must be all 0s
 *
 *  { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index }
 *********************************************************************/

static em_vendor_info_t em_vendor_info_array[] =
{
	/* Intel(R) PRO/1000 Network Connection */
	{ 0x8086, E1000_DEV_ID_82540EM,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82540EM_LOM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82540EP,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82540EP_LOM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82540EP_LP,	PCI_ANY_ID, PCI_ANY_ID, 0},

	{ 0x8086, E1000_DEV_ID_82541EI,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82541ER,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82541ER_LOM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82541EI_MOBILE,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82541GI,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82541GI_LF,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82541GI_MOBILE,	PCI_ANY_ID, PCI_ANY_ID, 0},

	{ 0x8086, E1000_DEV_ID_82542,		PCI_ANY_ID, PCI_ANY_ID, 0},

	{ 0x8086, E1000_DEV_ID_82543GC_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82543GC_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},

	{ 0x8086, E1000_DEV_ID_82544EI_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82544EI_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82544GC_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82544GC_LOM,	PCI_ANY_ID, PCI_ANY_ID, 0},

	{ 0x8086, E1000_DEV_ID_82545EM_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82545EM_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82545GM_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82545GM_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82545GM_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},

	{ 0x8086, E1000_DEV_ID_82546EB_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82546EB_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82546EB_QUAD_COPPER, PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82546GB_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82546GB_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82546GB_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82546GB_PCIE,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82546GB_QUAD_COPPER, PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82546GB_QUAD_COPPER_KSP3,
						PCI_ANY_ID, PCI_ANY_ID, 0},

	{ 0x8086, E1000_DEV_ID_82547EI,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82547EI_MOBILE,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82547GI,		PCI_ANY_ID, PCI_ANY_ID, 0},

	{ 0x8086, E1000_DEV_ID_82571EB_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER_LOWPROFILE,
						PCI_ANY_ID, PCI_ANY_ID, 0},

	{ 0x8086, E1000_DEV_ID_82572EI_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82572EI_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82572EI_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82572EI,		PCI_ANY_ID, PCI_ANY_ID, 0},

	{ 0x8086, E1000_DEV_ID_82573E,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82573E_IAMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82573L,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_SPT,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_SPT,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_DPT,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_DPT,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IGP_M_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IGP_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IGP_C,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IFE,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IFE_GT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IFE_G,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IGP_M,	PCI_ANY_ID, PCI_ANY_ID, 0},

	/* required last entry */
	{ 0, 0, 0, 0, 0}
};
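
/*
 * Editor's note: a worked matching example for the table above, added
 * for clarity.  A device reporting vendor 0x8086 and device
 * E1000_DEV_ID_82540EM matches the first row regardless of its
 * subsystem IDs, because both the subvendor and subdevice fields are
 * PCI_ANY_ID wildcards; em_probe() then uses the row's string index (0)
 * to select the description from em_strings[].
 */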

/*********************************************************************
 *  Table of branding strings for all supported NICs.
 *********************************************************************/

static char *em_strings[] = {
	"Intel(R) PRO/1000 Network Connection"
};

/*********************************************************************
 *  Function prototypes
 *********************************************************************/
static int	em_probe(device_t);
static int	em_attach(device_t);
static int	em_detach(device_t);
static int	em_shutdown(device_t);
static int	em_suspend(device_t);
static int	em_resume(device_t);
static void	em_start(struct ifnet *);
static void	em_start_locked(struct ifnet *ifp);
static int	em_ioctl(struct ifnet *, u_long, caddr_t);
static void	em_watchdog(struct ifnet *);
static void	em_init(void *);
static void	em_init_locked(struct adapter *);
static void	em_stop(void *);
static void	em_media_status(struct ifnet *, struct ifmediareq *);
static int	em_media_change(struct ifnet *);
static void	em_identify_hardware(struct adapter *);
static int	em_allocate_pci_resources(struct adapter *);
static int	em_allocate_intr(struct adapter *);
static void	em_free_intr(struct adapter *);
static void	em_free_pci_resources(struct adapter *);
static void	em_local_timer(void *);
static int	em_hardware_init(struct adapter *);
static void	em_setup_interface(device_t, struct adapter *);
static int	em_setup_transmit_structures(struct adapter *);
static void	em_initialize_transmit_unit(struct adapter *);
static int	em_setup_receive_structures(struct adapter *);
static void	em_initialize_receive_unit(struct adapter *);
static void	em_enable_intr(struct adapter *);
static void	em_disable_intr(struct adapter *);
static void	em_free_transmit_structures(struct adapter *);
static void	em_free_receive_structures(struct adapter *);
static void	em_update_stats_counters(struct adapter *);
static void	em_txeof(struct adapter *);
static int	em_allocate_receive_structures(struct adapter *);
static int	em_allocate_transmit_structures(struct adapter *);
static int	em_rxeof(struct adapter *, int);
#ifndef __NO_STRICT_ALIGNMENT
static int	em_fixup_rx(struct adapter *);
#endif
static void	em_receive_checksum(struct adapter *, struct em_rx_desc *,
		    struct mbuf *);
static void	em_transmit_checksum_setup(struct adapter *, struct mbuf *,
		    uint32_t *, uint32_t *);
static boolean_t em_tso_setup(struct adapter *, struct mbuf *,
		    uint32_t *, uint32_t *);
static void	em_set_promisc(struct adapter *);
static void	em_disable_promisc(struct adapter *);
static void	em_set_multi(struct adapter *);
static void	em_print_hw_stats(struct adapter *);
static void	em_update_link_status(struct adapter *);
static int	em_get_buf(struct adapter *, int);
static void	em_enable_vlans(struct adapter *);
static void	em_disable_vlans(struct adapter *);
static int	em_encap(struct adapter *, struct mbuf **);
static void	em_smartspeed(struct adapter *);
static int	em_82547_fifo_workaround(struct adapter *, int);
static void	em_82547_update_fifo_head(struct adapter *, int);
static int	em_82547_tx_fifo_reset(struct adapter *);
static void	em_82547_move_tail(void *arg);
static void	em_82547_move_tail_locked(struct adapter *);
static int	em_dma_malloc(struct adapter *, bus_size_t,
		struct em_dma_alloc *, int);
static void	em_dma_free(struct adapter *, struct em_dma_alloc *);
static void	em_print_debug_info(struct adapter *);
static int	em_is_valid_ether_addr(uint8_t *);
static int	em_sysctl_stats(SYSCTL_HANDLER_ARGS);
static int	em_sysctl_debug_info(SYSCTL_HANDLER_ARGS);
static uint32_t	em_fill_descriptors(bus_addr_t address, uint32_t length,
		    PDESC_ARRAY desc_array);
static int	em_sysctl_int_delay(SYSCTL_HANDLER_ARGS);
static void	em_add_int_delay_sysctl(struct adapter *, const char *,
		const char *, struct em_int_delay_info *, int, int);

/*
 * Fast interrupt handler and legacy ithread/polling modes are
 * mutually exclusive.
 */
#ifdef DEVICE_POLLING
static poll_handler_t em_poll;
static void	em_intr(void *);
#else
static void	em_intr_fast(void *);
static void	em_add_int_process_limit(struct adapter *, const char *,
		const char *, int *, int);
static void	em_handle_rxtx(void *context, int pending);
static void	em_handle_link(void *context, int pending);
#endif

/*********************************************************************
 *  FreeBSD Device Interface Entry Points
 *********************************************************************/

static device_method_t em_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe, em_probe),
	DEVMETHOD(device_attach, em_attach),
	DEVMETHOD(device_detach, em_detach),
	DEVMETHOD(device_shutdown, em_shutdown),
	DEVMETHOD(device_suspend, em_suspend),
	DEVMETHOD(device_resume, em_resume),
	{0, 0}
};

static driver_t em_driver = {
	"em", em_methods, sizeof(struct adapter),
};

static devclass_t em_devclass;
DRIVER_MODULE(em, pci, em_driver, em_devclass, 0, 0);
MODULE_DEPEND(em, pci, 1, 1, 1);
MODULE_DEPEND(em, ether, 1, 1, 1);

/*********************************************************************
 *  Tunable default values.
 *********************************************************************/

#define E1000_TICKS_TO_USECS(ticks)	((1024 * (ticks) + 500) / 1000)
#define E1000_USECS_TO_TICKS(usecs)	((1000 * (usecs) + 512) / 1024)
#define M_TSO_LEN			66
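
/*
 * Editor's note: a worked example of the two conversion macros above,
 * assuming they encode the hardware's 1.024 usec timer tick:
 *
 *   E1000_TICKS_TO_USECS(64) = (1024 * 64 + 500) / 1000 = 66 usecs
 *   E1000_USECS_TO_TICKS(66) = (1000 * 66 + 512) / 1024 = 64 ticks
 *
 * The +500 and +512 terms round to the nearest unit instead of
 * truncating, so values survive a round trip through both macros.
 */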

static int em_tx_int_delay_dflt = E1000_TICKS_TO_USECS(EM_TIDV);
static int em_rx_int_delay_dflt = E1000_TICKS_TO_USECS(EM_RDTR);
static int em_tx_abs_int_delay_dflt = E1000_TICKS_TO_USECS(EM_TADV);
static int em_rx_abs_int_delay_dflt = E1000_TICKS_TO_USECS(EM_RADV);
static int em_rxd = EM_DEFAULT_RXD;
static int em_txd = EM_DEFAULT_TXD;
static int em_smart_pwr_down = FALSE;

TUNABLE_INT("hw.em.tx_int_delay", &em_tx_int_delay_dflt);
TUNABLE_INT("hw.em.rx_int_delay", &em_rx_int_delay_dflt);
TUNABLE_INT("hw.em.tx_abs_int_delay", &em_tx_abs_int_delay_dflt);
TUNABLE_INT("hw.em.rx_abs_int_delay", &em_rx_abs_int_delay_dflt);
TUNABLE_INT("hw.em.rxd", &em_rxd);
TUNABLE_INT("hw.em.txd", &em_txd);
TUNABLE_INT("hw.em.smart_pwr_down", &em_smart_pwr_down);
#ifndef DEVICE_POLLING
static int em_rx_process_limit = 100;
TUNABLE_INT("hw.em.rx_process_limit", &em_rx_process_limit);
#endif
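
/*
 * Editor's note: these TUNABLE_INT() hooks read the kernel environment
 * at module load time, so the values are set from loader.conf(5) rather
 * than sysctl(8).  A hypothetical /boot/loader.conf tuning (values for
 * illustration only):
 *
 *   hw.em.rxd="1024"
 *   hw.em.txd="1024"
 *   hw.em.rx_int_delay="0"
 *
 * Ring sizes that fail the validation in em_attach() below fall back to
 * EM_DEFAULT_RXD/EM_DEFAULT_TXD with a console warning.
 */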

/*********************************************************************
 *  Device identification routine
 *
 *  em_probe determines if the driver should be loaded on an
 *  adapter based on the PCI vendor/device ID of that adapter.
 *
 *  return BUS_PROBE_DEFAULT on success, positive on failure
 *********************************************************************/

static int
em_probe(device_t dev)
{
	char		adapter_name[60];
	uint16_t	pci_vendor_id = 0;
	uint16_t	pci_device_id = 0;
	uint16_t	pci_subvendor_id = 0;
	uint16_t	pci_subdevice_id = 0;
	em_vendor_info_t *ent;

	INIT_DEBUGOUT("em_probe: begin");

	pci_vendor_id = pci_get_vendor(dev);
	if (pci_vendor_id != EM_VENDOR_ID)
		return (ENXIO);

	pci_device_id = pci_get_device(dev);
	pci_subvendor_id = pci_get_subvendor(dev);
	pci_subdevice_id = pci_get_subdevice(dev);

	ent = em_vendor_info_array;
	while (ent->vendor_id != 0) {
		if ((pci_vendor_id == ent->vendor_id) &&
		    (pci_device_id == ent->device_id) &&

		    ((pci_subvendor_id == ent->subvendor_id) ||
		    (ent->subvendor_id == PCI_ANY_ID)) &&

		    ((pci_subdevice_id == ent->subdevice_id) ||
		    (ent->subdevice_id == PCI_ANY_ID))) {
			sprintf(adapter_name, "%s %s",
				em_strings[ent->index],
				em_driver_version);
			device_set_desc_copy(dev, adapter_name);
			return (BUS_PROBE_DEFAULT);
		}
		ent++;
	}

	return (ENXIO);
}

/*********************************************************************
 *  Device initialization routine
 *
 *  The attach entry point is called when the driver is being loaded.
 *  This routine identifies the type of hardware, allocates all resources
 *  and initializes the hardware.
 *
 *  return 0 on success, positive on failure
 *********************************************************************/

static int
em_attach(device_t dev)
{
	struct adapter	*adapter;
	int		tsize, rsize;
	int		error = 0;

	INIT_DEBUGOUT("em_attach: begin");

	adapter = device_get_softc(dev);
	adapter->dev = adapter->osdep.dev = dev;
	EM_LOCK_INIT(adapter, device_get_nameunit(dev));

	/* SYSCTL stuff */
	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
	    OID_AUTO, "debug_info", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
	    em_sysctl_debug_info, "I", "Debug Information");

	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
	    OID_AUTO, "stats", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
	    em_sysctl_stats, "I", "Statistics");

	callout_init(&adapter->timer, CALLOUT_MPSAFE);
	callout_init(&adapter->tx_fifo_timer, CALLOUT_MPSAFE);

	/* Determine hardware revision */
	em_identify_hardware(adapter);

	/* Set up some sysctls for the tunable interrupt delays */
	em_add_int_delay_sysctl(adapter, "rx_int_delay",
	    "receive interrupt delay in usecs", &adapter->rx_int_delay,
	    E1000_REG_OFFSET(&adapter->hw, RDTR), em_rx_int_delay_dflt);
	em_add_int_delay_sysctl(adapter, "tx_int_delay",
	    "transmit interrupt delay in usecs", &adapter->tx_int_delay,
	    E1000_REG_OFFSET(&adapter->hw, TIDV), em_tx_int_delay_dflt);
	if (adapter->hw.mac_type >= em_82540) {
		em_add_int_delay_sysctl(adapter, "rx_abs_int_delay",
		    "receive interrupt delay limit in usecs",
		    &adapter->rx_abs_int_delay,
		    E1000_REG_OFFSET(&adapter->hw, RADV),
		    em_rx_abs_int_delay_dflt);
		em_add_int_delay_sysctl(adapter, "tx_abs_int_delay",
		    "transmit interrupt delay limit in usecs",
		    &adapter->tx_abs_int_delay,
		    E1000_REG_OFFSET(&adapter->hw, TADV),
		    em_tx_abs_int_delay_dflt);
	}

#ifndef DEVICE_POLLING
	/* Sysctls for limiting the amount of work done in the taskqueue */
	em_add_int_process_limit(adapter, "rx_processing_limit",
	    "max number of rx packets to process", &adapter->rx_process_limit,
	    em_rx_process_limit);
#endif

	/*
	 * Validate the number of transmit and receive descriptors.  The
	 * count must not exceed the hardware maximum, and the ring size
	 * in bytes must be a multiple of EM_DBA_ALIGN.
	 */
	if (((em_txd * sizeof(struct em_tx_desc)) % EM_DBA_ALIGN) != 0 ||
	    (adapter->hw.mac_type >= em_82544 && em_txd > EM_MAX_TXD) ||
	    (adapter->hw.mac_type < em_82544 && em_txd > EM_MAX_TXD_82543) ||
	    (em_txd < EM_MIN_TXD)) {
		device_printf(dev, "Using %d TX descriptors instead of %d!\n",
		    EM_DEFAULT_TXD, em_txd);
		adapter->num_tx_desc = EM_DEFAULT_TXD;
	} else
		adapter->num_tx_desc = em_txd;
	if (((em_rxd * sizeof(struct em_rx_desc)) % EM_DBA_ALIGN) != 0 ||
	    (adapter->hw.mac_type >= em_82544 && em_rxd > EM_MAX_RXD) ||
	    (adapter->hw.mac_type < em_82544 && em_rxd > EM_MAX_RXD_82543) ||
	    (em_rxd < EM_MIN_RXD)) {
		device_printf(dev, "Using %d RX descriptors instead of %d!\n",
		    EM_DEFAULT_RXD, em_rxd);
		adapter->num_rx_desc = EM_DEFAULT_RXD;
	} else
		adapter->num_rx_desc = em_rxd;

	adapter->hw.autoneg = DO_AUTO_NEG;
	adapter->hw.wait_autoneg_complete = WAIT_FOR_AUTO_NEG_DEFAULT;
	adapter->hw.autoneg_advertised = AUTONEG_ADV_DEFAULT;
	adapter->hw.tbi_compatibility_en = TRUE;
	adapter->rx_buffer_len = EM_RXBUFFER_2048;

	adapter->hw.phy_init_script = 1;
	adapter->hw.phy_reset_disable = FALSE;

#ifndef EM_MASTER_SLAVE
	adapter->hw.master_slave = em_ms_hw_default;
#else
	adapter->hw.master_slave = EM_MASTER_SLAVE;
#endif
	/*
	 * Set the max frame size assuming standard ethernet
	 * sized frames.
	 */
	adapter->hw.max_frame_size = ETHERMTU + ETHER_HDR_LEN + ETHER_CRC_LEN;

	adapter->hw.min_frame_size = MINIMUM_ETHERNET_PACKET_SIZE + ETHER_CRC_LEN;

	/*
	 * This controls when hardware reports transmit completion
	 * status.
	 */
	adapter->hw.report_tx_early = 1;
	if (em_allocate_pci_resources(adapter)) {
		device_printf(dev, "Allocation of PCI resources failed\n");
		error = ENXIO;
		goto err_pci;
	}

	/* Initialize eeprom parameters */
	em_init_eeprom_params(&adapter->hw);

	tsize = roundup2(adapter->num_tx_desc * sizeof(struct em_tx_desc),
	    EM_DBA_ALIGN);

	/* Allocate Transmit Descriptor ring */
	if (em_dma_malloc(adapter, tsize, &adapter->txdma, BUS_DMA_NOWAIT)) {
		device_printf(dev, "Unable to allocate tx_desc memory\n");
		error = ENOMEM;
		goto err_tx_desc;
	}
	adapter->tx_desc_base = (struct em_tx_desc *)adapter->txdma.dma_vaddr;

	rsize = roundup2(adapter->num_rx_desc * sizeof(struct em_rx_desc),
	    EM_DBA_ALIGN);

	/* Allocate Receive Descriptor ring */
	if (em_dma_malloc(adapter, rsize, &adapter->rxdma, BUS_DMA_NOWAIT)) {
		device_printf(dev, "Unable to allocate rx_desc memory\n");
		error = ENOMEM;
		goto err_rx_desc;
	}
	adapter->rx_desc_base = (struct em_rx_desc *)adapter->rxdma.dma_vaddr;

	/* Initialize the hardware */
	if (em_hardware_init(adapter)) {
		device_printf(dev, "Unable to initialize the hardware\n");
		error = EIO;
		goto err_hw_init;
	}

	/* Copy the permanent MAC address out of the EEPROM */
	if (em_read_mac_addr(&adapter->hw) < 0) {
		device_printf(dev, "EEPROM read error while reading MAC"
		    " address\n");
		error = EIO;
		goto err_hw_init;
	}

	if (!em_is_valid_ether_addr(adapter->hw.mac_addr)) {
		device_printf(dev, "Invalid MAC address\n");
		error = EIO;
		goto err_hw_init;
	}

	/* Setup OS specific network interface */
	em_setup_interface(dev, adapter);

	em_allocate_intr(adapter);

	/* Initialize statistics */
	em_clear_hw_cntrs(&adapter->hw);
	em_update_stats_counters(adapter);
	adapter->hw.get_link_status = 1;
	em_update_link_status(adapter);

	/* Indicate SOL/IDER usage */
	if (em_check_phy_reset_block(&adapter->hw))
		device_printf(dev,
		    "PHY reset is blocked due to SOL/IDER session.\n");

	/* Identify 82544 on PCIX */
	em_get_bus_info(&adapter->hw);
	if (adapter->hw.bus_type == em_bus_type_pcix &&
	    adapter->hw.mac_type == em_82544)
		adapter->pcix_82544 = TRUE;
	else
		adapter->pcix_82544 = FALSE;

	INIT_DEBUGOUT("em_attach: end");

	return (0);

err_hw_init:
	em_dma_free(adapter, &adapter->rxdma);
err_rx_desc:
	em_dma_free(adapter, &adapter->txdma);
err_tx_desc:
err_pci:
	em_free_intr(adapter);
	em_free_pci_resources(adapter);
	EM_LOCK_DESTROY(adapter);

	return (error);
}
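
/*
 * Editor's note: a worked example of the descriptor-count validation in
 * em_attach() above, assuming sizeof(struct em_tx_desc) == 16 and
 * EM_DBA_ALIGN == 128 (assumptions, not values verified here).  The
 * ring must satisfy (em_txd * 16) % 128 == 0, i.e. be a multiple of 8
 * descriptors, so hw.em.txd="1000" would be rejected and replaced with
 * EM_DEFAULT_TXD after a console message, while "1024" passes on
 * em_82544 and newer MACs.
 */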

/*********************************************************************
 *  Device removal routine
 *
 *  The detach entry point is called when the driver is being removed.
 *  This routine stops the adapter and deallocates all the resources
 *  that were allocated for driver operation.
 *
 *  return 0 on success, positive on failure
 *********************************************************************/

static int
em_detach(device_t dev)
{
	struct adapter	*adapter = device_get_softc(dev);
	struct ifnet	*ifp = adapter->ifp;

	INIT_DEBUGOUT("em_detach: begin");

#ifdef DEVICE_POLLING
	if (ifp->if_capenable & IFCAP_POLLING)
		ether_poll_deregister(ifp);
#endif

	em_free_intr(adapter);
	EM_LOCK(adapter);
	adapter->in_detach = 1;
	em_stop(adapter);
	em_phy_hw_reset(&adapter->hw);
	EM_UNLOCK(adapter);
	ether_ifdetach(adapter->ifp);

	em_free_pci_resources(adapter);
	bus_generic_detach(dev);
	if_free(ifp);

	/* Free Transmit Descriptor ring */
	if (adapter->tx_desc_base) {
		em_dma_free(adapter, &adapter->txdma);
		adapter->tx_desc_base = NULL;
	}

	/* Free Receive Descriptor ring */
	if (adapter->rx_desc_base) {
		em_dma_free(adapter, &adapter->rxdma);
		adapter->rx_desc_base = NULL;
	}

	EM_LOCK_DESTROY(adapter);

	return (0);
}

/*********************************************************************
 *
 *  Shutdown entry point
 *
 **********************************************************************/

static int
em_shutdown(device_t dev)
{
	struct adapter *adapter = device_get_softc(dev);

	EM_LOCK(adapter);
	em_stop(adapter);
	EM_UNLOCK(adapter);

	return (0);
}

/*
 * Suspend/resume device methods.
 */
static int
em_suspend(device_t dev)
{
	struct adapter *adapter = device_get_softc(dev);

	EM_LOCK(adapter);
	em_stop(adapter);
	EM_UNLOCK(adapter);

	return bus_generic_suspend(dev);
}

static int
em_resume(device_t dev)
{
	struct adapter *adapter = device_get_softc(dev);
	struct ifnet *ifp = adapter->ifp;

	EM_LOCK(adapter);
	em_init_locked(adapter);
	if ((ifp->if_flags & IFF_UP) &&
	    (ifp->if_drv_flags & IFF_DRV_RUNNING))
		em_start_locked(ifp);
	EM_UNLOCK(adapter);

	return bus_generic_resume(dev);
}


/*********************************************************************
 *  Transmit entry point
 *
 *  em_start is called by the stack to initiate a transmit.
 *  The driver will remain in this routine as long as there are
 *  packets to transmit and transmit resources are available.
 *  If resources are not available, the stack is notified and
 *  the packet is requeued.
 **********************************************************************/

static void
em_start_locked(struct ifnet *ifp)
{
	struct adapter	*adapter = ifp->if_softc;
	struct mbuf	*m_head;

	EM_LOCK_ASSERT(adapter);

	if ((ifp->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) !=
	    IFF_DRV_RUNNING)
		return;
	if (!adapter->link_active)
		return;

	while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {

		IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
		if (m_head == NULL)
			break;
		/*
		 * em_encap() can modify our pointer, and/or make it NULL on
		 * failure.  In that event, we can't requeue.
		 */
		if (em_encap(adapter, &m_head)) {
			if (m_head == NULL)
				break;
			ifp->if_drv_flags |= IFF_DRV_OACTIVE;
			IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
			break;
		}

		/* Send a copy of the frame to the BPF listener */
		BPF_MTAP(ifp, m_head);

		/* Set timeout in case hardware has problems transmitting. */
		ifp->if_timer = EM_TX_TIMEOUT;
	}
}

static void
em_start(struct ifnet *ifp)
{
	struct adapter *adapter = ifp->if_softc;

	EM_LOCK(adapter);
	if (ifp->if_drv_flags & IFF_DRV_RUNNING)
		em_start_locked(ifp);
	EM_UNLOCK(adapter);
}

/*********************************************************************
 *  Ioctl entry point
 *
 *  em_ioctl is called when the user wants to configure the
 *  interface.
 *
 *  return 0 on success, positive on failure
 **********************************************************************/

static int
em_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
{
	struct adapter	*adapter = ifp->if_softc;
	struct ifreq	*ifr = (struct ifreq *)data;
	struct ifaddr	*ifa = (struct ifaddr *)data;
	int		error = 0;

	if (adapter->in_detach)
		return (error);

	switch (command) {
	case SIOCSIFADDR:
	case SIOCGIFADDR:
		if (ifa->ifa_addr->sa_family == AF_INET) {
			/*
			 * XXX
			 * Since resetting the hardware takes a very long
			 * time and results in link renegotiation, we only
			 * initialize the hardware when it is absolutely
			 * required.
			 */
			ifp->if_flags |= IFF_UP;
			if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) {
				EM_LOCK(adapter);
				em_init_locked(adapter);
				EM_UNLOCK(adapter);
			}
			arp_ifinit(ifp, ifa);
		} else
			error = ether_ioctl(ifp, command, data);
		break;
	case SIOCSIFMTU:
	    {
		int max_frame_size;
		uint16_t eeprom_data = 0;

		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFMTU (Set Interface MTU)");

		EM_LOCK(adapter);
		switch (adapter->hw.mac_type) {
		case em_82573:
			/*
			 * 82573 only supports jumbo frames
			 * if ASPM is disabled.
			 */
			em_read_eeprom(&adapter->hw, EEPROM_INIT_3GIO_3, 1,
			    &eeprom_data);
			if (eeprom_data & EEPROM_WORD1A_ASPM_MASK) {
				max_frame_size = ETHER_MAX_LEN;
				break;
			}
			/* Allow Jumbo frames - fall thru */
		case em_82571:
		case em_82572:
		case em_80003es2lan:	/* Limit Jumbo Frame size */
			max_frame_size = 9234;
			break;
		case em_ich8lan:
			/* ICH8 does not support jumbo frames */
			max_frame_size = ETHER_MAX_LEN;
			break;
		default:
			max_frame_size = MAX_JUMBO_FRAME_SIZE;
		}
		if (ifr->ifr_mtu > max_frame_size - ETHER_HDR_LEN -
		    ETHER_CRC_LEN) {
			EM_UNLOCK(adapter);
			error = EINVAL;
			break;
		}

		ifp->if_mtu = ifr->ifr_mtu;
		adapter->hw.max_frame_size =
		    ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
		em_init_locked(adapter);
		EM_UNLOCK(adapter);
		break;
	    }
	case SIOCSIFFLAGS:
		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFFLAGS (Set Interface Flags)");
		EM_LOCK(adapter);
		if (ifp->if_flags & IFF_UP) {
			if ((ifp->if_drv_flags & IFF_DRV_RUNNING)) {
				if ((ifp->if_flags ^ adapter->if_flags) &
				    IFF_PROMISC) {
					em_disable_promisc(adapter);
					em_set_promisc(adapter);
				}
			} else
				em_init_locked(adapter);
		} else {
			if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
				em_stop(adapter);
			}
		}
		adapter->if_flags = ifp->if_flags;
		EM_UNLOCK(adapter);
		break;
	case SIOCADDMULTI:
	case SIOCDELMULTI:
		IOCTL_DEBUGOUT("ioctl rcv'd: SIOC(ADD|DEL)MULTI");
		if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
			EM_LOCK(adapter);
			em_disable_intr(adapter);
			em_set_multi(adapter);
			if (adapter->hw.mac_type == em_82542_rev2_0) {
				em_initialize_receive_unit(adapter);
			}
#ifdef DEVICE_POLLING
			if (!(ifp->if_capenable & IFCAP_POLLING))
#endif
				em_enable_intr(adapter);
			EM_UNLOCK(adapter);
		}
		break;
	case SIOCSIFMEDIA:
		/* Check SOL/IDER usage */
		EM_LOCK(adapter);
		if (em_check_phy_reset_block(&adapter->hw)) {
			EM_UNLOCK(adapter);
			device_printf(adapter->dev, "Media change is"
			    " blocked due to SOL/IDER session.\n");
			break;
		}
		EM_UNLOCK(adapter);
		/* FALLTHROUGH */
	case SIOCGIFMEDIA:
		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCxIFMEDIA (Get/Set Interface Media)");
		error = ifmedia_ioctl(ifp, ifr, &adapter->media, command);
		break;
	case SIOCSIFCAP:
	    {
		int mask, reinit;

		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFCAP (Set Capabilities)");
		reinit = 0;
		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
#ifdef DEVICE_POLLING
		if (mask & IFCAP_POLLING) {
			if (ifr->ifr_reqcap & IFCAP_POLLING) {
				error = ether_poll_register(em_poll, ifp);
				if (error)
					return (error);
				EM_LOCK(adapter);
				em_disable_intr(adapter);
				ifp->if_capenable |= IFCAP_POLLING;
				EM_UNLOCK(adapter);
			} else {
				error = ether_poll_deregister(ifp);
				/* Enable interrupt even in error case */
				EM_LOCK(adapter);
				em_enable_intr(adapter);
				ifp->if_capenable &= ~IFCAP_POLLING;
				EM_UNLOCK(adapter);
			}
		}
#endif
		if (mask & IFCAP_HWCSUM) {
			ifp->if_capenable ^= IFCAP_HWCSUM;
			reinit = 1;
		}
		if (mask & IFCAP_TSO4) {
			ifp->if_capenable ^= IFCAP_TSO4;
			reinit = 1;
		}
		if (mask & IFCAP_VLAN_HWTAGGING) {
			ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
			reinit = 1;
		}
		if (reinit && (ifp->if_drv_flags & IFF_DRV_RUNNING))
			em_init(adapter);
		VLAN_CAPABILITIES(ifp);
		break;
	    }
	default:
		error = ether_ioctl(ifp, command, data);
		break;
	}

	return (error);
}
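
/*
 * Editor's note: a worked example of the SIOCSIFMTU bound above.  For
 * the 82571/82572/80003es2lan cases, max_frame_size is 9234 bytes, so
 * the largest accepted MTU is 9234 - ETHER_HDR_LEN (14) -
 * ETHER_CRC_LEN (4) = 9216 bytes; "ifconfig em0 mtu 9217" on such a MAC
 * returns EINVAL, while an ich8lan part is limited to the standard 1500
 * (ETHER_MAX_LEN of 1518, minus the same 18 bytes of overhead).
 */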

/*********************************************************************
 *  Watchdog entry point
 *
 *  This routine is called whenever hardware quits transmitting.
 *
 **********************************************************************/

static void
em_watchdog(struct ifnet *ifp)
{
	struct adapter *adapter = ifp->if_softc;

	EM_LOCK(adapter);
	/*
	 * If we are in this routine because of pause frames, then
	 * don't reset the hardware.
	 */
	if (E1000_READ_REG(&adapter->hw, STATUS) & E1000_STATUS_TXOFF) {
		ifp->if_timer = EM_TX_TIMEOUT;
		EM_UNLOCK(adapter);
		return;
	}

	/*
	 * Reclaim first as there is a possibility of losing Tx completion
	 * interrupts.  Missing Tx completion interrupts may be caused by
	 * the Tx interrupt moderation mechanism (delayed interrupts) or
	 * by a chipset bug.
	 */
	em_txeof(adapter);
	if (adapter->num_tx_desc_avail == adapter->num_tx_desc) {
		EM_UNLOCK(adapter);
		return;
	}

	if (em_check_for_link(&adapter->hw) == 0)
		device_printf(adapter->dev, "watchdog timeout -- resetting\n");

	ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
	adapter->watchdog_events++;

	em_init_locked(adapter);
	EM_UNLOCK(adapter);
}

/*********************************************************************
 *  Init entry point
 *
 *  This routine is used in two ways.  It is used by the stack as
 *  the init entry point in the network interface structure.  It is
 *  also used by the driver as a hw/sw initialization routine to get
 *  to a consistent state.
 *
 **********************************************************************/

static void
em_init_locked(struct adapter *adapter)
{
	struct ifnet	*ifp = adapter->ifp;
	device_t	dev = adapter->dev;
	uint32_t	pba;

	INIT_DEBUGOUT("em_init: begin");

	EM_LOCK_ASSERT(adapter);

	em_stop(adapter);

	/*
	 * Packet Buffer Allocation (PBA)
	 * Writing PBA sets the receive portion of the buffer;
	 * the remainder is used for the transmit buffer.
	 *
	 * Devices before the 82547 had a Packet Buffer of 64K.
	 *   Default allocation: PBA=48K for Rx, leaving 16K for Tx.
	 * After the 82547 the buffer was reduced to 40K.
	 *   Default allocation: PBA=30K for Rx, leaving 10K for Tx.
	 *   Note: default does not leave enough room for Jumbo Frame >10k.
	 */
	switch (adapter->hw.mac_type) {
	case em_82547:
	case em_82547_rev_2: /* 82547: Total Packet Buffer is 40K */
		if (adapter->hw.max_frame_size > EM_RXBUFFER_8192)
			pba = E1000_PBA_22K; /* 22K for Rx, 18K for Tx */
		else
			pba = E1000_PBA_30K; /* 30K for Rx, 10K for Tx */
		adapter->tx_fifo_head = 0;
		adapter->tx_head_addr = pba << EM_TX_HEAD_ADDR_SHIFT;
		adapter->tx_fifo_size = (E1000_PBA_40K - pba) << EM_PBA_BYTES_SHIFT;
		break;
	case em_80003es2lan: /* 80003es2lan: Total Packet Buffer is 48K */
	case em_82571: /* 82571: Total Packet Buffer is 48K */
	case em_82572: /* 82572: Total Packet Buffer is 48K */
		pba = E1000_PBA_32K; /* 32K for Rx, 16K for Tx */
		break;
	case em_82573: /* 82573: Total Packet Buffer is 32K */
		/* Jumbo frames not supported */
		pba = E1000_PBA_12K; /* 12K for Rx, 20K for Tx */
		break;
	case em_ich8lan:
		pba = E1000_PBA_8K;
		break;
	default:
		/* Devices before 82547 had a Packet Buffer of 64K. */
		if (adapter->hw.max_frame_size > EM_RXBUFFER_8192)
			pba = E1000_PBA_40K; /* 40K for Rx, 24K for Tx */
		else
			pba = E1000_PBA_48K; /* 48K for Rx, 16K for Tx */
	}

	INIT_DEBUGOUT1("em_init: pba=%dK", pba);
	E1000_WRITE_REG(&adapter->hw, PBA, pba);

	/* Get the latest mac address; the user can use a LAA */
	bcopy(IF_LLADDR(adapter->ifp), adapter->hw.mac_addr, ETHER_ADDR_LEN);

	/* Initialize the hardware */
	if (em_hardware_init(adapter)) {
		device_printf(dev, "Unable to initialize the hardware\n");
		return;
	}
	em_update_link_status(adapter);

	if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING)
		em_enable_vlans(adapter);

	ifp->if_hwassist = 0;
	if (adapter->hw.mac_type >= em_82543) {
		if (ifp->if_capenable & IFCAP_TXCSUM)
			ifp->if_hwassist = EM_CHECKSUM_FEATURES;
		/*
		 * em_setup_transmit_structures() will behave differently
		 * based on the state of TSO.
		 */
		if (ifp->if_capenable & IFCAP_TSO)
			ifp->if_hwassist |= EM_TCPSEG_FEATURES;
	}

	/* Prepare transmit descriptors and buffers */
	if (em_setup_transmit_structures(adapter)) {
		device_printf(dev, "Could not setup transmit structures\n");
		em_stop(adapter);
		return;
	}
	em_initialize_transmit_unit(adapter);

	/* Setup Multicast table */
	em_set_multi(adapter);

	/* Prepare receive descriptors and buffers */
	if (em_setup_receive_structures(adapter)) {
		device_printf(dev, "Could not setup receive structures\n");
		em_stop(adapter);
		return;
	}
	em_initialize_receive_unit(adapter);

	/* Don't lose promiscuous settings */
	em_set_promisc(adapter);

	ifp->if_drv_flags |= IFF_DRV_RUNNING;
	ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;

	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
	em_clear_hw_cntrs(&adapter->hw);
#ifdef DEVICE_POLLING
	/*
	 * Only enable interrupts if we are not polling; make sure
	 * they are off otherwise.
	 */
	if (ifp->if_capenable & IFCAP_POLLING)
		em_disable_intr(adapter);
	else
#endif /* DEVICE_POLLING */
		em_enable_intr(adapter);

	/* Don't reset the phy next time init gets called */
	adapter->hw.phy_reset_disable = TRUE;
}
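
/*
 * Editor's note: a worked example of the PBA split above for the 82547
 * case, assuming the E1000_PBA_* constants encode kilobytes and
 * EM_PBA_BYTES_SHIFT converts KB to bytes (assumptions based only on
 * their use here).  With a standard MTU, pba = E1000_PBA_30K: receive
 * gets 30K of the 40K packet buffer and tx_fifo_size becomes
 * (40 - 30) << EM_PBA_BYTES_SHIFT = 10K for transmit, which is the FIFO
 * window the 82547 half-duplex workaround later in this file has to
 * respect.
 */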

static void
em_init(void *arg)
{
	struct adapter *adapter = arg;

	EM_LOCK(adapter);
	em_init_locked(adapter);
	EM_UNLOCK(adapter);
}


#ifdef DEVICE_POLLING
/*********************************************************************
 *
 *  Legacy polling routine
 *
 *********************************************************************/
static void
em_poll(struct ifnet *ifp, enum poll_cmd cmd, int count)
{
	struct adapter *adapter = ifp->if_softc;
	uint32_t reg_icr;

	EM_LOCK(adapter);
	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
		EM_UNLOCK(adapter);
		return;
	}

	if (cmd == POLL_AND_CHECK_STATUS) {
		reg_icr = E1000_READ_REG(&adapter->hw, ICR);
		if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
			callout_stop(&adapter->timer);
			adapter->hw.get_link_status = 1;
			em_check_for_link(&adapter->hw);
			em_update_link_status(adapter);
			callout_reset(&adapter->timer, hz, em_local_timer, adapter);
		}
	}
	em_rxeof(adapter, count);
	em_txeof(adapter);

	if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
		em_start_locked(ifp);
	EM_UNLOCK(adapter);
}

/*********************************************************************
 *
 *  Legacy Interrupt Service routine
 *
 *********************************************************************/
static void
em_intr(void *arg)
{
	struct adapter	*adapter = arg;
	struct ifnet	*ifp;
	uint32_t	reg_icr;

	EM_LOCK(adapter);

	ifp = adapter->ifp;

	if (ifp->if_capenable & IFCAP_POLLING) {
		EM_UNLOCK(adapter);
		return;
	}

	for (;;) {
		reg_icr = E1000_READ_REG(&adapter->hw, ICR);
		if (adapter->hw.mac_type >= em_82571 &&
		    (reg_icr & E1000_ICR_INT_ASSERTED) == 0)
			break;
		else if (reg_icr == 0)
			break;

		/*
		 * XXX: some laptops trigger several spurious interrupts
		 * on em(4) when in the resume cycle. The ICR register
		 * reports all-ones value in this case. Processing such
		 * interrupts would lead to a freeze. I don't know why.
		 */
		if (reg_icr == 0xffffffff)
			break;

		if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
			em_rxeof(adapter, -1);
			em_txeof(adapter);
		}

		/* Link status change */
		if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
			callout_stop(&adapter->timer);
			adapter->hw.get_link_status = 1;
			em_check_for_link(&adapter->hw);
			em_update_link_status(adapter);
			callout_reset(&adapter->timer, hz, em_local_timer, adapter);
		}

		if (reg_icr & E1000_ICR_RXO)
			adapter->rx_overruns++;
	}

	if (ifp->if_drv_flags & IFF_DRV_RUNNING &&
	    !IFQ_DRV_IS_EMPTY(&ifp->if_snd))
		em_start_locked(ifp);

	EM_UNLOCK(adapter);
}

#else  /* if not DEVICE_POLLING, then fast interrupt routines only */

static void
em_handle_link(void *context, int pending)
{
	struct adapter	*adapter = context;
	struct ifnet *ifp;

	ifp = adapter->ifp;

	EM_LOCK(adapter);

	callout_stop(&adapter->timer);
	adapter->hw.get_link_status = 1;
	em_check_for_link(&adapter->hw);
	em_update_link_status(adapter);
	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
	EM_UNLOCK(adapter);
}

static void
em_handle_rxtx(void *context, int pending)
{
	struct adapter	*adapter = context;
	struct ifnet	*ifp;

	NET_LOCK_GIANT();
	ifp = adapter->ifp;

	/*
	 * TODO:
	 * It should be possible to run the tx clean loop without the lock.
	 */
	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
		if (em_rxeof(adapter, adapter->rx_process_limit) != 0)
			taskqueue_enqueue(adapter->tq, &adapter->rxtx_task);
		EM_LOCK(adapter);
		em_txeof(adapter);

		if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
			em_start_locked(ifp);
		EM_UNLOCK(adapter);
	}

	em_enable_intr(adapter);
	NET_UNLOCK_GIANT();
}

/*********************************************************************
 *
 *  Fast Interrupt Service routine
 *
 *********************************************************************/
static void
em_intr_fast(void *arg)
{
	struct adapter	*adapter = arg;
	struct ifnet	*ifp;
	uint32_t	reg_icr;

	ifp = adapter->ifp;

	reg_icr = E1000_READ_REG(&adapter->hw, ICR);

	/* Hot eject?  */
	if (reg_icr == 0xffffffff)
		return;

	/* Definitely not our interrupt.  */
	if (reg_icr == 0x0)
		return;

	/*
	 * Starting with the 82571 chip, bit 31 should be used to
	 * determine whether the interrupt belongs to us.
	 */
	if (adapter->hw.mac_type >= em_82571 &&
	    (reg_icr & E1000_ICR_INT_ASSERTED) == 0)
		return;

	/*
	 * Mask interrupts until the taskqueue is finished running.  This is
	 * cheap, just assume that it is needed.  This also works around the
	 * MSI message reordering errata on certain systems.
	 */
	em_disable_intr(adapter);
	taskqueue_enqueue(adapter->tq, &adapter->rxtx_task);

	/* Link status change */
	if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC))
		taskqueue_enqueue(taskqueue_fast, &adapter->link_task);

	if (reg_icr & E1000_ICR_RXO)
		adapter->rx_overruns++;
}
#endif /* ! DEVICE_POLLING */
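
/*
 * Editor's note on the split above: the fast handler runs in interrupt
 * context without taking the adapter lock, so it only classifies the
 * interrupt cause and defers the real work.  em_handle_rxtx()
 * re-enqueues itself while the receive ring still has packets, bounding
 * each taskqueue pass to rx_process_limit packets, and
 * E1000_ICR_INT_ASSERTED (bit 31 on 82571 and later MACs) lets the
 * handler cheaply reject interrupts that belong to another device
 * sharing the line.
 */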

/*********************************************************************
 *
 *  Media Ioctl callback
 *
 *  This routine is called whenever the user queries the status of
 *  the interface using ifconfig.
 *
 **********************************************************************/
static void
em_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
{
	struct adapter *adapter = ifp->if_softc;

	INIT_DEBUGOUT("em_media_status: begin");

	EM_LOCK(adapter);
	em_check_for_link(&adapter->hw);
	em_update_link_status(adapter);

	ifmr->ifm_status = IFM_AVALID;
	ifmr->ifm_active = IFM_ETHER;

	if (!adapter->link_active) {
		EM_UNLOCK(adapter);
		return;
	}

	ifmr->ifm_status |= IFM_ACTIVE;

	if ((adapter->hw.media_type == em_media_type_fiber) ||
	    (adapter->hw.media_type == em_media_type_internal_serdes)) {
		if (adapter->hw.mac_type == em_82545)
			ifmr->ifm_active |= IFM_1000_LX | IFM_FDX;
		else
			ifmr->ifm_active |= IFM_1000_SX | IFM_FDX;
	} else {
		switch (adapter->link_speed) {
		case 10:
			ifmr->ifm_active |= IFM_10_T;
			break;
		case 100:
			ifmr->ifm_active |= IFM_100_TX;
			break;
		case 1000:
			ifmr->ifm_active |= IFM_1000_T;
			break;
		}
		if (adapter->link_duplex == FULL_DUPLEX)
			ifmr->ifm_active |= IFM_FDX;
		else
			ifmr->ifm_active |= IFM_HDX;
	}
	EM_UNLOCK(adapter);
}

/*********************************************************************
 *
 *  Media Ioctl callback
 *
 *  This routine is called when the user changes speed/duplex using
 *  the media/mediaopt options with ifconfig.
 *
 **********************************************************************/
static int
em_media_change(struct ifnet *ifp)
{
	struct adapter *adapter = ifp->if_softc;
	struct ifmedia  *ifm = &adapter->media;

	INIT_DEBUGOUT("em_media_change: begin");

	if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
		return (EINVAL);

	EM_LOCK(adapter);
	switch (IFM_SUBTYPE(ifm->ifm_media)) {
	case IFM_AUTO:
		adapter->hw.autoneg = DO_AUTO_NEG;
		adapter->hw.autoneg_advertised = AUTONEG_ADV_DEFAULT;
		break;
	case IFM_1000_LX:
	case IFM_1000_SX:
	case IFM_1000_T:
		adapter->hw.autoneg = DO_AUTO_NEG;
		adapter->hw.autoneg_advertised = ADVERTISE_1000_FULL;
		break;
	case IFM_100_TX:
		adapter->hw.autoneg = FALSE;
		adapter->hw.autoneg_advertised = 0;
		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
			adapter->hw.forced_speed_duplex = em_100_full;
		else
			adapter->hw.forced_speed_duplex = em_100_half;
		break;
	case IFM_10_T:
		adapter->hw.autoneg = FALSE;
		adapter->hw.autoneg_advertised = 0;
		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
			adapter->hw.forced_speed_duplex = em_10_full;
		else
			adapter->hw.forced_speed_duplex = em_10_half;
		break;
	default:
		device_printf(adapter->dev, "Unsupported media type\n");
	}

	/*
	 * As the speed/duplex settings may have changed we need to
	 * reset the PHY.
	 */
	adapter->hw.phy_reset_disable = FALSE;

	em_init_locked(adapter);
	EM_UNLOCK(adapter);

	return (0);
}
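
/*
 * Editor's note: a usage sketch for the media handlers above.  Forcing
 * 100 Mb/s full duplex and then restoring autonegotiation would look
 * like:
 *
 *   ifconfig em0 media 100baseTX mediaopt full-duplex
 *   ifconfig em0 media autoselect
 *
 * The gigabit cases above deliberately keep autonegotiation enabled and
 * only restrict the advertised modes, since 1000baseT requires
 * autonegotiation for master/slave resolution.
 */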

/*********************************************************************
 *
 *  This routine maps the mbufs to tx descriptors.
 *
 *  return 0 on success, positive on failure
 **********************************************************************/
static int
em_encap(struct adapter *adapter, struct mbuf **m_headp)
{
	struct ifnet		*ifp = adapter->ifp;
	bus_dma_segment_t	segs[EM_MAX_SCATTER];
	bus_dmamap_t		map;
	struct em_buffer	*tx_buffer, *tx_buffer_mapped;
	struct em_tx_desc	*current_tx_desc;
	struct mbuf		*m_head;
	uint32_t		txd_upper, txd_lower, txd_used, txd_saved;
	int			nsegs, i, j, first, last = 0;
	int			error, do_tso, tso_desc = 0;

	m_head = *m_headp;
	current_tx_desc = NULL;
	txd_upper = txd_lower = txd_used = txd_saved = 0;

	do_tso = ((m_head->m_pkthdr.csum_flags & CSUM_TSO) != 0);

	/*
	 * Force a cleanup if the number of TX descriptors
	 * available hits the threshold.
	 */
	if (adapter->num_tx_desc_avail <= EM_TX_CLEANUP_THRESHOLD) {
		em_txeof(adapter);
		if (adapter->num_tx_desc_avail <= EM_TX_CLEANUP_THRESHOLD) {
			adapter->no_tx_desc_avail1++;
			return (ENOBUFS);
		}
	}

	/*
	 * When operating in promiscuous mode, hardware stripping of the
	 * VLAN tag on receive is disabled.  This should not prevent us
	 * from doing hardware insertion of the VLAN tag here as that
	 * is controlled by the DMA descriptor flags and not the receive
	 * tag strip setting.  Unfortunately this hardware switches the
	 * VLAN encapsulation type from 802.1q to ISL when stripping on
	 * receive is disabled.  This means we have to add the VLAN
	 * encapsulation here in the driver, since it will have come down
	 * from the VLAN layer with a tag instead of a VLAN header.
	 */
	if ((m_head->m_flags & M_VLANTAG) && adapter->em_insert_vlan_header) {
		struct ether_vlan_header *evl;
		struct ether_header eh;

		m_head = m_pullup(m_head, sizeof(eh));
		if (m_head == NULL) {
			*m_headp = NULL;
			return (ENOBUFS);
		}
		eh = *mtod(m_head, struct ether_header *);
		M_PREPEND(m_head, sizeof(*evl), M_DONTWAIT);
		if (m_head == NULL) {
			*m_headp = NULL;
			return (ENOBUFS);
		}
		m_head = m_pullup(m_head, sizeof(*evl));
		if (m_head == NULL) {
			*m_headp = NULL;
			return (ENOBUFS);
		}
		evl = mtod(m_head, struct ether_vlan_header *);
		bcopy(&eh, evl, sizeof(*evl));
		evl->evl_proto = evl->evl_encap_proto;
		evl->evl_encap_proto = htons(ETHERTYPE_VLAN);
		evl->evl_tag = htons(m_head->m_pkthdr.ether_vtag);
		*m_headp = m_head;
	}

	/*
	 * TSO workaround:
	 *  If an mbuf contains only the IP and TCP header we have
	 *  to pull 4 bytes of data into it.
	 */
	if (do_tso && (m_head->m_len <= M_TSO_LEN)) {
		m_head = m_pullup(m_head, M_TSO_LEN + 4);
		*m_headp = m_head;
		if (m_head == NULL)
			return (ENOBUFS);
	}

	/*
	 * Map the packet for DMA.
	 *
	 * Capture the first descriptor index;
	 * this descriptor will have the index
	 * of the EOP which is the only one that
	 * now gets a DONE bit writeback.
	 */
	first = adapter->next_avail_tx_desc;
	tx_buffer = &adapter->tx_buffer_area[first];
	tx_buffer_mapped = tx_buffer;
	map = tx_buffer->map;

	error = bus_dmamap_load_mbuf_sg(adapter->txtag, map, *m_headp, segs,
	    &nsegs, BUS_DMA_NOWAIT);

	/*
	 * There are two types of errors we can (try) to handle:
	 * - EFBIG means the mbuf chain was too long and bus_dma ran
	 *   out of segments.  Defragment the mbuf chain and try again.
	 * - ENOMEM means bus_dma could not obtain enough bounce buffers
	 *   at this point in time.  Defer sending and try again later.
	 * All other errors, in particular EINVAL, are fatal and prevent the
	 * mbuf chain from ever going through.  Drop it and report error.
	 */
	if (error == EFBIG) {
		struct mbuf *m;

		m = m_defrag(*m_headp, M_DONTWAIT);
		if (m == NULL) {
			/* Assume m_defrag(9) used only m_get(9). */
			adapter->mbuf_alloc_failed++;
			m_freem(*m_headp);
			*m_headp = NULL;
			return (ENOBUFS);
		}
		*m_headp = m;

		error = bus_dmamap_load_mbuf_sg(adapter->txtag, map, *m_headp,
		    segs, &nsegs, BUS_DMA_NOWAIT);

		if (error == ENOMEM) {
			adapter->no_tx_dma_setup++;
			return (error);
		} else if (error != 0) {
			adapter->no_tx_dma_setup++;
			m_freem(*m_headp);
			*m_headp = NULL;
			return (error);
		}
	} else if (error == ENOMEM) {
		adapter->no_tx_dma_setup++;
		return (error);
	} else if (error != 0) {
		adapter->no_tx_dma_setup++;
		m_freem(*m_headp);
		*m_headp = NULL;
		return (error);
	}

	/*
	 * TSO hardware workaround: if this packet is not
	 * TSO, and is only a single descriptor long, and
	 * it follows a TSO burst, then we need to add a
	 * sentinel descriptor to prevent premature writeback.
	 */
	if ((do_tso == 0) && (adapter->tx_tso == TRUE)) {
		if (nsegs == 1)
			tso_desc = TRUE;
		adapter->tx_tso = FALSE;
	}

	if (nsegs > adapter->num_tx_desc_avail - 2) {
		adapter->no_tx_desc_avail2++;
		bus_dmamap_unload(adapter->txtag, map);
		return (ENOBUFS);
	}
	m_head = *m_headp;

	/* Do hardware assists */
	if (ifp->if_hwassist) {
		if (do_tso &&
		    em_tso_setup(adapter, m_head, &txd_upper, &txd_lower)) {
			/* we need to make a final sentinel transmit desc */
			tso_desc = TRUE;
		} else
			em_transmit_checksum_setup(adapter, m_head,
			    &txd_upper, &txd_lower);
	}

	i = adapter->next_avail_tx_desc;
	if (adapter->pcix_82544)
		txd_saved = i;

	for (j = 0; j < nsegs; j++) {
		bus_size_t seg_len;
		bus_addr_t seg_addr;
		/* If adapter is 82544 and on PCIX bus. */
		if (adapter->pcix_82544) {
			DESC_ARRAY	desc_array;
			uint32_t	array_elements, counter;

			/*
			 * Check the address and length combination and
			 * split the data accordingly.
			 */
			array_elements = em_fill_descriptors(segs[j].ds_addr,
			    segs[j].ds_len, &desc_array);
			for (counter = 0; counter < array_elements; counter++) {
				if (txd_used == adapter->num_tx_desc_avail) {
					adapter->next_avail_tx_desc = txd_saved;
					adapter->no_tx_desc_avail2++;
					bus_dmamap_unload(adapter->txtag, map);
					return (ENOBUFS);
				}
				tx_buffer = &adapter->tx_buffer_area[i];
				current_tx_desc = &adapter->tx_desc_base[i];
				current_tx_desc->buffer_addr = htole64(
				    desc_array.descriptor[counter].address);
				current_tx_desc->lower.data = htole32(
				    (adapter->txd_cmd | txd_lower |
				    (uint16_t)desc_array.descriptor[counter].length));
				current_tx_desc->upper.data = htole32((txd_upper));
				last = i;
				if (++i == adapter->num_tx_desc)
					i = 0;

				tx_buffer->m_head = NULL;
				tx_buffer->next_eop = -1;
				txd_used++;
			}
		} else {
			tx_buffer = &adapter->tx_buffer_area[i];
			current_tx_desc = &adapter->tx_desc_base[i];
			seg_addr = htole64(segs[j].ds_addr);
			seg_len  = segs[j].ds_len;
			/*
			 * TSO Workaround:
			 * If this is the last descriptor, we want to
			 * split it so we have a small final sentinel.
			 */
			if (tso_desc && (j == (nsegs - 1)) && (seg_len > 8)) {
				seg_len -= 4;
				current_tx_desc->buffer_addr = seg_addr;
				current_tx_desc->lower.data = htole32(
				    adapter->txd_cmd | txd_lower | seg_len);
				current_tx_desc->upper.data =
				    htole32(txd_upper);
				if (++i == adapter->num_tx_desc)
					i = 0;
				/* Now make the sentinel */
				++txd_used; /* using an extra txd */
				current_tx_desc = &adapter->tx_desc_base[i];
				tx_buffer = &adapter->tx_buffer_area[i];
				current_tx_desc->buffer_addr =
				    seg_addr + seg_len;
				current_tx_desc->lower.data = htole32(
				    adapter->txd_cmd | txd_lower | 4);
				current_tx_desc->upper.data =
				    htole32(txd_upper);
				last = i;
				if (++i == adapter->num_tx_desc)
					i = 0;
			} else {
				current_tx_desc->buffer_addr = seg_addr;
				current_tx_desc->lower.data = htole32(
				    adapter->txd_cmd | txd_lower | seg_len);
				current_tx_desc->upper.data =
				    htole32(txd_upper);
				last = i;
				if (++i == adapter->num_tx_desc)
					i = 0;
			}
			tx_buffer->m_head = NULL;
			tx_buffer->next_eop = -1;
		}
	}

	adapter->next_avail_tx_desc = i;
	if (adapter->pcix_82544)
		adapter->num_tx_desc_avail -= txd_used;
	else {
		adapter->num_tx_desc_avail -= nsegs;
		if (tso_desc) /* TSO used an extra for sentinel */
			adapter->num_tx_desc_avail -= txd_used;
	}

	if (m_head->m_flags & M_VLANTAG) {
		/* Set the vlan id. */
		current_tx_desc->upper.fields.special =
		    htole16(m_head->m_pkthdr.ether_vtag);

		/* Tell hardware to add tag. */
		current_tx_desc->lower.data |= htole32(E1000_TXD_CMD_VLE);
	}

	tx_buffer->m_head = m_head;
	tx_buffer_mapped->map = tx_buffer->map;
	tx_buffer->map = map;
	bus_dmamap_sync(adapter->txtag, map, BUS_DMASYNC_PREWRITE);

	/*
	 * The last descriptor of the packet
	 * needs End Of Packet (EOP)
	 * and Report Status (RS).
	 */
	current_tx_desc->lower.data |=
	    htole32(E1000_TXD_CMD_EOP | E1000_TXD_CMD_RS);
	/*
	 * Keep track in the first buffer which
	 * descriptor will be written back.
	 */
	tx_buffer = &adapter->tx_buffer_area[first];
	tx_buffer->next_eop = last;

	/*
	 * Advance the Transmit Descriptor Tail (TDT); this tells the E1000
	 * that this frame is available to transmit.
	 */
	bus_dmamap_sync(adapter->txdma.dma_tag, adapter->txdma.dma_map,
	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);

	if (adapter->hw.mac_type == em_82547 &&
	    adapter->link_duplex == HALF_DUPLEX)
		em_82547_move_tail_locked(adapter);
	else {
		E1000_WRITE_REG(&adapter->hw, TDT, i);
		if (adapter->hw.mac_type == em_82547)
			em_82547_update_fifo_head(adapter, m_head->m_pkthdr.len);
	}

	return (0);
}
1777
1778/*********************************************************************
1779 *
1780 * 82547 workaround to avoid controller hang in a half-duplex environment.
1781 * The workaround is to avoid queuing a large packet that would span
1782 * the internal Tx FIFO ring boundary. We need to reset the FIFO pointers
1783 * in this case. We do that only when the FIFO is quiescent.
1784 *
1785 **********************************************************************/
1786static void
1787em_82547_move_tail_locked(struct adapter *adapter)
1788{
1789	uint16_t hw_tdt;
1790	uint16_t sw_tdt;
1791	struct em_tx_desc *tx_desc;
1792	uint16_t length = 0;
1793	boolean_t eop = 0;
1794
1795	EM_LOCK_ASSERT(adapter);
1796
1797	hw_tdt = E1000_READ_REG(&adapter->hw, TDT);
1798	sw_tdt = adapter->next_avail_tx_desc;
1799
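	/*
	 * Walk the descriptors the hardware has not yet been told about,
	 * accumulating each packet's length; TDT is only advanced past a
	 * packet once we know it fits in the remaining FIFO space.
	 */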
1800	while (hw_tdt != sw_tdt) {
1801		tx_desc = &adapter->tx_desc_base[hw_tdt];
1802		length += tx_desc->lower.flags.length;
1803		eop = tx_desc->lower.data & E1000_TXD_CMD_EOP;
1804		if(++hw_tdt == adapter->num_tx_desc)
1805			hw_tdt = 0;
1806
1807		if (eop) {
1808			if (em_82547_fifo_workaround(adapter, length)) {
1809				adapter->tx_fifo_wrk_cnt++;
1810				callout_reset(&adapter->tx_fifo_timer, 1,
1811					em_82547_move_tail, adapter);
1812				break;
1813			}
1814			E1000_WRITE_REG(&adapter->hw, TDT, hw_tdt);
1815			em_82547_update_fifo_head(adapter, length);
1816			length = 0;
1817		}
1818	}
1819}
1820
1821static void
1822em_82547_move_tail(void *arg)
1823{
1824	struct adapter *adapter = arg;
1825
1826	EM_LOCK(adapter);
1827	em_82547_move_tail_locked(adapter);
1828	EM_UNLOCK(adapter);
1829}
1830
1831static int
1832em_82547_fifo_workaround(struct adapter *adapter, int len)
1833{
1834	int fifo_space, fifo_pkt_len;
1835
1836	fifo_pkt_len = roundup2(len + EM_FIFO_HDR, EM_FIFO_HDR);
1837
1838	if (adapter->link_duplex == HALF_DUPLEX) {
1839		fifo_space = adapter->tx_fifo_size - adapter->tx_fifo_head;
1840
1841		if (fifo_pkt_len >= (EM_82547_PKT_THRESH + fifo_space)) {
1842			if (em_82547_tx_fifo_reset(adapter))
1843				return (0);
1844			else
1845				return (1);
1846		}
1847	}
1848
1849	return (0);
1850}
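
/*
 * A worked example of the accounting above (assuming EM_FIFO_HDR is
 * 16 bytes): a 1514-byte frame consumes
 * roundup2(1514 + 16, 16) = 1536 bytes of the internal Tx FIFO, which
 * is also the amount em_82547_update_fifo_head() advances the head by.
 */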
1851
1852static void
1853em_82547_update_fifo_head(struct adapter *adapter, int len)
1854{
1855	int fifo_pkt_len = roundup2(len + EM_FIFO_HDR, EM_FIFO_HDR);
1856
1857	/* tx_fifo_head is always 16 byte aligned */
1858	adapter->tx_fifo_head += fifo_pkt_len;
1859	if (adapter->tx_fifo_head >= adapter->tx_fifo_size) {
1860		adapter->tx_fifo_head -= adapter->tx_fifo_size;
1861	}
1862}
1863
1864
1865static int
1866em_82547_tx_fifo_reset(struct adapter *adapter)
1867{
1868	uint32_t tctl;
1869
1870	if ((E1000_READ_REG(&adapter->hw, TDT) == E1000_READ_REG(&adapter->hw, TDH)) &&
1871	    (E1000_READ_REG(&adapter->hw, TDFT) == E1000_READ_REG(&adapter->hw, TDFH)) &&
1872	    (E1000_READ_REG(&adapter->hw, TDFTS) == E1000_READ_REG(&adapter->hw, TDFHS)) &&
1873	    (E1000_READ_REG(&adapter->hw, TDFPC) == 0)) {
1874
1875		/* Disable TX unit */
1876		tctl = E1000_READ_REG(&adapter->hw, TCTL);
1877		E1000_WRITE_REG(&adapter->hw, TCTL, tctl & ~E1000_TCTL_EN);
1878
1879		/* Reset FIFO pointers */
1880		E1000_WRITE_REG(&adapter->hw, TDFT,  adapter->tx_head_addr);
1881		E1000_WRITE_REG(&adapter->hw, TDFH,  adapter->tx_head_addr);
1882		E1000_WRITE_REG(&adapter->hw, TDFTS, adapter->tx_head_addr);
1883		E1000_WRITE_REG(&adapter->hw, TDFHS, adapter->tx_head_addr);
1884
1885		/* Re-enable TX unit */
1886		E1000_WRITE_REG(&adapter->hw, TCTL, tctl);
1887		E1000_WRITE_FLUSH(&adapter->hw);
1888
1889		adapter->tx_fifo_head = 0;
1890		adapter->tx_fifo_reset_cnt++;
1891
1892		return (TRUE);
1893	}
1894	else {
1895		return (FALSE);
1896	}
1897}
1898
1899static void
1900em_set_promisc(struct adapter *adapter)
1901{
1902	struct ifnet	*ifp = adapter->ifp;
1903	uint32_t	reg_rctl;
1904
1905	reg_rctl = E1000_READ_REG(&adapter->hw, RCTL);
1906
1907	if (ifp->if_flags & IFF_PROMISC) {
1908		reg_rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
1909		E1000_WRITE_REG(&adapter->hw, RCTL, reg_rctl);
1910		/*
1911		 * Disable VLAN stripping in promiscuous mode.
1912		 * This allows vlan-tagged frames to be bridged
1913		 * and also allows vlan tags to be seen in tcpdump.
1914		 * XXX: This is a bit bogus as tcpdump may be used
1915		 * w/o promisc mode as well.
1916		 */
1917		if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING)
1918			em_disable_vlans(adapter);
1919		adapter->em_insert_vlan_header = 1;
1920	} else if (ifp->if_flags & IFF_ALLMULTI) {
1921		reg_rctl |= E1000_RCTL_MPE;
1922		reg_rctl &= ~E1000_RCTL_UPE;
1923		E1000_WRITE_REG(&adapter->hw, RCTL, reg_rctl);
1924		adapter->em_insert_vlan_header = 0;
1925	} else
1926		adapter->em_insert_vlan_header = 0;
1927}
1928
1929static void
1930em_disable_promisc(struct adapter *adapter)
1931{
1932	struct ifnet	*ifp = adapter->ifp;
1933	uint32_t	reg_rctl;
1934
1935	reg_rctl = E1000_READ_REG(&adapter->hw, RCTL);
1936
1937	reg_rctl &=  (~E1000_RCTL_UPE);
1938	reg_rctl &=  (~E1000_RCTL_MPE);
1939	E1000_WRITE_REG(&adapter->hw, RCTL, reg_rctl);
1940
1941	if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING)
1942		em_enable_vlans(adapter);
1943	adapter->em_insert_vlan_header = 0;
1944}
1945
1946
1947/*********************************************************************
1948 *  Multicast Update
1949 *
1950 *  This routine is called whenever multicast address list is updated.
1951 *
1952 **********************************************************************/
1953
1954static void
1955em_set_multi(struct adapter *adapter)
1956{
1957	struct ifnet	*ifp = adapter->ifp;
1958	struct ifmultiaddr *ifma;
1959	uint32_t reg_rctl = 0;
1960	uint8_t  mta[MAX_NUM_MULTICAST_ADDRESSES * ETH_LENGTH_OF_ADDRESS];
1961	int mcnt = 0;
1962
1963	IOCTL_DEBUGOUT("em_set_multi: begin");
1964
1965	if (adapter->hw.mac_type == em_82542_rev2_0) {
1966		reg_rctl = E1000_READ_REG(&adapter->hw, RCTL);
1967		if (adapter->hw.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
1968			em_pci_clear_mwi(&adapter->hw);
1969		reg_rctl |= E1000_RCTL_RST;
1970		E1000_WRITE_REG(&adapter->hw, RCTL, reg_rctl);
1971		msec_delay(5);
1972	}
1973
1974	IF_ADDR_LOCK(ifp);
1975	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
1976		if (ifma->ifma_addr->sa_family != AF_LINK)
1977			continue;
1978
1979		if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
1980			break;
1981
1982		bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
1983		    &mta[mcnt*ETH_LENGTH_OF_ADDRESS], ETH_LENGTH_OF_ADDRESS);
1984		mcnt++;
1985	}
1986	IF_ADDR_UNLOCK(ifp);
1987
1988	if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES) {
1989		reg_rctl = E1000_READ_REG(&adapter->hw, RCTL);
1990		reg_rctl |= E1000_RCTL_MPE;
1991		E1000_WRITE_REG(&adapter->hw, RCTL, reg_rctl);
1992	} else
1993		em_mc_addr_list_update(&adapter->hw, mta, mcnt, 0, 1);
1994
1995	if (adapter->hw.mac_type == em_82542_rev2_0) {
1996		reg_rctl = E1000_READ_REG(&adapter->hw, RCTL);
1997		reg_rctl &= ~E1000_RCTL_RST;
1998		E1000_WRITE_REG(&adapter->hw, RCTL, reg_rctl);
1999		msec_delay(5);
2000		if (adapter->hw.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
2001			em_pci_set_mwi(&adapter->hw);
2002	}
2003}
2004
2005
2006/*********************************************************************
2007 *  Timer routine
2008 *
2009 *  This routine checks for link status and updates statistics.
2010 *
2011 **********************************************************************/
2012
2013static void
2014em_local_timer(void *arg)
2015{
2016	struct adapter	*adapter = arg;
2017	struct ifnet	*ifp = adapter->ifp;
2018
2019	EM_LOCK(adapter);
2020
2021	em_check_for_link(&adapter->hw);
2022	em_update_link_status(adapter);
2023	em_update_stats_counters(adapter);
2024	if (em_display_debug_stats && ifp->if_drv_flags & IFF_DRV_RUNNING)
2025		em_print_hw_stats(adapter);
2026	em_smartspeed(adapter);
2027
2028	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
2029
2030	EM_UNLOCK(adapter);
2031}
2032
2033static void
2034em_update_link_status(struct adapter *adapter)
2035{
2036	struct ifnet *ifp = adapter->ifp;
2037	device_t dev = adapter->dev;
2038
2039	if (E1000_READ_REG(&adapter->hw, STATUS) & E1000_STATUS_LU) {
2040		if (adapter->link_active == 0) {
2041			em_get_speed_and_duplex(&adapter->hw, &adapter->link_speed,
2042			    &adapter->link_duplex);
2043			/* Check if we may set SPEED_MODE bit on PCI-E */
2044			if ((adapter->link_speed == SPEED_1000) &&
2045			    ((adapter->hw.mac_type == em_82571) ||
2046			    (adapter->hw.mac_type == em_82572))) {
2047				int tarc0;
2048
2049				tarc0 = E1000_READ_REG(&adapter->hw, TARC0);
2050				tarc0 |= SPEED_MODE_BIT;
2051				E1000_WRITE_REG(&adapter->hw, TARC0, tarc0);
2052			}
2053			if (bootverbose)
2054				device_printf(dev, "Link is up %d Mbps %s\n",
2055				    adapter->link_speed,
2056				    ((adapter->link_duplex == FULL_DUPLEX) ?
2057				    "Full Duplex" : "Half Duplex"));
2058			adapter->link_active = 1;
2059			adapter->smartspeed = 0;
2060			ifp->if_baudrate = adapter->link_speed * 1000000;
2061			if_link_state_change(ifp, LINK_STATE_UP);
2062		}
2063	} else {
2064		if (adapter->link_active == 1) {
2065			ifp->if_baudrate = adapter->link_speed = 0;
2066			adapter->link_duplex = 0;
2067			if (bootverbose)
2068				device_printf(dev, "Link is Down\n");
2069			adapter->link_active = 0;
2070			if_link_state_change(ifp, LINK_STATE_DOWN);
2071		}
2072	}
2073}
2074
2075/*********************************************************************
2076 *
2077 *  This routine disables all traffic on the adapter by issuing a
2078 *  global reset on the MAC and deallocates TX/RX buffers.
2079 *
2080 **********************************************************************/
2081
2082static void
2083em_stop(void *arg)
2084{
2085	struct adapter	*adapter = arg;
2086	struct ifnet	*ifp = adapter->ifp;
2087
2088	EM_LOCK_ASSERT(adapter);
2089
2090	INIT_DEBUGOUT("em_stop: begin");
2091
2092	em_disable_intr(adapter);
2093	em_reset_hw(&adapter->hw);
2094	callout_stop(&adapter->timer);
2095	callout_stop(&adapter->tx_fifo_timer);
2096	em_free_transmit_structures(adapter);
2097	em_free_receive_structures(adapter);
2098
2099	/* Tell the stack that the interface is no longer active */
2100	ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
2101}
2102
2103
2104/********************************************************************
2105 *
2106 *  Determine hardware revision.
2107 *
2108 **********************************************************************/
2109static void
2110em_identify_hardware(struct adapter *adapter)
2111{
2112	device_t dev = adapter->dev;
2113
2114	/* Make sure our PCI config space has the necessary stuff set */
2115	pci_enable_busmaster(dev);
2116	pci_enable_io(dev, SYS_RES_MEMORY);
2117	adapter->hw.pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2);
2118
2119	/* Save off the information about this board */
2120	adapter->hw.vendor_id = pci_get_vendor(dev);
2121	adapter->hw.device_id = pci_get_device(dev);
2122	adapter->hw.revision_id = pci_get_revid(dev);
2123	adapter->hw.subsystem_vendor_id = pci_get_subvendor(dev);
2124	adapter->hw.subsystem_id = pci_get_subdevice(dev);
2125
2126	/* Identify the MAC */
2127	if (em_set_mac_type(&adapter->hw))
2128		device_printf(dev, "Unknown MAC Type\n");
2129
2130	if(adapter->hw.mac_type == em_82541 || adapter->hw.mac_type == em_82541_rev_2 ||
2131	   adapter->hw.mac_type == em_82547 || adapter->hw.mac_type == em_82547_rev_2)
2132		adapter->hw.phy_init_script = TRUE;
2133}
2134
2135static int
2136em_allocate_pci_resources(struct adapter *adapter)
2137{
2138	device_t	dev = adapter->dev;
2139	int		val, rid;
2140
2141	rid = PCIR_BAR(0);
2142	adapter->res_memory = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
2143	    &rid, RF_ACTIVE);
2144	if (adapter->res_memory == NULL) {
2145		device_printf(dev, "Unable to allocate bus resource: memory\n");
2146		return (ENXIO);
2147	}
2148	adapter->osdep.mem_bus_space_tag =
2149	rman_get_bustag(adapter->res_memory);
2150	adapter->osdep.mem_bus_space_handle = rman_get_bushandle(adapter->res_memory);
2151	adapter->hw.hw_addr = (uint8_t *)&adapter->osdep.mem_bus_space_handle;
2152
2153	if (adapter->hw.mac_type > em_82543) {
2154		/* Figure out where our IO BAR is. */
2155		for (rid = PCIR_BAR(0); rid < PCIR_CIS;) {
2156			val = pci_read_config(dev, rid, 4);
2157			if (EM_BAR_TYPE(val) == EM_BAR_TYPE_IO) {
2158				adapter->io_rid = rid;
2159				break;
2160			}
2161			rid += 4;
2162			/* check for 64bit BAR */
2163			if (EM_BAR_MEM_TYPE(val) == EM_BAR_MEM_TYPE_64BIT)
2164				rid += 4;
2165		}
2166		if (rid >= PCIR_CIS) {
2167			device_printf(dev, "Unable to locate IO BAR\n");
2168			return (ENXIO);
2169		}
2170		adapter->res_ioport = bus_alloc_resource_any(dev, SYS_RES_IOPORT,
2171		    &adapter->io_rid, RF_ACTIVE);
2172		if (adapter->res_ioport == NULL) {
2173			device_printf(dev, "Unable to allocate bus resource: "
2174			    "ioport\n");
2175			return (ENXIO);
2176		}
2177		adapter->hw.io_base = 0;
2178		adapter->osdep.io_bus_space_tag = rman_get_bustag(adapter->res_ioport);
2179		adapter->osdep.io_bus_space_handle =
2180		    rman_get_bushandle(adapter->res_ioport);
2181	}
2182
2183	/* For ICH8 we need to find the flash memory. */
2184	if (adapter->hw.mac_type == em_ich8lan) {
2185		rid = EM_FLASH;
2186
2187		adapter->flash_mem = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
2188		    &rid, RF_ACTIVE);
2189		adapter->osdep.flash_bus_space_tag = rman_get_bustag(adapter->flash_mem);
2190		adapter->osdep.flash_bus_space_handle =
2191		    rman_get_bushandle(adapter->flash_mem);
2192	}
2193
2194	rid = 0x0;
2195	adapter->res_interrupt = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid,
2196	    RF_SHAREABLE | RF_ACTIVE);
2197	if (adapter->res_interrupt == NULL) {
2198		device_printf(dev, "Unable to allocate bus resource: "
2199		    "interrupt\n");
2200		return (ENXIO);
2201	}
2202
2203	adapter->hw.back = &adapter->osdep;
2204
2205	return (0);
2206}
2207
2208int
2209em_allocate_intr(struct adapter *adapter)
2210{
2211	device_t dev = adapter->dev;
2212	int error;
2213
2214	/* Manually turn off all interrupts */
2215	E1000_WRITE_REG(&adapter->hw, IMC, 0xffffffff);
2216
2217#ifdef DEVICE_POLLING
2218	if (adapter->int_handler_tag == NULL && (error = bus_setup_intr(dev,
2219	    adapter->res_interrupt, INTR_TYPE_NET | INTR_MPSAFE, em_intr, adapter,
2220	    &adapter->int_handler_tag)) != 0) {
2221		device_printf(dev, "Failed to register interrupt handler: %d\n", error);
2222		return (error);
2223	}
2224#else
2225	/*
2226	 * Try allocating a fast interrupt and the associated deferred
2227	 * processing contexts.
2228	 */
2229	TASK_INIT(&adapter->rxtx_task, 0, em_handle_rxtx, adapter);
2230	TASK_INIT(&adapter->link_task, 0, em_handle_link, adapter);
2231	adapter->tq = taskqueue_create_fast("em_taskq", M_NOWAIT,
2232	    taskqueue_thread_enqueue, &adapter->tq);
2233	taskqueue_start_threads(&adapter->tq, 1, PI_NET, "%s taskq",
2234	    device_get_nameunit(adapter->dev));
2235	if ((error = bus_setup_intr(dev, adapter->res_interrupt,
2236	    INTR_TYPE_NET | INTR_FAST, em_intr_fast, adapter,
2237	    &adapter->int_handler_tag)) != 0) {
2238		device_printf(dev, "Failed to register fast interrupt "
2239			    "handler: %d\n", error);
2240		taskqueue_free(adapter->tq);
2241		adapter->tq = NULL;
2242		return (error);
2243	}
2244#endif
2245
2246	em_enable_intr(adapter);
2247	return (0);
2248}
2249
2250static void
2251em_free_intr(struct adapter *adapter)
2252{
2253	device_t dev = adapter->dev;
2254
2255	if (adapter->int_handler_tag != NULL) {
2256		bus_teardown_intr(dev, adapter->res_interrupt, adapter->int_handler_tag);
2257		adapter->int_handler_tag = NULL;
2258	}
2259	if (adapter->tq != NULL) {
2260		taskqueue_drain(adapter->tq, &adapter->rxtx_task);
2261		taskqueue_drain(taskqueue_fast, &adapter->link_task);
2262		taskqueue_free(adapter->tq);
2263		adapter->tq = NULL;
2264	}
2265}
2266
2267static void
2268em_free_pci_resources(struct adapter *adapter)
2269{
2270	device_t dev = adapter->dev;
2271
2272	if (adapter->res_interrupt != NULL)
2273		bus_release_resource(dev, SYS_RES_IRQ, 0, adapter->res_interrupt);
2274
2275	if (adapter->res_memory != NULL)
2276		bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BAR(0),
2277		    adapter->res_memory);
2278
2279	if (adapter->flash_mem != NULL)
2280		bus_release_resource(dev, SYS_RES_MEMORY, EM_FLASH,
2281		    adapter->flash_mem);
2282
2283	if (adapter->res_ioport != NULL)
2284		bus_release_resource(dev, SYS_RES_IOPORT, adapter->io_rid,
2285		    adapter->res_ioport);
2286}
2287
2288/*********************************************************************
2289 *
2290 *  Initialize the hardware to a configuration as specified by the
2291 *  adapter structure. The controller is reset, the EEPROM is
2292 *  verified, the MAC address is set, then the shared initialization
2293 *  routines are called.
2294 *
2295 **********************************************************************/
2296static int
2297em_hardware_init(struct adapter *adapter)
2298{
2299	device_t dev = adapter->dev;
2300	uint16_t rx_buffer_size;
2301
2302	INIT_DEBUGOUT("em_hardware_init: begin");
2303	/* Issue a global reset */
2304	em_reset_hw(&adapter->hw);
2305
2306	/* When hardware is reset, fifo_head is also reset */
2307	adapter->tx_fifo_head = 0;
2308
2309	/* Make sure we have a good EEPROM before we read from it */
2310	if (em_validate_eeprom_checksum(&adapter->hw) < 0) {
2311		device_printf(dev, "The EEPROM Checksum Is Not Valid\n");
2312		return (EIO);
2313	}
2314
2315	if (em_read_part_num(&adapter->hw, &(adapter->part_num)) < 0) {
2316		device_printf(dev, "EEPROM read error while reading part "
2317		    "number\n");
2318		return (EIO);
2319	}
2320
2321	/* Set up smart power down as default off on newer adapters. */
2322	if (!em_smart_pwr_down &&
2323	    (adapter->hw.mac_type == em_82571 || adapter->hw.mac_type == em_82572)) {
2324		uint16_t phy_tmp = 0;
2325
2326		/* Speed up time to link by disabling smart power down. */
2327		em_read_phy_reg(&adapter->hw, IGP02E1000_PHY_POWER_MGMT, &phy_tmp);
2328		phy_tmp &= ~IGP02E1000_PM_SPD;
2329		em_write_phy_reg(&adapter->hw, IGP02E1000_PHY_POWER_MGMT, phy_tmp);
2330	}
2331
2332	/*
2333	 * These parameters control the automatic generation (Tx) and
2334	 * response (Rx) to Ethernet PAUSE frames.
2335	 * - High water mark should allow for at least two frames to be
2336	 *   received after sending an XOFF.
2337	 * - Low water mark works best when it is very near the high water mark.
2338	 *   This allows the receiver to restart by sending XON when it has
2339	 *   drained a bit. Here we use an arbitrary value of 1500 which will
2340	 *   restart after one full frame is pulled from the buffer. There
2341	 *   could be several smaller frames in the buffer and if so they will
2342	 *   not trigger the XON until their total number reduces the buffer
2343	 *   by 1500.
2344	 * - The pause time is fairly large: 0x1000 quanta x 512ns ~= 2 msec.
2345	 */
2346	rx_buffer_size = ((E1000_READ_REG(&adapter->hw, PBA) & 0xffff) << 10 );
2347
2348	adapter->hw.fc_high_water = rx_buffer_size -
2349	    roundup2(adapter->hw.max_frame_size, 1024);
2350	adapter->hw.fc_low_water = adapter->hw.fc_high_water - 1500;
2351	if (adapter->hw.mac_type == em_80003es2lan)
2352		adapter->hw.fc_pause_time = 0xFFFF;
2353	else
2354		adapter->hw.fc_pause_time = 0x1000;
2355	adapter->hw.fc_send_xon = TRUE;
2356	adapter->hw.fc = E1000_FC_FULL;
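
	/*
	 * Worked example (illustrative numbers): a PBA read giving a 48 KB
	 * packet buffer yields rx_buffer_size = 48 * 1024 = 49152. With a
	 * 1518-byte max frame, roundup2(1518, 1024) = 2048, so
	 * fc_high_water = 49152 - 2048 = 47104 and
	 * fc_low_water  = 47104 - 1500 = 45604.
	 */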
2357
2358	if (em_init_hw(&adapter->hw) < 0) {
2359		device_printf(dev, "Hardware Initialization Failed\n");
2360		return (EIO);
2361	}
2362
2363	em_check_for_link(&adapter->hw);
2364
2365	return (0);
2366}
2367
2368/*********************************************************************
2369 *
2370 *  Setup networking device structure and register an interface.
2371 *
2372 **********************************************************************/
2373static void
2374em_setup_interface(device_t dev, struct adapter *adapter)
2375{
2376	struct ifnet   *ifp;
2377	INIT_DEBUGOUT("em_setup_interface: begin");
2378
2379	ifp = adapter->ifp = if_alloc(IFT_ETHER);
2380	if (ifp == NULL)
2381		panic("%s: can not if_alloc()", device_get_nameunit(dev));
2382	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
2383	ifp->if_mtu = ETHERMTU;
2384	ifp->if_init =  em_init;
2385	ifp->if_softc = adapter;
2386	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
2387	ifp->if_ioctl = em_ioctl;
2388	ifp->if_start = em_start;
2389	ifp->if_watchdog = em_watchdog;
2390	IFQ_SET_MAXLEN(&ifp->if_snd, adapter->num_tx_desc - 1);
2391	ifp->if_snd.ifq_drv_maxlen = adapter->num_tx_desc - 1;
2392	IFQ_SET_READY(&ifp->if_snd);
2393
2394	ether_ifattach(ifp, adapter->hw.mac_addr);
2395
2396	ifp->if_capabilities = ifp->if_capenable = 0;
2397
2398	if (adapter->hw.mac_type >= em_82543) {
2399		ifp->if_capabilities |= IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM;
2400		ifp->if_capenable |= IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM;
2401	}
2402
2403	/* Enable TSO if available */
2404	if ((adapter->hw.mac_type > em_82544) &&
2405	    (adapter->hw.mac_type != em_82547)) {
2406		ifp->if_capabilities |= IFCAP_TSO4;
2407		ifp->if_capenable |= IFCAP_TSO4;
2408	}
2409
2410	/*
2411	 * Tell the upper layer(s) we support long frames.
2412	 */
2413	ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header);
2414	ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU;
2415	ifp->if_capenable |= IFCAP_VLAN_MTU;
2416
2417#ifdef DEVICE_POLLING
2418	ifp->if_capabilities |= IFCAP_POLLING;
2419#endif
2420
2421	/*
2422	 * Specify the media types supported by this adapter and register
2423	 * callbacks to update media and link information
2424	 */
2425	ifmedia_init(&adapter->media, IFM_IMASK, em_media_change,
2426	    em_media_status);
2427	if ((adapter->hw.media_type == em_media_type_fiber) ||
2428	    (adapter->hw.media_type == em_media_type_internal_serdes)) {
2429		u_char fiber_type = IFM_1000_SX;	/* default type */
2430
2431		if (adapter->hw.mac_type == em_82545)
2432			fiber_type = IFM_1000_LX;
2433		ifmedia_add(&adapter->media, IFM_ETHER | fiber_type | IFM_FDX,
2434		    0, NULL);
2435		ifmedia_add(&adapter->media, IFM_ETHER | fiber_type, 0, NULL);
2436	} else {
2437		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T, 0, NULL);
2438		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T | IFM_FDX,
2439			    0, NULL);
2440		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX,
2441			    0, NULL);
2442		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX | IFM_FDX,
2443			    0, NULL);
2444		if (adapter->hw.phy_type != em_phy_ife) {
2445			ifmedia_add(&adapter->media,
2446				IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
2447			ifmedia_add(&adapter->media,
2448				IFM_ETHER | IFM_1000_T, 0, NULL);
2449		}
2450	}
2451	ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL);
2452	ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO);
2453}
2454
2455
2456/*********************************************************************
2457 *
2458 *  Workaround for SmartSpeed on 82541 and 82547 controllers
2459 *
2460 **********************************************************************/
2461static void
2462em_smartspeed(struct adapter *adapter)
2463{
2464	uint16_t phy_tmp;
2465
2466	if (adapter->link_active || (adapter->hw.phy_type != em_phy_igp) ||
2467	    adapter->hw.autoneg == 0 ||
2468	    (adapter->hw.autoneg_advertised & ADVERTISE_1000_FULL) == 0)
2469		return;
2470
2471	if (adapter->smartspeed == 0) {
2472		/* If Master/Slave config fault is asserted twice,
2473		 * we assume back-to-back */
2474		em_read_phy_reg(&adapter->hw, PHY_1000T_STATUS, &phy_tmp);
2475		if (!(phy_tmp & SR_1000T_MS_CONFIG_FAULT))
2476			return;
2477		em_read_phy_reg(&adapter->hw, PHY_1000T_STATUS, &phy_tmp);
2478		if (phy_tmp & SR_1000T_MS_CONFIG_FAULT) {
2479			em_read_phy_reg(&adapter->hw, PHY_1000T_CTRL, &phy_tmp);
2480			if(phy_tmp & CR_1000T_MS_ENABLE) {
2481				phy_tmp &= ~CR_1000T_MS_ENABLE;
2482				em_write_phy_reg(&adapter->hw, PHY_1000T_CTRL,
2483				    phy_tmp);
2484				adapter->smartspeed++;
2485				if(adapter->hw.autoneg &&
2486				   !em_phy_setup_autoneg(&adapter->hw) &&
2487				   !em_read_phy_reg(&adapter->hw, PHY_CTRL,
2488				    &phy_tmp)) {
2489					phy_tmp |= (MII_CR_AUTO_NEG_EN |
2490						    MII_CR_RESTART_AUTO_NEG);
2491					em_write_phy_reg(&adapter->hw, PHY_CTRL,
2492					    phy_tmp);
2493				}
2494			}
2495		}
2496		return;
2497	} else if(adapter->smartspeed == EM_SMARTSPEED_DOWNSHIFT) {
2498		/* If still no link, perhaps using 2/3 pair cable */
2499		em_read_phy_reg(&adapter->hw, PHY_1000T_CTRL, &phy_tmp);
2500		phy_tmp |= CR_1000T_MS_ENABLE;
2501		em_write_phy_reg(&adapter->hw, PHY_1000T_CTRL, phy_tmp);
2502		if(adapter->hw.autoneg &&
2503		   !em_phy_setup_autoneg(&adapter->hw) &&
2504		   !em_read_phy_reg(&adapter->hw, PHY_CTRL, &phy_tmp)) {
2505			phy_tmp |= (MII_CR_AUTO_NEG_EN |
2506				    MII_CR_RESTART_AUTO_NEG);
2507			em_write_phy_reg(&adapter->hw, PHY_CTRL, phy_tmp);
2508		}
2509	}
2510	/* Restart process after EM_SMARTSPEED_MAX iterations */
2511	if(adapter->smartspeed++ == EM_SMARTSPEED_MAX)
2512		adapter->smartspeed = 0;
2513}
2514
2515
2516/*
2517 * Manage DMA'able memory.
2518 */
2519static void
2520em_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
2521{
2522	if (error)
2523		return;
2524	*(bus_addr_t *) arg = segs[0].ds_addr;
2525}
2526
2527static int
2528em_dma_malloc(struct adapter *adapter, bus_size_t size, struct em_dma_alloc *dma,
2529	int mapflags)
2530{
2531	int error;
2532
2533	error = bus_dma_tag_create(bus_get_dma_tag(adapter->dev), /* parent */
2534				EM_DBA_ALIGN, 0,	/* alignment, bounds */
2535				BUS_SPACE_MAXADDR,	/* lowaddr */
2536				BUS_SPACE_MAXADDR,	/* highaddr */
2537				NULL, NULL,		/* filter, filterarg */
2538				size,			/* maxsize */
2539				1,			/* nsegments */
2540				size,			/* maxsegsize */
2541				0,			/* flags */
2542				NULL,			/* lockfunc */
2543				NULL,			/* lockarg */
2544				&dma->dma_tag);
2545	if (error) {
2546		device_printf(adapter->dev, "%s: bus_dma_tag_create failed: %d\n",
2547		    __func__, error);
2548		goto fail_0;
2549	}
2550
2551	error = bus_dmamem_alloc(dma->dma_tag, (void**) &dma->dma_vaddr,
2552	    BUS_DMA_NOWAIT, &dma->dma_map);
2553	if (error) {
2554		device_printf(adapter->dev, "%s: bus_dmamem_alloc(%ju) failed: %d\n",
2555		    __func__, (uintmax_t)size, error);
2556		bus_dma_tag_destroy(dma->dma_tag);	/* nothing was allocated */
2557		goto fail_0;
2558	}
2559	dma->dma_paddr = 0;
2560	error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
2561	    size, em_dmamap_cb, &dma->dma_paddr, mapflags | BUS_DMA_NOWAIT);
2562	if (error || dma->dma_paddr == 0) {
2563		device_printf(adapter->dev, "%s: bus_dmamap_load failed: %d\n",
2564		    __func__, error);
2565		goto fail_3;
2566	}
2567
2568	return (0);
2569
2570fail_3:
2571	bus_dmamap_unload(dma->dma_tag, dma->dma_map);
2572fail_2:
2573	bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
2574	bus_dma_tag_destroy(dma->dma_tag);
2575fail_0:
2576	dma->dma_map = NULL;
2577	dma->dma_tag = NULL;
2578
2579	return (error);
2580}
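
/*
 * Typical usage (a sketch, not a verbatim call site): descriptor rings
 * are carved out with this helper, e.g.
 *
 *	error = em_dma_malloc(adapter,
 *	    adapter->num_tx_desc * sizeof(struct em_tx_desc),
 *	    &adapter->txdma, BUS_DMA_NOWAIT);
 *
 * On success the KVA is left in dma_vaddr and the bus address in
 * dma_paddr, ready for the TDBAL/TDBAH (or RDBAL/RDBAH) writes.
 */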
2581
2582static void
2583em_dma_free(struct adapter *adapter, struct em_dma_alloc *dma)
2584{
2585	if (dma->dma_tag == NULL)
2586		return;
2587	if (dma->dma_map != NULL) {
2588		bus_dmamap_sync(dma->dma_tag, dma->dma_map,
2589		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
2590		bus_dmamap_unload(dma->dma_tag, dma->dma_map);
2591		bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
2592		dma->dma_map = NULL;
2593	}
2594	bus_dma_tag_destroy(dma->dma_tag);
2595	dma->dma_tag = NULL;
2596}
2597
2598
2599/*********************************************************************
2600 *
2601 *  Allocate memory for tx_buffer structures. The tx_buffer stores all
2602 *  the information needed to transmit a packet on the wire.
2603 *
2604 **********************************************************************/
2605static int
2606em_allocate_transmit_structures(struct adapter *adapter)
2607{
2608	adapter->tx_buffer_area =  malloc(sizeof(struct em_buffer) *
2609	    adapter->num_tx_desc, M_DEVBUF, M_NOWAIT);
2610	if (adapter->tx_buffer_area == NULL) {
2611		device_printf(adapter->dev, "Unable to allocate tx_buffer memory\n");
2612		return (ENOMEM);
2613	}
2614
2615	bzero(adapter->tx_buffer_area, sizeof(struct em_buffer) * adapter->num_tx_desc);
2616
2617	return (0);
2618}
2619
2620/*********************************************************************
2621 *
2622 *  Allocate and initialize transmit structures.
2623 *
2624 **********************************************************************/
2625static int
2626em_setup_transmit_structures(struct adapter *adapter)
2627{
2628	struct ifnet   *ifp = adapter->ifp;
2629	device_t dev = adapter->dev;
2630	struct em_buffer *tx_buffer;
2631	bus_size_t size, segsize;
2632	int error, i;
2633
2634	/*
2635	 * Setup DMA descriptor areas.
2636	 */
2637	segsize = size = roundup2(adapter->hw.max_frame_size, MCLBYTES);
2638
2639	/* Overrides for TSO - want large sizes */
2640	if (ifp->if_hwassist & EM_TCPSEG_FEATURES) {
2641		size = EM_TSO_SIZE;
2642		segsize = PAGE_SIZE;
2643	}
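
	/*
	 * A single TSO transmit can approach 64 KB (EM_TSO_SIZE), far more
	 * than one cluster, so allow a much larger total mapping, scattered
	 * in page-sized segments.
	 */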
2644
2645	if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),	/* parent */
2646				1, 0,			/* alignment, bounds */
2647				BUS_SPACE_MAXADDR,	/* lowaddr */
2648				BUS_SPACE_MAXADDR,	/* highaddr */
2649				NULL, NULL,		/* filter, filterarg */
2650				size,			/* maxsize */
2651				EM_MAX_SCATTER,		/* nsegments */
2652				segsize,		/* maxsegsize */
2653				0,			/* flags */
2654				NULL,		/* lockfunc */
2655				NULL,		/* lockarg */
2656				&adapter->txtag)) != 0) {
2657		device_printf(dev, "Unable to allocate TX DMA tag\n");
2658		goto fail;
2659	}
2660
2661	if ((error = em_allocate_transmit_structures(adapter)) != 0)
2662		goto fail;
2663
2664	bzero(adapter->tx_desc_base, (sizeof(struct em_tx_desc)) * adapter->num_tx_desc);
2665	tx_buffer = adapter->tx_buffer_area;
2666	for (i = 0; i < adapter->num_tx_desc; i++) {
2667		error = bus_dmamap_create(adapter->txtag, 0, &tx_buffer->map);
2668		if (error != 0) {
2669			device_printf(dev, "Unable to create TX DMA map\n");
2670			goto fail;
2671		}
2672		tx_buffer++;
2673	}
2674
2675	adapter->next_avail_tx_desc = 0;
2676	adapter->next_tx_to_clean = 0;
2677
2678	/* Set number of descriptors available */
2679	adapter->num_tx_desc_avail = adapter->num_tx_desc;
2680
2681	/* Set checksum context */
2682	adapter->active_checksum_context = OFFLOAD_NONE;
2683	bus_dmamap_sync(adapter->txdma.dma_tag, adapter->txdma.dma_map,
2684	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
2685
2686	return (0);
2687
2688fail:
2689	em_free_transmit_structures(adapter);
2690	return (error);
2691}
2692
2693/*********************************************************************
2694 *
2695 *  Enable transmit unit.
2696 *
2697 **********************************************************************/
2698static void
2699em_initialize_transmit_unit(struct adapter *adapter)
2700{
2701	uint32_t	reg_tctl;
2702	uint32_t	reg_tipg = 0;
2703	uint64_t	bus_addr;
2704
2705	INIT_DEBUGOUT("em_initialize_transmit_unit: begin");
2706	/* Setup the Base and Length of the Tx Descriptor Ring */
2707	bus_addr = adapter->txdma.dma_paddr;
2708	E1000_WRITE_REG(&adapter->hw, TDLEN,
2709	    adapter->num_tx_desc * sizeof(struct em_tx_desc));
2710	E1000_WRITE_REG(&adapter->hw, TDBAH, (uint32_t)(bus_addr >> 32));
2711	E1000_WRITE_REG(&adapter->hw, TDBAL, (uint32_t)bus_addr);
2712
2713	/* Setup the HW Tx Head and Tail descriptor pointers */
2714	E1000_WRITE_REG(&adapter->hw, TDT, 0);
2715	E1000_WRITE_REG(&adapter->hw, TDH, 0);
2716
2717
2718	HW_DEBUGOUT2("Base = %x, Length = %x\n", E1000_READ_REG(&adapter->hw, TDBAL),
2719	    E1000_READ_REG(&adapter->hw, TDLEN));
2720
2721	/* Set the default values for the Tx Inter Packet Gap timer */
2722	switch (adapter->hw.mac_type) {
2723	case em_82542_rev2_0:
2724	case em_82542_rev2_1:
2725		reg_tipg = DEFAULT_82542_TIPG_IPGT;
2726		reg_tipg |= DEFAULT_82542_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT;
2727		reg_tipg |= DEFAULT_82542_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT;
2728		break;
2729	case em_80003es2lan:
2730		reg_tipg = DEFAULT_82543_TIPG_IPGR1;
2731		reg_tipg |= DEFAULT_80003ES2LAN_TIPG_IPGR2 <<
2732		    E1000_TIPG_IPGR2_SHIFT;
2733		break;
2734	default:
2735		if ((adapter->hw.media_type == em_media_type_fiber) ||
2736		    (adapter->hw.media_type == em_media_type_internal_serdes))
2737			reg_tipg = DEFAULT_82543_TIPG_IPGT_FIBER;
2738		else
2739			reg_tipg = DEFAULT_82543_TIPG_IPGT_COPPER;
2740		reg_tipg |= DEFAULT_82543_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT;
2741		reg_tipg |= DEFAULT_82543_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT;
2742	}
2743
2744	E1000_WRITE_REG(&adapter->hw, TIPG, reg_tipg);
2745	E1000_WRITE_REG(&adapter->hw, TIDV, adapter->tx_int_delay.value);
2746	if(adapter->hw.mac_type >= em_82540)
2747		E1000_WRITE_REG(&adapter->hw, TADV, adapter->tx_abs_int_delay.value);
2748
2749	/* Program the Transmit Control Register */
2750	reg_tctl = E1000_TCTL_PSP | E1000_TCTL_EN |
2751		   (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT);
2752	if (adapter->hw.mac_type >= em_82571)
2753		reg_tctl |= E1000_TCTL_MULR;
2754	if (adapter->link_duplex == FULL_DUPLEX) {
2755		reg_tctl |= E1000_FDX_COLLISION_DISTANCE << E1000_COLD_SHIFT;
2756	} else {
2757		reg_tctl |= E1000_HDX_COLLISION_DISTANCE << E1000_COLD_SHIFT;
2758	}
2759	/* This write will effectively turn on the transmit unit. */
2760	E1000_WRITE_REG(&adapter->hw, TCTL, reg_tctl);
2761
2762	/* Setup Transmit Descriptor Base Settings */
2763	adapter->txd_cmd = E1000_TXD_CMD_IFCS;
2764
2765	if (adapter->tx_int_delay.value > 0)
2766		adapter->txd_cmd |= E1000_TXD_CMD_IDE;
2767}
2768
2769/*********************************************************************
2770 *
2771 *  Free all transmit related data structures.
2772 *
2773 **********************************************************************/
2774static void
2775em_free_transmit_structures(struct adapter *adapter)
2776{
2777	struct em_buffer *tx_buffer;
2778	int i;
2779
2780	INIT_DEBUGOUT("free_transmit_structures: begin");
2781
2782	if (adapter->tx_buffer_area != NULL) {
2783		tx_buffer = adapter->tx_buffer_area;
2784		for (i = 0; i < adapter->num_tx_desc; i++, tx_buffer++) {
2785			if (tx_buffer->m_head != NULL) {
2786				bus_dmamap_sync(adapter->txtag, tx_buffer->map,
2787				    BUS_DMASYNC_POSTWRITE);
2788				bus_dmamap_unload(adapter->txtag,
2789				    tx_buffer->map);
2790				m_freem(tx_buffer->m_head);
2791				tx_buffer->m_head = NULL;
2792			} else if (tx_buffer->map != NULL)
2793				bus_dmamap_unload(adapter->txtag,
2794				    tx_buffer->map);
2795			if (tx_buffer->map != NULL) {
2796				bus_dmamap_destroy(adapter->txtag,
2797				    tx_buffer->map);
2798				tx_buffer->map = NULL;
2799			}
2800		}
2801	}
2802	if (adapter->tx_buffer_area != NULL) {
2803		free(adapter->tx_buffer_area, M_DEVBUF);
2804		adapter->tx_buffer_area = NULL;
2805	}
2806	if (adapter->txtag != NULL) {
2807		bus_dma_tag_destroy(adapter->txtag);
2808		adapter->txtag = NULL;
2809	}
2810}
2811
2812/*********************************************************************
2813 *
2814 *  The offload context needs to be set when we transfer the first
2815 *  packet of a particular protocol (TCP/UDP). We change the
2816 *  context only if the protocol type changes.
2817 *
2818 **********************************************************************/
2819static void
2820em_transmit_checksum_setup(struct adapter *adapter, struct mbuf *mp,
2821    uint32_t *txd_upper, uint32_t *txd_lower)
2822{
2823	struct em_context_desc *TXD;
2824	struct em_buffer *tx_buffer;
2825	struct ether_vlan_header *eh;
2826	struct ip *ip;
2827	struct ip6_hdr *ip6;
2828	struct tcphdr *th;
2829	int curr_txd, ehdrlen, hdr_len, ip_hlen;
2830	uint32_t cmd = 0;
2831	uint16_t etype;
2832	uint8_t ipproto;
2833
2834	/* Setup checksum offload context. */
2835	curr_txd = adapter->next_avail_tx_desc;
2836	tx_buffer = &adapter->tx_buffer_area[curr_txd];
2837	TXD = (struct em_context_desc *) &adapter->tx_desc_base[curr_txd];
2838
2839	*txd_lower = E1000_TXD_CMD_DEXT |	/* Extended descr type */
2840		     E1000_TXD_DTYP_D;		/* Data descr */
2841
2842	/*
2843	 * Determine where frame payload starts.
2844	 * Jump over vlan headers if already present,
2845	 * helpful for QinQ too.
2846	 */
2847	eh = mtod(mp, struct ether_vlan_header *);
2848	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
2849		etype = ntohs(eh->evl_proto);
2850		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
2851	} else {
2852		etype = ntohs(eh->evl_encap_proto);
2853		ehdrlen = ETHER_HDR_LEN;
2854	}
2855
2856	/*
2857	 * We only support TCP/UDP for IPv4 and IPv6 for the moment.
2858	 * TODO: Support SCTP too when it hits the tree.
2859	 */
2860	switch (etype) {
2861	case ETHERTYPE_IP:
2862		ip = (struct ip *)(mp->m_data + ehdrlen);
2863		ip_hlen = ip->ip_hl << 2;
2864
2865		/* Setup of IP header checksum. */
2866		if (mp->m_pkthdr.csum_flags & CSUM_IP) {
2867			/*
2868			 * Start offset for header checksum calculation.
2869			 * End offset for header checksum calculation.
2870			 * Offset of place to put the checksum.
2871			 */
2872			TXD->lower_setup.ip_fields.ipcss = ehdrlen;
2873			TXD->lower_setup.ip_fields.ipcse =
2874			    htole16(ehdrlen + ip_hlen);
2875			TXD->lower_setup.ip_fields.ipcso =
2876			    ehdrlen + offsetof(struct ip, ip_sum);
2877			cmd |= E1000_TXD_CMD_IP;
2878			*txd_upper |= E1000_TXD_POPTS_IXSM << 8;
2879		}
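
		/*
		 * For a plain untagged IPv4 frame without options
		 * (illustrative): ehdrlen = 14 and ip_hlen = 20, so the
		 * context above carries ipcss = 14, ipcse = 34 and
		 * ipcso = 14 + 10 = 24, the offset of ip_sum.
		 */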
2880
2881		if (mp->m_len < ehdrlen + ip_hlen)
2882			return;	/* failure */
2883
2884		hdr_len = ehdrlen + ip_hlen;
2885		ipproto = ip->ip_p;
2886
2887		break;
2888	case ETHERTYPE_IPV6:
2889		ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
2890		ip_hlen = sizeof(struct ip6_hdr); /* XXX: No header stacking. */
2891
2892		if (mp->m_len < ehdrlen + ip_hlen)
2893			return;	/* failure */
2894
2895		/* IPv6 doesn't have a header checksum. */
2896
2897		hdr_len = ehdrlen + ip_hlen;
2898		ipproto = ip6->ip6_nxt;
2899
2900		break;
2901	default:
2902		*txd_upper = 0;
2903		*txd_lower = 0;
2904		return;
2905	}
2906
2907	switch (ipproto) {
2908	case IPPROTO_TCP:
2909		if (mp->m_pkthdr.csum_flags & CSUM_TCP) {
2910			/*
2911			 * Start offset for payload checksum calculation.
2912			 * End offset for payload checksum calculation.
2913			 * Offset of place to put the checksum.
2914			 */
2915			th = (struct tcphdr *)(mp->m_data + hdr_len);
2916			TXD->upper_setup.tcp_fields.tucss = hdr_len;
2917			TXD->upper_setup.tcp_fields.tucse = htole16(0);
2918			TXD->upper_setup.tcp_fields.tucso =
2919			    hdr_len + offsetof(struct tcphdr, th_sum);
2920			cmd |= E1000_TXD_CMD_TCP;
2921			*txd_upper |= E1000_TXD_POPTS_TXSM << 8;
2922		}
2923		break;
2924	case IPPROTO_UDP:
2925		if (mp->m_pkthdr.csum_flags & CSUM_UDP) {
2926			/*
2927			 * Start offset for header checksum calculation.
2928			 * End offset for header checksum calculation.
2929			 * Offset of place to put the checksum.
2930			 */
2931			TXD->upper_setup.tcp_fields.tucss = hdr_len;
2932			TXD->upper_setup.tcp_fields.tucse = htole16(0);
2933			TXD->upper_setup.tcp_fields.tucso =
2934			    hdr_len + offsetof(struct udphdr, uh_sum);
2935			*txd_upper |= E1000_TXD_POPTS_TXSM << 8;
2936		}
2937		break;
2938	default:
2939		break;
2940	}
2941
2942	TXD->tcp_seg_setup.data = htole32(0);
2943	TXD->cmd_and_length =
2944	    htole32(adapter->txd_cmd | E1000_TXD_CMD_DEXT | cmd);
2945	tx_buffer->m_head = NULL;
2946	tx_buffer->next_eop = -1;
2947
2948	if (++curr_txd == adapter->num_tx_desc)
2949		curr_txd = 0;
2950
2951	adapter->num_tx_desc_avail--;
2952	adapter->next_avail_tx_desc = curr_txd;
2953}
2954
2955/**********************************************************************
2956 *
2957 *  Setup work for hardware segmentation offload (TSO)
2958 *
2959 **********************************************************************/
2960static boolean_t
2961em_tso_setup(struct adapter *adapter, struct mbuf *mp, uint32_t *txd_upper,
2962   uint32_t *txd_lower)
2963{
2964	struct em_context_desc *TXD;
2965	struct em_buffer *tx_buffer;
2966	struct ether_vlan_header *eh;
2967	struct ip *ip;
2968	struct ip6_hdr *ip6;
2969	struct tcphdr *th;
2970	int curr_txd, ehdrlen, hdr_len, ip_hlen, isip6;
2971	uint16_t etype;
2972
2973	/*
2974	 * XXX: This is not really correct as the stack would not have
2975	 * set up all checksums.
2976	 * XXX: Returning FALSE is not sufficient, as we may have to report
2977	 * true failure cases as well.  Should return -1 (failure), 0 (no)
2978	 * and 1 (success).
2979	 */
2980	if (mp->m_pkthdr.len <= E1000_TX_BUFFER_SIZE)
2981		return FALSE;	/* 0 */
2982
2983	/*
2984	 * This function could/should be extended to support IP/IPv6
2985	 * fragmentation as well.  But as they say, one step at a time.
2986	 */
2987
2988	/*
2989	 * Determine where frame payload starts.
2990	 * Jump over vlan headers if already present,
2991	 * helpful for QinQ too.
2992	 */
2993	eh = mtod(mp, struct ether_vlan_header *);
2994	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
2995		etype = ntohs(eh->evl_proto);
2996		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
2997	} else {
2998		etype = ntohs(eh->evl_encap_proto);
2999		ehdrlen = ETHER_HDR_LEN;
3000	}
3001
3002	/* Ensure we have at least the IP+TCP header in the first mbuf. */
3003	if (mp->m_len < ehdrlen + sizeof(struct ip) + sizeof(struct tcphdr))
3004		return FALSE;	/* -1 */
3005
3006	/*
3007	 * We only support TCP for IPv4 (IPv6 not yet) for the moment.
3008	 * TODO: Support SCTP too when it hits the tree.
3009	 */
3010	switch (etype) {
3011	case ETHERTYPE_IP:
3012		isip6 = 0;
3013		ip = (struct ip *)(mp->m_data + ehdrlen);
3014		if (ip->ip_p != IPPROTO_TCP)
3015			return FALSE;	/* 0 */
3016		ip->ip_len = 0;
3017		ip->ip_sum = 0;
3018		ip_hlen = ip->ip_hl << 2;
3019		if (mp->m_len < ehdrlen + ip_hlen + sizeof(struct tcphdr))
3020			return FALSE;	/* -1 */
3021		th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
3022#if 1
3023		th->th_sum = in_pseudo(ip->ip_src.s_addr,
3024		    ip->ip_dst.s_addr, htons(IPPROTO_TCP));
3025#else
3026		th->th_sum = mp->m_pkthdr.csum_data;
3027#endif
3028		break;
3029	case ETHERTYPE_IPV6:
3030		isip6 = 1;
3031		return FALSE;			/* Not supported yet. */
3032		ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
3033		if (ip6->ip6_nxt != IPPROTO_TCP)
3034			return FALSE;	/* 0 */
3035		ip6->ip6_plen = 0;
3036		ip_hlen = sizeof(struct ip6_hdr); /* XXX: no header stacking. */
3037		if (mp->m_len < ehdrlen + ip_hlen + sizeof(struct tcphdr))
3038			return FALSE;	/* -1 */
3039		th = (struct tcphdr *)((caddr_t)ip6 + ip_hlen);
3040#if 0
3041		th->th_sum = in6_pseudo(ip6->ip6_src, ip6->ip6_dst,
3042		    htons(IPPROTO_TCP));	/* XXX: no such function yet. */
3043#else
3044		th->th_sum = mp->m_pkthdr.csum_data;
3045#endif
3046		break;
3047	default:
3048		return FALSE;
3049	}
3050	hdr_len = ehdrlen + ip_hlen + (th->th_off << 2);
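	/*
	 * e.g. for IPv4/TCP with no options (illustrative):
	 * 14 + 20 + 20 = 54 bytes of header precede the payload of each
	 * generated segment.
	 */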
3051
3052	*txd_lower = (E1000_TXD_CMD_DEXT |	/* Extended descr type */
3053		      E1000_TXD_DTYP_D |	/* Data descr type */
3054		      E1000_TXD_CMD_TSE);	/* Do TSE on this packet */
3055
3056	/* IP and/or TCP header checksum calculation and insertion. */
3057	*txd_upper = ((isip6 ? 0 : E1000_TXD_POPTS_IXSM) |
3058		      E1000_TXD_POPTS_TXSM) << 8;
3059
3060	curr_txd = adapter->next_avail_tx_desc;
3061	tx_buffer = &adapter->tx_buffer_area[curr_txd];
3062	TXD = (struct em_context_desc *) &adapter->tx_desc_base[curr_txd];
3063
3064	/* IPv6 doesn't have a header checksum. */
3065	if (!isip6) {
3066		/*
3067		 * Start offset for header checksum calculation.
3068		 * End offset for header checksum calculation.
3069		 * Offset of place to put the checksum.
3070		 */
3071		TXD->lower_setup.ip_fields.ipcss = ehdrlen;
3072		TXD->lower_setup.ip_fields.ipcse =
3073		    htole16(ehdrlen + ip_hlen - 1);
3074		TXD->lower_setup.ip_fields.ipcso =
3075		    ehdrlen + offsetof(struct ip, ip_sum);
3076	}
3077	/*
3078	 * Start offset for payload checksum calculation.
3079	 * End offset for payload checksum calculation.
3080	 * Offset of place to put the checksum.
3081	 */
3082	TXD->upper_setup.tcp_fields.tucss =
3083	    ehdrlen + ip_hlen;
3084	TXD->upper_setup.tcp_fields.tucse = 0;
3085	TXD->upper_setup.tcp_fields.tucso =
3086	    ehdrlen + ip_hlen + offsetof(struct tcphdr, th_sum);
3087	/*
3088	 * Payload size per packet w/o any headers.
3089	 * Length of all headers up to payload.
3090	 */
3091	TXD->tcp_seg_setup.fields.mss = htole16(mp->m_pkthdr.tso_segsz);
3092	TXD->tcp_seg_setup.fields.hdr_len = hdr_len;
3093
3094	TXD->cmd_and_length = htole32(adapter->txd_cmd |
3095				E1000_TXD_CMD_DEXT |	/* Extended descr */
3096				E1000_TXD_CMD_TSE |	/* TSE context */
3097				(isip6 ? 0 : E1000_TXD_CMD_IP) | /* Do IP csum */
3098				E1000_TXD_CMD_TCP |	/* Do TCP checksum */
3099				(mp->m_pkthdr.len - (hdr_len))); /* Total len */
3100
3101	tx_buffer->m_head = NULL;
3102
3103	if (++curr_txd == adapter->num_tx_desc)
3104		curr_txd = 0;
3105
3106	adapter->num_tx_desc_avail--;
3107	adapter->next_avail_tx_desc = curr_txd;
3108	adapter->tx_tso = TRUE;
3109
3110	return TRUE;
3111}
3112
3113/**********************************************************************
3114 *
3115 *  Examine each tx_buffer in the used queue. If the hardware is done
3116 *  processing the packet then free associated resources. The
3117 *  tx_buffer is put back on the free queue.
3118 *
3119 **********************************************************************/
3120static void
3121em_txeof(struct adapter *adapter)
3122{
3123	int first, last, done, num_avail;
3124	struct em_buffer *tx_buffer;
3125	struct em_tx_desc   *tx_desc, *eop_desc;
3126	struct ifnet   *ifp = adapter->ifp;
3127
3128	EM_LOCK_ASSERT(adapter);
3129
3130	if (adapter->num_tx_desc_avail == adapter->num_tx_desc)
3131		return;
3132
3133	num_avail = adapter->num_tx_desc_avail;
3134	first = adapter->next_tx_to_clean;
3135	tx_desc = &adapter->tx_desc_base[first];
3136	tx_buffer = &adapter->tx_buffer_area[first];
3137	last = tx_buffer->next_eop;
3138	eop_desc = &adapter->tx_desc_base[last];
3139
3140	/*
3141	 * Now calculate the terminating index
3142	 * for the cleanup loop below.
3143	 */
3144	if (++last == adapter->num_tx_desc)
3145		last = 0;
3146	done = last;
3147
3148	bus_dmamap_sync(adapter->txdma.dma_tag, adapter->txdma.dma_map,
3149	    BUS_DMASYNC_POSTREAD);
3150	while (eop_desc->upper.fields.status & E1000_TXD_STAT_DD) {
3151		/* We clean the range of the packet */
3152		while (first != done) {
3153			tx_desc->upper.data = 0;
3154			tx_desc->lower.data = 0;
3155			num_avail++;
3156
3157			if (tx_buffer->m_head) {
3158				ifp->if_opackets++;
3159				bus_dmamap_sync(adapter->txtag, tx_buffer->map,
3160				    BUS_DMASYNC_POSTWRITE);
3161				bus_dmamap_unload(adapter->txtag,
3162				    tx_buffer->map);
3163
3164				m_freem(tx_buffer->m_head);
3165				tx_buffer->m_head = NULL;
3166			}
3167			tx_buffer->next_eop = -1;
3168
3169			if (++first == adapter->num_tx_desc)
3170				first = 0;
3171
3172			tx_buffer = &adapter->tx_buffer_area[first];
3173			tx_desc = &adapter->tx_desc_base[first];
3174		}
3175		/* See if we can continue to the next packet */
3176		last = tx_buffer->next_eop;
3177		if (last != -1) {
3178			eop_desc = &adapter->tx_desc_base[last];
3179			/* Get new done point */
3180			if (++last == adapter->num_tx_desc)
3181				last = 0;
3182			done = last;
3183		} else
3184			break;
3185	}
3186	bus_dmamap_sync(adapter->txdma.dma_tag, adapter->txdma.dma_map,
3187	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3188
3189	adapter->next_tx_to_clean = first;
3190
3191	/*
3192	 * If we have enough room, clear IFF_DRV_OACTIVE to tell the stack
3193	 * that it is OK to send packets.
3194	 * If there are no pending descriptors, clear the timeout. Otherwise,
3195	 * if some descriptors have been freed, restart the timeout.
3196	 */
3197	if (num_avail > EM_TX_CLEANUP_THRESHOLD) {
3198		ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
3199		if (num_avail == adapter->num_tx_desc)
3200			ifp->if_timer = 0;
3201		else if (num_avail != adapter->num_tx_desc_avail)
3202			ifp->if_timer = EM_TX_TIMEOUT;
3203	}
3204	adapter->num_tx_desc_avail = num_avail;
3205}
3206
3207/*********************************************************************
3208 *
3209 *  Get a buffer from system mbuf buffer pool.
3210 *
3211 **********************************************************************/
3212static int
3213em_get_buf(struct adapter *adapter, int i)
3214{
3215	struct mbuf		*m;
3216	bus_dma_segment_t	segs[1];
3217	bus_dmamap_t		map;
3218	struct em_buffer	*rx_buffer;
3219	int			error, nsegs;
3220
3221	m = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR);
3222	if (m == NULL) {
3223		adapter->mbuf_cluster_failed++;
3224		return (ENOBUFS);
3225	}
3226	m->m_len = m->m_pkthdr.len = MCLBYTES;
3227	if (adapter->hw.max_frame_size <= (MCLBYTES - ETHER_ALIGN))
3228		m_adj(m, ETHER_ALIGN);
3229
3230	/*
3231	 * Using memory from the mbuf cluster pool, invoke the
3232	 * bus_dma machinery to arrange the memory mapping.
3233	 */
3234	error = bus_dmamap_load_mbuf_sg(adapter->rxtag, adapter->rx_sparemap,
3235	    m, segs, &nsegs, BUS_DMA_NOWAIT);
3236	if (error != 0) {
3237		m_free(m);
3238		return (error);
3239	}
3240	/* If nsegs is wrong then the stack is corrupt. */
3241	KASSERT(nsegs == 1, ("Too many segments returned!"));
3242
3243	rx_buffer = &adapter->rx_buffer_area[i];
3244	if (rx_buffer->m_head != NULL)
3245		bus_dmamap_unload(adapter->rxtag, rx_buffer->map);
3246
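	/*
	 * The new mbuf was loaded into rx_sparemap above, so swap the
	 * spare with this slot's map; the old map becomes the new spare.
	 * A failed load therefore never disturbs the mapping already
	 * installed in the ring slot.
	 */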
3247	map = rx_buffer->map;
3248	rx_buffer->map = adapter->rx_sparemap;
3249	adapter->rx_sparemap = map;
3250	bus_dmamap_sync(adapter->rxtag, rx_buffer->map, BUS_DMASYNC_PREREAD);
3251	rx_buffer->m_head = m;
3252
3253	adapter->rx_desc_base[i].buffer_addr = htole64(segs[0].ds_addr);
3254
3255	return (0);
3256}
3257
3258/*********************************************************************
3259 *
3260 *  Allocate memory for rx_buffer structures. Since we use one
3261 *  rx_buffer per received packet, the maximum number of rx_buffers
3262 *  that we'll need is equal to the number of receive descriptors
3263 *  that we've allocated.
3264 *
3265 **********************************************************************/
3266static int
3267em_allocate_receive_structures(struct adapter *adapter)
3268{
3269	device_t dev = adapter->dev;
3270	struct em_buffer *rx_buffer;
3271	int i, error;
3272
3273	adapter->rx_buffer_area = malloc(sizeof(struct em_buffer) * adapter->num_rx_desc,
3274	    M_DEVBUF, M_NOWAIT);
3275	if (adapter->rx_buffer_area == NULL) {
3276		device_printf(dev, "Unable to allocate rx_buffer memory\n");
3277		return (ENOMEM);
3278	}
3279
3280	bzero(adapter->rx_buffer_area, sizeof(struct em_buffer) * adapter->num_rx_desc);
3281
3282	error = bus_dma_tag_create(bus_get_dma_tag(dev),	/* parent */
3283				1, 0,			/* alignment, bounds */
3284				BUS_SPACE_MAXADDR,	/* lowaddr */
3285				BUS_SPACE_MAXADDR,	/* highaddr */
3286				NULL, NULL,		/* filter, filterarg */
3287				MCLBYTES,		/* maxsize */
3288				1,			/* nsegments */
3289				MCLBYTES,		/* maxsegsize */
3290				0,			/* flags */
3291				NULL,			/* lockfunc */
3292				NULL,			/* lockarg */
3293				&adapter->rxtag);
3294	if (error) {
3295		device_printf(dev, "%s: bus_dma_tag_create failed %d\n",
3296		    __func__, error);
3297		goto fail;
3298	}
3299
3300	error = bus_dmamap_create(adapter->rxtag, BUS_DMA_NOWAIT,
3301	    &adapter->rx_sparemap);
3302	if (error) {
3303		device_printf(dev, "%s: bus_dmamap_create failed: %d\n",
3304		    __func__, error);
3305		goto fail;
3306	}
3307	rx_buffer = adapter->rx_buffer_area;
3308	for (i = 0; i < adapter->num_rx_desc; i++, rx_buffer++) {
3309		error = bus_dmamap_create(adapter->rxtag, BUS_DMA_NOWAIT,
3310		    &rx_buffer->map);
3311		if (error) {
3312			device_printf(dev, "%s: bus_dmamap_create failed: %d\n",
3313			    __func__, error);
3314			goto fail;
3315		}
3316	}
3317
3318	for (i = 0; i < adapter->num_rx_desc; i++) {
3319		error = em_get_buf(adapter, i);
3320		if (error)
3321			goto fail;
3322	}
3323	bus_dmamap_sync(adapter->rxdma.dma_tag, adapter->rxdma.dma_map,
3324	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3325
3326	return (0);
3327
3328fail:
3329	em_free_receive_structures(adapter);
3330	return (error);
3331}
3332
3333/*********************************************************************
3334 *
3335 *  Allocate and initialize receive structures.
3336 *
3337 **********************************************************************/
3338static int
3339em_setup_receive_structures(struct adapter *adapter)
3340{
3341	int error;
3342
3343	bzero(adapter->rx_desc_base, (sizeof(struct em_rx_desc)) * adapter->num_rx_desc);
3344
3345	if ((error = em_allocate_receive_structures(adapter)) != 0)
3346		return (error);
3347
3348	/* Setup our descriptor pointers */
3349	adapter->next_rx_desc_to_check = 0;
3350
3351	return (0);
3352}
3353
3354/*********************************************************************
3355 *
3356 *  Enable receive unit.
3357 *
3358 **********************************************************************/
3359static void
3360em_initialize_receive_unit(struct adapter *adapter)
3361{
3362	struct ifnet	*ifp = adapter->ifp;
3363	uint64_t	bus_addr;
3364	uint32_t	reg_rctl;
3365	uint32_t	reg_rxcsum;
3366
3367	INIT_DEBUGOUT("em_initialize_receive_unit: begin");
3368
3369	/*
3370	 * Make sure receives are disabled while setting
3371	 * up the descriptor ring
3372	 */
3373	E1000_WRITE_REG(&adapter->hw, RCTL, 0);
3374
3375	/* Set the Receive Delay Timer Register */
3376	E1000_WRITE_REG(&adapter->hw, RDTR, adapter->rx_int_delay.value | E1000_RDT_FPDB);
3377
3378	if(adapter->hw.mac_type >= em_82540) {
3379		E1000_WRITE_REG(&adapter->hw, RADV, adapter->rx_abs_int_delay.value);
3380
3381		/*
3382		 * Set the interrupt throttling rate. Value is calculated
3383		 * as DEFAULT_ITR = 1/(MAX_INTS_PER_SEC * 256ns)
3384		 */
3385#define MAX_INTS_PER_SEC	8000
3386#define DEFAULT_ITR	     1000000000/(MAX_INTS_PER_SEC * 256)
3387		E1000_WRITE_REG(&adapter->hw, ITR, DEFAULT_ITR);
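		/*
		 * With MAX_INTS_PER_SEC = 8000 this works out to
		 * 1000000000 / (8000 * 256) = 488 ITR units of 256 ns,
		 * i.e. roughly 125 usec minimum between interrupts.
		 */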
3388	}
3389
3390	/* Setup the Base and Length of the Rx Descriptor Ring */
3391	bus_addr = adapter->rxdma.dma_paddr;
3392	E1000_WRITE_REG(&adapter->hw, RDLEN, adapter->num_rx_desc *
3393			sizeof(struct em_rx_desc));
3394	E1000_WRITE_REG(&adapter->hw, RDBAH, (uint32_t)(bus_addr >> 32));
3395	E1000_WRITE_REG(&adapter->hw, RDBAL, (uint32_t)bus_addr);
3396
3397	/* Setup the Receive Control Register */
3398	reg_rctl = E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_LBM_NO |
3399		   E1000_RCTL_RDMTS_HALF |
3400		   (adapter->hw.mc_filter_type << E1000_RCTL_MO_SHIFT);
3401
3402	if (adapter->hw.tbi_compatibility_on == TRUE)
3403		reg_rctl |= E1000_RCTL_SBP;
3404
3405
3406	switch (adapter->rx_buffer_len) {
3407	default:
3408	case EM_RXBUFFER_2048:
3409		reg_rctl |= E1000_RCTL_SZ_2048;
3410		break;
3411	case EM_RXBUFFER_4096:
3412		reg_rctl |= E1000_RCTL_SZ_4096 | E1000_RCTL_BSEX | E1000_RCTL_LPE;
3413		break;
3414	case EM_RXBUFFER_8192:
3415		reg_rctl |= E1000_RCTL_SZ_8192 | E1000_RCTL_BSEX | E1000_RCTL_LPE;
3416		break;
3417	case EM_RXBUFFER_16384:
3418		reg_rctl |= E1000_RCTL_SZ_16384 | E1000_RCTL_BSEX | E1000_RCTL_LPE;
3419		break;
3420	}
3421
3422	if (ifp->if_mtu > ETHERMTU)
3423		reg_rctl |= E1000_RCTL_LPE;
3424
3425	/* Enable 82543 Receive Checksum Offload for TCP and UDP */
3426	if ((adapter->hw.mac_type >= em_82543) &&
3427	    (ifp->if_capenable & IFCAP_RXCSUM)) {
3428		reg_rxcsum = E1000_READ_REG(&adapter->hw, RXCSUM);
3429		reg_rxcsum |= (E1000_RXCSUM_IPOFL | E1000_RXCSUM_TUOFL);
3430		E1000_WRITE_REG(&adapter->hw, RXCSUM, reg_rxcsum);
3431	}
3432
3433	/* Enable Receives */
3434	E1000_WRITE_REG(&adapter->hw, RCTL, reg_rctl);
3435
3436	/* Setup the HW Rx Head and Tail Descriptor Pointers */
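	/*
	 * Head starts at 0 and tail at the last valid descriptor, which
	 * hands the entire ring to the hardware; em_rxeof() advances RDT
	 * as it replenishes buffers.
	 */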
3437	E1000_WRITE_REG(&adapter->hw, RDH, 0);
3438	E1000_WRITE_REG(&adapter->hw, RDT, adapter->num_rx_desc - 1);
3439}
3440
3441/*********************************************************************
3442 *
3443 *  Free receive related data structures.
3444 *
3445 **********************************************************************/
3446static void
3447em_free_receive_structures(struct adapter *adapter)
3448{
3449	struct em_buffer *rx_buffer;
3450	int i;
3451
3452	INIT_DEBUGOUT("em_free_receive_structures: begin");
3453
3454	if (adapter->rx_sparemap) {
3455		bus_dmamap_destroy(adapter->rxtag, adapter->rx_sparemap);
3456		adapter->rx_sparemap = NULL;
3457	}
3458	if (adapter->rx_buffer_area != NULL) {
3459		rx_buffer = adapter->rx_buffer_area;
3460		for (i = 0; i < adapter->num_rx_desc; i++, rx_buffer++) {
3461			if (rx_buffer->m_head != NULL) {
3462				bus_dmamap_sync(adapter->rxtag, rx_buffer->map,
3463				    BUS_DMASYNC_POSTREAD);
3464				bus_dmamap_unload(adapter->rxtag,
3465				    rx_buffer->map);
3466				m_freem(rx_buffer->m_head);
3467				rx_buffer->m_head = NULL;
3468			} else if (rx_buffer->map != NULL)
3469				bus_dmamap_unload(adapter->rxtag,
3470				    rx_buffer->map);
3471			if (rx_buffer->map != NULL) {
3472				bus_dmamap_destroy(adapter->rxtag,
3473				    rx_buffer->map);
3474				rx_buffer->map = NULL;
3475			}
3476		}
3477	}
3478	if (adapter->rx_buffer_area != NULL) {
3479		free(adapter->rx_buffer_area, M_DEVBUF);
3480		adapter->rx_buffer_area = NULL;
3481	}
3482	if (adapter->rxtag != NULL) {
3483		bus_dma_tag_destroy(adapter->rxtag);
3484		adapter->rxtag = NULL;
3485	}
3486}
3487
3488/*********************************************************************
3489 *
3490	 *  This routine executes in interrupt context. It replenishes
3491	 *  the mbufs in the descriptor ring and passes data that has
3492	 *  been DMA'ed into host memory up to the upper layer.
3493 *
3494 *  We loop at most count times if count is > 0, or until done if
3495 *  count < 0.
3496 *
3497 *********************************************************************/
3498static int
3499em_rxeof(struct adapter *adapter, int count)
3500{
3501	struct ifnet	*ifp;
3502	struct mbuf	*mp;
3503	uint8_t		accept_frame = 0;
3504	uint8_t		eop = 0;
3505	uint16_t 	len, desc_len, prev_len_adj;
3506	int		i;
3507
3508	/* Pointer to the receive descriptor being examined. */
3509	struct em_rx_desc   *current_desc;
3510	uint8_t		status;
3511
3512	ifp = adapter->ifp;
3513	i = adapter->next_rx_desc_to_check;
3514	current_desc = &adapter->rx_desc_base[i];
3515	bus_dmamap_sync(adapter->rxdma.dma_tag, adapter->rxdma.dma_map,
3516	    BUS_DMASYNC_POSTREAD);
3517
3518	if (!((current_desc->status) & E1000_RXD_STAT_DD))
3519		return (0);
3520
3521	while ((current_desc->status & E1000_RXD_STAT_DD) &&
3522	    (count != 0) &&
3523	    (ifp->if_drv_flags & IFF_DRV_RUNNING)) {
3524		struct mbuf *m = NULL;
3525
3526		mp = adapter->rx_buffer_area[i].m_head;
3527		/*
3528		 * Can't defer bus_dmamap_sync(9) because TBI_ACCEPT
3529		 * needs to access the last received byte in the mbuf.
3530		 */
3531		bus_dmamap_sync(adapter->rxtag, adapter->rx_buffer_area[i].map,
3532		    BUS_DMASYNC_POSTREAD);
3533
3534		accept_frame = 1;
3535		prev_len_adj = 0;
3536		desc_len = le16toh(current_desc->length);
3537		status = current_desc->status;
3538		if (status & E1000_RXD_STAT_EOP) {
3539			count--;
3540			eop = 1;
3541			if (desc_len < ETHER_CRC_LEN) {
3542				len = 0;
3543				prev_len_adj = ETHER_CRC_LEN - desc_len;
3544			} else
3545				len = desc_len - ETHER_CRC_LEN;
3546		} else {
3547			eop = 0;
3548			len = desc_len;
3549		}
3550
3551		if (current_desc->errors & E1000_RXD_ERR_FRAME_ERR_MASK) {
3552			uint8_t		last_byte;
3553			uint32_t	pkt_len = desc_len;
3554
3555			if (adapter->fmp != NULL)
3556				pkt_len += adapter->fmp->m_pkthdr.len;
3557
3558			last_byte = *(mtod(mp, caddr_t) + desc_len - 1);
3559			if (TBI_ACCEPT(&adapter->hw, status,
3560			    current_desc->errors, pkt_len, last_byte)) {
3561				em_tbi_adjust_stats(&adapter->hw,
3562				    &adapter->stats, pkt_len,
3563				    adapter->hw.mac_addr);
3564				if (len > 0)
3565					len--;
3566			} else
3567				accept_frame = 0;
3568		}
3569
3570		if (accept_frame) {
3571			if (em_get_buf(adapter, i) != 0) {
3572				ifp->if_iqdrops++;
3573				goto discard;
3574			}
3575
3576			/* Assign correct length to the current fragment */
3577			mp->m_len = len;
3578
3579			if (adapter->fmp == NULL) {
3580				mp->m_pkthdr.len = len;
3581				adapter->fmp = mp; /* Store the first mbuf */
3582				adapter->lmp = mp;
3583			} else {
3584				/* Chain mbufs together */
3585				mp->m_flags &= ~M_PKTHDR;
3586				/*
3587				 * Adjust length of previous mbuf in chain if
3588				 * we received less than 4 bytes in the last
3589				 * descriptor.
3590				 */
3591				if (prev_len_adj > 0) {
3592					adapter->lmp->m_len -= prev_len_adj;
3593					adapter->fmp->m_pkthdr.len -=
3594					    prev_len_adj;
3595				}
3596				adapter->lmp->m_next = mp;
3597				adapter->lmp = adapter->lmp->m_next;
3598				adapter->fmp->m_pkthdr.len += len;
3599			}
3600
3601			if (eop) {
3602				adapter->fmp->m_pkthdr.rcvif = ifp;
3603				ifp->if_ipackets++;
3604				em_receive_checksum(adapter, current_desc,
3605				    adapter->fmp);
3606#ifndef __NO_STRICT_ALIGNMENT
3607				if (adapter->hw.max_frame_size >
3608				    (MCLBYTES - ETHER_ALIGN) &&
3609				    em_fixup_rx(adapter) != 0)
3610					goto skip;
3611#endif
3612				if (status & E1000_RXD_STAT_VP) {
3613					adapter->fmp->m_pkthdr.ether_vtag =
3614					    (le16toh(current_desc->special) &
3615					    E1000_RXD_SPC_VLAN_MASK);
3616					adapter->fmp->m_flags |= M_VLANTAG;
3617				}
3618#ifndef __NO_STRICT_ALIGNMENT
3619skip:
3620#endif
3621				m = adapter->fmp;
3622				adapter->fmp = NULL;
3623				adapter->lmp = NULL;
3624			}
3625		} else {
3626			ifp->if_ierrors++;
3627discard:
3628			/* Reuse loaded DMA map and just update mbuf chain */
3629			mp = adapter->rx_buffer_area[i].m_head;
3630			mp->m_len = mp->m_pkthdr.len = MCLBYTES;
3631			mp->m_data = mp->m_ext.ext_buf;
3632			mp->m_next = NULL;
3633			if (adapter->hw.max_frame_size <= (MCLBYTES - ETHER_ALIGN))
3634				m_adj(mp, ETHER_ALIGN);
3635			if (adapter->fmp != NULL) {
3636				m_freem(adapter->fmp);
3637				adapter->fmp = NULL;
3638				adapter->lmp = NULL;
3639			}
3640			m = NULL;
3641		}
3642
3643		/* Zero out the receive descriptor's status. */
3644		current_desc->status = 0;
3645		bus_dmamap_sync(adapter->rxdma.dma_tag, adapter->rxdma.dma_map,
3646		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3647
3648		/* Advance our pointers to the next descriptor. */
3649		if (++i == adapter->num_rx_desc)
3650			i = 0;
3651		if (m != NULL) {
3652			adapter->next_rx_desc_to_check = i;
3653#ifdef DEVICE_POLLING
3654			EM_UNLOCK(adapter);
3655			(*ifp->if_input)(ifp, m);
3656			EM_LOCK(adapter);
3657#else
3658			(*ifp->if_input)(ifp, m);
3659#endif
3660			i = adapter->next_rx_desc_to_check;
3661		}
3662		current_desc = &adapter->rx_desc_base[i];
3663	}
3664	adapter->next_rx_desc_to_check = i;
3665
3666	/* Advance the E1000's Receive Queue #0 "Tail Pointer". */
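	/*
	 * RDT must point at the last descriptor handed back to the
	 * hardware, one behind the next descriptor the driver will
	 * examine, hence the decrement with wraparound.
	 */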
3667	if (--i < 0)
3668		i = adapter->num_rx_desc - 1;
3669	E1000_WRITE_REG(&adapter->hw, RDT, i);
3670	if (!((current_desc->status) & E1000_RXD_STAT_DD))
3671		return (0);
3672
3673	return (1);
3674}
3675
3676#ifndef __NO_STRICT_ALIGNMENT
3677/*
3678	 * When jumbo frames are enabled we should realign the entire payload on
3679	 * architectures with strict alignment. This is a serious design mistake of
3680	 * the 8254x, as it nullifies the benefit of DMA. The 8254x only allows the
3681	 * RX buffer size to be 2048/4096/8192/16384. What we really want is
3682	 * 2048 - ETHER_ALIGN, which would align the payload. On architectures
3683	 * without strict alignment restrictions the 8254x still performs unaligned
3684	 * memory accesses, which reduces performance as well. To avoid copying an
3685	 * entire frame just to align it, we allocate a new mbuf, copy the Ethernet
3686	 * header into it, and prepend the new mbuf to the existing chain.
3687	 *
3688	 * Be aware that the best performance of the 8254x is achieved only when
3689	 * jumbo frames are not used at all on architectures with strict alignment.
3690 */
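/*
 * Rough sketch of the in-place case below, assuming the usual 14-byte
 * Ethernet header on a 4-byte aligned cluster: shifting the frame
 * forward by ETHER_HDR_LEN moves the IP payload from offset 14 to
 * offset 28, which is 4-byte aligned.
 */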
3691static int
3692em_fixup_rx(struct adapter *adapter)
3693{
3694	struct mbuf *m, *n;
3695	int error;
3696
3697	error = 0;
3698	m = adapter->fmp;
3699	if (m->m_len <= (MCLBYTES - ETHER_HDR_LEN)) {
3700		bcopy(m->m_data, m->m_data + ETHER_HDR_LEN, m->m_len);
3701		m->m_data += ETHER_HDR_LEN;
3702	} else {
3703		MGETHDR(n, M_DONTWAIT, MT_DATA);
3704		if (n != NULL) {
3705			bcopy(m->m_data, n->m_data, ETHER_HDR_LEN);
3706			m->m_data += ETHER_HDR_LEN;
3707			m->m_len -= ETHER_HDR_LEN;
3708			n->m_len = ETHER_HDR_LEN;
3709			M_MOVE_PKTHDR(n, m);
3710			n->m_next = m;
3711			adapter->fmp = n;
3712		} else {
3713			adapter->ifp->if_iqdrops++;
3714			adapter->mbuf_alloc_failed++;
3715			m_freem(adapter->fmp);
3716			adapter->fmp = NULL;
3717			adapter->lmp = NULL;
3718			error = ENOBUFS;
3719		}
3720	}
3721
3722	return (error);
3723}
3724#endif
3725
3726/*********************************************************************
3727 *
3728 *  Verify that the hardware indicated that the checksum is valid.
3729	 *  Inform the stack about the status of the checksum so that the
3730	 *  stack doesn't spend time re-verifying it.
3731 *
3732 *********************************************************************/
3733static void
3734em_receive_checksum(struct adapter *adapter, struct em_rx_desc *rx_desc,
3735		    struct mbuf *mp)
3736{
3737	/* 82543 or newer only */
3738	if ((adapter->hw.mac_type < em_82543) ||
3739	    /* Ignore Checksum bit is set */
3740	    (rx_desc->status & E1000_RXD_STAT_IXSM)) {
3741		mp->m_pkthdr.csum_flags = 0;
3742		return;
3743	}
3744
3745	if (rx_desc->status & E1000_RXD_STAT_IPCS) {
3746		/* Did it pass? */
3747		if (!(rx_desc->errors & E1000_RXD_ERR_IPE)) {
3748			/* IP Checksum Good */
3749			mp->m_pkthdr.csum_flags = CSUM_IP_CHECKED;
3750			mp->m_pkthdr.csum_flags |= CSUM_IP_VALID;
3751
3752		} else {
3753			mp->m_pkthdr.csum_flags = 0;
3754		}
3755	}
3756
3757	if (rx_desc->status & E1000_RXD_STAT_TCPCS) {
3758		/* Did it pass? */
3759		if (!(rx_desc->errors & E1000_RXD_ERR_TCPE)) {
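			/*
			 * CSUM_PSEUDO_HDR plus a csum_data of 0xffff
			 * tells the stack the TCP/UDP checksum, pseudo
			 * header included, was verified in hardware and
			 * need not be recomputed.
			 */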
3760			mp->m_pkthdr.csum_flags |=
3761			(CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
3762			mp->m_pkthdr.csum_data = htons(0xffff);
3763		}
3764	}
3765}
3766
3767
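/*
 * Program the VLAN ethertype register with the standard 802.1Q tag
 * protocol ID (ETHERTYPE_VLAN, 0x8100) and set the VME bit so the MAC
 * processes VLAN tags in hardware.
 */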
3768static void
3769em_enable_vlans(struct adapter *adapter)
3770{
3771	uint32_t ctrl;
3772
3773	E1000_WRITE_REG(&adapter->hw, VET, ETHERTYPE_VLAN);
3774
3775	ctrl = E1000_READ_REG(&adapter->hw, CTRL);
3776	ctrl |= E1000_CTRL_VME;
3777	E1000_WRITE_REG(&adapter->hw, CTRL, ctrl);
3778}
3779
3780static void
3781em_disable_vlans(struct adapter *adapter)
3782{
3783	uint32_t ctrl;
3784
3785	ctrl = E1000_READ_REG(&adapter->hw, CTRL);
3786	ctrl &= ~E1000_CTRL_VME;
3787	E1000_WRITE_REG(&adapter->hw, CTRL, ctrl);
3788}
3789
3790static void
3791em_enable_intr(struct adapter *adapter)
3792{
3793	E1000_WRITE_REG(&adapter->hw, IMS, (IMS_ENABLE_MASK));
3794}
3795
3796static void
3797em_disable_intr(struct adapter *adapter)
3798{
3799	/*
3800	 * The first version of 82542 had an erratum where, when link was
3801	 * forced, it would stay up even if the cable was disconnected.
3802	 * Sequence errors were used to detect the disconnect and then the
3803	 * driver would unforce the link. That code is in the ISR. For this
3804	 * to work correctly the Sequence error interrupt had to be enabled
3805	 * all the time.
3806	 */
3807
3808	if (adapter->hw.mac_type == em_82542_rev2_0)
3809	    E1000_WRITE_REG(&adapter->hw, IMC,
3810		(0xffffffff & ~E1000_IMC_RXSEQ));
3811	else
3812	    E1000_WRITE_REG(&adapter->hw, IMC,
3813		0xffffffff);
3814}
3815
3816static int
3817em_is_valid_ether_addr(uint8_t *addr)
3818{
3819	char zero_addr[6] = { 0, 0, 0, 0, 0, 0 };
3820
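	/*
	 * Reject group (multicast/broadcast) addresses, which have the
	 * low bit of the first octet set, and the all-zeros address.
	 */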
3821	if ((addr[0] & 1) || (!bcmp(addr, zero_addr, ETHER_ADDR_LEN))) {
3822		return (FALSE);
3823	}
3824
3825	return (TRUE);
3826}
3827
3828void
3829em_write_pci_cfg(struct em_hw *hw, uint32_t reg, uint16_t *value)
3830{
3831	pci_write_config(((struct em_osdep *)hw->back)->dev, reg, *value, 2);
3832}
3833
3834void
3835em_read_pci_cfg(struct em_hw *hw, uint32_t reg, uint16_t *value)
3836{
3837	*value = pci_read_config(((struct em_osdep *)hw->back)->dev, reg, 2);
3838}
3839
3840void
3841em_pci_set_mwi(struct em_hw *hw)
3842{
3843	pci_write_config(((struct em_osdep *)hw->back)->dev, PCIR_COMMAND,
3844	    (hw->pci_cmd_word | CMD_MEM_WRT_INVALIDATE), 2);
3845}
3846
3847void
3848em_pci_clear_mwi(struct em_hw *hw)
3849{
3850	pci_write_config(((struct em_osdep *)hw->back)->dev, PCIR_COMMAND,
3851	    (hw->pci_cmd_word & ~CMD_MEM_WRT_INVALIDATE), 2);
3852}
3853
3854/*
3855	 * We may eventually really do this, but it's unnecessary
3856	 * for now, so we just return unsupported.
3857 */
3858int32_t
3859em_read_pcie_cap_reg(struct em_hw *hw, uint32_t reg, uint16_t *value)
3860{
3861	return (0);
3862}
3863
3864/*********************************************************************
3865	* 82544 Coexistence issue workaround.
3866	*    There are 2 issues.
3867	*       1. Transmit Hang issue.
3868	*    To detect this issue, the following equation can be used:
3869	*	  SIZE[3:0] + ADDR[2:0] = SUM[3:0].
3870	*	  If SUM[3:0] is between 1 and 4, we will have this issue.
3871	*
3872	*       2. DAC issue.
3873	*    To detect this issue, the following equation can be used:
3874	*	  SIZE[3:0] + ADDR[2:0] = SUM[3:0].
3875	*	  If SUM[3:0] is between 9 and c, we will have this issue.
3876	*
3877	*    WORKAROUND:
3878	*	  Make sure the ending address is not 1, 2, 3, 4 (Hang)
3879	*	  or 9, a, b, c (DAC).
3880	**********************************************************************/
3882static uint32_t
3883	em_fill_descriptors(bus_addr_t address, uint32_t length,
3884		PDESC_ARRAY desc_array)
3885{
3886	/* The issue is sensitive to both length and address, so handle */
3887	/* the trivial short-length case first, then check the address. */
3888	uint32_t safe_terminator;
3889	if (length <= 4) {
3890		desc_array->descriptor[0].address = address;
3891		desc_array->descriptor[0].length = length;
3892		desc_array->elements = 1;
3893		return (desc_array->elements);
3894	}
3895	safe_terminator = (uint32_t)((((uint32_t)address & 0x7) + (length & 0xF)) & 0xF);
3896	/* A single descriptor is safe unless the terminator falls in 0x1-0x4 or 0x9-0xC */
3897	if (safe_terminator == 0 ||
3898	    (safe_terminator > 4 &&
3899	    safe_terminator < 9) ||
3900	    (safe_terminator > 0xC &&
3901	    safe_terminator <= 0xF)) {
3902		desc_array->descriptor[0].address = address;
3903		desc_array->descriptor[0].length = length;
3904		desc_array->elements = 1;
3905		return (desc_array->elements);
3906	}
3907
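	/*
	 * Worked example: an address ending in 0x6 with length 0xE gives
	 * (0x6 + 0xE) & 0xF = 0x4, which is in the hang range, so the
	 * last 4 bytes are split off into a second descriptor.
	 */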
3908	desc_array->descriptor[0].address = address;
3909	desc_array->descriptor[0].length = length - 4;
3910	desc_array->descriptor[1].address = address + (length - 4);
3911	desc_array->descriptor[1].length = 4;
3912	desc_array->elements = 2;
3913	return (desc_array->elements);
3914}
3915
3916/**********************************************************************
3917 *
3918 *  Update the board statistics counters.
3919 *
3920 **********************************************************************/
3921static void
3922em_update_stats_counters(struct adapter *adapter)
3923{
3924	struct ifnet   *ifp;
3925
3926	if (adapter->hw.media_type == em_media_type_copper ||
3927	    (E1000_READ_REG(&adapter->hw, STATUS) & E1000_STATUS_LU)) {
3928		adapter->stats.symerrs += E1000_READ_REG(&adapter->hw, SYMERRS);
3929		adapter->stats.sec += E1000_READ_REG(&adapter->hw, SEC);
3930	}
3931	adapter->stats.crcerrs += E1000_READ_REG(&adapter->hw, CRCERRS);
3932	adapter->stats.mpc += E1000_READ_REG(&adapter->hw, MPC);
3933	adapter->stats.scc += E1000_READ_REG(&adapter->hw, SCC);
3934	adapter->stats.ecol += E1000_READ_REG(&adapter->hw, ECOL);
3935
3936	adapter->stats.mcc += E1000_READ_REG(&adapter->hw, MCC);
3937	adapter->stats.latecol += E1000_READ_REG(&adapter->hw, LATECOL);
3938	adapter->stats.colc += E1000_READ_REG(&adapter->hw, COLC);
3939	adapter->stats.dc += E1000_READ_REG(&adapter->hw, DC);
3940	adapter->stats.rlec += E1000_READ_REG(&adapter->hw, RLEC);
3941	adapter->stats.xonrxc += E1000_READ_REG(&adapter->hw, XONRXC);
3942	adapter->stats.xontxc += E1000_READ_REG(&adapter->hw, XONTXC);
3943	adapter->stats.xoffrxc += E1000_READ_REG(&adapter->hw, XOFFRXC);
3944	adapter->stats.xofftxc += E1000_READ_REG(&adapter->hw, XOFFTXC);
3945	adapter->stats.fcruc += E1000_READ_REG(&adapter->hw, FCRUC);
3946	adapter->stats.prc64 += E1000_READ_REG(&adapter->hw, PRC64);
3947	adapter->stats.prc127 += E1000_READ_REG(&adapter->hw, PRC127);
3948	adapter->stats.prc255 += E1000_READ_REG(&adapter->hw, PRC255);
3949	adapter->stats.prc511 += E1000_READ_REG(&adapter->hw, PRC511);
3950	adapter->stats.prc1023 += E1000_READ_REG(&adapter->hw, PRC1023);
3951	adapter->stats.prc1522 += E1000_READ_REG(&adapter->hw, PRC1522);
3952	adapter->stats.gprc += E1000_READ_REG(&adapter->hw, GPRC);
3953	adapter->stats.bprc += E1000_READ_REG(&adapter->hw, BPRC);
3954	adapter->stats.mprc += E1000_READ_REG(&adapter->hw, MPRC);
3955	adapter->stats.gptc += E1000_READ_REG(&adapter->hw, GPTC);
3956
3957	/* For the 64-bit byte counters the low dword must be read first. */
3958	/* Both registers clear on the read of the high dword */
3959
3960	adapter->stats.gorcl += E1000_READ_REG(&adapter->hw, GORCL);
3961	adapter->stats.gorch += E1000_READ_REG(&adapter->hw, GORCH);
3962	adapter->stats.gotcl += E1000_READ_REG(&adapter->hw, GOTCL);
3963	adapter->stats.gotch += E1000_READ_REG(&adapter->hw, GOTCH);
3964
3965	adapter->stats.rnbc += E1000_READ_REG(&adapter->hw, RNBC);
3966	adapter->stats.ruc += E1000_READ_REG(&adapter->hw, RUC);
3967	adapter->stats.rfc += E1000_READ_REG(&adapter->hw, RFC);
3968	adapter->stats.roc += E1000_READ_REG(&adapter->hw, ROC);
3969	adapter->stats.rjc += E1000_READ_REG(&adapter->hw, RJC);
3970
3971	adapter->stats.torl += E1000_READ_REG(&adapter->hw, TORL);
3972	adapter->stats.torh += E1000_READ_REG(&adapter->hw, TORH);
3973	adapter->stats.totl += E1000_READ_REG(&adapter->hw, TOTL);
3974	adapter->stats.toth += E1000_READ_REG(&adapter->hw, TOTH);
3975
3976	adapter->stats.tpr += E1000_READ_REG(&adapter->hw, TPR);
3977	adapter->stats.tpt += E1000_READ_REG(&adapter->hw, TPT);
3978	adapter->stats.ptc64 += E1000_READ_REG(&adapter->hw, PTC64);
3979	adapter->stats.ptc127 += E1000_READ_REG(&adapter->hw, PTC127);
3980	adapter->stats.ptc255 += E1000_READ_REG(&adapter->hw, PTC255);
3981	adapter->stats.ptc511 += E1000_READ_REG(&adapter->hw, PTC511);
3982	adapter->stats.ptc1023 += E1000_READ_REG(&adapter->hw, PTC1023);
3983	adapter->stats.ptc1522 += E1000_READ_REG(&adapter->hw, PTC1522);
3984	adapter->stats.mptc += E1000_READ_REG(&adapter->hw, MPTC);
3985	adapter->stats.bptc += E1000_READ_REG(&adapter->hw, BPTC);
3986
3987	if (adapter->hw.mac_type >= em_82543) {
3988		adapter->stats.algnerrc += E1000_READ_REG(&adapter->hw, ALGNERRC);
3989		adapter->stats.rxerrc += E1000_READ_REG(&adapter->hw, RXERRC);
3990		adapter->stats.tncrs += E1000_READ_REG(&adapter->hw, TNCRS);
3991		adapter->stats.cexterr += E1000_READ_REG(&adapter->hw, CEXTERR);
3992		adapter->stats.tsctc += E1000_READ_REG(&adapter->hw, TSCTC);
3993		adapter->stats.tsctfc += E1000_READ_REG(&adapter->hw, TSCTFC);
3994	}
3995	ifp = adapter->ifp;
3996
3997	ifp->if_collisions = adapter->stats.colc;
3998
3999	/* Rx Errors */
4000	ifp->if_ierrors = adapter->stats.rxerrc + adapter->stats.crcerrs +
4001	    adapter->stats.algnerrc + adapter->stats.ruc + adapter->stats.roc +
4002	    adapter->stats.mpc + adapter->stats.cexterr;
4003
4004	/* Tx Errors */
4005	ifp->if_oerrors = adapter->stats.ecol + adapter->stats.latecol +
4006	    adapter->watchdog_events;
4007}
4008
4009
4010/**********************************************************************
4011 *
4012 *  This routine is called only when em_display_debug_stats is enabled.
4013 *  This routine provides a way to take a look at important statistics
4014 *  maintained by the driver and hardware.
4015 *
4016 **********************************************************************/
4017static void
4018em_print_debug_info(struct adapter *adapter)
4019{
4020	device_t dev = adapter->dev;
4021	uint8_t *hw_addr = adapter->hw.hw_addr;
4022
4023	device_printf(dev, "Adapter hardware address = %p\n", hw_addr);
4024	device_printf(dev, "CTRL = 0x%x RCTL = 0x%x\n",
4025	    E1000_READ_REG(&adapter->hw, CTRL),
4026	    E1000_READ_REG(&adapter->hw, RCTL));
4027	device_printf(dev, "Packet buffer = Tx=%dk Rx=%dk\n",
4028	    ((E1000_READ_REG(&adapter->hw, PBA) & 0xffff0000) >> 16),
4029	    (E1000_READ_REG(&adapter->hw, PBA) & 0xffff));
4030	device_printf(dev, "Flow control watermarks high = %d low = %d\n",
4031	    adapter->hw.fc_high_water,
4032	    adapter->hw.fc_low_water);
4033	device_printf(dev, "tx_int_delay = %d, tx_abs_int_delay = %d\n",
4034	    E1000_READ_REG(&adapter->hw, TIDV),
4035	    E1000_READ_REG(&adapter->hw, TADV));
4036	device_printf(dev, "rx_int_delay = %d, rx_abs_int_delay = %d\n",
4037	    E1000_READ_REG(&adapter->hw, RDTR),
4038	    E1000_READ_REG(&adapter->hw, RADV));
4039	device_printf(dev, "fifo workaround = %lld, fifo_reset_count = %lld\n",
4040	    (long long)adapter->tx_fifo_wrk_cnt,
4041	    (long long)adapter->tx_fifo_reset_cnt);
4042	device_printf(dev, "hw tdh = %d, hw tdt = %d\n",
4043	    E1000_READ_REG(&adapter->hw, TDH),
4044	    E1000_READ_REG(&adapter->hw, TDT));
4045	device_printf(dev, "Num Tx descriptors avail = %d\n",
4046	    adapter->num_tx_desc_avail);
4047	device_printf(dev, "Tx Descriptors not avail1 = %ld\n",
4048	    adapter->no_tx_desc_avail1);
4049	device_printf(dev, "Tx Descriptors not avail2 = %ld\n",
4050	    adapter->no_tx_desc_avail2);
4051	device_printf(dev, "Std mbuf failed = %ld\n",
4052	    adapter->mbuf_alloc_failed);
4053	device_printf(dev, "Std mbuf cluster failed = %ld\n",
4054	    adapter->mbuf_cluster_failed);
4055}
4056
4057static void
4058em_print_hw_stats(struct adapter *adapter)
4059{
4060	device_t dev = adapter->dev;
4061
4062	device_printf(dev, "Excessive collisions = %lld\n",
4063	    (long long)adapter->stats.ecol);
4064	device_printf(dev, "Symbol errors = %lld\n",
4065	    (long long)adapter->stats.symerrs);
4066	device_printf(dev, "Sequence errors = %lld\n",
4067	    (long long)adapter->stats.sec);
4068	device_printf(dev, "Defer count = %lld\n", (long long)adapter->stats.dc);
4069
4070	device_printf(dev, "Missed Packets = %lld\n", (long long)adapter->stats.mpc);
4071	device_printf(dev, "Receive No Buffers = %lld\n",
4072	    (long long)adapter->stats.rnbc);
4073	/* RLEC is inaccurate on some hardware, calculate our own. */
4074	device_printf(dev, "Receive Length Errors = %lld\n",
4075	    ((long long)adapter->stats.roc + (long long)adapter->stats.ruc));
4076	device_printf(dev, "Receive errors = %lld\n",
4077	    (long long)adapter->stats.rxerrc);
4078	device_printf(dev, "Crc errors = %lld\n", (long long)adapter->stats.crcerrs);
4079	device_printf(dev, "Alignment errors = %lld\n",
4080	    (long long)adapter->stats.algnerrc);
4081	device_printf(dev, "Carrier extension errors = %lld\n",
4082	    (long long)adapter->stats.cexterr);
4083	device_printf(dev, "RX overruns = %ld\n", adapter->rx_overruns);
4084	device_printf(dev, "watchdog timeouts = %ld\n", adapter->watchdog_events);
4085
4086	device_printf(dev, "XON Rcvd = %lld\n", (long long)adapter->stats.xonrxc);
4087	device_printf(dev, "XON Xmtd = %lld\n", (long long)adapter->stats.xontxc);
4088	device_printf(dev, "XOFF Rcvd = %lld\n", (long long)adapter->stats.xoffrxc);
4089	device_printf(dev, "XOFF Xmtd = %lld\n", (long long)adapter->stats.xofftxc);
4090
4091	device_printf(dev, "Good Packets Rcvd = %lld\n",
4092	    (long long)adapter->stats.gprc);
4093	device_printf(dev, "Good Packets Xmtd = %lld\n",
4094	    (long long)adapter->stats.gptc);
4095	device_printf(dev, "TSO Contexts Xmtd = %lld\n",
4096	    (long long)adapter->stats.tsctc);
4097	device_printf(dev, "TSO Contexts Failed = %lld\n",
4098	    (long long)adapter->stats.tsctfc);
4099}
4100
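/*
 * Debug-info sysctl handler: reading the OID returns -1; writing a 1
 * (e.g. with sysctl(8), via whatever OID path the driver registers
 * under the device's sysctl tree) triggers the dump above.
 */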
4101static int
4102em_sysctl_debug_info(SYSCTL_HANDLER_ARGS)
4103{
4104	struct adapter *adapter;
4105	int error;
4106	int result;
4107
4108	result = -1;
4109	error = sysctl_handle_int(oidp, &result, 0, req);
4110
4111	if (error || !req->newptr)
4112		return (error);
4113
4114	if (result == 1) {
4115		adapter = (struct adapter *)arg1;
4116		em_print_debug_info(adapter);
4117	}
4118
4119	return (error);
4120}
4121
4122
4123static int
4124em_sysctl_stats(SYSCTL_HANDLER_ARGS)
4125{
4126	struct adapter *adapter;
4127	int error;
4128	int result;
4129
4130	result = -1;
4131	error = sysctl_handle_int(oidp, &result, 0, req);
4132
4133	if (error || !req->newptr)
4134		return (error);
4135
4136	if (result == 1) {
4137		adapter = (struct adapter *)arg1;
4138		em_print_hw_stats(adapter);
4139	}
4140
4141	return (error);
4142}
4143
4144static int
4145em_sysctl_int_delay(SYSCTL_HANDLER_ARGS)
4146{
4147	struct em_int_delay_info *info;
4148	struct adapter *adapter;
4149	uint32_t regval;
4150	int error;
4151	int usecs;
4152	int ticks;
4153
4154	info = (struct em_int_delay_info *)arg1;
4155	usecs = info->value;
4156	error = sysctl_handle_int(oidp, &usecs, 0, req);
4157	if (error != 0 || req->newptr == NULL)
4158		return (error);
4159	if (usecs < 0 || usecs > E1000_TICKS_TO_USECS(65535))
4160		return (EINVAL);
4161	info->value = usecs;
4162	ticks = E1000_USECS_TO_TICKS(usecs);
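	/*
	 * The delay registers count in hardware ticks (1.024us each on
	 * this family), hence the conversion from the user-supplied
	 * microsecond value.
	 */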
4163
4164	adapter = info->adapter;
4165
4166	EM_LOCK(adapter);
4167	regval = E1000_READ_OFFSET(&adapter->hw, info->offset);
4168	regval = (regval & ~0xffff) | (ticks & 0xffff);
4169	/* Handle a few special cases. */
4170	switch (info->offset) {
4171	case E1000_RDTR:
4172	case E1000_82542_RDTR:
4173		regval |= E1000_RDT_FPDB;
4174		break;
4175	case E1000_TIDV:
4176	case E1000_82542_TIDV:
4177		if (ticks == 0) {
4178			adapter->txd_cmd &= ~E1000_TXD_CMD_IDE;
4179			/* Don't write 0 into the TIDV register. */
4180			regval++;
4181		} else
4182			adapter->txd_cmd |= E1000_TXD_CMD_IDE;
4183		break;
4184	}
4185	E1000_WRITE_OFFSET(&adapter->hw, info->offset, regval);
4186	EM_UNLOCK(adapter);
4187	return (0);
4188}
4189
4190static void
4191em_add_int_delay_sysctl(struct adapter *adapter, const char *name,
4192	const char *description, struct em_int_delay_info *info,
4193	int offset, int value)
4194{
4195	info->adapter = adapter;
4196	info->offset = offset;
4197	info->value = value;
4198	SYSCTL_ADD_PROC(device_get_sysctl_ctx(adapter->dev),
4199	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
4200	    OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW,
4201	    info, 0, em_sysctl_int_delay, "I", description);
4202}
4203
4204#ifndef DEVICE_POLLING
4205static void
4206em_add_int_process_limit(struct adapter *adapter, const char *name,
4207	const char *description, int *limit, int value)
4208{
4209	*limit = value;
4210	SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev),
4211	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
4212	    OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW, limit, value, description);
4213}
4214#endif
4215