/**************************************************************************

Copyright (c) 2001-2006, Intel Corporation
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

 1. Redistributions of source code must retain the above copyright notice,
    this list of conditions and the following disclaimer.

 2. Redistributions in binary form must reproduce the above copyright
    notice, this list of conditions and the following disclaimer in the
    documentation and/or other materials provided with the distribution.

 3. Neither the name of the Intel Corporation nor the names of its
    contributors may be used to endorse or promote products derived from
    this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.

***************************************************************************/

/*$FreeBSD: head/sys/dev/em/if_em.c 163824 2006-10-31 15:00:14Z glebius $*/

#ifdef HAVE_KERNEL_OPTION_HEADERS
#include "opt_device_polling.h"
#endif

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/bus.h>
#include <sys/endian.h>
#include <sys/kernel.h>
#include <sys/kthread.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/module.h>
#include <sys/rman.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/sysctl.h>
#include <sys/taskqueue.h>

#include <machine/bus.h>
#include <machine/resource.h>

#include <net/bpf.h>
#include <net/ethernet.h>
#include <net/if.h>
#include <net/if_arp.h>
#include <net/if_dl.h>
#include <net/if_media.h>

#include <net/if_types.h>
#include <net/if_vlan_var.h>

#include <netinet/in_systm.h>
#include <netinet/in.h>
#include <netinet/if_ether.h>
#include <netinet/ip.h>
#include <netinet/ip6.h>
#include <netinet/tcp.h>
#include <netinet/udp.h>

#include <machine/in_cksum.h>
#include <dev/pci/pcivar.h>
#include <dev/pci/pcireg.h>
#include <dev/em/if_em_hw.h>
#include <dev/em/if_em.h>

/*********************************************************************
 *  Set this to one to display debug statistics
 *********************************************************************/
int	em_display_debug_stats = 0;

/*********************************************************************
 *  Driver version
 *********************************************************************/

char em_driver_version[] = "Version - 6.2.9";


/*********************************************************************
 *  PCI Device ID Table
 *
 *  Used by probe to select devices to load on
 *  Last field stores an index into em_strings
 *  Last entry must be all 0s
 *
 *  { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index }
 *********************************************************************/

static em_vendor_info_t em_vendor_info_array[] =
{
	/* Intel(R) PRO/1000 Network Connection */
	{ 0x8086, E1000_DEV_ID_82540EM,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82540EM_LOM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82540EP,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82540EP_LOM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82540EP_LP,	PCI_ANY_ID, PCI_ANY_ID, 0},

	{ 0x8086, E1000_DEV_ID_82541EI,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82541ER,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82541ER_LOM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82541EI_MOBILE,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82541GI,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82541GI_LF,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82541GI_MOBILE,	PCI_ANY_ID, PCI_ANY_ID, 0},

	{ 0x8086, E1000_DEV_ID_82542,		PCI_ANY_ID, PCI_ANY_ID, 0},

	{ 0x8086, E1000_DEV_ID_82543GC_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82543GC_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},

	{ 0x8086, E1000_DEV_ID_82544EI_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82544EI_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82544GC_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82544GC_LOM,	PCI_ANY_ID, PCI_ANY_ID, 0},

	{ 0x8086, E1000_DEV_ID_82545EM_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82545EM_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82545GM_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82545GM_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82545GM_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},

	{ 0x8086, E1000_DEV_ID_82546EB_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82546EB_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82546EB_QUAD_COPPER, PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82546GB_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82546GB_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82546GB_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82546GB_PCIE,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82546GB_QUAD_COPPER, PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82546GB_QUAD_COPPER_KSP3,
						PCI_ANY_ID, PCI_ANY_ID, 0},

	{ 0x8086, E1000_DEV_ID_82547EI,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82547EI_MOBILE,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82547GI,		PCI_ANY_ID, PCI_ANY_ID, 0},

	{ 0x8086, E1000_DEV_ID_82571EB_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER_LOWPROFILE,
						PCI_ANY_ID, PCI_ANY_ID, 0},

	{ 0x8086, E1000_DEV_ID_82572EI_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82572EI_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82572EI_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82572EI,		PCI_ANY_ID, PCI_ANY_ID, 0},

	{ 0x8086, E1000_DEV_ID_82573E,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82573E_IAMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82573L,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_SPT,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_SPT,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_DPT,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_DPT,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IGP_M_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IGP_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IGP_C,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IFE,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IFE_GT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IFE_G,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IGP_M,	PCI_ANY_ID, PCI_ANY_ID, 0},

	/* required last entry */
	{ 0, 0, 0, 0, 0}
};

/*********************************************************************
 *  Table of branding strings for all supported NICs.
 *********************************************************************/

static char *em_strings[] = {
	"Intel(R) PRO/1000 Network Connection"
};

/*********************************************************************
 *  Function prototypes
 *********************************************************************/
static int	em_probe(device_t);
static int	em_attach(device_t);
static int	em_detach(device_t);
static int	em_shutdown(device_t);
static int	em_suspend(device_t);
static int	em_resume(device_t);
static void	em_start(struct ifnet *);
static void	em_start_locked(struct ifnet *ifp);
static int	em_ioctl(struct ifnet *, u_long, caddr_t);
static void	em_watchdog(struct ifnet *);
static void	em_init(void *);
static void	em_init_locked(struct adapter *);
static void	em_stop(void *);
static void	em_media_status(struct ifnet *, struct ifmediareq *);
static int	em_media_change(struct ifnet *);
static void	em_identify_hardware(struct adapter *);
static int	em_allocate_pci_resources(struct adapter *);
static int	em_allocate_intr(struct adapter *);
static void	em_free_intr(struct adapter *);
static void	em_free_pci_resources(struct adapter *);
static void	em_local_timer(void *);
static int	em_hardware_init(struct adapter *);
static void	em_setup_interface(device_t, struct adapter *);
static int	em_setup_transmit_structures(struct adapter *);
static void	em_initialize_transmit_unit(struct adapter *);
static int	em_setup_receive_structures(struct adapter *);
static void	em_initialize_receive_unit(struct adapter *);
static void	em_enable_intr(struct adapter *);
static void	em_disable_intr(struct adapter *);
static void	em_free_transmit_structures(struct adapter *);
static void	em_free_receive_structures(struct adapter *);
static void	em_update_stats_counters(struct adapter *);
static void	em_txeof(struct adapter *);
static int	em_allocate_receive_structures(struct adapter *);
static int	em_allocate_transmit_structures(struct adapter *);
static int	em_rxeof(struct adapter *, int);
#ifndef __NO_STRICT_ALIGNMENT
static int	em_fixup_rx(struct adapter *);
#endif
static void	em_receive_checksum(struct adapter *, struct em_rx_desc *,
		    struct mbuf *);
static void	em_transmit_checksum_setup(struct adapter *, struct mbuf *,
		    uint32_t *, uint32_t *);
static boolean_t em_tso_setup(struct adapter *, struct mbuf *,
		    uint32_t *, uint32_t *);
static void	em_set_promisc(struct adapter *);
static void	em_disable_promisc(struct adapter *);
static void	em_set_multi(struct adapter *);
static void	em_print_hw_stats(struct adapter *);
static void	em_update_link_status(struct adapter *);
static int	em_get_buf(struct adapter *, int);
static void	em_enable_vlans(struct adapter *);
static void	em_disable_vlans(struct adapter *);
static int	em_encap(struct adapter *, struct mbuf **);
static void	em_smartspeed(struct adapter *);
static int	em_82547_fifo_workaround(struct adapter *, int);
static void	em_82547_update_fifo_head(struct adapter *, int);
static int	em_82547_tx_fifo_reset(struct adapter *);
static void	em_82547_move_tail(void *arg);
static void	em_82547_move_tail_locked(struct adapter *);
static int	em_dma_malloc(struct adapter *, bus_size_t,
		    struct em_dma_alloc *, int);
static void	em_dma_free(struct adapter *, struct em_dma_alloc *);
static void	em_print_debug_info(struct adapter *);
static int	em_is_valid_ether_addr(uint8_t *);
static int	em_sysctl_stats(SYSCTL_HANDLER_ARGS);
static int	em_sysctl_debug_info(SYSCTL_HANDLER_ARGS);
static uint32_t	em_fill_descriptors(bus_addr_t address, uint32_t length,
		    PDESC_ARRAY desc_array);
static int	em_sysctl_int_delay(SYSCTL_HANDLER_ARGS);
static void	em_add_int_delay_sysctl(struct adapter *, const char *,
		    const char *, struct em_int_delay_info *, int, int);

/*
 * Fast interrupt handler and legacy ithread/polling modes are
 * mutually exclusive.
 */
#ifdef DEVICE_POLLING
static poll_handler_t em_poll;
static void	em_intr(void *);
#else
static void	em_intr_fast(void *);
static void	em_add_int_process_limit(struct adapter *, const char *,
		    const char *, int *, int);
static void	em_handle_rxtx(void *context, int pending);
static void	em_handle_link(void *context, int pending);
#endif

/*********************************************************************
 *  FreeBSD Device Interface Entry Points
 *********************************************************************/

static device_method_t em_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe, em_probe),
	DEVMETHOD(device_attach, em_attach),
	DEVMETHOD(device_detach, em_detach),
	DEVMETHOD(device_shutdown, em_shutdown),
	DEVMETHOD(device_suspend, em_suspend),
	DEVMETHOD(device_resume, em_resume),
	{0, 0}
};

static driver_t em_driver = {
	"em", em_methods, sizeof(struct adapter),
};

static devclass_t em_devclass;
DRIVER_MODULE(em, pci, em_driver, em_devclass, 0, 0);
MODULE_DEPEND(em, pci, 1, 1, 1);
MODULE_DEPEND(em, ether, 1, 1, 1);

/*********************************************************************
 *  Tunable default values.
 *********************************************************************/

#define E1000_TICKS_TO_USECS(ticks)	((1024 * (ticks) + 500) / 1000)
#define E1000_USECS_TO_TICKS(usecs)	((1000 * (usecs) + 512) / 1024)
#define M_TSO_LEN			66
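
/*
 * The interrupt-delay registers count in units of 1.024 usec, hence the
 * scaled conversions above.  Worked example, assuming EM_TIDV is 64:
 *   E1000_TICKS_TO_USECS(64) = (1024 * 64 + 500) / 1000 = 66 usec
 *   E1000_USECS_TO_TICKS(66) = (1000 * 66 + 512) / 1024 = 64 ticks
 * (integer division; the +500 and +512 terms round to nearest).
 */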

static int em_tx_int_delay_dflt = E1000_TICKS_TO_USECS(EM_TIDV);
static int em_rx_int_delay_dflt = E1000_TICKS_TO_USECS(EM_RDTR);
static int em_tx_abs_int_delay_dflt = E1000_TICKS_TO_USECS(EM_TADV);
static int em_rx_abs_int_delay_dflt = E1000_TICKS_TO_USECS(EM_RADV);
static int em_rxd = EM_DEFAULT_RXD;
static int em_txd = EM_DEFAULT_TXD;
static int em_smart_pwr_down = FALSE;

TUNABLE_INT("hw.em.tx_int_delay", &em_tx_int_delay_dflt);
TUNABLE_INT("hw.em.rx_int_delay", &em_rx_int_delay_dflt);
TUNABLE_INT("hw.em.tx_abs_int_delay", &em_tx_abs_int_delay_dflt);
TUNABLE_INT("hw.em.rx_abs_int_delay", &em_rx_abs_int_delay_dflt);
TUNABLE_INT("hw.em.rxd", &em_rxd);
TUNABLE_INT("hw.em.txd", &em_txd);
TUNABLE_INT("hw.em.smart_pwr_down", &em_smart_pwr_down);
#ifndef DEVICE_POLLING
static int em_rx_process_limit = 100;
TUNABLE_INT("hw.em.rx_process_limit", &em_rx_process_limit);
#endif
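
/*
 * Example: as loader tunables these can be set at boot, e.g. in
 * /boot/loader.conf (values are illustrative, not recommendations):
 *   hw.em.txd="1024"
 *   hw.em.rxd="1024"
 *   hw.em.rx_int_delay="0"
 */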

/*********************************************************************
 *  Device identification routine
 *
 *  em_probe determines if the driver should be loaded on an
 *  adapter based on the PCI vendor/device ID of the adapter.
 *
 *  return BUS_PROBE_DEFAULT on success, positive on failure
 *********************************************************************/

static int
em_probe(device_t dev)
{
	char		adapter_name[60];
	uint16_t	pci_vendor_id = 0;
	uint16_t	pci_device_id = 0;
	uint16_t	pci_subvendor_id = 0;
	uint16_t	pci_subdevice_id = 0;
	em_vendor_info_t *ent;

	INIT_DEBUGOUT("em_probe: begin");

	pci_vendor_id = pci_get_vendor(dev);
	if (pci_vendor_id != EM_VENDOR_ID)
		return (ENXIO);

	pci_device_id = pci_get_device(dev);
	pci_subvendor_id = pci_get_subvendor(dev);
	pci_subdevice_id = pci_get_subdevice(dev);

	ent = em_vendor_info_array;
	while (ent->vendor_id != 0) {
		if ((pci_vendor_id == ent->vendor_id) &&
		    (pci_device_id == ent->device_id) &&
		    ((pci_subvendor_id == ent->subvendor_id) ||
		    (ent->subvendor_id == PCI_ANY_ID)) &&
		    ((pci_subdevice_id == ent->subdevice_id) ||
		    (ent->subdevice_id == PCI_ANY_ID))) {
			sprintf(adapter_name, "%s %s",
				em_strings[ent->index],
				em_driver_version);
			device_set_desc_copy(dev, adapter_name);
			return (BUS_PROBE_DEFAULT);
		}
		ent++;
	}

	return (ENXIO);
}

/*********************************************************************
 *  Device initialization routine
 *
 *  The attach entry point is called when the driver is being loaded.
 *  This routine identifies the type of hardware, allocates all resources
 *  and initializes the hardware.
 *
 *  return 0 on success, positive on failure
 *********************************************************************/

static int
em_attach(device_t dev)
{
	struct adapter	*adapter;
	int		tsize, rsize;
	int		error = 0;

	INIT_DEBUGOUT("em_attach: begin");

	adapter = device_get_softc(dev);
	adapter->dev = adapter->osdep.dev = dev;
	EM_LOCK_INIT(adapter, device_get_nameunit(dev));

	/* SYSCTL stuff */
	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
	    OID_AUTO, "debug_info", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
	    em_sysctl_debug_info, "I", "Debug Information");

	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
	    OID_AUTO, "stats", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
	    em_sysctl_stats, "I", "Statistics");

	callout_init(&adapter->timer, CALLOUT_MPSAFE);
	callout_init(&adapter->tx_fifo_timer, CALLOUT_MPSAFE);

	/* Determine hardware revision */
	em_identify_hardware(adapter);

	/* Set up some sysctls for the tunable interrupt delays */
	em_add_int_delay_sysctl(adapter, "rx_int_delay",
	    "receive interrupt delay in usecs", &adapter->rx_int_delay,
	    E1000_REG_OFFSET(&adapter->hw, RDTR), em_rx_int_delay_dflt);
	em_add_int_delay_sysctl(adapter, "tx_int_delay",
	    "transmit interrupt delay in usecs", &adapter->tx_int_delay,
	    E1000_REG_OFFSET(&adapter->hw, TIDV), em_tx_int_delay_dflt);
	if (adapter->hw.mac_type >= em_82540) {
		em_add_int_delay_sysctl(adapter, "rx_abs_int_delay",
		    "receive interrupt delay limit in usecs",
		    &adapter->rx_abs_int_delay,
		    E1000_REG_OFFSET(&adapter->hw, RADV),
		    em_rx_abs_int_delay_dflt);
		em_add_int_delay_sysctl(adapter, "tx_abs_int_delay",
		    "transmit interrupt delay limit in usecs",
		    &adapter->tx_abs_int_delay,
		    E1000_REG_OFFSET(&adapter->hw, TADV),
		    em_tx_abs_int_delay_dflt);
	}

#ifndef DEVICE_POLLING
	/* Sysctls for limiting the amount of work done in the taskqueue */
	em_add_int_process_limit(adapter, "rx_processing_limit",
	    "max number of rx packets to process", &adapter->rx_process_limit,
	    em_rx_process_limit);
#endif

	/*
	 * Validate the number of transmit and receive descriptors.  The
	 * ring size in bytes must be a multiple of EM_DBA_ALIGN, and the
	 * count must not exceed the hardware maximum.
	 */
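	/*
	 * Example: with the 16-byte legacy descriptor and an assumed
	 * EM_DBA_ALIGN of 128, the descriptor count must be a multiple
	 * of 128 / 16 = 8 for the modulo check below to pass.
	 */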
	if (((em_txd * sizeof(struct em_tx_desc)) % EM_DBA_ALIGN) != 0 ||
	    (adapter->hw.mac_type >= em_82544 && em_txd > EM_MAX_TXD) ||
	    (adapter->hw.mac_type < em_82544 && em_txd > EM_MAX_TXD_82543) ||
	    (em_txd < EM_MIN_TXD)) {
		device_printf(dev, "Using %d TX descriptors instead of %d!\n",
		    EM_DEFAULT_TXD, em_txd);
		adapter->num_tx_desc = EM_DEFAULT_TXD;
	} else
		adapter->num_tx_desc = em_txd;
	if (((em_rxd * sizeof(struct em_rx_desc)) % EM_DBA_ALIGN) != 0 ||
	    (adapter->hw.mac_type >= em_82544 && em_rxd > EM_MAX_RXD) ||
	    (adapter->hw.mac_type < em_82544 && em_rxd > EM_MAX_RXD_82543) ||
	    (em_rxd < EM_MIN_RXD)) {
		device_printf(dev, "Using %d RX descriptors instead of %d!\n",
		    EM_DEFAULT_RXD, em_rxd);
		adapter->num_rx_desc = EM_DEFAULT_RXD;
	} else
		adapter->num_rx_desc = em_rxd;

	adapter->hw.autoneg = DO_AUTO_NEG;
	adapter->hw.wait_autoneg_complete = WAIT_FOR_AUTO_NEG_DEFAULT;
	adapter->hw.autoneg_advertised = AUTONEG_ADV_DEFAULT;
	adapter->hw.tbi_compatibility_en = TRUE;
	adapter->rx_buffer_len = EM_RXBUFFER_2048;

	adapter->hw.phy_init_script = 1;
	adapter->hw.phy_reset_disable = FALSE;

#ifndef EM_MASTER_SLAVE
	adapter->hw.master_slave = em_ms_hw_default;
#else
	adapter->hw.master_slave = EM_MASTER_SLAVE;
#endif
	/*
	 * Set the max frame size assuming standard ethernet
	 * sized frames.
	 */
	adapter->hw.max_frame_size = ETHERMTU + ETHER_HDR_LEN + ETHER_CRC_LEN;
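	/* e.g. 1500 (ETHERMTU) + 14 (ETHER_HDR_LEN) + 4 (ETHER_CRC_LEN) = 1518 */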

	adapter->hw.min_frame_size =
	    MINIMUM_ETHERNET_PACKET_SIZE + ETHER_CRC_LEN;

	/*
	 * This controls when hardware reports transmit completion
	 * status.
	 */
	adapter->hw.report_tx_early = 1;
	if (em_allocate_pci_resources(adapter)) {
		device_printf(dev, "Allocation of PCI resources failed\n");
		error = ENXIO;
		goto err_pci;
	}

	/* Initialize eeprom parameters */
	em_init_eeprom_params(&adapter->hw);

	tsize = roundup2(adapter->num_tx_desc * sizeof(struct em_tx_desc),
	    EM_DBA_ALIGN);

	/* Allocate Transmit Descriptor ring */
	if (em_dma_malloc(adapter, tsize, &adapter->txdma, BUS_DMA_NOWAIT)) {
		device_printf(dev, "Unable to allocate tx_desc memory\n");
		error = ENOMEM;
		goto err_tx_desc;
	}
	adapter->tx_desc_base = (struct em_tx_desc *)adapter->txdma.dma_vaddr;

	rsize = roundup2(adapter->num_rx_desc * sizeof(struct em_rx_desc),
	    EM_DBA_ALIGN);

	/* Allocate Receive Descriptor ring */
	if (em_dma_malloc(adapter, rsize, &adapter->rxdma, BUS_DMA_NOWAIT)) {
		device_printf(dev, "Unable to allocate rx_desc memory\n");
		error = ENOMEM;
		goto err_rx_desc;
	}
	adapter->rx_desc_base = (struct em_rx_desc *)adapter->rxdma.dma_vaddr;

	/* Initialize the hardware */
	if (em_hardware_init(adapter)) {
		device_printf(dev, "Unable to initialize the hardware\n");
		error = EIO;
		goto err_hw_init;
	}

	/* Copy the permanent MAC address out of the EEPROM */
	if (em_read_mac_addr(&adapter->hw) < 0) {
		device_printf(dev, "EEPROM read error while reading MAC"
		    " address\n");
		error = EIO;
		goto err_hw_init;
	}

	if (!em_is_valid_ether_addr(adapter->hw.mac_addr)) {
		device_printf(dev, "Invalid MAC address\n");
		error = EIO;
		goto err_hw_init;
	}

	/* Setup OS specific network interface */
	em_setup_interface(dev, adapter);

	em_allocate_intr(adapter);

	/* Initialize statistics */
	em_clear_hw_cntrs(&adapter->hw);
	em_update_stats_counters(adapter);
	adapter->hw.get_link_status = 1;
	em_update_link_status(adapter);

	/* Indicate SOL/IDER usage */
	if (em_check_phy_reset_block(&adapter->hw))
		device_printf(dev,
		    "PHY reset is blocked due to SOL/IDER session.\n");

	/* Identify 82544 on PCIX */
	em_get_bus_info(&adapter->hw);
	if (adapter->hw.bus_type == em_bus_type_pcix &&
	    adapter->hw.mac_type == em_82544)
		adapter->pcix_82544 = TRUE;
	else
		adapter->pcix_82544 = FALSE;

	INIT_DEBUGOUT("em_attach: end");

	return (0);

err_hw_init:
	em_dma_free(adapter, &adapter->rxdma);
err_rx_desc:
	em_dma_free(adapter, &adapter->txdma);
err_tx_desc:
err_pci:
	em_free_intr(adapter);
	em_free_pci_resources(adapter);
	EM_LOCK_DESTROY(adapter);

	return (error);
}

/*********************************************************************
 *  Device removal routine
 *
 *  The detach entry point is called when the driver is being removed.
 *  This routine stops the adapter and deallocates all the resources
 *  that were allocated for driver operation.
 *
 *  return 0 on success, positive on failure
 *********************************************************************/

static int
em_detach(device_t dev)
{
	struct adapter	*adapter = device_get_softc(dev);
	struct ifnet	*ifp = adapter->ifp;

	INIT_DEBUGOUT("em_detach: begin");

#ifdef DEVICE_POLLING
	if (ifp->if_capenable & IFCAP_POLLING)
		ether_poll_deregister(ifp);
#endif

	em_free_intr(adapter);
	EM_LOCK(adapter);
	adapter->in_detach = 1;
	em_stop(adapter);
	em_phy_hw_reset(&adapter->hw);
	EM_UNLOCK(adapter);
	ether_ifdetach(adapter->ifp);

	em_free_pci_resources(adapter);
	bus_generic_detach(dev);
	if_free(ifp);

	/* Free Transmit Descriptor ring */
	if (adapter->tx_desc_base) {
		em_dma_free(adapter, &adapter->txdma);
		adapter->tx_desc_base = NULL;
	}

	/* Free Receive Descriptor ring */
	if (adapter->rx_desc_base) {
		em_dma_free(adapter, &adapter->rxdma);
		adapter->rx_desc_base = NULL;
	}

	EM_LOCK_DESTROY(adapter);

	return (0);
}

/*********************************************************************
 *
 *  Shutdown entry point
 *
 **********************************************************************/

static int
em_shutdown(device_t dev)
{
	struct adapter *adapter = device_get_softc(dev);

	EM_LOCK(adapter);
	em_stop(adapter);
	EM_UNLOCK(adapter);
	return (0);
}

/*
 * Suspend/resume device methods.
 */
static int
em_suspend(device_t dev)
{
	struct adapter *adapter = device_get_softc(dev);

	EM_LOCK(adapter);
	em_stop(adapter);
	EM_UNLOCK(adapter);

	return bus_generic_suspend(dev);
}

static int
em_resume(device_t dev)
{
	struct adapter *adapter = device_get_softc(dev);
	struct ifnet *ifp = adapter->ifp;

	EM_LOCK(adapter);
	em_init_locked(adapter);
	if ((ifp->if_flags & IFF_UP) &&
	    (ifp->if_drv_flags & IFF_DRV_RUNNING))
		em_start_locked(ifp);
	EM_UNLOCK(adapter);

	return bus_generic_resume(dev);
}


/*********************************************************************
 *  Transmit entry point
 *
 *  em_start is called by the stack to initiate a transmit.
 *  The driver will remain in this routine as long as there are
 *  packets to transmit and transmit resources are available.
 *  If resources are not available, the stack is notified and
 *  the packet is requeued.
 **********************************************************************/

static void
em_start_locked(struct ifnet *ifp)
{
	struct adapter	*adapter = ifp->if_softc;
	struct mbuf	*m_head;

	EM_LOCK_ASSERT(adapter);

	if ((ifp->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) !=
	    IFF_DRV_RUNNING)
		return;
	if (!adapter->link_active)
		return;

	while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {

		IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
		if (m_head == NULL)
			break;
		/*
		 * em_encap() can modify our pointer, and/or make it
		 * NULL on failure.  In that event, we can't requeue.
		 */
		if (em_encap(adapter, &m_head)) {
			if (m_head == NULL)
				break;
			ifp->if_drv_flags |= IFF_DRV_OACTIVE;
			IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
			break;
		}

		/* Send a copy of the frame to the BPF listener */
		BPF_MTAP(ifp, m_head);

		/* Set timeout in case hardware has problems transmitting. */
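		/*
		 * (if_timer is decremented once per second, so
		 * EM_TX_TIMEOUT is expressed in seconds.)
		 */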
		ifp->if_timer = EM_TX_TIMEOUT;
	}
}

static void
em_start(struct ifnet *ifp)
{
	struct adapter *adapter = ifp->if_softc;

	EM_LOCK(adapter);
	if (ifp->if_drv_flags & IFF_DRV_RUNNING)
		em_start_locked(ifp);
	EM_UNLOCK(adapter);
}

/*********************************************************************
 *  Ioctl entry point
 *
 *  em_ioctl is called when the user wants to configure the
 *  interface.
 *
 *  return 0 on success, positive on failure
 **********************************************************************/

static int
em_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
{
	struct adapter	*adapter = ifp->if_softc;
	struct ifreq *ifr = (struct ifreq *)data;
	struct ifaddr *ifa = (struct ifaddr *)data;
	int error = 0;

	if (adapter->in_detach)
		return (error);

	switch (command) {
	case SIOCSIFADDR:
	case SIOCGIFADDR:
		if (ifa->ifa_addr->sa_family == AF_INET) {
			/*
			 * XXX
			 * Since resetting the hardware takes a very long
			 * time and results in link renegotiation, we only
			 * initialize the hardware when it is absolutely
			 * required.
			 */
			ifp->if_flags |= IFF_UP;
			if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) {
				EM_LOCK(adapter);
				em_init_locked(adapter);
				EM_UNLOCK(adapter);
			}
			arp_ifinit(ifp, ifa);
		} else
			error = ether_ioctl(ifp, command, data);
		break;
	case SIOCSIFMTU:
	    {
		int max_frame_size;
		uint16_t eeprom_data = 0;

		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFMTU (Set Interface MTU)");

		EM_LOCK(adapter);
		switch (adapter->hw.mac_type) {
		case em_82573:
			/*
			 * 82573 only supports jumbo frames
			 * if ASPM is disabled.
			 */
			em_read_eeprom(&adapter->hw, EEPROM_INIT_3GIO_3, 1,
			    &eeprom_data);
			if (eeprom_data & EEPROM_WORD1A_ASPM_MASK) {
				max_frame_size = ETHER_MAX_LEN;
				break;
			}
			/* Allow Jumbo frames - fall thru */
		case em_82571:
		case em_82572:
		case em_80003es2lan:	/* Limit Jumbo Frame size */
			max_frame_size = 9234;
			break;
		case em_ich8lan:
			/* ICH8 does not support jumbo frames */
			max_frame_size = ETHER_MAX_LEN;
			break;
		default:
			max_frame_size = MAX_JUMBO_FRAME_SIZE;
		}
		if (ifr->ifr_mtu > max_frame_size - ETHER_HDR_LEN -
		    ETHER_CRC_LEN) {
			EM_UNLOCK(adapter);
			error = EINVAL;
			break;
		}
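		/*
		 * E.g. with a 9234-byte max frame the largest accepted
		 * MTU is 9234 - 14 - 4 = 9216 bytes.
		 */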

		ifp->if_mtu = ifr->ifr_mtu;
		adapter->hw.max_frame_size =
		    ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
		em_init_locked(adapter);
		EM_UNLOCK(adapter);
		break;
	    }
	case SIOCSIFFLAGS:
		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFFLAGS (Set Interface Flags)");
		EM_LOCK(adapter);
		if (ifp->if_flags & IFF_UP) {
			if ((ifp->if_drv_flags & IFF_DRV_RUNNING)) {
				if ((ifp->if_flags ^ adapter->if_flags) &
				    IFF_PROMISC) {
					em_disable_promisc(adapter);
					em_set_promisc(adapter);
				}
			} else
				em_init_locked(adapter);
		} else {
			if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
				em_stop(adapter);
			}
		}
		adapter->if_flags = ifp->if_flags;
		EM_UNLOCK(adapter);
		break;
	case SIOCADDMULTI:
	case SIOCDELMULTI:
		IOCTL_DEBUGOUT("ioctl rcv'd: SIOC(ADD|DEL)MULTI");
		if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
			EM_LOCK(adapter);
			em_disable_intr(adapter);
			em_set_multi(adapter);
			if (adapter->hw.mac_type == em_82542_rev2_0) {
				em_initialize_receive_unit(adapter);
			}
#ifdef DEVICE_POLLING
			if (!(ifp->if_capenable & IFCAP_POLLING))
#endif
				em_enable_intr(adapter);
			EM_UNLOCK(adapter);
		}
		break;
	case SIOCSIFMEDIA:
		/* Check SOL/IDER usage */
		EM_LOCK(adapter);
		if (em_check_phy_reset_block(&adapter->hw)) {
			EM_UNLOCK(adapter);
			device_printf(adapter->dev, "Media change is"
			    " blocked due to SOL/IDER session.\n");
			break;
		}
		EM_UNLOCK(adapter);
	case SIOCGIFMEDIA:
		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCxIFMEDIA (Get/Set Interface Media)");
		error = ifmedia_ioctl(ifp, ifr, &adapter->media, command);
		break;
	case SIOCSIFCAP:
	    {
		int mask, reinit;

		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFCAP (Set Capabilities)");
		reinit = 0;
		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
#ifdef DEVICE_POLLING
		if (mask & IFCAP_POLLING) {
			if (ifr->ifr_reqcap & IFCAP_POLLING) {
				error = ether_poll_register(em_poll, ifp);
				if (error)
					return (error);
				EM_LOCK(adapter);
				em_disable_intr(adapter);
				ifp->if_capenable |= IFCAP_POLLING;
				EM_UNLOCK(adapter);
			} else {
				error = ether_poll_deregister(ifp);
				/* Enable interrupt even in error case */
				EM_LOCK(adapter);
				em_enable_intr(adapter);
				ifp->if_capenable &= ~IFCAP_POLLING;
				EM_UNLOCK(adapter);
			}
		}
#endif
		if (mask & IFCAP_HWCSUM) {
			ifp->if_capenable ^= IFCAP_HWCSUM;
			reinit = 1;
		}
		if (mask & IFCAP_TSO4) {
			ifp->if_capenable ^= IFCAP_TSO4;
			reinit = 1;
		}
		if (mask & IFCAP_VLAN_HWTAGGING) {
			ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
			reinit = 1;
		}
		if (reinit && (ifp->if_drv_flags & IFF_DRV_RUNNING))
			em_init(adapter);
		VLAN_CAPABILITIES(ifp);
		break;
	    }
	default:
		error = ether_ioctl(ifp, command, data);
		break;
	}

	return (error);
}

/*********************************************************************
 *  Watchdog entry point
 *
 *  This routine is called whenever hardware quits transmitting.
 *
 **********************************************************************/

static void
em_watchdog(struct ifnet *ifp)
{
	struct adapter *adapter = ifp->if_softc;

	EM_LOCK(adapter);
	/*
	 * If we are in this routine because of pause frames, then
	 * don't reset the hardware.
	 */
	if (E1000_READ_REG(&adapter->hw, STATUS) & E1000_STATUS_TXOFF) {
		ifp->if_timer = EM_TX_TIMEOUT;
		EM_UNLOCK(adapter);
		return;
	}

	/*
	 * Reclaim first, as there is a possibility of losing Tx completion
	 * interrupts.  Missing Tx completion interrupts may come from the
	 * Tx interrupt moderation mechanism (delayed interrupts) or from a
	 * chipset bug.
	 */
	em_txeof(adapter);
	if (adapter->num_tx_desc_avail == adapter->num_tx_desc) {
		EM_UNLOCK(adapter);
		return;
	}

	if (em_check_for_link(&adapter->hw) == 0)
		device_printf(adapter->dev, "watchdog timeout -- resetting\n");

	ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
	adapter->watchdog_events++;

	em_init_locked(adapter);
	EM_UNLOCK(adapter);
}

/*********************************************************************
 *  Init entry point
 *
 *  This routine is used in two ways.  It is used by the stack as an
 *  init entry point in the network interface structure.  It is also
 *  used by the driver as a hw/sw initialization routine to get to a
 *  consistent state.
 *
 **********************************************************************/

static void
em_init_locked(struct adapter *adapter)
{
	struct ifnet	*ifp = adapter->ifp;
	device_t	dev = adapter->dev;
	uint32_t	pba;

	INIT_DEBUGOUT("em_init: begin");

	EM_LOCK_ASSERT(adapter);

	em_stop(adapter);

	/*
	 * Packet Buffer Allocation (PBA)
	 * Writing PBA sets the receive portion of the buffer;
	 * the remainder is used for the transmit buffer.
	 *
	 * Devices before the 82547 had a Packet Buffer of 64K.
	 *   Default allocation: PBA=48K for Rx, leaving 16K for Tx.
	 * After the 82547 the buffer was reduced to 40K.
	 *   Default allocation: PBA=30K for Rx, leaving 10K for Tx.
	 *   Note: the default does not leave enough room for Jumbo Frames >10k.
	 */
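	/*
	 * Worked example, assuming the PBA register counts 1KB blocks:
	 * an 82547 carrying jumbo frames gets pba = E1000_PBA_22K, which
	 * leaves 40K - 22K = 18K of the packet buffer for transmit; the
	 * tx_fifo_size computed below is then
	 * (40 - 22) << EM_PBA_BYTES_SHIFT bytes.
	 */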
	switch (adapter->hw.mac_type) {
	case em_82547:
	case em_82547_rev_2: /* 82547: Total Packet Buffer is 40K */
		if (adapter->hw.max_frame_size > EM_RXBUFFER_8192)
			pba = E1000_PBA_22K; /* 22K for Rx, 18K for Tx */
		else
			pba = E1000_PBA_30K; /* 30K for Rx, 10K for Tx */
		adapter->tx_fifo_head = 0;
		adapter->tx_head_addr = pba << EM_TX_HEAD_ADDR_SHIFT;
		adapter->tx_fifo_size =
		    (E1000_PBA_40K - pba) << EM_PBA_BYTES_SHIFT;
		break;
	case em_80003es2lan: /* 80003es2lan: Total Packet Buffer is 48K */
	case em_82571: /* 82571: Total Packet Buffer is 48K */
	case em_82572: /* 82572: Total Packet Buffer is 48K */
		pba = E1000_PBA_32K; /* 32K for Rx, 16K for Tx */
		break;
	case em_82573: /* 82573: Total Packet Buffer is 32K */
		/* Jumbo frames not supported */
		pba = E1000_PBA_12K; /* 12K for Rx, 20K for Tx */
		break;
	case em_ich8lan:
		pba = E1000_PBA_8K;
		break;
	default:
		/* Devices before 82547 had a Packet Buffer of 64K. */
		if (adapter->hw.max_frame_size > EM_RXBUFFER_8192)
			pba = E1000_PBA_40K; /* 40K for Rx, 24K for Tx */
		else
			pba = E1000_PBA_48K; /* 48K for Rx, 16K for Tx */
	}

	INIT_DEBUGOUT1("em_init: pba=%dK", pba);
	E1000_WRITE_REG(&adapter->hw, PBA, pba);

	/* Get the latest MAC address; the user may have set a LAA. */
	bcopy(IF_LLADDR(adapter->ifp), adapter->hw.mac_addr, ETHER_ADDR_LEN);

	/* Initialize the hardware */
	if (em_hardware_init(adapter)) {
		device_printf(dev, "Unable to initialize the hardware\n");
		return;
	}
	em_update_link_status(adapter);

	if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING)
		em_enable_vlans(adapter);

	ifp->if_hwassist = 0;
	if (adapter->hw.mac_type >= em_82543) {
		if (ifp->if_capenable & IFCAP_TXCSUM)
			ifp->if_hwassist = EM_CHECKSUM_FEATURES;
		/*
		 * em_setup_transmit_structures() will behave differently
		 * based on the state of TSO.
		 */
		if (ifp->if_capenable & IFCAP_TSO)
			ifp->if_hwassist |= EM_TCPSEG_FEATURES;
	}

	/* Prepare transmit descriptors and buffers */
	if (em_setup_transmit_structures(adapter)) {
		device_printf(dev, "Could not setup transmit structures\n");
		em_stop(adapter);
		return;
	}
	em_initialize_transmit_unit(adapter);

	/* Setup Multicast table */
	em_set_multi(adapter);

	/* Prepare receive descriptors and buffers */
	if (em_setup_receive_structures(adapter)) {
		device_printf(dev, "Could not setup receive structures\n");
		em_stop(adapter);
		return;
	}
	em_initialize_receive_unit(adapter);

	/* Don't lose promiscuous settings */
	em_set_promisc(adapter);

	ifp->if_drv_flags |= IFF_DRV_RUNNING;
	ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;

	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
	em_clear_hw_cntrs(&adapter->hw);
#ifdef DEVICE_POLLING
	/*
	 * Only enable interrupts if we are not polling; make sure
	 * they are off otherwise.
	 */
	if (ifp->if_capenable & IFCAP_POLLING)
		em_disable_intr(adapter);
	else
#endif /* DEVICE_POLLING */
		em_enable_intr(adapter);

	/* Don't reset the phy next time init gets called */
	adapter->hw.phy_reset_disable = TRUE;
}

static void
em_init(void *arg)
{
	struct adapter *adapter = arg;

	EM_LOCK(adapter);
	em_init_locked(adapter);
	EM_UNLOCK(adapter);
}


#ifdef DEVICE_POLLING
/*********************************************************************
 *
 *  Legacy polling routine
 *
 *********************************************************************/
static void
em_poll(struct ifnet *ifp, enum poll_cmd cmd, int count)
{
	struct adapter *adapter = ifp->if_softc;
	uint32_t reg_icr;

	EM_LOCK(adapter);
	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
		EM_UNLOCK(adapter);
		return;
	}

	if (cmd == POLL_AND_CHECK_STATUS) {
		reg_icr = E1000_READ_REG(&adapter->hw, ICR);
		if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
			callout_stop(&adapter->timer);
			adapter->hw.get_link_status = 1;
			em_check_for_link(&adapter->hw);
			em_update_link_status(adapter);
			callout_reset(&adapter->timer, hz, em_local_timer,
			    adapter);
		}
	}
	em_rxeof(adapter, count);
	em_txeof(adapter);

	if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
		em_start_locked(ifp);
	EM_UNLOCK(adapter);
}

/*********************************************************************
 *
 *  Legacy Interrupt Service routine
 *
 *********************************************************************/
static void
em_intr(void *arg)
{
	struct adapter	*adapter = arg;
	struct ifnet	*ifp;
	uint32_t	reg_icr;

	EM_LOCK(adapter);

	ifp = adapter->ifp;

	if (ifp->if_capenable & IFCAP_POLLING) {
		EM_UNLOCK(adapter);
		return;
	}

	for (;;) {
		reg_icr = E1000_READ_REG(&adapter->hw, ICR);
		if (adapter->hw.mac_type >= em_82571 &&
		    (reg_icr & E1000_ICR_INT_ASSERTED) == 0)
			break;
		else if (reg_icr == 0)
			break;

		/*
		 * XXX: some laptops trigger several spurious interrupts
		 * on em(4) when in the resume cycle. The ICR register
		 * reports all-ones value in this case. Processing such
		 * interrupts would lead to a freeze. I don't know why.
		 */
		if (reg_icr == 0xffffffff)
			break;

		if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
			em_rxeof(adapter, -1);
			em_txeof(adapter);
		}

		/* Link status change */
		if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
			callout_stop(&adapter->timer);
			adapter->hw.get_link_status = 1;
			em_check_for_link(&adapter->hw);
			em_update_link_status(adapter);
			callout_reset(&adapter->timer, hz, em_local_timer,
			    adapter);
		}

		if (reg_icr & E1000_ICR_RXO)
			adapter->rx_overruns++;
	}

	if (ifp->if_drv_flags & IFF_DRV_RUNNING &&
	    !IFQ_DRV_IS_EMPTY(&ifp->if_snd))
		em_start_locked(ifp);

	EM_UNLOCK(adapter);
}

#else  /* if not DEVICE_POLLING, then fast interrupt routines only */

static void
em_handle_link(void *context, int pending)
{
	struct adapter	*adapter = context;
	struct ifnet *ifp;

	ifp = adapter->ifp;

	EM_LOCK(adapter);

	callout_stop(&adapter->timer);
	adapter->hw.get_link_status = 1;
	em_check_for_link(&adapter->hw);
	em_update_link_status(adapter);
	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
	EM_UNLOCK(adapter);
}

static void
em_handle_rxtx(void *context, int pending)
{
	struct adapter	*adapter = context;
	struct ifnet	*ifp;

	NET_LOCK_GIANT();
	ifp = adapter->ifp;

	/*
	 * TODO:
	 * It should be possible to run the tx clean loop without the lock.
	 */
	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
		if (em_rxeof(adapter, adapter->rx_process_limit) != 0)
			taskqueue_enqueue(adapter->tq, &adapter->rxtx_task);
		EM_LOCK(adapter);
		em_txeof(adapter);

		if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
			em_start_locked(ifp);
		EM_UNLOCK(adapter);
	}

	em_enable_intr(adapter);
	NET_UNLOCK_GIANT();
}

/*********************************************************************
 *
 *  Fast Interrupt Service routine
 *
 *********************************************************************/
static void
em_intr_fast(void *arg)
{
	struct adapter	*adapter = arg;
	struct ifnet	*ifp;
	uint32_t	reg_icr;

	ifp = adapter->ifp;

	reg_icr = E1000_READ_REG(&adapter->hw, ICR);

	/* Hot eject?  */
	if (reg_icr == 0xffffffff)
		return;

	/* Definitely not our interrupt.  */
	if (reg_icr == 0x0)
		return;

	/*
	 * Starting with the 82571 chip, bit 31 should be used to
	 * determine whether the interrupt belongs to us.
	 */
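	/* (E1000_ICR_INT_ASSERTED is bit 31, i.e. 0x80000000.) */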
	if (adapter->hw.mac_type >= em_82571 &&
	    (reg_icr & E1000_ICR_INT_ASSERTED) == 0)
		return;

	/*
	 * Mask interrupts until the taskqueue is finished running.  This is
	 * cheap, just assume that it is needed.  This also works around the
	 * MSI message reordering errata on certain systems.
	 */
	em_disable_intr(adapter);
	taskqueue_enqueue(adapter->tq, &adapter->rxtx_task);

	/* Link status change */
	if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC))
		taskqueue_enqueue(taskqueue_fast, &adapter->link_task);

	if (reg_icr & E1000_ICR_RXO)
		adapter->rx_overruns++;
}
#endif /* ! DEVICE_POLLING */

/*********************************************************************
 *
 *  Media Ioctl callback
 *
 *  This routine is called whenever the user queries the status of
 *  the interface using ifconfig.
 *
 **********************************************************************/
static void
em_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
{
	struct adapter *adapter = ifp->if_softc;

	INIT_DEBUGOUT("em_media_status: begin");

	EM_LOCK(adapter);
	em_check_for_link(&adapter->hw);
	em_update_link_status(adapter);

	ifmr->ifm_status = IFM_AVALID;
	ifmr->ifm_active = IFM_ETHER;

	if (!adapter->link_active) {
		EM_UNLOCK(adapter);
		return;
	}

	ifmr->ifm_status |= IFM_ACTIVE;

	if ((adapter->hw.media_type == em_media_type_fiber) ||
	    (adapter->hw.media_type == em_media_type_internal_serdes)) {
		if (adapter->hw.mac_type == em_82545)
			ifmr->ifm_active |= IFM_1000_LX | IFM_FDX;
		else
			ifmr->ifm_active |= IFM_1000_SX | IFM_FDX;
	} else {
		switch (adapter->link_speed) {
		case 10:
			ifmr->ifm_active |= IFM_10_T;
			break;
		case 100:
			ifmr->ifm_active |= IFM_100_TX;
			break;
		case 1000:
			ifmr->ifm_active |= IFM_1000_T;
			break;
		}
		if (adapter->link_duplex == FULL_DUPLEX)
			ifmr->ifm_active |= IFM_FDX;
		else
			ifmr->ifm_active |= IFM_HDX;
	}
	EM_UNLOCK(adapter);
}

/*********************************************************************
 *
 *  Media Ioctl callback
 *
 *  This routine is called when the user changes speed/duplex using
 *  the media/mediaopt options with ifconfig.
 *
 **********************************************************************/
static int
em_media_change(struct ifnet *ifp)
{
	struct adapter *adapter = ifp->if_softc;
	struct ifmedia  *ifm = &adapter->media;

	INIT_DEBUGOUT("em_media_change: begin");

	if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
		return (EINVAL);

	EM_LOCK(adapter);
	switch (IFM_SUBTYPE(ifm->ifm_media)) {
	case IFM_AUTO:
		adapter->hw.autoneg = DO_AUTO_NEG;
		adapter->hw.autoneg_advertised = AUTONEG_ADV_DEFAULT;
		break;
	case IFM_1000_LX:
	case IFM_1000_SX:
	case IFM_1000_T:
		adapter->hw.autoneg = DO_AUTO_NEG;
		adapter->hw.autoneg_advertised = ADVERTISE_1000_FULL;
		break;
	case IFM_100_TX:
		adapter->hw.autoneg = FALSE;
		adapter->hw.autoneg_advertised = 0;
		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
			adapter->hw.forced_speed_duplex = em_100_full;
		else
			adapter->hw.forced_speed_duplex = em_100_half;
		break;
	case IFM_10_T:
		adapter->hw.autoneg = FALSE;
		adapter->hw.autoneg_advertised = 0;
		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
			adapter->hw.forced_speed_duplex = em_10_full;
		else
			adapter->hw.forced_speed_duplex = em_10_half;
		break;
	default:
		device_printf(adapter->dev, "Unsupported media type\n");
	}

	/*
	 * As the speed/duplex settings may have changed, we need
	 * to reset the PHY.
	 */
	adapter->hw.phy_reset_disable = FALSE;

	em_init_locked(adapter);
	EM_UNLOCK(adapter);

	return (0);
}

/*********************************************************************
 *
 *  This routine maps the mbufs to tx descriptors.
 *
 *  return 0 on success, positive on failure
 **********************************************************************/
static int
em_encap(struct adapter *adapter, struct mbuf **m_headp)
{
	struct ifnet		*ifp = adapter->ifp;
	bus_dma_segment_t	segs[EM_MAX_SCATTER];
	bus_dmamap_t		map;
	struct em_buffer	*tx_buffer, *tx_buffer_last;
	struct em_tx_desc	*current_tx_desc;
	struct mbuf		*m_head;
	uint32_t		txd_upper, txd_lower, txd_used, txd_saved;
	int			nsegs, i, j;
	int			error, do_tso, tso_desc = 0;

	m_head = *m_headp;
	current_tx_desc = NULL;
	txd_upper = txd_lower = txd_used = txd_saved = 0;

	do_tso = ((m_head->m_pkthdr.csum_flags & CSUM_TSO) != 0);

	/*
	 * Force a cleanup if number of TX descriptors
	 * available hits the threshold.
	 */
	if (adapter->num_tx_desc_avail <= EM_TX_CLEANUP_THRESHOLD) {
		em_txeof(adapter);
		if (adapter->num_tx_desc_avail <= EM_TX_CLEANUP_THRESHOLD) {
			adapter->no_tx_desc_avail1++;
			return (ENOBUFS);
		}
	}

	/*
	 * When operating in promiscuous mode, hardware stripping of the
	 * VLAN tag on receive is disabled.  This should not prevent us
	 * from doing hardware insertion of the VLAN tag here as that
	 * is controlled by the dma descriptor flags and not the receive
	 * tag strip setting.  Unfortunately this hardware switches the
	 * VLAN encapsulation type from 802.1q to ISL when stripping on
	 * receive is disabled.  This means we have to add the vlan
	 * encapsulation here in the driver, since it will have come down
	 * from the VLAN layer with a tag instead of a VLAN header.
	 */
	if ((m_head->m_flags & M_VLANTAG) && adapter->em_insert_vlan_header) {
		struct ether_vlan_header *evl;
		struct ether_header eh;

		m_head = m_pullup(m_head, sizeof(eh));
		if (m_head == NULL) {
			*m_headp = NULL;
			return (ENOBUFS);
		}
		eh = *mtod(m_head, struct ether_header *);
		M_PREPEND(m_head, sizeof(*evl), M_DONTWAIT);
		if (m_head == NULL) {
			*m_headp = NULL;
			return (ENOBUFS);
		}
		m_head = m_pullup(m_head, sizeof(*evl));
		if (m_head == NULL) {
			*m_headp = NULL;
			return (ENOBUFS);
		}
		evl = mtod(m_head, struct ether_vlan_header *);
		bcopy(&eh, evl, sizeof(*evl));
		evl->evl_proto = evl->evl_encap_proto;
		evl->evl_encap_proto = htons(ETHERTYPE_VLAN);
		evl->evl_tag = htons(m_head->m_pkthdr.ether_vtag);
		*m_headp = m_head;
	}

	/*
	 * TSO workaround:
	 *  If an mbuf contains only the IP and TCP header we have
	 *  to pull 4 bytes of data into it.
	 */
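	/*
	 * (M_TSO_LEN is 66 bytes, presumably 14 of Ethernet header plus
	 * 20 of IP plus a 32-byte TCP header with options.)
	 */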
	if (do_tso && (m_head->m_len <= M_TSO_LEN)) {
		m_head = m_pullup(m_head, M_TSO_LEN + 4);
		*m_headp = m_head;
		if (m_head == NULL) {
			return (ENOBUFS);
		}
	}

	/*
	 * Map the packet for DMA.
	 */
	tx_buffer = &adapter->tx_buffer_area[adapter->next_avail_tx_desc];
	tx_buffer_last = tx_buffer;
	map = tx_buffer->map;

	error = bus_dmamap_load_mbuf_sg(adapter->txtag, map, *m_headp, segs,
	    &nsegs, BUS_DMA_NOWAIT);

	/*
	 * There are two types of errors we can (try) to handle:
	 * - EFBIG means the mbuf chain was too long and bus_dma ran
	 *   out of segments.  Defragment the mbuf chain and try again.
	 * - ENOMEM means bus_dma could not obtain enough bounce buffers
	 *   at this point in time.  Defer sending and try again later.
	 * All other errors, in particular EINVAL, are fatal and prevent the
	 * mbuf chain from ever going through.  Drop it and report error.
	 */
	if (error == EFBIG) {
		struct mbuf *m;

		m = m_defrag(*m_headp, M_DONTWAIT);
		if (m == NULL) {
			/* Assume m_defrag(9) used only m_get(9). */
			adapter->mbuf_alloc_failed++;
			m_freem(*m_headp);
			*m_headp = NULL;
			return (ENOBUFS);
		}
		*m_headp = m;

		error = bus_dmamap_load_mbuf_sg(adapter->txtag, map, *m_headp,
		    segs, &nsegs, BUS_DMA_NOWAIT);

		if (error == ENOMEM) {
			adapter->no_tx_dma_setup++;
			return (error);
		} else if (error != 0) {
			adapter->no_tx_dma_setup++;
			m_freem(*m_headp);
			*m_headp = NULL;
			return (error);
		}
	} else if (error == ENOMEM) {
		adapter->no_tx_dma_setup++;
		return (error);
	} else if (error != 0) {
		adapter->no_tx_dma_setup++;
		m_freem(*m_headp);
		*m_headp = NULL;
		return (error);
	}

	/*
	 * TSO Hardware workaround: if this packet is not
	 * TSO, and is only a single descriptor long, and
	 * it follows a TSO burst, then we need to add a
	 * sentinel descriptor to prevent premature writeback.
	 */
	if ((do_tso == 0) && (adapter->tx_tso == TRUE)) {
		if (nsegs == 1)
			tso_desc = TRUE;
		adapter->tx_tso = FALSE;
	}

	if (nsegs > adapter->num_tx_desc_avail - 2) {
		adapter->no_tx_desc_avail2++;
		bus_dmamap_unload(adapter->txtag, map);
		return (ENOBUFS);
	}
	m_head = *m_headp;

	/* Do hardware assists */
	if (ifp->if_hwassist) {
		if (do_tso &&
		    em_tso_setup(adapter, m_head, &txd_upper, &txd_lower)) {
			/* we need to make a final sentinel transmit desc */
			tso_desc = TRUE;
		} else
			em_transmit_checksum_setup(adapter, m_head,
			    &txd_upper, &txd_lower);
	}

	i = adapter->next_avail_tx_desc;
	if (adapter->pcix_82544)
		txd_saved = i;

	for (j = 0; j < nsegs; j++) {
		bus_size_t seg_len;
		bus_addr_t seg_addr;

		/* If adapter is 82544 and on PCIX bus. */
		if (adapter->pcix_82544) {
			DESC_ARRAY	desc_array;
			uint32_t	array_elements, counter;

			/*
			 * Check the Address and Length combination and
			 * split the data accordingly
			 */
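			/*
			 * A note on the workaround: em_fill_descriptors()
			 * splits a DMA segment whose address/length
			 * combination could trip the 82544 PCI-X DMA
			 * erratum; see that function for the exact rules.
			 */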
			array_elements = em_fill_descriptors(segs[j].ds_addr,
			    segs[j].ds_len, &desc_array);
			for (counter = 0; counter < array_elements; counter++) {
				if (txd_used == adapter->num_tx_desc_avail) {
					adapter->next_avail_tx_desc = txd_saved;
					adapter->no_tx_desc_avail2++;
					bus_dmamap_unload(adapter->txtag, map);
					return (ENOBUFS);
				}
				tx_buffer = &adapter->tx_buffer_area[i];
				current_tx_desc = &adapter->tx_desc_base[i];
				current_tx_desc->buffer_addr = htole64(
				    desc_array.descriptor[counter].address);
				current_tx_desc->lower.data = htole32(
				    (adapter->txd_cmd | txd_lower |
				    (uint16_t)desc_array.descriptor[counter].length));
				current_tx_desc->upper.data = htole32((txd_upper));
				if (++i == adapter->num_tx_desc)
					i = 0;

				tx_buffer->m_head = NULL;
				txd_used++;
			}
		} else {
			tx_buffer = &adapter->tx_buffer_area[i];
			current_tx_desc = &adapter->tx_desc_base[i];
			seg_addr = htole64(segs[j].ds_addr);
			seg_len  = segs[j].ds_len;
			/*
			 * TSO Workaround:
			 * If this is the last descriptor, we want to
			 * split it so we have a small final sentinel.
			 */
			if (tso_desc && (j == (nsegs - 1)) && (seg_len > 8)) {
				seg_len -= 4;
				current_tx_desc->buffer_addr = seg_addr;
				current_tx_desc->lower.data = htole32(
				    adapter->txd_cmd | txd_lower | seg_len);
				current_tx_desc->upper.data =
				    htole32(txd_upper);
				if (++i == adapter->num_tx_desc)
					i = 0;
				/* Now make the sentinel */
				++txd_used; /* using an extra txd */
				current_tx_desc = &adapter->tx_desc_base[i];
				tx_buffer = &adapter->tx_buffer_area[i];
				current_tx_desc->buffer_addr =
				    seg_addr + seg_len;
				current_tx_desc->lower.data = htole32(
				    adapter->txd_cmd | txd_lower | 4);
				current_tx_desc->upper.data =
				    htole32(txd_upper);
				if (++i == adapter->num_tx_desc)
					i = 0;
			} else {
				current_tx_desc->buffer_addr = seg_addr;
				current_tx_desc->lower.data = htole32(
				    adapter->txd_cmd | txd_lower | seg_len);
				current_tx_desc->upper.data =
				    htole32(txd_upper);
				if (++i == adapter->num_tx_desc)
					i = 0;
			}
			tx_buffer->m_head = NULL;
		}
	}

	adapter->next_avail_tx_desc = i;
	if (adapter->pcix_82544)
		adapter->num_tx_desc_avail -= txd_used;
	else {
		adapter->num_tx_desc_avail -= nsegs;
		if (tso_desc) /* TSO used an extra for sentinel */
			adapter->num_tx_desc_avail -= txd_used;
	}

	if (m_head->m_flags & M_VLANTAG) {
		/* Set the vlan id. */
		current_tx_desc->upper.fields.special =
		    htole16(m_head->m_pkthdr.ether_vtag);

		/* Tell hardware to add tag. */
		current_tx_desc->lower.data |= htole32(E1000_TXD_CMD_VLE);
	}

	tx_buffer->m_head = m_head;
	tx_buffer_last->map = tx_buffer->map;
	tx_buffer->map = map;
	bus_dmamap_sync(adapter->txtag, map, BUS_DMASYNC_PREWRITE);

	/*
	 * Last Descriptor of Packet needs End Of Packet (EOP).
	 */
	current_tx_desc->lower.data |= htole32(E1000_TXD_CMD_EOP);

	/*
	 * Advance the Transmit Descriptor Tail (Tdt); this tells the E1000
	 * that this frame is available to transmit.
	 */
	bus_dmamap_sync(adapter->txdma.dma_tag, adapter->txdma.dma_map,
	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);

	if (adapter->hw.mac_type == em_82547 &&
	    adapter->link_duplex == HALF_DUPLEX)
		em_82547_move_tail_locked(adapter);
	else {
		E1000_WRITE_REG(&adapter->hw, TDT, i);
		if (adapter->hw.mac_type == em_82547)
			em_82547_update_fifo_head(adapter,
			    m_head->m_pkthdr.len);
	}

	return (0);
}

1758/*********************************************************************
1759 *
1760 * 82547 workaround to avoid a controller hang in half-duplex environments.
1761 * The workaround is to avoid queuing a large packet that would span
1762 * the internal Tx FIFO ring boundary; in that case we reset the FIFO
1763 * pointers instead, and only do so while the FIFO is quiescent.
1764 *
1765 **********************************************************************/
1766static void
1767em_82547_move_tail_locked(struct adapter *adapter)
1768{
1769	uint16_t hw_tdt;
1770	uint16_t sw_tdt;
1771	struct em_tx_desc *tx_desc;
1772	uint16_t length = 0;
1773	boolean_t eop = 0;
1774
1775	EM_LOCK_ASSERT(adapter);
1776
1777	hw_tdt = E1000_READ_REG(&adapter->hw, TDT);
1778	sw_tdt = adapter->next_avail_tx_desc;
1779
1780	while (hw_tdt != sw_tdt) {
1781		tx_desc = &adapter->tx_desc_base[hw_tdt];
1782		length += tx_desc->lower.flags.length;
1783		eop = tx_desc->lower.data & E1000_TXD_CMD_EOP;
1784		if(++hw_tdt == adapter->num_tx_desc)
1785			hw_tdt = 0;
1786
1787		if (eop) {
1788			if (em_82547_fifo_workaround(adapter, length)) {
1789				adapter->tx_fifo_wrk_cnt++;
1790				callout_reset(&adapter->tx_fifo_timer, 1,
1791					em_82547_move_tail, adapter);
1792				break;
1793			}
1794			E1000_WRITE_REG(&adapter->hw, TDT, hw_tdt);
1795			em_82547_update_fifo_head(adapter, length);
1796			length = 0;
1797		}
1798	}
1799}
1800
1801static void
1802em_82547_move_tail(void *arg)
1803{
1804	struct adapter *adapter = arg;
1805
1806	EM_LOCK(adapter);
1807	em_82547_move_tail_locked(adapter);
1808	EM_UNLOCK(adapter);
1809}
1810
1811static int
1812em_82547_fifo_workaround(struct adapter *adapter, int len)
1813{
1814	int fifo_space, fifo_pkt_len;
1815
1816	fifo_pkt_len = roundup2(len + EM_FIFO_HDR, EM_FIFO_HDR);
1817
1818	if (adapter->link_duplex == HALF_DUPLEX) {
1819		fifo_space = adapter->tx_fifo_size - adapter->tx_fifo_head;
1820
1821		if (fifo_pkt_len >= (EM_82547_PKT_THRESH + fifo_space)) {
1822			if (em_82547_tx_fifo_reset(adapter))
1823				return (0);
1824			else
1825				return (1);
1826		}
1827	}
1828
1829	return (0);
1830}
1831
1832static void
1833em_82547_update_fifo_head(struct adapter *adapter, int len)
1834{
1835	int fifo_pkt_len = roundup2(len + EM_FIFO_HDR, EM_FIFO_HDR);
1836
1837	/* tx_fifo_head is always 16 byte aligned */
1838	adapter->tx_fifo_head += fifo_pkt_len;
1839	if (adapter->tx_fifo_head >= adapter->tx_fifo_size) {
1840		adapter->tx_fifo_head -= adapter->tx_fifo_size;
1841	}
1842}
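/*
 * The wrap above is the modular form written without a division; it is
 * equivalent to
 *
 *	tx_fifo_head = (tx_fifo_head + fifo_pkt_len) % tx_fifo_size;
 *
 * as long as fifo_pkt_len is less than tx_fifo_size.
 */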
1843
1844
1845static int
1846em_82547_tx_fifo_reset(struct adapter *adapter)
1847{
1848	uint32_t tctl;
1849
1850	if ((E1000_READ_REG(&adapter->hw, TDT) == E1000_READ_REG(&adapter->hw, TDH)) &&
1851	    (E1000_READ_REG(&adapter->hw, TDFT) == E1000_READ_REG(&adapter->hw, TDFH)) &&
1852	    (E1000_READ_REG(&adapter->hw, TDFTS) == E1000_READ_REG(&adapter->hw, TDFHS))&&
1853	    (E1000_READ_REG(&adapter->hw, TDFPC) == 0)) {
1854
1855		/* Disable TX unit */
1856		tctl = E1000_READ_REG(&adapter->hw, TCTL);
1857		E1000_WRITE_REG(&adapter->hw, TCTL, tctl & ~E1000_TCTL_EN);
1858
1859		/* Reset FIFO pointers */
1860		E1000_WRITE_REG(&adapter->hw, TDFT,  adapter->tx_head_addr);
1861		E1000_WRITE_REG(&adapter->hw, TDFH,  adapter->tx_head_addr);
1862		E1000_WRITE_REG(&adapter->hw, TDFTS, adapter->tx_head_addr);
1863		E1000_WRITE_REG(&adapter->hw, TDFHS, adapter->tx_head_addr);
1864
1865		/* Re-enable TX unit */
1866		E1000_WRITE_REG(&adapter->hw, TCTL, tctl);
1867		E1000_WRITE_FLUSH(&adapter->hw);
1868
1869		adapter->tx_fifo_head = 0;
1870		adapter->tx_fifo_reset_cnt++;
1871
1872		return (TRUE);
1873	}
1874	else {
1875		return (FALSE);
1876	}
1877}
1878
1879static void
1880em_set_promisc(struct adapter *adapter)
1881{
1882	struct ifnet	*ifp = adapter->ifp;
1883	uint32_t	reg_rctl;
1884
1885	reg_rctl = E1000_READ_REG(&adapter->hw, RCTL);
1886
1887	if (ifp->if_flags & IFF_PROMISC) {
1888		reg_rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
1889		E1000_WRITE_REG(&adapter->hw, RCTL, reg_rctl);
1890		/*
1891		 * Disable VLAN stripping in promiscuous mode.
1892		 * This allows VLAN-tagged frames to be bridged and
1893		 * also lets the tags be seen in tcpdump.
1894		 * XXX: This is a bit bogus as tcpdump may be used
1895		 * without promiscuous mode as well.
1896		 */
1897		if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING)
1898			em_disable_vlans(adapter);
1899		adapter->em_insert_vlan_header = 1;
1900	} else if (ifp->if_flags & IFF_ALLMULTI) {
1901		reg_rctl |= E1000_RCTL_MPE;
1902		reg_rctl &= ~E1000_RCTL_UPE;
1903		E1000_WRITE_REG(&adapter->hw, RCTL, reg_rctl);
1904		adapter->em_insert_vlan_header = 0;
1905	} else
1906		adapter->em_insert_vlan_header = 0;
1907}
1908
1909static void
1910em_disable_promisc(struct adapter *adapter)
1911{
1912	struct ifnet	*ifp = adapter->ifp;
1913	uint32_t	reg_rctl;
1914
1915	reg_rctl = E1000_READ_REG(&adapter->hw, RCTL);
1916
1917	reg_rctl &=  (~E1000_RCTL_UPE);
1918	reg_rctl &=  (~E1000_RCTL_MPE);
1919	E1000_WRITE_REG(&adapter->hw, RCTL, reg_rctl);
1920
1921	if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING)
1922		em_enable_vlans(adapter);
1923	adapter->em_insert_vlan_header = 0;
1924}
1925
1926
1927/*********************************************************************
1928 *  Multicast Update
1929 *
1930 *  This routine is called whenever multicast address list is updated.
1931 *
1932 **********************************************************************/
1933
1934static void
1935em_set_multi(struct adapter *adapter)
1936{
1937	struct ifnet	*ifp = adapter->ifp;
1938	struct ifmultiaddr *ifma;
1939	uint32_t reg_rctl = 0;
1940	uint8_t  mta[MAX_NUM_MULTICAST_ADDRESSES * ETH_LENGTH_OF_ADDRESS];
1941	int mcnt = 0;
1942
1943	IOCTL_DEBUGOUT("em_set_multi: begin");
1944
1945	if (adapter->hw.mac_type == em_82542_rev2_0) {
1946		reg_rctl = E1000_READ_REG(&adapter->hw, RCTL);
1947		if (adapter->hw.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
1948			em_pci_clear_mwi(&adapter->hw);
1949		reg_rctl |= E1000_RCTL_RST;
1950		E1000_WRITE_REG(&adapter->hw, RCTL, reg_rctl);
1951		msec_delay(5);
1952	}
1953
1954	IF_ADDR_LOCK(ifp);
1955	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
1956		if (ifma->ifma_addr->sa_family != AF_LINK)
1957			continue;
1958
1959		if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
1960			break;
1961
1962		bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
1963		    &mta[mcnt*ETH_LENGTH_OF_ADDRESS], ETH_LENGTH_OF_ADDRESS);
1964		mcnt++;
1965	}
1966	IF_ADDR_UNLOCK(ifp);
1967
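	/*
	 * If there are more groups joined than the hardware filter can
	 * hold, fall back to accepting all multicast traffic.
	 */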
1968	if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES) {
1969		reg_rctl = E1000_READ_REG(&adapter->hw, RCTL);
1970		reg_rctl |= E1000_RCTL_MPE;
1971		E1000_WRITE_REG(&adapter->hw, RCTL, reg_rctl);
1972	} else
1973		em_mc_addr_list_update(&adapter->hw, mta, mcnt, 0, 1);
1974
1975	if (adapter->hw.mac_type == em_82542_rev2_0) {
1976		reg_rctl = E1000_READ_REG(&adapter->hw, RCTL);
1977		reg_rctl &= ~E1000_RCTL_RST;
1978		E1000_WRITE_REG(&adapter->hw, RCTL, reg_rctl);
1979		msec_delay(5);
1980		if (adapter->hw.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
1981			em_pci_set_mwi(&adapter->hw);
1982	}
1983}
1984
1985
1986/*********************************************************************
1987 *  Timer routine
1988 *
1989 *  This routine checks for link status and updates statistics.
1990 *
1991 **********************************************************************/
1992
1993static void
1994em_local_timer(void *arg)
1995{
1996	struct adapter	*adapter = arg;
1997	struct ifnet	*ifp = adapter->ifp;
1998
1999	EM_LOCK(adapter);
2000
2001	em_check_for_link(&adapter->hw);
2002	em_update_link_status(adapter);
2003	em_update_stats_counters(adapter);
2004	if (em_display_debug_stats && (ifp->if_drv_flags & IFF_DRV_RUNNING))
2005		em_print_hw_stats(adapter);
2006	em_smartspeed(adapter);
2007
2008	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
2009
2010	EM_UNLOCK(adapter);
2011}
2012
2013static void
2014em_update_link_status(struct adapter *adapter)
2015{
2016	struct ifnet *ifp = adapter->ifp;
2017	device_t dev = adapter->dev;
2018
2019	if (E1000_READ_REG(&adapter->hw, STATUS) & E1000_STATUS_LU) {
2020		if (adapter->link_active == 0) {
2021			em_get_speed_and_duplex(&adapter->hw, &adapter->link_speed,
2022			    &adapter->link_duplex);
2023			/* Check if we may set SPEED_MODE bit on PCI-E */
2024			if ((adapter->link_speed == SPEED_1000) &&
2025			    ((adapter->hw.mac_type == em_82571) ||
2026			    (adapter->hw.mac_type == em_82572))) {
2027				int tarc0;
2028
2029				tarc0 = E1000_READ_REG(&adapter->hw, TARC0);
2030				tarc0 |= SPEED_MODE_BIT;
2031				E1000_WRITE_REG(&adapter->hw, TARC0, tarc0);
2032			}
2033			if (bootverbose)
2034				device_printf(dev, "Link is up %d Mbps %s\n",
2035				    adapter->link_speed,
2036				    ((adapter->link_duplex == FULL_DUPLEX) ?
2037				    "Full Duplex" : "Half Duplex"));
2038			adapter->link_active = 1;
2039			adapter->smartspeed = 0;
2040			ifp->if_baudrate = adapter->link_speed * 1000000;
2041			if_link_state_change(ifp, LINK_STATE_UP);
2042		}
2043	} else {
2044		if (adapter->link_active == 1) {
2045			ifp->if_baudrate = adapter->link_speed = 0;
2046			adapter->link_duplex = 0;
2047			if (bootverbose)
2048				device_printf(dev, "Link is Down\n");
2049			adapter->link_active = 0;
2050			if_link_state_change(ifp, LINK_STATE_DOWN);
2051		}
2052	}
2053}
2054
2055/*********************************************************************
2056 *
2057 *  This routine disables all traffic on the adapter by issuing a
2058 *  global reset on the MAC and deallocates TX/RX buffers.
2059 *
2060 **********************************************************************/
2061
2062static void
2063em_stop(void *arg)
2064{
2065	struct adapter	*adapter = arg;
2066	struct ifnet	*ifp = adapter->ifp;
2067
2068	EM_LOCK_ASSERT(adapter);
2069
2070	INIT_DEBUGOUT("em_stop: begin");
2071
2072	em_disable_intr(adapter);
2073	em_reset_hw(&adapter->hw);
2074	callout_stop(&adapter->timer);
2075	callout_stop(&adapter->tx_fifo_timer);
2076	em_free_transmit_structures(adapter);
2077	em_free_receive_structures(adapter);
2078
2079	/* Tell the stack that the interface is no longer active */
2080	ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
2081}
2082
2083
2084/********************************************************************
2085 *
2086 *  Determine hardware revision.
2087 *
2088 **********************************************************************/
2089static void
2090em_identify_hardware(struct adapter *adapter)
2091{
2092	device_t dev = adapter->dev;
2093
2094	/* Make sure our PCI config space has the necessary stuff set */
2095	pci_enable_busmaster(dev);
2096	pci_enable_io(dev, SYS_RES_MEMORY);
2097	adapter->hw.pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2);
2098
2099	/* Save off the information about this board */
2100	adapter->hw.vendor_id = pci_get_vendor(dev);
2101	adapter->hw.device_id = pci_get_device(dev);
2102	adapter->hw.revision_id = pci_get_revid(dev);
2103	adapter->hw.subsystem_vendor_id = pci_get_subvendor(dev);
2104	adapter->hw.subsystem_id = pci_get_subdevice(dev);
2105
2106	/* Identify the MAC */
2107	if (em_set_mac_type(&adapter->hw))
2108		device_printf(dev, "Unknown MAC Type\n");
2109
2110	if(adapter->hw.mac_type == em_82541 || adapter->hw.mac_type == em_82541_rev_2 ||
2111	   adapter->hw.mac_type == em_82547 || adapter->hw.mac_type == em_82547_rev_2)
2112		adapter->hw.phy_init_script = TRUE;
2113}
2114
2115static int
2116em_allocate_pci_resources(struct adapter *adapter)
2117{
2118	device_t	dev = adapter->dev;
2119	int		val, rid;
2120
2121	rid = PCIR_BAR(0);
2122	adapter->res_memory = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
2123	    &rid, RF_ACTIVE);
2124	if (adapter->res_memory == NULL) {
2125		device_printf(dev, "Unable to allocate bus resource: memory\n");
2126		return (ENXIO);
2127	}
2128	adapter->osdep.mem_bus_space_tag =
2129	    rman_get_bustag(adapter->res_memory);
2130	adapter->osdep.mem_bus_space_handle = rman_get_bushandle(adapter->res_memory);
2131	adapter->hw.hw_addr = (uint8_t *)&adapter->osdep.mem_bus_space_handle;
2132
2133	if (adapter->hw.mac_type > em_82543) {
2134		/* Figure out where our I/O BAR is. */
2135		for (rid = PCIR_BAR(0); rid < PCIR_CIS;) {
2136			val = pci_read_config(dev, rid, 4);
2137			if (EM_BAR_TYPE(val) == EM_BAR_TYPE_IO) {
2138				adapter->io_rid = rid;
2139				break;
2140			}
2141			rid += 4;
2142			/* check for 64bit BAR */
2143			if (EM_BAR_MEM_TYPE(val) == EM_BAR_MEM_TYPE_64BIT)
2144				rid += 4;
2145		}
2146		if (rid >= PCIR_CIS) {
2147			device_printf(dev, "Unable to locate IO BAR\n");
2148			return (ENXIO);
2149		}
2150		adapter->res_ioport = bus_alloc_resource_any(dev, SYS_RES_IOPORT,
2151		    &adapter->io_rid, RF_ACTIVE);
2152		if (adapter->res_ioport == NULL) {
2153			device_printf(dev, "Unable to allocate bus resource: "
2154			    "ioport\n");
2155			return (ENXIO);
2156		}
2157		adapter->hw.io_base = 0;
2158		adapter->osdep.io_bus_space_tag = rman_get_bustag(adapter->res_ioport);
2159		adapter->osdep.io_bus_space_handle =
2160		    rman_get_bushandle(adapter->res_ioport);
2161	}
2162
2163	/* For ICH8 we need to find the flash memory. */
2164	if (adapter->hw.mac_type == em_ich8lan) {
2165		rid = EM_FLASH;
2166
2167		adapter->flash_mem = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
2168		    &rid, RF_ACTIVE);
2169		adapter->osdep.flash_bus_space_tag = rman_get_bustag(adapter->flash_mem);
2170		adapter->osdep.flash_bus_space_handle =
2171		    rman_get_bushandle(adapter->flash_mem);
2172	}
2173
2174	rid = 0x0;
2175	adapter->res_interrupt = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid,
2176	    RF_SHAREABLE | RF_ACTIVE);
2177	if (adapter->res_interrupt == NULL) {
2178		device_printf(dev, "Unable to allocate bus resource: "
2179		    "interrupt\n");
2180		return (ENXIO);
2181	}
2182
2183	adapter->hw.back = &adapter->osdep;
2184
2185	return (0);
2186}
2187
2188int
2189em_allocate_intr(struct adapter *adapter)
2190{
2191	device_t dev = adapter->dev;
2192	int error;
2193
2194	/* Manually turn off all interrupts */
2195	E1000_WRITE_REG(&adapter->hw, IMC, 0xffffffff);
2196
2197#ifdef DEVICE_POLLING
2198	if (adapter->int_handler_tag == NULL && (error = bus_setup_intr(dev,
2199	    adapter->res_interrupt, INTR_TYPE_NET | INTR_MPSAFE, em_intr, adapter,
2200	    &adapter->int_handler_tag)) != 0) {
2201		device_printf(dev, "Failed to register interrupt handler: %d\n", error);
2202		return (error);
2203	}
2204#else
2205	/*
2206	 * Try allocating a fast interrupt and the associated deferred
2207	 * processing contexts.
2208	 */
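	/*
	 * The resulting flow: the fast handler runs in interrupt context
	 * and merely enqueues work; em_handle_rxtx() and em_handle_link()
	 * then run later in the taskqueue thread, outside of interrupt
	 * context.
	 */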
2209	TASK_INIT(&adapter->rxtx_task, 0, em_handle_rxtx, adapter);
2210	TASK_INIT(&adapter->link_task, 0, em_handle_link, adapter);
2211	adapter->tq = taskqueue_create_fast("em_taskq", M_NOWAIT,
2212	    taskqueue_thread_enqueue, &adapter->tq);
2213	taskqueue_start_threads(&adapter->tq, 1, PI_NET, "%s taskq",
2214	    device_get_nameunit(adapter->dev));
2215	if ((error = bus_setup_intr(dev, adapter->res_interrupt,
2216	    INTR_TYPE_NET | INTR_FAST, em_intr_fast, adapter,
2217	    &adapter->int_handler_tag)) != 0) {
2218		device_printf(dev, "Failed to register fast interrupt "
2219			    "handler: %d\n", error);
2220		taskqueue_free(adapter->tq);
2221		adapter->tq = NULL;
2222		return (error);
2223	}
2224#endif
2225
2226	em_enable_intr(adapter);
2227	return (0);
2228}
2229
2230static void
2231em_free_intr(struct adapter *adapter)
2232{
2233	device_t dev = adapter->dev;
2234
2235	if (adapter->int_handler_tag != NULL) {
2236		bus_teardown_intr(dev, adapter->res_interrupt, adapter->int_handler_tag);
2237		adapter->int_handler_tag = NULL;
2238	}
2239	if (adapter->tq != NULL) {
2240		taskqueue_drain(adapter->tq, &adapter->rxtx_task);
2241		taskqueue_drain(taskqueue_fast, &adapter->link_task);
2242		taskqueue_free(adapter->tq);
2243		adapter->tq = NULL;
2244	}
2245}
2246
2247static void
2248em_free_pci_resources(struct adapter *adapter)
2249{
2250	device_t dev = adapter->dev;
2251
2252	if (adapter->res_interrupt != NULL)
2253		bus_release_resource(dev, SYS_RES_IRQ, 0, adapter->res_interrupt);
2254
2255	if (adapter->res_memory != NULL)
2256		bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BAR(0),
2257		    adapter->res_memory);
2258
2259	if (adapter->flash_mem != NULL)
2260		bus_release_resource(dev, SYS_RES_MEMORY, EM_FLASH,
2261		    adapter->flash_mem);
2262
2263	if (adapter->res_ioport != NULL)
2264		bus_release_resource(dev, SYS_RES_IOPORT, adapter->io_rid,
2265		    adapter->res_ioport);
2266}
2267
2268/*********************************************************************
2269 *
2270 *  Initialize the hardware to a configuration as specified by the
2271 *  adapter structure. The controller is reset, the EEPROM is
2272 *  verified, the MAC address is set, then the shared initialization
2273 *  routines are called.
2274 *
2275 **********************************************************************/
2276static int
2277em_hardware_init(struct adapter *adapter)
2278{
2279	device_t dev = adapter->dev;
2280	uint16_t rx_buffer_size;
2281
2282	INIT_DEBUGOUT("em_hardware_init: begin");
2283	/* Issue a global reset */
2284	em_reset_hw(&adapter->hw);
2285
2286	/* When hardware is reset, fifo_head is also reset */
2287	adapter->tx_fifo_head = 0;
2288
2289	/* Make sure we have a good EEPROM before we read from it */
2290	if (em_validate_eeprom_checksum(&adapter->hw) < 0) {
2291		device_printf(dev, "The EEPROM Checksum Is Not Valid\n");
2292		return (EIO);
2293	}
2294
2295	if (em_read_part_num(&adapter->hw, &(adapter->part_num)) < 0) {
2296		device_printf(dev, "EEPROM read error while reading part "
2297		    "number\n");
2298		return (EIO);
2299	}
2300
2301	/* Set up smart power down as default off on newer adapters. */
2302	if (!em_smart_pwr_down &&
2303	    (adapter->hw.mac_type == em_82571 || adapter->hw.mac_type == em_82572)) {
2304		uint16_t phy_tmp = 0;
2305
2306		/* Speed up time to link by disabling smart power down. */
2307		em_read_phy_reg(&adapter->hw, IGP02E1000_PHY_POWER_MGMT, &phy_tmp);
2308		phy_tmp &= ~IGP02E1000_PM_SPD;
2309		em_write_phy_reg(&adapter->hw, IGP02E1000_PHY_POWER_MGMT, phy_tmp);
2310	}
2311
2312	/*
2313	 * These parameters control the automatic generation (Tx) and
2314	 * response (Rx) to Ethernet PAUSE frames.
2315	 * - High water mark should allow for at least two frames to be
2316	 *   received after sending an XOFF.
2317	 * - Low water mark works best when it is very near the high water mark.
2318	 *   This allows the receiver to restart by sending XON when it has
2319	 *   drained a bit. Here we use an arbitrary value of 1500 which will
2320	 *   restart after one full frame is pulled from the buffer. There
2321	 *   could be several smaller frames in the buffer and if so they will
2322	 *   not trigger the XON until their total number reduces the buffer
2323	 *   by 1500.
2324	 * - The pause time is fairly large: 0x1000 quanta of 512 bit times
2324	 *   each (512ns at 1Gb/s), i.e. roughly 2ms.
2325	 */
2326	rx_buffer_size = ((E1000_READ_REG(&adapter->hw, PBA) & 0xffff) << 10 );
2327
2328	adapter->hw.fc_high_water = rx_buffer_size -
2329	    roundup2(adapter->hw.max_frame_size, 1024);
2330	adapter->hw.fc_low_water = adapter->hw.fc_high_water - 1500;
2331	if (adapter->hw.mac_type == em_80003es2lan)
2332		adapter->hw.fc_pause_time = 0xFFFF;
2333	else
2334		adapter->hw.fc_pause_time = 0x1000;
2335	adapter->hw.fc_send_xon = TRUE;
2336	adapter->hw.fc = E1000_FC_FULL;
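	/*
	 * Worked example, with an assumed PBA of 48 KB and a 1518-byte
	 * max frame (numbers for illustration only):
	 *
	 *	rx_buffer_size = 48 * 1024                    = 49152
	 *	fc_high_water  = 49152 - roundup2(1518, 1024) = 47104
	 *	fc_low_water   = 47104 - 1500                 = 45604
	 *
	 * so XOFF goes out with about 2 KB of buffer still free.
	 */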
2337
2338	if (em_init_hw(&adapter->hw) < 0) {
2339		device_printf(dev, "Hardware Initialization Failed\n");
2340		return (EIO);
2341	}
2342
2343	em_check_for_link(&adapter->hw);
2344
2345	return (0);
2346}
2347
2348/*********************************************************************
2349 *
2350 *  Setup networking device structure and register an interface.
2351 *
2352 **********************************************************************/
2353static void
2354em_setup_interface(device_t dev, struct adapter *adapter)
2355{
2356	struct ifnet   *ifp;
2357	INIT_DEBUGOUT("em_setup_interface: begin");
2358
2359	ifp = adapter->ifp = if_alloc(IFT_ETHER);
2360	if (ifp == NULL)
2361		panic("%s: can not if_alloc()", device_get_nameunit(dev));
2362	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
2363	ifp->if_mtu = ETHERMTU;
2364	ifp->if_init =  em_init;
2365	ifp->if_softc = adapter;
2366	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
2367	ifp->if_ioctl = em_ioctl;
2368	ifp->if_start = em_start;
2369	ifp->if_watchdog = em_watchdog;
2370	IFQ_SET_MAXLEN(&ifp->if_snd, adapter->num_tx_desc - 1);
2371	ifp->if_snd.ifq_drv_maxlen = adapter->num_tx_desc - 1;
2372	IFQ_SET_READY(&ifp->if_snd);
2373
2374	ether_ifattach(ifp, adapter->hw.mac_addr);
2375
2376	ifp->if_capabilities = ifp->if_capenable = 0;
2377
2378	if (adapter->hw.mac_type >= em_82543) {
2379		ifp->if_capabilities |= IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM;
2380		ifp->if_capenable |= IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM;
2381	}
2382
2383	/* Enable TSO if available */
2384	if ((adapter->hw.mac_type > em_82544) &&
2385	    (adapter->hw.mac_type != em_82547)) {
2386		ifp->if_capabilities |= IFCAP_TSO4;
2387		ifp->if_capenable |= IFCAP_TSO4;
2388	}
2389
2390	/*
2391	 * Tell the upper layer(s) we support long frames.
2392	 */
2393	ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header);
2394	ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU;
2395	ifp->if_capenable |= IFCAP_VLAN_MTU;
2396
2397#ifdef DEVICE_POLLING
2398	ifp->if_capabilities |= IFCAP_POLLING;
2399#endif
2400
2401	/*
2402	 * Specify the media types supported by this adapter and register
2403	 * callbacks to update media and link information
2404	 */
2405	ifmedia_init(&adapter->media, IFM_IMASK, em_media_change,
2406	    em_media_status);
2407	if ((adapter->hw.media_type == em_media_type_fiber) ||
2408	    (adapter->hw.media_type == em_media_type_internal_serdes)) {
2409		u_char fiber_type = IFM_1000_SX;	/* default type */
2410
2411		if (adapter->hw.mac_type == em_82545)
2412			fiber_type = IFM_1000_LX;
2413		ifmedia_add(&adapter->media, IFM_ETHER | fiber_type | IFM_FDX,
2414		    0, NULL);
2415		ifmedia_add(&adapter->media, IFM_ETHER | fiber_type, 0, NULL);
2416	} else {
2417		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T, 0, NULL);
2418		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T | IFM_FDX,
2419			    0, NULL);
2420		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX,
2421			    0, NULL);
2422		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX | IFM_FDX,
2423			    0, NULL);
2424		if (adapter->hw.phy_type != em_phy_ife) {
2425			ifmedia_add(&adapter->media,
2426				IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
2427			ifmedia_add(&adapter->media,
2428				IFM_ETHER | IFM_1000_T, 0, NULL);
2429		}
2430	}
2431	ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL);
2432	ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO);
2433}
2434
2435
2436/*********************************************************************
2437 *
2438 *  Workaround for SmartSpeed on 82541 and 82547 controllers
2439 *
2440 **********************************************************************/
2441static void
2442em_smartspeed(struct adapter *adapter)
2443{
2444	uint16_t phy_tmp;
2445
2446	if (adapter->link_active || (adapter->hw.phy_type != em_phy_igp) ||
2447	    adapter->hw.autoneg == 0 ||
2448	    (adapter->hw.autoneg_advertised & ADVERTISE_1000_FULL) == 0)
2449		return;
2450
2451	if (adapter->smartspeed == 0) {
2452		/* If the Master/Slave config fault is asserted twice in
2453		 * a row, we assume the fault is persistent (back-to-back). */
2454		em_read_phy_reg(&adapter->hw, PHY_1000T_STATUS, &phy_tmp);
2455		if (!(phy_tmp & SR_1000T_MS_CONFIG_FAULT))
2456			return;
2457		em_read_phy_reg(&adapter->hw, PHY_1000T_STATUS, &phy_tmp);
2458		if (phy_tmp & SR_1000T_MS_CONFIG_FAULT) {
2459			em_read_phy_reg(&adapter->hw, PHY_1000T_CTRL, &phy_tmp);
2460			if(phy_tmp & CR_1000T_MS_ENABLE) {
2461				phy_tmp &= ~CR_1000T_MS_ENABLE;
2462				em_write_phy_reg(&adapter->hw, PHY_1000T_CTRL,
2463				    phy_tmp);
2464				adapter->smartspeed++;
2465				if(adapter->hw.autoneg &&
2466				   !em_phy_setup_autoneg(&adapter->hw) &&
2467				   !em_read_phy_reg(&adapter->hw, PHY_CTRL,
2468				    &phy_tmp)) {
2469					phy_tmp |= (MII_CR_AUTO_NEG_EN |
2470						    MII_CR_RESTART_AUTO_NEG);
2471					em_write_phy_reg(&adapter->hw, PHY_CTRL,
2472					    phy_tmp);
2473				}
2474			}
2475		}
2476		return;
2477	} else if(adapter->smartspeed == EM_SMARTSPEED_DOWNSHIFT) {
2478		/* If still no link, perhaps using a 2- or 3-pair cable */
2479		em_read_phy_reg(&adapter->hw, PHY_1000T_CTRL, &phy_tmp);
2480		phy_tmp |= CR_1000T_MS_ENABLE;
2481		em_write_phy_reg(&adapter->hw, PHY_1000T_CTRL, phy_tmp);
2482		if(adapter->hw.autoneg &&
2483		   !em_phy_setup_autoneg(&adapter->hw) &&
2484		   !em_read_phy_reg(&adapter->hw, PHY_CTRL, &phy_tmp)) {
2485			phy_tmp |= (MII_CR_AUTO_NEG_EN |
2486				    MII_CR_RESTART_AUTO_NEG);
2487			em_write_phy_reg(&adapter->hw, PHY_CTRL, phy_tmp);
2488		}
2489	}
2490	/* Restart process after EM_SMARTSPEED_MAX iterations */
2491	if(adapter->smartspeed++ == EM_SMARTSPEED_MAX)
2492		adapter->smartspeed = 0;
2493}
2494
2495
2496/*
2497 * Manage DMA'able memory.
2498 */
2499static void
2500em_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
2501{
2502	if (error)
2503		return;
2504	*(bus_addr_t *) arg = segs[0].ds_addr;
2505}
2506
2507static int
2508em_dma_malloc(struct adapter *adapter, bus_size_t size, struct em_dma_alloc *dma,
2509	int mapflags)
2510{
2511	int error;
2512
2513	error = bus_dma_tag_create(bus_get_dma_tag(adapter->dev), /* parent */
2514				EM_DBA_ALIGN, 0,	/* alignment, bounds */
2515				BUS_SPACE_MAXADDR,	/* lowaddr */
2516				BUS_SPACE_MAXADDR,	/* highaddr */
2517				NULL, NULL,		/* filter, filterarg */
2518				size,			/* maxsize */
2519				1,			/* nsegments */
2520				size,			/* maxsegsize */
2521				0,			/* flags */
2522				NULL,			/* lockfunc */
2523				NULL,			/* lockarg */
2524				&dma->dma_tag);
2525	if (error) {
2526		device_printf(adapter->dev, "%s: bus_dma_tag_create failed: %d\n",
2527		    __func__, error);
2528		goto fail_0;
2529	}
2530
2531	error = bus_dmamem_alloc(dma->dma_tag, (void**) &dma->dma_vaddr,
2532	    BUS_DMA_NOWAIT, &dma->dma_map);
2533	if (error) {
2534		device_printf(adapter->dev, "%s: bus_dmamem_alloc(%ju) failed: %d\n",
2535		    __func__, (uintmax_t)size, error);
2536		goto fail_1;
2537	}
2538
2539	dma->dma_paddr = 0;
2540	error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
2541	    size, em_dmamap_cb, &dma->dma_paddr, mapflags | BUS_DMA_NOWAIT);
2542	if (error || dma->dma_paddr == 0) {
2543		device_printf(adapter->dev, "%s: bus_dmamap_load failed: %d\n",
2544		    __func__, error);
2545		goto fail_3;
2546	}
2547
2548	return (0);
2549
2550fail_3:
2551	bus_dmamap_unload(dma->dma_tag, dma->dma_map);
2552fail_2:
2553	bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
fail_1:
2554	bus_dma_tag_destroy(dma->dma_tag);
2555fail_0:
2556	dma->dma_map = NULL;
2557	dma->dma_tag = NULL;
2558
2559	return (error);
2560}
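/*
 * Typical usage, as a sketch only (the descriptor ring allocation in
 * attach follows roughly this pattern; the names below match adapter
 * fields used elsewhere in this file):
 *
 *	size = roundup2(adapter->num_tx_desc * sizeof(struct em_tx_desc),
 *	    4096);
 *	if (em_dma_malloc(adapter, size, &adapter->txdma, BUS_DMA_NOWAIT))
 *		return (ENOMEM);
 *	adapter->tx_desc_base =
 *	    (struct em_tx_desc *)adapter->txdma.dma_vaddr;
 */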
2561
2562static void
2563em_dma_free(struct adapter *adapter, struct em_dma_alloc *dma)
2564{
2565	if (dma->dma_tag == NULL)
2566		return;
2567	if (dma->dma_map != NULL) {
2568		bus_dmamap_sync(dma->dma_tag, dma->dma_map,
2569		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
2570		bus_dmamap_unload(dma->dma_tag, dma->dma_map);
2571		bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
2572		dma->dma_map = NULL;
2573	}
2574	bus_dma_tag_destroy(dma->dma_tag);
2575	dma->dma_tag = NULL;
2576}
2577
2578
2579/*********************************************************************
2580 *
2581 *  Allocate memory for tx_buffer structures. The tx_buffer stores all
2582 *  the information needed to transmit a packet on the wire.
2583 *
2584 **********************************************************************/
2585static int
2586em_allocate_transmit_structures(struct adapter *adapter)
2587{
2588	adapter->tx_buffer_area =  malloc(sizeof(struct em_buffer) *
2589	    adapter->num_tx_desc, M_DEVBUF, M_NOWAIT);
2590	if (adapter->tx_buffer_area == NULL) {
2591		device_printf(adapter->dev, "Unable to allocate tx_buffer memory\n");
2592		return (ENOMEM);
2593	}
2594
2595	bzero(adapter->tx_buffer_area, sizeof(struct em_buffer) * adapter->num_tx_desc);
2596
2597	return (0);
2598}
2599
2600/*********************************************************************
2601 *
2602 *  Allocate and initialize transmit structures.
2603 *
2604 **********************************************************************/
2605static int
2606em_setup_transmit_structures(struct adapter *adapter)
2607{
2608	struct ifnet   *ifp = adapter->ifp;
2609	device_t dev = adapter->dev;
2610	struct em_buffer *tx_buffer;
2611	bus_size_t size, segsize;
2612	int error, i;
2613
2614	/*
2615	 * Set up the DMA tag used to map transmit buffers.
2616	 */
2617	segsize = size = roundup2(adapter->hw.max_frame_size, MCLBYTES);
2618
2619	/* Overrides for TSO - want large sizes */
2620	if (ifp->if_hwassist & EM_TCPSEG_FEATURES) {
2621		size = EM_TSO_SIZE;
2622		segsize = PAGE_SIZE;
2623	}
2624
2625	if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),	/* parent */
2626				1, 0,			/* alignment, bounds */
2627				BUS_SPACE_MAXADDR,	/* lowaddr */
2628				BUS_SPACE_MAXADDR,	/* highaddr */
2629				NULL, NULL,		/* filter, filterarg */
2630				size,			/* maxsize */
2631				EM_MAX_SCATTER,		/* nsegments */
2632				segsize,		/* maxsegsize */
2633				0,			/* flags */
2634				NULL,		/* lockfunc */
2635				NULL,		/* lockarg */
2636				&adapter->txtag)) != 0) {
2637		device_printf(dev, "Unable to allocate TX DMA tag\n");
2638		goto fail;
2639	}
2640
2641	if ((error = em_allocate_transmit_structures(adapter)) != 0)
2642		goto fail;
2643
2644	bzero(adapter->tx_desc_base, (sizeof(struct em_tx_desc)) * adapter->num_tx_desc);
2645	tx_buffer = adapter->tx_buffer_area;
2646	for (i = 0; i < adapter->num_tx_desc; i++) {
2647		error = bus_dmamap_create(adapter->txtag, 0, &tx_buffer->map);
2648		if (error != 0) {
2649			device_printf(dev, "Unable to create TX DMA map\n");
2650			goto fail;
2651		}
2652		tx_buffer++;
2653	}
2654
2655	adapter->next_avail_tx_desc = 0;
2656	adapter->oldest_used_tx_desc = 0;
2657
2658	/* Set number of descriptors available */
2659	adapter->num_tx_desc_avail = adapter->num_tx_desc;
2660
2661	/* Set checksum context */
2662	adapter->active_checksum_context = OFFLOAD_NONE;
2663	bus_dmamap_sync(adapter->txdma.dma_tag, adapter->txdma.dma_map,
2664	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
2665
2666	return (0);
2667
2668fail:
2669	em_free_transmit_structures(adapter);
2670	return (error);
2671}
2672
2673/*********************************************************************
2674 *
2675 *  Enable transmit unit.
2676 *
2677 **********************************************************************/
2678static void
2679em_initialize_transmit_unit(struct adapter *adapter)
2680{
2681	uint32_t	reg_tctl;
2682	uint32_t	reg_tipg = 0;
2683	uint64_t	bus_addr;
2684
2685	INIT_DEBUGOUT("em_initialize_transmit_unit: begin");
2686	/* Setup the Base and Length of the Tx Descriptor Ring */
2687	bus_addr = adapter->txdma.dma_paddr;
2688	E1000_WRITE_REG(&adapter->hw, TDLEN,
2689	    adapter->num_tx_desc * sizeof(struct em_tx_desc));
2690	E1000_WRITE_REG(&adapter->hw, TDBAH, (uint32_t)(bus_addr >> 32));
2691	E1000_WRITE_REG(&adapter->hw, TDBAL, (uint32_t)bus_addr);
2692
2693	/* Setup the HW Tx Head and Tail descriptor pointers */
2694	E1000_WRITE_REG(&adapter->hw, TDT, 0);
2695	E1000_WRITE_REG(&adapter->hw, TDH, 0);
2696
2697
2698	HW_DEBUGOUT2("Base = %x, Length = %x\n", E1000_READ_REG(&adapter->hw, TDBAL),
2699	    E1000_READ_REG(&adapter->hw, TDLEN));
2700
2701	/* Set the default values for the Tx Inter Packet Gap timer */
2702	switch (adapter->hw.mac_type) {
2703	case em_82542_rev2_0:
2704	case em_82542_rev2_1:
2705		reg_tipg = DEFAULT_82542_TIPG_IPGT;
2706		reg_tipg |= DEFAULT_82542_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT;
2707		reg_tipg |= DEFAULT_82542_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT;
2708		break;
2709	case em_80003es2lan:
2710		reg_tipg = DEFAULT_82543_TIPG_IPGR1;
2711		reg_tipg |= DEFAULT_80003ES2LAN_TIPG_IPGR2 <<
2712		    E1000_TIPG_IPGR2_SHIFT;
2713		break;
2714	default:
2715		if ((adapter->hw.media_type == em_media_type_fiber) ||
2716		    (adapter->hw.media_type == em_media_type_internal_serdes))
2717			reg_tipg = DEFAULT_82543_TIPG_IPGT_FIBER;
2718		else
2719			reg_tipg = DEFAULT_82543_TIPG_IPGT_COPPER;
2720		reg_tipg |= DEFAULT_82543_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT;
2721		reg_tipg |= DEFAULT_82543_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT;
2722	}
2723
2724	E1000_WRITE_REG(&adapter->hw, TIPG, reg_tipg);
2725	E1000_WRITE_REG(&adapter->hw, TIDV, adapter->tx_int_delay.value);
2726	if(adapter->hw.mac_type >= em_82540)
2727		E1000_WRITE_REG(&adapter->hw, TADV, adapter->tx_abs_int_delay.value);
2728
2729	/* Program the Transmit Control Register */
2730	reg_tctl = E1000_TCTL_PSP | E1000_TCTL_EN |
2731		   (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT);
2732	if (adapter->hw.mac_type >= em_82571)
2733		reg_tctl |= E1000_TCTL_MULR;
2734	if (adapter->link_duplex == FULL_DUPLEX) {
2735		reg_tctl |= E1000_FDX_COLLISION_DISTANCE << E1000_COLD_SHIFT;
2736	} else {
2737		reg_tctl |= E1000_HDX_COLLISION_DISTANCE << E1000_COLD_SHIFT;
2738	}
2739	/* This write will effectively turn on the transmit unit. */
2740	E1000_WRITE_REG(&adapter->hw, TCTL, reg_tctl);
2741
2742	/* Setup Transmit Descriptor Settings for this adapter */
2743	adapter->txd_cmd = E1000_TXD_CMD_IFCS | E1000_TXD_CMD_RS;
2744
2745	if (adapter->tx_int_delay.value > 0)
2746		adapter->txd_cmd |= E1000_TXD_CMD_IDE;
2747}
2748
2749/*********************************************************************
2750 *
2751 *  Free all transmit related data structures.
2752 *
2753 **********************************************************************/
2754static void
2755em_free_transmit_structures(struct adapter *adapter)
2756{
2757	struct em_buffer *tx_buffer;
2758	int i;
2759
2760	INIT_DEBUGOUT("free_transmit_structures: begin");
2761
2762	if (adapter->tx_buffer_area != NULL) {
2763		tx_buffer = adapter->tx_buffer_area;
2764		for (i = 0; i < adapter->num_tx_desc; i++, tx_buffer++) {
2765			if (tx_buffer->m_head != NULL) {
2766				bus_dmamap_sync(adapter->txtag, tx_buffer->map,
2767				    BUS_DMASYNC_POSTWRITE);
2768				bus_dmamap_unload(adapter->txtag,
2769				    tx_buffer->map);
2770				m_freem(tx_buffer->m_head);
2771				tx_buffer->m_head = NULL;
2772			} else if (tx_buffer->map != NULL)
2773				bus_dmamap_unload(adapter->txtag,
2774				    tx_buffer->map);
2775			if (tx_buffer->map != NULL) {
2776				bus_dmamap_destroy(adapter->txtag,
2777				    tx_buffer->map);
2778				tx_buffer->map = NULL;
2779			}
2780		}
2781	}
2782	if (adapter->tx_buffer_area != NULL) {
2783		free(adapter->tx_buffer_area, M_DEVBUF);
2784		adapter->tx_buffer_area = NULL;
2785	}
2786	if (adapter->txtag != NULL) {
2787		bus_dma_tag_destroy(adapter->txtag);
2788		adapter->txtag = NULL;
2789	}
2790}
2791
2792/*********************************************************************
2793 *
2794 *  The offload context needs to be set when we transfer the first
2795 *  packet of a particular protocol (TCP/UDP). We change the
2796 *  context only if the protocol type changes.
2797 *
2798 **********************************************************************/
2799static void
2800em_transmit_checksum_setup(struct adapter *adapter, struct mbuf *mp,
2801    uint32_t *txd_upper, uint32_t *txd_lower)
2802{
2803	struct em_context_desc *TXD;
2804	struct em_buffer *tx_buffer;
2805	struct ether_vlan_header *eh;
2806	struct ip *ip;
2807	struct ip6_hdr *ip6;
2808	struct tcphdr *th;
2809	int curr_txd, ehdrlen, hdr_len, ip_hlen;
2810	uint32_t cmd = 0;
2811	uint16_t etype;
2812	uint8_t ipproto;
2813
2814	/* Setup checksum offload context. */
2815	curr_txd = adapter->next_avail_tx_desc;
2816	tx_buffer = &adapter->tx_buffer_area[curr_txd];
2817	TXD = (struct em_context_desc *) &adapter->tx_desc_base[curr_txd];
2818
2819	*txd_lower = E1000_TXD_CMD_DEXT |	/* Extended descr type */
2820		     E1000_TXD_DTYP_D;		/* Data descr */
2821
2822	/*
2823	 * Determine where frame payload starts.
2824	 * Jump over vlan headers if already present,
2825	 * helpful for QinQ too.
2826	 */
2827	eh = mtod(mp, struct ether_vlan_header *);
2828	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
2829		etype = ntohs(eh->evl_proto);
2830		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
2831	} else {
2832		etype = ntohs(eh->evl_encap_proto);
2833		ehdrlen = ETHER_HDR_LEN;
2834	}
2835
2836	/*
2837	 * We only support TCP/UDP for IPv4 and IPv6 for the moment.
2838	 * TODO: Support SCTP too when it hits the tree.
2839	 */
2840	switch (etype) {
2841	case ETHERTYPE_IP:
2842		ip = (struct ip *)(mp->m_data + ehdrlen);
2843		ip_hlen = ip->ip_hl << 2;
2844
2845		/* Setup of IP header checksum. */
2846		if (mp->m_pkthdr.csum_flags & CSUM_IP) {
2847			/*
2848			 * Start offset for header checksum calculation.
2849			 * End offset for header checksum calculation.
2850			 * Offset of place to put the checksum.
2851			 */
2852			TXD->lower_setup.ip_fields.ipcss = ehdrlen;
2853			TXD->lower_setup.ip_fields.ipcse =
2854			    htole16(ehdrlen + ip_hlen);
2855			TXD->lower_setup.ip_fields.ipcso =
2856			    ehdrlen + offsetof(struct ip, ip_sum);
2857			cmd |= E1000_TXD_CMD_IP;
2858			*txd_upper |= E1000_TXD_POPTS_IXSM << 8;
2859		}
2860
2861		if (mp->m_len < ehdrlen + ip_hlen)
2862			return;	/* failure */
2863
2864		hdr_len = ehdrlen + ip_hlen;
2865		ipproto = ip->ip_p;
2866
2867		break;
2868	case ETHERTYPE_IPV6:
2869		ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
2870		ip_hlen = sizeof(struct ip6_hdr); /* XXX: No header stacking. */
2871
2872		if (mp->m_len < ehdrlen + ip_hlen)
2873			return;	/* failure */
2874
2875		/* IPv6 doesn't have a header checksum. */
2876
2877		hdr_len = ehdrlen + ip_hlen;
2878		ipproto = ip6->ip6_nxt;
2879
2880		break;
2881	default:
2882		*txd_upper = 0;
2883		*txd_lower = 0;
2884		return;
2885	}
2886
2887	switch (ipproto) {
2888	case IPPROTO_TCP:
2889		if (mp->m_pkthdr.csum_flags & CSUM_TCP) {
2890			/*
2891			 * Start offset for payload checksum calculation.
2892			 * End offset for payload checksum calculation.
2893			 * Offset of place to put the checksum.
2894			 */
2895			th = (struct tcphdr *)(mp->m_data + hdr_len);
2896			TXD->upper_setup.tcp_fields.tucss = hdr_len;
2897			TXD->upper_setup.tcp_fields.tucse = htole16(0);
2898			TXD->upper_setup.tcp_fields.tucso =
2899			    hdr_len + offsetof(struct tcphdr, th_sum);
2900			cmd |= E1000_TXD_CMD_TCP;
2901			*txd_upper |= E1000_TXD_POPTS_TXSM << 8;
2902		}
2903		break;
2904	case IPPROTO_UDP:
2905		if (mp->m_pkthdr.csum_flags & CSUM_UDP) {
2906			/*
2907			 * Start offset for header checksum calculation.
2908			 * End offset for header checksum calculation.
2909			 * Offset of place to put the checksum.
2910			 */
2911			TXD->upper_setup.tcp_fields.tucss = hdr_len;
2912			TXD->upper_setup.tcp_fields.tucse = htole16(0);
2913			TXD->upper_setup.tcp_fields.tucso =
2914			    hdr_len + offsetof(struct udphdr, uh_sum);
2915			*txd_upper |= E1000_TXD_POPTS_TXSM << 8;
2916		}
2917		break;
2918	default:
2919		break;
2920	}
2921
2922	TXD->tcp_seg_setup.data = htole32(0);
2923	TXD->cmd_and_length =
2924	    htole32(adapter->txd_cmd | E1000_TXD_CMD_DEXT | cmd);
2925	tx_buffer->m_head = NULL;
2926
2927	if (++curr_txd == adapter->num_tx_desc)
2928		curr_txd = 0;
2929
2930	adapter->num_tx_desc_avail--;
2931	adapter->next_avail_tx_desc = curr_txd;
2932}
2933
2934/**********************************************************************
2935 *
2936 *  Setup work for hardware segmentation offload (TSO)
2937 *
2938 **********************************************************************/
2939static boolean_t
2940em_tso_setup(struct adapter *adapter, struct mbuf *mp, uint32_t *txd_upper,
2941   uint32_t *txd_lower)
2942{
2943	struct em_context_desc *TXD;
2944	struct em_buffer *tx_buffer;
2945	struct ether_vlan_header *eh;
2946	struct ip *ip;
2947	struct ip6_hdr *ip6;
2948	struct tcphdr *th;
2949	int curr_txd, ehdrlen, hdr_len, ip_hlen, isip6;
2950	uint16_t etype;
2951
2952	/*
2953	 * XXX: This is not really correct as the stack would not have
2954	 * set up all checksums.
2955	 * XXX: Returning FALSE is not sufficient, as we cannot distinguish
2956	 * true failure cases from a plain "no".  Should return -1 (failure),
2957	 * 0 (no) and 1 (success).
2958	 */
2959	if (mp->m_pkthdr.len <= E1000_TX_BUFFER_SIZE)
2960		return FALSE;	/* 0 */
2961
2962	/*
2963	 * This function could/should be extended to support IP/IPv6
2964	 * fragmentation as well.  But as they say, one step at a time.
2965	 */
2966
2967	/*
2968	 * Determine where frame payload starts.
2969	 * Jump over vlan headers if already present,
2970	 * helpful for QinQ too.
2971	 */
2972	eh = mtod(mp, struct ether_vlan_header *);
2973	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
2974		etype = ntohs(eh->evl_proto);
2975		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
2976	} else {
2977		etype = ntohs(eh->evl_encap_proto);
2978		ehdrlen = ETHER_HDR_LEN;
2979	}
2980
2981	/* Ensure we have at least the IP+TCP header in the first mbuf. */
2982	if (mp->m_len < ehdrlen + sizeof(struct ip) + sizeof(struct tcphdr))
2983		return FALSE;	/* -1 */
2984
2985	/*
2986	 * We only support TCP over IPv4 for the moment; IPv6 is not yet done.
2987	 * TODO: Support SCTP too when it hits the tree.
2988	 */
2989	switch (etype) {
2990	case ETHERTYPE_IP:
2991		isip6 = 0;
2992		ip = (struct ip *)(mp->m_data + ehdrlen);
2993		if (ip->ip_p != IPPROTO_TCP)
2994			return FALSE;	/* 0 */
2995		ip->ip_len = 0;
2996		ip->ip_sum = 0;
2997		ip_hlen = ip->ip_hl << 2;
2998		if (mp->m_len < ehdrlen + ip_hlen + sizeof(struct tcphdr))
2999			return FALSE;	/* -1 */
3000		th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
3001#if 1
3002		th->th_sum = in_pseudo(ip->ip_src.s_addr,
3003		    ip->ip_dst.s_addr, htons(IPPROTO_TCP));
3004#else
3005		th->th_sum = mp->m_pkthdr.csum_data;
3006#endif
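		/*
		 * Note that the pseudo-header checksum seeded above
		 * deliberately omits the TCP length: with TSE the hardware
		 * fills in the per-segment length itself, so only the
		 * addresses and the protocol are folded in.
		 */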
3007		break;
3008	case ETHERTYPE_IPV6:
3009		isip6 = 1;
3010		return FALSE;			/* Not supported yet. */
3011		ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
3012		if (ip6->ip6_nxt != IPPROTO_TCP)
3013			return FALSE;	/* 0 */
3014		ip6->ip6_plen = 0;
3015		ip_hlen = sizeof(struct ip6_hdr); /* XXX: no header stacking. */
3016		if (mp->m_len < ehdrlen + ip_hlen + sizeof(struct tcphdr))
3017			return FALSE;	/* -1 */
3018		th = (struct tcphdr *)((caddr_t)ip6 + ip_hlen);
3019#if 0
3020		th->th_sum = in6_pseudo(ip6->ip6_src, ip6->ip6_dst,
3021		    htons(IPPROTO_TCP));	/* XXX: no such function yet. */
3022#else
3023		th->th_sum = mp->m_pkthdr.csum_data;
3024#endif
3025		break;
3026	default:
3027		return FALSE;
3028	}
3029	hdr_len = ehdrlen + ip_hlen + (th->th_off << 2);
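	/* e.g. 14 (Ethernet) + 20 (IPv4) + 20 (TCP, th_off == 5) = 54 */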
3030
3031	*txd_lower = (E1000_TXD_CMD_DEXT |	/* Extended descr type */
3032		      E1000_TXD_DTYP_D |	/* Data descr type */
3033		      E1000_TXD_CMD_TSE);	/* Do TSE on this packet */
3034
3035	/* IP and/or TCP header checksum calculation and insertion. */
3036	*txd_upper = ((isip6 ? 0 : E1000_TXD_POPTS_IXSM) |
3037		      E1000_TXD_POPTS_TXSM) << 8;
3038
3039	curr_txd = adapter->next_avail_tx_desc;
3040	tx_buffer = &adapter->tx_buffer_area[curr_txd];
3041	TXD = (struct em_context_desc *) &adapter->tx_desc_base[curr_txd];
3042
3043	/* IPv6 doesn't have a header checksum. */
3044	if (!isip6) {
3045		/*
3046		 * Start offset for header checksum calculation.
3047		 * End offset for header checksum calculation.
3048		 * Offset of place to put the checksum.
3049		 */
3050		TXD->lower_setup.ip_fields.ipcss = ehdrlen;
3051		TXD->lower_setup.ip_fields.ipcse =
3052		    htole16(ehdrlen + ip_hlen - 1);
3053		TXD->lower_setup.ip_fields.ipcso =
3054		    ehdrlen + offsetof(struct ip, ip_sum);
3055	}
3056	/*
3057	 * Start offset for payload checksum calculation.
3058	 * End offset for payload checksum calculation.
3059	 * Offset of place to put the checksum.
3060	 */
3061	TXD->upper_setup.tcp_fields.tucss =
3062	    ehdrlen + ip_hlen;
3063	TXD->upper_setup.tcp_fields.tucse = 0;
3064	TXD->upper_setup.tcp_fields.tucso =
3065	    ehdrlen + ip_hlen + offsetof(struct tcphdr, th_sum);
3066	/*
3067	 * Payload size per packet w/o any headers.
3068	 * Length of all headers up to payload.
3069	 */
3070	TXD->tcp_seg_setup.fields.mss = htole16(mp->m_pkthdr.tso_segsz);
3071	TXD->tcp_seg_setup.fields.hdr_len = hdr_len;
3072
3073	TXD->cmd_and_length = htole32(adapter->txd_cmd |
3074				E1000_TXD_CMD_DEXT |	/* Extended descr */
3075				E1000_TXD_CMD_TSE |	/* TSE context */
3076				(isip6 ? 0 : E1000_TXD_CMD_IP) | /* Do IP csum */
3077				E1000_TXD_CMD_TCP |	/* Do TCP checksum */
3078				(mp->m_pkthdr.len - (hdr_len))); /* Total len */
3079
3080	tx_buffer->m_head = NULL;
3081
3082	if (++curr_txd == adapter->num_tx_desc)
3083		curr_txd = 0;
3084
3085	adapter->num_tx_desc_avail--;
3086	adapter->next_avail_tx_desc = curr_txd;
3087	adapter->tx_tso = TRUE;
3088
3089	return TRUE;
3090}
3091
3092/**********************************************************************
3093 *
3094 *  Examine each tx_buffer in the used queue. If the hardware is done
3095 *  processing the packet then free associated resources. The
3096 *  tx_buffer is put back on the free queue.
3097 *
3098 **********************************************************************/
3099static void
3100em_txeof(struct adapter *adapter)
3101{
3102	int i, num_avail;
3103	struct em_buffer *tx_buffer;
3104	struct em_tx_desc   *tx_desc;
3105	struct ifnet   *ifp = adapter->ifp;
3106
3107	EM_LOCK_ASSERT(adapter);
3108
3109	if (adapter->num_tx_desc_avail == adapter->num_tx_desc)
3110		return;
3111
3112	num_avail = adapter->num_tx_desc_avail;
3113	i = adapter->oldest_used_tx_desc;
3114
3115	tx_buffer = &adapter->tx_buffer_area[i];
3116	tx_desc = &adapter->tx_desc_base[i];
3117
3118	bus_dmamap_sync(adapter->txdma.dma_tag, adapter->txdma.dma_map,
3119	    BUS_DMASYNC_POSTREAD);
3120	while (tx_desc->upper.fields.status & E1000_TXD_STAT_DD) {
3121
3122		tx_desc->upper.data = 0;
3123		num_avail++;
3124
3125		if (tx_buffer->m_head) {
3126			ifp->if_opackets++;
3127			bus_dmamap_sync(adapter->txtag, tx_buffer->map,
3128			    BUS_DMASYNC_POSTWRITE);
3129			bus_dmamap_unload(adapter->txtag, tx_buffer->map);
3130
3131			m_freem(tx_buffer->m_head);
3132			tx_buffer->m_head = NULL;
3133		}
3134
3135		if (++i == adapter->num_tx_desc)
3136			i = 0;
3137
3138		tx_buffer = &adapter->tx_buffer_area[i];
3139		tx_desc = &adapter->tx_desc_base[i];
3140	}
3141	bus_dmamap_sync(adapter->txdma.dma_tag, adapter->txdma.dma_map,
3142	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3143
3144	adapter->oldest_used_tx_desc = i;
3145
3146	/*
3147	 * If we have enough room, clear IFF_DRV_OACTIVE to tell the stack
3148	 * that it is OK to send packets.
3149	 * If there are no pending descriptors, clear the timeout. Otherwise,
3150	 * if some descriptors have been freed, restart the timeout.
3151	 */
3152	if (num_avail > EM_TX_CLEANUP_THRESHOLD) {
3153		ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
3154		if (num_avail == adapter->num_tx_desc)
3155			ifp->if_timer = 0;
3156		else if (num_avail != adapter->num_tx_desc_avail)
3157			ifp->if_timer = EM_TX_TIMEOUT;
3158	}
3159	adapter->num_tx_desc_avail = num_avail;
3160}
3161
3162/*********************************************************************
3163 *
3164 *  Get a buffer from system mbuf buffer pool.
3165 *
3166 **********************************************************************/
3167static int
3168em_get_buf(struct adapter *adapter, int i)
3169{
3170	struct mbuf		*m;
3171	bus_dma_segment_t	segs[1];
3172	bus_dmamap_t		map;
3173	struct em_buffer	*rx_buffer;
3174	int			error, nsegs;
3175
3176	m = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR);
3177	if (m == NULL) {
3178		adapter->mbuf_cluster_failed++;
3179		return (ENOBUFS);
3180	}
3181	m->m_len = m->m_pkthdr.len = MCLBYTES;
3182	if (adapter->hw.max_frame_size <= (MCLBYTES - ETHER_ALIGN))
3183		m_adj(m, ETHER_ALIGN);
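	/*
	 * The 2-byte ETHER_ALIGN adjustment above shifts the payload so
	 * that the IP header following the 14-byte Ethernet header ends
	 * up 32-bit aligned.
	 */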
3184
3185	/*
3186	 * Using memory from the mbuf cluster pool, invoke the
3187	 * bus_dma machinery to arrange the memory mapping.
3188	 */
3189	error = bus_dmamap_load_mbuf_sg(adapter->rxtag, adapter->rx_sparemap,
3190	    m, segs, &nsegs, BUS_DMA_NOWAIT);
3191	if (error != 0) {
3192		m_free(m);
3193		return (error);
3194	}
3195	/* If nsegs is wrong then the stack is corrupt. */
3196	KASSERT(nsegs == 1, ("Too many segments returned!"));
3197
3198	rx_buffer = &adapter->rx_buffer_area[i];
3199	if (rx_buffer->m_head != NULL)
3200		bus_dmamap_unload(adapter->rxtag, rx_buffer->map);
3201
3202	map = rx_buffer->map;
3203	rx_buffer->map = adapter->rx_sparemap;
3204	adapter->rx_sparemap = map;
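	/*
	 * The spare map was just loaded with the new mbuf; after the swap
	 * above, the buffer's previous (now unloaded) map becomes the
	 * spare for the next call.
	 */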
3205	bus_dmamap_sync(adapter->rxtag, rx_buffer->map, BUS_DMASYNC_PREREAD);
3206	rx_buffer->m_head = m;
3207
3208	adapter->rx_desc_base[i].buffer_addr = htole64(segs[0].ds_addr);
3209
3210	return (0);
3211}
3212
3213/*********************************************************************
3214 *
3215 *  Allocate memory for rx_buffer structures. Since we use one
3216 *  rx_buffer per received packet, the maximum number of rx_buffer's
3217 *  that we'll need is equal to the number of receive descriptors
3218 *  that we've allocated.
3219 *
3220 **********************************************************************/
3221static int
3222em_allocate_receive_structures(struct adapter *adapter)
3223{
3224	device_t dev = adapter->dev;
3225	struct em_buffer *rx_buffer;
3226	int i, error;
3227
3228	adapter->rx_buffer_area = malloc(sizeof(struct em_buffer) * adapter->num_rx_desc,
3229	    M_DEVBUF, M_NOWAIT);
3230	if (adapter->rx_buffer_area == NULL) {
3231		device_printf(dev, "Unable to allocate rx_buffer memory\n");
3232		return (ENOMEM);
3233	}
3234
3235	bzero(adapter->rx_buffer_area, sizeof(struct em_buffer) * adapter->num_rx_desc);
3236
3237	error = bus_dma_tag_create(bus_get_dma_tag(dev),	/* parent */
3238				1, 0,			/* alignment, bounds */
3239				BUS_SPACE_MAXADDR,	/* lowaddr */
3240				BUS_SPACE_MAXADDR,	/* highaddr */
3241				NULL, NULL,		/* filter, filterarg */
3242				MCLBYTES,		/* maxsize */
3243				1,			/* nsegments */
3244				MCLBYTES,		/* maxsegsize */
3245				0,			/* flags */
3246				NULL,			/* lockfunc */
3247				NULL,			/* lockarg */
3248				&adapter->rxtag);
3249	if (error) {
3250		device_printf(dev, "%s: bus_dma_tag_create failed %d\n",
3251		    __func__, error);
3252		goto fail;
3253	}
3254
3255	error = bus_dmamap_create(adapter->rxtag, BUS_DMA_NOWAIT,
3256	    &adapter->rx_sparemap);
3257	if (error) {
3258		device_printf(dev, "%s: bus_dmamap_create failed: %d\n",
3259		    __func__, error);
3260		goto fail;
3261	}
3262	rx_buffer = adapter->rx_buffer_area;
3263	for (i = 0; i < adapter->num_rx_desc; i++, rx_buffer++) {
3264		error = bus_dmamap_create(adapter->rxtag, BUS_DMA_NOWAIT,
3265		    &rx_buffer->map);
3266		if (error) {
3267			device_printf(dev, "%s: bus_dmamap_create failed: %d\n",
3268			    __func__, error);
3269			goto fail;
3270		}
3271	}
3272
3273	for (i = 0; i < adapter->num_rx_desc; i++) {
3274		error = em_get_buf(adapter, i);
3275		if (error)
3276			goto fail;
3277	}
3278	bus_dmamap_sync(adapter->rxdma.dma_tag, adapter->rxdma.dma_map,
3279	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3280
3281	return (0);
3282
3283fail:
3284	em_free_receive_structures(adapter);
3285	return (error);
3286}
3287
3288/*********************************************************************
3289 *
3290 *  Allocate and initialize receive structures.
3291 *
3292 **********************************************************************/
3293static int
3294em_setup_receive_structures(struct adapter *adapter)
3295{
3296	int error;
3297
3298	bzero(adapter->rx_desc_base, (sizeof(struct em_rx_desc)) * adapter->num_rx_desc);
3299
3300	if ((error = em_allocate_receive_structures(adapter)) != 0)
3301		return (error);
3302
3303	/* Setup our descriptor pointers */
3304	adapter->next_rx_desc_to_check = 0;
3305
3306	return (0);
3307}
3308
3309/*********************************************************************
3310 *
3311 *  Enable receive unit.
3312 *
3313 **********************************************************************/
3314static void
3315em_initialize_receive_unit(struct adapter *adapter)
3316{
3317	struct ifnet	*ifp = adapter->ifp;
3318	uint64_t	bus_addr;
3319	uint32_t	reg_rctl;
3320	uint32_t	reg_rxcsum;
3321
3322	INIT_DEBUGOUT("em_initialize_receive_unit: begin");
3323
3324	/*
3325	 * Make sure receives are disabled while setting
3326	 * up the descriptor ring
3327	 */
3328	E1000_WRITE_REG(&adapter->hw, RCTL, 0);
3329
3330	/* Set the Receive Delay Timer Register */
3331	E1000_WRITE_REG(&adapter->hw, RDTR, adapter->rx_int_delay.value | E1000_RDT_FPDB);
3332
3333	if(adapter->hw.mac_type >= em_82540) {
3334		E1000_WRITE_REG(&adapter->hw, RADV, adapter->rx_abs_int_delay.value);
3335
3336		/*
3337		 * Set the interrupt throttling rate. Value is calculated
3338		 * as DEFAULT_ITR = 1/(MAX_INTS_PER_SEC * 256ns)
3339		 */
3340#define MAX_INTS_PER_SEC	8000
3341#define DEFAULT_ITR	     (1000000000/(MAX_INTS_PER_SEC * 256))
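		/*
		 * With the defaults this is 1000000000 / (8000 * 256) = 488
		 * units of 256ns, i.e. at most roughly one interrupt every
		 * 125us (~8000 per second).
		 */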
3342		E1000_WRITE_REG(&adapter->hw, ITR, DEFAULT_ITR);
3343	}
3344
3345	/* Setup the Base and Length of the Rx Descriptor Ring */
3346	bus_addr = adapter->rxdma.dma_paddr;
3347	E1000_WRITE_REG(&adapter->hw, RDLEN, adapter->num_rx_desc *
3348			sizeof(struct em_rx_desc));
3349	E1000_WRITE_REG(&adapter->hw, RDBAH, (uint32_t)(bus_addr >> 32));
3350	E1000_WRITE_REG(&adapter->hw, RDBAL, (uint32_t)bus_addr);
3351
3352	/* Setup the Receive Control Register */
3353	reg_rctl = E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_LBM_NO |
3354		   E1000_RCTL_RDMTS_HALF |
3355		   (adapter->hw.mc_filter_type << E1000_RCTL_MO_SHIFT);
3356
3357	if (adapter->hw.tbi_compatibility_on == TRUE)
3358		reg_rctl |= E1000_RCTL_SBP;
3359
3360
3361	switch (adapter->rx_buffer_len) {
3362	default:
3363	case EM_RXBUFFER_2048:
3364		reg_rctl |= E1000_RCTL_SZ_2048;
3365		break;
3366	case EM_RXBUFFER_4096:
3367		reg_rctl |= E1000_RCTL_SZ_4096 | E1000_RCTL_BSEX | E1000_RCTL_LPE;
3368		break;
3369	case EM_RXBUFFER_8192:
3370		reg_rctl |= E1000_RCTL_SZ_8192 | E1000_RCTL_BSEX | E1000_RCTL_LPE;
3371		break;
3372	case EM_RXBUFFER_16384:
3373		reg_rctl |= E1000_RCTL_SZ_16384 | E1000_RCTL_BSEX | E1000_RCTL_LPE;
3374		break;
3375	}
3376
3377	if (ifp->if_mtu > ETHERMTU)
3378		reg_rctl |= E1000_RCTL_LPE;
3379
3380	/* Enable 82543 Receive Checksum Offload for TCP and UDP */
3381	if ((adapter->hw.mac_type >= em_82543) &&
3382	    (ifp->if_capenable & IFCAP_RXCSUM)) {
3383		reg_rxcsum = E1000_READ_REG(&adapter->hw, RXCSUM);
3384		reg_rxcsum |= (E1000_RXCSUM_IPOFL | E1000_RXCSUM_TUOFL);
3385		E1000_WRITE_REG(&adapter->hw, RXCSUM, reg_rxcsum);
3386	}
3387
3388	/* Enable Receives */
3389	E1000_WRITE_REG(&adapter->hw, RCTL, reg_rctl);
3390
3391	/* Setup the HW Rx Head and Tail Descriptor Pointers */
3392	E1000_WRITE_REG(&adapter->hw, RDH, 0);
3393	E1000_WRITE_REG(&adapter->hw, RDT, adapter->num_rx_desc - 1);
3394}
3395
3396/*********************************************************************
3397 *
3398 *  Free receive related data structures.
3399 *
3400 **********************************************************************/
3401static void
3402em_free_receive_structures(struct adapter *adapter)
3403{
3404	struct em_buffer *rx_buffer;
3405	int i;
3406
3407	INIT_DEBUGOUT("free_receive_structures: begin");
3408
3409	if (adapter->rx_sparemap) {
3410		bus_dmamap_destroy(adapter->rxtag, adapter->rx_sparemap);
3411		adapter->rx_sparemap = NULL;
3412	}
3413	if (adapter->rx_buffer_area != NULL) {
3414		rx_buffer = adapter->rx_buffer_area;
3415		for (i = 0; i < adapter->num_rx_desc; i++, rx_buffer++) {
3416			if (rx_buffer->m_head != NULL) {
3417				bus_dmamap_sync(adapter->rxtag, rx_buffer->map,
3418				    BUS_DMASYNC_POSTREAD);
3419				bus_dmamap_unload(adapter->rxtag,
3420				    rx_buffer->map);
3421				m_freem(rx_buffer->m_head);
3422				rx_buffer->m_head = NULL;
3423			} else if (rx_buffer->map != NULL)
3424				bus_dmamap_unload(adapter->rxtag,
3425				    rx_buffer->map);
3426			if (rx_buffer->map != NULL) {
3427				bus_dmamap_destroy(adapter->rxtag,
3428				    rx_buffer->map);
3429				rx_buffer->map = NULL;
3430			}
3431		}
3432	}
3433	if (adapter->rx_buffer_area != NULL) {
3434		free(adapter->rx_buffer_area, M_DEVBUF);
3435		adapter->rx_buffer_area = NULL;
3436	}
3437	if (adapter->rxtag != NULL) {
3438		bus_dma_tag_destroy(adapter->rxtag);
3439		adapter->rxtag = NULL;
3440	}
3441}
3442
3443/*********************************************************************
3444 *
3445 *  This routine executes in interrupt context. It replenishes
3446 *  the mbufs in the descriptor ring and passes data that has been
3447 *  DMA'ed into host memory up to the upper layer.
3448 *
3449 *  We loop at most count times if count is > 0, or until done if
3450 *  count < 0.
3451 *
3452 *********************************************************************/
3453static int
3454em_rxeof(struct adapter *adapter, int count)
3455{
3456	struct ifnet	*ifp;
3457	struct mbuf	*mp;
3458	uint8_t		accept_frame = 0;
3459	uint8_t		eop = 0;
3460	uint16_t 	len, desc_len, prev_len_adj;
3461	int		i;
3462
3463	/* Pointer to the receive descriptor being examined. */
3464	struct em_rx_desc   *current_desc;
3465	uint8_t		status;
3466
3467	ifp = adapter->ifp;
3468	i = adapter->next_rx_desc_to_check;
3469	current_desc = &adapter->rx_desc_base[i];
3470	bus_dmamap_sync(adapter->rxdma.dma_tag, adapter->rxdma.dma_map,
3471	    BUS_DMASYNC_POSTREAD);
3472
3473	if (!((current_desc->status) & E1000_RXD_STAT_DD))
3474		return (0);
3475
3476	while ((current_desc->status & E1000_RXD_STAT_DD) &&
3477	    (count != 0) &&
3478	    (ifp->if_drv_flags & IFF_DRV_RUNNING)) {
3479		struct mbuf *m = NULL;
3480
3481		mp = adapter->rx_buffer_area[i].m_head;
3482		/*
3483		 * Can't defer bus_dmamap_sync(9) because TBI_ACCEPT
3484		 * needs to access the last received byte in the mbuf.
3485		 */
3486		bus_dmamap_sync(adapter->rxtag, adapter->rx_buffer_area[i].map,
3487		    BUS_DMASYNC_POSTREAD);
3488
3489		accept_frame = 1;
3490		prev_len_adj = 0;
3491		desc_len = le16toh(current_desc->length);
3492		status = current_desc->status;
3493		if (status & E1000_RXD_STAT_EOP) {
3494			count--;
3495			eop = 1;
3496			if (desc_len < ETHER_CRC_LEN) {
3497				len = 0;
3498				prev_len_adj = ETHER_CRC_LEN - desc_len;
3499			} else
3500				len = desc_len - ETHER_CRC_LEN;
3501		} else {
3502			eop = 0;
3503			len = desc_len;
3504		}
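		/*
		 * Worked example: with 2048-byte buffers, a 2047-byte frame
		 * plus its 4-byte CRC occupies 2051 bytes, so the EOP
		 * descriptor holds only 3 bytes; desc_len (3) is then less
		 * than ETHER_CRC_LEN (4), so len becomes 0 and prev_len_adj
		 * becomes 1, trimming the CRC byte that spilled into the
		 * previous buffer.
		 */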
3505
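		/*
		 * TBI workaround: fiber (TBI) parts can mark an otherwise
		 * good frame that ends in a carrier-extend symbol as bad;
		 * TBI_ACCEPT() re-examines the status and error bits along
		 * with the last received byte to decide whether the frame
		 * should be accepted anyway.
		 */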
3506		if (current_desc->errors & E1000_RXD_ERR_FRAME_ERR_MASK) {
3507			uint8_t		last_byte;
3508			uint32_t	pkt_len = desc_len;
3509
3510			if (adapter->fmp != NULL)
3511				pkt_len += adapter->fmp->m_pkthdr.len;
3512
3513			last_byte = *(mtod(mp, caddr_t) + desc_len - 1);
3514			if (TBI_ACCEPT(&adapter->hw, status,
3515			    current_desc->errors, pkt_len, last_byte)) {
3516				em_tbi_adjust_stats(&adapter->hw,
3517				    &adapter->stats, pkt_len,
3518				    adapter->hw.mac_addr);
3519				if (len > 0)
3520					len--;
3521			} else
3522				accept_frame = 0;
3523		}
3524
3525		if (accept_frame) {
3526			if (em_get_buf(adapter, i) != 0) {
3527				ifp->if_iqdrops++;
3528				goto discard;
3529			}
3530
3531			/* Assign correct length to the current fragment */
3532			mp->m_len = len;
3533
3534			if (adapter->fmp == NULL) {
3535				mp->m_pkthdr.len = len;
3536				adapter->fmp = mp; /* Store the first mbuf */
3537				adapter->lmp = mp;
3538			} else {
3539				/* Chain mbufs together */
3540				mp->m_flags &= ~M_PKTHDR;
3541				/*
3542				 * Adjust length of previous mbuf in chain if
3543				 * we received less than 4 bytes in the last
3544				 * descriptor.
3545				 */
3546				if (prev_len_adj > 0) {
3547					adapter->lmp->m_len -= prev_len_adj;
3548					adapter->fmp->m_pkthdr.len -=
3549					    prev_len_adj;
3550				}
3551				adapter->lmp->m_next = mp;
3552				adapter->lmp = adapter->lmp->m_next;
3553				adapter->fmp->m_pkthdr.len += len;
3554			}
3555
3556			if (eop) {
3557				adapter->fmp->m_pkthdr.rcvif = ifp;
3558				ifp->if_ipackets++;
3559				em_receive_checksum(adapter, current_desc,
3560				    adapter->fmp);
3561#ifndef __NO_STRICT_ALIGNMENT
3562				if (adapter->hw.max_frame_size >
3563				    (MCLBYTES - ETHER_ALIGN) &&
3564				    em_fixup_rx(adapter) != 0)
3565					goto skip;
3566#endif
3567				if (status & E1000_RXD_STAT_VP) {
3568					adapter->fmp->m_pkthdr.ether_vtag =
3569					    (le16toh(current_desc->special) &
3570					    E1000_RXD_SPC_VLAN_MASK);
3571					adapter->fmp->m_flags |= M_VLANTAG;
3572				}
3573#ifndef __NO_STRICT_ALIGNMENT
3574skip:
3575#endif
3576				m = adapter->fmp;
3577				adapter->fmp = NULL;
3578				adapter->lmp = NULL;
3579			}
3580		} else {
3581			ifp->if_ierrors++;
3582discard:
3583			/* Reuse loaded DMA map and just update mbuf chain */
3584			mp = adapter->rx_buffer_area[i].m_head;
3585			mp->m_len = mp->m_pkthdr.len = MCLBYTES;
3586			mp->m_data = mp->m_ext.ext_buf;
3587			mp->m_next = NULL;
3588			if (adapter->hw.max_frame_size <= (MCLBYTES - ETHER_ALIGN))
3589				m_adj(mp, ETHER_ALIGN);
3590			if (adapter->fmp != NULL) {
3591				m_freem(adapter->fmp);
3592				adapter->fmp = NULL;
3593				adapter->lmp = NULL;
3594			}
3595			m = NULL;
3596		}
3597
3598		/* Zero out the receive descriptor's status. */
3599		current_desc->status = 0;
3600		bus_dmamap_sync(adapter->rxdma.dma_tag, adapter->rxdma.dma_map,
3601		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3602
3603		/* Advance our pointers to the next descriptor. */
3604		if (++i == adapter->num_rx_desc)
3605			i = 0;
3606		if (m != NULL) {
3607			adapter->next_rx_desc_to_check = i;
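			/*
			 * With DEVICE_POLLING the adapter lock is dropped
			 * around if_input() below: the stack may re-enter
			 * the driver (e.g. to transmit a reply), and calling
			 * it with the lock held could deadlock or trigger a
			 * lock-order reversal.
			 */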
3608#ifdef DEVICE_POLLING
3609			EM_UNLOCK(adapter);
3610			(*ifp->if_input)(ifp, m);
3611			EM_LOCK(adapter);
3612#else
3613			(*ifp->if_input)(ifp, m);
3614#endif
3615			i = adapter->next_rx_desc_to_check;
3616		}
3617		current_desc = &adapter->rx_desc_base[i];
3618	}
3619	adapter->next_rx_desc_to_check = i;
3620
3621	/* Advance the E1000's Receive Queue #0  "Tail Pointer". */
3622	if (--i < 0)
3623		i = adapter->num_rx_desc - 1;
3624	E1000_WRITE_REG(&adapter->hw, RDT, i);
3625	if (!((current_desc->status) & E1000_RXD_STAT_DD))
3626		return (0);
3627
3628	return (1);
3629}
3630
3631#ifndef __NO_STRICT_ALIGNMENT
3632/*
3633 * When jumbo frames are enabled we should realign the entire payload on
3634 * architectures with strict alignment. This is a serious design mistake of the
3635 * 8254x, as it nullifies the gain of DMA operations. The 8254x only allows RX
3636 * buffer sizes of 2048/4096/8192/16384; what we really want is 2048 -
3637 * ETHER_ALIGN, so that the payload ends up aligned. On architectures without
3638 * strict alignment restrictions the 8254x still performs unaligned memory
3639 * accesses, which reduces performance as well. To avoid copying an entire
3640 * frame just to realign it, we allocate a new mbuf and copy only the Ethernet
3641 * header into it; the new mbuf is then prepended to the existing mbuf chain.
3642 *
3643 * Be aware that the best performance of the 8254x is achieved only when jumbo
3644 * frames are not used at all on architectures with strict alignment.
3645 */
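/*
 * Worked example: mbuf clusters are at least 4-byte aligned and an
 * Ethernet header is ETHER_HDR_LEN (14) bytes, so a frame DMA'ed to
 * offset 0 leaves its IP header at offset 14, which is only 2-byte
 * aligned.  Shifting the frame forward by ETHER_HDR_LEN, as the first
 * branch below does for frames that fit in the cluster, moves the IP
 * header to offset 28, a multiple of 4.
 */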
3646static int
3647em_fixup_rx(struct adapter *adapter)
3648{
3649	struct mbuf *m, *n;
3650	int error;
3651
3652	error = 0;
3653	m = adapter->fmp;
3654	if (m->m_len <= (MCLBYTES - ETHER_HDR_LEN)) {
3655		bcopy(m->m_data, m->m_data + ETHER_HDR_LEN, m->m_len);
3656		m->m_data += ETHER_HDR_LEN;
3657	} else {
3658		MGETHDR(n, M_DONTWAIT, MT_DATA);
3659		if (n != NULL) {
3660			bcopy(m->m_data, n->m_data, ETHER_HDR_LEN);
3661			m->m_data += ETHER_HDR_LEN;
3662			m->m_len -= ETHER_HDR_LEN;
3663			n->m_len = ETHER_HDR_LEN;
3664			M_MOVE_PKTHDR(n, m);
3665			n->m_next = m;
3666			adapter->fmp = n;
3667		} else {
3668			adapter->ifp->if_iqdrops++;
3669			adapter->mbuf_alloc_failed++;
3670			m_freem(adapter->fmp);
3671			adapter->fmp = NULL;
3672			adapter->lmp = NULL;
3673			error = ENOBUFS;
3674		}
3675	}
3676
3677	return (error);
3678}
3679#endif
3680
3681/*********************************************************************
3682 *
3683 *  Verify that the hardware indicated that the checksum is valid.
3684 *  Inform the stack about the status of the checksum so that the
3685 *  stack doesn't spend time verifying it.
3686 *
3687 *********************************************************************/
3688static void
3689em_receive_checksum(struct adapter *adapter, struct em_rx_desc *rx_desc,
3690		    struct mbuf *mp)
3691{
3692	/* 82543 or newer only */
3693	if ((adapter->hw.mac_type < em_82543) ||
3694	    /* Ignore Checksum bit is set */
3695	    (rx_desc->status & E1000_RXD_STAT_IXSM)) {
3696		mp->m_pkthdr.csum_flags = 0;
3697		return;
3698	}
3699
3700	if (rx_desc->status & E1000_RXD_STAT_IPCS) {
3701		/* Did it pass? */
3702		if (!(rx_desc->errors & E1000_RXD_ERR_IPE)) {
3703			/* IP Checksum Good */
3704			mp->m_pkthdr.csum_flags = CSUM_IP_CHECKED;
3705			mp->m_pkthdr.csum_flags |= CSUM_IP_VALID;
3706
3707		} else {
3708			mp->m_pkthdr.csum_flags = 0;
3709		}
3710	}
3711
3712	if (rx_desc->status & E1000_RXD_STAT_TCPCS) {
3713		/* Did it pass? */
3714		if (!(rx_desc->errors & E1000_RXD_ERR_TCPE)) {
3715			mp->m_pkthdr.csum_flags |=
3716			(CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
3717			mp->m_pkthdr.csum_data = htons(0xffff);
3718		}
3719	}
3720}
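/*
 * Illustrative sketch only (not part of this driver): how an upper
 * layer typically consumes the flags set above.  When CSUM_DATA_VALID
 * and CSUM_PSEUDO_HDR are both set with csum_data = 0xffff, the
 * software checksum can be skipped entirely.
 */
#if 0
	/* Hypothetical consumer; "m" is a received packet header mbuf. */
	struct mbuf *m;
	uint16_t sum;

	if (m->m_pkthdr.csum_flags & CSUM_DATA_VALID) {
		if (m->m_pkthdr.csum_flags & CSUM_PSEUDO_HDR)
			/* csum_data ^ 0xffff == 0 means the payload is good. */
			sum = m->m_pkthdr.csum_data ^ 0xffff;
	}
#endif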
3721
3722
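/*
 * Enable hardware VLAN support: VET holds the VLAN Ethertype (0x8100)
 * that the hardware matches on, and the VME bit in CTRL turns on
 * hardware VLAN tag processing; stripped tags are reported in the RX
 * descriptor's special field (see em_rxeof()).
 */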
3723static void
3724em_enable_vlans(struct adapter *adapter)
3725{
3726	uint32_t ctrl;
3727
3728	E1000_WRITE_REG(&adapter->hw, VET, ETHERTYPE_VLAN);
3729
3730	ctrl = E1000_READ_REG(&adapter->hw, CTRL);
3731	ctrl |= E1000_CTRL_VME;
3732	E1000_WRITE_REG(&adapter->hw, CTRL, ctrl);
3733}
3734
3735static void
3736em_disable_vlans(struct adapter *adapter)
3737{
3738	uint32_t ctrl;
3739
3740	ctrl = E1000_READ_REG(&adapter->hw, CTRL);
3741	ctrl &= ~E1000_CTRL_VME;
3742	E1000_WRITE_REG(&adapter->hw, CTRL, ctrl);
3743}
3744
3745static void
3746em_enable_intr(struct adapter *adapter)
3747{
3748	E1000_WRITE_REG(&adapter->hw, IMS, (IMS_ENABLE_MASK));
3749}
3750
3751static void
3752em_disable_intr(struct adapter *adapter)
3753{
3754	/*
3755	 * The first version of the 82542 had an erratum where, when link
3756	 * was forced, it would stay up even if the cable was disconnected.
3757	 * Sequence errors were used to detect the disconnect, and the
3758	 * driver would then unforce the link. This code is in the ISR; for
3759	 * it to work correctly the Sequence error interrupt had to be
3760	 * enabled all the time.
3761	 */
3762
3763	if (adapter->hw.mac_type == em_82542_rev2_0)
3764	    E1000_WRITE_REG(&adapter->hw, IMC,
3765		(0xffffffff & ~E1000_IMC_RXSEQ));
3766	else
3767	    E1000_WRITE_REG(&adapter->hw, IMC,
3768		0xffffffff);
3769}
3770
3771static int
3772em_is_valid_ether_addr(uint8_t *addr)
3773{
3774	char zero_addr[6] = { 0, 0, 0, 0, 0, 0 };
3775
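	/*
	 * Reject group addresses (I/G bit set in the first octet, which
	 * covers multicast and broadcast) as well as the all-zero address.
	 */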
3776	if ((addr[0] & 1) || (!bcmp(addr, zero_addr, ETHER_ADDR_LEN))) {
3777		return (FALSE);
3778	}
3779
3780	return (TRUE);
3781}
3782
3783void
3784em_write_pci_cfg(struct em_hw *hw, uint32_t reg, uint16_t *value)
3785{
3786	pci_write_config(((struct em_osdep *)hw->back)->dev, reg, *value, 2);
3787}
3788
3789void
3790em_read_pci_cfg(struct em_hw *hw, uint32_t reg, uint16_t *value)
3791{
3792	*value = pci_read_config(((struct em_osdep *)hw->back)->dev, reg, 2);
3793}
3794
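/*
 * CMD_MEM_WRT_INVALIDATE is the Memory Write and Invalidate enable bit
 * in the PCI command register; with it set the device may use MWI
 * cycles, which can be more efficient than plain memory writes on
 * conventional PCI.
 */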
3795void
3796em_pci_set_mwi(struct em_hw *hw)
3797{
3798	pci_write_config(((struct em_osdep *)hw->back)->dev, PCIR_COMMAND,
3799	    (hw->pci_cmd_word | CMD_MEM_WRT_INVALIDATE), 2);
3800}
3801
3802void
3803em_pci_clear_mwi(struct em_hw *hw)
3804{
3805	pci_write_config(((struct em_osdep *)hw->back)->dev, PCIR_COMMAND,
3806	    (hw->pci_cmd_word & ~CMD_MEM_WRT_INVALIDATE), 2);
3807}
3808
3809/*
3810 * We may eventually really do this, but it's unnecessary
3811 * for now, so we just return unsupported.
3812 */
3813int32_t
3814em_read_pcie_cap_reg(struct em_hw *hw, uint32_t reg, uint16_t *value)
3815{
3816	return (0);
3817}
3818
3819/*********************************************************************
3820* 82544 Coexistence issue workaround.
3821*    There are two issues.
3822*       1. Transmit Hang issue.
3823*    To detect this issue, the following equation can be used:
3824*	  SIZE[3:0] + ADDR[2:0] = SUM[3:0].
3825*	  If SUM[3:0] is between 1 and 4, we will have this issue.
3826*
3827*       2. DAC issue.
3828*    To detect this issue, the following equation can be used:
3829*	  SIZE[3:0] + ADDR[2:0] = SUM[3:0].
3830*	  If SUM[3:0] is between 9 and c, we will have this issue.
3831*
3832*
3833*    WORKAROUND:
3834*	  Make sure the ending address does not give a terminator of
3835*	  1,2,3,4 (Hang) or 9,a,b,c (DAC).
3836**********************************************************************/
3837static uint32_t
3838em_fill_descriptors (bus_addr_t address, uint32_t length,
3839		PDESC_ARRAY desc_array)
3840{
3841	/* Since the issue is sensitive to both length and address, */
3842	/* let us first check the address... */
3843	uint32_t safe_terminator;
3844	if (length <= 4) {
3845		desc_array->descriptor[0].address = address;
3846		desc_array->descriptor[0].length = length;
3847		desc_array->elements = 1;
3848		return (desc_array->elements);
3849	}
3850	safe_terminator = (uint32_t)((((uint32_t)address & 0x7) + (length & 0xF)) & 0xF);
3851	/* If it does not fall within 0x1-0x4 or 0x9-0xC, one descriptor will do. */
3852	if (safe_terminator == 0 ||
3853	    (safe_terminator > 4 &&
3854	    safe_terminator < 9) ||
3855	    (safe_terminator > 0xC &&
3856	    safe_terminator <= 0xF)) {
3857		desc_array->descriptor[0].address = address;
3858		desc_array->descriptor[0].length = length;
3859		desc_array->elements = 1;
3860		return (desc_array->elements);
3861	}
3862
3863	desc_array->descriptor[0].address = address;
3864	desc_array->descriptor[0].length = length - 4;
3865	desc_array->descriptor[1].address = address + (length - 4);
3866	desc_array->descriptor[1].length = 4;
3867	desc_array->elements = 2;
3868	return (desc_array->elements);
3869}
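/*
 * Worked example (illustrative values): address = 0x1006, length = 30.
 * safe_terminator = ((0x1006 & 0x7) + (30 & 0xF)) & 0xF = (6 + 14) & 0xF
 * = 4, which falls in the Hang range 1-4, so the transfer is split into
 * a 26-byte descriptor (terminator (6 + 10) & 0xF = 0, safe) plus a
 * trailing 4-byte descriptor, which the length <= 4 early return above
 * treats as inherently safe.
 */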
3870
3871/**********************************************************************
3872 *
3873 *  Update the board statistics counters.
3874 *
3875 **********************************************************************/
3876static void
3877em_update_stats_counters(struct adapter *adapter)
3878{
3879	struct ifnet   *ifp;
3880
3881	if (adapter->hw.media_type == em_media_type_copper ||
3882	   (E1000_READ_REG(&adapter->hw, STATUS) & E1000_STATUS_LU)) {
3883		adapter->stats.symerrs += E1000_READ_REG(&adapter->hw, SYMERRS);
3884		adapter->stats.sec += E1000_READ_REG(&adapter->hw, SEC);
3885	}
3886	adapter->stats.crcerrs += E1000_READ_REG(&adapter->hw, CRCERRS);
3887	adapter->stats.mpc += E1000_READ_REG(&adapter->hw, MPC);
3888	adapter->stats.scc += E1000_READ_REG(&adapter->hw, SCC);
3889	adapter->stats.ecol += E1000_READ_REG(&adapter->hw, ECOL);
3890
3891	adapter->stats.mcc += E1000_READ_REG(&adapter->hw, MCC);
3892	adapter->stats.latecol += E1000_READ_REG(&adapter->hw, LATECOL);
3893	adapter->stats.colc += E1000_READ_REG(&adapter->hw, COLC);
3894	adapter->stats.dc += E1000_READ_REG(&adapter->hw, DC);
3895	adapter->stats.rlec += E1000_READ_REG(&adapter->hw, RLEC);
3896	adapter->stats.xonrxc += E1000_READ_REG(&adapter->hw, XONRXC);
3897	adapter->stats.xontxc += E1000_READ_REG(&adapter->hw, XONTXC);
3898	adapter->stats.xoffrxc += E1000_READ_REG(&adapter->hw, XOFFRXC);
3899	adapter->stats.xofftxc += E1000_READ_REG(&adapter->hw, XOFFTXC);
3900	adapter->stats.fcruc += E1000_READ_REG(&adapter->hw, FCRUC);
3901	adapter->stats.prc64 += E1000_READ_REG(&adapter->hw, PRC64);
3902	adapter->stats.prc127 += E1000_READ_REG(&adapter->hw, PRC127);
3903	adapter->stats.prc255 += E1000_READ_REG(&adapter->hw, PRC255);
3904	adapter->stats.prc511 += E1000_READ_REG(&adapter->hw, PRC511);
3905	adapter->stats.prc1023 += E1000_READ_REG(&adapter->hw, PRC1023);
3906	adapter->stats.prc1522 += E1000_READ_REG(&adapter->hw, PRC1522);
3907	adapter->stats.gprc += E1000_READ_REG(&adapter->hw, GPRC);
3908	adapter->stats.bprc += E1000_READ_REG(&adapter->hw, BPRC);
3909	adapter->stats.mprc += E1000_READ_REG(&adapter->hw, MPRC);
3910	adapter->stats.gptc += E1000_READ_REG(&adapter->hw, GPTC);
3911
3912	/* For the 64-bit byte counters the low dword must be read first; */
3913	/* both registers clear on the read of the high dword. */
3914
3915	adapter->stats.gorcl += E1000_READ_REG(&adapter->hw, GORCL);
3916	adapter->stats.gorch += E1000_READ_REG(&adapter->hw, GORCH);
3917	adapter->stats.gotcl += E1000_READ_REG(&adapter->hw, GOTCL);
3918	adapter->stats.gotch += E1000_READ_REG(&adapter->hw, GOTCH);
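	/*
	 * Illustrative only: the true 64-bit totals can be reconstructed
	 * from the accumulated halves, e.g. total octets received ==
	 * ((uint64_t)adapter->stats.gorch << 32) + adapter->stats.gorcl;
	 * the driver deliberately keeps the halves separate.
	 */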
3919
3920	adapter->stats.rnbc += E1000_READ_REG(&adapter->hw, RNBC);
3921	adapter->stats.ruc += E1000_READ_REG(&adapter->hw, RUC);
3922	adapter->stats.rfc += E1000_READ_REG(&adapter->hw, RFC);
3923	adapter->stats.roc += E1000_READ_REG(&adapter->hw, ROC);
3924	adapter->stats.rjc += E1000_READ_REG(&adapter->hw, RJC);
3925
3926	adapter->stats.torl += E1000_READ_REG(&adapter->hw, TORL);
3927	adapter->stats.torh += E1000_READ_REG(&adapter->hw, TORH);
3928	adapter->stats.totl += E1000_READ_REG(&adapter->hw, TOTL);
3929	adapter->stats.toth += E1000_READ_REG(&adapter->hw, TOTH);
3930
3931	adapter->stats.tpr += E1000_READ_REG(&adapter->hw, TPR);
3932	adapter->stats.tpt += E1000_READ_REG(&adapter->hw, TPT);
3933	adapter->stats.ptc64 += E1000_READ_REG(&adapter->hw, PTC64);
3934	adapter->stats.ptc127 += E1000_READ_REG(&adapter->hw, PTC127);
3935	adapter->stats.ptc255 += E1000_READ_REG(&adapter->hw, PTC255);
3936	adapter->stats.ptc511 += E1000_READ_REG(&adapter->hw, PTC511);
3937	adapter->stats.ptc1023 += E1000_READ_REG(&adapter->hw, PTC1023);
3938	adapter->stats.ptc1522 += E1000_READ_REG(&adapter->hw, PTC1522);
3939	adapter->stats.mptc += E1000_READ_REG(&adapter->hw, MPTC);
3940	adapter->stats.bptc += E1000_READ_REG(&adapter->hw, BPTC);
3941
3942	if (adapter->hw.mac_type >= em_82543) {
3943		adapter->stats.algnerrc += E1000_READ_REG(&adapter->hw, ALGNERRC);
3944		adapter->stats.rxerrc += E1000_READ_REG(&adapter->hw, RXERRC);
3945		adapter->stats.tncrs += E1000_READ_REG(&adapter->hw, TNCRS);
3946		adapter->stats.cexterr += E1000_READ_REG(&adapter->hw, CEXTERR);
3947		adapter->stats.tsctc += E1000_READ_REG(&adapter->hw, TSCTC);
3948		adapter->stats.tsctfc += E1000_READ_REG(&adapter->hw, TSCTFC);
3949	}
3950	ifp = adapter->ifp;
3951
3952	ifp->if_collisions = adapter->stats.colc;
3953
3954	/* Rx Errors */
3955	ifp->if_ierrors = adapter->stats.rxerrc + adapter->stats.crcerrs +
3956	    adapter->stats.algnerrc + adapter->stats.ruc + adapter->stats.roc +
3957	    adapter->stats.mpc + adapter->stats.cexterr;
3958
3959	/* Tx Errors */
3960	ifp->if_oerrors = adapter->stats.ecol + adapter->stats.latecol +
3961	    adapter->watchdog_events;
3962}
3963
3964
3965/**********************************************************************
3966 *
3967 *  This routine is called only when em_display_debug_stats is enabled.
3968 *  This routine provides a way to take a look at important statistics
3969 *  maintained by the driver and hardware.
3970 *
3971 **********************************************************************/
3972static void
3973em_print_debug_info(struct adapter *adapter)
3974{
3975	device_t dev = adapter->dev;
3976	uint8_t *hw_addr = adapter->hw.hw_addr;
3977
3978	device_printf(dev, "Adapter hardware address = %p \n", hw_addr);
3979	device_printf(dev, "CTRL = 0x%x RCTL = 0x%x \n",
3980	    E1000_READ_REG(&adapter->hw, CTRL),
3981	    E1000_READ_REG(&adapter->hw, RCTL));
3982	device_printf(dev, "Packet buffer = Tx=%dk Rx=%dk \n",
3983	    ((E1000_READ_REG(&adapter->hw, PBA) & 0xffff0000) >> 16),
3984	    (E1000_READ_REG(&adapter->hw, PBA) & 0xffff));
3985	device_printf(dev, "Flow control watermarks high = %d low = %d\n",
3986	    adapter->hw.fc_high_water,
3987	    adapter->hw.fc_low_water);
3988	device_printf(dev, "tx_int_delay = %d, tx_abs_int_delay = %d\n",
3989	    E1000_READ_REG(&adapter->hw, TIDV),
3990	    E1000_READ_REG(&adapter->hw, TADV));
3991	device_printf(dev, "rx_int_delay = %d, rx_abs_int_delay = %d\n",
3992	    E1000_READ_REG(&adapter->hw, RDTR),
3993	    E1000_READ_REG(&adapter->hw, RADV));
3994	device_printf(dev, "fifo workaround = %lld, fifo_reset_count = %lld\n",
3995	    (long long)adapter->tx_fifo_wrk_cnt,
3996	    (long long)adapter->tx_fifo_reset_cnt);
3997	device_printf(dev, "hw tdh = %d, hw tdt = %d\n",
3998	    E1000_READ_REG(&adapter->hw, TDH),
3999	    E1000_READ_REG(&adapter->hw, TDT));
4000	device_printf(dev, "Num Tx descriptors avail = %d\n",
4001	    adapter->num_tx_desc_avail);
4002	device_printf(dev, "Tx Descriptors not avail1 = %ld\n",
4003	    adapter->no_tx_desc_avail1);
4004	device_printf(dev, "Tx Descriptors not avail2 = %ld\n",
4005	    adapter->no_tx_desc_avail2);
4006	device_printf(dev, "Std mbuf failed = %ld\n",
4007	    adapter->mbuf_alloc_failed);
4008	device_printf(dev, "Std mbuf cluster failed = %ld\n",
4009	    adapter->mbuf_cluster_failed);
4010}
4011
4012static void
4013em_print_hw_stats(struct adapter *adapter)
4014{
4015	device_t dev = adapter->dev;
4016
4017	device_printf(dev, "Excessive collisions = %lld\n",
4018	    (long long)adapter->stats.ecol);
4019	device_printf(dev, "Symbol errors = %lld\n",
4020	    (long long)adapter->stats.symerrs);
4021	device_printf(dev, "Sequence errors = %lld\n",
4022	    (long long)adapter->stats.sec);
4023	device_printf(dev, "Defer count = %lld\n", (long long)adapter->stats.dc);
4024
4025	device_printf(dev, "Missed Packets = %lld\n", (long long)adapter->stats.mpc);
4026	device_printf(dev, "Receive No Buffers = %lld\n",
4027	    (long long)adapter->stats.rnbc);
4028	/* RLEC is inaccurate on some hardware, calculate our own. */
4029	device_printf(dev, "Receive Length Errors = %lld\n",
4030	    ((long long)adapter->stats.roc + (long long)adapter->stats.ruc));
4031	device_printf(dev, "Receive errors = %lld\n",
4032	    (long long)adapter->stats.rxerrc);
4033	device_printf(dev, "Crc errors = %lld\n", (long long)adapter->stats.crcerrs);
4034	device_printf(dev, "Alignment errors = %lld\n",
4035	    (long long)adapter->stats.algnerrc);
4036	device_printf(dev, "Carrier extension errors = %lld\n",
4037	    (long long)adapter->stats.cexterr);
4038	device_printf(dev, "RX overruns = %ld\n", adapter->rx_overruns);
4039	device_printf(dev, "watchdog timeouts = %ld\n", adapter->watchdog_events);
4040
4041	device_printf(dev, "XON Rcvd = %lld\n", (long long)adapter->stats.xonrxc);
4042	device_printf(dev, "XON Xmtd = %lld\n", (long long)adapter->stats.xontxc);
4043	device_printf(dev, "XOFF Rcvd = %lld\n", (long long)adapter->stats.xoffrxc);
4044	device_printf(dev, "XOFF Xmtd = %lld\n", (long long)adapter->stats.xofftxc);
4045
4046	device_printf(dev, "Good Packets Rcvd = %lld\n",
4047	    (long long)adapter->stats.gprc);
4048	device_printf(dev, "Good Packets Xmtd = %lld\n",
4049	    (long long)adapter->stats.gptc);
4050	device_printf(dev, "TSO Contexts Xmtd = %lld\n",
4051	    (long long)adapter->stats.tsctc);
4052	device_printf(dev, "TSO Contexts Failed = %lld\n",
4053	    (long long)adapter->stats.tsctfc);
4054}
4055
4056static int
4057em_sysctl_debug_info(SYSCTL_HANDLER_ARGS)
4058{
4059	struct adapter *adapter;
4060	int error;
4061	int result;
4062
4063	result = -1;
4064	error = sysctl_handle_int(oidp, &result, 0, req);
4065
4066	if (error || !req->newptr)
4067		return (error);
4068
4069	if (result == 1) {
4070		adapter = (struct adapter *)arg1;
4071		em_print_debug_info(adapter);
4072	}
4073
4074	return (error);
4075}
4076
4077
4078static int
4079em_sysctl_stats(SYSCTL_HANDLER_ARGS)
4080{
4081	struct adapter *adapter;
4082	int error;
4083	int result;
4084
4085	result = -1;
4086	error = sysctl_handle_int(oidp, &result, 0, req);
4087
4088	if (error || !req->newptr)
4089		return (error);
4090
4091	if (result == 1) {
4092		adapter = (struct adapter *)arg1;
4093		em_print_hw_stats(adapter);
4094	}
4095
4096	return (error);
4097}
4098
4099static int
4100em_sysctl_int_delay(SYSCTL_HANDLER_ARGS)
4101{
4102	struct em_int_delay_info *info;
4103	struct adapter *adapter;
4104	uint32_t regval;
4105	int error;
4106	int usecs;
4107	int ticks;
4108
4109	info = (struct em_int_delay_info *)arg1;
4110	usecs = info->value;
4111	error = sysctl_handle_int(oidp, &usecs, 0, req);
4112	if (error != 0 || req->newptr == NULL)
4113		return (error);
4114	if (usecs < 0 || usecs > E1000_TICKS_TO_USECS(65535))
4115		return (EINVAL);
4116	info->value = usecs;
4117	ticks = E1000_USECS_TO_TICKS(usecs);
4118
4119	adapter = info->adapter;
4120
4121	EM_LOCK(adapter);
4122	regval = E1000_READ_OFFSET(&adapter->hw, info->offset);
4123	regval = (regval & ~0xffff) | (ticks & 0xffff);
4124	/* Handle a few special cases. */
4125	switch (info->offset) {
4126	case E1000_RDTR:
4127	case E1000_82542_RDTR:
4128		regval |= E1000_RDT_FPDB;
4129		break;
4130	case E1000_TIDV:
4131	case E1000_82542_TIDV:
4132		if (ticks == 0) {
4133			adapter->txd_cmd &= ~E1000_TXD_CMD_IDE;
4134			/* Don't write 0 into the TIDV register. */
4135			regval++;
4136		} else
4137			adapter->txd_cmd |= E1000_TXD_CMD_IDE;
4138		break;
4139	}
4140	E1000_WRITE_OFFSET(&adapter->hw, info->offset, regval);
4141	EM_UNLOCK(adapter);
4142	return (0);
4143}
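/*
 * Usage sketch (device unit 0 assumed): this handler backs the
 * per-device sysctl nodes created by em_add_int_delay_sysctl() below,
 * so an interrupt delay can be tuned at runtime with sysctl(8), e.g.:
 *
 *	sysctl dev.em.0.rx_int_delay=32
 *
 * The microsecond value is converted to hardware ticks with
 * E1000_USECS_TO_TICKS() before being written to the register.
 */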
4144
4145static void
4146em_add_int_delay_sysctl(struct adapter *adapter, const char *name,
4147	const char *description, struct em_int_delay_info *info,
4148	int offset, int value)
4149{
4150	info->adapter = adapter;
4151	info->offset = offset;
4152	info->value = value;
4153	SYSCTL_ADD_PROC(device_get_sysctl_ctx(adapter->dev),
4154	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
4155	    OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW,
4156	    info, 0, em_sysctl_int_delay, "I", description);
4157}
4158
4159#ifndef DEVICE_POLLING
4160static void
4161em_add_int_process_limit(struct adapter *adapter, const char *name,
4162	const char *description, int *limit, int value)
4163{
4164	*limit = value;
4165	SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev),
4166	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
4167	    OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW, limit, value, description);
4168}
4169#endif
4170