/**************************************************************************

Copyright (c) 2001-2006, Intel Corporation
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

 1. Redistributions of source code must retain the above copyright notice,
    this list of conditions and the following disclaimer.

 2. Redistributions in binary form must reproduce the above copyright
    notice, this list of conditions and the following disclaimer in the
    documentation and/or other materials provided with the distribution.

 3. Neither the name of the Intel Corporation nor the names of its
    contributors may be used to endorse or promote products derived from
    this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.

***************************************************************************/

/*$FreeBSD: head/sys/dev/em/if_em.c 163827 2006-10-31 17:05:02Z jhb $*/

#ifdef HAVE_KERNEL_OPTION_HEADERS
#include "opt_device_polling.h"
#endif

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/bus.h>
#include <sys/endian.h>
#include <sys/kernel.h>
#include <sys/kthread.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/module.h>
#include <sys/rman.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/sysctl.h>
#include <sys/taskqueue.h>

#include <machine/bus.h>
#include <machine/resource.h>

#include <net/bpf.h>
#include <net/ethernet.h>
#include <net/if.h>
#include <net/if_arp.h>
#include <net/if_dl.h>
#include <net/if_media.h>

#include <net/if_types.h>
#include <net/if_vlan_var.h>

#include <netinet/in_systm.h>
#include <netinet/in.h>
#include <netinet/if_ether.h>
#include <netinet/ip.h>
#include <netinet/ip6.h>
#include <netinet/tcp.h>
#include <netinet/udp.h>

#include <machine/in_cksum.h>
#include <dev/pci/pcivar.h>
#include <dev/pci/pcireg.h>
#include <dev/em/if_em_hw.h>
#include <dev/em/if_em.h>

/*********************************************************************
 *  Set this to one to display debug statistics
 *********************************************************************/
int	em_display_debug_stats = 0;

/*********************************************************************
 *  Driver version
 *********************************************************************/

char em_driver_version[] = "Version - 6.2.9";


/*********************************************************************
 *  PCI Device ID Table
 *
 *  Used by probe to select devices to load on
 *  Last field stores an index into em_strings
 *  Last entry must be all 0s
 *
 *  { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index }
 *********************************************************************/

static em_vendor_info_t em_vendor_info_array[] =
{
	/* Intel(R) PRO/1000 Network Connection */
	{ 0x8086, E1000_DEV_ID_82540EM,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82540EM_LOM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82540EP,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82540EP_LOM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82540EP_LP,	PCI_ANY_ID, PCI_ANY_ID, 0},

	{ 0x8086, E1000_DEV_ID_82541EI,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82541ER,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82541ER_LOM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82541EI_MOBILE,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82541GI,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82541GI_LF,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82541GI_MOBILE,	PCI_ANY_ID, PCI_ANY_ID, 0},

	{ 0x8086, E1000_DEV_ID_82542,		PCI_ANY_ID, PCI_ANY_ID, 0},

	{ 0x8086, E1000_DEV_ID_82543GC_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82543GC_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},

	{ 0x8086, E1000_DEV_ID_82544EI_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82544EI_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82544GC_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82544GC_LOM,	PCI_ANY_ID, PCI_ANY_ID, 0},

	{ 0x8086, E1000_DEV_ID_82545EM_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82545EM_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82545GM_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82545GM_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82545GM_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},

	{ 0x8086, E1000_DEV_ID_82546EB_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82546EB_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82546EB_QUAD_COPPER, PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82546GB_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82546GB_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82546GB_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82546GB_PCIE,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82546GB_QUAD_COPPER, PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82546GB_QUAD_COPPER_KSP3,
						PCI_ANY_ID, PCI_ANY_ID, 0},

	{ 0x8086, E1000_DEV_ID_82547EI,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82547EI_MOBILE,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82547GI,		PCI_ANY_ID, PCI_ANY_ID, 0},

	{ 0x8086, E1000_DEV_ID_82571EB_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER_LOWPROFILE,
						PCI_ANY_ID, PCI_ANY_ID, 0},

	{ 0x8086, E1000_DEV_ID_82572EI_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82572EI_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82572EI_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82572EI,		PCI_ANY_ID, PCI_ANY_ID, 0},

	{ 0x8086, E1000_DEV_ID_82573E,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82573E_IAMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82573L,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_SPT,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_SPT,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_DPT,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_DPT,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IGP_M_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IGP_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IGP_C,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IFE,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IFE_GT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IFE_G,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IGP_M,	PCI_ANY_ID, PCI_ANY_ID, 0},

	/* required last entry */
	{ 0, 0, 0, 0, 0}
};

/*********************************************************************
 *  Table of branding strings for all supported NICs.
 *********************************************************************/

static char *em_strings[] = {
	"Intel(R) PRO/1000 Network Connection"
};

/*********************************************************************
 *  Function prototypes
 *********************************************************************/
static int	em_probe(device_t);
static int	em_attach(device_t);
static int	em_detach(device_t);
static int	em_shutdown(device_t);
static int	em_suspend(device_t);
static int	em_resume(device_t);
static void	em_start(struct ifnet *);
static void	em_start_locked(struct ifnet *ifp);
static int	em_ioctl(struct ifnet *, u_long, caddr_t);
static void	em_watchdog(struct ifnet *);
static void	em_init(void *);
static void	em_init_locked(struct adapter *);
static void	em_stop(void *);
static void	em_media_status(struct ifnet *, struct ifmediareq *);
static int	em_media_change(struct ifnet *);
static void	em_identify_hardware(struct adapter *);
static int	em_allocate_pci_resources(struct adapter *);
static int	em_allocate_intr(struct adapter *);
static void	em_free_intr(struct adapter *);
static void	em_free_pci_resources(struct adapter *);
static void	em_local_timer(void *);
static int	em_hardware_init(struct adapter *);
static void	em_setup_interface(device_t, struct adapter *);
static int	em_setup_transmit_structures(struct adapter *);
static void	em_initialize_transmit_unit(struct adapter *);
static int	em_setup_receive_structures(struct adapter *);
static void	em_initialize_receive_unit(struct adapter *);
static void	em_enable_intr(struct adapter *);
static void	em_disable_intr(struct adapter *);
static void	em_free_transmit_structures(struct adapter *);
static void	em_free_receive_structures(struct adapter *);
static void	em_update_stats_counters(struct adapter *);
static void	em_txeof(struct adapter *);
static int	em_allocate_receive_structures(struct adapter *);
static int	em_allocate_transmit_structures(struct adapter *);
static int	em_rxeof(struct adapter *, int);
#ifndef __NO_STRICT_ALIGNMENT
static int	em_fixup_rx(struct adapter *);
#endif
static void	em_receive_checksum(struct adapter *, struct em_rx_desc *,
		    struct mbuf *);
static void	em_transmit_checksum_setup(struct adapter *, struct mbuf *,
		    uint32_t *, uint32_t *);
static boolean_t em_tso_setup(struct adapter *, struct mbuf *,
		    uint32_t *, uint32_t *);
static void	em_set_promisc(struct adapter *);
static void	em_disable_promisc(struct adapter *);
static void	em_set_multi(struct adapter *);
static void	em_print_hw_stats(struct adapter *);
static void	em_update_link_status(struct adapter *);
static int	em_get_buf(struct adapter *, int);
static void	em_enable_vlans(struct adapter *);
static void	em_disable_vlans(struct adapter *);
static int	em_encap(struct adapter *, struct mbuf **);
static void	em_smartspeed(struct adapter *);
static int	em_82547_fifo_workaround(struct adapter *, int);
static void	em_82547_update_fifo_head(struct adapter *, int);
static int	em_82547_tx_fifo_reset(struct adapter *);
static void	em_82547_move_tail(void *arg);
static int	em_dma_malloc(struct adapter *, bus_size_t,
		struct em_dma_alloc *, int);
static void	em_dma_free(struct adapter *, struct em_dma_alloc *);
static void	em_print_debug_info(struct adapter *);
static int	em_is_valid_ether_addr(uint8_t *);
static int	em_sysctl_stats(SYSCTL_HANDLER_ARGS);
static int	em_sysctl_debug_info(SYSCTL_HANDLER_ARGS);
static uint32_t	em_fill_descriptors(bus_addr_t address, uint32_t length,
		    PDESC_ARRAY desc_array);
static int	em_sysctl_int_delay(SYSCTL_HANDLER_ARGS);
static void	em_add_int_delay_sysctl(struct adapter *, const char *,
		const char *, struct em_int_delay_info *, int, int);

/*
 * Fast interrupt handler and legacy ithread/polling modes are
 * mutually exclusive.
 */
#ifdef DEVICE_POLLING
static poll_handler_t em_poll;
static void	em_intr(void *);
#else
static void	em_intr_fast(void *);
static void	em_add_int_process_limit(struct adapter *, const char *,
		const char *, int *, int);
static void	em_handle_rxtx(void *context, int pending);
static void	em_handle_link(void *context, int pending);
#endif

/*********************************************************************
 *  FreeBSD Device Interface Entry Points
 *********************************************************************/

static device_method_t em_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe, em_probe),
	DEVMETHOD(device_attach, em_attach),
	DEVMETHOD(device_detach, em_detach),
	DEVMETHOD(device_shutdown, em_shutdown),
	DEVMETHOD(device_suspend, em_suspend),
	DEVMETHOD(device_resume, em_resume),
	{0, 0}
};

static driver_t em_driver = {
	"em", em_methods, sizeof(struct adapter),
};

static devclass_t em_devclass;
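
/*
 * Register the driver with the pci bus and declare the modules we
 * depend on; the MODULE_DEPEND() triples give the minimum, preferred
 * and maximum acceptable version of each dependency.
 */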
DRIVER_MODULE(em, pci, em_driver, em_devclass, 0, 0);
MODULE_DEPEND(em, pci, 1, 1, 1);
MODULE_DEPEND(em, ether, 1, 1, 1);

/*********************************************************************
 *  Tunable default values.
 *********************************************************************/

#define E1000_TICKS_TO_USECS(ticks)	((1024 * (ticks) + 500) / 1000)
#define E1000_USECS_TO_TICKS(usecs)	((1000 * (usecs) + 512) / 1024)
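
/*
 * The interrupt delay registers tick in 1.024 usec units, hence the
 * 1024/1000 factors above; the +500 and +512 terms round to nearest.
 * For example, E1000_USECS_TO_TICKS(100) = (1000 * 100 + 512) / 1024
 * = 98 ticks.
 */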
#define M_TSO_LEN			66

static int em_tx_int_delay_dflt = E1000_TICKS_TO_USECS(EM_TIDV);
static int em_rx_int_delay_dflt = E1000_TICKS_TO_USECS(EM_RDTR);
static int em_tx_abs_int_delay_dflt = E1000_TICKS_TO_USECS(EM_TADV);
static int em_rx_abs_int_delay_dflt = E1000_TICKS_TO_USECS(EM_RADV);
static int em_rxd = EM_DEFAULT_RXD;
static int em_txd = EM_DEFAULT_TXD;
static int em_smart_pwr_down = FALSE;

TUNABLE_INT("hw.em.tx_int_delay", &em_tx_int_delay_dflt);
TUNABLE_INT("hw.em.rx_int_delay", &em_rx_int_delay_dflt);
TUNABLE_INT("hw.em.tx_abs_int_delay", &em_tx_abs_int_delay_dflt);
TUNABLE_INT("hw.em.rx_abs_int_delay", &em_rx_abs_int_delay_dflt);
TUNABLE_INT("hw.em.rxd", &em_rxd);
TUNABLE_INT("hw.em.txd", &em_txd);
TUNABLE_INT("hw.em.smart_pwr_down", &em_smart_pwr_down);
#ifndef DEVICE_POLLING
static int em_rx_process_limit = 100;
TUNABLE_INT("hw.em.rx_process_limit", &em_rx_process_limit);
#endif
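
/*
 * All of the hw.em.* values above are boot-time tunables: TUNABLE_INT()
 * fetches them from the kernel environment, so e.g. hw.em.rxd="1024" in
 * /boot/loader.conf overrides the default descriptor count before the
 * driver attaches.
 */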

/*********************************************************************
 *  Device identification routine
 *
 *  em_probe determines whether the driver should be loaded on an
 *  adapter, based on the adapter's PCI vendor/device ID.
 *
 *  return BUS_PROBE_DEFAULT on success, positive on failure
 *********************************************************************/

static int
em_probe(device_t dev)
{
	char		adapter_name[60];
	uint16_t	pci_vendor_id = 0;
	uint16_t	pci_device_id = 0;
	uint16_t	pci_subvendor_id = 0;
	uint16_t	pci_subdevice_id = 0;
	em_vendor_info_t *ent;

	INIT_DEBUGOUT("em_probe: begin");

	pci_vendor_id = pci_get_vendor(dev);
	if (pci_vendor_id != EM_VENDOR_ID)
		return (ENXIO);

	pci_device_id = pci_get_device(dev);
	pci_subvendor_id = pci_get_subvendor(dev);
	pci_subdevice_id = pci_get_subdevice(dev);

	ent = em_vendor_info_array;
	while (ent->vendor_id != 0) {
		if ((pci_vendor_id == ent->vendor_id) &&
		    (pci_device_id == ent->device_id) &&

		    ((pci_subvendor_id == ent->subvendor_id) ||
		    (ent->subvendor_id == PCI_ANY_ID)) &&

		    ((pci_subdevice_id == ent->subdevice_id) ||
		    (ent->subdevice_id == PCI_ANY_ID))) {
			sprintf(adapter_name, "%s %s",
				em_strings[ent->index],
				em_driver_version);
			device_set_desc_copy(dev, adapter_name);
			return (BUS_PROBE_DEFAULT);
		}
		ent++;
	}

	return (ENXIO);
}

/*********************************************************************
 *  Device initialization routine
 *
 *  The attach entry point is called when the driver is being loaded.
 *  This routine identifies the type of hardware, allocates all resources
 *  and initializes the hardware.
 *
 *  return 0 on success, positive on failure
 *********************************************************************/

static int
em_attach(device_t dev)
{
	struct adapter	*adapter;
	int		tsize, rsize;
	int		error = 0;

	INIT_DEBUGOUT("em_attach: begin");

	adapter = device_get_softc(dev);
	adapter->dev = adapter->osdep.dev = dev;
	EM_LOCK_INIT(adapter, device_get_nameunit(dev));

	/* SYSCTL stuff */
	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
	    OID_AUTO, "debug_info", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
	    em_sysctl_debug_info, "I", "Debug Information");

	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
	    OID_AUTO, "stats", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
	    em_sysctl_stats, "I", "Statistics");

	callout_init_mtx(&adapter->timer, &adapter->mtx, 0);
	callout_init_mtx(&adapter->tx_fifo_timer, &adapter->mtx, 0);

	/* Determine hardware revision */
	em_identify_hardware(adapter);

	/* Set up some sysctls for the tunable interrupt delays */
	em_add_int_delay_sysctl(adapter, "rx_int_delay",
	    "receive interrupt delay in usecs", &adapter->rx_int_delay,
	    E1000_REG_OFFSET(&adapter->hw, RDTR), em_rx_int_delay_dflt);
	em_add_int_delay_sysctl(adapter, "tx_int_delay",
	    "transmit interrupt delay in usecs", &adapter->tx_int_delay,
	    E1000_REG_OFFSET(&adapter->hw, TIDV), em_tx_int_delay_dflt);
	if (adapter->hw.mac_type >= em_82540) {
		em_add_int_delay_sysctl(adapter, "rx_abs_int_delay",
		    "receive interrupt delay limit in usecs",
		    &adapter->rx_abs_int_delay,
		    E1000_REG_OFFSET(&adapter->hw, RADV),
		    em_rx_abs_int_delay_dflt);
		em_add_int_delay_sysctl(adapter, "tx_abs_int_delay",
		    "transmit interrupt delay limit in usecs",
		    &adapter->tx_abs_int_delay,
		    E1000_REG_OFFSET(&adapter->hw, TADV),
		    em_tx_abs_int_delay_dflt);
	}

#ifndef DEVICE_POLLING
	/* Sysctls for limiting the amount of work done in the taskqueue */
	em_add_int_process_limit(adapter, "rx_processing_limit",
	    "max number of rx packets to process", &adapter->rx_process_limit,
	    em_rx_process_limit);
#endif

	/*
	 * Validate the number of transmit and receive descriptors.  The
	 * count must not exceed the hardware maximum, and the resulting
	 * ring size must be a multiple of EM_DBA_ALIGN.
	 */
	if (((em_txd * sizeof(struct em_tx_desc)) % EM_DBA_ALIGN) != 0 ||
	    (adapter->hw.mac_type >= em_82544 && em_txd > EM_MAX_TXD) ||
	    (adapter->hw.mac_type < em_82544 && em_txd > EM_MAX_TXD_82543) ||
	    (em_txd < EM_MIN_TXD)) {
		device_printf(dev, "Using %d TX descriptors instead of %d!\n",
		    EM_DEFAULT_TXD, em_txd);
		adapter->num_tx_desc = EM_DEFAULT_TXD;
	} else
		adapter->num_tx_desc = em_txd;
	if (((em_rxd * sizeof(struct em_rx_desc)) % EM_DBA_ALIGN) != 0 ||
	    (adapter->hw.mac_type >= em_82544 && em_rxd > EM_MAX_RXD) ||
	    (adapter->hw.mac_type < em_82544 && em_rxd > EM_MAX_RXD_82543) ||
	    (em_rxd < EM_MIN_RXD)) {
		device_printf(dev, "Using %d RX descriptors instead of %d!\n",
		    EM_DEFAULT_RXD, em_rxd);
		adapter->num_rx_desc = EM_DEFAULT_RXD;
	} else
		adapter->num_rx_desc = em_rxd;

	adapter->hw.autoneg = DO_AUTO_NEG;
	adapter->hw.wait_autoneg_complete = WAIT_FOR_AUTO_NEG_DEFAULT;
	adapter->hw.autoneg_advertised = AUTONEG_ADV_DEFAULT;
	adapter->hw.tbi_compatibility_en = TRUE;
	adapter->rx_buffer_len = EM_RXBUFFER_2048;

	adapter->hw.phy_init_script = 1;
	adapter->hw.phy_reset_disable = FALSE;

#ifndef EM_MASTER_SLAVE
	adapter->hw.master_slave = em_ms_hw_default;
#else
	adapter->hw.master_slave = EM_MASTER_SLAVE;
#endif
	/*
	 * Set the max frame size assuming standard ethernet
	 * sized frames.
	 */
	adapter->hw.max_frame_size = ETHERMTU + ETHER_HDR_LEN + ETHER_CRC_LEN;
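	/* With the standard 1500-byte MTU this is 1500 + 14 + 4 = 1518. */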

	adapter->hw.min_frame_size = MINIMUM_ETHERNET_PACKET_SIZE +
	    ETHER_CRC_LEN;

	/*
	 * This controls when hardware reports transmit completion
	 * status.
	 */
	adapter->hw.report_tx_early = 1;
	if (em_allocate_pci_resources(adapter)) {
		device_printf(dev, "Allocation of PCI resources failed\n");
		error = ENXIO;
		goto err_pci;
	}

	/* Initialize eeprom parameters */
	em_init_eeprom_params(&adapter->hw);

	tsize = roundup2(adapter->num_tx_desc * sizeof(struct em_tx_desc),
	    EM_DBA_ALIGN);
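	/*
	 * roundup2() rounds the ring size up to the next multiple of
	 * EM_DBA_ALIGN (a power of two); e.g. 256 descriptors of 16
	 * bytes each already form an aligned 4096-byte ring.
	 */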

	/* Allocate Transmit Descriptor ring */
	if (em_dma_malloc(adapter, tsize, &adapter->txdma, BUS_DMA_NOWAIT)) {
		device_printf(dev, "Unable to allocate tx_desc memory\n");
		error = ENOMEM;
		goto err_tx_desc;
	}
	adapter->tx_desc_base = (struct em_tx_desc *)adapter->txdma.dma_vaddr;

	rsize = roundup2(adapter->num_rx_desc * sizeof(struct em_rx_desc),
	    EM_DBA_ALIGN);

	/* Allocate Receive Descriptor ring */
	if (em_dma_malloc(adapter, rsize, &adapter->rxdma, BUS_DMA_NOWAIT)) {
		device_printf(dev, "Unable to allocate rx_desc memory\n");
		error = ENOMEM;
		goto err_rx_desc;
	}
	adapter->rx_desc_base = (struct em_rx_desc *)adapter->rxdma.dma_vaddr;

	/* Initialize the hardware */
	if (em_hardware_init(adapter)) {
		device_printf(dev, "Unable to initialize the hardware\n");
		error = EIO;
		goto err_hw_init;
	}

	/* Copy the permanent MAC address out of the EEPROM */
	if (em_read_mac_addr(&adapter->hw) < 0) {
		device_printf(dev, "EEPROM read error while reading MAC"
		    " address\n");
		error = EIO;
		goto err_hw_init;
	}

	if (!em_is_valid_ether_addr(adapter->hw.mac_addr)) {
		device_printf(dev, "Invalid MAC address\n");
		error = EIO;
		goto err_hw_init;
	}

	/* Setup OS specific network interface */
	em_setup_interface(dev, adapter);

	em_allocate_intr(adapter);

	/* Initialize statistics */
	em_clear_hw_cntrs(&adapter->hw);
	em_update_stats_counters(adapter);
	adapter->hw.get_link_status = 1;
	em_update_link_status(adapter);

	/* Indicate SOL/IDER usage */
	if (em_check_phy_reset_block(&adapter->hw))
		device_printf(dev,
		    "PHY reset is blocked due to SOL/IDER session.\n");

	/* Identify 82544 on PCIX */
	em_get_bus_info(&adapter->hw);
	if (adapter->hw.bus_type == em_bus_type_pcix &&
	    adapter->hw.mac_type == em_82544)
		adapter->pcix_82544 = TRUE;
	else
		adapter->pcix_82544 = FALSE;

	INIT_DEBUGOUT("em_attach: end");

	return (0);

err_hw_init:
	em_dma_free(adapter, &adapter->rxdma);
err_rx_desc:
	em_dma_free(adapter, &adapter->txdma);
err_tx_desc:
err_pci:
	em_free_intr(adapter);
	em_free_pci_resources(adapter);
	EM_LOCK_DESTROY(adapter);

	return (error);
}

/*********************************************************************
 *  Device removal routine
 *
 *  The detach entry point is called when the driver is being removed.
 *  This routine stops the adapter and deallocates all the resources
 *  that were allocated for driver operation.
 *
 *  return 0 on success, positive on failure
 *********************************************************************/

static int
em_detach(device_t dev)
{
	struct adapter	*adapter = device_get_softc(dev);
	struct ifnet	*ifp = adapter->ifp;

	INIT_DEBUGOUT("em_detach: begin");

#ifdef DEVICE_POLLING
	if (ifp->if_capenable & IFCAP_POLLING)
		ether_poll_deregister(ifp);
#endif

	em_free_intr(adapter);
	EM_LOCK(adapter);
	adapter->in_detach = 1;
	em_stop(adapter);
	em_phy_hw_reset(&adapter->hw);
	EM_UNLOCK(adapter);
	ether_ifdetach(adapter->ifp);

	callout_drain(&adapter->timer);
	callout_drain(&adapter->tx_fifo_timer);

	em_free_pci_resources(adapter);
	bus_generic_detach(dev);
	if_free(ifp);

	/* Free Transmit Descriptor ring */
	if (adapter->tx_desc_base) {
		em_dma_free(adapter, &adapter->txdma);
		adapter->tx_desc_base = NULL;
	}

	/* Free Receive Descriptor ring */
	if (adapter->rx_desc_base) {
		em_dma_free(adapter, &adapter->rxdma);
		adapter->rx_desc_base = NULL;
	}

	EM_LOCK_DESTROY(adapter);

	return (0);
}

/*********************************************************************
 *
 *  Shutdown entry point
 *
 **********************************************************************/

static int
em_shutdown(device_t dev)
{
	struct adapter *adapter = device_get_softc(dev);

	EM_LOCK(adapter);
	em_stop(adapter);
	EM_UNLOCK(adapter);
	return (0);
}

/*
 * Suspend/resume device methods.
 */
static int
em_suspend(device_t dev)
{
	struct adapter *adapter = device_get_softc(dev);

	EM_LOCK(adapter);
	em_stop(adapter);
	EM_UNLOCK(adapter);

	return bus_generic_suspend(dev);
}

static int
em_resume(device_t dev)
{
	struct adapter *adapter = device_get_softc(dev);
	struct ifnet *ifp = adapter->ifp;

	EM_LOCK(adapter);
	em_init_locked(adapter);
	if ((ifp->if_flags & IFF_UP) &&
	    (ifp->if_drv_flags & IFF_DRV_RUNNING))
		em_start_locked(ifp);
	EM_UNLOCK(adapter);

	return bus_generic_resume(dev);
}


/*********************************************************************
 *  Transmit entry point
 *
 *  em_start is called by the stack to initiate a transmit.
 *  The driver will remain in this routine as long as there are
 *  packets to transmit and transmit resources are available.
 *  If resources are not available, the stack is notified and
 *  the packet is requeued.
 **********************************************************************/

static void
em_start_locked(struct ifnet *ifp)
{
	struct adapter	*adapter = ifp->if_softc;
	struct mbuf	*m_head;

	EM_LOCK_ASSERT(adapter);

	if ((ifp->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) !=
	    IFF_DRV_RUNNING)
		return;
	if (!adapter->link_active)
		return;

	while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {

		IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
		if (m_head == NULL)
			break;
		/*
		 * em_encap() can modify our pointer, and/or make it NULL
		 * on failure.  In that event, we can't requeue.
		 */
		if (em_encap(adapter, &m_head)) {
			if (m_head == NULL)
				break;
			ifp->if_drv_flags |= IFF_DRV_OACTIVE;
			IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
			break;
		}

		/* Send a copy of the frame to the BPF listener */
		BPF_MTAP(ifp, m_head);

		/* Set timeout in case hardware has problems transmitting. */
		ifp->if_timer = EM_TX_TIMEOUT;
	}
}

static void
em_start(struct ifnet *ifp)
{
	struct adapter *adapter = ifp->if_softc;

	EM_LOCK(adapter);
	if (ifp->if_drv_flags & IFF_DRV_RUNNING)
		em_start_locked(ifp);
	EM_UNLOCK(adapter);
}

/*********************************************************************
 *  Ioctl entry point
 *
 *  em_ioctl is called when the user wants to configure the
 *  interface.
 *
 *  return 0 on success, positive on failure
 **********************************************************************/

static int
em_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
{
	struct adapter	*adapter = ifp->if_softc;
	struct ifreq *ifr = (struct ifreq *)data;
	struct ifaddr *ifa = (struct ifaddr *)data;
	int error = 0;

	if (adapter->in_detach)
		return (error);

	switch (command) {
	case SIOCSIFADDR:
	case SIOCGIFADDR:
		if (ifa->ifa_addr->sa_family == AF_INET) {
			/*
			 * XXX
			 * Since resetting the hardware takes a very long
			 * time and results in link renegotiation, we
			 * initialize the hardware only when it is
			 * absolutely required.
			 */
			ifp->if_flags |= IFF_UP;
			if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) {
				EM_LOCK(adapter);
				em_init_locked(adapter);
				EM_UNLOCK(adapter);
			}
			arp_ifinit(ifp, ifa);
		} else
			error = ether_ioctl(ifp, command, data);
		break;
	case SIOCSIFMTU:
	    {
		int max_frame_size;
		uint16_t eeprom_data = 0;

		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFMTU (Set Interface MTU)");

		EM_LOCK(adapter);
		switch (adapter->hw.mac_type) {
		case em_82573:
			/*
			 * 82573 only supports jumbo frames
			 * if ASPM is disabled.
			 */
			em_read_eeprom(&adapter->hw, EEPROM_INIT_3GIO_3, 1,
			    &eeprom_data);
			if (eeprom_data & EEPROM_WORD1A_ASPM_MASK) {
				max_frame_size = ETHER_MAX_LEN;
				break;
			}
			/* Allow Jumbo frames - fall thru */
		case em_82571:
		case em_82572:
		case em_80003es2lan:	/* Limit Jumbo Frame size */
			max_frame_size = 9234;
			break;
		case em_ich8lan:
			/* ICH8 does not support jumbo frames */
			max_frame_size = ETHER_MAX_LEN;
			break;
		default:
			max_frame_size = MAX_JUMBO_FRAME_SIZE;
		}
		if (ifr->ifr_mtu > max_frame_size - ETHER_HDR_LEN -
		    ETHER_CRC_LEN) {
			EM_UNLOCK(adapter);
			error = EINVAL;
			break;
		}

		ifp->if_mtu = ifr->ifr_mtu;
		adapter->hw.max_frame_size =
		    ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
		em_init_locked(adapter);
		EM_UNLOCK(adapter);
		break;
	    }
	case SIOCSIFFLAGS:
		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFFLAGS (Set Interface Flags)");
		EM_LOCK(adapter);
		if (ifp->if_flags & IFF_UP) {
			if ((ifp->if_drv_flags & IFF_DRV_RUNNING)) {
				if ((ifp->if_flags ^ adapter->if_flags) &
				    IFF_PROMISC) {
					em_disable_promisc(adapter);
					em_set_promisc(adapter);
				}
			} else
				em_init_locked(adapter);
		} else {
			if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
				em_stop(adapter);
			}
		}
		adapter->if_flags = ifp->if_flags;
		EM_UNLOCK(adapter);
		break;
	case SIOCADDMULTI:
	case SIOCDELMULTI:
		IOCTL_DEBUGOUT("ioctl rcv'd: SIOC(ADD|DEL)MULTI");
		if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
			EM_LOCK(adapter);
			em_disable_intr(adapter);
			em_set_multi(adapter);
			if (adapter->hw.mac_type == em_82542_rev2_0) {
				em_initialize_receive_unit(adapter);
			}
#ifdef DEVICE_POLLING
			if (!(ifp->if_capenable & IFCAP_POLLING))
#endif
				em_enable_intr(adapter);
			EM_UNLOCK(adapter);
		}
		break;
	case SIOCSIFMEDIA:
		/* Check SOL/IDER usage */
		EM_LOCK(adapter);
		if (em_check_phy_reset_block(&adapter->hw)) {
			EM_UNLOCK(adapter);
			device_printf(adapter->dev, "Media change is"
			    " blocked due to SOL/IDER session.\n");
			break;
		}
		EM_UNLOCK(adapter);
	case SIOCGIFMEDIA:
		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCxIFMEDIA (Get/Set Interface Media)");
		error = ifmedia_ioctl(ifp, ifr, &adapter->media, command);
		break;
	case SIOCSIFCAP:
	    {
		int mask, reinit;

		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFCAP (Set Capabilities)");
		reinit = 0;
		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
#ifdef DEVICE_POLLING
		if (mask & IFCAP_POLLING) {
			if (ifr->ifr_reqcap & IFCAP_POLLING) {
				error = ether_poll_register(em_poll, ifp);
				if (error)
					return (error);
				EM_LOCK(adapter);
				em_disable_intr(adapter);
				ifp->if_capenable |= IFCAP_POLLING;
				EM_UNLOCK(adapter);
			} else {
				error = ether_poll_deregister(ifp);
				/* Enable interrupt even in error case */
				EM_LOCK(adapter);
				em_enable_intr(adapter);
				ifp->if_capenable &= ~IFCAP_POLLING;
				EM_UNLOCK(adapter);
			}
		}
#endif
		if (mask & IFCAP_HWCSUM) {
			ifp->if_capenable ^= IFCAP_HWCSUM;
			reinit = 1;
		}
		if (mask & IFCAP_TSO4) {
			ifp->if_capenable ^= IFCAP_TSO4;
			reinit = 1;
		}
		if (mask & IFCAP_VLAN_HWTAGGING) {
			ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
			reinit = 1;
		}
		if (reinit && (ifp->if_drv_flags & IFF_DRV_RUNNING))
			em_init(adapter);
		VLAN_CAPABILITIES(ifp);
		break;
	    }
	default:
		error = ether_ioctl(ifp, command, data);
		break;
	}

	return (error);
}

/*********************************************************************
 *  Watchdog entry point
 *
 *  This routine is called whenever hardware quits transmitting.
 *
 **********************************************************************/

static void
em_watchdog(struct ifnet *ifp)
{
	struct adapter *adapter = ifp->if_softc;

	EM_LOCK(adapter);

	/*
	 * If we are in this routine because of pause frames, then
	 * don't reset the hardware.
	 */
	if (E1000_READ_REG(&adapter->hw, STATUS) & E1000_STATUS_TXOFF) {
		ifp->if_timer = EM_TX_TIMEOUT;
		EM_UNLOCK(adapter);
		return;
	}

	/*
	 * Reclaim first, as there is a possibility of losing Tx completion
	 * interrupts.  Missing Tx completion interrupts may come from the
	 * Tx interrupt moderation mechanism (delayed interrupts) or from
	 * a chipset bug.
	 */
	em_txeof(adapter);
	if (adapter->num_tx_desc_avail == adapter->num_tx_desc) {
		EM_UNLOCK(adapter);
		return;
	}

	if (em_check_for_link(&adapter->hw) == 0)
		device_printf(adapter->dev, "watchdog timeout -- resetting\n");

	ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
	adapter->watchdog_events++;

	em_init_locked(adapter);
	EM_UNLOCK(adapter);
}

/*********************************************************************
 *  Init entry point
 *
 *  This routine is used in two ways.  It is used by the stack as
 *  the init entry point in the network interface structure.  It is
 *  also used by the driver as a hw/sw initialization routine to get
 *  to a consistent state.
 *
 **********************************************************************/

static void
em_init_locked(struct adapter *adapter)
{
	struct ifnet	*ifp = adapter->ifp;
	device_t	dev = adapter->dev;
	uint32_t	pba;

	INIT_DEBUGOUT("em_init: begin");

	EM_LOCK_ASSERT(adapter);

	em_stop(adapter);

	/*
	 * Packet Buffer Allocation (PBA)
	 * Writing PBA sets the receive portion of the buffer;
	 * the remainder is used for the transmit buffer.
	 *
	 * Devices before the 82547 had a Packet Buffer of 64K.
	 *   Default allocation: PBA=48K for Rx, leaving 16K for Tx.
	 * After the 82547 the buffer was reduced to 40K.
	 *   Default allocation: PBA=30K for Rx, leaving 10K for Tx.
	 *   Note: the default does not leave enough room for a
	 *   Jumbo Frame >10k.
	 */
	switch (adapter->hw.mac_type) {
	case em_82547:
	case em_82547_rev_2: /* 82547: Total Packet Buffer is 40K */
		if (adapter->hw.max_frame_size > EM_RXBUFFER_8192)
			pba = E1000_PBA_22K; /* 22K for Rx, 18K for Tx */
		else
			pba = E1000_PBA_30K; /* 30K for Rx, 10K for Tx */
		adapter->tx_fifo_head = 0;
		adapter->tx_head_addr = pba << EM_TX_HEAD_ADDR_SHIFT;
		adapter->tx_fifo_size =
		    (E1000_PBA_40K - pba) << EM_PBA_BYTES_SHIFT;
		break;
	case em_80003es2lan: /* 80003es2lan: Total Packet Buffer is 48K */
	case em_82571: /* 82571: Total Packet Buffer is 48K */
	case em_82572: /* 82572: Total Packet Buffer is 48K */
		pba = E1000_PBA_32K; /* 32K for Rx, 16K for Tx */
		break;
	case em_82573: /* 82573: Total Packet Buffer is 32K */
		/* Jumbo frames not supported */
		pba = E1000_PBA_12K; /* 12K for Rx, 20K for Tx */
		break;
	case em_ich8lan:
		pba = E1000_PBA_8K;
		break;
	default:
		/* Devices before the 82547 had a Packet Buffer of 64K. */
		if (adapter->hw.max_frame_size > EM_RXBUFFER_8192)
			pba = E1000_PBA_40K; /* 40K for Rx, 24K for Tx */
		else
			pba = E1000_PBA_48K; /* 48K for Rx, 16K for Tx */
	}

	INIT_DEBUGOUT1("em_init: pba=%dK", pba);
	E1000_WRITE_REG(&adapter->hw, PBA, pba);

	/* Get the latest mac address; the user can use a LAA. */
	bcopy(IF_LLADDR(adapter->ifp), adapter->hw.mac_addr, ETHER_ADDR_LEN);

	/* Initialize the hardware */
	if (em_hardware_init(adapter)) {
		device_printf(dev, "Unable to initialize the hardware\n");
		return;
	}
	em_update_link_status(adapter);

	if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING)
		em_enable_vlans(adapter);

	ifp->if_hwassist = 0;
	if (adapter->hw.mac_type >= em_82543) {
		if (ifp->if_capenable & IFCAP_TXCSUM)
			ifp->if_hwassist = EM_CHECKSUM_FEATURES;
		/*
		 * em_setup_transmit_structures() will behave differently
		 * based on the state of TSO.
		 */
		if (ifp->if_capenable & IFCAP_TSO)
			ifp->if_hwassist |= EM_TCPSEG_FEATURES;
	}

	/* Prepare transmit descriptors and buffers */
	if (em_setup_transmit_structures(adapter)) {
		device_printf(dev, "Could not setup transmit structures\n");
		em_stop(adapter);
		return;
	}
	em_initialize_transmit_unit(adapter);

	/* Setup Multicast table */
	em_set_multi(adapter);

	/* Prepare receive descriptors and buffers */
	if (em_setup_receive_structures(adapter)) {
		device_printf(dev, "Could not setup receive structures\n");
		em_stop(adapter);
		return;
	}
	em_initialize_receive_unit(adapter);

	/* Don't lose promiscuous settings */
	em_set_promisc(adapter);

	ifp->if_drv_flags |= IFF_DRV_RUNNING;
	ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;

	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
	em_clear_hw_cntrs(&adapter->hw);
#ifdef DEVICE_POLLING
	/*
	 * Only enable interrupts if we are not polling; make sure
	 * they are off otherwise.
	 */
	if (ifp->if_capenable & IFCAP_POLLING)
		em_disable_intr(adapter);
	else
#endif /* DEVICE_POLLING */
		em_enable_intr(adapter);

	/* Don't reset the phy next time init gets called */
	adapter->hw.phy_reset_disable = TRUE;
}

static void
em_init(void *arg)
{
	struct adapter *adapter = arg;

	EM_LOCK(adapter);
	em_init_locked(adapter);
	EM_UNLOCK(adapter);
}

#ifdef DEVICE_POLLING
/*********************************************************************
 *
 *  Legacy polling routine
 *
 *********************************************************************/
static void
em_poll(struct ifnet *ifp, enum poll_cmd cmd, int count)
{
	struct adapter *adapter = ifp->if_softc;
	uint32_t reg_icr;

	EM_LOCK(adapter);
	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
		EM_UNLOCK(adapter);
		return;
	}

	if (cmd == POLL_AND_CHECK_STATUS) {
		reg_icr = E1000_READ_REG(&adapter->hw, ICR);
		if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
			callout_stop(&adapter->timer);
			adapter->hw.get_link_status = 1;
			em_check_for_link(&adapter->hw);
			em_update_link_status(adapter);
			callout_reset(&adapter->timer, hz,
			    em_local_timer, adapter);
		}
	}
	em_rxeof(adapter, count);
	em_txeof(adapter);

	if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
		em_start_locked(ifp);
	EM_UNLOCK(adapter);
}

/*********************************************************************
 *
 *  Legacy Interrupt Service routine
 *
 *********************************************************************/
static void
em_intr(void *arg)
{
	struct adapter	*adapter = arg;
	struct ifnet	*ifp;
	uint32_t	reg_icr;

	EM_LOCK(adapter);

	ifp = adapter->ifp;

	if (ifp->if_capenable & IFCAP_POLLING) {
		EM_UNLOCK(adapter);
		return;
	}

	for (;;) {
		reg_icr = E1000_READ_REG(&adapter->hw, ICR);
		if (adapter->hw.mac_type >= em_82571 &&
		    (reg_icr & E1000_ICR_INT_ASSERTED) == 0)
			break;
		else if (reg_icr == 0)
			break;

		/*
		 * XXX: some laptops trigger several spurious interrupts
		 * on em(4) when in the resume cycle.  The ICR register
		 * reports an all-ones value in this case.  Processing
		 * such interrupts would lead to a freeze.  I don't know
		 * why.
		 */
		if (reg_icr == 0xffffffff)
			break;

		if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
			em_rxeof(adapter, -1);
			em_txeof(adapter);
		}

		/* Link status change */
		if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
			callout_stop(&adapter->timer);
			adapter->hw.get_link_status = 1;
			em_check_for_link(&adapter->hw);
			em_update_link_status(adapter);
			callout_reset(&adapter->timer, hz,
			    em_local_timer, adapter);
		}

		if (reg_icr & E1000_ICR_RXO)
			adapter->rx_overruns++;
	}

	if (ifp->if_drv_flags & IFF_DRV_RUNNING &&
	    !IFQ_DRV_IS_EMPTY(&ifp->if_snd))
		em_start_locked(ifp);

	EM_UNLOCK(adapter);
}

#else  /* if not DEVICE_POLLING, then fast interrupt routines only */

static void
em_handle_link(void *context, int pending)
{
	struct adapter	*adapter = context;
	struct ifnet *ifp;

	ifp = adapter->ifp;

	EM_LOCK(adapter);
	if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) {
		EM_UNLOCK(adapter);
		return;
	}

	callout_stop(&adapter->timer);
	adapter->hw.get_link_status = 1;
	em_check_for_link(&adapter->hw);
	em_update_link_status(adapter);
	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
	EM_UNLOCK(adapter);
}

static void
em_handle_rxtx(void *context, int pending)
{
	struct adapter	*adapter = context;
	struct ifnet	*ifp;

	NET_LOCK_GIANT();
	ifp = adapter->ifp;

	/*
	 * TODO:
	 * It should be possible to run the tx clean loop without the lock.
	 */
	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
		if (em_rxeof(adapter, adapter->rx_process_limit) != 0)
			taskqueue_enqueue(adapter->tq, &adapter->rxtx_task);
		EM_LOCK(adapter);
		em_txeof(adapter);

		if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
			em_start_locked(ifp);
		EM_UNLOCK(adapter);
	}

	em_enable_intr(adapter);
	NET_UNLOCK_GIANT();
}

/*********************************************************************
 *
 *  Fast Interrupt Service routine
 *
 *********************************************************************/
static void
em_intr_fast(void *arg)
{
	struct adapter	*adapter = arg;
	struct ifnet	*ifp;
	uint32_t	reg_icr;

	ifp = adapter->ifp;

	reg_icr = E1000_READ_REG(&adapter->hw, ICR);

	/* Hot eject?  */
	if (reg_icr == 0xffffffff)
		return;

	/* Definitely not our interrupt.  */
	if (reg_icr == 0x0)
		return;

	/*
	 * Starting with the 82571 chip, bit 31 should be used to
	 * determine whether the interrupt belongs to us.
	 */
	if (adapter->hw.mac_type >= em_82571 &&
	    (reg_icr & E1000_ICR_INT_ASSERTED) == 0)
		return;

	/*
	 * Mask interrupts until the taskqueue is finished running.  This
	 * is cheap; just assume that it is needed.  This also works around
	 * the MSI message reordering errata on certain systems.
	 */
	em_disable_intr(adapter);
	taskqueue_enqueue(adapter->tq, &adapter->rxtx_task);

	/* Link status change */
	if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC))
		taskqueue_enqueue(taskqueue_fast, &adapter->link_task);

	if (reg_icr & E1000_ICR_RXO)
		adapter->rx_overruns++;
}
#endif /* ! DEVICE_POLLING */

/*********************************************************************
 *
 *  Media Ioctl callback
 *
 *  This routine is called whenever the user queries the status of
 *  the interface using ifconfig.
 *
 **********************************************************************/
static void
em_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
{
	struct adapter *adapter = ifp->if_softc;

	INIT_DEBUGOUT("em_media_status: begin");

	EM_LOCK(adapter);
	em_check_for_link(&adapter->hw);
	em_update_link_status(adapter);

	ifmr->ifm_status = IFM_AVALID;
	ifmr->ifm_active = IFM_ETHER;

	if (!adapter->link_active) {
		EM_UNLOCK(adapter);
		return;
	}

	ifmr->ifm_status |= IFM_ACTIVE;

	if ((adapter->hw.media_type == em_media_type_fiber) ||
	    (adapter->hw.media_type == em_media_type_internal_serdes)) {
		if (adapter->hw.mac_type == em_82545)
			ifmr->ifm_active |= IFM_1000_LX | IFM_FDX;
		else
			ifmr->ifm_active |= IFM_1000_SX | IFM_FDX;
	} else {
		switch (adapter->link_speed) {
		case 10:
			ifmr->ifm_active |= IFM_10_T;
			break;
		case 100:
			ifmr->ifm_active |= IFM_100_TX;
			break;
		case 1000:
			ifmr->ifm_active |= IFM_1000_T;
			break;
		}
		if (adapter->link_duplex == FULL_DUPLEX)
			ifmr->ifm_active |= IFM_FDX;
		else
			ifmr->ifm_active |= IFM_HDX;
	}
	EM_UNLOCK(adapter);
}

/*********************************************************************
 *
 *  Media Ioctl callback
 *
 *  This routine is called when the user changes speed/duplex using
 *  the media/mediaopt options with ifconfig.
 *
 **********************************************************************/
static int
em_media_change(struct ifnet *ifp)
{
	struct adapter *adapter = ifp->if_softc;
	struct ifmedia  *ifm = &adapter->media;

	INIT_DEBUGOUT("em_media_change: begin");

	if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
		return (EINVAL);

	EM_LOCK(adapter);
	switch (IFM_SUBTYPE(ifm->ifm_media)) {
	case IFM_AUTO:
		adapter->hw.autoneg = DO_AUTO_NEG;
		adapter->hw.autoneg_advertised = AUTONEG_ADV_DEFAULT;
		break;
	case IFM_1000_LX:
	case IFM_1000_SX:
	case IFM_1000_T:
		adapter->hw.autoneg = DO_AUTO_NEG;
		adapter->hw.autoneg_advertised = ADVERTISE_1000_FULL;
		break;
	case IFM_100_TX:
		adapter->hw.autoneg = FALSE;
		adapter->hw.autoneg_advertised = 0;
		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
			adapter->hw.forced_speed_duplex = em_100_full;
		else
			adapter->hw.forced_speed_duplex = em_100_half;
		break;
	case IFM_10_T:
		adapter->hw.autoneg = FALSE;
		adapter->hw.autoneg_advertised = 0;
		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
			adapter->hw.forced_speed_duplex = em_10_full;
		else
			adapter->hw.forced_speed_duplex = em_10_half;
		break;
	default:
		device_printf(adapter->dev, "Unsupported media type\n");
	}

	/*
	 * As the speed/duplex settings may have changed, we need to
	 * reset the PHY.
	 */
	adapter->hw.phy_reset_disable = FALSE;

	em_init_locked(adapter);
	EM_UNLOCK(adapter);

	return (0);
}

/*********************************************************************
 *
 *  This routine maps the mbufs to tx descriptors.
 *
 *  return 0 on success, positive on failure
 **********************************************************************/
static int
em_encap(struct adapter *adapter, struct mbuf **m_headp)
{
	struct ifnet		*ifp = adapter->ifp;
	bus_dma_segment_t	segs[EM_MAX_SCATTER];
	bus_dmamap_t		map;
	struct em_buffer	*tx_buffer, *tx_buffer_mapped;
	struct em_tx_desc	*current_tx_desc;
	struct mbuf		*m_head;
	uint32_t		txd_upper, txd_lower, txd_used, txd_saved;
	int			nsegs, i, j, first, last = 0;
	int			error, do_tso, tso_desc = 0;

	m_head = *m_headp;
	current_tx_desc = NULL;
	txd_upper = txd_lower = txd_used = txd_saved = 0;

	do_tso = ((m_head->m_pkthdr.csum_flags & CSUM_TSO) != 0);

	/*
	 * Force a cleanup if the number of available TX descriptors
	 * hits the threshold.
	 */
	if (adapter->num_tx_desc_avail <= EM_TX_CLEANUP_THRESHOLD) {
		em_txeof(adapter);
		if (adapter->num_tx_desc_avail <= EM_TX_CLEANUP_THRESHOLD) {
			adapter->no_tx_desc_avail1++;
			return (ENOBUFS);
		}
	}

	/*
	 * When operating in promiscuous mode, hardware stripping of the
	 * VLAN tag on receive is disabled.  This should not prevent us
	 * from doing hardware insertion of the VLAN tag here as that
	 * is controlled by the dma descriptor flags and not the receive
	 * tag strip setting.  Unfortunately, this hardware switches the
	 * VLAN encapsulation type from 802.1q to ISL when stripping on
	 * receive is disabled.  This means we have to add the vlan
	 * encapsulation here in the driver, since it will have come down
	 * from the VLAN layer with a tag instead of a VLAN header.
	 */
	if ((m_head->m_flags & M_VLANTAG) && adapter->em_insert_vlan_header) {
		struct ether_vlan_header *evl;
		struct ether_header eh;

		m_head = m_pullup(m_head, sizeof(eh));
		if (m_head == NULL) {
			*m_headp = NULL;
			return (ENOBUFS);
		}
		eh = *mtod(m_head, struct ether_header *);
		M_PREPEND(m_head, sizeof(*evl), M_DONTWAIT);
		if (m_head == NULL) {
			*m_headp = NULL;
			return (ENOBUFS);
		}
		m_head = m_pullup(m_head, sizeof(*evl));
		if (m_head == NULL) {
			*m_headp = NULL;
			return (ENOBUFS);
		}
		evl = mtod(m_head, struct ether_vlan_header *);
		bcopy(&eh, evl, sizeof(*evl));
		evl->evl_proto = evl->evl_encap_proto;
		evl->evl_encap_proto = htons(ETHERTYPE_VLAN);
		evl->evl_tag = htons(m_head->m_pkthdr.ether_vtag);
		*m_headp = m_head;
	}

	/*
	 * TSO workaround:
	 *  If an mbuf contains only the IP and TCP header we have
	 *  to pull 4 bytes of data into it.
	 */
	if (do_tso && (m_head->m_len <= M_TSO_LEN)) {
		m_head = m_pullup(m_head, M_TSO_LEN + 4);
		*m_headp = m_head;
		if (m_head == NULL) {
			return (ENOBUFS);
		}
	}

	/*
	 * Map the packet for DMA.
	 *
	 * Capture the first descriptor index; this descriptor
	 * will have the index of the EOP, which is the only one
	 * that now gets a DONE bit writeback.
	 */
	first = adapter->next_avail_tx_desc;
	tx_buffer = &adapter->tx_buffer_area[first];
	tx_buffer_mapped = tx_buffer;
	map = tx_buffer->map;

	error = bus_dmamap_load_mbuf_sg(adapter->txtag, map, *m_headp, segs,
	    &nsegs, BUS_DMA_NOWAIT);

	/*
	 * There are two types of errors we can (try) to handle:
	 * - EFBIG means the mbuf chain was too long and bus_dma ran
	 *   out of segments.  Defragment the mbuf chain and try again.
	 * - ENOMEM means bus_dma could not obtain enough bounce buffers
	 *   at this point in time.  Defer sending and try again later.
	 * All other errors, in particular EINVAL, are fatal and prevent the
	 * mbuf chain from ever going through.  Drop it and report error.
	 */
	if (error == EFBIG) {
		struct mbuf *m;

		m = m_defrag(*m_headp, M_DONTWAIT);
		if (m == NULL) {
			/* Assume m_defrag(9) used only m_get(9). */
			adapter->mbuf_alloc_failed++;
			m_freem(*m_headp);
			*m_headp = NULL;
			return (ENOBUFS);
		}
		*m_headp = m;

		error = bus_dmamap_load_mbuf_sg(adapter->txtag, map, *m_headp,
		    segs, &nsegs, BUS_DMA_NOWAIT);

		if (error == ENOMEM) {
			adapter->no_tx_dma_setup++;
			return (error);
		} else if (error != 0) {
			adapter->no_tx_dma_setup++;
			m_freem(*m_headp);
			*m_headp = NULL;
			return (error);
		}
	} else if (error == ENOMEM) {
		adapter->no_tx_dma_setup++;
		return (error);
	} else if (error != 0) {
		adapter->no_tx_dma_setup++;
		m_freem(*m_headp);
		*m_headp = NULL;
		return (error);
	}

	/*
	 * TSO hardware workaround: if this packet is not TSO, is only
	 * a single descriptor long, and follows a TSO burst, then we
	 * need to add a sentinel descriptor to prevent premature
	 * writeback.
	 */
	if ((do_tso == 0) && (adapter->tx_tso == TRUE)) {
		if (nsegs == 1)
			tso_desc = TRUE;
		adapter->tx_tso = FALSE;
	}

	if (nsegs > adapter->num_tx_desc_avail - 2) {
		adapter->no_tx_desc_avail2++;
		bus_dmamap_unload(adapter->txtag, map);
		return (ENOBUFS);
	}
	m_head = *m_headp;

	/* Do hardware assists */
	if (ifp->if_hwassist) {
		if (do_tso &&
		    em_tso_setup(adapter, m_head, &txd_upper, &txd_lower)) {
			/* we need to make a final sentinel transmit desc */
			tso_desc = TRUE;
		} else
			em_transmit_checksum_setup(adapter, m_head,
			    &txd_upper, &txd_lower);
	}
1639
1640	i = adapter->next_avail_tx_desc;
1641	if (adapter->pcix_82544)
1642		txd_saved = i;
1643
1644	for (j = 0; j < nsegs; j++) {
1645		bus_size_t seg_len;
1646		bus_addr_t seg_addr;
1647		/* If adapter is 82544 and on PCIX bus. */
1648		if(adapter->pcix_82544) {
1649			DESC_ARRAY	desc_array;
1650			uint32_t	array_elements, counter;
1651
1652			/*
1653			 * Check the Address and Length combination and
1654			 * split the data accordingly
1655			 */
1656			array_elements = em_fill_descriptors(segs[j].ds_addr,
1657			    segs[j].ds_len, &desc_array);
1658			for (counter = 0; counter < array_elements; counter++) {
1659				if (txd_used == adapter->num_tx_desc_avail) {
1660					adapter->next_avail_tx_desc = txd_saved;
1661					adapter->no_tx_desc_avail2++;
1662					bus_dmamap_unload(adapter->txtag, map);
1663					return (ENOBUFS);
1664				}
1665				tx_buffer = &adapter->tx_buffer_area[i];
1666				current_tx_desc = &adapter->tx_desc_base[i];
1667				current_tx_desc->buffer_addr = htole64(
1668					desc_array.descriptor[counter].address);
1669				current_tx_desc->lower.data = htole32(
1670					(adapter->txd_cmd | txd_lower |
1671					(uint16_t)desc_array.descriptor[counter].length));
1672				current_tx_desc->upper.data = htole32((txd_upper));
1673				last = i;
1674				if (++i == adapter->num_tx_desc)
1675					i = 0;
1676
1677				tx_buffer->m_head = NULL;
1678				tx_buffer->next_eop = -1;
1679				txd_used++;
1680			}
1681		} else {
1682			tx_buffer = &adapter->tx_buffer_area[i];
1683			current_tx_desc = &adapter->tx_desc_base[i];
1684			seg_addr = htole64(segs[j].ds_addr);
1685			seg_len  = segs[j].ds_len;
1686			/*
1687			** TSO Workaround:
1688			** If this is the last descriptor, we want to
1689			** split it so we have a small final sentinel
1690			*/
1691			if (tso_desc && (j == (nsegs -1)) && (seg_len > 8)) {
1692				seg_len -= 4;
1693				current_tx_desc->buffer_addr = seg_addr;
1694				current_tx_desc->lower.data = htole32(
1695				adapter->txd_cmd | txd_lower | seg_len);
1696				current_tx_desc->upper.data =
1697				    htole32(txd_upper);
1698				if (++i == adapter->num_tx_desc)
1699					i = 0;
1700				/* Now make the sentinel */
1701				++txd_used; /* using an extra txd */
1702				current_tx_desc = &adapter->tx_desc_base[i];
1703				tx_buffer = &adapter->tx_buffer_area[i];
1704				current_tx_desc->buffer_addr =
1705				    seg_addr + seg_len;
1706				current_tx_desc->lower.data = htole32(
1707				adapter->txd_cmd | txd_lower | 4);
1708				current_tx_desc->upper.data =
1709				    htole32(txd_upper);
1710				last = i;
1711				if (++i == adapter->num_tx_desc)
1712					i = 0;
1713			} else {
1714				current_tx_desc->buffer_addr = seg_addr;
1715				current_tx_desc->lower.data = htole32(
1716				adapter->txd_cmd | txd_lower | seg_len);
1717				current_tx_desc->upper.data =
1718				    htole32(txd_upper);
1719				last = i;
1720				if (++i == adapter->num_tx_desc)
1721					i = 0;
1722			}
1723			tx_buffer->m_head = NULL;
1724			tx_buffer->next_eop = -1;
1725		}
1726	}
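	/*
	 * Illustrative example (added commentary, not in the original
	 * source): with the TSO workaround above, a 1000-byte final
	 * segment is programmed as a 996-byte descriptor followed by a
	 * 4-byte sentinel descriptor, so the hardware cannot complete
	 * writeback before the tail of the packet has been fetched.
	 */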
1727
1728	adapter->next_avail_tx_desc = i;
1729	if (adapter->pcix_82544)
1730		adapter->num_tx_desc_avail -= txd_used;
1731	else {
1732		adapter->num_tx_desc_avail -= nsegs;
1733		if (tso_desc) /* TSO used an extra for sentinel */
1734			adapter->num_tx_desc_avail -= txd_used;
1735	}
1736
1737	if (m_head->m_flags & M_VLANTAG) {
1738		/* Set the vlan id. */
1739		current_tx_desc->upper.fields.special =
1740		    htole16(m_head->m_pkthdr.ether_vtag);
1741
1742		/* Tell hardware to add tag. */
1743		current_tx_desc->lower.data |= htole32(E1000_TXD_CMD_VLE);
1744	}
1745
1746	tx_buffer->m_head = m_head;
1747	tx_buffer_mapped->map = tx_buffer->map;
1748	tx_buffer->map = map;
1749	bus_dmamap_sync(adapter->txtag, map, BUS_DMASYNC_PREWRITE);
1750
1751	/*
1752	 * Last Descriptor of Packet
1753	 * needs End Of Packet (EOP)
1754	 * and Report Status (RS)
1755	 */
1756	current_tx_desc->lower.data |=
1757	    htole32(E1000_TXD_CMD_EOP | E1000_TXD_CMD_RS);
1758	/*
1759	 * Keep track in the first buffer which
1760	 * descriptor will be written back
1761	 */
1762	tx_buffer = &adapter->tx_buffer_area[first];
1763	tx_buffer->next_eop = last;
1764
1765	/*
1766	 * Advance the Transmit Descriptor Tail (Tdt), this tells the E1000
1767	 * that this frame is available to transmit.
1768	 */
1769	bus_dmamap_sync(adapter->txdma.dma_tag, adapter->txdma.dma_map,
1770	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1771
1772	if (adapter->hw.mac_type == em_82547 &&
1773	    adapter->link_duplex == HALF_DUPLEX)
1774		em_82547_move_tail(adapter);
1775	else {
1776		E1000_WRITE_REG(&adapter->hw, TDT, i);
1777		if (adapter->hw.mac_type == em_82547)
1778			em_82547_update_fifo_head(adapter, m_head->m_pkthdr.len);
1779	}
1780
1781	return (0);
1782}
1783
1784/*********************************************************************
1785 *
1786 * 82547 workaround to avoid controller hang in half-duplex environment.
1787 * The workaround is to avoid queuing a large packet that would wrap
1788 * the internal Tx FIFO ring boundary; in that case the FIFO pointers
1789 * must be reset, which we do only once the FIFO is quiescent.
1790 *
1791 **********************************************************************/
1792static void
1793em_82547_move_tail(void *arg)
1794{
1795	struct adapter *adapter = arg;
1796	uint16_t hw_tdt;
1797	uint16_t sw_tdt;
1798	struct em_tx_desc *tx_desc;
1799	uint16_t length = 0;
1800	boolean_t eop = 0;
1801
1802	EM_LOCK_ASSERT(adapter);
1803
1804	hw_tdt = E1000_READ_REG(&adapter->hw, TDT);
1805	sw_tdt = adapter->next_avail_tx_desc;
1806
1807	while (hw_tdt != sw_tdt) {
1808		tx_desc = &adapter->tx_desc_base[hw_tdt];
1809		length += tx_desc->lower.flags.length;
1810		eop = tx_desc->lower.data & E1000_TXD_CMD_EOP;
1811		if (++hw_tdt == adapter->num_tx_desc)
1812			hw_tdt = 0;
1813
1814		if (eop) {
1815			if (em_82547_fifo_workaround(adapter, length)) {
1816				adapter->tx_fifo_wrk_cnt++;
1817				callout_reset(&adapter->tx_fifo_timer, 1,
1818					em_82547_move_tail, adapter);
1819				break;
1820			}
1821			E1000_WRITE_REG(&adapter->hw, TDT, hw_tdt);
1822			em_82547_update_fifo_head(adapter, length);
1823			length = 0;
1824		}
1825	}
1826}
1827
1828static int
1829em_82547_fifo_workaround(struct adapter *adapter, int len)
1830{
1831	int fifo_space, fifo_pkt_len;
1832
1833	fifo_pkt_len = roundup2(len + EM_FIFO_HDR, EM_FIFO_HDR);
1834
1835	if (adapter->link_duplex == HALF_DUPLEX) {
1836		fifo_space = adapter->tx_fifo_size - adapter->tx_fifo_head;
1837
1838		if (fifo_pkt_len >= (EM_82547_PKT_THRESH + fifo_space)) {
1839			if (em_82547_tx_fifo_reset(adapter))
1840				return (0);
1841			else
1842				return (1);
1843		}
1844	}
1845
1846	return (0);
1847}
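/*
 * Worked example (illustrative; EM_FIFO_HDR assumed to be 16 bytes):
 * a 1500-byte frame gives fifo_pkt_len = roundup2(1516, 16) = 1520.
 * When that exceeds the space left before the FIFO wrap point by
 * EM_82547_PKT_THRESH or more, the driver attempts a FIFO pointer
 * reset rather than letting the packet span the ring boundary.
 */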
1848
1849static void
1850em_82547_update_fifo_head(struct adapter *adapter, int len)
1851{
1852	int fifo_pkt_len = roundup2(len + EM_FIFO_HDR, EM_FIFO_HDR);
1853
1854	/* tx_fifo_head is always 16 byte aligned */
1855	adapter->tx_fifo_head += fifo_pkt_len;
1856	if (adapter->tx_fifo_head >= adapter->tx_fifo_size) {
1857		adapter->tx_fifo_head -= adapter->tx_fifo_size;
1858	}
1859}
1860
1861
1862static int
1863em_82547_tx_fifo_reset(struct adapter *adapter)
1864{
1865	uint32_t tctl;
1866
1867	if ((E1000_READ_REG(&adapter->hw, TDT) == E1000_READ_REG(&adapter->hw, TDH)) &&
1868	    (E1000_READ_REG(&adapter->hw, TDFT) == E1000_READ_REG(&adapter->hw, TDFH)) &&
1869	    (E1000_READ_REG(&adapter->hw, TDFTS) == E1000_READ_REG(&adapter->hw, TDFHS)) &&
1870	    (E1000_READ_REG(&adapter->hw, TDFPC) == 0)) {
1871
1872		/* Disable TX unit */
1873		tctl = E1000_READ_REG(&adapter->hw, TCTL);
1874		E1000_WRITE_REG(&adapter->hw, TCTL, tctl & ~E1000_TCTL_EN);
1875
1876		/* Reset FIFO pointers */
1877		E1000_WRITE_REG(&adapter->hw, TDFT,  adapter->tx_head_addr);
1878		E1000_WRITE_REG(&adapter->hw, TDFH,  adapter->tx_head_addr);
1879		E1000_WRITE_REG(&adapter->hw, TDFTS, adapter->tx_head_addr);
1880		E1000_WRITE_REG(&adapter->hw, TDFHS, adapter->tx_head_addr);
1881
1882		/* Re-enable TX unit */
1883		E1000_WRITE_REG(&adapter->hw, TCTL, tctl);
1884		E1000_WRITE_FLUSH(&adapter->hw);
1885
1886		adapter->tx_fifo_head = 0;
1887		adapter->tx_fifo_reset_cnt++;
1888
1889		return (TRUE);
1890	} else {
1892		return (FALSE);
1893	}
1894}
1895
1896static void
1897em_set_promisc(struct adapter *adapter)
1898{
1899	struct ifnet	*ifp = adapter->ifp;
1900	uint32_t	reg_rctl;
1901
1902	reg_rctl = E1000_READ_REG(&adapter->hw, RCTL);
1903
1904	if (ifp->if_flags & IFF_PROMISC) {
1905		reg_rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
1906		E1000_WRITE_REG(&adapter->hw, RCTL, reg_rctl);
1907		/*
1908		 * Disable VLAN stripping in promiscuous mode.
1909		 * This allows VLAN-tagged frames to be bridged
1910		 * and also lets the tags be seen in tcpdump.
1911		 * XXX: This is a bit bogus as tcpdump may be used
1912		 * without promiscuous mode as well.
1913		 */
1914		if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING)
1915			em_disable_vlans(adapter);
1916		adapter->em_insert_vlan_header = 1;
1917	} else if (ifp->if_flags & IFF_ALLMULTI) {
1918		reg_rctl |= E1000_RCTL_MPE;
1919		reg_rctl &= ~E1000_RCTL_UPE;
1920		E1000_WRITE_REG(&adapter->hw, RCTL, reg_rctl);
1921		adapter->em_insert_vlan_header = 0;
1922	} else
1923		adapter->em_insert_vlan_header = 0;
1924}
1925
1926static void
1927em_disable_promisc(struct adapter *adapter)
1928{
1929	struct ifnet	*ifp = adapter->ifp;
1930	uint32_t	reg_rctl;
1931
1932	reg_rctl = E1000_READ_REG(&adapter->hw, RCTL);
1933
1934	reg_rctl &=  (~E1000_RCTL_UPE);
1935	reg_rctl &=  (~E1000_RCTL_MPE);
1936	E1000_WRITE_REG(&adapter->hw, RCTL, reg_rctl);
1937
1938	if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING)
1939		em_enable_vlans(adapter);
1940	adapter->em_insert_vlan_header = 0;
1941}
1942
1943
1944/*********************************************************************
1945 *  Multicast Update
1946 *
1947 *  This routine is called whenever multicast address list is updated.
1948 *
1949 **********************************************************************/
1950
1951static void
1952em_set_multi(struct adapter *adapter)
1953{
1954	struct ifnet	*ifp = adapter->ifp;
1955	struct ifmultiaddr *ifma;
1956	uint32_t reg_rctl = 0;
1957	uint8_t  mta[MAX_NUM_MULTICAST_ADDRESSES * ETH_LENGTH_OF_ADDRESS];
1958	int mcnt = 0;
1959
1960	IOCTL_DEBUGOUT("em_set_multi: begin");
1961
1962	if (adapter->hw.mac_type == em_82542_rev2_0) {
1963		reg_rctl = E1000_READ_REG(&adapter->hw, RCTL);
1964		if (adapter->hw.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
1965			em_pci_clear_mwi(&adapter->hw);
1966		reg_rctl |= E1000_RCTL_RST;
1967		E1000_WRITE_REG(&adapter->hw, RCTL, reg_rctl);
1968		msec_delay(5);
1969	}
1970
1971	IF_ADDR_LOCK(ifp);
1972	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
1973		if (ifma->ifma_addr->sa_family != AF_LINK)
1974			continue;
1975
1976		if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
1977			break;
1978
1979		bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
1980		    &mta[mcnt*ETH_LENGTH_OF_ADDRESS], ETH_LENGTH_OF_ADDRESS);
1981		mcnt++;
1982	}
1983	IF_ADDR_UNLOCK(ifp);
1984
1985	if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES) {
1986		reg_rctl = E1000_READ_REG(&adapter->hw, RCTL);
1987		reg_rctl |= E1000_RCTL_MPE;
1988		E1000_WRITE_REG(&adapter->hw, RCTL, reg_rctl);
1989	} else
1990		em_mc_addr_list_update(&adapter->hw, mta, mcnt, 0, 1);
1991
1992	if (adapter->hw.mac_type == em_82542_rev2_0) {
1993		reg_rctl = E1000_READ_REG(&adapter->hw, RCTL);
1994		reg_rctl &= ~E1000_RCTL_RST;
1995		E1000_WRITE_REG(&adapter->hw, RCTL, reg_rctl);
1996		msec_delay(5);
1997		if (adapter->hw.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
1998			em_pci_set_mwi(&adapter->hw);
1999	}
2000}
2001
2002
2003/*********************************************************************
2004 *  Timer routine
2005 *
2006 *  This routine checks for link status and updates statistics.
2007 *
2008 **********************************************************************/
2009
2010static void
2011em_local_timer(void *arg)
2012{
2013	struct adapter	*adapter = arg;
2014	struct ifnet	*ifp = adapter->ifp;
2015
2016	EM_LOCK_ASSERT(adapter);
2017
2018	em_check_for_link(&adapter->hw);
2019	em_update_link_status(adapter);
2020	em_update_stats_counters(adapter);
2021	if (em_display_debug_stats && (ifp->if_drv_flags & IFF_DRV_RUNNING))
2022		em_print_hw_stats(adapter);
2023	em_smartspeed(adapter);
2024
2025	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
2026}
2027
2028static void
2029em_update_link_status(struct adapter *adapter)
2030{
2031	struct ifnet *ifp = adapter->ifp;
2032	device_t dev = adapter->dev;
2033
2034	if (E1000_READ_REG(&adapter->hw, STATUS) & E1000_STATUS_LU) {
2035		if (adapter->link_active == 0) {
2036			em_get_speed_and_duplex(&adapter->hw, &adapter->link_speed,
2037			    &adapter->link_duplex);
2038			/* Check if we may set SPEED_MODE bit on PCI-E */
2039			if ((adapter->link_speed == SPEED_1000) &&
2040			    ((adapter->hw.mac_type == em_82571) ||
2041			    (adapter->hw.mac_type == em_82572))) {
2042				uint32_t tarc0;
2043
2044				tarc0 = E1000_READ_REG(&adapter->hw, TARC0);
2045				tarc0 |= SPEED_MODE_BIT;
2046				E1000_WRITE_REG(&adapter->hw, TARC0, tarc0);
2047			}
2048			if (bootverbose)
2049				device_printf(dev, "Link is up %d Mbps %s\n",
2050				    adapter->link_speed,
2051				    ((adapter->link_duplex == FULL_DUPLEX) ?
2052				    "Full Duplex" : "Half Duplex"));
2053			adapter->link_active = 1;
2054			adapter->smartspeed = 0;
2055			ifp->if_baudrate = adapter->link_speed * 1000000;
2056			if_link_state_change(ifp, LINK_STATE_UP);
2057		}
2058	} else {
2059		if (adapter->link_active == 1) {
2060			ifp->if_baudrate = adapter->link_speed = 0;
2061			adapter->link_duplex = 0;
2062			if (bootverbose)
2063				device_printf(dev, "Link is Down\n");
2064			adapter->link_active = 0;
2065			if_link_state_change(ifp, LINK_STATE_DOWN);
2066		}
2067	}
2068}
2069
2070/*********************************************************************
2071 *
2072 *  This routine disables all traffic on the adapter by issuing a
2073 *  global reset on the MAC and deallocates TX/RX buffers.
2074 *
2075 **********************************************************************/
2076
2077static void
2078em_stop(void *arg)
2079{
2080	struct adapter	*adapter = arg;
2081	struct ifnet	*ifp = adapter->ifp;
2082
2083	EM_LOCK_ASSERT(adapter);
2084
2085	INIT_DEBUGOUT("em_stop: begin");
2086
2087	em_disable_intr(adapter);
2088	em_reset_hw(&adapter->hw);
2089	callout_stop(&adapter->timer);
2090	callout_stop(&adapter->tx_fifo_timer);
2091	em_free_transmit_structures(adapter);
2092	em_free_receive_structures(adapter);
2093
2094	/* Tell the stack that the interface is no longer active */
2095	ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
2096}
2097
2098
2099/********************************************************************
2100 *
2101 *  Determine hardware revision.
2102 *
2103 **********************************************************************/
2104static void
2105em_identify_hardware(struct adapter *adapter)
2106{
2107	device_t dev = adapter->dev;
2108
2109	/* Make sure our PCI config space has the necessary stuff set */
2110	pci_enable_busmaster(dev);
2111	pci_enable_io(dev, SYS_RES_MEMORY);
2112	adapter->hw.pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2);
2113
2114	/* Save off the information about this board */
2115	adapter->hw.vendor_id = pci_get_vendor(dev);
2116	adapter->hw.device_id = pci_get_device(dev);
2117	adapter->hw.revision_id = pci_get_revid(dev);
2118	adapter->hw.subsystem_vendor_id = pci_get_subvendor(dev);
2119	adapter->hw.subsystem_id = pci_get_subdevice(dev);
2120
2121	/* Identify the MAC */
2122	if (em_set_mac_type(&adapter->hw))
2123		device_printf(dev, "Unknown MAC Type\n");
2124
2125	if (adapter->hw.mac_type == em_82541 || adapter->hw.mac_type == em_82541_rev_2 ||
2126	    adapter->hw.mac_type == em_82547 || adapter->hw.mac_type == em_82547_rev_2)
2127		adapter->hw.phy_init_script = TRUE;
2128}
2129
2130static int
2131em_allocate_pci_resources(struct adapter *adapter)
2132{
2133	device_t	dev = adapter->dev;
2134	int		val, rid;
2135
2136	rid = PCIR_BAR(0);
2137	adapter->res_memory = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
2138	    &rid, RF_ACTIVE);
2139	if (adapter->res_memory == NULL) {
2140		device_printf(dev, "Unable to allocate bus resource: memory\n");
2141		return (ENXIO);
2142	}
2143	adapter->osdep.mem_bus_space_tag =
2144	    rman_get_bustag(adapter->res_memory);
2145	adapter->osdep.mem_bus_space_handle = rman_get_bushandle(adapter->res_memory);
2146	adapter->hw.hw_addr = (uint8_t *)&adapter->osdep.mem_bus_space_handle;
2147
2148	if (adapter->hw.mac_type > em_82543) {
2149		/* Figure out where our I/O BAR is. */
2150		for (rid = PCIR_BAR(0); rid < PCIR_CIS;) {
2151			val = pci_read_config(dev, rid, 4);
2152			if (EM_BAR_TYPE(val) == EM_BAR_TYPE_IO) {
2153				adapter->io_rid = rid;
2154				break;
2155			}
2156			rid += 4;
2157			/* check for 64bit BAR */
2158			if (EM_BAR_MEM_TYPE(val) == EM_BAR_MEM_TYPE_64BIT)
2159				rid += 4;
2160		}
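		/*
		 * Note (added commentary): BARs occupy the 4-byte config
		 * registers from PCIR_BAR(0) upward, and a 64-bit memory
		 * BAR consumes two consecutive registers; that is why the
		 * loop above steps an extra 4 bytes past 64-bit BARs, e.g.
		 * a 64-bit BAR at 0x10 pushes the next probe to 0x18.
		 */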
2161		if (rid >= PCIR_CIS) {
2162			device_printf(dev, "Unable to locate IO BAR\n");
2163			return (ENXIO);
2164		}
2165		adapter->res_ioport = bus_alloc_resource_any(dev, SYS_RES_IOPORT,
2166		    &adapter->io_rid, RF_ACTIVE);
2167		if (adapter->res_ioport == NULL) {
2168			device_printf(dev, "Unable to allocate bus resource: "
2169			    "ioport\n");
2170			return (ENXIO);
2171		}
2172		adapter->hw.io_base = 0;
2173		adapter->osdep.io_bus_space_tag = rman_get_bustag(adapter->res_ioport);
2174		adapter->osdep.io_bus_space_handle =
2175		    rman_get_bushandle(adapter->res_ioport);
2176	}
2177
2178	/* For ICH8 we need to find the flash memory. */
2179	if (adapter->hw.mac_type == em_ich8lan) {
2180		rid = EM_FLASH;
2181
2182		adapter->flash_mem = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
2183		    &rid, RF_ACTIVE);
		if (adapter->flash_mem == NULL) {
			device_printf(dev, "Unable to allocate bus resource: "
			    "flash memory\n");
			return (ENXIO);
		}
2184		adapter->osdep.flash_bus_space_tag = rman_get_bustag(adapter->flash_mem);
2185		adapter->osdep.flash_bus_space_handle =
2186		    rman_get_bushandle(adapter->flash_mem);
2187	}
2188
2189	rid = 0x0;
2190	adapter->res_interrupt = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid,
2191	    RF_SHAREABLE | RF_ACTIVE);
2192	if (adapter->res_interrupt == NULL) {
2193		device_printf(dev, "Unable to allocate bus resource: "
2194		    "interrupt\n");
2195		return (ENXIO);
2196	}
2197
2198	adapter->hw.back = &adapter->osdep;
2199
2200	return (0);
2201}
2202
2203int
2204em_allocate_intr(struct adapter *adapter)
2205{
2206	device_t dev = adapter->dev;
2207	int error;
2208
2209	/* Manually turn off all interrupts */
2210	E1000_WRITE_REG(&adapter->hw, IMC, 0xffffffff);
2211
2212#ifdef DEVICE_POLLING
2213	if (adapter->int_handler_tag == NULL && (error = bus_setup_intr(dev,
2214	    adapter->res_interrupt, INTR_TYPE_NET | INTR_MPSAFE, em_intr, adapter,
2215	    &adapter->int_handler_tag)) != 0) {
2216		device_printf(dev, "Failed to register interrupt handler: %d\n",
		    error);
2217		return (error);
2218	}
2219#else
2220	/*
2221	 * Try allocating a fast interrupt and the associated deferred
2222	 * processing contexts.
2223	 */
2224	TASK_INIT(&adapter->rxtx_task, 0, em_handle_rxtx, adapter);
2225	TASK_INIT(&adapter->link_task, 0, em_handle_link, adapter);
2226	adapter->tq = taskqueue_create_fast("em_taskq", M_NOWAIT,
2227	    taskqueue_thread_enqueue, &adapter->tq);
2228	taskqueue_start_threads(&adapter->tq, 1, PI_NET, "%s taskq",
2229	    device_get_nameunit(adapter->dev));
2230	if ((error = bus_setup_intr(dev, adapter->res_interrupt,
2231	    INTR_TYPE_NET | INTR_FAST, em_intr_fast, adapter,
2232	    &adapter->int_handler_tag)) != 0) {
2233		device_printf(dev, "Failed to register fast interrupt "
2234			    "handler: %d\n", error);
2235		taskqueue_free(adapter->tq);
2236		adapter->tq = NULL;
2237		return (error);
2238	}
2239#endif
2240
2241	em_enable_intr(adapter);
2242	return (0);
2243}
2244
2245static void
2246em_free_intr(struct adapter *adapter)
2247{
2248	device_t dev = adapter->dev;
2249
2250	if (adapter->int_handler_tag != NULL) {
2251		bus_teardown_intr(dev, adapter->res_interrupt, adapter->int_handler_tag);
2252		adapter->int_handler_tag = NULL;
2253	}
2254	if (adapter->tq != NULL) {
2255		taskqueue_drain(adapter->tq, &adapter->rxtx_task);
2256		taskqueue_drain(taskqueue_fast, &adapter->link_task);
2257		taskqueue_free(adapter->tq);
2258		adapter->tq = NULL;
2259	}
2260}
2261
2262static void
2263em_free_pci_resources(struct adapter *adapter)
2264{
2265	device_t dev = adapter->dev;
2266
2267	if (adapter->res_interrupt != NULL)
2268		bus_release_resource(dev, SYS_RES_IRQ, 0, adapter->res_interrupt);
2269
2270	if (adapter->res_memory != NULL)
2271		bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BAR(0),
2272		    adapter->res_memory);
2273
2274	if (adapter->flash_mem != NULL)
2275		bus_release_resource(dev, SYS_RES_MEMORY, EM_FLASH,
2276		    adapter->flash_mem);
2277
2278	if (adapter->res_ioport != NULL)
2279		bus_release_resource(dev, SYS_RES_IOPORT, adapter->io_rid,
2280		    adapter->res_ioport);
2281}
2282
2283/*********************************************************************
2284 *
2285 *  Initialize the hardware to a configuration as specified by the
2286 *  adapter structure. The controller is reset, the EEPROM is
2287 *  verified, the MAC address is set, then the shared initialization
2288 *  routines are called.
2289 *
2290 **********************************************************************/
2291static int
2292em_hardware_init(struct adapter *adapter)
2293{
2294	device_t dev = adapter->dev;
2295	uint16_t rx_buffer_size;
2296
2297	INIT_DEBUGOUT("em_hardware_init: begin");
2298	/* Issue a global reset */
2299	em_reset_hw(&adapter->hw);
2300
2301	/* When hardware is reset, fifo_head is also reset */
2302	adapter->tx_fifo_head = 0;
2303
2304	/* Make sure we have a good EEPROM before we read from it */
2305	if (em_validate_eeprom_checksum(&adapter->hw) < 0) {
2306		device_printf(dev, "The EEPROM Checksum Is Not Valid\n");
2307		return (EIO);
2308	}
2309
2310	if (em_read_part_num(&adapter->hw, &(adapter->part_num)) < 0) {
2311		device_printf(dev, "EEPROM read error while reading part "
2312		    "number\n");
2313		return (EIO);
2314	}
2315
2316	/* Disable smart power down by default on newer adapters. */
2317	if (!em_smart_pwr_down &&
2318	    (adapter->hw.mac_type == em_82571 || adapter->hw.mac_type == em_82572)) {
2319		uint16_t phy_tmp = 0;
2320
2321		/* Speed up time to link by disabling smart power down. */
2322		em_read_phy_reg(&adapter->hw, IGP02E1000_PHY_POWER_MGMT, &phy_tmp);
2323		phy_tmp &= ~IGP02E1000_PM_SPD;
2324		em_write_phy_reg(&adapter->hw, IGP02E1000_PHY_POWER_MGMT, phy_tmp);
2325	}
2326
2327	/*
2328	 * These parameters control the automatic generation (Tx) and
2329	 * response (Rx) to Ethernet PAUSE frames.
2330	 * - High water mark should allow for at least two frames to be
2331	 *   received after sending an XOFF.
2332	 * - Low water mark works best when it is very near the high water mark.
2333	 *   This allows the receiver to restart by sending XON when it has
2334	 *   drained a bit. Here we use an arbitrary value of 1500, which will
2335	 *   restart after one full frame is pulled from the buffer. There
2336	 *   could be several smaller frames in the buffer and if so they will
2337	 *   not trigger the XON until their total number reduces the buffer
2338	 *   by 1500.
2339	 * - The pause time is fairly large at 1000 x 512ns = 512 usec.
2340	 */
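	/*
	 * Worked example (illustrative, with assumed values): if PBA
	 * reads 48 (KB), rx_buffer_size = 48 << 10 = 49152 bytes.  With
	 * a 1518-byte max frame, roundup2(1518, 1024) = 2048, giving
	 * fc_high_water = 49152 - 2048 = 47104 and
	 * fc_low_water = 47104 - 1500 = 45604.
	 */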
2341	rx_buffer_size = ((E1000_READ_REG(&adapter->hw, PBA) & 0xffff) << 10 );
2342
2343	adapter->hw.fc_high_water = rx_buffer_size -
2344	    roundup2(adapter->hw.max_frame_size, 1024);
2345	adapter->hw.fc_low_water = adapter->hw.fc_high_water - 1500;
2346	if (adapter->hw.mac_type == em_80003es2lan)
2347		adapter->hw.fc_pause_time = 0xFFFF;
2348	else
2349		adapter->hw.fc_pause_time = 0x1000;
2350	adapter->hw.fc_send_xon = TRUE;
2351	adapter->hw.fc = E1000_FC_FULL;
2352
2353	if (em_init_hw(&adapter->hw) < 0) {
2354		device_printf(dev, "Hardware Initialization Failed\n");
2355		return (EIO);
2356	}
2357
2358	em_check_for_link(&adapter->hw);
2359
2360	return (0);
2361}
2362
2363/*********************************************************************
2364 *
2365 *  Setup networking device structure and register an interface.
2366 *
2367 **********************************************************************/
2368static void
2369em_setup_interface(device_t dev, struct adapter *adapter)
2370{
2371	struct ifnet   *ifp;
2372	INIT_DEBUGOUT("em_setup_interface: begin");
2373
2374	ifp = adapter->ifp = if_alloc(IFT_ETHER);
2375	if (ifp == NULL)
2376		panic("%s: can not if_alloc()", device_get_nameunit(dev));
2377	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
2378	ifp->if_mtu = ETHERMTU;
2379	ifp->if_init =  em_init;
2380	ifp->if_softc = adapter;
2381	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
2382	ifp->if_ioctl = em_ioctl;
2383	ifp->if_start = em_start;
2384	ifp->if_watchdog = em_watchdog;
2385	IFQ_SET_MAXLEN(&ifp->if_snd, adapter->num_tx_desc - 1);
2386	ifp->if_snd.ifq_drv_maxlen = adapter->num_tx_desc - 1;
2387	IFQ_SET_READY(&ifp->if_snd);
2388
2389	ether_ifattach(ifp, adapter->hw.mac_addr);
2390
2391	ifp->if_capabilities = ifp->if_capenable = 0;
2392
2393	if (adapter->hw.mac_type >= em_82543) {
2394		ifp->if_capabilities |= IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM;
2395		ifp->if_capenable |= IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM;
2396	}
2397
2398	/* Enable TSO if available */
2399	if ((adapter->hw.mac_type > em_82544) &&
2400	    (adapter->hw.mac_type != em_82547)) {
2401		ifp->if_capabilities |= IFCAP_TSO4;
2402		ifp->if_capenable |= IFCAP_TSO4;
2403	}
2404
2405	/*
2406	 * Tell the upper layer(s) we support long frames.
2407	 */
2408	ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header);
2409	ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU;
2410	ifp->if_capenable |= IFCAP_VLAN_MTU;
2411
2412#ifdef DEVICE_POLLING
2413	ifp->if_capabilities |= IFCAP_POLLING;
2414#endif
2415
2416	/*
2417	 * Specify the media types supported by this adapter and register
2418	 * callbacks to update media and link information
2419	 */
2420	ifmedia_init(&adapter->media, IFM_IMASK, em_media_change,
2421	    em_media_status);
2422	if ((adapter->hw.media_type == em_media_type_fiber) ||
2423	    (adapter->hw.media_type == em_media_type_internal_serdes)) {
2424		u_char fiber_type = IFM_1000_SX;	/* default type */
2425
2426		if (adapter->hw.mac_type == em_82545)
2427			fiber_type = IFM_1000_LX;
2428		ifmedia_add(&adapter->media, IFM_ETHER | fiber_type | IFM_FDX,
2429		    0, NULL);
2430		ifmedia_add(&adapter->media, IFM_ETHER | fiber_type, 0, NULL);
2431	} else {
2432		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T, 0, NULL);
2433		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T | IFM_FDX,
2434			    0, NULL);
2435		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX,
2436			    0, NULL);
2437		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX | IFM_FDX,
2438			    0, NULL);
2439		if (adapter->hw.phy_type != em_phy_ife) {
2440			ifmedia_add(&adapter->media,
2441				IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
2442			ifmedia_add(&adapter->media,
2443				IFM_ETHER | IFM_1000_T, 0, NULL);
2444		}
2445	}
2446	ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL);
2447	ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO);
2448}
2449
2450
2451/*********************************************************************
2452 *
2453 *  Workaround for SmartSpeed on 82541 and 82547 controllers
2454 *
2455 **********************************************************************/
2456static void
2457em_smartspeed(struct adapter *adapter)
2458{
2459	uint16_t phy_tmp;
2460
2461	if (adapter->link_active || (adapter->hw.phy_type != em_phy_igp) ||
2462	    adapter->hw.autoneg == 0 ||
2463	    (adapter->hw.autoneg_advertised & ADVERTISE_1000_FULL) == 0)
2464		return;
2465
2466	if (adapter->smartspeed == 0) {
2467		/* If the Master/Slave config fault is asserted twice,
2468		 * we assume back-to-back faults. */
2469		em_read_phy_reg(&adapter->hw, PHY_1000T_STATUS, &phy_tmp);
2470		if (!(phy_tmp & SR_1000T_MS_CONFIG_FAULT))
2471			return;
2472		em_read_phy_reg(&adapter->hw, PHY_1000T_STATUS, &phy_tmp);
2473		if (phy_tmp & SR_1000T_MS_CONFIG_FAULT) {
2474			em_read_phy_reg(&adapter->hw, PHY_1000T_CTRL, &phy_tmp);
2475			if (phy_tmp & CR_1000T_MS_ENABLE) {
2476				phy_tmp &= ~CR_1000T_MS_ENABLE;
2477				em_write_phy_reg(&adapter->hw, PHY_1000T_CTRL,
2478				    phy_tmp);
2479				adapter->smartspeed++;
2480				if (adapter->hw.autoneg &&
2481				   !em_phy_setup_autoneg(&adapter->hw) &&
2482				   !em_read_phy_reg(&adapter->hw, PHY_CTRL,
2483				    &phy_tmp)) {
2484					phy_tmp |= (MII_CR_AUTO_NEG_EN |
2485						    MII_CR_RESTART_AUTO_NEG);
2486					em_write_phy_reg(&adapter->hw, PHY_CTRL,
2487					    phy_tmp);
2488				}
2489			}
2490		}
2491		return;
2492	} else if (adapter->smartspeed == EM_SMARTSPEED_DOWNSHIFT) {
2493		/* If still no link, perhaps a 2- or 3-pair cable is in use */
2494		em_read_phy_reg(&adapter->hw, PHY_1000T_CTRL, &phy_tmp);
2495		phy_tmp |= CR_1000T_MS_ENABLE;
2496		em_write_phy_reg(&adapter->hw, PHY_1000T_CTRL, phy_tmp);
2497		if (adapter->hw.autoneg &&
2498		   !em_phy_setup_autoneg(&adapter->hw) &&
2499		   !em_read_phy_reg(&adapter->hw, PHY_CTRL, &phy_tmp)) {
2500			phy_tmp |= (MII_CR_AUTO_NEG_EN |
2501				    MII_CR_RESTART_AUTO_NEG);
2502			em_write_phy_reg(&adapter->hw, PHY_CTRL, phy_tmp);
2503		}
2504	}
2505	/* Restart process after EM_SMARTSPEED_MAX iterations */
2506	if (adapter->smartspeed++ == EM_SMARTSPEED_MAX)
2507		adapter->smartspeed = 0;
2508}
2509
2510
2511/*
2512 * Manage DMA'able memory.
2513 */
2514static void
2515em_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
2516{
2517	if (error)
2518		return;
2519	*(bus_addr_t *) arg = segs[0].ds_addr;
2520}
2521
2522static int
2523em_dma_malloc(struct adapter *adapter, bus_size_t size, struct em_dma_alloc *dma,
2524	int mapflags)
2525{
2526	int error;
2527
2528	error = bus_dma_tag_create(bus_get_dma_tag(adapter->dev), /* parent */
2529				EM_DBA_ALIGN, 0,	/* alignment, bounds */
2530				BUS_SPACE_MAXADDR,	/* lowaddr */
2531				BUS_SPACE_MAXADDR,	/* highaddr */
2532				NULL, NULL,		/* filter, filterarg */
2533				size,			/* maxsize */
2534				1,			/* nsegments */
2535				size,			/* maxsegsize */
2536				0,			/* flags */
2537				NULL,			/* lockfunc */
2538				NULL,			/* lockarg */
2539				&dma->dma_tag);
2540	if (error) {
2541		device_printf(adapter->dev, "%s: bus_dma_tag_create failed: %d\n",
2542		    __func__, error);
2543		goto fail_0;
2544	}
2545
2546	error = bus_dmamem_alloc(dma->dma_tag, (void**) &dma->dma_vaddr,
2547	    BUS_DMA_NOWAIT, &dma->dma_map);
2548	if (error) {
2549		device_printf(adapter->dev, "%s: bus_dmamem_alloc(%ju) failed: %d\n",
2550		    __func__, (uintmax_t)size, error);
2551		goto fail_2;
2552	}
2553
2554	dma->dma_paddr = 0;
2555	error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
2556	    size, em_dmamap_cb, &dma->dma_paddr, mapflags | BUS_DMA_NOWAIT);
2557	if (error || dma->dma_paddr == 0) {
2558		device_printf(adapter->dev, "%s: bus_dmamap_load failed: %d\n",
2559		    __func__, error);
2560		goto fail_3;
2561	}
2562
2563	return (0);
2564
2565fail_3:
2566	bus_dmamap_unload(dma->dma_tag, dma->dma_map);
2567fail_2:
2568	bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
2569	bus_dma_tag_destroy(dma->dma_tag);
2570fail_0:
2571	dma->dma_map = NULL;
2572	dma->dma_tag = NULL;
2573
2574	return (error);
2575}
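/*
 * Illustrative usage sketch (not part of the original source): this is
 * roughly how a descriptor ring is obtained through em_dma_malloc();
 * the ring size shown is an assumed example.
 */
#if 0
	struct em_dma_alloc dma;
	bus_size_t size;

	size = roundup2(adapter->num_tx_desc * sizeof(struct em_tx_desc), 4096);
	if (em_dma_malloc(adapter, size, &dma, BUS_DMA_NOWAIT) != 0)
		return (ENOMEM);
	/* dma.dma_vaddr and dma.dma_paddr are valid until em_dma_free(). */
#endif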
2576
2577static void
2578em_dma_free(struct adapter *adapter, struct em_dma_alloc *dma)
2579{
2580	if (dma->dma_tag == NULL)
2581		return;
2582	if (dma->dma_map != NULL) {
2583		bus_dmamap_sync(dma->dma_tag, dma->dma_map,
2584		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
2585		bus_dmamap_unload(dma->dma_tag, dma->dma_map);
2586		bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
2587		dma->dma_map = NULL;
2588	}
2589	bus_dma_tag_destroy(dma->dma_tag);
2590	dma->dma_tag = NULL;
2591}
2592
2593
2594/*********************************************************************
2595 *
2596 *  Allocate memory for tx_buffer structures. Each tx_buffer stores
2597 *  the information needed to transmit one packet on the wire.
2598 *
2599 **********************************************************************/
2600static int
2601em_allocate_transmit_structures(struct adapter *adapter)
2602{
2603	adapter->tx_buffer_area =  malloc(sizeof(struct em_buffer) *
2604	    adapter->num_tx_desc, M_DEVBUF, M_NOWAIT);
2605	if (adapter->tx_buffer_area == NULL) {
2606		device_printf(adapter->dev, "Unable to allocate tx_buffer memory\n");
2607		return (ENOMEM);
2608	}
2609
2610	bzero(adapter->tx_buffer_area, sizeof(struct em_buffer) * adapter->num_tx_desc);
2611
2612	return (0);
2613}
2614
2615/*********************************************************************
2616 *
2617 *  Allocate and initialize transmit structures.
2618 *
2619 **********************************************************************/
2620static int
2621em_setup_transmit_structures(struct adapter *adapter)
2622{
2623	struct ifnet   *ifp = adapter->ifp;
2624	device_t dev = adapter->dev;
2625	struct em_buffer *tx_buffer;
2626	bus_size_t size, segsize;
2627	int error, i;
2628
2629	/*
2630	 * Setup DMA descriptor areas.
2631	 */
2632	segsize = size = roundup2(adapter->hw.max_frame_size, MCLBYTES);
2633
2634	/* Overrides for TSO - want large sizes */
2635	if (ifp->if_hwassist & EM_TCPSEG_FEATURES) {
2636		size = EM_TSO_SIZE;
2637		segsize = PAGE_SIZE;
2638	}
2639
2640	if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),	/* parent */
2641				1, 0,			/* alignment, bounds */
2642				BUS_SPACE_MAXADDR,	/* lowaddr */
2643				BUS_SPACE_MAXADDR,	/* highaddr */
2644				NULL, NULL,		/* filter, filterarg */
2645				size,			/* maxsize */
2646				EM_MAX_SCATTER,		/* nsegments */
2647				segsize,		/* maxsegsize */
2648				0,			/* flags */
2649				NULL,		/* lockfunc */
2650				NULL,		/* lockarg */
2651				&adapter->txtag)) != 0) {
2652		device_printf(dev, "Unable to allocate TX DMA tag\n");
2653		goto fail;
2654	}
2655
2656	if ((error = em_allocate_transmit_structures(adapter)) != 0)
2657		goto fail;
2658
2659	bzero(adapter->tx_desc_base, (sizeof(struct em_tx_desc)) * adapter->num_tx_desc);
2660	tx_buffer = adapter->tx_buffer_area;
2661	for (i = 0; i < adapter->num_tx_desc; i++) {
2662		error = bus_dmamap_create(adapter->txtag, 0, &tx_buffer->map);
2663		if (error != 0) {
2664			device_printf(dev, "Unable to create TX DMA map\n");
2665			goto fail;
2666		}
2667		tx_buffer++;
2668	}
2669
2670	adapter->next_avail_tx_desc = 0;
2671	adapter->next_tx_to_clean = 0;
2672
2673	/* Set number of descriptors available */
2674	adapter->num_tx_desc_avail = adapter->num_tx_desc;
2675
2676	/* Set checksum context */
2677	adapter->active_checksum_context = OFFLOAD_NONE;
2678	bus_dmamap_sync(adapter->txdma.dma_tag, adapter->txdma.dma_map,
2679	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
2680
2681	return (0);
2682
2683fail:
2684	em_free_transmit_structures(adapter);
2685	return (error);
2686}
2687
2688/*********************************************************************
2689 *
2690 *  Enable transmit unit.
2691 *
2692 **********************************************************************/
2693static void
2694em_initialize_transmit_unit(struct adapter *adapter)
2695{
2696	uint32_t	reg_tctl;
2697	uint32_t	reg_tipg = 0;
2698	uint64_t	bus_addr;
2699
2700	INIT_DEBUGOUT("em_initialize_transmit_unit: begin");
2701	/* Setup the Base and Length of the Tx Descriptor Ring */
2702	bus_addr = adapter->txdma.dma_paddr;
2703	E1000_WRITE_REG(&adapter->hw, TDLEN,
2704	    adapter->num_tx_desc * sizeof(struct em_tx_desc));
2705	E1000_WRITE_REG(&adapter->hw, TDBAH, (uint32_t)(bus_addr >> 32));
2706	E1000_WRITE_REG(&adapter->hw, TDBAL, (uint32_t)bus_addr);
2707
2708	/* Setup the HW Tx Head and Tail descriptor pointers */
2709	E1000_WRITE_REG(&adapter->hw, TDT, 0);
2710	E1000_WRITE_REG(&adapter->hw, TDH, 0);
2711
2712
2713	HW_DEBUGOUT2("Base = %x, Length = %x\n", E1000_READ_REG(&adapter->hw, TDBAL),
2714	    E1000_READ_REG(&adapter->hw, TDLEN));
2715
2716	/* Set the default values for the Tx Inter Packet Gap timer */
2717	switch (adapter->hw.mac_type) {
2718	case em_82542_rev2_0:
2719	case em_82542_rev2_1:
2720		reg_tipg = DEFAULT_82542_TIPG_IPGT;
2721		reg_tipg |= DEFAULT_82542_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT;
2722		reg_tipg |= DEFAULT_82542_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT;
2723		break;
2724	case em_80003es2lan:
2725		reg_tipg = DEFAULT_82543_TIPG_IPGR1;
2726		reg_tipg |= DEFAULT_80003ES2LAN_TIPG_IPGR2 <<
2727		    E1000_TIPG_IPGR2_SHIFT;
2728		break;
2729	default:
2730		if ((adapter->hw.media_type == em_media_type_fiber) ||
2731		    (adapter->hw.media_type == em_media_type_internal_serdes))
2732			reg_tipg = DEFAULT_82543_TIPG_IPGT_FIBER;
2733		else
2734			reg_tipg = DEFAULT_82543_TIPG_IPGT_COPPER;
2735		reg_tipg |= DEFAULT_82543_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT;
2736		reg_tipg |= DEFAULT_82543_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT;
2737	}
2738
2739	E1000_WRITE_REG(&adapter->hw, TIPG, reg_tipg);
2740	E1000_WRITE_REG(&adapter->hw, TIDV, adapter->tx_int_delay.value);
2741	if (adapter->hw.mac_type >= em_82540)
2742		E1000_WRITE_REG(&adapter->hw, TADV, adapter->tx_abs_int_delay.value);
2743
2744	/* Program the Transmit Control Register */
2745	reg_tctl = E1000_TCTL_PSP | E1000_TCTL_EN |
2746		   (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT);
2747	if (adapter->hw.mac_type >= em_82571)
2748		reg_tctl |= E1000_TCTL_MULR;
2749	if (adapter->link_duplex == FULL_DUPLEX) {
2750		reg_tctl |= E1000_FDX_COLLISION_DISTANCE << E1000_COLD_SHIFT;
2751	} else {
2752		reg_tctl |= E1000_HDX_COLLISION_DISTANCE << E1000_COLD_SHIFT;
2753	}
2754	/* This write will effectively turn on the transmit unit. */
2755	E1000_WRITE_REG(&adapter->hw, TCTL, reg_tctl);
2756
2757	/* Setup Transmit Descriptor Base Settings */
2758	adapter->txd_cmd = E1000_TXD_CMD_IFCS;
2759
2760	if (adapter->tx_int_delay.value > 0)
2761		adapter->txd_cmd |= E1000_TXD_CMD_IDE;
2762}
2763
2764/*********************************************************************
2765 *
2766 *  Free all transmit related data structures.
2767 *
2768 **********************************************************************/
2769static void
2770em_free_transmit_structures(struct adapter *adapter)
2771{
2772	struct em_buffer *tx_buffer;
2773	int i;
2774
2775	INIT_DEBUGOUT("free_transmit_structures: begin");
2776
2777	if (adapter->tx_buffer_area != NULL) {
2778		tx_buffer = adapter->tx_buffer_area;
2779		for (i = 0; i < adapter->num_tx_desc; i++, tx_buffer++) {
2780			if (tx_buffer->m_head != NULL) {
2781				bus_dmamap_sync(adapter->txtag, tx_buffer->map,
2782				    BUS_DMASYNC_POSTWRITE);
2783				bus_dmamap_unload(adapter->txtag,
2784				    tx_buffer->map);
2785				m_freem(tx_buffer->m_head);
2786				tx_buffer->m_head = NULL;
2787			} else if (tx_buffer->map != NULL)
2788				bus_dmamap_unload(adapter->txtag,
2789				    tx_buffer->map);
2790			if (tx_buffer->map != NULL) {
2791				bus_dmamap_destroy(adapter->txtag,
2792				    tx_buffer->map);
2793				tx_buffer->map = NULL;
2794			}
2795		}
2796	}
2797	if (adapter->tx_buffer_area != NULL) {
2798		free(adapter->tx_buffer_area, M_DEVBUF);
2799		adapter->tx_buffer_area = NULL;
2800	}
2801	if (adapter->txtag != NULL) {
2802		bus_dma_tag_destroy(adapter->txtag);
2803		adapter->txtag = NULL;
2804	}
2805}
2806
2807/*********************************************************************
2808 *
2809 *  The offload context needs to be set when we transfer the first
2810 *  packet of a particular protocol (TCP/UDP). We change the
2811 *  context only if the protocol type changes.
2812 *
2813 **********************************************************************/
2814static void
2815em_transmit_checksum_setup(struct adapter *adapter, struct mbuf *mp,
2816    uint32_t *txd_upper, uint32_t *txd_lower)
2817{
2818	struct em_context_desc *TXD;
2819	struct em_buffer *tx_buffer;
2820	struct ether_vlan_header *eh;
2821	struct ip *ip;
2822	struct ip6_hdr *ip6;
2823	struct tcphdr *th;
2824	int curr_txd, ehdrlen, hdr_len, ip_hlen;
2825	uint32_t cmd = 0;
2826	uint16_t etype;
2827	uint8_t ipproto;
2828
2829	/* Setup checksum offload context. */
2830	curr_txd = adapter->next_avail_tx_desc;
2831	tx_buffer = &adapter->tx_buffer_area[curr_txd];
2832	TXD = (struct em_context_desc *) &adapter->tx_desc_base[curr_txd];
2833
2834	*txd_lower = E1000_TXD_CMD_DEXT |	/* Extended descr type */
2835		     E1000_TXD_DTYP_D;		/* Data descr */
2836
2837	/*
2838	 * Determine where frame payload starts.
2839	 * Jump over vlan headers if already present,
2840	 * helpful for QinQ too.
2841	 */
2842	eh = mtod(mp, struct ether_vlan_header *);
2843	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
2844		etype = ntohs(eh->evl_proto);
2845		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
2846	} else {
2847		etype = ntohs(eh->evl_encap_proto);
2848		ehdrlen = ETHER_HDR_LEN;
2849	}
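	/*
	 * Illustrative values (added commentary): ehdrlen is 14
	 * (ETHER_HDR_LEN) for a plain Ethernet frame, or 18 when an
	 * 802.1Q tag is present (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN).
	 */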
2850
2851	/*
2852	 * We only support TCP/UDP for IPv4 and IPv6 for the moment.
2853	 * TODO: Support SCTP too when it hits the tree.
2854	 */
2855	switch (etype) {
2856	case ETHERTYPE_IP:
2857		ip = (struct ip *)(mp->m_data + ehdrlen);
2858		ip_hlen = ip->ip_hl << 2;
2859
2860		/* Setup of IP header checksum. */
2861		if (mp->m_pkthdr.csum_flags & CSUM_IP) {
2862			/*
2863			 * Start offset for header checksum calculation.
2864			 * End offset for header checksum calculation.
2865			 * Offset of place to put the checksum.
2866			 */
2867			TXD->lower_setup.ip_fields.ipcss = ehdrlen;
2868			TXD->lower_setup.ip_fields.ipcse =
2869			    htole16(ehdrlen + ip_hlen);
2870			TXD->lower_setup.ip_fields.ipcso =
2871			    ehdrlen + offsetof(struct ip, ip_sum);
2872			cmd |= E1000_TXD_CMD_IP;
2873			*txd_upper |= E1000_TXD_POPTS_IXSM << 8;
2874		}
2875
2876		if (mp->m_len < ehdrlen + ip_hlen)
2877			return;	/* failure */
2878
2879		hdr_len = ehdrlen + ip_hlen;
2880		ipproto = ip->ip_p;
2881
2882		break;
2883	case ETHERTYPE_IPV6:
2884		ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
2885		ip_hlen = sizeof(struct ip6_hdr); /* XXX: No header stacking. */
2886
2887		if (mp->m_len < ehdrlen + ip_hlen)
2888			return;	/* failure */
2889
2890		/* IPv6 doesn't have a header checksum. */
2891
2892		hdr_len = ehdrlen + ip_hlen;
2893		ipproto = ip6->ip6_nxt;
2894
2895		break;
2896	default:
2897		*txd_upper = 0;
2898		*txd_lower = 0;
2899		return;
2900	}
2901
2902	switch (ipproto) {
2903	case IPPROTO_TCP:
2904		if (mp->m_pkthdr.csum_flags & CSUM_TCP) {
2905			/*
2906			 * Start offset for payload checksum calculation.
2907			 * End offset for payload checksum calculation.
2908			 * Offset of place to put the checksum.
2909			 */
2910			th = (struct tcphdr *)(mp->m_data + hdr_len);
2911			TXD->upper_setup.tcp_fields.tucss = hdr_len;
2912			TXD->upper_setup.tcp_fields.tucse = htole16(0);
2913			TXD->upper_setup.tcp_fields.tucso =
2914			    hdr_len + offsetof(struct tcphdr, th_sum);
2915			cmd |= E1000_TXD_CMD_TCP;
2916			*txd_upper |= E1000_TXD_POPTS_TXSM << 8;
2917		}
2918		break;
2919	case IPPROTO_UDP:
2920		if (mp->m_pkthdr.csum_flags & CSUM_UDP) {
2921			/*
2922			 * Start offset for payload checksum calculation.
2923			 * End offset for payload checksum calculation.
2924			 * Offset of place to put the checksum.
2925			 */
2926			TXD->upper_setup.tcp_fields.tucss = hdr_len;
2927			TXD->upper_setup.tcp_fields.tucse = htole16(0);
2928			TXD->upper_setup.tcp_fields.tucso =
2929			    hdr_len + offsetof(struct udphdr, uh_sum);
2930			*txd_upper |= E1000_TXD_POPTS_TXSM << 8;
2931		}
2932		break;
2933	default:
2934		break;
2935	}
2936
2937	TXD->tcp_seg_setup.data = htole32(0);
2938	TXD->cmd_and_length =
2939	    htole32(adapter->txd_cmd | E1000_TXD_CMD_DEXT | cmd);
2940	tx_buffer->m_head = NULL;
2941	tx_buffer->next_eop = -1;
2942
2943	if (++curr_txd == adapter->num_tx_desc)
2944		curr_txd = 0;
2945
2946	adapter->num_tx_desc_avail--;
2947	adapter->next_avail_tx_desc = curr_txd;
2948}
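/*
 * Illustrative sketch (not part of the original source): the stack
 * requests these offloads by setting checksum flags in the packet
 * header before the mbuf reaches em_encap(), for example:
 */
#if 0
	mp->m_pkthdr.csum_flags |= CSUM_IP;	/* offload IP header checksum */
	mp->m_pkthdr.csum_flags |= CSUM_TCP;	/* offload TCP payload checksum */
#endif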
2949
2950/**********************************************************************
2951 *
2952 *  Setup work for hardware segmentation offload (TSO)
2953 *
2954 **********************************************************************/
2955static boolean_t
2956em_tso_setup(struct adapter *adapter, struct mbuf *mp, uint32_t *txd_upper,
2957   uint32_t *txd_lower)
2958{
2959	struct em_context_desc *TXD;
2960	struct em_buffer *tx_buffer;
2961	struct ether_vlan_header *eh;
2962	struct ip *ip;
2963	struct ip6_hdr *ip6;
2964	struct tcphdr *th;
2965	int curr_txd, ehdrlen, hdr_len, ip_hlen, isip6;
2966	uint16_t etype;
2967
2968	/*
2969	 * XXX: This is not really correct as the stack would not have
2970	 * set up all checksums.
2971	 * XXX: Returning FALSE is not sufficient, as we may also have to
2972	 * report true failure cases.  Should return -1 (failure), 0 (no)
2973	 * and 1 (success).
2974	 */
2975	if (mp->m_pkthdr.len <= E1000_TX_BUFFER_SIZE)
2976		return FALSE;	/* 0 */
2977
2978	/*
2979	 * This function could/should be extended to support IP/IPv6
2980	 * fragmentation as well.  But as they say, one step at a time.
2981	 */
2982
2983	/*
2984	 * Determine where frame payload starts.
2985	 * Jump over vlan headers if already present,
2986	 * helpful for QinQ too.
2987	 */
2988	eh = mtod(mp, struct ether_vlan_header *);
2989	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
2990		etype = ntohs(eh->evl_proto);
2991		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
2992	} else {
2993		etype = ntohs(eh->evl_encap_proto);
2994		ehdrlen = ETHER_HDR_LEN;
2995	}
2996
2997	/* Ensure we have at least the IP+TCP header in the first mbuf. */
2998	if (mp->m_len < ehdrlen + sizeof(struct ip) + sizeof(struct tcphdr))
2999		return FALSE;	/* -1 */
3000
3001	/*
3002	 * We only support TCP for IPv4 for the moment; IPv6 is not yet supported.
3003	 * TODO: Support SCTP too when it hits the tree.
3004	 */
3005	switch (etype) {
3006	case ETHERTYPE_IP:
3007		isip6 = 0;
3008		ip = (struct ip *)(mp->m_data + ehdrlen);
3009		if (ip->ip_p != IPPROTO_TCP)
3010			return FALSE;	/* 0 */
3011		ip->ip_len = 0;
3012		ip->ip_sum = 0;
3013		ip_hlen = ip->ip_hl << 2;
3014		if (mp->m_len < ehdrlen + ip_hlen + sizeof(struct tcphdr))
3015			return FALSE;	/* -1 */
3016		th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
3017#if 1
3018		th->th_sum = in_pseudo(ip->ip_src.s_addr,
3019		    ip->ip_dst.s_addr, htons(IPPROTO_TCP));
3020#else
3021		th->th_sum = mp->m_pkthdr.csum_data;
3022#endif
3023		break;
3024	case ETHERTYPE_IPV6:
3025		isip6 = 1;
3026		return FALSE;			/* Not supported yet. */
3027		ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
3028		if (ip6->ip6_nxt != IPPROTO_TCP)
3029			return FALSE;	/* 0 */
3030		ip6->ip6_plen = 0;
3031		ip_hlen = sizeof(struct ip6_hdr); /* XXX: no header stacking. */
3032		if (mp->m_len < ehdrlen + ip_hlen + sizeof(struct tcphdr))
3033			return FALSE;	/* -1 */
3034		th = (struct tcphdr *)((caddr_t)ip6 + ip_hlen);
3035#if 0
3036		th->th_sum = in6_pseudo(ip6->ip6_src, ip6->ip6_dst,
3037		    htons(IPPROTO_TCP));	/* XXX: function not yet available. */
3038#else
3039		th->th_sum = mp->m_pkthdr.csum_data;
3040#endif
3041		break;
3042	default:
3043		return FALSE;
3044	}
3045	hdr_len = ehdrlen + ip_hlen + (th->th_off << 2);
3046
3047	*txd_lower = (E1000_TXD_CMD_DEXT |	/* Extended descr type */
3048		      E1000_TXD_DTYP_D |	/* Data descr type */
3049		      E1000_TXD_CMD_TSE);	/* Do TSE on this packet */
3050
3051	/* IP and/or TCP header checksum calculation and insertion. */
3052	*txd_upper = ((isip6 ? 0 : E1000_TXD_POPTS_IXSM) |
3053		      E1000_TXD_POPTS_TXSM) << 8;
3054
3055	curr_txd = adapter->next_avail_tx_desc;
3056	tx_buffer = &adapter->tx_buffer_area[curr_txd];
3057	TXD = (struct em_context_desc *) &adapter->tx_desc_base[curr_txd];
3058
3059	/* IPv6 doesn't have a header checksum. */
3060	if (!isip6) {
3061		/*
3062		 * Start offset for header checksum calculation.
3063		 * End offset for header checksum calculation.
3064		 * Offset of place to put the checksum.
3065		 */
3066		TXD->lower_setup.ip_fields.ipcss = ehdrlen;
3067		TXD->lower_setup.ip_fields.ipcse =
3068		    htole16(ehdrlen + ip_hlen - 1);
3069		TXD->lower_setup.ip_fields.ipcso =
3070		    ehdrlen + offsetof(struct ip, ip_sum);
3071	}
3072	/*
3073	 * Start offset for payload checksum calculation.
3074	 * End offset for payload checksum calculation.
3075	 * Offset of place to put the checksum.
3076	 */
3077	TXD->upper_setup.tcp_fields.tucss =
3078	    ehdrlen + ip_hlen;
3079	TXD->upper_setup.tcp_fields.tucse = 0;
3080	TXD->upper_setup.tcp_fields.tucso =
3081	    ehdrlen + ip_hlen + offsetof(struct tcphdr, th_sum);
3082	/*
3083	 * Payload size per packet w/o any headers.
3084	 * Length of all headers up to payload.
3085	 */
3086	TXD->tcp_seg_setup.fields.mss = htole16(mp->m_pkthdr.tso_segsz);
3087	TXD->tcp_seg_setup.fields.hdr_len = hdr_len;
3088
3089	TXD->cmd_and_length = htole32(adapter->txd_cmd |
3090				E1000_TXD_CMD_DEXT |	/* Extended descr */
3091				E1000_TXD_CMD_TSE |	/* TSE context */
3092				(isip6 ? 0 : E1000_TXD_CMD_IP) | /* Do IP csum */
3093				E1000_TXD_CMD_TCP |	/* Do TCP checksum */
3094				(mp->m_pkthdr.len - (hdr_len))); /* Total len */
3095
3096	tx_buffer->m_head = NULL;
3097
3098	if (++curr_txd == adapter->num_tx_desc)
3099		curr_txd = 0;
3100
3101	adapter->num_tx_desc_avail--;
3102	adapter->next_avail_tx_desc = curr_txd;
3103	adapter->tx_tso = TRUE;
3104
3105	return TRUE;
3106}
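/*
 * Illustrative arithmetic (added commentary): for TCP/IPv4 over plain
 * Ethernet with no options, hdr_len above is 14 + 20 + 20 = 54 bytes;
 * the MSS programmed into tcp_seg_setup comes from the stack's
 * tso_segsz, typically the MTU minus 40 bytes of IP and TCP headers.
 */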
3107
3108/**********************************************************************
3109 *
3110 *  Examine each tx_buffer in the used queue. If the hardware is done
3111 *  processing the packet then free associated resources. The
3112 *  tx_buffer is put back on the free queue.
3113 *
3114 **********************************************************************/
3115static void
3116em_txeof(struct adapter *adapter)
3117{
3118	int first, last, done, num_avail;
3119	struct em_buffer *tx_buffer;
3120	struct em_tx_desc   *tx_desc, *eop_desc;
3121	struct ifnet   *ifp = adapter->ifp;
3122
3123	EM_LOCK_ASSERT(adapter);
3124
3125	if (adapter->num_tx_desc_avail == adapter->num_tx_desc)
3126		return;
3127
3128	num_avail = adapter->num_tx_desc_avail;
3129	first = adapter->next_tx_to_clean;
3130	tx_desc = &adapter->tx_desc_base[first];
3131	tx_buffer = &adapter->tx_buffer_area[first];
3132	last = tx_buffer->next_eop;
3133	eop_desc = &adapter->tx_desc_base[last];
3134
3135	/*
3136	 * Now calculate the terminating index
3137	 * for the cleanup loop below.
3138	 */
3139	if (++last == adapter->num_tx_desc)
3140		last = 0;
3141	done = last;
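	/*
	 * Worked example (illustrative): with 1024 descriptors, a packet
	 * occupying slots 1020..2 (wrapping) has next_eop = 2, so done
	 * becomes 3 and the loop below cleans 1020, 1021, 1022, 1023,
	 * 0, 1 and 2 before stopping.
	 */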
3142
3143	bus_dmamap_sync(adapter->txdma.dma_tag, adapter->txdma.dma_map,
3144	    BUS_DMASYNC_POSTREAD);
3145	while (eop_desc->upper.fields.status & E1000_TXD_STAT_DD) {
3146		/* We clean the range of the packet */
3147		while (first != done) {
3148			tx_desc->upper.data = 0;
3149			tx_desc->lower.data = 0;
3150			num_avail++;
3151
3152			if (tx_buffer->m_head) {
3153				ifp->if_opackets++;
3154				bus_dmamap_sync(adapter->txtag, tx_buffer->map,
3155				    BUS_DMASYNC_POSTWRITE);
3156				bus_dmamap_unload(adapter->txtag,
3157				    tx_buffer->map);
3158
3159				m_freem(tx_buffer->m_head);
3160				tx_buffer->m_head = NULL;
3161			}
3162			tx_buffer->next_eop = -1;
3163
3164			if (++first == adapter->num_tx_desc)
3165				first = 0;
3166
3167			tx_buffer = &adapter->tx_buffer_area[first];
3168			tx_desc = &adapter->tx_desc_base[first];
3169		}
3170		/* See if we can continue to the next packet */
3171		last = tx_buffer->next_eop;
3172		if (last != -1) {
3173			eop_desc = &adapter->tx_desc_base[last];
3174			/* Get new done point */
3175			if (++last == adapter->num_tx_desc)
3176				last = 0;
3177			done = last;
3178		} else
3179			break;
3180	}
3181	bus_dmamap_sync(adapter->txdma.dma_tag, adapter->txdma.dma_map,
3182	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3183
3184	adapter->next_tx_to_clean = first;
3185
3186	/*
3187	 * If we have enough room, clear IFF_DRV_OACTIVE to tell the stack
3188	 * that it is OK to send packets.
3189	 * If there are no pending descriptors, clear the timeout. Otherwise,
3190	 * if some descriptors have been freed, restart the timeout.
3191	 */
3192	if (num_avail > EM_TX_CLEANUP_THRESHOLD) {
3193		ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
3194		if (num_avail == adapter->num_tx_desc)
3195			ifp->if_timer = 0;
3196		else if (num_avail != adapter->num_tx_desc_avail)
3197			ifp->if_timer = EM_TX_TIMEOUT;
3198	}
3199	adapter->num_tx_desc_avail = num_avail;
3200}
3201
3202/*********************************************************************
3203 *
3204 *  Get a buffer from the system mbuf pool.
3205 *
3206 **********************************************************************/
3207static int
3208em_get_buf(struct adapter *adapter, int i)
3209{
3210	struct mbuf		*m;
3211	bus_dma_segment_t	segs[1];
3212	bus_dmamap_t		map;
3213	struct em_buffer	*rx_buffer;
3214	int			error, nsegs;
3215
3216	m = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR);
3217	if (m == NULL) {
3218		adapter->mbuf_cluster_failed++;
3219		return (ENOBUFS);
3220	}
3221	m->m_len = m->m_pkthdr.len = MCLBYTES;
3222	if (adapter->hw.max_frame_size <= (MCLBYTES - ETHER_ALIGN))
3223		m_adj(m, ETHER_ALIGN);
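	/*
	 * Note (added commentary): ETHER_ALIGN is 2, so m_adj() shifts
	 * the buffer start by two bytes; after the 14-byte Ethernet
	 * header this leaves the IP header 4-byte aligned.
	 */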
3224
3225	/*
3226	 * Using memory from the mbuf cluster pool, invoke the
3227	 * bus_dma machinery to arrange the memory mapping.
3228	 */
3229	error = bus_dmamap_load_mbuf_sg(adapter->rxtag, adapter->rx_sparemap,
3230	    m, segs, &nsegs, BUS_DMA_NOWAIT);
3231	if (error != 0) {
3232		m_free(m);
3233		return (error);
3234	}
3235	/* If nsegs is wrong then the stack is corrupt. */
3236	KASSERT(nsegs == 1, ("Too many segments returned!"));
3237
3238	rx_buffer = &adapter->rx_buffer_area[i];
3239	if (rx_buffer->m_head != NULL)
3240		bus_dmamap_unload(adapter->rxtag, rx_buffer->map);
3241
3242	map = rx_buffer->map;
3243	rx_buffer->map = adapter->rx_sparemap;
3244	adapter->rx_sparemap = map;
3245	bus_dmamap_sync(adapter->rxtag, rx_buffer->map, BUS_DMASYNC_PREREAD);
3246	rx_buffer->m_head = m;
3247
3248	adapter->rx_desc_base[i].buffer_addr = htole64(segs[0].ds_addr);
3249
3250	return (0);
3251}
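/*
 * Note (added commentary): the spare-map swap above is a failure-safe
 * idiom; the new mbuf is loaded into rx_sparemap first, so a failed
 * bus_dmamap_load_mbuf_sg() leaves the descriptor's previous mapping
 * intact.
 */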
3252
3253/*********************************************************************
3254 *
3255 *  Allocate memory for rx_buffer structures. Since we use one
3256 *  rx_buffer per received packet, the maximum number of rx_buffer's
3257 *  that we'll need is equal to the number of receive descriptors
3258 *  that we've allocated.
3259 *
3260 **********************************************************************/
3261static int
3262em_allocate_receive_structures(struct adapter *adapter)
3263{
3264	device_t dev = adapter->dev;
3265	struct em_buffer *rx_buffer;
3266	int i, error;
3267
3268	adapter->rx_buffer_area = malloc(sizeof(struct em_buffer) * adapter->num_rx_desc,
3269	    M_DEVBUF, M_NOWAIT);
3270	if (adapter->rx_buffer_area == NULL) {
3271		device_printf(dev, "Unable to allocate rx_buffer memory\n");
3272		return (ENOMEM);
3273	}
3274
3275	bzero(adapter->rx_buffer_area, sizeof(struct em_buffer) * adapter->num_rx_desc);
3276
3277	error = bus_dma_tag_create(bus_get_dma_tag(dev),	/* parent */
3278				1, 0,			/* alignment, bounds */
3279				BUS_SPACE_MAXADDR,	/* lowaddr */
3280				BUS_SPACE_MAXADDR,	/* highaddr */
3281				NULL, NULL,		/* filter, filterarg */
3282				MCLBYTES,		/* maxsize */
3283				1,			/* nsegments */
3284				MCLBYTES,		/* maxsegsize */
3285				0,			/* flags */
3286				NULL,			/* lockfunc */
3287				NULL,			/* lockarg */
3288				&adapter->rxtag);
3289	if (error) {
3290		device_printf(dev, "%s: bus_dma_tag_create failed %d\n",
3291		    __func__, error);
3292		goto fail;
3293	}
3294
3295	error = bus_dmamap_create(adapter->rxtag, BUS_DMA_NOWAIT,
3296	    &adapter->rx_sparemap);
3297	if (error) {
3298		device_printf(dev, "%s: bus_dmamap_create failed: %d\n",
3299		    __func__, error);
3300		goto fail;
3301	}
3302	rx_buffer = adapter->rx_buffer_area;
3303	for (i = 0; i < adapter->num_rx_desc; i++, rx_buffer++) {
3304		error = bus_dmamap_create(adapter->rxtag, BUS_DMA_NOWAIT,
3305		    &rx_buffer->map);
3306		if (error) {
3307			device_printf(dev, "%s: bus_dmamap_create failed: %d\n",
3308			    __func__, error);
3309			goto fail;
3310		}
3311	}
3312
3313	for (i = 0; i < adapter->num_rx_desc; i++) {
3314		error = em_get_buf(adapter, i);
3315		if (error)
3316			goto fail;
3317	}
3318	bus_dmamap_sync(adapter->rxdma.dma_tag, adapter->rxdma.dma_map,
3319	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3320
3321	return (0);
3322
3323fail:
3324	em_free_receive_structures(adapter);
3325	return (error);
3326}
3327
3328/*********************************************************************
3329 *
3330 *  Allocate and initialize receive structures.
3331 *
3332 **********************************************************************/
3333static int
3334em_setup_receive_structures(struct adapter *adapter)
3335{
3336	int error;
3337
3338	bzero(adapter->rx_desc_base, (sizeof(struct em_rx_desc)) * adapter->num_rx_desc);
3339
3340	if ((error = em_allocate_receive_structures(adapter)) != 0)
3341		return (error);
3342
3343	/* Setup our descriptor pointers */
3344	adapter->next_rx_desc_to_check = 0;
3345
3346	return (0);
3347}
3348
3349/*********************************************************************
3350 *
3351 *  Enable receive unit.
3352 *
3353 **********************************************************************/
3354static void
3355em_initialize_receive_unit(struct adapter *adapter)
3356{
3357	struct ifnet	*ifp = adapter->ifp;
3358	uint64_t	bus_addr;
3359	uint32_t	reg_rctl;
3360	uint32_t	reg_rxcsum;
3361
3362	INIT_DEBUGOUT("em_initialize_receive_unit: begin");
3363
3364	/*
3365	 * Make sure receives are disabled while setting
3366	 * up the descriptor ring
3367	 */
3368	E1000_WRITE_REG(&adapter->hw, RCTL, 0);
3369
3370	/* Set the Receive Delay Timer Register */
3371	E1000_WRITE_REG(&adapter->hw, RDTR, adapter->rx_int_delay.value | E1000_RDT_FPDB);
3372
3373	if (adapter->hw.mac_type >= em_82540) {
3374		E1000_WRITE_REG(&adapter->hw, RADV, adapter->rx_abs_int_delay.value);
3375
3376		/*
3377		 * Set the interrupt throttling rate.  The ITR register counts
3378		 * in 256 ns units, so DEFAULT_ITR = 10^9 / (MAX_INTS_PER_SEC * 256).
3379		 */
3380#define MAX_INTS_PER_SEC	8000
3381#define DEFAULT_ITR		(1000000000/(MAX_INTS_PER_SEC * 256))
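		/*
		 * Worked example (illustrative): with MAX_INTS_PER_SEC of
		 * 8000, DEFAULT_ITR = 1000000000 / (8000 * 256) = 488,
		 * i.e. a minimum interrupt interval of 488 * 256 ns,
		 * about 125 us.
		 */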
3382		E1000_WRITE_REG(&adapter->hw, ITR, DEFAULT_ITR);
3383	}
3384
3385	/* Setup the Base and Length of the Rx Descriptor Ring */
3386	bus_addr = adapter->rxdma.dma_paddr;
3387	E1000_WRITE_REG(&adapter->hw, RDLEN, adapter->num_rx_desc *
3388			sizeof(struct em_rx_desc));
3389	E1000_WRITE_REG(&adapter->hw, RDBAH, (uint32_t)(bus_addr >> 32));
3390	E1000_WRITE_REG(&adapter->hw, RDBAL, (uint32_t)bus_addr);
3391
3392	/* Setup the Receive Control Register */
3393	reg_rctl = E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_LBM_NO |
3394		   E1000_RCTL_RDMTS_HALF |
3395		   (adapter->hw.mc_filter_type << E1000_RCTL_MO_SHIFT);
3396
3397	if (adapter->hw.tbi_compatibility_on == TRUE)
3398		reg_rctl |= E1000_RCTL_SBP;
3399
3400
3401	switch (adapter->rx_buffer_len) {
3402	default:
3403	case EM_RXBUFFER_2048:
3404		reg_rctl |= E1000_RCTL_SZ_2048;
3405		break;
3406	case EM_RXBUFFER_4096:
3407		reg_rctl |= E1000_RCTL_SZ_4096 | E1000_RCTL_BSEX | E1000_RCTL_LPE;
3408		break;
3409	case EM_RXBUFFER_8192:
3410		reg_rctl |= E1000_RCTL_SZ_8192 | E1000_RCTL_BSEX | E1000_RCTL_LPE;
3411		break;
3412	case EM_RXBUFFER_16384:
3413		reg_rctl |= E1000_RCTL_SZ_16384 | E1000_RCTL_BSEX | E1000_RCTL_LPE;
3414		break;
3415	}
3416
3417	if (ifp->if_mtu > ETHERMTU)
3418		reg_rctl |= E1000_RCTL_LPE;
3419
3420	/* Enable 82543 Receive Checksum Offload for TCP and UDP */
3421	if ((adapter->hw.mac_type >= em_82543) &&
3422	    (ifp->if_capenable & IFCAP_RXCSUM)) {
3423		reg_rxcsum = E1000_READ_REG(&adapter->hw, RXCSUM);
3424		reg_rxcsum |= (E1000_RXCSUM_IPOFL | E1000_RXCSUM_TUOFL);
3425		E1000_WRITE_REG(&adapter->hw, RXCSUM, reg_rxcsum);
3426	}
3427
3428	/* Enable Receives */
3429	E1000_WRITE_REG(&adapter->hw, RCTL, reg_rctl);
3430
3431	/* Setup the HW Rx Head and Tail Descriptor Pointers */
3432	E1000_WRITE_REG(&adapter->hw, RDH, 0);
3433	E1000_WRITE_REG(&adapter->hw, RDT, adapter->num_rx_desc - 1);
3434}
3435
3436/*********************************************************************
3437 *
3438 *  Free receive related data structures.
3439 *
3440 **********************************************************************/
3441static void
3442em_free_receive_structures(struct adapter *adapter)
3443{
3444	struct em_buffer *rx_buffer;
3445	int i;
3446
3447	INIT_DEBUGOUT("free_receive_structures: begin");
3448
3449	if (adapter->rx_sparemap) {
3450		bus_dmamap_destroy(adapter->rxtag, adapter->rx_sparemap);
3451		adapter->rx_sparemap = NULL;
3452	}
3453	if (adapter->rx_buffer_area != NULL) {
3454		rx_buffer = adapter->rx_buffer_area;
3455		for (i = 0; i < adapter->num_rx_desc; i++, rx_buffer++) {
3456			if (rx_buffer->m_head != NULL) {
3457				bus_dmamap_sync(adapter->rxtag, rx_buffer->map,
3458				    BUS_DMASYNC_POSTREAD);
3459				bus_dmamap_unload(adapter->rxtag,
3460				    rx_buffer->map);
3461				m_freem(rx_buffer->m_head);
3462				rx_buffer->m_head = NULL;
3463			} else if (rx_buffer->map != NULL)
3464				bus_dmamap_unload(adapter->rxtag,
3465				    rx_buffer->map);
3466			if (rx_buffer->map != NULL) {
3467				bus_dmamap_destroy(adapter->rxtag,
3468				    rx_buffer->map);
3469				rx_buffer->map = NULL;
3470			}
3471		}
3472	}
3473	if (adapter->rx_buffer_area != NULL) {
3474		free(adapter->rx_buffer_area, M_DEVBUF);
3475		adapter->rx_buffer_area = NULL;
3476	}
3477	if (adapter->rxtag != NULL) {
3478		bus_dma_tag_destroy(adapter->rxtag);
3479		adapter->rxtag = NULL;
3480	}
3481}
3482
3483/*********************************************************************
3484 *
3485 *  This routine executes in interrupt context. It replenishes
3486 *  the mbufs in the descriptor ring and passes data which has been
3487 *  DMA'ed into host memory up to the network stack.
3488 *
3489 *  We loop at most count times if count is > 0, or until done if
3490 *  count < 0.
3491 *
3492 *********************************************************************/
3493static int
3494em_rxeof(struct adapter *adapter, int count)
3495{
3496	struct ifnet	*ifp;
3497	struct mbuf	*mp;
3498	uint8_t		accept_frame = 0;
3499	uint8_t		eop = 0;
3500	uint16_t 	len, desc_len, prev_len_adj;
3501	int		i;
3502
3503	/* Pointer to the receive descriptor being examined. */
3504	struct em_rx_desc   *current_desc;
3505	uint8_t		status;
3506
3507	ifp = adapter->ifp;
3508	i = adapter->next_rx_desc_to_check;
3509	current_desc = &adapter->rx_desc_base[i];
3510	bus_dmamap_sync(adapter->rxdma.dma_tag, adapter->rxdma.dma_map,
3511	    BUS_DMASYNC_POSTREAD);
3512
3513	if (!((current_desc->status) & E1000_RXD_STAT_DD))
3514		return (0);
3515
3516	while ((current_desc->status & E1000_RXD_STAT_DD) &&
3517	    (count != 0) &&
3518	    (ifp->if_drv_flags & IFF_DRV_RUNNING)) {
3519		struct mbuf *m = NULL;
3520
3521		mp = adapter->rx_buffer_area[i].m_head;
3522		/*
3523		 * Can't defer bus_dmamap_sync(9) because TBI_ACCEPT
3524		 * needs to access the last received byte in the mbuf.
3525		 */
3526		bus_dmamap_sync(adapter->rxtag, adapter->rx_buffer_area[i].map,
3527		    BUS_DMASYNC_POSTREAD);
3528
3529		accept_frame = 1;
3530		prev_len_adj = 0;
3531		desc_len = le16toh(current_desc->length);
3532		status = current_desc->status;
3533		if (status & E1000_RXD_STAT_EOP) {
3534			count--;
3535			eop = 1;
3536			if (desc_len < ETHER_CRC_LEN) {
3537				len = 0;
3538				prev_len_adj = ETHER_CRC_LEN - desc_len;
3539			} else
3540				len = desc_len - ETHER_CRC_LEN;
3541		} else {
3542			eop = 0;
3543			len = desc_len;
3544		}
3545
3546		if (current_desc->errors & E1000_RXD_ERR_FRAME_ERR_MASK) {
3547			uint8_t		last_byte;
3548			uint32_t	pkt_len = desc_len;
3549
3550			if (adapter->fmp != NULL)
3551				pkt_len += adapter->fmp->m_pkthdr.len;
3552
3553			last_byte = *(mtod(mp, caddr_t) + desc_len - 1);
3554			if (TBI_ACCEPT(&adapter->hw, status,
3555			    current_desc->errors, pkt_len, last_byte)) {
3556				em_tbi_adjust_stats(&adapter->hw,
3557				    &adapter->stats, pkt_len,
3558				    adapter->hw.mac_addr);
3559				if (len > 0)
3560					len--;
3561			} else
3562				accept_frame = 0;
3563		}
3564
3565		if (accept_frame) {
3566			if (em_get_buf(adapter, i) != 0) {
3567				ifp->if_iqdrops++;
3568				goto discard;
3569			}
3570
3571			/* Assign correct length to the current fragment */
3572			mp->m_len = len;
3573
3574			if (adapter->fmp == NULL) {
3575				mp->m_pkthdr.len = len;
3576				adapter->fmp = mp; /* Store the first mbuf */
3577				adapter->lmp = mp;
3578			} else {
3579				/* Chain mbuf's together */
3580				/* Chain mbufs together */
3581				/*
3582				 * Adjust length of previous mbuf in chain if
3583				 * we received less than 4 bytes in the last
3584				 * descriptor.
3585				 */
3586				if (prev_len_adj > 0) {
3587					adapter->lmp->m_len -= prev_len_adj;
3588					adapter->fmp->m_pkthdr.len -=
3589					    prev_len_adj;
3590				}
3591				adapter->lmp->m_next = mp;
3592				adapter->lmp = adapter->lmp->m_next;
3593				adapter->fmp->m_pkthdr.len += len;
3594			}
3595
3596			if (eop) {
3597				adapter->fmp->m_pkthdr.rcvif = ifp;
3598				ifp->if_ipackets++;
3599				em_receive_checksum(adapter, current_desc,
3600				    adapter->fmp);
3601#ifndef __NO_STRICT_ALIGNMENT
3602				if (adapter->hw.max_frame_size >
3603				    (MCLBYTES - ETHER_ALIGN) &&
3604				    em_fixup_rx(adapter) != 0)
3605					goto skip;
3606#endif
3607				if (status & E1000_RXD_STAT_VP) {
3608					adapter->fmp->m_pkthdr.ether_vtag =
3609					    (le16toh(current_desc->special) &
3610					    E1000_RXD_SPC_VLAN_MASK);
3611					adapter->fmp->m_flags |= M_VLANTAG;
3612				}
3613#ifndef __NO_STRICT_ALIGNMENT
3614skip:
3615#endif
3616				m = adapter->fmp;
3617				adapter->fmp = NULL;
3618				adapter->lmp = NULL;
3619			}
3620		} else {
3621			ifp->if_ierrors++;
3622discard:
3623			/* Reuse loaded DMA map and just update mbuf chain */
3624			mp = adapter->rx_buffer_area[i].m_head;
3625			mp->m_len = mp->m_pkthdr.len = MCLBYTES;
3626			mp->m_data = mp->m_ext.ext_buf;
3627			mp->m_next = NULL;
3628			if (adapter->hw.max_frame_size <= (MCLBYTES - ETHER_ALIGN))
3629				m_adj(mp, ETHER_ALIGN);
3630			if (adapter->fmp != NULL) {
3631				m_freem(adapter->fmp);
3632				adapter->fmp = NULL;
3633				adapter->lmp = NULL;
3634			}
3635			m = NULL;
3636		}
3637
3638		/* Zero out the receive descriptor's status. */
3639		current_desc->status = 0;
3640		bus_dmamap_sync(adapter->rxdma.dma_tag, adapter->rxdma.dma_map,
3641		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3642
3643		/* Advance our pointers to the next descriptor. */
3644		if (++i == adapter->num_rx_desc)
3645			i = 0;
3646		if (m != NULL) {
3647			adapter->next_rx_desc_to_check = i;
3648#ifdef DEVICE_POLLING
3649			EM_UNLOCK(adapter);
3650			(*ifp->if_input)(ifp, m);
3651			EM_LOCK(adapter);
3652#else
3653			(*ifp->if_input)(ifp, m);
3654#endif
3655			i = adapter->next_rx_desc_to_check;
3656		}
3657		current_desc = &adapter->rx_desc_base[i];
3658	}
3659	adapter->next_rx_desc_to_check = i;
3660
3661	/* Advance the E1000's Receive Queue #0 "Tail Pointer". */
3662	if (--i < 0)
3663		i = adapter->num_rx_desc - 1;
3664	E1000_WRITE_REG(&adapter->hw, RDT, i);
3665	if (!((current_desc->status) & E1000_RXD_STAT_DD))
3666		return (0);
3667
3668	return (1);
3669}
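/*
 * Editor's sketch of how em_rxeof() is meant to be driven, illustrating the
 * "count" contract documented above. The call sites shown are assumptions,
 * not part of this file: a positive count bounds the number of completed
 * frames processed per call, while a negative count drains every descriptor
 * that has its DD bit set.
 */
#if 0
	/* Polling path: bounded work per invocation. */
	em_rxeof(adapter, count);

	/* Interrupt path: process everything that has arrived. */
	em_rxeof(adapter, -1);
#endif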
3670
3671#ifndef __NO_STRICT_ALIGNMENT
3672/*
3673 * When jumbo frames are enabled we must realign the entire payload on
3674 * architectures with strict alignment requirements. This is a serious design
3675 * mistake of the 8254x, as it nullifies the benefit of DMA. The 8254x only
3676 * allows RX buffer sizes of 2048/4096/8192/16384; what we really want is
3677 * 2048 - ETHER_ALIGN, which would leave the payload aligned. On architectures
3678 * without strict alignment restrictions the 8254x still performs unaligned
3679 * memory accesses, which also reduces performance. To avoid copying an
3680 * entire frame just to realign it, we allocate a new mbuf, copy only the
3681 * ethernet header into it, and prepend the new mbuf to the existing chain.
3682 *
3683 * Be aware that best performance from the 8254x is achieved only when jumbo
3684 * frames are not used at all on architectures with strict alignment.
3685 */
3686static int
3687em_fixup_rx(struct adapter *adapter)
3688{
3689	struct mbuf *m, *n;
3690	int error;
3691
3692	error = 0;
3693	m = adapter->fmp;
3694	if (m->m_len <= (MCLBYTES - ETHER_HDR_LEN)) {
3695		bcopy(m->m_data, m->m_data + ETHER_HDR_LEN, m->m_len);
3696		m->m_data += ETHER_HDR_LEN;
3697	} else {
3698		MGETHDR(n, M_DONTWAIT, MT_DATA);
3699		if (n != NULL) {
3700			bcopy(m->m_data, n->m_data, ETHER_HDR_LEN);
3701			m->m_data += ETHER_HDR_LEN;
3702			m->m_len -= ETHER_HDR_LEN;
3703			n->m_len = ETHER_HDR_LEN;
3704			M_MOVE_PKTHDR(n, m);
3705			n->m_next = m;
3706			adapter->fmp = n;
3707		} else {
3708			adapter->ifp->if_iqdrops++;
3709			adapter->mbuf_alloc_failed++;
3710			m_freem(adapter->fmp);
3711			adapter->fmp = NULL;
3712			adapter->lmp = NULL;
3713			error = ENOBUFS;
3714		}
3715	}
3716
3717	return (error);
3718}
3719#endif
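/*
 * Editor's sketch of the alignment arithmetic behind em_fixup_rx(): an mbuf
 * cluster starts on a machine-aligned boundary, so after the 14-byte
 * ethernet header the IP header sits 2 bytes short of a 4-byte boundary.
 * Shifting the payload by ETHER_HDR_LEN (or prepending a header-only mbuf)
 * restores alignment. The spot check assumes the customary ETHER_ALIGN of 2
 * and is illustrative only.
 */
#if 0
CTASSERT((ETHER_ALIGN + ETHER_HDR_LEN) % sizeof(uint32_t) == 0);	/* 2 + 14 = 16 */
#endif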
3720
3721/*********************************************************************
3722 *
3723 *  Verify that the hardware indicated that the checksum is valid.
3724 *  Inform the stack about the status of the checksum so that the
3725 *  stack doesn't spend time verifying it again.
3726 *
3727 *********************************************************************/
3728static void
3729em_receive_checksum(struct adapter *adapter, struct em_rx_desc *rx_desc,
3730		    struct mbuf *mp)
3731{
3732	/* 82543 or newer only */
3733	if ((adapter->hw.mac_type < em_82543) ||
3734	    /* Ignore Checksum bit is set */
3735	    (rx_desc->status & E1000_RXD_STAT_IXSM)) {
3736		mp->m_pkthdr.csum_flags = 0;
3737		return;
3738	}
3739
3740	if (rx_desc->status & E1000_RXD_STAT_IPCS) {
3741		/* Did it pass? */
3742		if (!(rx_desc->errors & E1000_RXD_ERR_IPE)) {
3743			/* IP Checksum Good */
3744			mp->m_pkthdr.csum_flags = CSUM_IP_CHECKED;
3745			mp->m_pkthdr.csum_flags |= CSUM_IP_VALID;
3746
3747		} else {
3748			mp->m_pkthdr.csum_flags = 0;
3749		}
3750	}
3751
3752	if (rx_desc->status & E1000_RXD_STAT_TCPCS) {
3753		/* Did it pass? */
3754		if (!(rx_desc->errors & E1000_RXD_ERR_TCPE)) {
3755			mp->m_pkthdr.csum_flags |=
3756			(CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
3757			mp->m_pkthdr.csum_data = htons(0xffff);
3758		}
3759	}
3760}
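/*
 * Editor's sketch: how a consumer higher up the stack would interpret the
 * flags set above, modeled on the usual FreeBSD convention (CSUM_DATA_VALID
 * together with CSUM_PSEUDO_HDR and csum_data == 0xffff means the TCP/UDP
 * checksum is already known-good). Illustrative only, not part of this
 * driver.
 */
#if 0
	if ((mp->m_pkthdr.csum_flags & CSUM_DATA_VALID) &&
	    (mp->m_pkthdr.csum_flags & CSUM_PSEUDO_HDR)) {
		/*
		 * mp->m_pkthdr.csum_data already holds 0xffff, so the
		 * protocol layer can skip its software checksum pass.
		 */
	}
#endif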
3761
3762
3763static void
3764em_enable_vlans(struct adapter *adapter)
3765{
3766	uint32_t ctrl;
3767
3768	E1000_WRITE_REG(&adapter->hw, VET, ETHERTYPE_VLAN);
3769
3770	ctrl = E1000_READ_REG(&adapter->hw, CTRL);
3771	ctrl |= E1000_CTRL_VME;
3772	E1000_WRITE_REG(&adapter->hw, CTRL, ctrl);
3773}
3774
3775static void
3776em_disable_vlans(struct adapter *adapter)
3777{
3778	uint32_t ctrl;
3779
3780	ctrl = E1000_READ_REG(&adapter->hw, CTRL);
3781	ctrl &= ~E1000_CTRL_VME;
3782	E1000_WRITE_REG(&adapter->hw, CTRL, ctrl);
3783}
3784
3785static void
3786em_enable_intr(struct adapter *adapter)
3787{
3788	E1000_WRITE_REG(&adapter->hw, IMS, (IMS_ENABLE_MASK));
3789}
3790
3791static void
3792em_disable_intr(struct adapter *adapter)
3793{
3794	/*
3795	 * The first version of the 82542 had an erratum where, with the link
3796	 * forced, it would stay up even if the cable was disconnected.
3797	 * Sequence errors were used to detect the disconnect, after which the
3798	 * driver would unforce the link. That code lives in the ISR; for it
3799	 * to work correctly, the Sequence Error interrupt has to be enabled
3800	 * all the time.
3801	 */
3802
3803	if (adapter->hw.mac_type == em_82542_rev2_0)
3804	    E1000_WRITE_REG(&adapter->hw, IMC,
3805		(0xffffffff & ~E1000_IMC_RXSEQ));
3806	else
3807	    E1000_WRITE_REG(&adapter->hw, IMC,
3808		0xffffffff);
3809}
3810
3811static int
3812em_is_valid_ether_addr(uint8_t *addr)
3813{
3814	char zero_addr[6] = { 0, 0, 0, 0, 0, 0 };
3815
3816	if ((addr[0] & 1) || (!bcmp(addr, zero_addr, ETHER_ADDR_LEN))) {
3817		return (FALSE);
3818	}
3819
3820	return (TRUE);
3821}
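/*
 * Editor's note: the check above rejects exactly two classes of address,
 * shown here with hypothetical examples. Illustrative only.
 */
#if 0
	uint8_t mcast_addr[ETHER_ADDR_LEN] =
	    { 0x01, 0x00, 0x5e, 0x00, 0x00, 0x01 };	/* group bit set */
	uint8_t null_addr[ETHER_ADDR_LEN] =
	    { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 };	/* all zero */

	em_is_valid_ether_addr(mcast_addr);	/* FALSE: addr[0] & 1 is set */
	em_is_valid_ether_addr(null_addr);	/* FALSE: matches zero_addr */
#endif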
3822
3823void
3824em_write_pci_cfg(struct em_hw *hw, uint32_t reg, uint16_t *value)
3825{
3826	pci_write_config(((struct em_osdep *)hw->back)->dev, reg, *value, 2);
3827}
3828
3829void
3830em_read_pci_cfg(struct em_hw *hw, uint32_t reg, uint16_t *value)
3831{
3832	*value = pci_read_config(((struct em_osdep *)hw->back)->dev, reg, 2);
3833}
3834
3835void
3836em_pci_set_mwi(struct em_hw *hw)
3837{
3838	pci_write_config(((struct em_osdep *)hw->back)->dev, PCIR_COMMAND,
3839	    (hw->pci_cmd_word | CMD_MEM_WRT_INVALIDATE), 2);
3840}
3841
3842void
3843em_pci_clear_mwi(struct em_hw *hw)
3844{
3845	pci_write_config(((struct em_osdep *)hw->back)->dev, PCIR_COMMAND,
3846	    (hw->pci_cmd_word & ~CMD_MEM_WRT_INVALIDATE), 2);
3847}
3848
3849/*
3850 * We may eventually implement this for real, but it's unnecessary
3851 * for now, so we just return unsupported.
3852 */
3853int32_t
3854em_read_pcie_cap_reg(struct em_hw *hw, uint32_t reg, uint16_t *value)
3855{
3856	return (0);
3857}
3858
3859/*********************************************************************
3860* 82544 coexistence issue workaround.
3861*    There are two issues:
3862*       1. Transmit hang issue.
3863*    To detect this issue, the following equation can be used:
3864*	  SIZE[3:0] + ADDR[2:0] = SUM[3:0].
3865*	  If SUM[3:0] is between 1 and 4, we will hit this issue.
3866*
3867*       2. DAC issue.
3868*    To detect this issue, the following equation can be used:
3869*	  SIZE[3:0] + ADDR[2:0] = SUM[3:0].
3870*	  If SUM[3:0] is between 9 and 0xc, we will hit this issue.
3871*
3872*
3873*    WORKAROUND:
3874*	  Make sure the ending address of a buffer never lands on
3875*	  1, 2, 3, 4 (hang) or 9, a, b, c (DAC).
3876**********************************************************************/
3877static uint32_t
3878em_fill_descriptors (bus_addr_t address, uint32_t length,
3879		PDESC_ARRAY desc_array)
3880{
3881	/* Since the issue is sensitive to both length and address, */
3882	/* check the address first... */
3883	uint32_t safe_terminator;
3884	if (length <= 4) {
3885		desc_array->descriptor[0].address = address;
3886		desc_array->descriptor[0].length = length;
3887		desc_array->elements = 1;
3888		return (desc_array->elements);
3889	}
3890	safe_terminator = (uint32_t)((((uint32_t)address & 0x7) + (length & 0xF)) & 0xF);
3891	/* If it falls outside 0x1-0x4 and 0x9-0xc, one descriptor suffices. */
3892	if (safe_terminator == 0 ||
3893	    (safe_terminator > 4 &&
3894	    safe_terminator < 9) ||
3895	    (safe_terminator > 0xC &&
3896	    safe_terminator <= 0xF)) {
3897		desc_array->descriptor[0].address = address;
3898		desc_array->descriptor[0].length = length;
3899		desc_array->elements = 1;
3900		return (desc_array->elements);
3901	}
3902
3903	desc_array->descriptor[0].address = address;
3904	desc_array->descriptor[0].length = length - 4;
3905	desc_array->descriptor[1].address = address + (length - 4);
3906	desc_array->descriptor[1].length = 4;
3907	desc_array->elements = 2;
3908	return (desc_array->elements);
3909}
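/*
 * Editor's worked example of the split above, with hypothetical numbers.
 * For address 0x1004 and length 0x3d:
 *
 *	safe_terminator = ((0x1004 & 0x7) + (0x3d & 0xf)) & 0xf
 *			= (0x4 + 0xd) & 0xf = 0x1
 *
 * which lands in the 1-4 hang range, so the buffer is split into
 * (0x1004, 0x39) plus a trailing 4-byte descriptor (0x103d, 0x4). The
 * snippet assumes DESC_ARRAY is the value type behind PDESC_ARRAY.
 */
#if 0
	DESC_ARRAY desc_array;

	em_fill_descriptors(0x1004, 0x3d, &desc_array);
	/* desc_array.elements is now 2. */
#endif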
3910
3911/**********************************************************************
3912 *
3913 *  Update the board statistics counters.
3914 *
3915 **********************************************************************/
3916static void
3917em_update_stats_counters(struct adapter *adapter)
3918{
3919	struct ifnet   *ifp;
3920
3921	if (adapter->hw.media_type == em_media_type_copper ||
3922	   (E1000_READ_REG(&adapter->hw, STATUS) & E1000_STATUS_LU)) {
3923		adapter->stats.symerrs += E1000_READ_REG(&adapter->hw, SYMERRS);
3924		adapter->stats.sec += E1000_READ_REG(&adapter->hw, SEC);
3925	}
3926	adapter->stats.crcerrs += E1000_READ_REG(&adapter->hw, CRCERRS);
3927	adapter->stats.mpc += E1000_READ_REG(&adapter->hw, MPC);
3928	adapter->stats.scc += E1000_READ_REG(&adapter->hw, SCC);
3929	adapter->stats.ecol += E1000_READ_REG(&adapter->hw, ECOL);
3930
3931	adapter->stats.mcc += E1000_READ_REG(&adapter->hw, MCC);
3932	adapter->stats.latecol += E1000_READ_REG(&adapter->hw, LATECOL);
3933	adapter->stats.colc += E1000_READ_REG(&adapter->hw, COLC);
3934	adapter->stats.dc += E1000_READ_REG(&adapter->hw, DC);
3935	adapter->stats.rlec += E1000_READ_REG(&adapter->hw, RLEC);
3936	adapter->stats.xonrxc += E1000_READ_REG(&adapter->hw, XONRXC);
3937	adapter->stats.xontxc += E1000_READ_REG(&adapter->hw, XONTXC);
3938	adapter->stats.xoffrxc += E1000_READ_REG(&adapter->hw, XOFFRXC);
3939	adapter->stats.xofftxc += E1000_READ_REG(&adapter->hw, XOFFTXC);
3940	adapter->stats.fcruc += E1000_READ_REG(&adapter->hw, FCRUC);
3941	adapter->stats.prc64 += E1000_READ_REG(&adapter->hw, PRC64);
3942	adapter->stats.prc127 += E1000_READ_REG(&adapter->hw, PRC127);
3943	adapter->stats.prc255 += E1000_READ_REG(&adapter->hw, PRC255);
3944	adapter->stats.prc511 += E1000_READ_REG(&adapter->hw, PRC511);
3945	adapter->stats.prc1023 += E1000_READ_REG(&adapter->hw, PRC1023);
3946	adapter->stats.prc1522 += E1000_READ_REG(&adapter->hw, PRC1522);
3947	adapter->stats.gprc += E1000_READ_REG(&adapter->hw, GPRC);
3948	adapter->stats.bprc += E1000_READ_REG(&adapter->hw, BPRC);
3949	adapter->stats.mprc += E1000_READ_REG(&adapter->hw, MPRC);
3950	adapter->stats.gptc += E1000_READ_REG(&adapter->hw, GPTC);
3951
3952	/* For the 64-bit byte counters the low dword must be read first. */
3953	/* Both registers clear on the read of the high dword. */
3954
3955	adapter->stats.gorcl += E1000_READ_REG(&adapter->hw, GORCL);
3956	adapter->stats.gorch += E1000_READ_REG(&adapter->hw, GORCH);
3957	adapter->stats.gotcl += E1000_READ_REG(&adapter->hw, GOTCL);
3958	adapter->stats.gotch += E1000_READ_REG(&adapter->hw, GOTCH);
3959
3960	adapter->stats.rnbc += E1000_READ_REG(&adapter->hw, RNBC);
3961	adapter->stats.ruc += E1000_READ_REG(&adapter->hw, RUC);
3962	adapter->stats.rfc += E1000_READ_REG(&adapter->hw, RFC);
3963	adapter->stats.roc += E1000_READ_REG(&adapter->hw, ROC);
3964	adapter->stats.rjc += E1000_READ_REG(&adapter->hw, RJC);
3965
3966	adapter->stats.torl += E1000_READ_REG(&adapter->hw, TORL);
3967	adapter->stats.torh += E1000_READ_REG(&adapter->hw, TORH);
3968	adapter->stats.totl += E1000_READ_REG(&adapter->hw, TOTL);
3969	adapter->stats.toth += E1000_READ_REG(&adapter->hw, TOTH);
3970
3971	adapter->stats.tpr += E1000_READ_REG(&adapter->hw, TPR);
3972	adapter->stats.tpt += E1000_READ_REG(&adapter->hw, TPT);
3973	adapter->stats.ptc64 += E1000_READ_REG(&adapter->hw, PTC64);
3974	adapter->stats.ptc127 += E1000_READ_REG(&adapter->hw, PTC127);
3975	adapter->stats.ptc255 += E1000_READ_REG(&adapter->hw, PTC255);
3976	adapter->stats.ptc511 += E1000_READ_REG(&adapter->hw, PTC511);
3977	adapter->stats.ptc1023 += E1000_READ_REG(&adapter->hw, PTC1023);
3978	adapter->stats.ptc1522 += E1000_READ_REG(&adapter->hw, PTC1522);
3979	adapter->stats.mptc += E1000_READ_REG(&adapter->hw, MPTC);
3980	adapter->stats.bptc += E1000_READ_REG(&adapter->hw, BPTC);
3981
3982	if (adapter->hw.mac_type >= em_82543) {
3983		adapter->stats.algnerrc += E1000_READ_REG(&adapter->hw, ALGNERRC);
3984		adapter->stats.rxerrc += E1000_READ_REG(&adapter->hw, RXERRC);
3985		adapter->stats.tncrs += E1000_READ_REG(&adapter->hw, TNCRS);
3986		adapter->stats.cexterr += E1000_READ_REG(&adapter->hw, CEXTERR);
3987		adapter->stats.tsctc += E1000_READ_REG(&adapter->hw, TSCTC);
3988		adapter->stats.tsctfc += E1000_READ_REG(&adapter->hw, TSCTFC);
3989	}
3990	ifp = adapter->ifp;
3991
3992	ifp->if_collisions = adapter->stats.colc;
3993
3994	/* Rx Errors */
3995	ifp->if_ierrors = adapter->stats.rxerrc + adapter->stats.crcerrs +
3996	    adapter->stats.algnerrc + adapter->stats.ruc + adapter->stats.roc +
3997	    adapter->stats.mpc + adapter->stats.cexterr;
3998
3999	/* Tx Errors */
4000	ifp->if_oerrors = adapter->stats.ecol + adapter->stats.latecol +
4001	    adapter->watchdog_events;
4002}
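/*
 * Editor's sketch: the low/high register halves accumulated above can be
 * totaled into full 64-bit octet counts. The field names are the ones used
 * in this file; the combination itself is an editorial illustration, not
 * driver code.
 */
#if 0
	uint64_t rx_octets, tx_octets;

	rx_octets = adapter->stats.gorcl +
	    ((uint64_t)adapter->stats.gorch << 32);
	tx_octets = adapter->stats.gotcl +
	    ((uint64_t)adapter->stats.gotch << 32);
#endif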
4003
4004
4005/**********************************************************************
4006 *
4007 *  This routine is called only when em_display_debug_stats is enabled.
4008 *  This routine provides a way to take a look at important statistics
4009 *  maintained by the driver and hardware.
4010 *
4011 **********************************************************************/
4012static void
4013em_print_debug_info(struct adapter *adapter)
4014{
4015	device_t dev = adapter->dev;
4016	uint8_t *hw_addr = adapter->hw.hw_addr;
4017
4018	device_printf(dev, "Adapter hardware address = %p \n", hw_addr);
4019	device_printf(dev, "CTRL = 0x%x RCTL = 0x%x \n",
4020	    E1000_READ_REG(&adapter->hw, CTRL),
4021	    E1000_READ_REG(&adapter->hw, RCTL));
4022	device_printf(dev, "Packet buffer = Tx=%dk Rx=%dk \n",
4023	    ((E1000_READ_REG(&adapter->hw, PBA) & 0xffff0000) >> 16),
4024	    (E1000_READ_REG(&adapter->hw, PBA) & 0xffff));
4025	device_printf(dev, "Flow control watermarks high = %d low = %d\n",
4026	    adapter->hw.fc_high_water,
4027	    adapter->hw.fc_low_water);
4028	device_printf(dev, "tx_int_delay = %d, tx_abs_int_delay = %d\n",
4029	    E1000_READ_REG(&adapter->hw, TIDV),
4030	    E1000_READ_REG(&adapter->hw, TADV));
4031	device_printf(dev, "rx_int_delay = %d, rx_abs_int_delay = %d\n",
4032	    E1000_READ_REG(&adapter->hw, RDTR),
4033	    E1000_READ_REG(&adapter->hw, RADV));
4034	device_printf(dev, "fifo workaround = %lld, fifo_reset_count = %lld\n",
4035	    (long long)adapter->tx_fifo_wrk_cnt,
4036	    (long long)adapter->tx_fifo_reset_cnt);
4037	device_printf(dev, "hw tdh = %d, hw tdt = %d\n",
4038	    E1000_READ_REG(&adapter->hw, TDH),
4039	    E1000_READ_REG(&adapter->hw, TDT));
4040	device_printf(dev, "Num Tx descriptors avail = %d\n",
4041	    adapter->num_tx_desc_avail);
4042	device_printf(dev, "Tx Descriptors not avail1 = %ld\n",
4043	    adapter->no_tx_desc_avail1);
4044	device_printf(dev, "Tx Descriptors not avail2 = %ld\n",
4045	    adapter->no_tx_desc_avail2);
4046	device_printf(dev, "Std mbuf failed = %ld\n",
4047	    adapter->mbuf_alloc_failed);
4048	device_printf(dev, "Std mbuf cluster failed = %ld\n",
4049	    adapter->mbuf_cluster_failed);
4050}
4051
4052static void
4053em_print_hw_stats(struct adapter *adapter)
4054{
4055	device_t dev = adapter->dev;
4056
4057	device_printf(dev, "Excessive collisions = %lld\n",
4058	    (long long)adapter->stats.ecol);
4059	device_printf(dev, "Symbol errors = %lld\n",
4060	    (long long)adapter->stats.symerrs);
4061	device_printf(dev, "Sequence errors = %lld\n",
4062	    (long long)adapter->stats.sec);
4063	device_printf(dev, "Defer count = %lld\n", (long long)adapter->stats.dc);
4064
4065	device_printf(dev, "Missed Packets = %lld\n", (long long)adapter->stats.mpc);
4066	device_printf(dev, "Receive No Buffers = %lld\n",
4067	    (long long)adapter->stats.rnbc);
4068	/* RLEC is inaccurate on some hardware, calculate our own. */
4069	device_printf(dev, "Receive Length Errors = %lld\n",
4070	    ((long long)adapter->stats.roc + (long long)adapter->stats.ruc));
4071	device_printf(dev, "Receive errors = %lld\n",
4072	    (long long)adapter->stats.rxerrc);
4073	device_printf(dev, "Crc errors = %lld\n", (long long)adapter->stats.crcerrs);
4074	device_printf(dev, "Alignment errors = %lld\n",
4075	    (long long)adapter->stats.algnerrc);
4076	device_printf(dev, "Carrier extension errors = %lld\n",
4077	    (long long)adapter->stats.cexterr);
4078	device_printf(dev, "RX overruns = %ld\n", adapter->rx_overruns);
4079	device_printf(dev, "watchdog timeouts = %ld\n", adapter->watchdog_events);
4080
4081	device_printf(dev, "XON Rcvd = %lld\n", (long long)adapter->stats.xonrxc);
4082	device_printf(dev, "XON Xmtd = %lld\n", (long long)adapter->stats.xontxc);
4083	device_printf(dev, "XOFF Rcvd = %lld\n", (long long)adapter->stats.xoffrxc);
4084	device_printf(dev, "XOFF Xmtd = %lld\n", (long long)adapter->stats.xofftxc);
4085
4086	device_printf(dev, "Good Packets Rcvd = %lld\n",
4087	    (long long)adapter->stats.gprc);
4088	device_printf(dev, "Good Packets Xmtd = %lld\n",
4089	    (long long)adapter->stats.gptc);
4090	device_printf(dev, "TSO Contexts Xmtd = %lld\n",
4091	    (long long)adapter->stats.tsctc);
4092	device_printf(dev, "TSO Contexts Failed = %lld\n",
4093	    (long long)adapter->stats.tsctfc);
4094}
4095
4096static int
4097em_sysctl_debug_info(SYSCTL_HANDLER_ARGS)
4098{
4099	struct adapter *adapter;
4100	int error;
4101	int result;
4102
4103	result = -1;
4104	error = sysctl_handle_int(oidp, &result, 0, req);
4105
4106	if (error || !req->newptr)
4107		return (error);
4108
4109	if (result == 1) {
4110		adapter = (struct adapter *)arg1;
4111		em_print_debug_info(adapter);
4112	}
4113
4114	return (error);
4115}
4116
4117
4118static int
4119em_sysctl_stats(SYSCTL_HANDLER_ARGS)
4120{
4121	struct adapter *adapter;
4122	int error;
4123	int result;
4124
4125	result = -1;
4126	error = sysctl_handle_int(oidp, &result, 0, req);
4127
4128	if (error || !req->newptr)
4129		return (error);
4130
4131	if (result == 1) {
4132		adapter = (struct adapter *)arg1;
4133		em_print_hw_stats(adapter);
4134	}
4135
4136	return (error);
4137}
4138
4139static int
4140em_sysctl_int_delay(SYSCTL_HANDLER_ARGS)
4141{
4142	struct em_int_delay_info *info;
4143	struct adapter *adapter;
4144	uint32_t regval;
4145	int error;
4146	int usecs;
4147	int ticks;
4148
4149	info = (struct em_int_delay_info *)arg1;
4150	usecs = info->value;
4151	error = sysctl_handle_int(oidp, &usecs, 0, req);
4152	if (error != 0 || req->newptr == NULL)
4153		return (error);
4154	if (usecs < 0 || usecs > E1000_TICKS_TO_USECS(65535))
4155		return (EINVAL);
4156	info->value = usecs;
4157	ticks = E1000_USECS_TO_TICKS(usecs);
4158
4159	adapter = info->adapter;
4160
4161	EM_LOCK(adapter);
4162	regval = E1000_READ_OFFSET(&adapter->hw, info->offset);
4163	regval = (regval & ~0xffff) | (ticks & 0xffff);
4164	/* Handle a few special cases. */
4165	switch (info->offset) {
4166	case E1000_RDTR:
4167	case E1000_82542_RDTR:
4168		regval |= E1000_RDT_FPDB;
4169		break;
4170	case E1000_TIDV:
4171	case E1000_82542_TIDV:
4172		if (ticks == 0) {
4173			adapter->txd_cmd &= ~E1000_TXD_CMD_IDE;
4174			/* Don't write 0 into the TIDV register. */
4175			regval++;
4176		} else
4177			adapter->txd_cmd |= E1000_TXD_CMD_IDE;
4178		break;
4179	}
4180	E1000_WRITE_OFFSET(&adapter->hw, info->offset, regval);
4181	EM_UNLOCK(adapter);
4182	return (0);
4183}
4184
4185static void
4186em_add_int_delay_sysctl(struct adapter *adapter, const char *name,
4187	const char *description, struct em_int_delay_info *info,
4188	int offset, int value)
4189{
4190	info->adapter = adapter;
4191	info->offset = offset;
4192	info->value = value;
4193	SYSCTL_ADD_PROC(device_get_sysctl_ctx(adapter->dev),
4194	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
4195	    OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW,
4196	    info, 0, em_sysctl_int_delay, "I", description);
4197}
4198
4199#ifndef DEVICE_POLLING
4200static void
4201em_add_int_process_limit(struct adapter *adapter, const char *name,
4202	const char *description, int *limit, int value)
4203{
4204	*limit = value;
4205	SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev),
4206	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
4207	    OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW, limit, value, description);
4208}
4209#endif
4210