/**************************************************************************

Copyright (c) 2001-2006, Intel Corporation
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

 1. Redistributions of source code must retain the above copyright notice,
    this list of conditions and the following disclaimer.

 2. Redistributions in binary form must reproduce the above copyright
    notice, this list of conditions and the following disclaimer in the
    documentation and/or other materials provided with the distribution.

 3. Neither the name of the Intel Corporation nor the names of its
    contributors may be used to endorse or promote products derived from
    this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.

***************************************************************************/

/*$FreeBSD: head/sys/dev/em/if_em.c 163828 2006-10-31 17:21:14Z jhb $*/

#ifdef HAVE_KERNEL_OPTION_HEADERS
#include "opt_device_polling.h"
#endif

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/bus.h>
#include <sys/endian.h>
#include <sys/kernel.h>
#include <sys/kthread.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/module.h>
#include <sys/rman.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/sysctl.h>
#include <sys/taskqueue.h>

#include <machine/bus.h>
#include <machine/resource.h>

#include <net/bpf.h>
#include <net/ethernet.h>
#include <net/if.h>
#include <net/if_arp.h>
#include <net/if_dl.h>
#include <net/if_media.h>

#include <net/if_types.h>
#include <net/if_vlan_var.h>

#include <netinet/in_systm.h>
#include <netinet/in.h>
#include <netinet/if_ether.h>
#include <netinet/ip.h>
#include <netinet/ip6.h>
#include <netinet/tcp.h>
#include <netinet/udp.h>

#include <machine/in_cksum.h>
#include <dev/pci/pcivar.h>
#include <dev/pci/pcireg.h>
#include <dev/em/if_em_hw.h>
#include <dev/em/if_em.h>

/*********************************************************************
 *  Set this to one to display debug statistics
 *********************************************************************/
int	em_display_debug_stats = 0;

/*********************************************************************
 *  Driver version
 *********************************************************************/

char em_driver_version[] = "Version - 6.2.9";


/*********************************************************************
 *  PCI Device ID Table
 *
 *  Used by probe to select devices to load on
 *  Last field stores an index into em_strings
 *  Last entry must be all 0s
 *
 *  { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index }
 *********************************************************************/

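/*
 * Note: PCI_ANY_ID in the subvendor/subdevice columns acts as a wildcard;
 * em_probe() accepts any subsystem ID for such entries.
 */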
static em_vendor_info_t em_vendor_info_array[] =
{
	/* Intel(R) PRO/1000 Network Connection */
	{ 0x8086, E1000_DEV_ID_82540EM,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82540EM_LOM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82540EP,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82540EP_LOM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82540EP_LP,	PCI_ANY_ID, PCI_ANY_ID, 0},

	{ 0x8086, E1000_DEV_ID_82541EI,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82541ER,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82541ER_LOM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82541EI_MOBILE,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82541GI,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82541GI_LF,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82541GI_MOBILE,	PCI_ANY_ID, PCI_ANY_ID, 0},

	{ 0x8086, E1000_DEV_ID_82542,		PCI_ANY_ID, PCI_ANY_ID, 0},

	{ 0x8086, E1000_DEV_ID_82543GC_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82543GC_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},

	{ 0x8086, E1000_DEV_ID_82544EI_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82544EI_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82544GC_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82544GC_LOM,	PCI_ANY_ID, PCI_ANY_ID, 0},

	{ 0x8086, E1000_DEV_ID_82545EM_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82545EM_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82545GM_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82545GM_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82545GM_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},

	{ 0x8086, E1000_DEV_ID_82546EB_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82546EB_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82546EB_QUAD_COPPER, PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82546GB_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82546GB_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82546GB_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82546GB_PCIE,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82546GB_QUAD_COPPER, PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82546GB_QUAD_COPPER_KSP3,
						PCI_ANY_ID, PCI_ANY_ID, 0},

	{ 0x8086, E1000_DEV_ID_82547EI,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82547EI_MOBILE,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82547GI,		PCI_ANY_ID, PCI_ANY_ID, 0},

	{ 0x8086, E1000_DEV_ID_82571EB_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER_LOWPROFILE,
						PCI_ANY_ID, PCI_ANY_ID, 0},

	{ 0x8086, E1000_DEV_ID_82572EI_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82572EI_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82572EI_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82572EI,		PCI_ANY_ID, PCI_ANY_ID, 0},

	{ 0x8086, E1000_DEV_ID_82573E,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82573E_IAMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82573L,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_SPT,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_SPT,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_DPT,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_DPT,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IGP_M_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IGP_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IGP_C,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IFE,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IFE_GT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IFE_G,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IGP_M,	PCI_ANY_ID, PCI_ANY_ID, 0},

	/* required last entry */
	{ 0, 0, 0, 0, 0}
};

/*********************************************************************
 *  Table of branding strings for all supported NICs.
 *********************************************************************/

static char *em_strings[] = {
	"Intel(R) PRO/1000 Network Connection"
};

/*********************************************************************
 *  Function prototypes
 *********************************************************************/
static int	em_probe(device_t);
static int	em_attach(device_t);
static int	em_detach(device_t);
static int	em_shutdown(device_t);
static int	em_suspend(device_t);
static int	em_resume(device_t);
static void	em_start(struct ifnet *);
static void	em_start_locked(struct ifnet *ifp);
static int	em_ioctl(struct ifnet *, u_long, caddr_t);
static void	em_watchdog(struct ifnet *);
static void	em_init(void *);
static void	em_init_locked(struct adapter *);
static void	em_stop(void *);
static void	em_media_status(struct ifnet *, struct ifmediareq *);
static int	em_media_change(struct ifnet *);
static void	em_identify_hardware(struct adapter *);
static int	em_allocate_pci_resources(struct adapter *);
static int	em_allocate_intr(struct adapter *);
static void	em_free_intr(struct adapter *);
static void	em_free_pci_resources(struct adapter *);
static void	em_local_timer(void *);
static int	em_hardware_init(struct adapter *);
static void	em_setup_interface(device_t, struct adapter *);
static void	em_setup_transmit_structures(struct adapter *);
static void	em_initialize_transmit_unit(struct adapter *);
static int	em_setup_receive_structures(struct adapter *);
static void	em_initialize_receive_unit(struct adapter *);
static void	em_enable_intr(struct adapter *);
static void	em_disable_intr(struct adapter *);
static void	em_free_transmit_structures(struct adapter *);
static void	em_free_receive_structures(struct adapter *);
static void	em_update_stats_counters(struct adapter *);
static void	em_txeof(struct adapter *);
static int	em_allocate_receive_structures(struct adapter *);
static int	em_allocate_transmit_structures(struct adapter *);
static int	em_rxeof(struct adapter *, int);
#ifndef __NO_STRICT_ALIGNMENT
static int	em_fixup_rx(struct adapter *);
#endif
static void	em_receive_checksum(struct adapter *, struct em_rx_desc *,
		    struct mbuf *);
static void	em_transmit_checksum_setup(struct adapter *, struct mbuf *,
		    uint32_t *, uint32_t *);
static boolean_t em_tso_setup(struct adapter *, struct mbuf *,
		    uint32_t *, uint32_t *);
static void	em_set_promisc(struct adapter *);
static void	em_disable_promisc(struct adapter *);
static void	em_set_multi(struct adapter *);
static void	em_print_hw_stats(struct adapter *);
static void	em_update_link_status(struct adapter *);
static int	em_get_buf(struct adapter *, int);
static void	em_enable_vlans(struct adapter *);
static void	em_disable_vlans(struct adapter *);
static int	em_encap(struct adapter *, struct mbuf **);
static void	em_smartspeed(struct adapter *);
static int	em_82547_fifo_workaround(struct adapter *, int);
static void	em_82547_update_fifo_head(struct adapter *, int);
static int	em_82547_tx_fifo_reset(struct adapter *);
static void	em_82547_move_tail(void *arg);
static int	em_dma_malloc(struct adapter *, bus_size_t,
		struct em_dma_alloc *, int);
static void	em_dma_free(struct adapter *, struct em_dma_alloc *);
static void	em_print_debug_info(struct adapter *);
static int	em_is_valid_ether_addr(uint8_t *);
static int	em_sysctl_stats(SYSCTL_HANDLER_ARGS);
static int	em_sysctl_debug_info(SYSCTL_HANDLER_ARGS);
static uint32_t	em_fill_descriptors(bus_addr_t address, uint32_t length,
		    PDESC_ARRAY desc_array);
static int	em_sysctl_int_delay(SYSCTL_HANDLER_ARGS);
static void	em_add_int_delay_sysctl(struct adapter *, const char *,
		const char *, struct em_int_delay_info *, int, int);

/*
 * Fast interrupt handler and legacy ithread/polling modes are
 * mutually exclusive.
 */
#ifdef DEVICE_POLLING
static poll_handler_t em_poll;
static void	em_intr(void *);
#else
static void	em_intr_fast(void *);
static void	em_add_int_process_limit(struct adapter *, const char *,
		const char *, int *, int);
static void	em_handle_rxtx(void *context, int pending);
static void	em_handle_link(void *context, int pending);
#endif

/*********************************************************************
 *  FreeBSD Device Interface Entry Points
 *********************************************************************/

static device_method_t em_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe, em_probe),
	DEVMETHOD(device_attach, em_attach),
	DEVMETHOD(device_detach, em_detach),
	DEVMETHOD(device_shutdown, em_shutdown),
	DEVMETHOD(device_suspend, em_suspend),
	DEVMETHOD(device_resume, em_resume),
	{0, 0}
};

static driver_t em_driver = {
	"em", em_methods, sizeof(struct adapter),
};

static devclass_t em_devclass;
DRIVER_MODULE(em, pci, em_driver, em_devclass, 0, 0);
MODULE_DEPEND(em, pci, 1, 1, 1);
MODULE_DEPEND(em, ether, 1, 1, 1);

/*********************************************************************
 *  Tunable default values.
 *********************************************************************/

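/*
 * The interrupt-delay registers on these controllers count in units of
 * 1.024 usecs, which is where the 1024/1000 factors below come from; the
 * +500 and +512 terms round each conversion to the nearest unit.
 */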
#define E1000_TICKS_TO_USECS(ticks)	((1024 * (ticks) + 500) / 1000)
#define E1000_USECS_TO_TICKS(usecs)	((1000 * (usecs) + 512) / 1024)
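/*
 * Header bytes a TSO packet is assumed to carry in its first mbuf;
 * em_encap() pulls up M_TSO_LEN + 4 bytes whenever the first mbuf is no
 * longer than this, so at least 4 bytes of payload follow the headers.
 */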
#define M_TSO_LEN			66

static int em_tx_int_delay_dflt = E1000_TICKS_TO_USECS(EM_TIDV);
static int em_rx_int_delay_dflt = E1000_TICKS_TO_USECS(EM_RDTR);
static int em_tx_abs_int_delay_dflt = E1000_TICKS_TO_USECS(EM_TADV);
static int em_rx_abs_int_delay_dflt = E1000_TICKS_TO_USECS(EM_RADV);
static int em_rxd = EM_DEFAULT_RXD;
static int em_txd = EM_DEFAULT_TXD;
static int em_smart_pwr_down = FALSE;

TUNABLE_INT("hw.em.tx_int_delay", &em_tx_int_delay_dflt);
TUNABLE_INT("hw.em.rx_int_delay", &em_rx_int_delay_dflt);
TUNABLE_INT("hw.em.tx_abs_int_delay", &em_tx_abs_int_delay_dflt);
TUNABLE_INT("hw.em.rx_abs_int_delay", &em_rx_abs_int_delay_dflt);
TUNABLE_INT("hw.em.rxd", &em_rxd);
TUNABLE_INT("hw.em.txd", &em_txd);
TUNABLE_INT("hw.em.smart_pwr_down", &em_smart_pwr_down);
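/*
 * Like the tunables above, this limit can be overridden at boot from
 * loader.conf, e.g. hw.em.rx_process_limit="200".
 */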
#ifndef DEVICE_POLLING
static int em_rx_process_limit = 100;
TUNABLE_INT("hw.em.rx_process_limit", &em_rx_process_limit);
#endif

/*********************************************************************
 *  Device identification routine
 *
 *  em_probe determines whether the driver should be loaded on an
 *  adapter, based on the PCI vendor/device ID of the adapter.
 *
 *  return BUS_PROBE_DEFAULT on success, positive on failure
 *********************************************************************/

static int
em_probe(device_t dev)
{
	char		adapter_name[60];
	uint16_t	pci_vendor_id = 0;
	uint16_t	pci_device_id = 0;
	uint16_t	pci_subvendor_id = 0;
	uint16_t	pci_subdevice_id = 0;
	em_vendor_info_t *ent;

	INIT_DEBUGOUT("em_probe: begin");

	pci_vendor_id = pci_get_vendor(dev);
	if (pci_vendor_id != EM_VENDOR_ID)
		return (ENXIO);

	pci_device_id = pci_get_device(dev);
	pci_subvendor_id = pci_get_subvendor(dev);
	pci_subdevice_id = pci_get_subdevice(dev);

	ent = em_vendor_info_array;
	while (ent->vendor_id != 0) {
		if ((pci_vendor_id == ent->vendor_id) &&
		    (pci_device_id == ent->device_id) &&
		    ((pci_subvendor_id == ent->subvendor_id) ||
		    (ent->subvendor_id == PCI_ANY_ID)) &&
		    ((pci_subdevice_id == ent->subdevice_id) ||
		    (ent->subdevice_id == PCI_ANY_ID))) {
			sprintf(adapter_name, "%s %s",
				em_strings[ent->index],
				em_driver_version);
			device_set_desc_copy(dev, adapter_name);
			return (BUS_PROBE_DEFAULT);
		}
		ent++;
	}

	return (ENXIO);
}

/*********************************************************************
 *  Device initialization routine
 *
 *  The attach entry point is called when the driver is being loaded.
 *  This routine identifies the type of hardware, allocates all resources
 *  and initializes the hardware.
 *
 *  return 0 on success, positive on failure
 *********************************************************************/

static int
em_attach(device_t dev)
{
	struct adapter	*adapter;
	int		tsize, rsize;
	int		error = 0;

	INIT_DEBUGOUT("em_attach: begin");

	adapter = device_get_softc(dev);
	adapter->dev = adapter->osdep.dev = dev;
	EM_LOCK_INIT(adapter, device_get_nameunit(dev));

	/* SYSCTL stuff */
	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
	    OID_AUTO, "debug_info", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
	    em_sysctl_debug_info, "I", "Debug Information");

	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
	    OID_AUTO, "stats", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
	    em_sysctl_stats, "I", "Statistics");

	callout_init_mtx(&adapter->timer, &adapter->mtx, 0);
	callout_init_mtx(&adapter->tx_fifo_timer, &adapter->mtx, 0);

	/* Determine hardware revision */
	em_identify_hardware(adapter);

	/* Set up some sysctls for the tunable interrupt delays */
	em_add_int_delay_sysctl(adapter, "rx_int_delay",
	    "receive interrupt delay in usecs", &adapter->rx_int_delay,
	    E1000_REG_OFFSET(&adapter->hw, RDTR), em_rx_int_delay_dflt);
	em_add_int_delay_sysctl(adapter, "tx_int_delay",
	    "transmit interrupt delay in usecs", &adapter->tx_int_delay,
	    E1000_REG_OFFSET(&adapter->hw, TIDV), em_tx_int_delay_dflt);
	if (adapter->hw.mac_type >= em_82540) {
		em_add_int_delay_sysctl(adapter, "rx_abs_int_delay",
		    "receive interrupt delay limit in usecs",
		    &adapter->rx_abs_int_delay,
		    E1000_REG_OFFSET(&adapter->hw, RADV),
		    em_rx_abs_int_delay_dflt);
		em_add_int_delay_sysctl(adapter, "tx_abs_int_delay",
		    "transmit interrupt delay limit in usecs",
		    &adapter->tx_abs_int_delay,
		    E1000_REG_OFFSET(&adapter->hw, TADV),
		    em_tx_abs_int_delay_dflt);
	}

#ifndef DEVICE_POLLING
	/* Sysctls for limiting the amount of work done in the taskqueue */
	em_add_int_process_limit(adapter, "rx_processing_limit",
	    "max number of rx packets to process", &adapter->rx_process_limit,
	    em_rx_process_limit);
#endif

	/*
	 * Validate the number of transmit and receive descriptors. It
	 * must not exceed the hardware maximum and must be a multiple
	 * of EM_DBA_ALIGN.
	 */
	if (((em_txd * sizeof(struct em_tx_desc)) % EM_DBA_ALIGN) != 0 ||
	    (adapter->hw.mac_type >= em_82544 && em_txd > EM_MAX_TXD) ||
	    (adapter->hw.mac_type < em_82544 && em_txd > EM_MAX_TXD_82543) ||
	    (em_txd < EM_MIN_TXD)) {
		device_printf(dev, "Using %d TX descriptors instead of %d!\n",
		    EM_DEFAULT_TXD, em_txd);
		adapter->num_tx_desc = EM_DEFAULT_TXD;
	} else
		adapter->num_tx_desc = em_txd;
	if (((em_rxd * sizeof(struct em_rx_desc)) % EM_DBA_ALIGN) != 0 ||
	    (adapter->hw.mac_type >= em_82544 && em_rxd > EM_MAX_RXD) ||
	    (adapter->hw.mac_type < em_82544 && em_rxd > EM_MAX_RXD_82543) ||
	    (em_rxd < EM_MIN_RXD)) {
		device_printf(dev, "Using %d RX descriptors instead of %d!\n",
		    EM_DEFAULT_RXD, em_rxd);
		adapter->num_rx_desc = EM_DEFAULT_RXD;
	} else
		adapter->num_rx_desc = em_rxd;

	adapter->hw.autoneg = DO_AUTO_NEG;
	adapter->hw.wait_autoneg_complete = WAIT_FOR_AUTO_NEG_DEFAULT;
	adapter->hw.autoneg_advertised = AUTONEG_ADV_DEFAULT;
	adapter->hw.tbi_compatibility_en = TRUE;
	adapter->rx_buffer_len = EM_RXBUFFER_2048;

	adapter->hw.phy_init_script = 1;
	adapter->hw.phy_reset_disable = FALSE;

#ifndef EM_MASTER_SLAVE
	adapter->hw.master_slave = em_ms_hw_default;
#else
	adapter->hw.master_slave = EM_MASTER_SLAVE;
#endif
	/*
	 * Set the max frame size assuming standard ethernet
	 * sized frames.
	 */
	adapter->hw.max_frame_size = ETHERMTU + ETHER_HDR_LEN + ETHER_CRC_LEN;

	adapter->hw.min_frame_size =
	    MINIMUM_ETHERNET_PACKET_SIZE + ETHER_CRC_LEN;

	/*
	 * This controls when hardware reports transmit completion
	 * status.
	 */
	adapter->hw.report_tx_early = 1;
	if (em_allocate_pci_resources(adapter)) {
		device_printf(dev, "Allocation of PCI resources failed\n");
		error = ENXIO;
		goto err_pci;
	}

	/* Initialize eeprom parameters */
	em_init_eeprom_params(&adapter->hw);

	tsize = roundup2(adapter->num_tx_desc * sizeof(struct em_tx_desc),
	    EM_DBA_ALIGN);

	/* Allocate Transmit Descriptor ring */
	if (em_dma_malloc(adapter, tsize, &adapter->txdma, BUS_DMA_NOWAIT)) {
		device_printf(dev, "Unable to allocate tx_desc memory\n");
		error = ENOMEM;
		goto err_tx_desc;
	}
	adapter->tx_desc_base = (struct em_tx_desc *)adapter->txdma.dma_vaddr;

	rsize = roundup2(adapter->num_rx_desc * sizeof(struct em_rx_desc),
	    EM_DBA_ALIGN);

	/* Allocate Receive Descriptor ring */
	if (em_dma_malloc(adapter, rsize, &adapter->rxdma, BUS_DMA_NOWAIT)) {
		device_printf(dev, "Unable to allocate rx_desc memory\n");
		error = ENOMEM;
		goto err_rx_desc;
	}
	adapter->rx_desc_base = (struct em_rx_desc *)adapter->rxdma.dma_vaddr;

	/* Initialize the hardware */
	if (em_hardware_init(adapter)) {
		device_printf(dev, "Unable to initialize the hardware\n");
		error = EIO;
		goto err_hw_init;
	}

	/* Copy the permanent MAC address out of the EEPROM */
	if (em_read_mac_addr(&adapter->hw) < 0) {
		device_printf(dev, "EEPROM read error while reading MAC"
		    " address\n");
		error = EIO;
		goto err_hw_init;
	}

	if (!em_is_valid_ether_addr(adapter->hw.mac_addr)) {
		device_printf(dev, "Invalid MAC address\n");
		error = EIO;
		goto err_hw_init;
	}

	/* Allocate transmit descriptors and buffers */
	if (em_allocate_transmit_structures(adapter)) {
		device_printf(dev, "Could not setup transmit structures\n");
		error = ENOMEM;
		goto err_tx_struct;
	}

	/* Allocate receive descriptors and buffers */
	if (em_allocate_receive_structures(adapter)) {
		device_printf(dev, "Could not setup receive structures\n");
		error = ENOMEM;
		goto err_rx_struct;
	}

	/* Setup OS specific network interface */
	em_setup_interface(dev, adapter);

	em_allocate_intr(adapter);

	/* Initialize statistics */
	em_clear_hw_cntrs(&adapter->hw);
	em_update_stats_counters(adapter);
	adapter->hw.get_link_status = 1;
	em_update_link_status(adapter);

	/* Indicate SOL/IDER usage */
	if (em_check_phy_reset_block(&adapter->hw))
		device_printf(dev,
		    "PHY reset is blocked due to SOL/IDER session.\n");

	/* Identify 82544 on PCIX */
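	/*
	 * An 82544 on a PCI-X bus needs its Tx buffers split around
	 * certain address/length combinations; em_encap() handles this
	 * via em_fill_descriptors() whenever pcix_82544 is set.
	 */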
	em_get_bus_info(&adapter->hw);
	if (adapter->hw.bus_type == em_bus_type_pcix &&
	    adapter->hw.mac_type == em_82544)
		adapter->pcix_82544 = TRUE;
	else
		adapter->pcix_82544 = FALSE;

	INIT_DEBUGOUT("em_attach: end");

	return (0);

err_rx_struct:
	em_free_transmit_structures(adapter);
err_tx_struct:
err_hw_init:
	em_dma_free(adapter, &adapter->rxdma);
err_rx_desc:
	em_dma_free(adapter, &adapter->txdma);
err_tx_desc:
err_pci:
	em_free_intr(adapter);
	em_free_pci_resources(adapter);
	EM_LOCK_DESTROY(adapter);

	return (error);
}

/*********************************************************************
 *  Device removal routine
 *
 *  The detach entry point is called when the driver is being removed.
 *  This routine stops the adapter and deallocates all the resources
 *  that were allocated for driver operation.
 *
 *  return 0 on success, positive on failure
 *********************************************************************/

static int
em_detach(device_t dev)
{
	struct adapter	*adapter = device_get_softc(dev);
	struct ifnet	*ifp = adapter->ifp;

	INIT_DEBUGOUT("em_detach: begin");

#ifdef DEVICE_POLLING
	if (ifp->if_capenable & IFCAP_POLLING)
		ether_poll_deregister(ifp);
#endif

	em_free_intr(adapter);
	EM_LOCK(adapter);
	adapter->in_detach = 1;
	em_stop(adapter);
	em_phy_hw_reset(&adapter->hw);
	EM_UNLOCK(adapter);
	ether_ifdetach(adapter->ifp);

	callout_drain(&adapter->timer);
	callout_drain(&adapter->tx_fifo_timer);

	em_free_pci_resources(adapter);
	bus_generic_detach(dev);
	if_free(ifp);

	em_free_transmit_structures(adapter);
	em_free_receive_structures(adapter);

	/* Free Transmit Descriptor ring */
	if (adapter->tx_desc_base) {
		em_dma_free(adapter, &adapter->txdma);
		adapter->tx_desc_base = NULL;
	}

	/* Free Receive Descriptor ring */
	if (adapter->rx_desc_base) {
		em_dma_free(adapter, &adapter->rxdma);
		adapter->rx_desc_base = NULL;
	}

	EM_LOCK_DESTROY(adapter);

	return (0);
}

/*********************************************************************
 *
 *  Shutdown entry point
 *
 **********************************************************************/

static int
em_shutdown(device_t dev)
{
	struct adapter *adapter = device_get_softc(dev);

	EM_LOCK(adapter);
	em_stop(adapter);
	EM_UNLOCK(adapter);
	return (0);
}

/*
 * Suspend/resume device methods.
 */
static int
em_suspend(device_t dev)
{
	struct adapter *adapter = device_get_softc(dev);

	EM_LOCK(adapter);
	em_stop(adapter);
	EM_UNLOCK(adapter);

	return (bus_generic_suspend(dev));
}

static int
em_resume(device_t dev)
{
	struct adapter *adapter = device_get_softc(dev);
	struct ifnet *ifp = adapter->ifp;

	EM_LOCK(adapter);
	em_init_locked(adapter);
	if ((ifp->if_flags & IFF_UP) &&
	    (ifp->if_drv_flags & IFF_DRV_RUNNING))
		em_start_locked(ifp);
	EM_UNLOCK(adapter);

	return (bus_generic_resume(dev));
}


/*********************************************************************
 *  Transmit entry point
 *
 *  em_start is called by the stack to initiate a transmit.
 *  The driver will remain in this routine as long as there are
 *  packets to transmit and transmit resources are available.
 *  If resources are not available, the stack is notified and
 *  the packet is requeued.
 **********************************************************************/

static void
em_start_locked(struct ifnet *ifp)
{
	struct adapter	*adapter = ifp->if_softc;
	struct mbuf	*m_head;

	EM_LOCK_ASSERT(adapter);

	if ((ifp->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) !=
	    IFF_DRV_RUNNING)
		return;
	if (!adapter->link_active)
		return;

	while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {

		IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
		if (m_head == NULL)
			break;
		/*
		 * em_encap() can modify our pointer, and/or make it NULL on
		 * failure.  In that event, we can't requeue.
		 */
		if (em_encap(adapter, &m_head)) {
			if (m_head == NULL)
				break;
			ifp->if_drv_flags |= IFF_DRV_OACTIVE;
			IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
			break;
		}

		/* Send a copy of the frame to the BPF listener */
		BPF_MTAP(ifp, m_head);

		/* Set timeout in case hardware has problems transmitting. */
		ifp->if_timer = EM_TX_TIMEOUT;
	}
}

static void
em_start(struct ifnet *ifp)
{
	struct adapter *adapter = ifp->if_softc;

	EM_LOCK(adapter);
	if (ifp->if_drv_flags & IFF_DRV_RUNNING)
		em_start_locked(ifp);
	EM_UNLOCK(adapter);
}

/*********************************************************************
 *  Ioctl entry point
 *
 *  em_ioctl is called when the user wants to configure the
 *  interface.
 *
 *  return 0 on success, positive on failure
 **********************************************************************/

static int
em_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
{
	struct adapter	*adapter = ifp->if_softc;
	struct ifreq *ifr = (struct ifreq *)data;
	struct ifaddr *ifa = (struct ifaddr *)data;
	int error = 0;

	if (adapter->in_detach)
		return (error);

	switch (command) {
	case SIOCSIFADDR:
	case SIOCGIFADDR:
		if (ifa->ifa_addr->sa_family == AF_INET) {
			/*
			 * XXX
			 * Since resetting hardware takes a very long time
			 * and results in link renegotiation, we initialize
			 * the hardware only when it is absolutely required.
			 */
			ifp->if_flags |= IFF_UP;
			if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) {
				EM_LOCK(adapter);
				em_init_locked(adapter);
				EM_UNLOCK(adapter);
			}
			arp_ifinit(ifp, ifa);
		} else
			error = ether_ioctl(ifp, command, data);
		break;
	case SIOCSIFMTU:
	    {
		int max_frame_size;
		uint16_t eeprom_data = 0;

		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFMTU (Set Interface MTU)");

		EM_LOCK(adapter);
		switch (adapter->hw.mac_type) {
		case em_82573:
			/*
			 * 82573 only supports jumbo frames
			 * if ASPM is disabled.
			 */
			em_read_eeprom(&adapter->hw, EEPROM_INIT_3GIO_3, 1,
			    &eeprom_data);
			if (eeprom_data & EEPROM_WORD1A_ASPM_MASK) {
				max_frame_size = ETHER_MAX_LEN;
				break;
			}
			/* Allow Jumbo frames - fall thru */
		case em_82571:
		case em_82572:
		case em_80003es2lan:	/* Limit Jumbo Frame size */
			max_frame_size = 9234;
			break;
		case em_ich8lan:
			/* ICH8 does not support jumbo frames */
			max_frame_size = ETHER_MAX_LEN;
			break;
		default:
			max_frame_size = MAX_JUMBO_FRAME_SIZE;
		}
		if (ifr->ifr_mtu > max_frame_size - ETHER_HDR_LEN -
		    ETHER_CRC_LEN) {
			EM_UNLOCK(adapter);
			error = EINVAL;
			break;
		}

		ifp->if_mtu = ifr->ifr_mtu;
		adapter->hw.max_frame_size =
		    ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
		em_init_locked(adapter);
		EM_UNLOCK(adapter);
		break;
	    }
	case SIOCSIFFLAGS:
		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFFLAGS (Set Interface Flags)");
		EM_LOCK(adapter);
		if (ifp->if_flags & IFF_UP) {
			if ((ifp->if_drv_flags & IFF_DRV_RUNNING)) {
				if ((ifp->if_flags ^ adapter->if_flags) &
				    IFF_PROMISC) {
					em_disable_promisc(adapter);
					em_set_promisc(adapter);
				}
			} else
				em_init_locked(adapter);
		} else {
			if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
				em_stop(adapter);
			}
		}
		adapter->if_flags = ifp->if_flags;
		EM_UNLOCK(adapter);
		break;
	case SIOCADDMULTI:
	case SIOCDELMULTI:
		IOCTL_DEBUGOUT("ioctl rcv'd: SIOC(ADD|DEL)MULTI");
		if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
			EM_LOCK(adapter);
			em_disable_intr(adapter);
			em_set_multi(adapter);
			if (adapter->hw.mac_type == em_82542_rev2_0) {
				em_initialize_receive_unit(adapter);
			}
#ifdef DEVICE_POLLING
			if (!(ifp->if_capenable & IFCAP_POLLING))
#endif
				em_enable_intr(adapter);
			EM_UNLOCK(adapter);
		}
		break;
	case SIOCSIFMEDIA:
		/* Check SOL/IDER usage */
		EM_LOCK(adapter);
		if (em_check_phy_reset_block(&adapter->hw)) {
			EM_UNLOCK(adapter);
			device_printf(adapter->dev, "Media change is"
			    " blocked due to SOL/IDER session.\n");
			break;
		}
		EM_UNLOCK(adapter);
		/* FALLTHROUGH */
	case SIOCGIFMEDIA:
		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCxIFMEDIA (Get/Set Interface Media)");
		error = ifmedia_ioctl(ifp, ifr, &adapter->media, command);
		break;
	case SIOCSIFCAP:
	    {
		int mask, reinit;

		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFCAP (Set Capabilities)");
		reinit = 0;
		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
#ifdef DEVICE_POLLING
		if (mask & IFCAP_POLLING) {
			if (ifr->ifr_reqcap & IFCAP_POLLING) {
				error = ether_poll_register(em_poll, ifp);
				if (error)
					return (error);
				EM_LOCK(adapter);
				em_disable_intr(adapter);
				ifp->if_capenable |= IFCAP_POLLING;
				EM_UNLOCK(adapter);
			} else {
				error = ether_poll_deregister(ifp);
				/* Enable interrupt even in error case */
				EM_LOCK(adapter);
				em_enable_intr(adapter);
				ifp->if_capenable &= ~IFCAP_POLLING;
				EM_UNLOCK(adapter);
			}
		}
#endif
		if (mask & IFCAP_HWCSUM) {
			ifp->if_capenable ^= IFCAP_HWCSUM;
			reinit = 1;
		}
		if (mask & IFCAP_TSO4) {
			ifp->if_capenable ^= IFCAP_TSO4;
			reinit = 1;
		}
		if (mask & IFCAP_VLAN_HWTAGGING) {
			ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
			reinit = 1;
		}
		if (reinit && (ifp->if_drv_flags & IFF_DRV_RUNNING))
			em_init(adapter);
		VLAN_CAPABILITIES(ifp);
		break;
	    }
	default:
		error = ether_ioctl(ifp, command, data);
		break;
	}

	return (error);
}

/*********************************************************************
 *  Watchdog entry point
 *
 *  This routine is called whenever hardware quits transmitting.
 *
 **********************************************************************/

static void
em_watchdog(struct ifnet *ifp)
{
	struct adapter *adapter = ifp->if_softc;

	EM_LOCK(adapter);
	/*
	 * If we are in this routine because of pause frames, then
	 * don't reset the hardware.
	 */
	if (E1000_READ_REG(&adapter->hw, STATUS) & E1000_STATUS_TXOFF) {
		ifp->if_timer = EM_TX_TIMEOUT;
		EM_UNLOCK(adapter);
		return;
	}

	/*
	 * Reclaim first, as there is a possibility of losing Tx completion
	 * interrupts.  Missing Tx completion interrupts may be caused by
	 * the Tx interrupt moderation mechanism (delayed interrupts) or by
	 * a chipset bug.
	 */
	em_txeof(adapter);
	if (adapter->num_tx_desc_avail == adapter->num_tx_desc) {
		EM_UNLOCK(adapter);
		return;
	}

	if (em_check_for_link(&adapter->hw) == 0)
		device_printf(adapter->dev, "watchdog timeout -- resetting\n");

	ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
	adapter->watchdog_events++;

	em_init_locked(adapter);
	EM_UNLOCK(adapter);
}

/*********************************************************************
 *  Init entry point
 *
 *  This routine is used in two ways. It is used by the stack as
 *  the init entry point in the network interface structure. It is
 *  also used by the driver as a hw/sw initialization routine to
 *  get to a consistent state.
 *
 **********************************************************************/

static void
em_init_locked(struct adapter *adapter)
{
	struct ifnet	*ifp = adapter->ifp;
	device_t	dev = adapter->dev;
	uint32_t	pba;

	INIT_DEBUGOUT("em_init: begin");

	EM_LOCK_ASSERT(adapter);

	em_stop(adapter);

	/*
	 * Packet Buffer Allocation (PBA)
	 * Writing PBA sets the receive portion of the buffer;
	 * the remainder is used for the transmit buffer.
	 *
	 * Devices before the 82547 had a Packet Buffer of 64K.
	 *   Default allocation: PBA=48K for Rx, leaving 16K for Tx.
	 * After the 82547 the buffer was reduced to 40K.
	 *   Default allocation: PBA=30K for Rx, leaving 10K for Tx.
	 *   Note: default does not leave enough room for Jumbo Frame >10k.
	 */
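	/*
	 * Example: an 82547 running standard-sized frames gets
	 * pba = E1000_PBA_30K, so of its 40K packet buffer 30K serves Rx
	 * and the remaining 10K becomes the Tx FIFO tracked below in
	 * tx_fifo_size.
	 */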
	switch (adapter->hw.mac_type) {
	case em_82547:
	case em_82547_rev_2: /* 82547: Total Packet Buffer is 40K */
		if (adapter->hw.max_frame_size > EM_RXBUFFER_8192)
			pba = E1000_PBA_22K; /* 22K for Rx, 18K for Tx */
		else
			pba = E1000_PBA_30K; /* 30K for Rx, 10K for Tx */
		adapter->tx_fifo_head = 0;
		adapter->tx_head_addr = pba << EM_TX_HEAD_ADDR_SHIFT;
		adapter->tx_fifo_size =
		    (E1000_PBA_40K - pba) << EM_PBA_BYTES_SHIFT;
		break;
	case em_80003es2lan: /* 80003es2lan: Total Packet Buffer is 48K */
	case em_82571: /* 82571: Total Packet Buffer is 48K */
	case em_82572: /* 82572: Total Packet Buffer is 48K */
		pba = E1000_PBA_32K; /* 32K for Rx, 16K for Tx */
		break;
	case em_82573: /* 82573: Total Packet Buffer is 32K */
		/* Jumbo frames not supported */
		pba = E1000_PBA_12K; /* 12K for Rx, 20K for Tx */
		break;
	case em_ich8lan:
		pba = E1000_PBA_8K;
		break;
	default:
		/* Devices before 82547 had a Packet Buffer of 64K. */
		if (adapter->hw.max_frame_size > EM_RXBUFFER_8192)
			pba = E1000_PBA_40K; /* 40K for Rx, 24K for Tx */
		else
			pba = E1000_PBA_48K; /* 48K for Rx, 16K for Tx */
	}

	INIT_DEBUGOUT1("em_init: pba=%dK", pba);
	E1000_WRITE_REG(&adapter->hw, PBA, pba);

	/* Get the latest mac address, user can use a LAA */
	bcopy(IF_LLADDR(adapter->ifp), adapter->hw.mac_addr, ETHER_ADDR_LEN);

	/* Initialize the hardware */
	if (em_hardware_init(adapter)) {
		device_printf(dev, "Unable to initialize the hardware\n");
		return;
	}
	em_update_link_status(adapter);

	if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING)
		em_enable_vlans(adapter);

	ifp->if_hwassist = 0;
	if (adapter->hw.mac_type >= em_82543) {
		if (ifp->if_capenable & IFCAP_TXCSUM)
			ifp->if_hwassist = EM_CHECKSUM_FEATURES;
		/*
		 * em_setup_transmit_structures() will behave differently
		 * based on the state of TSO.
		 */
		if (ifp->if_capenable & IFCAP_TSO)
			ifp->if_hwassist |= EM_TCPSEG_FEATURES;
	}

	/* Prepare transmit descriptors and buffers */
	em_setup_transmit_structures(adapter);
	em_initialize_transmit_unit(adapter);

	/* Setup Multicast table */
	em_set_multi(adapter);

	/* Prepare receive descriptors and buffers */
	if (em_setup_receive_structures(adapter)) {
		device_printf(dev, "Could not setup receive structures\n");
		em_stop(adapter);
		return;
	}
	em_initialize_receive_unit(adapter);

	/* Don't lose promiscuous settings */
	em_set_promisc(adapter);

	ifp->if_drv_flags |= IFF_DRV_RUNNING;
	ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;

	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
	em_clear_hw_cntrs(&adapter->hw);
#ifdef DEVICE_POLLING
	/*
	 * Only enable interrupts if we are not polling, make sure
	 * they are off otherwise.
	 */
	if (ifp->if_capenable & IFCAP_POLLING)
		em_disable_intr(adapter);
	else
#endif /* DEVICE_POLLING */
		em_enable_intr(adapter);

	/* Don't reset the phy next time init gets called */
	adapter->hw.phy_reset_disable = TRUE;
}

static void
em_init(void *arg)
{
	struct adapter *adapter = arg;

	EM_LOCK(adapter);
	em_init_locked(adapter);
	EM_UNLOCK(adapter);
}


#ifdef DEVICE_POLLING
/*********************************************************************
 *
 *  Legacy polling routine
 *
 *********************************************************************/
static void
em_poll(struct ifnet *ifp, enum poll_cmd cmd, int count)
{
	struct adapter *adapter = ifp->if_softc;
	uint32_t reg_icr;

	EM_LOCK(adapter);
	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
		EM_UNLOCK(adapter);
		return;
	}

	if (cmd == POLL_AND_CHECK_STATUS) {
		reg_icr = E1000_READ_REG(&adapter->hw, ICR);
		if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
			callout_stop(&adapter->timer);
			adapter->hw.get_link_status = 1;
			em_check_for_link(&adapter->hw);
			em_update_link_status(adapter);
			callout_reset(&adapter->timer, hz,
			    em_local_timer, adapter);
		}
	}
	em_rxeof(adapter, count);
	em_txeof(adapter);

	if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
		em_start_locked(ifp);
	EM_UNLOCK(adapter);
}

/*********************************************************************
 *
 *  Legacy Interrupt Service routine
 *
 *********************************************************************/
static void
em_intr(void *arg)
{
	struct adapter	*adapter = arg;
	struct ifnet	*ifp;
	uint32_t	reg_icr;

	EM_LOCK(adapter);

	ifp = adapter->ifp;

	if (ifp->if_capenable & IFCAP_POLLING) {
		EM_UNLOCK(adapter);
		return;
	}

	for (;;) {
		reg_icr = E1000_READ_REG(&adapter->hw, ICR);
		if (adapter->hw.mac_type >= em_82571 &&
		    (reg_icr & E1000_ICR_INT_ASSERTED) == 0)
			break;
		else if (reg_icr == 0)
			break;

		/*
		 * XXX: some laptops trigger several spurious interrupts
		 * on em(4) when in the resume cycle. The ICR register
		 * reports all-ones value in this case. Processing such
		 * interrupts would lead to a freeze. I don't know why.
		 */
		if (reg_icr == 0xffffffff)
			break;

		if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
			em_rxeof(adapter, -1);
			em_txeof(adapter);
		}

		/* Link status change */
		if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
			callout_stop(&adapter->timer);
			adapter->hw.get_link_status = 1;
			em_check_for_link(&adapter->hw);
			em_update_link_status(adapter);
			callout_reset(&adapter->timer, hz,
			    em_local_timer, adapter);
		}

		if (reg_icr & E1000_ICR_RXO)
			adapter->rx_overruns++;
	}

	if (ifp->if_drv_flags & IFF_DRV_RUNNING &&
	    !IFQ_DRV_IS_EMPTY(&ifp->if_snd))
		em_start_locked(ifp);

	EM_UNLOCK(adapter);
}

#else  /* if not DEVICE_POLLING, then fast interrupt routines only */

static void
em_handle_link(void *context, int pending)
{
	struct adapter	*adapter = context;
	struct ifnet *ifp;

	ifp = adapter->ifp;

	EM_LOCK(adapter);
	if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) {
		EM_UNLOCK(adapter);
		return;
	}

	callout_stop(&adapter->timer);
	adapter->hw.get_link_status = 1;
	em_check_for_link(&adapter->hw);
	em_update_link_status(adapter);
	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
	EM_UNLOCK(adapter);
}

static void
em_handle_rxtx(void *context, int pending)
{
	struct adapter	*adapter = context;
	struct ifnet	*ifp;

	NET_LOCK_GIANT();
	ifp = adapter->ifp;

	/*
	 * TODO:
	 * It should be possible to run the tx clean loop without the lock.
	 */
	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
		if (em_rxeof(adapter, adapter->rx_process_limit) != 0)
			taskqueue_enqueue(adapter->tq, &adapter->rxtx_task);
		EM_LOCK(adapter);
		em_txeof(adapter);

		if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
			em_start_locked(ifp);
		EM_UNLOCK(adapter);
	}

	em_enable_intr(adapter);
	NET_UNLOCK_GIANT();
}

/*********************************************************************
 *
 *  Fast Interrupt Service routine
 *
 *********************************************************************/
static void
em_intr_fast(void *arg)
{
	struct adapter	*adapter = arg;
	struct ifnet	*ifp;
	uint32_t	reg_icr;

	ifp = adapter->ifp;

	reg_icr = E1000_READ_REG(&adapter->hw, ICR);

	/* Hot eject? */
	if (reg_icr == 0xffffffff)
		return;

	/* Definitely not our interrupt. */
	if (reg_icr == 0x0)
		return;

	/*
	 * Starting with the 82571 chip, bit 31 should be used to
	 * determine whether the interrupt belongs to us.
	 */
	if (adapter->hw.mac_type >= em_82571 &&
	    (reg_icr & E1000_ICR_INT_ASSERTED) == 0)
		return;

	/*
	 * Mask interrupts until the taskqueue is finished running.  This is
	 * cheap, just assume that it is needed.  This also works around the
	 * MSI message reordering errata on certain systems.
	 */
	em_disable_intr(adapter);
	taskqueue_enqueue(adapter->tq, &adapter->rxtx_task);

	/* Link status change */
	if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC))
		taskqueue_enqueue(taskqueue_fast, &adapter->link_task);

	if (reg_icr & E1000_ICR_RXO)
		adapter->rx_overruns++;
}
#endif /* ! DEVICE_POLLING */

/*********************************************************************
 *
 *  Media Ioctl callback
 *
 *  This routine is called whenever the user queries the status of
 *  the interface using ifconfig.
 *
 **********************************************************************/
static void
em_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
{
	struct adapter *adapter = ifp->if_softc;

	INIT_DEBUGOUT("em_media_status: begin");

	EM_LOCK(adapter);
	em_check_for_link(&adapter->hw);
	em_update_link_status(adapter);

	ifmr->ifm_status = IFM_AVALID;
	ifmr->ifm_active = IFM_ETHER;

	if (!adapter->link_active) {
		EM_UNLOCK(adapter);
		return;
	}

	ifmr->ifm_status |= IFM_ACTIVE;

	if ((adapter->hw.media_type == em_media_type_fiber) ||
	    (adapter->hw.media_type == em_media_type_internal_serdes)) {
		if (adapter->hw.mac_type == em_82545)
			ifmr->ifm_active |= IFM_1000_LX | IFM_FDX;
		else
			ifmr->ifm_active |= IFM_1000_SX | IFM_FDX;
	} else {
		switch (adapter->link_speed) {
		case 10:
			ifmr->ifm_active |= IFM_10_T;
			break;
		case 100:
			ifmr->ifm_active |= IFM_100_TX;
			break;
		case 1000:
			ifmr->ifm_active |= IFM_1000_T;
			break;
		}
		if (adapter->link_duplex == FULL_DUPLEX)
			ifmr->ifm_active |= IFM_FDX;
		else
			ifmr->ifm_active |= IFM_HDX;
	}
	EM_UNLOCK(adapter);
}

/*********************************************************************
 *
 *  Media Ioctl callback
 *
 *  This routine is called when the user changes speed/duplex using
 *  the media/mediaopt options with ifconfig.
 *
 **********************************************************************/
static int
em_media_change(struct ifnet *ifp)
{
	struct adapter *adapter = ifp->if_softc;
	struct ifmedia  *ifm = &adapter->media;

	INIT_DEBUGOUT("em_media_change: begin");

	if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
		return (EINVAL);

	EM_LOCK(adapter);
	switch (IFM_SUBTYPE(ifm->ifm_media)) {
	case IFM_AUTO:
		adapter->hw.autoneg = DO_AUTO_NEG;
		adapter->hw.autoneg_advertised = AUTONEG_ADV_DEFAULT;
		break;
	case IFM_1000_LX:
	case IFM_1000_SX:
	case IFM_1000_T:
		adapter->hw.autoneg = DO_AUTO_NEG;
		adapter->hw.autoneg_advertised = ADVERTISE_1000_FULL;
		break;
	case IFM_100_TX:
		adapter->hw.autoneg = FALSE;
		adapter->hw.autoneg_advertised = 0;
		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
			adapter->hw.forced_speed_duplex = em_100_full;
		else
			adapter->hw.forced_speed_duplex = em_100_half;
		break;
	case IFM_10_T:
		adapter->hw.autoneg = FALSE;
		adapter->hw.autoneg_advertised = 0;
		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
			adapter->hw.forced_speed_duplex = em_10_full;
		else
			adapter->hw.forced_speed_duplex = em_10_half;
		break;
	default:
		device_printf(adapter->dev, "Unsupported media type\n");
	}

	/*
	 * As the speed/duplex settings may have changed we need to
	 * reset the PHY.
	 */
	adapter->hw.phy_reset_disable = FALSE;

	em_init_locked(adapter);
	EM_UNLOCK(adapter);

	return (0);
}

/*********************************************************************
 *
 *  This routine maps the mbufs to tx descriptors.
 *
 *  return 0 on success, positive on failure
 **********************************************************************/
static int
em_encap(struct adapter *adapter, struct mbuf **m_headp)
{
	struct ifnet		*ifp = adapter->ifp;
	bus_dma_segment_t	segs[EM_MAX_SCATTER];
	bus_dmamap_t		map;
	struct em_buffer	*tx_buffer, *tx_buffer_mapped;
	struct em_tx_desc	*current_tx_desc;
	struct mbuf		*m_head;
	uint32_t		txd_upper, txd_lower, txd_used, txd_saved;
	int			nsegs, i, j, first, last = 0;
	int			error, do_tso, tso_desc = 0;

	m_head = *m_headp;
	current_tx_desc = NULL;
	txd_upper = txd_lower = txd_used = txd_saved = 0;

	do_tso = ((m_head->m_pkthdr.csum_flags & CSUM_TSO) != 0);

	/*
	 * Force a cleanup if the number of TX descriptors
	 * available hits the threshold.
	 */
	if (adapter->num_tx_desc_avail <= EM_TX_CLEANUP_THRESHOLD) {
		em_txeof(adapter);
		if (adapter->num_tx_desc_avail <= EM_TX_CLEANUP_THRESHOLD) {
			adapter->no_tx_desc_avail1++;
			return (ENOBUFS);
		}
	}

	/*
	 * When operating in promiscuous mode, hardware stripping of the
	 * VLAN tag on receive is disabled.  This should not prevent us
	 * from doing hardware insertion of the VLAN tag here as that
	 * is controlled by the dma descriptor flags and not the receive
	 * tag strip setting.  Unfortunately this hardware switches the
	 * VLAN encapsulation type from 802.1q to ISL when stripping on
	 * receive is disabled.  This means we have to add the vlan
	 * encapsulation here in the driver, since it will have come down
	 * from the VLAN layer with a tag instead of a VLAN header.
	 */
	if ((m_head->m_flags & M_VLANTAG) && adapter->em_insert_vlan_header) {
		struct ether_vlan_header *evl;
		struct ether_header eh;

		m_head = m_pullup(m_head, sizeof(eh));
		if (m_head == NULL) {
			*m_headp = NULL;
			return (ENOBUFS);
		}
		eh = *mtod(m_head, struct ether_header *);
		M_PREPEND(m_head, sizeof(*evl), M_DONTWAIT);
		if (m_head == NULL) {
			*m_headp = NULL;
			return (ENOBUFS);
		}
		m_head = m_pullup(m_head, sizeof(*evl));
		if (m_head == NULL) {
			*m_headp = NULL;
			return (ENOBUFS);
		}
		evl = mtod(m_head, struct ether_vlan_header *);
		bcopy(&eh, evl, sizeof(*evl));
		evl->evl_proto = evl->evl_encap_proto;
		evl->evl_encap_proto = htons(ETHERTYPE_VLAN);
		evl->evl_tag = htons(m_head->m_pkthdr.ether_vtag);
		*m_headp = m_head;
	}

	/*
	 * TSO workaround:
	 *  If an mbuf contains only the IP and TCP header, we have
	 *  to pull 4 bytes of data into it.
	 */
	if (do_tso && (m_head->m_len <= M_TSO_LEN)) {
		m_head = m_pullup(m_head, M_TSO_LEN + 4);
		*m_headp = m_head;
		if (m_head == NULL)
			return (ENOBUFS);
	}

	/*
	 * Map the packet for DMA.
	 *
	 * Capture the first descriptor index,
	 * this descriptor will have the index
	 * of the EOP which is the only one that
	 * now gets a DONE bit writeback.
	 */
	first = adapter->next_avail_tx_desc;
	tx_buffer = &adapter->tx_buffer_area[first];
	tx_buffer_mapped = tx_buffer;
	map = tx_buffer->map;

	error = bus_dmamap_load_mbuf_sg(adapter->txtag, map, *m_headp, segs,
	    &nsegs, BUS_DMA_NOWAIT);

	/*
	 * There are two types of errors we can (try) to handle:
	 * - EFBIG means the mbuf chain was too long and bus_dma ran
	 *   out of segments.  Defragment the mbuf chain and try again.
	 * - ENOMEM means bus_dma could not obtain enough bounce buffers
	 *   at this point in time.  Defer sending and try again later.
	 * All other errors, in particular EINVAL, are fatal and prevent the
	 * mbuf chain from ever going through.  Drop it and report error.
	 */
	if (error == EFBIG) {
		struct mbuf *m;

		m = m_defrag(*m_headp, M_DONTWAIT);
		if (m == NULL) {
			/* Assume m_defrag(9) used only m_get(9). */
			adapter->mbuf_alloc_failed++;
			m_freem(*m_headp);
			*m_headp = NULL;
			return (ENOBUFS);
		}
		*m_headp = m;

		error = bus_dmamap_load_mbuf_sg(adapter->txtag, map, *m_headp,
		    segs, &nsegs, BUS_DMA_NOWAIT);

		if (error == ENOMEM) {
			adapter->no_tx_dma_setup++;
			return (error);
		} else if (error != 0) {
			adapter->no_tx_dma_setup++;
			m_freem(*m_headp);
			*m_headp = NULL;
			return (error);
		}
	} else if (error == ENOMEM) {
		adapter->no_tx_dma_setup++;
		return (error);
	} else if (error != 0) {
		adapter->no_tx_dma_setup++;
		m_freem(*m_headp);
		*m_headp = NULL;
		return (error);
	}
1625
1626	/*
1627	 * TSO Hardware workaround, if this packet is not
1628	 * TSO, and is only a single descriptor long, and
1629	 * it follows a TSO burst, then we need to add a
1630	 * sentinel descriptor to prevent premature writeback.
1631	 */
1632	if ((do_tso == 0) && (adapter->tx_tso == TRUE)) {
1633		if (nsegs == 1)
1634			tso_desc = TRUE;
1635		adapter->tx_tso = FALSE;
1636	}
1637
1638	if (nsegs > adapter->num_tx_desc_avail - 2) {
1639		adapter->no_tx_desc_avail2++;
1640		bus_dmamap_unload(adapter->txtag, map);
1641		return (ENOBUFS);
1642	}
1643	m_head = *m_headp;
1644
1645	/* Do hardware assists */
1646	if (ifp->if_hwassist) {
1647		if (do_tso &&
1648		    em_tso_setup(adapter, m_head, &txd_upper, &txd_lower)) {
1649			/* we need to make a final sentinel transmit desc */
1650			tso_desc = TRUE;
1651		} else
1652			em_transmit_checksum_setup(adapter,  m_head,
1653			    &txd_upper, &txd_lower);
1654	}
1655
1656	i = adapter->next_avail_tx_desc;
1657	if (adapter->pcix_82544)
1658		txd_saved = i;
1659
1660	for (j = 0; j < nsegs; j++) {
1661		bus_size_t seg_len;
1662		bus_addr_t seg_addr;
1663		/* If adapter is 82544 and on PCIX bus. */
1664		if(adapter->pcix_82544) {
1665			DESC_ARRAY	desc_array;
1666			uint32_t	array_elements, counter;
1667
1668			/*
1669			 * Check the Address and Length combination and
1670			 * split the data accordingly
1671			 */
1672			array_elements = em_fill_descriptors(segs[j].ds_addr,
1673			    segs[j].ds_len, &desc_array);
1674			for (counter = 0; counter < array_elements; counter++) {
1675				if (txd_used == adapter->num_tx_desc_avail) {
1676					adapter->next_avail_tx_desc = txd_saved;
1677					adapter->no_tx_desc_avail2++;
1678					bus_dmamap_unload(adapter->txtag, map);
1679					return (ENOBUFS);
1680				}
1681				tx_buffer = &adapter->tx_buffer_area[i];
1682				current_tx_desc = &adapter->tx_desc_base[i];
1683				current_tx_desc->buffer_addr = htole64(
1684					desc_array.descriptor[counter].address);
1685				current_tx_desc->lower.data = htole32(
1686					(adapter->txd_cmd | txd_lower |
1687					(uint16_t)desc_array.descriptor[counter].length));
1688				current_tx_desc->upper.data = htole32((txd_upper));
1689				last = i;
1690				if (++i == adapter->num_tx_desc)
1691					i = 0;
1692
1693				tx_buffer->m_head = NULL;
1694				tx_buffer->next_eop = -1;
1695				txd_used++;
1696			}
1697		} else {
1698			tx_buffer = &adapter->tx_buffer_area[i];
1699			current_tx_desc = &adapter->tx_desc_base[i];
1700			seg_addr = htole64(segs[j].ds_addr);
1701			seg_len  = segs[j].ds_len;
1702			/*
1703			** TSO Workaround:
1704			** If this is the last descriptor, we want to
1705			** split it so we have a small final sentinel
1706			*/
1707			if (tso_desc && (j == (nsegs -1)) && (seg_len > 8)) {
1708				seg_len -= 4;
1709				current_tx_desc->buffer_addr = seg_addr;
1710				current_tx_desc->lower.data = htole32(
1711				adapter->txd_cmd | txd_lower | seg_len);
1712				current_tx_desc->upper.data =
1713				    htole32(txd_upper);
1714				if (++i == adapter->num_tx_desc)
1715					i = 0;
1716				/* Now make the sentinel */
1717				++txd_used; /* using an extra txd */
1718				current_tx_desc = &adapter->tx_desc_base[i];
1719				tx_buffer = &adapter->tx_buffer_area[i];
1720				current_tx_desc->buffer_addr =
1721				    htole64(seg_addr + seg_len);
1722				current_tx_desc->lower.data = htole32(
1723				adapter->txd_cmd | txd_lower | 4);
1724				current_tx_desc->upper.data =
1725				    htole32(txd_upper);
1726				last = i;
1727				if (++i == adapter->num_tx_desc)
1728					i = 0;
1729			} else {
1730				current_tx_desc->buffer_addr = htole64(seg_addr);
1731				current_tx_desc->lower.data = htole32(
1732				adapter->txd_cmd | txd_lower | seg_len);
1733				current_tx_desc->upper.data =
1734				    htole32(txd_upper);
1735				last = i;
1736				if (++i == adapter->num_tx_desc)
1737					i = 0;
1738			}
1739			tx_buffer->m_head = NULL;
1740			tx_buffer->next_eop = -1;
1741		}
1742	}
1743
1744	adapter->next_avail_tx_desc = i;
1745	if (adapter->pcix_82544)
1746		adapter->num_tx_desc_avail -= txd_used;
1747	else {
1748		adapter->num_tx_desc_avail -= nsegs;
1749		if (tso_desc) /* TSO used an extra for sentinel */
1750			adapter->num_tx_desc_avail -= txd_used;
1751	}
1752
1753	if (m_head->m_flags & M_VLANTAG) {
1754		/* Set the vlan id. */
1755		current_tx_desc->upper.fields.special =
1756		    htole16(m_head->m_pkthdr.ether_vtag);
1757
1758		/* Tell hardware to add tag. */
1759		current_tx_desc->lower.data |= htole32(E1000_TXD_CMD_VLE);
1760	}
1761
1762	tx_buffer->m_head = m_head;
1763	tx_buffer_mapped->map = tx_buffer->map;
1764	tx_buffer->map = map;
1765	bus_dmamap_sync(adapter->txtag, map, BUS_DMASYNC_PREWRITE);
1766
1767	/*
1768	 * Last Descriptor of Packet
1769	 * needs End Of Packet (EOP)
1770	 * and Report Status (RS)
1771	 */
1772	current_tx_desc->lower.data |=
1773	    htole32(E1000_TXD_CMD_EOP | E1000_TXD_CMD_RS);
1774	/*
1775	 * Keep track in the first buffer which
1776	 * descriptor will be written back
1777	 */
1778	tx_buffer = &adapter->tx_buffer_area[first];
1779	tx_buffer->next_eop = last;
1780
1781	/*
1782	 * Advance the Transmit Descriptor Tail (Tdt), this tells the E1000
1783	 * that this frame is available to transmit.
1784	 */
1785	bus_dmamap_sync(adapter->txdma.dma_tag, adapter->txdma.dma_map,
1786	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1787
1788	if (adapter->hw.mac_type == em_82547 &&
1789	    adapter->link_duplex == HALF_DUPLEX)
1790		em_82547_move_tail(adapter);
1791	else {
1792		E1000_WRITE_REG(&adapter->hw, TDT, i);
1793		if (adapter->hw.mac_type == em_82547)
1794			em_82547_update_fifo_head(adapter, m_head->m_pkthdr.len);
1795	}
1796
1797	return (0);
1798}
1799
1800/*********************************************************************
1801 *
1802 * 82547 workaround to avoid controller hang in a half-duplex environment.
1803 * The workaround is to avoid queuing a large packet that would span
1804 * the internal Tx FIFO ring boundary; when that would happen we reset
1805 * the FIFO pointers, but only once the FIFO is quiescent.
1806 *
1807 **********************************************************************/
1808static void
1809em_82547_move_tail(void *arg)
1810{
1811	struct adapter *adapter = arg;
1812	uint16_t hw_tdt;
1813	uint16_t sw_tdt;
1814	struct em_tx_desc *tx_desc;
1815	uint16_t length = 0;
1816	boolean_t eop = 0;
1817
1818	EM_LOCK_ASSERT(adapter);
1819
1820	hw_tdt = E1000_READ_REG(&adapter->hw, TDT);
1821	sw_tdt = adapter->next_avail_tx_desc;
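	/*
	 * Descriptors between the hardware tail (TDT) and the software
	 * tail are queued but not yet visible to the controller; advance
	 * TDT one whole packet (EOP to EOP) at a time, and only while
	 * the FIFO has room for that packet.
	 */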
1822
1823	while (hw_tdt != sw_tdt) {
1824		tx_desc = &adapter->tx_desc_base[hw_tdt];
1825		length += tx_desc->lower.flags.length;
1826		eop = tx_desc->lower.data & E1000_TXD_CMD_EOP;
1827		if (++hw_tdt == adapter->num_tx_desc)
1828			hw_tdt = 0;
1829
1830		if (eop) {
1831			if (em_82547_fifo_workaround(adapter, length)) {
1832				adapter->tx_fifo_wrk_cnt++;
1833				callout_reset(&adapter->tx_fifo_timer, 1,
1834					em_82547_move_tail, adapter);
1835				break;
1836			}
1837			E1000_WRITE_REG(&adapter->hw, TDT, hw_tdt);
1838			em_82547_update_fifo_head(adapter, length);
1839			length = 0;
1840		}
1841	}
1842}
1843
1844static int
1845em_82547_fifo_workaround(struct adapter *adapter, int len)
1846{
1847	int fifo_space, fifo_pkt_len;
1848
1849	fifo_pkt_len = roundup2(len + EM_FIFO_HDR, EM_FIFO_HDR);
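	/*
	 * roundup2(x, y) rounds x up to the next multiple of y (a power
	 * of two).  Illustrative example, assuming EM_FIFO_HDR is 16:
	 * a 60-byte runt becomes roundup2(60 + 16, 16) = 80 bytes of
	 * FIFO space.
	 */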
1850
1851	if (adapter->link_duplex == HALF_DUPLEX) {
1852		fifo_space = adapter->tx_fifo_size - adapter->tx_fifo_head;
1853
1854		if (fifo_pkt_len >= (EM_82547_PKT_THRESH + fifo_space)) {
1855			if (em_82547_tx_fifo_reset(adapter))
1856				return (0);
1857			else
1858				return (1);
1859		}
1860	}
1861
1862	return (0);
1863}
1864
1865static void
1866em_82547_update_fifo_head(struct adapter *adapter, int len)
1867{
1868	int fifo_pkt_len = roundup2(len + EM_FIFO_HDR, EM_FIFO_HDR);
1869
1870	/* tx_fifo_head is always 16 byte aligned */
1871	adapter->tx_fifo_head += fifo_pkt_len;
1872	if (adapter->tx_fifo_head >= adapter->tx_fifo_size) {
1873		adapter->tx_fifo_head -= adapter->tx_fifo_size;
1874	}
1875}
1876
1877
1878static int
1879em_82547_tx_fifo_reset(struct adapter *adapter)
1880{
1881	uint32_t tctl;
1882
1883	if ((E1000_READ_REG(&adapter->hw, TDT) == E1000_READ_REG(&adapter->hw, TDH)) &&
1884	    (E1000_READ_REG(&adapter->hw, TDFT) == E1000_READ_REG(&adapter->hw, TDFH)) &&
1885	    (E1000_READ_REG(&adapter->hw, TDFTS) == E1000_READ_REG(&adapter->hw, TDFHS)) &&
1886	    (E1000_READ_REG(&adapter->hw, TDFPC) == 0)) {
1887
1888		/* Disable TX unit */
1889		tctl = E1000_READ_REG(&adapter->hw, TCTL);
1890		E1000_WRITE_REG(&adapter->hw, TCTL, tctl & ~E1000_TCTL_EN);
1891
1892		/* Reset FIFO pointers */
1893		E1000_WRITE_REG(&adapter->hw, TDFT,  adapter->tx_head_addr);
1894		E1000_WRITE_REG(&adapter->hw, TDFH,  adapter->tx_head_addr);
1895		E1000_WRITE_REG(&adapter->hw, TDFTS, adapter->tx_head_addr);
1896		E1000_WRITE_REG(&adapter->hw, TDFHS, adapter->tx_head_addr);
1897
1898		/* Re-enable TX unit */
1899		E1000_WRITE_REG(&adapter->hw, TCTL, tctl);
1900		E1000_WRITE_FLUSH(&adapter->hw);
1901
1902		adapter->tx_fifo_head = 0;
1903		adapter->tx_fifo_reset_cnt++;
1904
1905		return (TRUE);
1906	}
1907	else {
1908		return (FALSE);
1909	}
1910}
1911
1912static void
1913em_set_promisc(struct adapter *adapter)
1914{
1915	struct ifnet	*ifp = adapter->ifp;
1916	uint32_t	reg_rctl;
1917
1918	reg_rctl = E1000_READ_REG(&adapter->hw, RCTL);
1919
1920	if (ifp->if_flags & IFF_PROMISC) {
1921		reg_rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
1922		E1000_WRITE_REG(&adapter->hw, RCTL, reg_rctl);
1923		/*
1924		 * Disable VLAN stripping in promiscuous mode.
1925		 * This allows bridging of VLAN-tagged frames
1926		 * and also lets VLAN tags be seen in tcpdump.
1927		 * XXX: This is a bit bogus as tcpdump may be used
1928		 * w/o promisc mode as well.
1929		 */
1930		if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING)
1931			em_disable_vlans(adapter);
1932		adapter->em_insert_vlan_header = 1;
1933	} else if (ifp->if_flags & IFF_ALLMULTI) {
1934		reg_rctl |= E1000_RCTL_MPE;
1935		reg_rctl &= ~E1000_RCTL_UPE;
1936		E1000_WRITE_REG(&adapter->hw, RCTL, reg_rctl);
1937		adapter->em_insert_vlan_header = 0;
1938	} else
1939		adapter->em_insert_vlan_header = 0;
1940}
1941
1942static void
1943em_disable_promisc(struct adapter *adapter)
1944{
1945	struct ifnet	*ifp = adapter->ifp;
1946	uint32_t	reg_rctl;
1947
1948	reg_rctl = E1000_READ_REG(&adapter->hw, RCTL);
1949
1950	reg_rctl &= ~E1000_RCTL_UPE;
1951	reg_rctl &= ~E1000_RCTL_MPE;
1952	E1000_WRITE_REG(&adapter->hw, RCTL, reg_rctl);
1953
1954	if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING)
1955		em_enable_vlans(adapter);
1956	adapter->em_insert_vlan_header = 0;
1957}
1958
1959
1960/*********************************************************************
1961 *  Multicast Update
1962 *
1963 *  This routine is called whenever multicast address list is updated.
1964 *
1965 **********************************************************************/
1966
1967static void
1968em_set_multi(struct adapter *adapter)
1969{
1970	struct ifnet	*ifp = adapter->ifp;
1971	struct ifmultiaddr *ifma;
1972	uint32_t reg_rctl = 0;
1973	uint8_t  mta[MAX_NUM_MULTICAST_ADDRESSES * ETH_LENGTH_OF_ADDRESS];
1974	int mcnt = 0;
1975
1976	IOCTL_DEBUGOUT("em_set_multi: begin");
1977
1978	if (adapter->hw.mac_type == em_82542_rev2_0) {
1979		reg_rctl = E1000_READ_REG(&adapter->hw, RCTL);
1980		if (adapter->hw.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
1981			em_pci_clear_mwi(&adapter->hw);
1982		reg_rctl |= E1000_RCTL_RST;
1983		E1000_WRITE_REG(&adapter->hw, RCTL, reg_rctl);
1984		msec_delay(5);
1985	}
1986
1987	IF_ADDR_LOCK(ifp);
1988	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
1989		if (ifma->ifma_addr->sa_family != AF_LINK)
1990			continue;
1991
1992		if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
1993			break;
1994
1995		bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
1996		    &mta[mcnt*ETH_LENGTH_OF_ADDRESS], ETH_LENGTH_OF_ADDRESS);
1997		mcnt++;
1998	}
1999	IF_ADDR_UNLOCK(ifp);
2000
2001	if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES) {
2002		reg_rctl = E1000_READ_REG(&adapter->hw, RCTL);
2003		reg_rctl |= E1000_RCTL_MPE;
2004		E1000_WRITE_REG(&adapter->hw, RCTL, reg_rctl);
2005	} else
2006		em_mc_addr_list_update(&adapter->hw, mta, mcnt, 0, 1);
2007
2008	if (adapter->hw.mac_type == em_82542_rev2_0) {
2009		reg_rctl = E1000_READ_REG(&adapter->hw, RCTL);
2010		reg_rctl &= ~E1000_RCTL_RST;
2011		E1000_WRITE_REG(&adapter->hw, RCTL, reg_rctl);
2012		msec_delay(5);
2013		if (adapter->hw.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
2014			em_pci_set_mwi(&adapter->hw);
2015	}
2016}
2017
2018
2019/*********************************************************************
2020 *  Timer routine
2021 *
2022 *  This routine checks for link status and updates statistics.
2023 *
2024 **********************************************************************/
2025
2026static void
2027em_local_timer(void *arg)
2028{
2029	struct adapter	*adapter = arg;
2030	struct ifnet	*ifp = adapter->ifp;
2031
2032	EM_LOCK_ASSERT(adapter);
2033
2034	em_check_for_link(&adapter->hw);
2035	em_update_link_status(adapter);
2036	em_update_stats_counters(adapter);
2037	if (em_display_debug_stats && (ifp->if_drv_flags & IFF_DRV_RUNNING))
2038		em_print_hw_stats(adapter);
2039	em_smartspeed(adapter);
2040
2041	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
2042}
2043
2044static void
2045em_update_link_status(struct adapter *adapter)
2046{
2047	struct ifnet *ifp = adapter->ifp;
2048	device_t dev = adapter->dev;
2049
2050	if (E1000_READ_REG(&adapter->hw, STATUS) & E1000_STATUS_LU) {
2051		if (adapter->link_active == 0) {
2052			em_get_speed_and_duplex(&adapter->hw, &adapter->link_speed,
2053			    &adapter->link_duplex);
2054			/* Check if we may set SPEED_MODE bit on PCI-E */
2055			if ((adapter->link_speed == SPEED_1000) &&
2056			    ((adapter->hw.mac_type == em_82571) ||
2057			    (adapter->hw.mac_type == em_82572))) {
2058				int tarc0;
2059
2060				tarc0 = E1000_READ_REG(&adapter->hw, TARC0);
2061				tarc0 |= SPEED_MODE_BIT;
2062				E1000_WRITE_REG(&adapter->hw, TARC0, tarc0);
2063			}
2064			if (bootverbose)
2065				device_printf(dev, "Link is up %d Mbps %s\n",
2066				    adapter->link_speed,
2067				    ((adapter->link_duplex == FULL_DUPLEX) ?
2068				    "Full Duplex" : "Half Duplex"));
2069			adapter->link_active = 1;
2070			adapter->smartspeed = 0;
2071			ifp->if_baudrate = adapter->link_speed * 1000000;
2072			if_link_state_change(ifp, LINK_STATE_UP);
2073		}
2074	} else {
2075		if (adapter->link_active == 1) {
2076			ifp->if_baudrate = adapter->link_speed = 0;
2077			adapter->link_duplex = 0;
2078			if (bootverbose)
2079				device_printf(dev, "Link is Down\n");
2080			adapter->link_active = 0;
2081			if_link_state_change(ifp, LINK_STATE_DOWN);
2082		}
2083	}
2084}
2085
2086/*********************************************************************
2087 *
2088 *  This routine disables all traffic on the adapter by issuing a
2089 *  global reset on the MAC and deallocates TX/RX buffers.
2090 *
2091 **********************************************************************/
2092
2093static void
2094em_stop(void *arg)
2095{
2096	struct adapter	*adapter = arg;
2097	struct ifnet	*ifp = adapter->ifp;
2098
2099	EM_LOCK_ASSERT(adapter);
2100
2101	INIT_DEBUGOUT("em_stop: begin");
2102
2103	em_disable_intr(adapter);
2104	em_reset_hw(&adapter->hw);
2105	callout_stop(&adapter->timer);
2106	callout_stop(&adapter->tx_fifo_timer);
2107
2108	/* Tell the stack that the interface is no longer active */
2109	ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
2110}
2111
2112
2113/********************************************************************
2114 *
2115 *  Determine hardware revision.
2116 *
2117 **********************************************************************/
2118static void
2119em_identify_hardware(struct adapter *adapter)
2120{
2121	device_t dev = adapter->dev;
2122
2123	/* Make sure our PCI config space has the necessary stuff set */
2124	pci_enable_busmaster(dev);
2125	pci_enable_io(dev, SYS_RES_MEMORY);
2126	adapter->hw.pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2);
2127
2128	/* Save off the information about this board */
2129	adapter->hw.vendor_id = pci_get_vendor(dev);
2130	adapter->hw.device_id = pci_get_device(dev);
2131	adapter->hw.revision_id = pci_get_revid(dev);
2132	adapter->hw.subsystem_vendor_id = pci_get_subvendor(dev);
2133	adapter->hw.subsystem_id = pci_get_subdevice(dev);
2134
2135	/* Identify the MAC */
2136	if (em_set_mac_type(&adapter->hw))
2137		device_printf(dev, "Unknown MAC Type\n");
2138
2139	if (adapter->hw.mac_type == em_82541 || adapter->hw.mac_type == em_82541_rev_2 ||
2140	   adapter->hw.mac_type == em_82547 || adapter->hw.mac_type == em_82547_rev_2)
2141		adapter->hw.phy_init_script = TRUE;
2142}
2143
2144static int
2145em_allocate_pci_resources(struct adapter *adapter)
2146{
2147	device_t	dev = adapter->dev;
2148	int		val, rid;
2149
2150	rid = PCIR_BAR(0);
2151	adapter->res_memory = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
2152	    &rid, RF_ACTIVE);
2153	if (adapter->res_memory == NULL) {
2154		device_printf(dev, "Unable to allocate bus resource: memory\n");
2155		return (ENXIO);
2156	}
2157	adapter->osdep.mem_bus_space_tag =
2158	    rman_get_bustag(adapter->res_memory);
2159	adapter->osdep.mem_bus_space_handle = rman_get_bushandle(adapter->res_memory);
2160	adapter->hw.hw_addr = (uint8_t *)&adapter->osdep.mem_bus_space_handle;
2161
2162	if (adapter->hw.mac_type > em_82543) {
2163		/* Figure out where our IO BAR is. */
2164		for (rid = PCIR_BAR(0); rid < PCIR_CIS;) {
2165			val = pci_read_config(dev, rid, 4);
2166			if (EM_BAR_TYPE(val) == EM_BAR_TYPE_IO) {
2167				adapter->io_rid = rid;
2168				break;
2169			}
2170			rid += 4;
2171			/* check for 64bit BAR */
2172			if (EM_BAR_MEM_TYPE(val) == EM_BAR_MEM_TYPE_64BIT)
2173				rid += 4;
2174		}
2175		if (rid >= PCIR_CIS) {
2176			device_printf(dev, "Unable to locate IO BAR\n");
2177			return (ENXIO);
2178		}
2179		adapter->res_ioport = bus_alloc_resource_any(dev, SYS_RES_IOPORT,
2180		    &adapter->io_rid, RF_ACTIVE);
2181		if (adapter->res_ioport == NULL) {
2182			device_printf(dev, "Unable to allocate bus resource: "
2183			    "ioport\n");
2184			return (ENXIO);
2185		}
2186		adapter->hw.io_base = 0;
2187		adapter->osdep.io_bus_space_tag = rman_get_bustag(adapter->res_ioport);
2188		adapter->osdep.io_bus_space_handle =
2189		    rman_get_bushandle(adapter->res_ioport);
2190	}
2191
2192	/* For ICH8 we need to find the flash memory. */
2193	if (adapter->hw.mac_type == em_ich8lan) {
2194		rid = EM_FLASH;
2195
2196		adapter->flash_mem = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
2197		    &rid, RF_ACTIVE);
2198		adapter->osdep.flash_bus_space_tag = rman_get_bustag(adapter->flash_mem);
2199		adapter->osdep.flash_bus_space_handle =
2200		    rman_get_bushandle(adapter->flash_mem);
2201	}
2202
2203	rid = 0x0;
2204	adapter->res_interrupt = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid,
2205	    RF_SHAREABLE | RF_ACTIVE);
2206	if (adapter->res_interrupt == NULL) {
2207		device_printf(dev, "Unable to allocate bus resource: "
2208		    "interrupt\n");
2209		return (ENXIO);
2210	}
2211
2212	adapter->hw.back = &adapter->osdep;
2213
2214	return (0);
2215}
2216
2217int
2218em_allocate_intr(struct adapter *adapter)
2219{
2220	device_t dev = adapter->dev;
2221	int error;
2222
2223	/* Manually turn off all interrupts */
2224	E1000_WRITE_REG(&adapter->hw, IMC, 0xffffffff);
2225
2226#ifdef DEVICE_POLLING
2227	if (adapter->int_handler_tag == NULL && (error = bus_setup_intr(dev,
2228	    adapter->res_interrupt, INTR_TYPE_NET | INTR_MPSAFE, em_intr, adapter,
2229	    &adapter->int_handler_tag)) != 0) {
2230		device_printf(dev, "Failed to register interrupt handler");
2231		return (error);
2232	}
2233#else
2234	/*
2235	 * Try allocating a fast interrupt and the associated deferred
2236	 * processing contexts.
2237	 */
2238	TASK_INIT(&adapter->rxtx_task, 0, em_handle_rxtx, adapter);
2239	TASK_INIT(&adapter->link_task, 0, em_handle_link, adapter);
2240	adapter->tq = taskqueue_create_fast("em_taskq", M_NOWAIT,
2241	    taskqueue_thread_enqueue, &adapter->tq);
2242	taskqueue_start_threads(&adapter->tq, 1, PI_NET, "%s taskq",
2243	    device_get_nameunit(adapter->dev));
2244	if ((error = bus_setup_intr(dev, adapter->res_interrupt,
2245	    INTR_TYPE_NET | INTR_FAST, em_intr_fast, adapter,
2246	    &adapter->int_handler_tag)) != 0) {
2247		device_printf(dev, "Failed to register fast interrupt "
2248			    "handler: %d\n", error);
2249		taskqueue_free(adapter->tq);
2250		adapter->tq = NULL;
2251		return (error);
2252	}
2253#endif
2254
2255	em_enable_intr(adapter);
2256	return (0);
2257}
2258
2259static void
2260em_free_intr(struct adapter *adapter)
2261{
2262	device_t dev = adapter->dev;
2263
2264	if (adapter->int_handler_tag != NULL) {
2265		bus_teardown_intr(dev, adapter->res_interrupt, adapter->int_handler_tag);
2266		adapter->int_handler_tag = NULL;
2267	}
2268	if (adapter->tq != NULL) {
2269		taskqueue_drain(adapter->tq, &adapter->rxtx_task);
2270		taskqueue_drain(taskqueue_fast, &adapter->link_task);
2271		taskqueue_free(adapter->tq);
2272		adapter->tq = NULL;
2273	}
2274}
2275
2276static void
2277em_free_pci_resources(struct adapter *adapter)
2278{
2279	device_t dev = adapter->dev;
2280
2281	if (adapter->res_interrupt != NULL)
2282		bus_release_resource(dev, SYS_RES_IRQ, 0, adapter->res_interrupt);
2283
2284	if (adapter->res_memory != NULL)
2285		bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BAR(0),
2286		    adapter->res_memory);
2287
2288	if (adapter->flash_mem != NULL)
2289		bus_release_resource(dev, SYS_RES_MEMORY, EM_FLASH,
2290		    adapter->flash_mem);
2291
2292	if (adapter->res_ioport != NULL)
2293		bus_release_resource(dev, SYS_RES_IOPORT, adapter->io_rid,
2294		    adapter->res_ioport);
2295}
2296
2297/*********************************************************************
2298 *
2299 *  Initialize the hardware to a configuration as specified by the
2300 *  adapter structure. The controller is reset, the EEPROM is
2301 *  verified, the MAC address is set, then the shared initialization
2302 *  routines are called.
2303 *
2304 **********************************************************************/
2305static int
2306em_hardware_init(struct adapter *adapter)
2307{
2308	device_t dev = adapter->dev;
2309	uint16_t rx_buffer_size;
2310
2311	INIT_DEBUGOUT("em_hardware_init: begin");
2312	/* Issue a global reset */
2313	em_reset_hw(&adapter->hw);
2314
2315	/* When hardware is reset, fifo_head is also reset */
2316	adapter->tx_fifo_head = 0;
2317
2318	/* Make sure we have a good EEPROM before we read from it */
2319	if (em_validate_eeprom_checksum(&adapter->hw) < 0) {
2320		device_printf(dev, "The EEPROM Checksum Is Not Valid\n");
2321		return (EIO);
2322	}
2323
2324	if (em_read_part_num(&adapter->hw, &(adapter->part_num)) < 0) {
2325		device_printf(dev, "EEPROM read error while reading part "
2326		    "number\n");
2327		return (EIO);
2328	}
2329
2330	/* Set up smart power down as default off on newer adapters. */
2331	if (!em_smart_pwr_down &&
2332	    (adapter->hw.mac_type == em_82571 || adapter->hw.mac_type == em_82572)) {
2333		uint16_t phy_tmp = 0;
2334
2335		/* Speed up time to link by disabling smart power down. */
2336		em_read_phy_reg(&adapter->hw, IGP02E1000_PHY_POWER_MGMT, &phy_tmp);
2337		phy_tmp &= ~IGP02E1000_PM_SPD;
2338		em_write_phy_reg(&adapter->hw, IGP02E1000_PHY_POWER_MGMT, phy_tmp);
2339	}
2340
2341	/*
2342	 * These parameters control the automatic generation (Tx) and
2343	 * response (Rx) to Ethernet PAUSE frames.
2344	 * - High water mark should allow for at least two frames to be
2345	 *   received after sending an XOFF.
2346	 * - Low water mark works best when it is very near the high water mark.
2347	 *   This allows the receiver to restart by sending XON when it has
2348	 *   drained a bit. Here we use an arbitrary value of 1500, which will
2349	 *   restart after one full frame is pulled from the buffer. There
2350	 *   could be several smaller frames in the buffer and if so they will
2351	 *   not trigger the XON until their total number reduces the buffer
2352	 *   by 1500.
2353	 * - The pause time is fairly large at 1000 x 512ns = 512 usec.
2354	 */
2355	rx_buffer_size = ((E1000_READ_REG(&adapter->hw, PBA) & 0xffff) << 10);
2356
2357	adapter->hw.fc_high_water = rx_buffer_size -
2358	    roundup2(adapter->hw.max_frame_size, 1024);
2359	adapter->hw.fc_low_water = adapter->hw.fc_high_water - 1500;
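	/*
	 * Illustrative example: if the PBA register reports 48KB of Rx
	 * packet buffer and max_frame_size is 1522, then fc_high_water =
	 * 49152 - roundup2(1522, 1024) = 49152 - 2048 = 47104 and
	 * fc_low_water = 47104 - 1500 = 45604.
	 */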
2360	if (adapter->hw.mac_type == em_80003es2lan)
2361		adapter->hw.fc_pause_time = 0xFFFF;
2362	else
2363		adapter->hw.fc_pause_time = 0x1000;
2364	adapter->hw.fc_send_xon = TRUE;
2365	adapter->hw.fc = E1000_FC_FULL;
2366
2367	if (em_init_hw(&adapter->hw) < 0) {
2368		device_printf(dev, "Hardware Initialization Failed");
2369		return (EIO);
2370	}
2371
2372	em_check_for_link(&adapter->hw);
2373
2374	return (0);
2375}
2376
2377/*********************************************************************
2378 *
2379 *  Setup networking device structure and register an interface.
2380 *
2381 **********************************************************************/
2382static void
2383em_setup_interface(device_t dev, struct adapter *adapter)
2384{
2385	struct ifnet   *ifp;
2386	INIT_DEBUGOUT("em_setup_interface: begin");
2387
2388	ifp = adapter->ifp = if_alloc(IFT_ETHER);
2389	if (ifp == NULL)
2390		panic("%s: can not if_alloc()", device_get_nameunit(dev));
2391	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
2392	ifp->if_mtu = ETHERMTU;
2393	ifp->if_init =  em_init;
2394	ifp->if_softc = adapter;
2395	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
2396	ifp->if_ioctl = em_ioctl;
2397	ifp->if_start = em_start;
2398	ifp->if_watchdog = em_watchdog;
2399	IFQ_SET_MAXLEN(&ifp->if_snd, adapter->num_tx_desc - 1);
2400	ifp->if_snd.ifq_drv_maxlen = adapter->num_tx_desc - 1;
2401	IFQ_SET_READY(&ifp->if_snd);
2402
2403	ether_ifattach(ifp, adapter->hw.mac_addr);
2404
2405	ifp->if_capabilities = ifp->if_capenable = 0;
2406
2407	if (adapter->hw.mac_type >= em_82543) {
2408		ifp->if_capabilities |= IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM;
2409		ifp->if_capenable |= IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM;
2410	}
2411
2412	/* Enable TSO if available */
2413	if ((adapter->hw.mac_type > em_82544) &&
2414	    (adapter->hw.mac_type != em_82547)) {
2415		ifp->if_capabilities |= IFCAP_TSO4;
2416		ifp->if_capenable |= IFCAP_TSO4;
2417	}
2418
2419	/*
2420	 * Tell the upper layer(s) we support long frames.
2421	 */
2422	ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header);
2423	ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU;
2424	ifp->if_capenable |= IFCAP_VLAN_MTU;
2425
2426#ifdef DEVICE_POLLING
2427	ifp->if_capabilities |= IFCAP_POLLING;
2428#endif
2429
2430	/*
2431	 * Specify the media types supported by this adapter and register
2432	 * callbacks to update media and link information
2433	 */
2434	ifmedia_init(&adapter->media, IFM_IMASK, em_media_change,
2435	    em_media_status);
2436	if ((adapter->hw.media_type == em_media_type_fiber) ||
2437	    (adapter->hw.media_type == em_media_type_internal_serdes)) {
2438		u_char fiber_type = IFM_1000_SX;	/* default type */
2439
2440		if (adapter->hw.mac_type == em_82545)
2441			fiber_type = IFM_1000_LX;
2442		ifmedia_add(&adapter->media, IFM_ETHER | fiber_type | IFM_FDX,
2443		    0, NULL);
2444		ifmedia_add(&adapter->media, IFM_ETHER | fiber_type, 0, NULL);
2445	} else {
2446		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T, 0, NULL);
2447		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T | IFM_FDX,
2448			    0, NULL);
2449		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX,
2450			    0, NULL);
2451		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX | IFM_FDX,
2452			    0, NULL);
2453		if (adapter->hw.phy_type != em_phy_ife) {
2454			ifmedia_add(&adapter->media,
2455				IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
2456			ifmedia_add(&adapter->media,
2457				IFM_ETHER | IFM_1000_T, 0, NULL);
2458		}
2459	}
2460	ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL);
2461	ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO);
2462}
2463
2464
2465/*********************************************************************
2466 *
2467 *  Workaround for SmartSpeed on 82541 and 82547 controllers
2468 *
2469 **********************************************************************/
2470static void
2471em_smartspeed(struct adapter *adapter)
2472{
2473	uint16_t phy_tmp;
2474
2475	if (adapter->link_active || (adapter->hw.phy_type != em_phy_igp) ||
2476	    adapter->hw.autoneg == 0 ||
2477	    (adapter->hw.autoneg_advertised & ADVERTISE_1000_FULL) == 0)
2478		return;
2479
2480	if (adapter->smartspeed == 0) {
2481		/* If the Master/Slave config fault is asserted twice,
2482		 * we assume the faults are back-to-back. */
2483		em_read_phy_reg(&adapter->hw, PHY_1000T_STATUS, &phy_tmp);
2484		if (!(phy_tmp & SR_1000T_MS_CONFIG_FAULT))
2485			return;
2486		em_read_phy_reg(&adapter->hw, PHY_1000T_STATUS, &phy_tmp);
2487		if (phy_tmp & SR_1000T_MS_CONFIG_FAULT) {
2488			em_read_phy_reg(&adapter->hw, PHY_1000T_CTRL, &phy_tmp);
2489			if (phy_tmp & CR_1000T_MS_ENABLE) {
2490				phy_tmp &= ~CR_1000T_MS_ENABLE;
2491				em_write_phy_reg(&adapter->hw, PHY_1000T_CTRL,
2492				    phy_tmp);
2493				adapter->smartspeed++;
2494				if (adapter->hw.autoneg &&
2495				   !em_phy_setup_autoneg(&adapter->hw) &&
2496				   !em_read_phy_reg(&adapter->hw, PHY_CTRL,
2497				    &phy_tmp)) {
2498					phy_tmp |= (MII_CR_AUTO_NEG_EN |
2499						    MII_CR_RESTART_AUTO_NEG);
2500					em_write_phy_reg(&adapter->hw, PHY_CTRL,
2501					    phy_tmp);
2502				}
2503			}
2504		}
2505		return;
2506	} else if (adapter->smartspeed == EM_SMARTSPEED_DOWNSHIFT) {
2507		/* If still no link, perhaps using 2/3 pair cable */
2508		em_read_phy_reg(&adapter->hw, PHY_1000T_CTRL, &phy_tmp);
2509		phy_tmp |= CR_1000T_MS_ENABLE;
2510		em_write_phy_reg(&adapter->hw, PHY_1000T_CTRL, phy_tmp);
2511		if (adapter->hw.autoneg &&
2512		   !em_phy_setup_autoneg(&adapter->hw) &&
2513		   !em_read_phy_reg(&adapter->hw, PHY_CTRL, &phy_tmp)) {
2514			phy_tmp |= (MII_CR_AUTO_NEG_EN |
2515				    MII_CR_RESTART_AUTO_NEG);
2516			em_write_phy_reg(&adapter->hw, PHY_CTRL, phy_tmp);
2517		}
2518	}
2519	/* Restart process after EM_SMARTSPEED_MAX iterations */
2520	if (adapter->smartspeed++ == EM_SMARTSPEED_MAX)
2521		adapter->smartspeed = 0;
2522}
2523
2524
2525/*
2526 * Manage DMA'able memory.
2527 */
2528static void
2529em_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
2530{
2531	if (error)
2532		return;
2533	*(bus_addr_t *) arg = segs[0].ds_addr;
2534}
2535
2536static int
2537em_dma_malloc(struct adapter *adapter, bus_size_t size, struct em_dma_alloc *dma,
2538	int mapflags)
2539{
2540	int error;
2541
2542	error = bus_dma_tag_create(bus_get_dma_tag(adapter->dev), /* parent */
2543				EM_DBA_ALIGN, 0,	/* alignment, bounds */
2544				BUS_SPACE_MAXADDR,	/* lowaddr */
2545				BUS_SPACE_MAXADDR,	/* highaddr */
2546				NULL, NULL,		/* filter, filterarg */
2547				size,			/* maxsize */
2548				1,			/* nsegments */
2549				size,			/* maxsegsize */
2550				0,			/* flags */
2551				NULL,			/* lockfunc */
2552				NULL,			/* lockarg */
2553				&dma->dma_tag);
2554	if (error) {
2555		device_printf(adapter->dev, "%s: bus_dma_tag_create failed: %d\n",
2556		    __func__, error);
2557		goto fail_0;
2558	}
2559
2560	error = bus_dmamem_alloc(dma->dma_tag, (void**) &dma->dma_vaddr,
2561	    BUS_DMA_NOWAIT, &dma->dma_map);
2562	if (error) {
2563		device_printf(adapter->dev, "%s: bus_dmamem_alloc(%ju) failed: %d\n",
2564		    __func__, (uintmax_t)size, error);
2565		goto fail_1;	/* only the tag exists at this point */
2566	}
2567
2568	dma->dma_paddr = 0;
2569	error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
2570	    size, em_dmamap_cb, &dma->dma_paddr, mapflags | BUS_DMA_NOWAIT);
2571	if (error || dma->dma_paddr == 0) {
2572		device_printf(adapter->dev, "%s: bus_dmamap_load failed: %d\n",
2573		    __func__, error);
2574		goto fail_3;
2575	}
2576
2577	return (0);
2578
2579fail_3:
2580	bus_dmamap_unload(dma->dma_tag, dma->dma_map);
2581fail_2:
2582	bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
fail_1:
2583	bus_dma_tag_destroy(dma->dma_tag);
2584fail_0:
2585	dma->dma_map = NULL;
2586	dma->dma_tag = NULL;
2587
2588	return (error);
2589}
2590
2591static void
2592em_dma_free(struct adapter *adapter, struct em_dma_alloc *dma)
2593{
2594	if (dma->dma_tag == NULL)
2595		return;
2596	if (dma->dma_map != NULL) {
2597		bus_dmamap_sync(dma->dma_tag, dma->dma_map,
2598		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
2599		bus_dmamap_unload(dma->dma_tag, dma->dma_map);
2600		bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
2601		dma->dma_map = NULL;
2602	}
2603	bus_dma_tag_destroy(dma->dma_tag);
2604	dma->dma_tag = NULL;
2605}
2606
2607
2608/*********************************************************************
2609 *
2610 *  Allocate memory for tx_buffer structures. The tx_buffer stores all
2611 *  the information needed to transmit a packet on the wire.
2612 *
2613 **********************************************************************/
2614static int
2615em_allocate_transmit_structures(struct adapter *adapter)
2616{
2617	struct ifnet   *ifp = adapter->ifp;
2618	device_t dev = adapter->dev;
2619	struct em_buffer *tx_buffer;
2620	bus_size_t size, segsize;
2621	int error, i;
2622
2623	/*
2624	 * Setup DMA descriptor areas.
2625	 */
2626	segsize = size = roundup2(adapter->hw.max_frame_size, MCLBYTES);
2627
2628	/* Overrides for TSO - want large sizes */
2629	if (ifp->if_hwassist & EM_TCPSEG_FEATURES) {
2630		size = EM_TSO_SIZE;
2631		segsize = PAGE_SIZE;
2632	}
2633
2634	if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),	/* parent */
2635				1, 0,			/* alignment, bounds */
2636				BUS_SPACE_MAXADDR,	/* lowaddr */
2637				BUS_SPACE_MAXADDR,	/* highaddr */
2638				NULL, NULL,		/* filter, filterarg */
2639				size,			/* maxsize */
2640				EM_MAX_SCATTER,		/* nsegments */
2641				segsize,		/* maxsegsize */
2642				0,			/* flags */
2643				NULL,		/* lockfunc */
2644				NULL,		/* lockarg */
2645				&adapter->txtag)) != 0) {
2646		device_printf(dev, "Unable to allocate TX DMA tag\n");
2647		goto fail;
2648	}
2649
2650	adapter->tx_buffer_area = malloc(sizeof(struct em_buffer) *
2651	    adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO);
2652	if (adapter->tx_buffer_area == NULL) {
2653		device_printf(dev, "Unable to allocate tx_buffer memory\n");
2654		error = ENOMEM;
2655		goto fail;
2656	}
2657
2658	tx_buffer = adapter->tx_buffer_area;
2659	for (i = 0; i < adapter->num_tx_desc; i++) {
2660		error = bus_dmamap_create(adapter->txtag, 0, &tx_buffer->map);
2661		if (error != 0) {
2662			device_printf(dev, "Unable to create TX DMA map\n");
2663			goto fail;
2664		}
2665		tx_buffer++;
2666	}
2667
2668	return (0);
2669
2670fail:
2671	em_free_transmit_structures(adapter);
2672	return (error);
2673}
2674
2675/*********************************************************************
2676 *
2677 *  Initialize transmit structures.
2678 *
2679 **********************************************************************/
2680static void
2681em_setup_transmit_structures(struct adapter *adapter)
2682{
2683	struct em_buffer *tx_buffer;
2684	int i;
2685
2686	bzero(adapter->tx_desc_base, (sizeof(struct em_tx_desc)) * adapter->num_tx_desc);
2687
2688	adapter->next_avail_tx_desc = 0;
2689	adapter->next_tx_to_clean = 0;
2690
2691	/* Free any existing tx buffers. */
2692	tx_buffer = adapter->tx_buffer_area;
2693	for (i = 0; i < adapter->num_tx_desc; i++, tx_buffer++) {
2694		if (tx_buffer->m_head != NULL) {
2695			bus_dmamap_sync(adapter->txtag, tx_buffer->map,
2696			    BUS_DMASYNC_POSTWRITE);
2697			bus_dmamap_unload(adapter->txtag, tx_buffer->map);
2698			m_freem(tx_buffer->m_head);
2699			tx_buffer->m_head = NULL;
2700		}
2701	}
2702
2703	/* Set number of descriptors available */
2704	adapter->num_tx_desc_avail = adapter->num_tx_desc;
2705
2706	/* Set checksum context */
2707	adapter->active_checksum_context = OFFLOAD_NONE;
2708	bus_dmamap_sync(adapter->txdma.dma_tag, adapter->txdma.dma_map,
2709	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
2710}
2711
2712/*********************************************************************
2713 *
2714 *  Enable transmit unit.
2715 *
2716 **********************************************************************/
2717static void
2718em_initialize_transmit_unit(struct adapter *adapter)
2719{
2720	uint32_t	reg_tctl;
2721	uint32_t	reg_tipg = 0;
2722	uint64_t	bus_addr;
2723
2724	 INIT_DEBUGOUT("em_initialize_transmit_unit: begin");
2725	/* Setup the Base and Length of the Tx Descriptor Ring */
2726	bus_addr = adapter->txdma.dma_paddr;
2727	E1000_WRITE_REG(&adapter->hw, TDLEN,
2728	    adapter->num_tx_desc * sizeof(struct em_tx_desc));
2729	E1000_WRITE_REG(&adapter->hw, TDBAH, (uint32_t)(bus_addr >> 32));
2730	E1000_WRITE_REG(&adapter->hw, TDBAL, (uint32_t)bus_addr);
2731
2732	/* Setup the HW Tx Head and Tail descriptor pointers */
2733	E1000_WRITE_REG(&adapter->hw, TDT, 0);
2734	E1000_WRITE_REG(&adapter->hw, TDH, 0);
2735
2736
2737	HW_DEBUGOUT2("Base = %x, Length = %x\n", E1000_READ_REG(&adapter->hw, TDBAL),
2738	    E1000_READ_REG(&adapter->hw, TDLEN));
2739
2740	/* Set the default values for the Tx Inter Packet Gap timer */
2741	switch (adapter->hw.mac_type) {
2742	case em_82542_rev2_0:
2743	case em_82542_rev2_1:
2744		reg_tipg = DEFAULT_82542_TIPG_IPGT;
2745		reg_tipg |= DEFAULT_82542_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT;
2746		reg_tipg |= DEFAULT_82542_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT;
2747		break;
2748	case em_80003es2lan:
2749		reg_tipg = DEFAULT_82543_TIPG_IPGR1;
2750		reg_tipg |= DEFAULT_80003ES2LAN_TIPG_IPGR2 <<
2751		    E1000_TIPG_IPGR2_SHIFT;
2752		break;
2753	default:
2754		if ((adapter->hw.media_type == em_media_type_fiber) ||
2755		    (adapter->hw.media_type == em_media_type_internal_serdes))
2756			reg_tipg = DEFAULT_82543_TIPG_IPGT_FIBER;
2757		else
2758			reg_tipg = DEFAULT_82543_TIPG_IPGT_COPPER;
2759		reg_tipg |= DEFAULT_82543_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT;
2760		reg_tipg |= DEFAULT_82543_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT;
2761	}
2762
2763	E1000_WRITE_REG(&adapter->hw, TIPG, reg_tipg);
2764	E1000_WRITE_REG(&adapter->hw, TIDV, adapter->tx_int_delay.value);
2765	if (adapter->hw.mac_type >= em_82540)
2766		E1000_WRITE_REG(&adapter->hw, TADV, adapter->tx_abs_int_delay.value);
2767
2768	/* Program the Transmit Control Register */
2769	reg_tctl = E1000_TCTL_PSP | E1000_TCTL_EN |
2770		   (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT);
2771	if (adapter->hw.mac_type >= em_82571)
2772		reg_tctl |= E1000_TCTL_MULR;
2773	if (adapter->link_duplex == FULL_DUPLEX) {
2774		reg_tctl |= E1000_FDX_COLLISION_DISTANCE << E1000_COLD_SHIFT;
2775	} else {
2776		reg_tctl |= E1000_HDX_COLLISION_DISTANCE << E1000_COLD_SHIFT;
2777	}
2778	/* This write will effectively turn on the transmit unit. */
2779	E1000_WRITE_REG(&adapter->hw, TCTL, reg_tctl);
2780
2781	/* Setup Transmit Descriptor Base Settings */
2782	adapter->txd_cmd = E1000_TXD_CMD_IFCS;
2783
2784	if (adapter->tx_int_delay.value > 0)
2785		adapter->txd_cmd |= E1000_TXD_CMD_IDE;
2786}
2787
2788/*********************************************************************
2789 *
2790 *  Free all transmit related data structures.
2791 *
2792 **********************************************************************/
2793static void
2794em_free_transmit_structures(struct adapter *adapter)
2795{
2796	struct em_buffer *tx_buffer;
2797	int i;
2798
2799	INIT_DEBUGOUT("free_transmit_structures: begin");
2800
2801	if (adapter->tx_buffer_area != NULL) {
2802		tx_buffer = adapter->tx_buffer_area;
2803		for (i = 0; i < adapter->num_tx_desc; i++, tx_buffer++) {
2804			if (tx_buffer->m_head != NULL) {
2805				bus_dmamap_sync(adapter->txtag, tx_buffer->map,
2806				    BUS_DMASYNC_POSTWRITE);
2807				bus_dmamap_unload(adapter->txtag,
2808				    tx_buffer->map);
2809				m_freem(tx_buffer->m_head);
2810				tx_buffer->m_head = NULL;
2811			} else if (tx_buffer->map != NULL)
2812				bus_dmamap_unload(adapter->txtag,
2813				    tx_buffer->map);
2814			if (tx_buffer->map != NULL) {
2815				bus_dmamap_destroy(adapter->txtag,
2816				    tx_buffer->map);
2817				tx_buffer->map = NULL;
2818			}
2819		}
2820	}
2821	if (adapter->tx_buffer_area != NULL) {
2822		free(adapter->tx_buffer_area, M_DEVBUF);
2823		adapter->tx_buffer_area = NULL;
2824	}
2825	if (adapter->txtag != NULL) {
2826		bus_dma_tag_destroy(adapter->txtag);
2827		adapter->txtag = NULL;
2828	}
2829}
2830
2831/*********************************************************************
2832 *
2833 *  The offload context needs to be set when we transfer the first
2834 *  packet of a particular protocol (TCP/UDP). We change the
2835 *  context only if the protocol type changes.
2836 *
2837 **********************************************************************/
2838static void
2839em_transmit_checksum_setup(struct adapter *adapter, struct mbuf *mp,
2840    uint32_t *txd_upper, uint32_t *txd_lower)
2841{
2842	struct em_context_desc *TXD;
2843	struct em_buffer *tx_buffer;
2844	struct ether_vlan_header *eh;
2845	struct ip *ip;
2846	struct ip6_hdr *ip6;
2847	struct tcphdr *th;
2848	int curr_txd, ehdrlen, hdr_len, ip_hlen;
2849	uint32_t cmd = 0;
2850	uint16_t etype;
2851	uint8_t ipproto;
2852
2853	/* Setup checksum offload context. */
2854	curr_txd = adapter->next_avail_tx_desc;
2855	tx_buffer = &adapter->tx_buffer_area[curr_txd];
2856	TXD = (struct em_context_desc *) &adapter->tx_desc_base[curr_txd];
2857
2858	*txd_lower = E1000_TXD_CMD_DEXT |	/* Extended descr type */
2859		     E1000_TXD_DTYP_D;		/* Data descr */
2860
2861	/*
2862	 * Determine where frame payload starts.
2863	 * Jump over vlan headers if already present,
2864	 * helpful for QinQ too.
2865	 */
2866	eh = mtod(mp, struct ether_vlan_header *);
2867	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
2868		etype = ntohs(eh->evl_proto);
2869		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
2870	} else {
2871		etype = ntohs(eh->evl_encap_proto);
2872		ehdrlen = ETHER_HDR_LEN;
2873	}
2874
2875	/*
2876	 * We only support TCP/UDP for IPv4 and IPv6 for the moment.
2877	 * TODO: Support SCTP too when it hits the tree.
2878	 */
2879	switch (etype) {
2880	case ETHERTYPE_IP:
2881		ip = (struct ip *)(mp->m_data + ehdrlen);
2882		ip_hlen = ip->ip_hl << 2;
2883
2884		/* Setup of IP header checksum. */
2885		if (mp->m_pkthdr.csum_flags & CSUM_IP) {
2886			/*
2887			 * Start offset for header checksum calculation.
2888			 * End offset for header checksum calculation.
2889			 * Offset of place to put the checksum.
2890			 */
2891			TXD->lower_setup.ip_fields.ipcss = ehdrlen;
2892			TXD->lower_setup.ip_fields.ipcse =
2893			    htole16(ehdrlen + ip_hlen);
2894			TXD->lower_setup.ip_fields.ipcso =
2895			    ehdrlen + offsetof(struct ip, ip_sum);
2896			cmd |= E1000_TXD_CMD_IP;
2897			*txd_upper |= E1000_TXD_POPTS_IXSM << 8;
2898		}
2899
2900		if (mp->m_len < ehdrlen + ip_hlen)
2901			return;	/* failure */
2902
2903		hdr_len = ehdrlen + ip_hlen;
2904		ipproto = ip->ip_p;
2905
2906		break;
2907	case ETHERTYPE_IPV6:
2908		ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
2909		ip_hlen = sizeof(struct ip6_hdr); /* XXX: No header stacking. */
2910
2911		if (mp->m_len < ehdrlen + ip_hlen)
2912			return;	/* failure */
2913
2914		/* IPv6 doesn't have a header checksum. */
2915
2916		hdr_len = ehdrlen + ip_hlen;
2917		ipproto = ip6->ip6_nxt;
2918
2919		break;
2920	default:
2921		*txd_upper = 0;
2922		*txd_lower = 0;
2923		return;
2924	}
2925
2926	switch (ipproto) {
2927	case IPPROTO_TCP:
2928		if (mp->m_pkthdr.csum_flags & CSUM_TCP) {
2929			/*
2930			 * Start offset for payload checksum calculation.
2931			 * End offset for payload checksum calculation.
2932			 * Offset of place to put the checksum.
2933			 */
2934			th = (struct tcphdr *)(mp->m_data + hdr_len);
2935			TXD->upper_setup.tcp_fields.tucss = hdr_len;
2936			TXD->upper_setup.tcp_fields.tucse = htole16(0);
2937			TXD->upper_setup.tcp_fields.tucso =
2938			    hdr_len + offsetof(struct tcphdr, th_sum);
2939			cmd |= E1000_TXD_CMD_TCP;
2940			*txd_upper |= E1000_TXD_POPTS_TXSM << 8;
2941		}
2942		break;
2943	case IPPROTO_UDP:
2944		if (mp->m_pkthdr.csum_flags & CSUM_UDP) {
2945			/*
2946			 * Start offset for header checksum calculation.
2947			 * End offset for header checksum calculation.
2948			 * Offset of place to put the checksum.
2949			 */
2950			TXD->upper_setup.tcp_fields.tucss = hdr_len;
2951			TXD->upper_setup.tcp_fields.tucse = htole16(0);
2952			TXD->upper_setup.tcp_fields.tucso =
2953			    hdr_len + offsetof(struct udphdr, uh_sum);
2954			*txd_upper |= E1000_TXD_POPTS_TXSM << 8;
2955		}
2956		break;
2957	default:
2958		break;
2959	}
2960
2961	TXD->tcp_seg_setup.data = htole32(0);
2962	TXD->cmd_and_length =
2963	    htole32(adapter->txd_cmd | E1000_TXD_CMD_DEXT | cmd);
2964	tx_buffer->m_head = NULL;
2965	tx_buffer->next_eop = -1;
2966
2967	if (++curr_txd == adapter->num_tx_desc)
2968		curr_txd = 0;
2969
2970	adapter->num_tx_desc_avail--;
2971	adapter->next_avail_tx_desc = curr_txd;
2972}
2973
2974/**********************************************************************
2975 *
2976 *  Setup work for hardware segmentation offload (TSO)
2977 *
2978 **********************************************************************/
2979static boolean_t
2980em_tso_setup(struct adapter *adapter, struct mbuf *mp, uint32_t *txd_upper,
2981   uint32_t *txd_lower)
2982{
2983	struct em_context_desc *TXD;
2984	struct em_buffer *tx_buffer;
2985	struct ether_vlan_header *eh;
2986	struct ip *ip;
2987	struct ip6_hdr *ip6;
2988	struct tcphdr *th;
2989	int curr_txd, ehdrlen, hdr_len, ip_hlen, isip6;
2990	uint16_t etype;
2991
2992	/*
2993	 * XXX: This is not really correct as the stack would not have
2994	 * set up all checksums.
2995	 * XXX: Returning FALSE is not sufficient, as we may have to return
2996	 * in true failure cases as well.  Should do -1 (failure), 0 (no)
2997	 * and 1 (success).
2998	 */
2999	if (mp->m_pkthdr.len <= E1000_TX_BUFFER_SIZE)
3000		return FALSE;	/* 0 */
3001
3002	/*
3003	 * This function could/should be extended to support IP/IPv6
3004	 * fragmentation as well.  But as they say, one step at a time.
3005	 */
3006
3007	/*
3008	 * Determine where frame payload starts.
3009	 * Jump over vlan headers if already present,
3010	 * helpful for QinQ too.
3011	 */
3012	eh = mtod(mp, struct ether_vlan_header *);
3013	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
3014		etype = ntohs(eh->evl_proto);
3015		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3016	} else {
3017		etype = ntohs(eh->evl_encap_proto);
3018		ehdrlen = ETHER_HDR_LEN;
3019	}
3020
3021	/* Ensure we have at least the IP+TCP header in the first mbuf. */
3022	if (mp->m_len < ehdrlen + sizeof(struct ip) + sizeof(struct tcphdr))
3023		return FALSE;	/* -1 */
3024
3025	/*
3026	 * We only support TCP for IPv4 for the moment; IPv6 is not done yet.
3027	 * TODO: Support SCTP too when it hits the tree.
3028	 */
3029	switch (etype) {
3030	case ETHERTYPE_IP:
3031		isip6 = 0;
3032		ip = (struct ip *)(mp->m_data + ehdrlen);
3033		if (ip->ip_p != IPPROTO_TCP)
3034			return FALSE;	/* 0 */
3035		ip->ip_len = 0;
3036		ip->ip_sum = 0;
3037		ip_hlen = ip->ip_hl << 2;
3038		if (mp->m_len < ehdrlen + ip_hlen + sizeof(struct tcphdr))
3039			return FALSE;	/* -1 */
3040		th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
3041#if 1
3042		th->th_sum = in_pseudo(ip->ip_src.s_addr,
3043		    ip->ip_dst.s_addr, htons(IPPROTO_TCP));
3044#else
3045		th->th_sum = mp->m_pkthdr.csum_data;
3046#endif
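		/*
		 * Note that the pseudo-header checksum seeded into th_sum
		 * deliberately omits the TCP length; the controller folds
		 * in the payload length of each segment it generates.
		 */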
3047		break;
3048	case ETHERTYPE_IPV6:
3049		isip6 = 1;
3050		return FALSE;			/* Not supported yet. */
3051		ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
3052		if (ip6->ip6_nxt != IPPROTO_TCP)
3053			return FALSE;	/* 0 */
3054		ip6->ip6_plen = 0;
3055		ip_hlen = sizeof(struct ip6_hdr); /* XXX: no header stacking. */
3056		if (mp->m_len < ehdrlen + ip_hlen + sizeof(struct tcphdr))
3057			return FALSE;	/* -1 */
3058		th = (struct tcphdr *)((caddr_t)ip6 + ip_hlen);
3059#if 0
3060		th->th_sum = in6_pseudo(ip6->ip6_src, ip6->ip6_dst,
3061		    htons(IPPROTO_TCP));	/* XXX: function notyet. */
3062#else
3063		th->th_sum = mp->m_pkthdr.csum_data;
3064#endif
3065		break;
3066	default:
3067		return FALSE;
3068	}
3069	hdr_len = ehdrlen + ip_hlen + (th->th_off << 2);
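	/* th_off counts 32-bit words, so "<< 2" converts it to bytes;
	 * e.g. a 20-byte TCP header has th_off = 5. */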
3070
3071	*txd_lower = (E1000_TXD_CMD_DEXT |	/* Extended descr type */
3072		      E1000_TXD_DTYP_D |	/* Data descr type */
3073		      E1000_TXD_CMD_TSE);	/* Do TSE on this packet */
3074
3075	/* IP and/or TCP header checksum calculation and insertion. */
3076	*txd_upper = ((isip6 ? 0 : E1000_TXD_POPTS_IXSM) |
3077		      E1000_TXD_POPTS_TXSM) << 8;
3078
3079	curr_txd = adapter->next_avail_tx_desc;
3080	tx_buffer = &adapter->tx_buffer_area[curr_txd];
3081	TXD = (struct em_context_desc *) &adapter->tx_desc_base[curr_txd];
3082
3083	/* IPv6 doesn't have a header checksum. */
3084	if (!isip6) {
3085		/*
3086		 * Start offset for header checksum calculation.
3087		 * End offset for header checksum calculation.
3088		 * Offset of place put the checksum.
3089		 */
3090		TXD->lower_setup.ip_fields.ipcss = ehdrlen;
3091		TXD->lower_setup.ip_fields.ipcse =
3092		    htole16(ehdrlen + ip_hlen - 1);
3093		TXD->lower_setup.ip_fields.ipcso =
3094		    ehdrlen + offsetof(struct ip, ip_sum);
3095	}
3096	/*
3097	 * Start offset for payload checksum calculation.
3098	 * End offset for payload checksum calculation.
3099	 * Offset of place to put the checksum.
3100	 */
3101	TXD->upper_setup.tcp_fields.tucss =
3102	    ehdrlen + ip_hlen;
3103	TXD->upper_setup.tcp_fields.tucse = 0;
3104	TXD->upper_setup.tcp_fields.tucso =
3105	    ehdrlen + ip_hlen + offsetof(struct tcphdr, th_sum);
3106	/*
3107	 * Payload size per packet w/o any headers.
3108	 * Length of all headers up to payload.
3109	 */
3110	TXD->tcp_seg_setup.fields.mss = htole16(mp->m_pkthdr.tso_segsz);
3111	TXD->tcp_seg_setup.fields.hdr_len = hdr_len;
3112
3113	TXD->cmd_and_length = htole32(adapter->txd_cmd |
3114				E1000_TXD_CMD_DEXT |	/* Extended descr */
3115				E1000_TXD_CMD_TSE |	/* TSE context */
3116				(isip6 ? 0 : E1000_TXD_CMD_IP) | /* Do IP csum */
3117				E1000_TXD_CMD_TCP |	/* Do TCP checksum */
3118				(mp->m_pkthdr.len - (hdr_len))); /* Total len */
3119
3120	tx_buffer->m_head = NULL;
3121
3122	if (++curr_txd == adapter->num_tx_desc)
3123		curr_txd = 0;
3124
3125	adapter->num_tx_desc_avail--;
3126	adapter->next_avail_tx_desc = curr_txd;
3127	adapter->tx_tso = TRUE;
3128
3129	return TRUE;
3130}
3131
3132/**********************************************************************
3133 *
3134 *  Examine each tx_buffer in the used queue. If the hardware is done
3135 *  processing the packet then free associated resources. The
3136 *  tx_buffer is put back on the free queue.
3137 *
3138 **********************************************************************/
3139static void
3140em_txeof(struct adapter *adapter)
3141{
3142	int first, last, done, num_avail;
3143	struct em_buffer *tx_buffer;
3144	struct em_tx_desc   *tx_desc, *eop_desc;
3145	struct ifnet   *ifp = adapter->ifp;
3146
3147	EM_LOCK_ASSERT(adapter);
3148
3149	if (adapter->num_tx_desc_avail == adapter->num_tx_desc)
3150		return;
3151
3152	num_avail = adapter->num_tx_desc_avail;
3153	first = adapter->next_tx_to_clean;
3154	tx_desc = &adapter->tx_desc_base[first];
3155	tx_buffer = &adapter->tx_buffer_area[first];
3156	last = tx_buffer->next_eop;
3157	eop_desc = &adapter->tx_desc_base[last];
3158
3159	/*
3160	 * Now calculate the terminating index
3161	 * for the cleanup loop below.
3162	 */
3163	if (++last == adapter->num_tx_desc)
3164		last = 0;
3165	done = last;
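	/*
	 * "done" is one past the packet's EOP descriptor, so the inner
	 * loop below cleans the half-open range [first, done).
	 */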
3166
3167	bus_dmamap_sync(adapter->txdma.dma_tag, adapter->txdma.dma_map,
3168	    BUS_DMASYNC_POSTREAD);
3169	while (eop_desc->upper.fields.status & E1000_TXD_STAT_DD) {
3170		/* We clean the range of the packet */
3171		while (first != done) {
3172			tx_desc->upper.data = 0;
3173			tx_desc->lower.data = 0;
3174			num_avail++;
3175
3176			if (tx_buffer->m_head) {
3177				ifp->if_opackets++;
3178				bus_dmamap_sync(adapter->txtag, tx_buffer->map,
3179				    BUS_DMASYNC_POSTWRITE);
3180				bus_dmamap_unload(adapter->txtag,
3181				    tx_buffer->map);
3182
3183				m_freem(tx_buffer->m_head);
3184				tx_buffer->m_head = NULL;
3185			}
3186			tx_buffer->next_eop = -1;
3187
3188			if (++first == adapter->num_tx_desc)
3189				first = 0;
3190
3191			tx_buffer = &adapter->tx_buffer_area[first];
3192			tx_desc = &adapter->tx_desc_base[first];
3193		}
3194		/* See if we can continue to the next packet */
3195		last = tx_buffer->next_eop;
3196		if (last != -1) {
3197			eop_desc = &adapter->tx_desc_base[last];
3198			/* Get new done point */
3199			if (++last == adapter->num_tx_desc)
3200				last = 0;
3201			done = last;
3202		} else
3203			break;
3204	}
3205	bus_dmamap_sync(adapter->txdma.dma_tag, adapter->txdma.dma_map,
3206	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3207
3208	adapter->next_tx_to_clean = first;
3209
3210	/*
3211	 * If we have enough room, clear IFF_DRV_OACTIVE to tell the stack
3212	 * that it is OK to send packets.
3213	 * If there are no pending descriptors, clear the timeout. Otherwise,
3214	 * if some descriptors have been freed, restart the timeout.
3215	 */
3216	if (num_avail > EM_TX_CLEANUP_THRESHOLD) {
3217		ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
3218		if (num_avail == adapter->num_tx_desc)
3219			ifp->if_timer = 0;
3220		else if (num_avail != adapter->num_tx_desc_avail)
3221			ifp->if_timer = EM_TX_TIMEOUT;
3222	}
3223	adapter->num_tx_desc_avail = num_avail;
3224}
3225
3226/*********************************************************************
3227 *
3228 *  Get a buffer from system mbuf buffer pool.
3229 *
3230 **********************************************************************/
3231static int
3232em_get_buf(struct adapter *adapter, int i)
3233{
3234	struct mbuf		*m;
3235	bus_dma_segment_t	segs[1];
3236	bus_dmamap_t		map;
3237	struct em_buffer	*rx_buffer;
3238	int			error, nsegs;
3239
3240	m = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR);
3241	if (m == NULL) {
3242		adapter->mbuf_cluster_failed++;
3243		return (ENOBUFS);
3244	}
3245	m->m_len = m->m_pkthdr.len = MCLBYTES;
3246	if (adapter->hw.max_frame_size <= (MCLBYTES - ETHER_ALIGN))
3247		m_adj(m, ETHER_ALIGN);
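	/*
	 * ETHER_ALIGN (2 bytes) offsets the frame so that the 14-byte
	 * Ethernet header leaves the IP header 32-bit aligned.
	 */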
3248
3249	/*
3250	 * Using memory from the mbuf cluster pool, invoke the
3251	 * bus_dma machinery to arrange the memory mapping.
3252	 */
3253	error = bus_dmamap_load_mbuf_sg(adapter->rxtag, adapter->rx_sparemap,
3254	    m, segs, &nsegs, BUS_DMA_NOWAIT);
3255	if (error != 0) {
3256		m_free(m);
3257		return (error);
3258	}
3259	/* If nsegs is wrong then the stack is corrupt. */
3260	KASSERT(nsegs == 1, ("Too many segments returned!"));
3261
3262	rx_buffer = &adapter->rx_buffer_area[i];
3263	if (rx_buffer->m_head != NULL)
3264		bus_dmamap_unload(adapter->rxtag, rx_buffer->map);
3265
3266	map = rx_buffer->map;
3267	rx_buffer->map = adapter->rx_sparemap;
3268	adapter->rx_sparemap = map;
3269	bus_dmamap_sync(adapter->rxtag, rx_buffer->map, BUS_DMASYNC_PREREAD);
3270	rx_buffer->m_head = m;
3271
3272	adapter->rx_desc_base[i].buffer_addr = htole64(segs[0].ds_addr);
3273
3274	return (0);
3275}
3276
3277/*********************************************************************
3278 *
3279 *  Allocate memory for rx_buffer structures. Since we use one
3280 *  rx_buffer per received packet, the maximum number of rx_buffer's
3281 *  that we'll need is equal to the number of receive descriptors
3282 *  that we've allocated.
3283 *
3284 **********************************************************************/
3285static int
3286em_allocate_receive_structures(struct adapter *adapter)
3287{
3288	device_t dev = adapter->dev;
3289	struct em_buffer *rx_buffer;
3290	int i, error;
3291
3292	adapter->rx_buffer_area = malloc(sizeof(struct em_buffer) * adapter->num_rx_desc,
3293	    M_DEVBUF, M_NOWAIT);
3294	if (adapter->rx_buffer_area == NULL) {
3295		device_printf(dev, "Unable to allocate rx_buffer memory\n");
3296		return (ENOMEM);
3297	}
3298
3299	bzero(adapter->rx_buffer_area, sizeof(struct em_buffer) * adapter->num_rx_desc);
3300
3301	error = bus_dma_tag_create(bus_get_dma_tag(dev),	/* parent */
3302				1, 0,			/* alignment, bounds */
3303				BUS_SPACE_MAXADDR,	/* lowaddr */
3304				BUS_SPACE_MAXADDR,	/* highaddr */
3305				NULL, NULL,		/* filter, filterarg */
3306				MCLBYTES,		/* maxsize */
3307				1,			/* nsegments */
3308				MCLBYTES,		/* maxsegsize */
3309				0,			/* flags */
3310				NULL,			/* lockfunc */
3311				NULL,			/* lockarg */
3312				&adapter->rxtag);
3313	if (error) {
3314		device_printf(dev, "%s: bus_dma_tag_create failed %d\n",
3315		    __func__, error);
3316		goto fail;
3317	}
3318
3319	error = bus_dmamap_create(adapter->rxtag, BUS_DMA_NOWAIT,
3320	    &adapter->rx_sparemap);
3321	if (error) {
3322		device_printf(dev, "%s: bus_dmamap_create failed: %d\n",
3323		    __func__, error);
3324		goto fail;
3325	}
3326	rx_buffer = adapter->rx_buffer_area;
3327	for (i = 0; i < adapter->num_rx_desc; i++, rx_buffer++) {
3328		error = bus_dmamap_create(adapter->rxtag, BUS_DMA_NOWAIT,
3329		    &rx_buffer->map);
3330		if (error) {
3331			device_printf(dev, "%s: bus_dmamap_create failed: %d\n",
3332			    __func__, error);
3333			goto fail;
3334		}
3335	}
3336
3337	return (0);
3338
3339fail:
3340	em_free_receive_structures(adapter);
3341	return (error);
3342}
3343
3344/*********************************************************************
3345 *
3346 *  Allocate and initialize receive structures.
3347 *
3348 **********************************************************************/
3349static int
3350em_setup_receive_structures(struct adapter *adapter)
3351{
3352	struct em_buffer *rx_buffer;
3353	int i, error;
3354
3355	bzero(adapter->rx_desc_base, (sizeof(struct em_rx_desc)) * adapter->num_rx_desc);
3356
3357	/* Free current RX buffers. */
3358	rx_buffer = adapter->rx_buffer_area;
3359	for (i = 0; i < adapter->num_rx_desc; i++, rx_buffer++) {
3360		if (rx_buffer->m_head != NULL) {
3361			bus_dmamap_sync(adapter->rxtag, rx_buffer->map,
3362			    BUS_DMASYNC_POSTREAD);
3363			bus_dmamap_unload(adapter->rxtag, rx_buffer->map);
3364			m_freem(rx_buffer->m_head);
3365			rx_buffer->m_head = NULL;
3366		}
3367	}
3368
3369	/* Allocate new ones. */
3370	for (i = 0; i < adapter->num_rx_desc; i++) {
3371		error = em_get_buf(adapter, i);
3372		if (error)
3373			return (error);
3374	}
3375
3376	/* Setup our descriptor pointers */
3377	adapter->next_rx_desc_to_check = 0;
3378	bus_dmamap_sync(adapter->rxdma.dma_tag, adapter->rxdma.dma_map,
3379	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3380
3381	return (0);
3382}
3383
3384/*********************************************************************
3385 *
3386 *  Enable receive unit.
3387 *
3388 **********************************************************************/
3389static void
3390em_initialize_receive_unit(struct adapter *adapter)
3391{
3392	struct ifnet	*ifp = adapter->ifp;
3393	uint64_t	bus_addr;
3394	uint32_t	reg_rctl;
3395	uint32_t	reg_rxcsum;
3396
3397	INIT_DEBUGOUT("em_initialize_receive_unit: begin");
3398
3399	/*
3400	 * Make sure receives are disabled while setting
3401	 * up the descriptor ring
3402	 */
3403	E1000_WRITE_REG(&adapter->hw, RCTL, 0);
3404
3405	/* Set the Receive Delay Timer Register */
3406	E1000_WRITE_REG(&adapter->hw, RDTR, adapter->rx_int_delay.value | E1000_RDT_FPDB);
3407
3408	if (adapter->hw.mac_type >= em_82540) {
3409		E1000_WRITE_REG(&adapter->hw, RADV, adapter->rx_abs_int_delay.value);
3410
3411		/*
3412		 * Set the interrupt throttling rate. Value is calculated
3413		 * as DEFAULT_ITR = 1 second / (MAX_INTS_PER_SEC * 256ns).
3414		 */
3415#define MAX_INTS_PER_SEC	8000
3416#define DEFAULT_ITR	     1000000000/(MAX_INTS_PER_SEC * 256)
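		/*
		 * Worked out: 1000000000 / (8000 * 256) = 488.  The ITR
		 * register counts in 256ns units, so 488 * 256ns is about
		 * 125us between interrupts, i.e. at most ~8000
		 * interrupts per second.
		 */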
3417		E1000_WRITE_REG(&adapter->hw, ITR, DEFAULT_ITR);
3418	}
3419
3420	/* Setup the Base and Length of the Rx Descriptor Ring */
3421	bus_addr = adapter->rxdma.dma_paddr;
3422	E1000_WRITE_REG(&adapter->hw, RDLEN, adapter->num_rx_desc *
3423			sizeof(struct em_rx_desc));
3424	E1000_WRITE_REG(&adapter->hw, RDBAH, (uint32_t)(bus_addr >> 32));
3425	E1000_WRITE_REG(&adapter->hw, RDBAL, (uint32_t)bus_addr);
3426
3427	/* Setup the Receive Control Register */
3428	reg_rctl = E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_LBM_NO |
3429		   E1000_RCTL_RDMTS_HALF |
3430		   (adapter->hw.mc_filter_type << E1000_RCTL_MO_SHIFT);
3431
3432	if (adapter->hw.tbi_compatibility_on == TRUE)
3433		reg_rctl |= E1000_RCTL_SBP;
3434
3435
3436	switch (adapter->rx_buffer_len) {
3437	default:
3438	case EM_RXBUFFER_2048:
3439		reg_rctl |= E1000_RCTL_SZ_2048;
3440		break;
3441	case EM_RXBUFFER_4096:
3442		reg_rctl |= E1000_RCTL_SZ_4096 | E1000_RCTL_BSEX | E1000_RCTL_LPE;
3443		break;
3444	case EM_RXBUFFER_8192:
3445		reg_rctl |= E1000_RCTL_SZ_8192 | E1000_RCTL_BSEX | E1000_RCTL_LPE;
3446		break;
3447	case EM_RXBUFFER_16384:
3448		reg_rctl |= E1000_RCTL_SZ_16384 | E1000_RCTL_BSEX | E1000_RCTL_LPE;
3449		break;
3450	}
3451
3452	if (ifp->if_mtu > ETHERMTU)
3453		reg_rctl |= E1000_RCTL_LPE;
3454
3455	/* Enable 82543 Receive Checksum Offload for TCP and UDP */
3456	if ((adapter->hw.mac_type >= em_82543) &&
3457	    (ifp->if_capenable & IFCAP_RXCSUM)) {
3458		reg_rxcsum = E1000_READ_REG(&adapter->hw, RXCSUM);
3459		reg_rxcsum |= (E1000_RXCSUM_IPOFL | E1000_RXCSUM_TUOFL);
3460		E1000_WRITE_REG(&adapter->hw, RXCSUM, reg_rxcsum);
3461	}
3462
3463	/* Enable Receives */
3464	E1000_WRITE_REG(&adapter->hw, RCTL, reg_rctl);
3465
3466	/* Setup the HW Rx Head and Tail Descriptor Pointers */
3467	E1000_WRITE_REG(&adapter->hw, RDH, 0);
3468	E1000_WRITE_REG(&adapter->hw, RDT, adapter->num_rx_desc - 1);
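	/*
	 * Editor's note: head = 0 and tail = num_rx_desc - 1 hand all but
	 * one slot of the ring to the hardware; em_rxeof() advances RDT
	 * as processed buffers are replenished.
	 */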
3469}
3470
3471/*********************************************************************
3472 *
3473 *  Free receive related data structures.
3474 *
3475 **********************************************************************/
3476static void
3477em_free_receive_structures(struct adapter *adapter)
3478{
3479	struct em_buffer *rx_buffer;
3480	int i;
3481
3482	INIT_DEBUGOUT("em_free_receive_structures: begin");
3483
3484	if (adapter->rx_sparemap) {
3485		bus_dmamap_destroy(adapter->rxtag, adapter->rx_sparemap);
3486		adapter->rx_sparemap = NULL;
3487	}
3488	if (adapter->rx_buffer_area != NULL) {
3489		rx_buffer = adapter->rx_buffer_area;
3490		for (i = 0; i < adapter->num_rx_desc; i++, rx_buffer++) {
3491			if (rx_buffer->m_head != NULL) {
3492				bus_dmamap_sync(adapter->rxtag, rx_buffer->map,
3493				    BUS_DMASYNC_POSTREAD);
3494				bus_dmamap_unload(adapter->rxtag,
3495				    rx_buffer->map);
3496				m_freem(rx_buffer->m_head);
3497				rx_buffer->m_head = NULL;
3498			} else if (rx_buffer->map != NULL)
3499				bus_dmamap_unload(adapter->rxtag,
3500				    rx_buffer->map);
3501			if (rx_buffer->map != NULL) {
3502				bus_dmamap_destroy(adapter->rxtag,
3503				    rx_buffer->map);
3504				rx_buffer->map = NULL;
3505			}
3506		}
3507	}
3508	if (adapter->rx_buffer_area != NULL) {
3509		free(adapter->rx_buffer_area, M_DEVBUF);
3510		adapter->rx_buffer_area = NULL;
3511	}
3512	if (adapter->rxtag != NULL) {
3513		bus_dma_tag_destroy(adapter->rxtag);
3514		adapter->rxtag = NULL;
3515	}
3516}
3517
3518/*********************************************************************
3519 *
3520 *  This routine executes in interrupt context. It replenishes
3521 *  the mbufs in the descriptor ring and passes data which has been
3522 *  DMA'ed into host memory up to the upper layer.
3523 *
3524 *  We loop at most count times if count is > 0, or until done if
3525 *  count < 0.
3526 *
3527 *********************************************************************/
3528static int
3529em_rxeof(struct adapter *adapter, int count)
3530{
3531	struct ifnet	*ifp;
3532	struct mbuf	*mp;
3533	uint8_t		accept_frame = 0;
3534	uint8_t		eop = 0;
3535	uint16_t 	len, desc_len, prev_len_adj;
3536	int		i;
3537
3538	/* Pointer to the receive descriptor being examined. */
3539	struct em_rx_desc   *current_desc;
3540	uint8_t		status;
3541
3542	ifp = adapter->ifp;
3543	i = adapter->next_rx_desc_to_check;
3544	current_desc = &adapter->rx_desc_base[i];
3545	bus_dmamap_sync(adapter->rxdma.dma_tag, adapter->rxdma.dma_map,
3546	    BUS_DMASYNC_POSTREAD);
3547
3548	if (!((current_desc->status) & E1000_RXD_STAT_DD))
3549		return (0);
3550
3551	while ((current_desc->status & E1000_RXD_STAT_DD) &&
3552	    (count != 0) &&
3553	    (ifp->if_drv_flags & IFF_DRV_RUNNING)) {
3554		struct mbuf *m = NULL;
3555
3556		mp = adapter->rx_buffer_area[i].m_head;
3557		/*
3558		 * Can't defer bus_dmamap_sync(9) because TBI_ACCEPT
3559		 * needs to access the last received byte in the mbuf.
3560		 */
3561		bus_dmamap_sync(adapter->rxtag, adapter->rx_buffer_area[i].map,
3562		    BUS_DMASYNC_POSTREAD);
3563
3564		accept_frame = 1;
3565		prev_len_adj = 0;
3566		desc_len = le16toh(current_desc->length);
3567		status = current_desc->status;
3568		if (status & E1000_RXD_STAT_EOP) {
3569			count--;
3570			eop = 1;
3571			if (desc_len < ETHER_CRC_LEN) {
3572				len = 0;
3573				prev_len_adj = ETHER_CRC_LEN - desc_len;
3574			} else
3575				len = desc_len - ETHER_CRC_LEN;
3576		} else {
3577			eop = 0;
3578			len = desc_len;
3579		}
3580
3581		if (current_desc->errors & E1000_RXD_ERR_FRAME_ERR_MASK) {
3582			uint8_t		last_byte;
3583			uint32_t	pkt_len = desc_len;
3584
3585			if (adapter->fmp != NULL)
3586				pkt_len += adapter->fmp->m_pkthdr.len;
3587
3588			last_byte = *(mtod(mp, caddr_t) + desc_len - 1);
3589			if (TBI_ACCEPT(&adapter->hw, status,
3590			    current_desc->errors, pkt_len, last_byte)) {
3591				em_tbi_adjust_stats(&adapter->hw,
3592				    &adapter->stats, pkt_len,
3593				    adapter->hw.mac_addr);
3594				if (len > 0)
3595					len--;
3596			} else
3597				accept_frame = 0;
3598		}
3599
3600		if (accept_frame) {
3601			if (em_get_buf(adapter, i) != 0) {
3602				ifp->if_iqdrops++;
3603				goto discard;
3604			}
3605
3606			/* Assign correct length to the current fragment */
3607			mp->m_len = len;
3608
3609			if (adapter->fmp == NULL) {
3610				mp->m_pkthdr.len = len;
3611				adapter->fmp = mp; /* Store the first mbuf */
3612				adapter->lmp = mp;
3613			} else {
3614				/* Chain mbuf's together */
3615				mp->m_flags &= ~M_PKTHDR;
3616				/*
3617				 * Adjust length of previous mbuf in chain if
3618				 * we received less than 4 bytes in the last
3619				 * descriptor.
3620				 */
3621				if (prev_len_adj > 0) {
3622					adapter->lmp->m_len -= prev_len_adj;
3623					adapter->fmp->m_pkthdr.len -=
3624					    prev_len_adj;
3625				}
3626				adapter->lmp->m_next = mp;
3627				adapter->lmp = adapter->lmp->m_next;
3628				adapter->fmp->m_pkthdr.len += len;
3629			}
3630
3631			if (eop) {
3632				adapter->fmp->m_pkthdr.rcvif = ifp;
3633				ifp->if_ipackets++;
3634				em_receive_checksum(adapter, current_desc,
3635				    adapter->fmp);
3636#ifndef __NO_STRICT_ALIGNMENT
3637				if (adapter->hw.max_frame_size >
3638				    (MCLBYTES - ETHER_ALIGN) &&
3639				    em_fixup_rx(adapter) != 0)
3640					goto skip;
3641#endif
3642				if (status & E1000_RXD_STAT_VP) {
3643					adapter->fmp->m_pkthdr.ether_vtag =
3644					    (le16toh(current_desc->special) &
3645					    E1000_RXD_SPC_VLAN_MASK);
3646					adapter->fmp->m_flags |= M_VLANTAG;
3647				}
3648#ifndef __NO_STRICT_ALIGNMENT
3649skip:
3650#endif
3651				m = adapter->fmp;
3652				adapter->fmp = NULL;
3653				adapter->lmp = NULL;
3654			}
3655		} else {
3656			ifp->if_ierrors++;
3657discard:
3658			/* Reuse loaded DMA map and just update mbuf chain */
3659			mp = adapter->rx_buffer_area[i].m_head;
3660			mp->m_len = mp->m_pkthdr.len = MCLBYTES;
3661			mp->m_data = mp->m_ext.ext_buf;
3662			mp->m_next = NULL;
3663			if (adapter->hw.max_frame_size <= (MCLBYTES - ETHER_ALIGN))
3664				m_adj(mp, ETHER_ALIGN);
3665			if (adapter->fmp != NULL) {
3666				m_freem(adapter->fmp);
3667				adapter->fmp = NULL;
3668				adapter->lmp = NULL;
3669			}
3670			m = NULL;
3671		}
3672
3673		/* Zero out the receive descriptor's status. */
3674		current_desc->status = 0;
3675		bus_dmamap_sync(adapter->rxdma.dma_tag, adapter->rxdma.dma_map,
3676		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3677
3678		/* Advance our pointers to the next descriptor. */
3679		if (++i == adapter->num_rx_desc)
3680			i = 0;
3681		if (m != NULL) {
3682			adapter->next_rx_desc_to_check = i;
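			/*
			 * Editor's note: in the polling case the adapter
			 * lock is dropped around if_input() because the
			 * stack may re-enter the driver and would
			 * otherwise recurse on the lock.
			 */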
3683#ifdef DEVICE_POLLING
3684			EM_UNLOCK(adapter);
3685			(*ifp->if_input)(ifp, m);
3686			EM_LOCK(adapter);
3687#else
3688			(*ifp->if_input)(ifp, m);
3689#endif
3690			i = adapter->next_rx_desc_to_check;
3691		}
3692		current_desc = &adapter->rx_desc_base[i];
3693	}
3694	adapter->next_rx_desc_to_check = i;
3695
3696	/* Advance the E1000's Receive Queue #0 "Tail Pointer". */
3697	if (--i < 0)
3698		i = adapter->num_rx_desc - 1;
3699	E1000_WRITE_REG(&adapter->hw, RDT, i);
3700	if (!((current_desc->status) & E1000_RXD_STAT_DD))
3701		return (0);
3702
3703	return (1);
3704}
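
/*
 * Editor's sketch (compiled out, names hypothetical): how a caller can
 * drive em_rxeof().  The adapter lock is assumed to be held, as it is
 * in the driver's interrupt and polling paths.
 */
#ifdef EM_RXEOF_EXAMPLE		/* hypothetical guard; never defined */
static void
em_rxeof_example(struct adapter *adapter)
{
	/* Budget of 32 frames per pass; a nonzero return means ready */
	/* descriptors remained when the budget ran out. */
	while (em_rxeof(adapter, 32) != 0)
		continue;	/* keep draining until the ring is empty */
}
#endif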
3705
3706#ifndef __NO_STRICT_ALIGNMENT
3707/*
3708 * When jumbo frames are enabled we should realign the entire payload on
3709 * architectures with strict alignment. This is a serious design mistake of
3710 * the 8254x, as it nullifies the benefit of DMA operations. The 8254x only
3711 * allows RX buffer sizes of 2048/4096/8192/16384; what we really want is
3712 * 2048 - ETHER_ALIGN, to align the payload. On architectures without strict
3713 * alignment restrictions the 8254x still performs unaligned memory accesses,
3714 * which reduces performance as well. To avoid copying an entire frame to
3715 * realign it, we allocate a new mbuf and copy the Ethernet header into it.
3716 * The new mbuf is prepended to the existing mbuf chain.
3717 *
3718 * Be aware, the best performance of the 8254x is achieved only when jumbo
3719 * frames are not used at all on architectures with strict alignment.
3720 */
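
/*
 * Editor's note (illustrative): a cluster-backed frame starts at an
 * aligned address, so the 14-byte Ethernet header leaves the IP header
 * at offset 14, which is not 4-byte aligned.  Sliding the whole frame
 * forward by ETHER_HDR_LEN moves the IP header to offset 28, which is
 * aligned; when the cluster has no room to slide, the header is instead
 * copied into a freshly allocated mbuf prepended to the chain.
 */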
3721static int
3722em_fixup_rx(struct adapter *adapter)
3723{
3724	struct mbuf *m, *n;
3725	int error;
3726
3727	error = 0;
3728	m = adapter->fmp;
3729	if (m->m_len <= (MCLBYTES - ETHER_HDR_LEN)) {
3730		bcopy(m->m_data, m->m_data + ETHER_HDR_LEN, m->m_len);
3731		m->m_data += ETHER_HDR_LEN;
3732	} else {
3733		MGETHDR(n, M_DONTWAIT, MT_DATA);
3734		if (n != NULL) {
3735			bcopy(m->m_data, n->m_data, ETHER_HDR_LEN);
3736			m->m_data += ETHER_HDR_LEN;
3737			m->m_len -= ETHER_HDR_LEN;
3738			n->m_len = ETHER_HDR_LEN;
3739			M_MOVE_PKTHDR(n, m);
3740			n->m_next = m;
3741			adapter->fmp = n;
3742		} else {
3743			adapter->ifp->if_iqdrops++;
3744			adapter->mbuf_alloc_failed++;
3745			m_freem(adapter->fmp);
3746			adapter->fmp = NULL;
3747			adapter->lmp = NULL;
3748			error = ENOBUFS;
3749		}
3750	}
3751
3752	return (error);
3753}
3754#endif
3755
3756/*********************************************************************
3757 *
3758 *  Verify that the hardware indicated that the checksum is valid.
3759 *  Inform the stack about the status of the checksum so that the
3760 *  stack doesn't spend time verifying it.
3761 *
3762 *********************************************************************/
3763static void
3764em_receive_checksum(struct adapter *adapter, struct em_rx_desc *rx_desc,
3765		    struct mbuf *mp)
3766{
3767	/* 82543 or newer only */
3768	if ((adapter->hw.mac_type < em_82543) ||
3769	    /* Ignore Checksum bit is set */
3770	    (rx_desc->status & E1000_RXD_STAT_IXSM)) {
3771		mp->m_pkthdr.csum_flags = 0;
3772		return;
3773	}
3774
3775	if (rx_desc->status & E1000_RXD_STAT_IPCS) {
3776		/* Did it pass? */
3777		if (!(rx_desc->errors & E1000_RXD_ERR_IPE)) {
3778			/* IP Checksum Good */
3779			mp->m_pkthdr.csum_flags = CSUM_IP_CHECKED;
3780			mp->m_pkthdr.csum_flags |= CSUM_IP_VALID;
3781
3782		} else {
3783			mp->m_pkthdr.csum_flags = 0;
3784		}
3785	}
3786
3787	if (rx_desc->status & E1000_RXD_STAT_TCPCS) {
3788		/* Did it pass? */
3789		if (!(rx_desc->errors & E1000_RXD_ERR_TCPE)) {
3790			mp->m_pkthdr.csum_flags |=
3791			(CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
3792			mp->m_pkthdr.csum_data = htons(0xffff);
3793		}
3794	}
3795}
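
/*
 * Editor's sketch (compiled out, names hypothetical): how an upper layer
 * consumes the flags set above; the IP input path can skip its software
 * checksum when both CSUM_IP_CHECKED and CSUM_IP_VALID are present.
 */
#ifdef EM_RXCSUM_EXAMPLE	/* hypothetical guard; never defined */
static int
em_rx_ip_csum_ok(struct mbuf *mp)
{
	/* Both flags set means the hardware checked and validated the */
	/* IP header checksum for this packet. */
	return ((mp->m_pkthdr.csum_flags &
	    (CSUM_IP_CHECKED | CSUM_IP_VALID)) ==
	    (CSUM_IP_CHECKED | CSUM_IP_VALID));
}
#endif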
3796
3797
3798static void
3799em_enable_vlans(struct adapter *adapter)
3800{
3801	uint32_t ctrl;
3802
3803	E1000_WRITE_REG(&adapter->hw, VET, ETHERTYPE_VLAN);
3804
3805	ctrl = E1000_READ_REG(&adapter->hw, CTRL);
3806	ctrl |= E1000_CTRL_VME;
3807	E1000_WRITE_REG(&adapter->hw, CTRL, ctrl);
3808}
3809
3810static void
3811em_disable_vlans(struct adapter *adapter)
3812{
3813	uint32_t ctrl;
3814
3815	ctrl = E1000_READ_REG(&adapter->hw, CTRL);
3816	ctrl &= ~E1000_CTRL_VME;
3817	E1000_WRITE_REG(&adapter->hw, CTRL, ctrl);
3818}
3819
3820static void
3821em_enable_intr(struct adapter *adapter)
3822{
3823	E1000_WRITE_REG(&adapter->hw, IMS, (IMS_ENABLE_MASK));
3824}
3825
3826static void
3827em_disable_intr(struct adapter *adapter)
3828{
3829	/*
3830	 * The first version of the 82542 had an erratum where, when link
3831	 * was forced, it would stay up even if the cable was disconnected.
3832	 * Sequence errors were used to detect the disconnect and then the
3833	 * driver would unforce the link. This code is in the ISR. For this
3834	 * to work correctly the sequence error interrupt had to be enabled
3835	 * all the time.
3836	 */
3837
3838	if (adapter->hw.mac_type == em_82542_rev2_0)
3839	    E1000_WRITE_REG(&adapter->hw, IMC,
3840		(0xffffffff & ~E1000_IMC_RXSEQ));
3841	else
3842	    E1000_WRITE_REG(&adapter->hw, IMC,
3843		0xffffffff);
3844}
3845
3846static int
3847em_is_valid_ether_addr(uint8_t *addr)
3848{
3849	char zero_addr[6] = { 0, 0, 0, 0, 0, 0 };
3850
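	/* Reject group (multicast/broadcast) addresses and the all-zero address. */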
3851	if ((addr[0] & 1) || (!bcmp(addr, zero_addr, ETHER_ADDR_LEN))) {
3852		return (FALSE);
3853	}
3854
3855	return (TRUE);
3856}
3857
3858void
3859em_write_pci_cfg(struct em_hw *hw, uint32_t reg, uint16_t *value)
3860{
3861	pci_write_config(((struct em_osdep *)hw->back)->dev, reg, *value, 2);
3862}
3863
3864void
3865em_read_pci_cfg(struct em_hw *hw, uint32_t reg, uint16_t *value)
3866{
3867	*value = pci_read_config(((struct em_osdep *)hw->back)->dev, reg, 2);
3868}
3869
3870void
3871em_pci_set_mwi(struct em_hw *hw)
3872{
3873	pci_write_config(((struct em_osdep *)hw->back)->dev, PCIR_COMMAND,
3874	    (hw->pci_cmd_word | CMD_MEM_WRT_INVALIDATE), 2);
3875}
3876
3877void
3878em_pci_clear_mwi(struct em_hw *hw)
3879{
3880	pci_write_config(((struct em_osdep *)hw->back)->dev, PCIR_COMMAND,
3881	    (hw->pci_cmd_word & ~CMD_MEM_WRT_INVALIDATE), 2);
3882}
3883
3884/*
3885 * We may eventually really do this, but it's unnecessary
3886 * for now, so we just return unsupported.
3887 */
3888int32_t
3889em_read_pcie_cap_reg(struct em_hw *hw, uint32_t reg, uint16_t *value)
3890{
3891	return (0);
3892}
3893
3894/*********************************************************************
3895* 82544 coexistence issue workaround.
3896*    There are two issues:
3897*       1. Transmit hang issue.
3898*    To detect this issue, the following equation can be used:
3899*	  SIZE[3:0] + ADDR[2:0] = SUM[3:0].
3900*	  If SUM[3:0] falls between 1 and 4, we will have this issue.
3901*
3902*       2. DAC issue.
3903*    To detect this issue, the following equation can be used:
3904*	  SIZE[3:0] + ADDR[2:0] = SUM[3:0].
3905*	  If SUM[3:0] falls between 9 and 0xC, we will have this issue.
3906*
3907*
3908*    WORKAROUND:
3909*	  Make sure we do not have an ending address of 1, 2, 3, 4 (hang)
3910*	  or 9, 0xA, 0xB, 0xC (DAC).
3911**********************************************************************/
3912static uint32_t
3913em_fill_descriptors (bus_addr_t address, uint32_t length,
3914		PDESC_ARRAY desc_array)
3915{
3916	/* Since the issue is sensitive to both length and address, */
3917	/* let us first check the address... */
3918	uint32_t safe_terminator;
3919	if (length <= 4) {
3920		desc_array->descriptor[0].address = address;
3921		desc_array->descriptor[0].length = length;
3922		desc_array->elements = 1;
3923		return (desc_array->elements);
3924	}
3925	safe_terminator = (uint32_t)((((uint32_t)address & 0x7) + (length & 0xF)) & 0xF);
3926	/* If it does not fall in the 0x1-0x4 or 0x9-0xC ranges, a single descriptor is safe. */
3927	if (safe_terminator == 0   ||
3928	(safe_terminator > 4   &&
3929	safe_terminator < 9)   ||
3930	(safe_terminator > 0xC &&
3931	safe_terminator <= 0xF)) {
3932		desc_array->descriptor[0].address = address;
3933		desc_array->descriptor[0].length = length;
3934		desc_array->elements = 1;
3935		return (desc_array->elements);
3936	}
3937
3938	desc_array->descriptor[0].address = address;
3939	desc_array->descriptor[0].length = length - 4;
3940	desc_array->descriptor[1].address = address + (length - 4);
3941	desc_array->descriptor[1].length = 4;
3942	desc_array->elements = 2;
3943	return (desc_array->elements);
3944}
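
/*
 * Editor's worked example (compiled out; the guard and function name are
 * hypothetical, and DESC_ARRAY is assumed to be the object type behind
 * PDESC_ARRAY): with address 0x1006 and length 14,
 * SUM[3:0] = ((0x1006 & 0x7) + (14 & 0xF)) & 0xF = (6 + 14) & 0xF = 0x4,
 * which is in the 1..4 hang range, so the transfer is split into a
 * (length - 4)-byte chunk and a trailing 4-byte chunk.
 */
#ifdef EM_FILL_DESC_EXAMPLE	/* hypothetical guard; never defined */
static void
em_fill_descriptors_example(void)
{
	DESC_ARRAY da;
	uint32_t n;

	n = em_fill_descriptors((bus_addr_t)0x1006, 14, &da);
	KASSERT(n == 2, ("expected a split into two descriptors"));
	KASSERT(da.descriptor[1].length == 4, ("trailing chunk must be 4"));
}
#endif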
3945
3946/**********************************************************************
3947 *
3948 *  Update the board statistics counters.
3949 *
3950 **********************************************************************/
3951static void
3952em_update_stats_counters(struct adapter *adapter)
3953{
3954	struct ifnet   *ifp;
3955
3956	if (adapter->hw.media_type == em_media_type_copper ||
3957	   (E1000_READ_REG(&adapter->hw, STATUS) & E1000_STATUS_LU)) {
3958		adapter->stats.symerrs += E1000_READ_REG(&adapter->hw, SYMERRS);
3959		adapter->stats.sec += E1000_READ_REG(&adapter->hw, SEC);
3960	}
3961	adapter->stats.crcerrs += E1000_READ_REG(&adapter->hw, CRCERRS);
3962	adapter->stats.mpc += E1000_READ_REG(&adapter->hw, MPC);
3963	adapter->stats.scc += E1000_READ_REG(&adapter->hw, SCC);
3964	adapter->stats.ecol += E1000_READ_REG(&adapter->hw, ECOL);
3965
3966	adapter->stats.mcc += E1000_READ_REG(&adapter->hw, MCC);
3967	adapter->stats.latecol += E1000_READ_REG(&adapter->hw, LATECOL);
3968	adapter->stats.colc += E1000_READ_REG(&adapter->hw, COLC);
3969	adapter->stats.dc += E1000_READ_REG(&adapter->hw, DC);
3970	adapter->stats.rlec += E1000_READ_REG(&adapter->hw, RLEC);
3971	adapter->stats.xonrxc += E1000_READ_REG(&adapter->hw, XONRXC);
3972	adapter->stats.xontxc += E1000_READ_REG(&adapter->hw, XONTXC);
3973	adapter->stats.xoffrxc += E1000_READ_REG(&adapter->hw, XOFFRXC);
3974	adapter->stats.xofftxc += E1000_READ_REG(&adapter->hw, XOFFTXC);
3975	adapter->stats.fcruc += E1000_READ_REG(&adapter->hw, FCRUC);
3976	adapter->stats.prc64 += E1000_READ_REG(&adapter->hw, PRC64);
3977	adapter->stats.prc127 += E1000_READ_REG(&adapter->hw, PRC127);
3978	adapter->stats.prc255 += E1000_READ_REG(&adapter->hw, PRC255);
3979	adapter->stats.prc511 += E1000_READ_REG(&adapter->hw, PRC511);
3980	adapter->stats.prc1023 += E1000_READ_REG(&adapter->hw, PRC1023);
3981	adapter->stats.prc1522 += E1000_READ_REG(&adapter->hw, PRC1522);
3982	adapter->stats.gprc += E1000_READ_REG(&adapter->hw, GPRC);
3983	adapter->stats.bprc += E1000_READ_REG(&adapter->hw, BPRC);
3984	adapter->stats.mprc += E1000_READ_REG(&adapter->hw, MPRC);
3985	adapter->stats.gptc += E1000_READ_REG(&adapter->hw, GPTC);
3986
3987	/* For the 64-bit byte counters the low dword must be read first; */
3988	/* both halves clear on the read of the high dword. */
3989
3990	adapter->stats.gorcl += E1000_READ_REG(&adapter->hw, GORCL);
3991	adapter->stats.gorch += E1000_READ_REG(&adapter->hw, GORCH);
3992	adapter->stats.gotcl += E1000_READ_REG(&adapter->hw, GOTCL);
3993	adapter->stats.gotch += E1000_READ_REG(&adapter->hw, GOTCH);
3994
3995	adapter->stats.rnbc += E1000_READ_REG(&adapter->hw, RNBC);
3996	adapter->stats.ruc += E1000_READ_REG(&adapter->hw, RUC);
3997	adapter->stats.rfc += E1000_READ_REG(&adapter->hw, RFC);
3998	adapter->stats.roc += E1000_READ_REG(&adapter->hw, ROC);
3999	adapter->stats.rjc += E1000_READ_REG(&adapter->hw, RJC);
4000
4001	adapter->stats.torl += E1000_READ_REG(&adapter->hw, TORL);
4002	adapter->stats.torh += E1000_READ_REG(&adapter->hw, TORH);
4003	adapter->stats.totl += E1000_READ_REG(&adapter->hw, TOTL);
4004	adapter->stats.toth += E1000_READ_REG(&adapter->hw, TOTH);
4005
4006	adapter->stats.tpr += E1000_READ_REG(&adapter->hw, TPR);
4007	adapter->stats.tpt += E1000_READ_REG(&adapter->hw, TPT);
4008	adapter->stats.ptc64 += E1000_READ_REG(&adapter->hw, PTC64);
4009	adapter->stats.ptc127 += E1000_READ_REG(&adapter->hw, PTC127);
4010	adapter->stats.ptc255 += E1000_READ_REG(&adapter->hw, PTC255);
4011	adapter->stats.ptc511 += E1000_READ_REG(&adapter->hw, PTC511);
4012	adapter->stats.ptc1023 += E1000_READ_REG(&adapter->hw, PTC1023);
4013	adapter->stats.ptc1522 += E1000_READ_REG(&adapter->hw, PTC1522);
4014	adapter->stats.mptc += E1000_READ_REG(&adapter->hw, MPTC);
4015	adapter->stats.bptc += E1000_READ_REG(&adapter->hw, BPTC);
4016
4017	if (adapter->hw.mac_type >= em_82543) {
4018		adapter->stats.algnerrc += E1000_READ_REG(&adapter->hw, ALGNERRC);
4019		adapter->stats.rxerrc += E1000_READ_REG(&adapter->hw, RXERRC);
4020		adapter->stats.tncrs += E1000_READ_REG(&adapter->hw, TNCRS);
4021		adapter->stats.cexterr += E1000_READ_REG(&adapter->hw, CEXTERR);
4022		adapter->stats.tsctc += E1000_READ_REG(&adapter->hw, TSCTC);
4023		adapter->stats.tsctfc += E1000_READ_REG(&adapter->hw, TSCTFC);
4024	}
4025	ifp = adapter->ifp;
4026
4027	ifp->if_collisions = adapter->stats.colc;
4028
4029	/* Rx Errors */
4030	ifp->if_ierrors = adapter->stats.rxerrc + adapter->stats.crcerrs +
4031	    adapter->stats.algnerrc + adapter->stats.ruc + adapter->stats.roc +
4032	    adapter->stats.mpc + adapter->stats.cexterr;
4033
4034	/* Tx Errors */
4035	ifp->if_oerrors = adapter->stats.ecol + adapter->stats.latecol +
4036	    adapter->watchdog_events;
4037}
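
/*
 * Editor's usage note: the if_* fields filled in above are what
 * netstat(1) reports; "netstat -i" shows Ierrs/Oerrs/Colls columns
 * derived from these sums.
 */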
4038
4039
4040/**********************************************************************
4041 *
4042 *  This routine is called only when em_display_debug_stats is enabled.
4043 *  It provides a way to take a look at important statistics
4044 *  maintained by the driver and hardware.
4045 *
4046 **********************************************************************/
4047static void
4048em_print_debug_info(struct adapter *adapter)
4049{
4050	device_t dev = adapter->dev;
4051	uint8_t *hw_addr = adapter->hw.hw_addr;
4052
4053	device_printf(dev, "Adapter hardware address = %p \n", hw_addr);
4054	device_printf(dev, "CTRL = 0x%x RCTL = 0x%x \n",
4055	    E1000_READ_REG(&adapter->hw, CTRL),
4056	    E1000_READ_REG(&adapter->hw, RCTL));
4057	device_printf(dev, "Packet buffer = Tx=%dk Rx=%dk \n",
4058	    ((E1000_READ_REG(&adapter->hw, PBA) & 0xffff0000) >> 16),
4059	    (E1000_READ_REG(&adapter->hw, PBA) & 0xffff));
4060	device_printf(dev, "Flow control watermarks high = %d low = %d\n",
4061	    adapter->hw.fc_high_water,
4062	    adapter->hw.fc_low_water);
4063	device_printf(dev, "tx_int_delay = %d, tx_abs_int_delay = %d\n",
4064	    E1000_READ_REG(&adapter->hw, TIDV),
4065	    E1000_READ_REG(&adapter->hw, TADV));
4066	device_printf(dev, "rx_int_delay = %d, rx_abs_int_delay = %d\n",
4067	    E1000_READ_REG(&adapter->hw, RDTR),
4068	    E1000_READ_REG(&adapter->hw, RADV));
4069	device_printf(dev, "fifo workaround = %lld, fifo_reset_count = %lld\n",
4070	    (long long)adapter->tx_fifo_wrk_cnt,
4071	    (long long)adapter->tx_fifo_reset_cnt);
4072	device_printf(dev, "hw tdh = %d, hw tdt = %d\n",
4073	    E1000_READ_REG(&adapter->hw, TDH),
4074	    E1000_READ_REG(&adapter->hw, TDT));
4075	device_printf(dev, "Num Tx descriptors avail = %d\n",
4076	    adapter->num_tx_desc_avail);
4077	device_printf(dev, "Tx Descriptors not avail1 = %ld\n",
4078	    adapter->no_tx_desc_avail1);
4079	device_printf(dev, "Tx Descriptors not avail2 = %ld\n",
4080	    adapter->no_tx_desc_avail2);
4081	device_printf(dev, "Std mbuf failed = %ld\n",
4082	    adapter->mbuf_alloc_failed);
4083	device_printf(dev, "Std mbuf cluster failed = %ld\n",
4084	    adapter->mbuf_cluster_failed);
4085}
4086
4087static void
4088em_print_hw_stats(struct adapter *adapter)
4089{
4090	device_t dev = adapter->dev;
4091
4092	device_printf(dev, "Excessive collisions = %lld\n",
4093	    (long long)adapter->stats.ecol);
4094	device_printf(dev, "Symbol errors = %lld\n",
4095	    (long long)adapter->stats.symerrs);
4096	device_printf(dev, "Sequence errors = %lld\n",
4097	    (long long)adapter->stats.sec);
4098	device_printf(dev, "Defer count = %lld\n", (long long)adapter->stats.dc);
4099
4100	device_printf(dev, "Missed Packets = %lld\n", (long long)adapter->stats.mpc);
4101	device_printf(dev, "Receive No Buffers = %lld\n",
4102	    (long long)adapter->stats.rnbc);
4103	/* RLEC is inaccurate on some hardware, so calculate our own. */
4104	device_printf(dev, "Receive Length Errors = %lld\n",
4105	    ((long long)adapter->stats.roc + (long long)adapter->stats.ruc));
4106	device_printf(dev, "Receive errors = %lld\n",
4107	    (long long)adapter->stats.rxerrc);
4108	device_printf(dev, "Crc errors = %lld\n", (long long)adapter->stats.crcerrs);
4109	device_printf(dev, "Alignment errors = %lld\n",
4110	    (long long)adapter->stats.algnerrc);
4111	device_printf(dev, "Carrier extension errors = %lld\n",
4112	    (long long)adapter->stats.cexterr);
4113	device_printf(dev, "RX overruns = %ld\n", adapter->rx_overruns);
4114	device_printf(dev, "watchdog timeouts = %ld\n", adapter->watchdog_events);
4115
4116	device_printf(dev, "XON Rcvd = %lld\n", (long long)adapter->stats.xonrxc);
4117	device_printf(dev, "XON Xmtd = %lld\n", (long long)adapter->stats.xontxc);
4118	device_printf(dev, "XOFF Rcvd = %lld\n", (long long)adapter->stats.xoffrxc);
4119	device_printf(dev, "XOFF Xmtd = %lld\n", (long long)adapter->stats.xofftxc);
4120
4121	device_printf(dev, "Good Packets Rcvd = %lld\n",
4122	    (long long)adapter->stats.gprc);
4123	device_printf(dev, "Good Packets Xmtd = %lld\n",
4124	    (long long)adapter->stats.gptc);
4125	device_printf(dev, "TSO Contexts Xmtd = %lld\n",
4126	    (long long)adapter->stats.tsctc);
4127	device_printf(dev, "TSO Contexts Failed = %lld\n",
4128	    (long long)adapter->stats.tsctfc);
4129}
4130
4131static int
4132em_sysctl_debug_info(SYSCTL_HANDLER_ARGS)
4133{
4134	struct adapter *adapter;
4135	int error;
4136	int result;
4137
4138	result = -1;
4139	error = sysctl_handle_int(oidp, &result, 0, req);
4140
4141	if (error || !req->newptr)
4142		return (error);
4143
4144	if (result == 1) {
4145		adapter = (struct adapter *)arg1;
4146		em_print_debug_info(adapter);
4147	}
4148
4149	return (error);
4150}
4151
4152
4153static int
4154em_sysctl_stats(SYSCTL_HANDLER_ARGS)
4155{
4156	struct adapter *adapter;
4157	int error;
4158	int result;
4159
4160	result = -1;
4161	error = sysctl_handle_int(oidp, &result, 0, req);
4162
4163	if (error || !req->newptr)
4164		return (error);
4165
4166	if (result == 1) {
4167		adapter = (struct adapter *)arg1;
4168		em_print_hw_stats(adapter);
4169	}
4170
4171	return (error);
4172}
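
/*
 * Editor's usage note (illustrative): both handlers above are
 * write-triggered dumps.  Assuming they are registered at attach time
 * under the conventional OID names, e.g.
 *
 *	sysctl dev.em.0.stats=1
 *
 * prints the hardware statistics to the console.
 */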
4173
4174static int
4175em_sysctl_int_delay(SYSCTL_HANDLER_ARGS)
4176{
4177	struct em_int_delay_info *info;
4178	struct adapter *adapter;
4179	uint32_t regval;
4180	int error;
4181	int usecs;
4182	int ticks;
4183
4184	info = (struct em_int_delay_info *)arg1;
4185	usecs = info->value;
4186	error = sysctl_handle_int(oidp, &usecs, 0, req);
4187	if (error != 0 || req->newptr == NULL)
4188		return (error);
4189	if (usecs < 0 || usecs > E1000_TICKS_TO_USECS(65535))
4190		return (EINVAL);
4191	info->value = usecs;
4192	ticks = E1000_USECS_TO_TICKS(usecs);
4193
4194	adapter = info->adapter;
4195
4196	EM_LOCK(adapter);
4197	regval = E1000_READ_OFFSET(&adapter->hw, info->offset);
4198	regval = (regval & ~0xffff) | (ticks & 0xffff);
4199	/* Handle a few special cases. */
4200	switch (info->offset) {
4201	case E1000_RDTR:
4202	case E1000_82542_RDTR:
4203		regval |= E1000_RDT_FPDB;
4204		break;
4205	case E1000_TIDV:
4206	case E1000_82542_TIDV:
4207		if (ticks == 0) {
4208			adapter->txd_cmd &= ~E1000_TXD_CMD_IDE;
4209			/* Don't write 0 into the TIDV register. */
4210			regval++;
4211		} else
4212			adapter->txd_cmd |= E1000_TXD_CMD_IDE;
4213		break;
4214	}
4215	E1000_WRITE_OFFSET(&adapter->hw, info->offset, regval);
4216	EM_UNLOCK(adapter);
4217	return (0);
4218}
4219
4220static void
4221em_add_int_delay_sysctl(struct adapter *adapter, const char *name,
4222	const char *description, struct em_int_delay_info *info,
4223	int offset, int value)
4224{
4225	info->adapter = adapter;
4226	info->offset = offset;
4227	info->value = value;
4228	SYSCTL_ADD_PROC(device_get_sysctl_ctx(adapter->dev),
4229	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
4230	    OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW,
4231	    info, 0, em_sysctl_int_delay, "I", description);
4232}
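
/*
 * Editor's usage note (illustrative): the OID created above lives in
 * the device's sysctl tree, so assuming unit 0 and the conventional
 * names passed at attach time, a delay can be inspected or tuned at
 * run time with, e.g.,
 *
 *	sysctl dev.em.0.rx_int_delay		(read, value in usecs)
 *	sysctl dev.em.0.rx_int_delay=32		(write)
 */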
4233
4234#ifndef DEVICE_POLLING
4235static void
4236em_add_int_process_limit(struct adapter *adapter, const char *name,
4237	const char *description, int *limit, int value)
4238{
4239	*limit = value;
4240	SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev),
4241	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
4242	    OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW, limit, value, description);
4243}
4244#endif
4245