/**************************************************************************

Copyright (c) 2001-2006, Intel Corporation
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

 1. Redistributions of source code must retain the above copyright notice,
    this list of conditions and the following disclaimer.

 2. Redistributions in binary form must reproduce the above copyright
    notice, this list of conditions and the following disclaimer in the
    documentation and/or other materials provided with the distribution.

 3. Neither the name of the Intel Corporation nor the names of its
    contributors may be used to endorse or promote products derived from
    this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.

***************************************************************************/

/*$FreeBSD: head/sys/dev/em/if_em.c 162783 2006-09-29 13:17:16Z andre $*/

#ifdef HAVE_KERNEL_OPTION_HEADERS
#include "opt_device_polling.h"
#endif

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/bus.h>
#include <sys/endian.h>
#include <sys/kernel.h>
#include <sys/kthread.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/module.h>
#include <sys/rman.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/sysctl.h>
#include <sys/taskqueue.h>

#include <machine/bus.h>
#include <machine/resource.h>

#include <net/bpf.h>
#include <net/ethernet.h>
#include <net/if.h>
#include <net/if_arp.h>
#include <net/if_dl.h>
#include <net/if_media.h>

#include <net/if_types.h>
#include <net/if_vlan_var.h>

#include <netinet/in_systm.h>
#include <netinet/in.h>
#include <netinet/if_ether.h>
#include <netinet/ip.h>
#include <netinet/tcp.h>
#include <netinet/udp.h>

#include <machine/in_cksum.h>
#include <dev/pci/pcivar.h>
#include <dev/pci/pcireg.h>
#include <dev/em/if_em_hw.h>
#include <dev/em/if_em.h>

/*********************************************************************
 *  Set this to one to display debug statistics
 *********************************************************************/
int	em_display_debug_stats = 0;

/*********************************************************************
 *  Driver version
 *********************************************************************/

char em_driver_version[] = "Version - 6.1.4 - TSO";

/*********************************************************************
 *  PCI Device ID Table
 *
 *  Used by em_probe to select the devices to load on.
 *  The last field stores an index into em_strings.
 *  The last entry must be all 0s.
 *
 *  { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index }
 *********************************************************************/

static em_vendor_info_t em_vendor_info_array[] =
{
	/* Intel(R) PRO/1000 Network Connection */
	{ 0x8086, E1000_DEV_ID_82540EM,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82540EM_LOM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82540EP,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82540EP_LOM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82540EP_LP,	PCI_ANY_ID, PCI_ANY_ID, 0},

	{ 0x8086, E1000_DEV_ID_82541EI,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82541ER,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82541ER_LOM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82541EI_MOBILE,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82541GI,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82541GI_LF,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82541GI_MOBILE,	PCI_ANY_ID, PCI_ANY_ID, 0},

	{ 0x8086, E1000_DEV_ID_82542,		PCI_ANY_ID, PCI_ANY_ID, 0},

	{ 0x8086, E1000_DEV_ID_82543GC_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82543GC_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},

	{ 0x8086, E1000_DEV_ID_82544EI_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82544EI_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82544GC_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82544GC_LOM,	PCI_ANY_ID, PCI_ANY_ID, 0},

	{ 0x8086, E1000_DEV_ID_82545EM_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82545EM_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82545GM_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82545GM_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82545GM_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},

	{ 0x8086, E1000_DEV_ID_82546EB_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82546EB_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82546EB_QUAD_COPPER, PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82546GB_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82546GB_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82546GB_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82546GB_PCIE,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82546GB_QUAD_COPPER, PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82546GB_QUAD_COPPER_KSP3,
						PCI_ANY_ID, PCI_ANY_ID, 0},

	{ 0x8086, E1000_DEV_ID_82547EI,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82547EI_MOBILE,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82547GI,		PCI_ANY_ID, PCI_ANY_ID, 0},

	{ 0x8086, E1000_DEV_ID_82571EB_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER,
						PCI_ANY_ID, PCI_ANY_ID, 0},

	{ 0x8086, E1000_DEV_ID_82572EI_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82572EI_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82572EI_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82572EI,		PCI_ANY_ID, PCI_ANY_ID, 0},

	{ 0x8086, E1000_DEV_ID_82573E,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82573E_IAMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82573L,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_SPT,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_SPT,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_DPT,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_DPT,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IGP_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IGP_C,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IFE,	PCI_ANY_ID, PCI_ANY_ID, 0},

	/* required last entry */
	{ 0, 0, 0, 0, 0}
};

/*********************************************************************
 *  Table of branding strings for all supported NICs.
 *********************************************************************/

static char *em_strings[] = {
	"Intel(R) PRO/1000 Network Connection"
};

/*********************************************************************
 *  Function prototypes
 *********************************************************************/
static int	em_probe(device_t);
static int	em_attach(device_t);
static int	em_detach(device_t);
static int	em_shutdown(device_t);
static int	em_suspend(device_t);
static int	em_resume(device_t);
static void	em_start(struct ifnet *);
static void	em_start_locked(struct ifnet *ifp);
static int	em_ioctl(struct ifnet *, u_long, caddr_t);
static void	em_watchdog(struct ifnet *);
static void	em_init(void *);
static void	em_init_locked(struct adapter *);
static void	em_stop(void *);
static void	em_media_status(struct ifnet *, struct ifmediareq *);
static int	em_media_change(struct ifnet *);
static void	em_identify_hardware(struct adapter *);
static int	em_allocate_pci_resources(struct adapter *);
static int	em_allocate_intr(struct adapter *);
static void	em_free_intr(struct adapter *);
static void	em_free_pci_resources(struct adapter *);
static void	em_local_timer(void *);
static int	em_hardware_init(struct adapter *);
static void	em_setup_interface(device_t, struct adapter *);
static int	em_setup_transmit_structures(struct adapter *);
static void	em_initialize_transmit_unit(struct adapter *);
static int	em_setup_receive_structures(struct adapter *);
static void	em_initialize_receive_unit(struct adapter *);
static void	em_enable_intr(struct adapter *);
static void	em_disable_intr(struct adapter *);
static void	em_free_transmit_structures(struct adapter *);
static void	em_free_receive_structures(struct adapter *);
static void	em_update_stats_counters(struct adapter *);
static void	em_txeof(struct adapter *);
static int	em_allocate_receive_structures(struct adapter *);
static int	em_allocate_transmit_structures(struct adapter *);
static int	em_rxeof(struct adapter *, int);
#ifndef __NO_STRICT_ALIGNMENT
static int	em_fixup_rx(struct adapter *);
#endif
static void	em_receive_checksum(struct adapter *, struct em_rx_desc *,
		    struct mbuf *);
static void	em_transmit_checksum_setup(struct adapter *, struct mbuf *,
		    uint32_t *, uint32_t *);
static boolean_t em_tso_setup(struct adapter *, struct mbuf *,
		    uint32_t *, uint32_t *);
static void	em_set_promisc(struct adapter *);
static void	em_disable_promisc(struct adapter *);
static void	em_set_multi(struct adapter *);
static void	em_print_hw_stats(struct adapter *);
static void	em_update_link_status(struct adapter *);
static int	em_get_buf(struct adapter *, int);
static void	em_enable_vlans(struct adapter *);
static void	em_disable_vlans(struct adapter *);
static int	em_encap(struct adapter *, struct mbuf **);
static void	em_smartspeed(struct adapter *);
static int	em_82547_fifo_workaround(struct adapter *, int);
static void	em_82547_update_fifo_head(struct adapter *, int);
static int	em_82547_tx_fifo_reset(struct adapter *);
static void	em_82547_move_tail(void *arg);
static void	em_82547_move_tail_locked(struct adapter *);
static int	em_dma_malloc(struct adapter *, bus_size_t,
		    struct em_dma_alloc *, int);
static void	em_dma_free(struct adapter *, struct em_dma_alloc *);
static void	em_print_debug_info(struct adapter *);
static int	em_is_valid_ether_addr(uint8_t *);
static int	em_sysctl_stats(SYSCTL_HANDLER_ARGS);
static int	em_sysctl_debug_info(SYSCTL_HANDLER_ARGS);
static uint32_t	em_fill_descriptors(bus_addr_t address, uint32_t length,
		    PDESC_ARRAY desc_array);
static int	em_sysctl_int_delay(SYSCTL_HANDLER_ARGS);
static void	em_add_int_delay_sysctl(struct adapter *, const char *,
		    const char *, struct em_int_delay_info *, int, int);

/*
 * Fast interrupt handler and legacy ithread/polling modes are
 * mutually exclusive.
 */
#ifdef DEVICE_POLLING
static poll_handler_t em_poll;
static void	em_intr(void *);
#else
static void	em_intr_fast(void *);
static void	em_add_int_process_limit(struct adapter *, const char *,
		    const char *, int *, int);
static void	em_handle_rxtx(void *context, int pending);
static void	em_handle_link(void *context, int pending);
#endif

/*********************************************************************
 *  FreeBSD Device Interface Entry Points
 *********************************************************************/

static device_method_t em_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe, em_probe),
	DEVMETHOD(device_attach, em_attach),
	DEVMETHOD(device_detach, em_detach),
	DEVMETHOD(device_shutdown, em_shutdown),
	DEVMETHOD(device_suspend, em_suspend),
	DEVMETHOD(device_resume, em_resume),
	{0, 0}
};

static driver_t em_driver = {
	"em", em_methods, sizeof(struct adapter),
};

static devclass_t em_devclass;
DRIVER_MODULE(em, pci, em_driver, em_devclass, 0, 0);
MODULE_DEPEND(em, pci, 1, 1, 1);
MODULE_DEPEND(em, ether, 1, 1, 1);

/*********************************************************************
 *  Tunable default values.
 *********************************************************************/

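/*
 * The adapter's interrupt delay registers count in units of 1.024
 * microseconds; these macros convert between register ticks and
 * microseconds, rounding to the nearest whole unit.
 */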
#define E1000_TICKS_TO_USECS(ticks)	((1024 * (ticks) + 500) / 1000)
#define E1000_USECS_TO_TICKS(usecs)	((1000 * (usecs) + 512) / 1024)
#define M_TSO_LEN			66

static int em_tx_int_delay_dflt = E1000_TICKS_TO_USECS(EM_TIDV);
static int em_rx_int_delay_dflt = E1000_TICKS_TO_USECS(EM_RDTR);
static int em_tx_abs_int_delay_dflt = E1000_TICKS_TO_USECS(EM_TADV);
static int em_rx_abs_int_delay_dflt = E1000_TICKS_TO_USECS(EM_RADV);
static int em_rxd = EM_DEFAULT_RXD;
static int em_txd = EM_DEFAULT_TXD;
static int em_smart_pwr_down = FALSE;

TUNABLE_INT("hw.em.tx_int_delay", &em_tx_int_delay_dflt);
TUNABLE_INT("hw.em.rx_int_delay", &em_rx_int_delay_dflt);
TUNABLE_INT("hw.em.tx_abs_int_delay", &em_tx_abs_int_delay_dflt);
TUNABLE_INT("hw.em.rx_abs_int_delay", &em_rx_abs_int_delay_dflt);
TUNABLE_INT("hw.em.rxd", &em_rxd);
TUNABLE_INT("hw.em.txd", &em_txd);
TUNABLE_INT("hw.em.smart_pwr_down", &em_smart_pwr_down);
#ifndef DEVICE_POLLING
static int em_rx_process_limit = 100;
TUNABLE_INT("hw.em.rx_process_limit", &em_rx_process_limit);
#endif
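
/*
 * Any of the tunables above can be overridden at boot time from
 * loader.conf(5); the values shown here are only illustrative, e.g.:
 *
 *	hw.em.rxd="1024"
 *	hw.em.tx_int_delay="67"
 */
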
/*********************************************************************
 *  Device identification routine
 *
 *  em_probe determines if the driver should be loaded on the
 *  adapter, based on the adapter's PCI vendor/device ID.
 *
 *  return BUS_PROBE_DEFAULT on success, positive on failure
 *********************************************************************/

static int
em_probe(device_t dev)
{
	char		adapter_name[60];
	uint16_t	pci_vendor_id = 0;
	uint16_t	pci_device_id = 0;
	uint16_t	pci_subvendor_id = 0;
	uint16_t	pci_subdevice_id = 0;
	em_vendor_info_t *ent;

	INIT_DEBUGOUT("em_probe: begin");

	pci_vendor_id = pci_get_vendor(dev);
	if (pci_vendor_id != EM_VENDOR_ID)
		return (ENXIO);

	pci_device_id = pci_get_device(dev);
	pci_subvendor_id = pci_get_subvendor(dev);
	pci_subdevice_id = pci_get_subdevice(dev);

	ent = em_vendor_info_array;
	while (ent->vendor_id != 0) {
		if ((pci_vendor_id == ent->vendor_id) &&
		    (pci_device_id == ent->device_id) &&

		    ((pci_subvendor_id == ent->subvendor_id) ||
		    (ent->subvendor_id == PCI_ANY_ID)) &&

		    ((pci_subdevice_id == ent->subdevice_id) ||
		    (ent->subdevice_id == PCI_ANY_ID))) {
			sprintf(adapter_name, "%s %s",
				em_strings[ent->index],
				em_driver_version);
			device_set_desc_copy(dev, adapter_name);
			return (BUS_PROBE_DEFAULT);
		}
		ent++;
	}

	return (ENXIO);
}

/*********************************************************************
 *  Device initialization routine
 *
 *  The attach entry point is called when the driver is being loaded.
 *  This routine identifies the type of hardware, allocates all
 *  resources and initializes the hardware.
 *
 *  return 0 on success, positive on failure
 *********************************************************************/

static int
em_attach(device_t dev)
{
	struct adapter	*adapter;
	int		tsize, rsize;
	int		error = 0;

	INIT_DEBUGOUT("em_attach: begin");

	adapter = device_get_softc(dev);
	adapter->dev = adapter->osdep.dev = dev;
	EM_LOCK_INIT(adapter, device_get_nameunit(dev));

	/* SYSCTL stuff */
	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
	    OID_AUTO, "debug_info", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
	    em_sysctl_debug_info, "I", "Debug Information");

	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
	    OID_AUTO, "stats", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
	    em_sysctl_stats, "I", "Statistics");

	callout_init(&adapter->timer, CALLOUT_MPSAFE);
	callout_init(&adapter->tx_fifo_timer, CALLOUT_MPSAFE);

	/* Determine hardware revision */
	em_identify_hardware(adapter);

	/* Set up some sysctls for the tunable interrupt delays */
	em_add_int_delay_sysctl(adapter, "rx_int_delay",
	    "receive interrupt delay in usecs", &adapter->rx_int_delay,
	    E1000_REG_OFFSET(&adapter->hw, RDTR), em_rx_int_delay_dflt);
	em_add_int_delay_sysctl(adapter, "tx_int_delay",
	    "transmit interrupt delay in usecs", &adapter->tx_int_delay,
	    E1000_REG_OFFSET(&adapter->hw, TIDV), em_tx_int_delay_dflt);
	if (adapter->hw.mac_type >= em_82540) {
		em_add_int_delay_sysctl(adapter, "rx_abs_int_delay",
		    "receive interrupt delay limit in usecs",
		    &adapter->rx_abs_int_delay,
		    E1000_REG_OFFSET(&adapter->hw, RADV),
		    em_rx_abs_int_delay_dflt);
		em_add_int_delay_sysctl(adapter, "tx_abs_int_delay",
		    "transmit interrupt delay limit in usecs",
		    &adapter->tx_abs_int_delay,
		    E1000_REG_OFFSET(&adapter->hw, TADV),
		    em_tx_abs_int_delay_dflt);
	}

#ifndef DEVICE_POLLING
	/* Sysctls for limiting the amount of work done in the taskqueue */
	em_add_int_process_limit(adapter, "rx_processing_limit",
	    "max number of rx packets to process", &adapter->rx_process_limit,
	    em_rx_process_limit);
#endif

	/*
	 * Validate the number of transmit and receive descriptors. It
	 * must not exceed the hardware maximum and must be a multiple
	 * of EM_DBA_ALIGN.
	 */
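	/*
	 * For example, assuming 16-byte descriptors and
	 * EM_DBA_ALIGN == 128, only descriptor counts that are a
	 * multiple of 8 satisfy the alignment check below.
	 */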
	if (((em_txd * sizeof(struct em_tx_desc)) % EM_DBA_ALIGN) != 0 ||
	    (adapter->hw.mac_type >= em_82544 && em_txd > EM_MAX_TXD) ||
	    (adapter->hw.mac_type < em_82544 && em_txd > EM_MAX_TXD_82543) ||
	    (em_txd < EM_MIN_TXD)) {
		device_printf(dev, "Using %d TX descriptors instead of %d!\n",
		    EM_DEFAULT_TXD, em_txd);
		adapter->num_tx_desc = EM_DEFAULT_TXD;
	} else
		adapter->num_tx_desc = em_txd;
	if (((em_rxd * sizeof(struct em_rx_desc)) % EM_DBA_ALIGN) != 0 ||
	    (adapter->hw.mac_type >= em_82544 && em_rxd > EM_MAX_RXD) ||
	    (adapter->hw.mac_type < em_82544 && em_rxd > EM_MAX_RXD_82543) ||
	    (em_rxd < EM_MIN_RXD)) {
		device_printf(dev, "Using %d RX descriptors instead of %d!\n",
		    EM_DEFAULT_RXD, em_rxd);
		adapter->num_rx_desc = EM_DEFAULT_RXD;
	} else
		adapter->num_rx_desc = em_rxd;

	adapter->hw.autoneg = DO_AUTO_NEG;
	adapter->hw.wait_autoneg_complete = WAIT_FOR_AUTO_NEG_DEFAULT;
	adapter->hw.autoneg_advertised = AUTONEG_ADV_DEFAULT;
	adapter->hw.tbi_compatibility_en = TRUE;
	adapter->rx_buffer_len = EM_RXBUFFER_2048;

	adapter->hw.phy_init_script = 1;
	adapter->hw.phy_reset_disable = FALSE;

#ifndef EM_MASTER_SLAVE
	adapter->hw.master_slave = em_ms_hw_default;
#else
	adapter->hw.master_slave = EM_MASTER_SLAVE;
#endif
	/*
	 * Set the max frame size assuming standard ethernet
	 * sized frames.
	 */
	adapter->hw.max_frame_size = ETHERMTU + ETHER_HDR_LEN + ETHER_CRC_LEN;

	adapter->hw.min_frame_size =
	    MINIMUM_ETHERNET_PACKET_SIZE + ETHER_CRC_LEN;

	/*
	 * This controls when hardware reports transmit completion
	 * status.
	 */
	adapter->hw.report_tx_early = 1;
	if (em_allocate_pci_resources(adapter)) {
		device_printf(dev, "Allocation of PCI resources failed\n");
		error = ENXIO;
		goto err_pci;
	}

	/* Initialize eeprom parameters */
	em_init_eeprom_params(&adapter->hw);

	tsize = roundup2(adapter->num_tx_desc * sizeof(struct em_tx_desc),
	    EM_DBA_ALIGN);

	/* Allocate Transmit Descriptor ring */
	if (em_dma_malloc(adapter, tsize, &adapter->txdma, BUS_DMA_NOWAIT)) {
		device_printf(dev, "Unable to allocate tx_desc memory\n");
		error = ENOMEM;
		goto err_tx_desc;
	}
	adapter->tx_desc_base = (struct em_tx_desc *)adapter->txdma.dma_vaddr;

	rsize = roundup2(adapter->num_rx_desc * sizeof(struct em_rx_desc),
	    EM_DBA_ALIGN);

	/* Allocate Receive Descriptor ring */
	if (em_dma_malloc(adapter, rsize, &adapter->rxdma, BUS_DMA_NOWAIT)) {
		device_printf(dev, "Unable to allocate rx_desc memory\n");
		error = ENOMEM;
		goto err_rx_desc;
	}
	adapter->rx_desc_base = (struct em_rx_desc *)adapter->rxdma.dma_vaddr;

	/* Initialize the hardware */
	if (em_hardware_init(adapter)) {
		device_printf(dev, "Unable to initialize the hardware\n");
		error = EIO;
		goto err_hw_init;
	}

	/* Copy the permanent MAC address out of the EEPROM */
	if (em_read_mac_addr(&adapter->hw) < 0) {
		device_printf(dev, "EEPROM read error while reading MAC"
		    " address\n");
		error = EIO;
		goto err_hw_init;
	}

	if (!em_is_valid_ether_addr(adapter->hw.mac_addr)) {
		device_printf(dev, "Invalid MAC address\n");
		error = EIO;
		goto err_hw_init;
	}

	/* Setup OS specific network interface */
	em_setup_interface(dev, adapter);

	em_allocate_intr(adapter);

	/* Initialize statistics */
	em_clear_hw_cntrs(&adapter->hw);
	em_update_stats_counters(adapter);
	adapter->hw.get_link_status = 1;
	em_update_link_status(adapter);

	/* Indicate SOL/IDER usage */
	if (em_check_phy_reset_block(&adapter->hw))
		device_printf(dev,
		    "PHY reset is blocked due to SOL/IDER session.\n");

	/* Identify 82544 on PCIX */
	em_get_bus_info(&adapter->hw);
	if (adapter->hw.bus_type == em_bus_type_pcix &&
	    adapter->hw.mac_type == em_82544)
		adapter->pcix_82544 = TRUE;
	else
		adapter->pcix_82544 = FALSE;

	INIT_DEBUGOUT("em_attach: end");

	return (0);

err_hw_init:
	em_dma_free(adapter, &adapter->rxdma);
err_rx_desc:
	em_dma_free(adapter, &adapter->txdma);
err_tx_desc:
err_pci:
	em_free_intr(adapter);
	em_free_pci_resources(adapter);
	EM_LOCK_DESTROY(adapter);

	return (error);
}

/*********************************************************************
 *  Device removal routine
 *
 *  The detach entry point is called when the driver is being removed.
 *  This routine stops the adapter and deallocates all the resources
 *  that were allocated for driver operation.
 *
 *  return 0 on success, positive on failure
 *********************************************************************/

static int
em_detach(device_t dev)
{
	struct adapter	*adapter = device_get_softc(dev);
	struct ifnet	*ifp = adapter->ifp;

	INIT_DEBUGOUT("em_detach: begin");

#ifdef DEVICE_POLLING
	if (ifp->if_capenable & IFCAP_POLLING)
		ether_poll_deregister(ifp);
#endif

	em_free_intr(adapter);
	EM_LOCK(adapter);
	adapter->in_detach = 1;
	em_stop(adapter);
	em_phy_hw_reset(&adapter->hw);
	EM_UNLOCK(adapter);
	ether_ifdetach(adapter->ifp);

	em_free_pci_resources(adapter);
	bus_generic_detach(dev);
	if_free(ifp);

	/* Free Transmit Descriptor ring */
	if (adapter->tx_desc_base) {
		em_dma_free(adapter, &adapter->txdma);
		adapter->tx_desc_base = NULL;
	}

	/* Free Receive Descriptor ring */
	if (adapter->rx_desc_base) {
		em_dma_free(adapter, &adapter->rxdma);
		adapter->rx_desc_base = NULL;
	}

	EM_LOCK_DESTROY(adapter);

	return (0);
}

/*********************************************************************
 *
 *  Shutdown entry point
 *
 **********************************************************************/

static int
em_shutdown(device_t dev)
{
	struct adapter *adapter = device_get_softc(dev);

	EM_LOCK(adapter);
	em_stop(adapter);
	EM_UNLOCK(adapter);
	return (0);
}

/*
 * Suspend/resume device methods.
 */
static int
em_suspend(device_t dev)
{
	struct adapter *adapter = device_get_softc(dev);

	EM_LOCK(adapter);
	em_stop(adapter);
	EM_UNLOCK(adapter);

	return (bus_generic_suspend(dev));
}

static int
em_resume(device_t dev)
{
	struct adapter *adapter = device_get_softc(dev);
	struct ifnet *ifp = adapter->ifp;

	EM_LOCK(adapter);
	em_init_locked(adapter);
	if ((ifp->if_flags & IFF_UP) &&
	    (ifp->if_drv_flags & IFF_DRV_RUNNING))
		em_start_locked(ifp);
	EM_UNLOCK(adapter);

	return (bus_generic_resume(dev));
}


/*********************************************************************
 *  Transmit entry point
 *
 *  em_start is called by the stack to initiate a transmit.
 *  The driver will remain in this routine as long as there are
 *  packets to transmit and transmit resources are available.
 *  If resources are not available, the stack is notified and
 *  the packet is requeued.
 **********************************************************************/

static void
em_start_locked(struct ifnet *ifp)
{
	struct adapter	*adapter = ifp->if_softc;
	struct mbuf	*m_head;

	EM_LOCK_ASSERT(adapter);

	if ((ifp->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) !=
	    IFF_DRV_RUNNING)
		return;
	if (!adapter->link_active)
		return;

	while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {

		IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
		if (m_head == NULL)
			break;
		/*
		 * em_encap() can modify our pointer, and/or make it NULL on
		 * failure.  In that event, we can't requeue.
		 */
		if (em_encap(adapter, &m_head)) {
			if (m_head == NULL)
				break;
			ifp->if_drv_flags |= IFF_DRV_OACTIVE;
			IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
			break;
		}

		/* Send a copy of the frame to the BPF listener */
		BPF_MTAP(ifp, m_head);

		/* Set timeout in case hardware has problems transmitting. */
		ifp->if_timer = EM_TX_TIMEOUT;
	}
}

static void
em_start(struct ifnet *ifp)
{
	struct adapter *adapter = ifp->if_softc;

	EM_LOCK(adapter);
	if (ifp->if_drv_flags & IFF_DRV_RUNNING)
		em_start_locked(ifp);
	EM_UNLOCK(adapter);
}

/*********************************************************************
 *  Ioctl entry point
 *
 *  em_ioctl is called when the user wants to configure the
 *  interface.
 *
 *  return 0 on success, positive on failure
 **********************************************************************/

static int
em_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
{
	struct adapter	*adapter = ifp->if_softc;
	struct ifreq	*ifr = (struct ifreq *)data;
	struct ifaddr	*ifa = (struct ifaddr *)data;
	int		error = 0;

	if (adapter->in_detach)
		return (error);

	switch (command) {
	case SIOCSIFADDR:
	case SIOCGIFADDR:
		if (ifa->ifa_addr->sa_family == AF_INET) {
			/*
			 * XXX
			 * Since resetting the hardware takes a very long
			 * time and results in link renegotiation, we only
			 * initialize the hardware when it is absolutely
			 * required.
			 */
			ifp->if_flags |= IFF_UP;
			if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) {
				EM_LOCK(adapter);
				em_init_locked(adapter);
				EM_UNLOCK(adapter);
			}
			arp_ifinit(ifp, ifa);
		} else
			error = ether_ioctl(ifp, command, data);
		break;
	case SIOCSIFMTU:
	    {
		int max_frame_size;
		uint16_t eeprom_data = 0;

		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFMTU (Set Interface MTU)");

		EM_LOCK(adapter);
		switch (adapter->hw.mac_type) {
		case em_82573:
			/*
			 * 82573 only supports jumbo frames
			 * if ASPM is disabled.
			 */
			em_read_eeprom(&adapter->hw, EEPROM_INIT_3GIO_3, 1,
			    &eeprom_data);
			if (eeprom_data & EEPROM_WORD1A_ASPM_MASK) {
				max_frame_size = ETHER_MAX_LEN;
				break;
			}
			/* Allow Jumbo frames - fall thru */
		case em_82571:
		case em_82572:
		case em_80003es2lan:	/* Limit Jumbo Frame size */
			max_frame_size = 9234;
			break;
		case em_ich8lan:
			/* ICH8 does not support jumbo frames */
			max_frame_size = ETHER_MAX_LEN;
			break;
		default:
			max_frame_size = MAX_JUMBO_FRAME_SIZE;
		}
		if (ifr->ifr_mtu > max_frame_size - ETHER_HDR_LEN -
		    ETHER_CRC_LEN) {
			EM_UNLOCK(adapter);
			error = EINVAL;
			break;
		}

		ifp->if_mtu = ifr->ifr_mtu;
		adapter->hw.max_frame_size =
		    ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
		em_init_locked(adapter);
		EM_UNLOCK(adapter);
		break;
	    }
	case SIOCSIFFLAGS:
		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFFLAGS (Set Interface Flags)");
		EM_LOCK(adapter);
		if (ifp->if_flags & IFF_UP) {
			if ((ifp->if_drv_flags & IFF_DRV_RUNNING)) {
				if ((ifp->if_flags ^ adapter->if_flags) &
				    IFF_PROMISC) {
					em_disable_promisc(adapter);
					em_set_promisc(adapter);
				}
			} else
				em_init_locked(adapter);
		} else {
			if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
				em_stop(adapter);
			}
		}
		adapter->if_flags = ifp->if_flags;
		EM_UNLOCK(adapter);
		break;
	case SIOCADDMULTI:
	case SIOCDELMULTI:
		IOCTL_DEBUGOUT("ioctl rcv'd: SIOC(ADD|DEL)MULTI");
		if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
			EM_LOCK(adapter);
			em_disable_intr(adapter);
			em_set_multi(adapter);
			if (adapter->hw.mac_type == em_82542_rev2_0) {
				em_initialize_receive_unit(adapter);
			}
#ifdef DEVICE_POLLING
			if (!(ifp->if_capenable & IFCAP_POLLING))
#endif
				em_enable_intr(adapter);
			EM_UNLOCK(adapter);
		}
		break;
	case SIOCSIFMEDIA:
	case SIOCGIFMEDIA:
		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCxIFMEDIA (Get/Set Interface Media)");
		error = ifmedia_ioctl(ifp, ifr, &adapter->media, command);
		break;
	case SIOCSIFCAP:
	    {
		int mask, reinit;

		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFCAP (Set Capabilities)");
		reinit = 0;
		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
#ifdef DEVICE_POLLING
		if (mask & IFCAP_POLLING) {
			if (ifr->ifr_reqcap & IFCAP_POLLING) {
				error = ether_poll_register(em_poll, ifp);
				if (error)
					return (error);
				EM_LOCK(adapter);
				em_disable_intr(adapter);
				ifp->if_capenable |= IFCAP_POLLING;
				EM_UNLOCK(adapter);
			} else {
				error = ether_poll_deregister(ifp);
				/* Enable interrupt even in error case */
				EM_LOCK(adapter);
				em_enable_intr(adapter);
				ifp->if_capenable &= ~IFCAP_POLLING;
				EM_UNLOCK(adapter);
			}
		}
#endif
		if (mask & IFCAP_HWCSUM) {
			ifp->if_capenable ^= IFCAP_HWCSUM;
			reinit = 1;
		}
		if (mask & IFCAP_TSO4) {
			ifp->if_capenable ^= IFCAP_TSO4;
			reinit = 1;
		}
		if (mask & IFCAP_VLAN_HWTAGGING) {
			ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
			reinit = 1;
		}
		if (reinit && (ifp->if_drv_flags & IFF_DRV_RUNNING))
			em_init(adapter);
		VLAN_CAPABILITIES(ifp);
		break;
	    }
	default:
		error = ether_ioctl(ifp, command, data);
		break;
	}

	return (error);
}

/*********************************************************************
 *  Watchdog entry point
 *
 *  This routine is called whenever hardware quits transmitting.
 *
 **********************************************************************/

static void
em_watchdog(struct ifnet *ifp)
{
	struct adapter *adapter = ifp->if_softc;

	EM_LOCK(adapter);
	/*
	 * If we are in this routine because of pause frames, then
	 * don't reset the hardware.
	 */
	if (E1000_READ_REG(&adapter->hw, STATUS) & E1000_STATUS_TXOFF) {
		ifp->if_timer = EM_TX_TIMEOUT;
		EM_UNLOCK(adapter);
		return;
	}

	/*
	 * Reclaim first, as there is a possibility of losing Tx completion
	 * interrupts.  Missing Tx completion interrupts may be caused by
	 * the Tx interrupt moderation mechanism (delayed interrupts) or by
	 * a chipset bug.
	 */
	em_txeof(adapter);
	if (adapter->num_tx_desc_avail == adapter->num_tx_desc) {
		EM_UNLOCK(adapter);
		return;
	}

	if (em_check_for_link(&adapter->hw) == 0)
		device_printf(adapter->dev, "watchdog timeout -- resetting\n");

	ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
	adapter->watchdog_events++;

	em_init_locked(adapter);
	EM_UNLOCK(adapter);
}

/*********************************************************************
 *  Init entry point
 *
 *  This routine is used in two ways.  It is used by the stack as
 *  the init entry point in the network interface structure.  It is
 *  also used by the driver as a hw/sw initialization routine to get
 *  to a consistent state.
 *
 **********************************************************************/

static void
em_init_locked(struct adapter *adapter)
{
	struct ifnet	*ifp = adapter->ifp;
	device_t	dev = adapter->dev;
	uint32_t	pba;

	INIT_DEBUGOUT("em_init: begin");

	EM_LOCK_ASSERT(adapter);

	em_stop(adapter);

	/*
	 * Packet Buffer Allocation (PBA)
	 * Writing PBA sets the receive portion of the buffer;
	 * the remainder is used for the transmit buffer.
	 *
	 * Devices before the 82547 had a Packet Buffer of 64K.
	 *   Default allocation: PBA=48K for Rx, leaving 16K for Tx.
	 * After the 82547 the buffer was reduced to 40K.
	 *   Default allocation: PBA=30K for Rx, leaving 10K for Tx.
	 *   Note: the default does not leave enough room for a
	 *   Jumbo Frame >10k.
	 */
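	/*
	 * For example, an 82547 with standard sized frames takes the
	 * E1000_PBA_30K case below, which leaves
	 * tx_fifo_size = (40K - 30K) = 10K for the transmit FIFO.
	 */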
	switch (adapter->hw.mac_type) {
	case em_82547:
	case em_82547_rev_2: /* 82547: Total Packet Buffer is 40K */
		if (adapter->hw.max_frame_size > EM_RXBUFFER_8192)
			pba = E1000_PBA_22K; /* 22K for Rx, 18K for Tx */
		else
			pba = E1000_PBA_30K; /* 30K for Rx, 10K for Tx */
		adapter->tx_fifo_head = 0;
		adapter->tx_head_addr = pba << EM_TX_HEAD_ADDR_SHIFT;
		adapter->tx_fifo_size =
		    (E1000_PBA_40K - pba) << EM_PBA_BYTES_SHIFT;
		break;
	case em_80003es2lan: /* 80003es2lan: Total Packet Buffer is 48K */
	case em_82571: /* 82571: Total Packet Buffer is 48K */
	case em_82572: /* 82572: Total Packet Buffer is 48K */
		pba = E1000_PBA_32K; /* 32K for Rx, 16K for Tx */
		break;
	case em_82573: /* 82573: Total Packet Buffer is 32K */
		/* Jumbo frames not supported */
		pba = E1000_PBA_12K; /* 12K for Rx, 20K for Tx */
		break;
	case em_ich8lan:
		pba = E1000_PBA_8K;
		break;
	default:
		/* Devices before the 82547 had a Packet Buffer of 64K. */
		if (adapter->hw.max_frame_size > EM_RXBUFFER_8192)
			pba = E1000_PBA_40K; /* 40K for Rx, 24K for Tx */
		else
			pba = E1000_PBA_48K; /* 48K for Rx, 16K for Tx */
	}

	INIT_DEBUGOUT1("em_init: pba=%dK", pba);
	E1000_WRITE_REG(&adapter->hw, PBA, pba);

	/* Get the latest MAC address; the user may have set an LAA. */
	bcopy(IF_LLADDR(adapter->ifp), adapter->hw.mac_addr, ETHER_ADDR_LEN);

	/* Initialize the hardware */
	if (em_hardware_init(adapter)) {
		device_printf(dev, "Unable to initialize the hardware\n");
		return;
	}
	em_update_link_status(adapter);

	if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING)
		em_enable_vlans(adapter);

	ifp->if_hwassist = 0;
	if (adapter->hw.mac_type >= em_82543) {
		if (ifp->if_capenable & IFCAP_TXCSUM)
			ifp->if_hwassist = EM_CHECKSUM_FEATURES;
		/*
		 * em_setup_transmit_structures() will behave differently
		 * based on the state of TSO.
		 */
		if (ifp->if_capenable & IFCAP_TSO)
			ifp->if_hwassist |= EM_TCPSEG_FEATURES;
	}

	/* Prepare transmit descriptors and buffers */
	if (em_setup_transmit_structures(adapter)) {
		device_printf(dev, "Could not setup transmit structures\n");
		em_stop(adapter);
		return;
	}
	em_initialize_transmit_unit(adapter);

	/* Setup Multicast table */
	em_set_multi(adapter);

	/* Prepare receive descriptors and buffers */
	if (em_setup_receive_structures(adapter)) {
		device_printf(dev, "Could not setup receive structures\n");
		em_stop(adapter);
		return;
	}
	em_initialize_receive_unit(adapter);

	/* Don't lose promiscuous settings */
	em_set_promisc(adapter);

	ifp->if_drv_flags |= IFF_DRV_RUNNING;
	ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;

	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
	em_clear_hw_cntrs(&adapter->hw);
#ifdef DEVICE_POLLING
	/*
	 * Only enable interrupts if we are not polling; make sure
	 * they are off otherwise.
	 */
	if (ifp->if_capenable & IFCAP_POLLING)
		em_disable_intr(adapter);
	else
#endif /* DEVICE_POLLING */
		em_enable_intr(adapter);

	/* Don't reset the phy next time init gets called */
	adapter->hw.phy_reset_disable = TRUE;
}

static void
em_init(void *arg)
{
	struct adapter *adapter = arg;

	EM_LOCK(adapter);
	em_init_locked(adapter);
	EM_UNLOCK(adapter);
}


#ifdef DEVICE_POLLING
/*********************************************************************
 *
 *  Legacy polling routine
 *
 *********************************************************************/
static void
em_poll(struct ifnet *ifp, enum poll_cmd cmd, int count)
{
	struct adapter *adapter = ifp->if_softc;
	uint32_t reg_icr;

	EM_LOCK(adapter);
	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
		EM_UNLOCK(adapter);
		return;
	}

	if (cmd == POLL_AND_CHECK_STATUS) {
		reg_icr = E1000_READ_REG(&adapter->hw, ICR);
		if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
			callout_stop(&adapter->timer);
			adapter->hw.get_link_status = 1;
			em_check_for_link(&adapter->hw);
			em_update_link_status(adapter);
			callout_reset(&adapter->timer, hz,
			    em_local_timer, adapter);
		}
	}
	em_rxeof(adapter, count);
	em_txeof(adapter);

	if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
		em_start_locked(ifp);
	EM_UNLOCK(adapter);
}

/*********************************************************************
 *
 *  Legacy Interrupt Service routine
 *
 *********************************************************************/
static void
em_intr(void *arg)
{
	struct adapter	*adapter = arg;
	struct ifnet	*ifp;
	uint32_t	reg_icr;

	EM_LOCK(adapter);

	ifp = adapter->ifp;

	if (ifp->if_capenable & IFCAP_POLLING) {
		EM_UNLOCK(adapter);
		return;
	}

	for (;;) {
		reg_icr = E1000_READ_REG(&adapter->hw, ICR);
		if (adapter->hw.mac_type >= em_82571 &&
		    (reg_icr & E1000_ICR_INT_ASSERTED) == 0)
			break;
		else if (reg_icr == 0)
			break;

		/*
		 * XXX: some laptops trigger several spurious interrupts
		 * on em(4) when in the resume cycle.  The ICR register
		 * reports an all-ones value in this case.  Processing
		 * such interrupts would lead to a freeze.  I don't know
		 * why.
		 */
		if (reg_icr == 0xffffffff)
			break;

		if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
			em_rxeof(adapter, -1);
			em_txeof(adapter);
		}

		/* Link status change */
		if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
			callout_stop(&adapter->timer);
			adapter->hw.get_link_status = 1;
			em_check_for_link(&adapter->hw);
			em_update_link_status(adapter);
			callout_reset(&adapter->timer, hz,
			    em_local_timer, adapter);
		}

		if (reg_icr & E1000_ICR_RXO)
			adapter->rx_overruns++;
	}

	if (ifp->if_drv_flags & IFF_DRV_RUNNING &&
	    !IFQ_DRV_IS_EMPTY(&ifp->if_snd))
		em_start_locked(ifp);

	EM_UNLOCK(adapter);
}

#else  /* if not DEVICE_POLLING, then fast interrupt routines only */

static void
em_handle_link(void *context, int pending)
{
	struct adapter	*adapter = context;
	struct ifnet	*ifp;

	ifp = adapter->ifp;

	EM_LOCK(adapter);

	callout_stop(&adapter->timer);
	adapter->hw.get_link_status = 1;
	em_check_for_link(&adapter->hw);
	em_update_link_status(adapter);
	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
	EM_UNLOCK(adapter);
}

static void
em_handle_rxtx(void *context, int pending)
{
	struct adapter	*adapter = context;
	struct ifnet	*ifp;

	NET_LOCK_GIANT();
	ifp = adapter->ifp;

	/*
	 * TODO:
	 * It should be possible to run the tx clean loop without the lock.
	 */
	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
		if (em_rxeof(adapter, adapter->rx_process_limit) != 0)
			taskqueue_enqueue(adapter->tq, &adapter->rxtx_task);
		EM_LOCK(adapter);
		em_txeof(adapter);

		if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
			em_start_locked(ifp);
		EM_UNLOCK(adapter);
	}

	em_enable_intr(adapter);
	NET_UNLOCK_GIANT();
}

/*********************************************************************
 *
 *  Fast Interrupt Service routine
 *
 *********************************************************************/
static void
em_intr_fast(void *arg)
{
	struct adapter	*adapter = arg;
	struct ifnet	*ifp;
	uint32_t	reg_icr;

	ifp = adapter->ifp;

	reg_icr = E1000_READ_REG(&adapter->hw, ICR);

	/* Hot eject?  */
	if (reg_icr == 0xffffffff)
		return;

	/* Definitely not our interrupt.  */
	if (reg_icr == 0x0)
		return;

	/*
	 * Starting with the 82571 chip, bit 31 should be used to
	 * determine whether the interrupt belongs to us.
	 */
	if (adapter->hw.mac_type >= em_82571 &&
	    (reg_icr & E1000_ICR_INT_ASSERTED) == 0)
		return;

	/*
	 * Mask interrupts until the taskqueue is finished running.  This is
	 * cheap, just assume that it is needed.  This also works around the
	 * MSI message reordering errata on certain systems.
	 */
	em_disable_intr(adapter);
	taskqueue_enqueue(adapter->tq, &adapter->rxtx_task);

	/* Link status change */
	if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC))
		taskqueue_enqueue(taskqueue_fast, &adapter->link_task);

	if (reg_icr & E1000_ICR_RXO)
		adapter->rx_overruns++;
}
#endif /* ! DEVICE_POLLING */

/*********************************************************************
 *
 *  Media Ioctl callback
 *
 *  This routine is called whenever the user queries the status of
 *  the interface using ifconfig.
 *
 **********************************************************************/
static void
em_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
{
	struct adapter *adapter = ifp->if_softc;

	INIT_DEBUGOUT("em_media_status: begin");

	EM_LOCK(adapter);
	em_check_for_link(&adapter->hw);
	em_update_link_status(adapter);

	ifmr->ifm_status = IFM_AVALID;
	ifmr->ifm_active = IFM_ETHER;

	if (!adapter->link_active) {
		EM_UNLOCK(adapter);
		return;
	}

	ifmr->ifm_status |= IFM_ACTIVE;

	if ((adapter->hw.media_type == em_media_type_fiber) ||
	    (adapter->hw.media_type == em_media_type_internal_serdes)) {
		if (adapter->hw.mac_type == em_82545)
			ifmr->ifm_active |= IFM_1000_LX | IFM_FDX;
		else
			ifmr->ifm_active |= IFM_1000_SX | IFM_FDX;
	} else {
		switch (adapter->link_speed) {
		case 10:
			ifmr->ifm_active |= IFM_10_T;
			break;
		case 100:
			ifmr->ifm_active |= IFM_100_TX;
			break;
		case 1000:
			ifmr->ifm_active |= IFM_1000_T;
			break;
		}
		if (adapter->link_duplex == FULL_DUPLEX)
			ifmr->ifm_active |= IFM_FDX;
		else
			ifmr->ifm_active |= IFM_HDX;
	}
	EM_UNLOCK(adapter);
}

/*********************************************************************
 *
 *  Media Ioctl callback
 *
 *  This routine is called when the user changes speed/duplex using
 *  the media/mediaopt options with ifconfig.
 *
 **********************************************************************/
static int
em_media_change(struct ifnet *ifp)
{
	struct adapter	*adapter = ifp->if_softc;
	struct ifmedia	*ifm = &adapter->media;

	INIT_DEBUGOUT("em_media_change: begin");

	if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
		return (EINVAL);

	EM_LOCK(adapter);
	switch (IFM_SUBTYPE(ifm->ifm_media)) {
	case IFM_AUTO:
		adapter->hw.autoneg = DO_AUTO_NEG;
		adapter->hw.autoneg_advertised = AUTONEG_ADV_DEFAULT;
		break;
	case IFM_1000_LX:
	case IFM_1000_SX:
	case IFM_1000_T:
		adapter->hw.autoneg = DO_AUTO_NEG;
		adapter->hw.autoneg_advertised = ADVERTISE_1000_FULL;
		break;
	case IFM_100_TX:
		adapter->hw.autoneg = FALSE;
		adapter->hw.autoneg_advertised = 0;
		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
			adapter->hw.forced_speed_duplex = em_100_full;
		else
			adapter->hw.forced_speed_duplex = em_100_half;
		break;
	case IFM_10_T:
		adapter->hw.autoneg = FALSE;
		adapter->hw.autoneg_advertised = 0;
		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
			adapter->hw.forced_speed_duplex = em_10_full;
		else
			adapter->hw.forced_speed_duplex = em_10_half;
		break;
	default:
		device_printf(adapter->dev, "Unsupported media type\n");
	}

	/*
	 * As the speed/duplex settings may have changed, we need to
	 * reset the PHY.
	 */
	adapter->hw.phy_reset_disable = FALSE;

	em_init_locked(adapter);
	EM_UNLOCK(adapter);

	return (0);
}

/*********************************************************************
 *
 *  This routine maps the mbufs to tx descriptors.
 *
 *  return 0 on success, positive on failure
 **********************************************************************/
static int
em_encap(struct adapter *adapter, struct mbuf **m_headp)
{
	struct ifnet		*ifp = adapter->ifp;
	bus_dma_segment_t	segs[EM_MAX_SCATTER];
	bus_dmamap_t		map;
	struct em_buffer	*tx_buffer, *tx_buffer_last;
	struct em_tx_desc	*current_tx_desc;
	struct mbuf		*m_head;
	uint32_t		txd_upper, txd_lower, txd_used, txd_saved;
	int			nsegs, i, j;
	int			error, do_tso, tso_desc = 0;

	m_head = *m_headp;
	current_tx_desc = NULL;
	txd_upper = txd_lower = txd_used = txd_saved = 0;

	do_tso = ((m_head->m_pkthdr.csum_flags & CSUM_TSO) != 0);

	/*
	 * Force a cleanup if the number of TX descriptors
	 * available hits the threshold.
	 */
	if (adapter->num_tx_desc_avail <= EM_TX_CLEANUP_THRESHOLD) {
		em_txeof(adapter);
		if (adapter->num_tx_desc_avail <= EM_TX_CLEANUP_THRESHOLD) {
			adapter->no_tx_desc_avail1++;
			return (ENOBUFS);
		}
	}

	/*
	 * When operating in promiscuous mode, hardware encapsulation for
	 * packets is disabled.  This means we have to add the vlan
	 * encapsulation in the driver, since it will have come down from the
	 * VLAN layer with a tag instead of a VLAN header.
	 */
	if ((m_head->m_flags & M_VLANTAG) && adapter->em_insert_vlan_header) {
		struct ether_vlan_header *evl;
		struct ether_header eh;

		m_head = m_pullup(m_head, sizeof(eh));
		if (m_head == NULL) {
			*m_headp = NULL;
			return (ENOBUFS);
		}
		eh = *mtod(m_head, struct ether_header *);
		M_PREPEND(m_head, sizeof(*evl), M_DONTWAIT);
		if (m_head == NULL) {
			*m_headp = NULL;
			return (ENOBUFS);
		}
		m_head = m_pullup(m_head, sizeof(*evl));
		if (m_head == NULL) {
			*m_headp = NULL;
			return (ENOBUFS);
		}
		evl = mtod(m_head, struct ether_vlan_header *);
		bcopy(&eh, evl, sizeof(*evl));
		evl->evl_proto = evl->evl_encap_proto;
		evl->evl_encap_proto = htons(ETHERTYPE_VLAN);
		evl->evl_tag = htons(m_head->m_pkthdr.ether_vtag);
		*m_headp = m_head;
	}

	/*
	 * TSO workaround:
	 *  If an mbuf contains only a header, we need
	 *  to pull 4 bytes of data into it.
	 */
	if (do_tso && (m_head->m_len <= M_TSO_LEN)) {
		m_head = m_pullup(m_head, M_TSO_LEN + 4);
		*m_headp = m_head;
		if (m_head == NULL)
			return (ENOBUFS);
	}

	/*
	 * Map the packet for DMA.
	 */
	tx_buffer = &adapter->tx_buffer_area[adapter->next_avail_tx_desc];
	tx_buffer_last = tx_buffer;
	map = tx_buffer->map;

	error = bus_dmamap_load_mbuf_sg(adapter->txtag, map, *m_headp, segs,
	    &nsegs, BUS_DMA_NOWAIT);

	/*
	 * There are two types of errors we can (try) to handle:
	 * - EFBIG means the mbuf chain was too long and bus_dma ran
	 *   out of segments.  Defragment the mbuf chain and try again.
	 * - ENOMEM means bus_dma could not obtain enough bounce buffers
	 *   at this point in time.  Defer sending and try again later.
	 * All other errors, in particular EINVAL, are fatal and prevent the
	 * mbuf chain from ever going through.  Drop it and report error.
	 */
	if (error == EFBIG) {
		struct mbuf *m;

		m = m_defrag(*m_headp, M_DONTWAIT);
		if (m == NULL) {
			/* Assume m_defrag(9) used only m_get(9). */
			adapter->mbuf_alloc_failed++;
			m_freem(*m_headp);
			*m_headp = NULL;
			return (ENOBUFS);
		}
		*m_headp = m;

		error = bus_dmamap_load_mbuf_sg(adapter->txtag, map, *m_headp,
		    segs, &nsegs, BUS_DMA_NOWAIT);

		if (error == ENOMEM) {
			adapter->no_tx_dma_setup++;
			return (error);
		} else if (error != 0) {
			adapter->no_tx_dma_setup++;
			m_freem(*m_headp);
			*m_headp = NULL;
			return (error);
		}
	} else if (error == ENOMEM) {
		adapter->no_tx_dma_setup++;
		return (error);
	} else if (error != 0) {
		adapter->no_tx_dma_setup++;
		m_freem(*m_headp);
		*m_headp = NULL;
		return (error);
	}

	/*
	 * TSO hardware workaround: if this packet is not
	 * TSO, and is only a single descriptor long, and
	 * it follows a TSO burst, then we need to add a
	 * sentinel descriptor to prevent premature writeback.
	 */
	if ((do_tso == 0) && (adapter->tx_tso == TRUE)) {
		if (nsegs == 1)
			tso_desc = TRUE;
		adapter->tx_tso = FALSE;
	}

	if (nsegs > adapter->num_tx_desc_avail - 2) {
		adapter->no_tx_desc_avail2++;
		bus_dmamap_unload(adapter->txtag, map);
		return (ENOBUFS);
	}

	/* Do hardware assists */
	m_head = *m_headp;
	if (ifp->if_hwassist > 0) {
		if (em_tso_setup(adapter, m_head, &txd_upper, &txd_lower)) {
			/* we need to make a final sentinel transmit desc */
			tso_desc = TRUE;
		} else
			em_transmit_checksum_setup(adapter, m_head,
			    &txd_upper, &txd_lower);
	}

	i = adapter->next_avail_tx_desc;
	if (adapter->pcix_82544)
		txd_saved = i;

	for (j = 0; j < nsegs; j++) {
		bus_size_t seg_len;
		bus_addr_t seg_addr;

		/* If the adapter is an 82544 on a PCIX bus. */
		if (adapter->pcix_82544) {
			DESC_ARRAY	desc_array;
			uint32_t	array_elements, counter;

			/*
			 * Check the Address and Length combination and
			 * split the data accordingly
			 */
			array_elements = em_fill_descriptors(segs[j].ds_addr,
			    segs[j].ds_len, &desc_array);
			for (counter = 0; counter < array_elements; counter++) {
				if (txd_used == adapter->num_tx_desc_avail) {
					adapter->next_avail_tx_desc = txd_saved;
					adapter->no_tx_desc_avail2++;
					bus_dmamap_unload(adapter->txtag, map);
					return (ENOBUFS);
				}
				tx_buffer = &adapter->tx_buffer_area[i];
				current_tx_desc = &adapter->tx_desc_base[i];
				current_tx_desc->buffer_addr = htole64(
				    desc_array.descriptor[counter].address);
				current_tx_desc->lower.data = htole32(
				    (adapter->txd_cmd | txd_lower |
				    (uint16_t)desc_array.descriptor[counter].length));
				current_tx_desc->upper.data = htole32(txd_upper);
				if (++i == adapter->num_tx_desc)
					i = 0;

				tx_buffer->m_head = NULL;
				txd_used++;
			}
		} else {
			tx_buffer = &adapter->tx_buffer_area[i];
			current_tx_desc = &adapter->tx_desc_base[i];
			seg_addr = htole64(segs[j].ds_addr);
			seg_len  = segs[j].ds_len;
			/*
			 * TSO Workaround:
			 * If this is the last descriptor, we want to
			 * split it so we have a small final sentinel.
			 */
			if (tso_desc && (j == (nsegs - 1)) && (seg_len > 8)) {
				seg_len -= 4;
				current_tx_desc->buffer_addr = seg_addr;
				current_tx_desc->lower.data = htole32(
				    adapter->txd_cmd | txd_lower | seg_len);
				current_tx_desc->upper.data =
				    htole32(txd_upper);
				if (++i == adapter->num_tx_desc)
					i = 0;
				/* Now make the sentinel */
				++txd_used; /* using an extra txd */
				current_tx_desc = &adapter->tx_desc_base[i];
				tx_buffer = &adapter->tx_buffer_area[i];
				current_tx_desc->buffer_addr =
				    seg_addr + seg_len;
				current_tx_desc->lower.data = htole32(
				    adapter->txd_cmd | txd_lower | 4);
				current_tx_desc->upper.data =
				    htole32(txd_upper);
				if (++i == adapter->num_tx_desc)
					i = 0;
			} else {
				current_tx_desc->buffer_addr = seg_addr;
				current_tx_desc->lower.data = htole32(
				    adapter->txd_cmd | txd_lower | seg_len);
				current_tx_desc->upper.data =
				    htole32(txd_upper);
				if (++i == adapter->num_tx_desc)
					i = 0;
			}
			tx_buffer->m_head = NULL;
		}
	}

	adapter->next_avail_tx_desc = i;
	if (adapter->pcix_82544)
		adapter->num_tx_desc_avail -= txd_used;
	else {
		adapter->num_tx_desc_avail -= nsegs;
		if (tso_desc) /* TSO used an extra for sentinel */
			adapter->num_tx_desc_avail -= txd_used;
	}

	if (m_head->m_flags & M_VLANTAG) {
		/* Set the vlan id. */
		current_tx_desc->upper.fields.special =
		    htole16(m_head->m_pkthdr.ether_vtag);

		/* Tell hardware to add tag. */
		current_tx_desc->lower.data |= htole32(E1000_TXD_CMD_VLE);
	}

	tx_buffer->m_head = m_head;
	tx_buffer_last->map = tx_buffer->map;
	tx_buffer->map = map;
	bus_dmamap_sync(adapter->txtag, map, BUS_DMASYNC_PREWRITE);

	/*
	 * The last descriptor of the packet needs End Of Packet (EOP).
	 */
	current_tx_desc->lower.data |= htole32(E1000_TXD_CMD_EOP);

	/*
	 * Advance the Transmit Descriptor Tail (TDT); this tells the
	 * E1000 that this frame is available to transmit.
	 */
	bus_dmamap_sync(adapter->txdma.dma_tag, adapter->txdma.dma_map,
	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
	if (adapter->hw.mac_type == em_82547 &&
	    adapter->link_duplex == HALF_DUPLEX)
		em_82547_move_tail_locked(adapter);
	else {
		E1000_WRITE_REG(&adapter->hw, TDT, i);
		if (adapter->hw.mac_type == em_82547)
			em_82547_update_fifo_head(adapter,
			    m_head->m_pkthdr.len);
	}

	return (0);
}
1733
/*********************************************************************
 *
 * 82547 workaround to avoid a controller hang in half-duplex
 * environments. The workaround is to avoid queuing a large packet
 * that would span the internal Tx FIFO ring boundary; in that case
 * we reset the FIFO pointers instead, and only once the FIFO is
 * quiescent.
 *
 **********************************************************************/
1742static void
1743em_82547_move_tail_locked(struct adapter *adapter)
1744{
1745	uint16_t hw_tdt;
1746	uint16_t sw_tdt;
1747	struct em_tx_desc *tx_desc;
1748	uint16_t length = 0;
1749	boolean_t eop = 0;
1750
1751	EM_LOCK_ASSERT(adapter);
1752
1753	hw_tdt = E1000_READ_REG(&adapter->hw, TDT);
1754	sw_tdt = adapter->next_avail_tx_desc;
1755
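	/*
	 * Release descriptors to the hardware one whole packet (EOP to EOP)
	 * at a time, so each packet can be checked against the FIFO wrap
	 * workaround before TDT moves past it.
	 */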
1756	while (hw_tdt != sw_tdt) {
1757		tx_desc = &adapter->tx_desc_base[hw_tdt];
1758		length += tx_desc->lower.flags.length;
1759		eop = tx_desc->lower.data & E1000_TXD_CMD_EOP;
		if (++hw_tdt == adapter->num_tx_desc)
1761			hw_tdt = 0;
1762
1763		if (eop) {
1764			if (em_82547_fifo_workaround(adapter, length)) {
1765				adapter->tx_fifo_wrk_cnt++;
1766				callout_reset(&adapter->tx_fifo_timer, 1,
1767					em_82547_move_tail, adapter);
1768				break;
1769			}
1770			E1000_WRITE_REG(&adapter->hw, TDT, hw_tdt);
1771			em_82547_update_fifo_head(adapter, length);
1772			length = 0;
1773		}
1774	}
1775}
1776
1777static void
1778em_82547_move_tail(void *arg)
1779{
1780	struct adapter *adapter = arg;
1781
1782	EM_LOCK(adapter);
1783	em_82547_move_tail_locked(adapter);
1784	EM_UNLOCK(adapter);
1785}
1786
1787static int
1788em_82547_fifo_workaround(struct adapter *adapter, int len)
1789{
1790	int fifo_space, fifo_pkt_len;
1791
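	/*
	 * Charge the packet for the FIFO's per-packet header and round up
	 * to the FIFO allocation granularity (EM_FIFO_HDR bytes).
	 */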
1792	fifo_pkt_len = roundup2(len + EM_FIFO_HDR, EM_FIFO_HDR);
1793
1794	if (adapter->link_duplex == HALF_DUPLEX) {
1795		fifo_space = adapter->tx_fifo_size - adapter->tx_fifo_head;
1796
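		/*
		 * The packet will not fit before the FIFO wrap point: try
		 * to reset the FIFO pointers now; if the FIFO is still
		 * draining, return 1 so the caller defers the send.
		 */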
1797		if (fifo_pkt_len >= (EM_82547_PKT_THRESH + fifo_space)) {
1798			if (em_82547_tx_fifo_reset(adapter))
1799				return (0);
1800			else
1801				return (1);
1802		}
1803	}
1804
1805	return (0);
1806}
1807
1808static void
1809em_82547_update_fifo_head(struct adapter *adapter, int len)
1810{
1811	int fifo_pkt_len = roundup2(len + EM_FIFO_HDR, EM_FIFO_HDR);
1812
	/* tx_fifo_head is always 16-byte aligned */
1814	adapter->tx_fifo_head += fifo_pkt_len;
1815	if (adapter->tx_fifo_head >= adapter->tx_fifo_size) {
1816		adapter->tx_fifo_head -= adapter->tx_fifo_size;
1817	}
1818}
1819
1821static int
1822em_82547_tx_fifo_reset(struct adapter *adapter)
1823{
1824	uint32_t tctl;
1825
1826	if ((E1000_READ_REG(&adapter->hw, TDT) == E1000_READ_REG(&adapter->hw, TDH)) &&
1827	    (E1000_READ_REG(&adapter->hw, TDFT) == E1000_READ_REG(&adapter->hw, TDFH)) &&
	    (E1000_READ_REG(&adapter->hw, TDFTS) == E1000_READ_REG(&adapter->hw, TDFHS)) &&
1829	    (E1000_READ_REG(&adapter->hw, TDFPC) == 0)) {
1830
1831		/* Disable TX unit */
1832		tctl = E1000_READ_REG(&adapter->hw, TCTL);
1833		E1000_WRITE_REG(&adapter->hw, TCTL, tctl & ~E1000_TCTL_EN);
1834
1835		/* Reset FIFO pointers */
1836		E1000_WRITE_REG(&adapter->hw, TDFT,  adapter->tx_head_addr);
1837		E1000_WRITE_REG(&adapter->hw, TDFH,  adapter->tx_head_addr);
1838		E1000_WRITE_REG(&adapter->hw, TDFTS, adapter->tx_head_addr);
1839		E1000_WRITE_REG(&adapter->hw, TDFHS, adapter->tx_head_addr);
1840
1841		/* Re-enable TX unit */
1842		E1000_WRITE_REG(&adapter->hw, TCTL, tctl);
1843		E1000_WRITE_FLUSH(&adapter->hw);
1844
1845		adapter->tx_fifo_head = 0;
1846		adapter->tx_fifo_reset_cnt++;
1847
		return (TRUE);
	}

	return (FALSE);
}
1854
1855static void
1856em_set_promisc(struct adapter *adapter)
1857{
1858	struct ifnet	*ifp = adapter->ifp;
1859	uint32_t	reg_rctl;
1860
1861	reg_rctl = E1000_READ_REG(&adapter->hw, RCTL);
1862
1863	if (ifp->if_flags & IFF_PROMISC) {
1864		reg_rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
1865		E1000_WRITE_REG(&adapter->hw, RCTL, reg_rctl);
		/* Disable VLAN stripping in promiscuous mode.
		 * This allows VLAN-tagged frames to be bridged
		 * and also lets the tags be seen in tcpdump.
		 */
1870		if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING)
1871			em_disable_vlans(adapter);
1872		adapter->em_insert_vlan_header = 1;
1873	} else if (ifp->if_flags & IFF_ALLMULTI) {
1874		reg_rctl |= E1000_RCTL_MPE;
1875		reg_rctl &= ~E1000_RCTL_UPE;
1876		E1000_WRITE_REG(&adapter->hw, RCTL, reg_rctl);
1877		adapter->em_insert_vlan_header = 0;
1878	} else
1879		adapter->em_insert_vlan_header = 0;
1880}
1881
1882static void
1883em_disable_promisc(struct adapter *adapter)
1884{
1885	struct ifnet	*ifp = adapter->ifp;
1886	uint32_t	reg_rctl;
1887
1888	reg_rctl = E1000_READ_REG(&adapter->hw, RCTL);
1889
1890	reg_rctl &=  (~E1000_RCTL_UPE);
1891	reg_rctl &=  (~E1000_RCTL_MPE);
1892	E1000_WRITE_REG(&adapter->hw, RCTL, reg_rctl);
1893
1894	if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING)
1895		em_enable_vlans(adapter);
1896	adapter->em_insert_vlan_header = 0;
1897}
1898
1900/*********************************************************************
1901 *  Multicast Update
1902 *
 *  This routine is called whenever the multicast address list is updated.
1904 *
1905 **********************************************************************/
1906
1907static void
1908em_set_multi(struct adapter *adapter)
1909{
1910	struct ifnet	*ifp = adapter->ifp;
1911	struct ifmultiaddr *ifma;
1912	uint32_t reg_rctl = 0;
1913	uint8_t  mta[MAX_NUM_MULTICAST_ADDRESSES * ETH_LENGTH_OF_ADDRESS];
1914	int mcnt = 0;
1915
1916	IOCTL_DEBUGOUT("em_set_multi: begin");
1917
1918	if (adapter->hw.mac_type == em_82542_rev2_0) {
1919		reg_rctl = E1000_READ_REG(&adapter->hw, RCTL);
1920		if (adapter->hw.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
1921			em_pci_clear_mwi(&adapter->hw);
1922		reg_rctl |= E1000_RCTL_RST;
1923		E1000_WRITE_REG(&adapter->hw, RCTL, reg_rctl);
1924		msec_delay(5);
1925	}
1926
1927	IF_ADDR_LOCK(ifp);
1928	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
1929		if (ifma->ifma_addr->sa_family != AF_LINK)
1930			continue;
1931
1932		if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
1933			break;
1934
1935		bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
1936		    &mta[mcnt*ETH_LENGTH_OF_ADDRESS], ETH_LENGTH_OF_ADDRESS);
1937		mcnt++;
1938	}
1939	IF_ADDR_UNLOCK(ifp);
1940
1941	if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES) {
1942		reg_rctl = E1000_READ_REG(&adapter->hw, RCTL);
1943		reg_rctl |= E1000_RCTL_MPE;
1944		E1000_WRITE_REG(&adapter->hw, RCTL, reg_rctl);
1945	} else
1946		em_mc_addr_list_update(&adapter->hw, mta, mcnt, 0, 1);
1947
1948	if (adapter->hw.mac_type == em_82542_rev2_0) {
1949		reg_rctl = E1000_READ_REG(&adapter->hw, RCTL);
1950		reg_rctl &= ~E1000_RCTL_RST;
1951		E1000_WRITE_REG(&adapter->hw, RCTL, reg_rctl);
1952		msec_delay(5);
1953		if (adapter->hw.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
1954			em_pci_set_mwi(&adapter->hw);
1955	}
1956}
1957
1959/*********************************************************************
1960 *  Timer routine
1961 *
1962 *  This routine checks for link status and updates statistics.
1963 *
1964 **********************************************************************/
1965
1966static void
1967em_local_timer(void *arg)
1968{
1969	struct adapter	*adapter = arg;
1970	struct ifnet	*ifp = adapter->ifp;
1971
1972	EM_LOCK(adapter);
1973
1974	em_check_for_link(&adapter->hw);
1975	em_update_link_status(adapter);
1976	em_update_stats_counters(adapter);
	if (em_display_debug_stats && (ifp->if_drv_flags & IFF_DRV_RUNNING))
1978		em_print_hw_stats(adapter);
1979	em_smartspeed(adapter);
1980
1981	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
1982
1983	EM_UNLOCK(adapter);
1984}
1985
1986static void
1987em_update_link_status(struct adapter *adapter)
1988{
1989	struct ifnet *ifp = adapter->ifp;
1990	device_t dev = adapter->dev;
1991
1992	if (E1000_READ_REG(&adapter->hw, STATUS) & E1000_STATUS_LU) {
1993		if (adapter->link_active == 0) {
1994			em_get_speed_and_duplex(&adapter->hw, &adapter->link_speed,
1995			    &adapter->link_duplex);
1996			/* Check if we may set SPEED_MODE bit on PCI-E */
1997			if ((adapter->link_speed == SPEED_1000) &&
1998			    ((adapter->hw.mac_type == em_82571) ||
1999			    (adapter->hw.mac_type == em_82572))) {
				uint32_t tarc0;
2001
2002				tarc0 = E1000_READ_REG(&adapter->hw, TARC0);
2003				tarc0 |= SPEED_MODE_BIT;
2004				E1000_WRITE_REG(&adapter->hw, TARC0, tarc0);
2005			}
2006			if (bootverbose)
2007				device_printf(dev, "Link is up %d Mbps %s\n",
2008				    adapter->link_speed,
2009				    ((adapter->link_duplex == FULL_DUPLEX) ?
2010				    "Full Duplex" : "Half Duplex"));
2011			adapter->link_active = 1;
2012			adapter->smartspeed = 0;
2013			ifp->if_baudrate = adapter->link_speed * 1000000;
2014			if_link_state_change(ifp, LINK_STATE_UP);
2015		}
2016	} else {
2017		if (adapter->link_active == 1) {
2018			ifp->if_baudrate = adapter->link_speed = 0;
2019			adapter->link_duplex = 0;
2020			if (bootverbose)
2021				device_printf(dev, "Link is Down\n");
2022			adapter->link_active = 0;
2023			if_link_state_change(ifp, LINK_STATE_DOWN);
2024		}
2025	}
2026}
2027
2028/*********************************************************************
2029 *
2030 *  This routine disables all traffic on the adapter by issuing a
 *  global reset on the MAC and deallocating the TX/RX buffers.
2032 *
2033 **********************************************************************/
2034
2035static void
2036em_stop(void *arg)
2037{
2038	struct adapter	*adapter = arg;
2039	struct ifnet	*ifp = adapter->ifp;
2040
2041	EM_LOCK_ASSERT(adapter);
2042
2043	INIT_DEBUGOUT("em_stop: begin");
2044
2045	em_disable_intr(adapter);
2046	em_reset_hw(&adapter->hw);
2047	callout_stop(&adapter->timer);
2048	callout_stop(&adapter->tx_fifo_timer);
2049	em_free_transmit_structures(adapter);
2050	em_free_receive_structures(adapter);
2051
2052	/* Tell the stack that the interface is no longer active */
2053	ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
2054}
2055
2057/********************************************************************
2058 *
2059 *  Determine hardware revision.
2060 *
2061 **********************************************************************/
2062static void
2063em_identify_hardware(struct adapter *adapter)
2064{
2065	device_t dev = adapter->dev;
2066
2067	/* Make sure our PCI config space has the necessary stuff set */
2068	pci_enable_busmaster(dev);
2069	pci_enable_io(dev, SYS_RES_MEMORY);
2070	adapter->hw.pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2);
2071
2072	/* Save off the information about this board */
2073	adapter->hw.vendor_id = pci_get_vendor(dev);
2074	adapter->hw.device_id = pci_get_device(dev);
2075	adapter->hw.revision_id = pci_get_revid(dev);
2076	adapter->hw.subsystem_vendor_id = pci_get_subvendor(dev);
2077	adapter->hw.subsystem_id = pci_get_subdevice(dev);
2078
2079	/* Identify the MAC */
2080	if (em_set_mac_type(&adapter->hw))
2081		device_printf(dev, "Unknown MAC Type\n");
2082
	if (adapter->hw.mac_type == em_82541 ||
	    adapter->hw.mac_type == em_82541_rev_2 ||
	    adapter->hw.mac_type == em_82547 ||
	    adapter->hw.mac_type == em_82547_rev_2)
2085		adapter->hw.phy_init_script = TRUE;
2086}
2087
2088static int
2089em_allocate_pci_resources(struct adapter *adapter)
2090{
2091	device_t	dev = adapter->dev;
2092	int		val, rid;
2093
2094	rid = PCIR_BAR(0);
2095	adapter->res_memory = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
2096	    &rid, RF_ACTIVE);
2097	if (adapter->res_memory == NULL) {
2098		device_printf(dev, "Unable to allocate bus resource: memory\n");
2099		return (ENXIO);
2100	}
	adapter->osdep.mem_bus_space_tag =
	    rman_get_bustag(adapter->res_memory);
	adapter->osdep.mem_bus_space_handle =
	    rman_get_bushandle(adapter->res_memory);
2104	adapter->hw.hw_addr = (uint8_t *)&adapter->osdep.mem_bus_space_handle;
2105
2106	if (adapter->hw.mac_type > em_82543) {
		/*
		 * Figure out where our I/O BAR is: walk the BARs until one
		 * decodes as I/O space.
		 */
2108		for (rid = PCIR_BAR(0); rid < PCIR_CIS;) {
2109			val = pci_read_config(dev, rid, 4);
2110			if (E1000_BAR_TYPE(val) == E1000_BAR_TYPE_IO) {
2111				adapter->io_rid = rid;
2112				break;
2113			}
2114			rid += 4;
			/* A 64-bit BAR occupies two dwords; skip the high one too. */
2116			if (E1000_BAR_MEM_TYPE(val) == E1000_BAR_MEM_TYPE_64BIT)
2117				rid += 4;
2118		}
2119		if (rid >= PCIR_CIS) {
2120			device_printf(dev, "Unable to locate IO BAR\n");
2121			return (ENXIO);
2122		}
2123		adapter->res_ioport = bus_alloc_resource_any(dev, SYS_RES_IOPORT,
2124		    &adapter->io_rid, RF_ACTIVE);
2125		if (adapter->res_ioport == NULL) {
2126			device_printf(dev, "Unable to allocate bus resource: "
2127			    "ioport\n");
2128			return (ENXIO);
2129		}
2130		adapter->hw.io_base = 0;
2131		adapter->osdep.io_bus_space_tag = rman_get_bustag(adapter->res_ioport);
2132		adapter->osdep.io_bus_space_handle =
2133		    rman_get_bushandle(adapter->res_ioport);
2134	}
2135
2136	/* For ICH8 we need to find the flash memory. */
2137	if (adapter->hw.mac_type == em_ich8lan) {
2138		rid = EM_FLASH;
2139
		adapter->flash_mem = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
		    &rid, RF_ACTIVE);
		if (adapter->flash_mem == NULL) {
			device_printf(dev, "Unable to allocate bus resource: "
			    "flash memory\n");
			return (ENXIO);
		}
		adapter->osdep.flash_bus_space_tag =
		    rman_get_bustag(adapter->flash_mem);
		adapter->osdep.flash_bus_space_handle =
		    rman_get_bushandle(adapter->flash_mem);
2145	}
2146
2147	rid = 0x0;
2148	adapter->res_interrupt = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid,
2149	    RF_SHAREABLE | RF_ACTIVE);
2150	if (adapter->res_interrupt == NULL) {
2151		device_printf(dev, "Unable to allocate bus resource: "
2152		    "interrupt\n");
2153		return (ENXIO);
2154	}
2155
2156	adapter->hw.back = &adapter->osdep;
2157
2158	return (0);
2159}
2160
2161int
2162em_allocate_intr(struct adapter *adapter)
2163{
2164	device_t dev = adapter->dev;
2165	int error;
2166
2167	/* Manually turn off all interrupts */
2168	E1000_WRITE_REG(&adapter->hw, IMC, 0xffffffff);
2169
2170#ifdef DEVICE_POLLING
2171	if (adapter->int_handler_tag == NULL && (error = bus_setup_intr(dev,
2172	    adapter->res_interrupt, INTR_TYPE_NET | INTR_MPSAFE, em_intr, adapter,
2173	    &adapter->int_handler_tag)) != 0) {
		device_printf(dev, "Failed to register interrupt handler\n");
2175		return (error);
2176	}
2177#else
2178	/*
2179	 * Try allocating a fast interrupt and the associated deferred
2180	 * processing contexts.
2181	 */
2182	TASK_INIT(&adapter->rxtx_task, 0, em_handle_rxtx, adapter);
2183	TASK_INIT(&adapter->link_task, 0, em_handle_link, adapter);
2184	adapter->tq = taskqueue_create_fast("em_taskq", M_NOWAIT,
2185	    taskqueue_thread_enqueue, &adapter->tq);
2186	taskqueue_start_threads(&adapter->tq, 1, PI_NET, "%s taskq",
2187	    device_get_nameunit(adapter->dev));
2188	if ((error = bus_setup_intr(dev, adapter->res_interrupt,
2189	    INTR_TYPE_NET | INTR_FAST, em_intr_fast, adapter,
2190	    &adapter->int_handler_tag)) != 0) {
2191		device_printf(dev, "Failed to register fast interrupt "
2192			    "handler: %d\n", error);
2193		taskqueue_free(adapter->tq);
2194		adapter->tq = NULL;
2195		return (error);
2196	}
2197#endif
2198
2199	em_enable_intr(adapter);
2200	return (0);
2201}
2202
2203static void
2204em_free_intr(struct adapter *adapter)
2205{
2206	device_t dev = adapter->dev;
2207
2208	if (adapter->int_handler_tag != NULL) {
2209		bus_teardown_intr(dev, adapter->res_interrupt, adapter->int_handler_tag);
2210		adapter->int_handler_tag = NULL;
2211	}
2212	if (adapter->tq != NULL) {
2213		taskqueue_drain(adapter->tq, &adapter->rxtx_task);
2214		taskqueue_drain(taskqueue_fast, &adapter->link_task);
2215		taskqueue_free(adapter->tq);
2216		adapter->tq = NULL;
2217	}
2218}
2219
2220static void
2221em_free_pci_resources(struct adapter *adapter)
2222{
2223	device_t dev = adapter->dev;
2224
2225	if (adapter->res_interrupt != NULL)
2226		bus_release_resource(dev, SYS_RES_IRQ, 0, adapter->res_interrupt);
2227
2228	if (adapter->res_memory != NULL)
2229		bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BAR(0),
2230		    adapter->res_memory);
2231
2232	if (adapter->flash_mem != NULL)
2233		bus_release_resource(dev, SYS_RES_MEMORY, EM_FLASH,
2234		    adapter->flash_mem);
2235
2236	if (adapter->res_ioport != NULL)
2237		bus_release_resource(dev, SYS_RES_IOPORT, adapter->io_rid,
2238		    adapter->res_ioport);
2239}
2240
2241/*********************************************************************
2242 *
2243 *  Initialize the hardware to a configuration as specified by the
2244 *  adapter structure. The controller is reset, the EEPROM is
2245 *  verified, the MAC address is set, then the shared initialization
2246 *  routines are called.
2247 *
2248 **********************************************************************/
2249static int
2250em_hardware_init(struct adapter *adapter)
2251{
2252	device_t dev = adapter->dev;
2253	uint16_t rx_buffer_size;
2254
2255	INIT_DEBUGOUT("em_hardware_init: begin");
2256	/* Issue a global reset */
2257	em_reset_hw(&adapter->hw);
2258
2259	/* When hardware is reset, fifo_head is also reset */
2260	adapter->tx_fifo_head = 0;
2261
2262	/* Make sure we have a good EEPROM before we read from it */
2263	if (em_validate_eeprom_checksum(&adapter->hw) < 0) {
2264		device_printf(dev, "The EEPROM Checksum Is Not Valid\n");
2265		return (EIO);
2266	}
2267
2268	if (em_read_part_num(&adapter->hw, &(adapter->part_num)) < 0) {
2269		device_printf(dev, "EEPROM read error while reading part "
2270		    "number\n");
2271		return (EIO);
2272	}
2273
2274	/* Set up smart power down as default off on newer adapters. */
2275	if (!em_smart_pwr_down &&
2276	    (adapter->hw.mac_type == em_82571 || adapter->hw.mac_type == em_82572)) {
2277		uint16_t phy_tmp = 0;
2278
2279		/* Speed up time to link by disabling smart power down. */
2280		em_read_phy_reg(&adapter->hw, IGP02E1000_PHY_POWER_MGMT, &phy_tmp);
2281		phy_tmp &= ~IGP02E1000_PM_SPD;
2282		em_write_phy_reg(&adapter->hw, IGP02E1000_PHY_POWER_MGMT, phy_tmp);
2283	}
2284
2285	/*
2286	 * These parameters control the automatic generation (Tx) and
2287	 * response (Rx) to Ethernet PAUSE frames.
2288	 * - High water mark should allow for at least two frames to be
2289	 *   received after sending an XOFF.
2290	 * - Low water mark works best when it is very near the high water mark.
2291	 *   This allows the receiver to restart by sending XON when it has
	 *   drained a bit. Here we use an arbitrary value of 1500 which will
2293	 *   restart after one full frame is pulled from the buffer. There
2294	 *   could be several smaller frames in the buffer and if so they will
2295	 *   not trigger the XON until their total number reduces the buffer
2296	 *   by 1500.
2297	 * - The pause time is fairly large at 1000 x 512ns = 512 usec.
2298	 */
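	/* PBA[15:0] is the Rx packet buffer size in KB; shift to bytes. */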
	rx_buffer_size = ((E1000_READ_REG(&adapter->hw, PBA) & 0xffff) << 10);
2300
2301	adapter->hw.fc_high_water = rx_buffer_size -
2302	    roundup2(adapter->hw.max_frame_size, 1024);
2303	adapter->hw.fc_low_water = adapter->hw.fc_high_water - 1500;
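	/*
	 * Worked example with hypothetical values: a 16 KB Rx packet buffer
	 * and a 1522-byte max frame give fc_high_water = 16384 - 2048 = 14336
	 * and fc_low_water = 14336 - 1500 = 12836.
	 */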
2304	if (adapter->hw.mac_type == em_80003es2lan)
2305		adapter->hw.fc_pause_time = 0xFFFF;
2306	else
2307		adapter->hw.fc_pause_time = 0x1000;
2308	adapter->hw.fc_send_xon = TRUE;
2309	adapter->hw.fc = em_fc_full;
2310
2311	if (em_init_hw(&adapter->hw) < 0) {
		device_printf(dev, "Hardware Initialization Failed\n");
2313		return (EIO);
2314	}
2315
2316	em_check_for_link(&adapter->hw);
2317
2318	return (0);
2319}
2320
2321/*********************************************************************
2322 *
2323 *  Setup networking device structure and register an interface.
2324 *
2325 **********************************************************************/
2326static void
2327em_setup_interface(device_t dev, struct adapter *adapter)
2328{
2329	struct ifnet   *ifp;
2330	INIT_DEBUGOUT("em_setup_interface: begin");
2331
2332	ifp = adapter->ifp = if_alloc(IFT_ETHER);
2333	if (ifp == NULL)
2334		panic("%s: can not if_alloc()", device_get_nameunit(dev));
2335	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
2336	ifp->if_mtu = ETHERMTU;
2337	ifp->if_init =  em_init;
2338	ifp->if_softc = adapter;
2339	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
2340	ifp->if_ioctl = em_ioctl;
2341	ifp->if_start = em_start;
2342	ifp->if_watchdog = em_watchdog;
2343	IFQ_SET_MAXLEN(&ifp->if_snd, adapter->num_tx_desc - 1);
2344	ifp->if_snd.ifq_drv_maxlen = adapter->num_tx_desc - 1;
2345	IFQ_SET_READY(&ifp->if_snd);
2346
2347	ether_ifattach(ifp, adapter->hw.mac_addr);
2348
2349	ifp->if_capabilities = ifp->if_capenable = 0;
2350
2351	if (adapter->hw.mac_type >= em_82543) {
2352		ifp->if_capabilities |= IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM;
2353		ifp->if_capenable |= IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM;
2354	}
2355
2356	/* Enable TSO if available */
2357	if ((adapter->hw.mac_type > em_82544) &&
2358	    (adapter->hw.mac_type != em_82547)) {
2359		ifp->if_capabilities |= IFCAP_TSO4;
2360		ifp->if_capenable |= IFCAP_TSO4;
2361	}
2362
2363	/*
2364	 * Tell the upper layer(s) we support long frames.
2365	 */
2366	ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header);
2367	ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU;
2368	ifp->if_capenable |= IFCAP_VLAN_MTU;
2369
2370#ifdef DEVICE_POLLING
2371	ifp->if_capabilities |= IFCAP_POLLING;
2372#endif
2373
2374	/*
2375	 * Specify the media types supported by this adapter and register
2376	 * callbacks to update media and link information
2377	 */
2378	ifmedia_init(&adapter->media, IFM_IMASK, em_media_change,
2379	    em_media_status);
2380	if ((adapter->hw.media_type == em_media_type_fiber) ||
2381	    (adapter->hw.media_type == em_media_type_internal_serdes)) {
		u_char fiber_type = IFM_1000_SX;	/* default type */
2383
2384		if (adapter->hw.mac_type == em_82545)
2385			fiber_type = IFM_1000_LX;
2386		ifmedia_add(&adapter->media, IFM_ETHER | fiber_type | IFM_FDX,
2387		    0, NULL);
2388		ifmedia_add(&adapter->media, IFM_ETHER | fiber_type, 0, NULL);
2389	} else {
2390		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T, 0, NULL);
2391		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T | IFM_FDX,
2392			    0, NULL);
2393		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX,
2394			    0, NULL);
2395		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX | IFM_FDX,
2396			    0, NULL);
2397		if (adapter->hw.phy_type != em_phy_ife) {
2398			ifmedia_add(&adapter->media,
2399				IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
2400			ifmedia_add(&adapter->media,
2401				IFM_ETHER | IFM_1000_T, 0, NULL);
2402		}
2403	}
2404	ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL);
2405	ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO);
2406}
2407
2408
2409/*********************************************************************
2410 *
2411 *  Workaround for SmartSpeed on 82541 and 82547 controllers
2412 *
2413 **********************************************************************/
2414static void
2415em_smartspeed(struct adapter *adapter)
2416{
2417	uint16_t phy_tmp;
2418
2419	if (adapter->link_active || (adapter->hw.phy_type != em_phy_igp) ||
2420	    adapter->hw.autoneg == 0 ||
2421	    (adapter->hw.autoneg_advertised & ADVERTISE_1000_FULL) == 0)
2422		return;
2423
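	/*
	 * adapter->smartspeed counts timer ticks in the downshift cycle:
	 * at 0 we try clearing the Master/Slave enable bit and restart
	 * autonegotiation; if there is still no link after
	 * EM_SMARTSPEED_DOWNSHIFT ticks we set the bit again, and the
	 * whole cycle restarts after EM_SMARTSPEED_MAX ticks.
	 */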
2424	if (adapter->smartspeed == 0) {
		/* If the Master/Slave config fault is asserted in two
		 * consecutive reads, assume the fault is genuine. */
2427		em_read_phy_reg(&adapter->hw, PHY_1000T_STATUS, &phy_tmp);
2428		if (!(phy_tmp & SR_1000T_MS_CONFIG_FAULT))
2429			return;
2430		em_read_phy_reg(&adapter->hw, PHY_1000T_STATUS, &phy_tmp);
2431		if (phy_tmp & SR_1000T_MS_CONFIG_FAULT) {
2432			em_read_phy_reg(&adapter->hw, PHY_1000T_CTRL, &phy_tmp);
			if (phy_tmp & CR_1000T_MS_ENABLE) {
2434				phy_tmp &= ~CR_1000T_MS_ENABLE;
2435				em_write_phy_reg(&adapter->hw, PHY_1000T_CTRL,
2436				    phy_tmp);
2437				adapter->smartspeed++;
				if (adapter->hw.autoneg &&
2439				   !em_phy_setup_autoneg(&adapter->hw) &&
2440				   !em_read_phy_reg(&adapter->hw, PHY_CTRL,
2441				    &phy_tmp)) {
2442					phy_tmp |= (MII_CR_AUTO_NEG_EN |
2443						    MII_CR_RESTART_AUTO_NEG);
2444					em_write_phy_reg(&adapter->hw, PHY_CTRL,
2445					    phy_tmp);
2446				}
2447			}
2448		}
2449		return;
	} else if (adapter->smartspeed == EM_SMARTSPEED_DOWNSHIFT) {
		/* If there is still no link, we may be on a 2/3-pair cable */
2452		em_read_phy_reg(&adapter->hw, PHY_1000T_CTRL, &phy_tmp);
2453		phy_tmp |= CR_1000T_MS_ENABLE;
2454		em_write_phy_reg(&adapter->hw, PHY_1000T_CTRL, phy_tmp);
		if (adapter->hw.autoneg &&
2456		   !em_phy_setup_autoneg(&adapter->hw) &&
2457		   !em_read_phy_reg(&adapter->hw, PHY_CTRL, &phy_tmp)) {
2458			phy_tmp |= (MII_CR_AUTO_NEG_EN |
2459				    MII_CR_RESTART_AUTO_NEG);
2460			em_write_phy_reg(&adapter->hw, PHY_CTRL, phy_tmp);
2461		}
2462	}
2463	/* Restart process after EM_SMARTSPEED_MAX iterations */
	if (adapter->smartspeed++ == EM_SMARTSPEED_MAX)
2465		adapter->smartspeed = 0;
2466}
2467
2469/*
2470 * Manage DMA'able memory.
2471 */
2472static void
2473em_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
2474{
2475	if (error)
2476		return;
2477	*(bus_addr_t *) arg = segs[0].ds_addr;
2478}
2479
2480static int
2481em_dma_malloc(struct adapter *adapter, bus_size_t size, struct em_dma_alloc *dma,
2482	int mapflags)
2483{
2484	int error;
2485
2486	error = bus_dma_tag_create(bus_get_dma_tag(adapter->dev), /* parent */
2487				EM_DBA_ALIGN, 0,	/* alignment, bounds */
2488				BUS_SPACE_MAXADDR,	/* lowaddr */
2489				BUS_SPACE_MAXADDR,	/* highaddr */
2490				NULL, NULL,		/* filter, filterarg */
2491				size,			/* maxsize */
2492				1,			/* nsegments */
2493				size,			/* maxsegsize */
2494				0,			/* flags */
2495				NULL,			/* lockfunc */
2496				NULL,			/* lockarg */
2497				&dma->dma_tag);
2498	if (error) {
2499		device_printf(adapter->dev, "%s: bus_dma_tag_create failed: %d\n",
2500		    __func__, error);
2501		goto fail_0;
2502	}
2503
	error = bus_dmamem_alloc(dma->dma_tag, (void **)&dma->dma_vaddr,
2505	    BUS_DMA_NOWAIT, &dma->dma_map);
2506	if (error) {
2507		device_printf(adapter->dev, "%s: bus_dmamem_alloc(%ju) failed: %d\n",
2508		    __func__, (uintmax_t)size, error);
		/* Nothing was mapped or allocated yet; only the tag needs cleanup. */
		goto fail_1;
2510	}
2511
2512	dma->dma_paddr = 0;
2513	error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
2514	    size, em_dmamap_cb, &dma->dma_paddr, mapflags | BUS_DMA_NOWAIT);
2515	if (error || dma->dma_paddr == 0) {
2516		device_printf(adapter->dev, "%s: bus_dmamap_load failed: %d\n",
2517		    __func__, error);
2518		goto fail_3;
2519	}
2520
2521	return (0);
2522
fail_3:
	bus_dmamap_unload(dma->dma_tag, dma->dma_map);
fail_2:
	bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
fail_1:
	bus_dma_tag_destroy(dma->dma_tag);
2528fail_0:
2529	dma->dma_map = NULL;
2530	dma->dma_tag = NULL;
2531
2532	return (error);
2533}
2534
2535static void
2536em_dma_free(struct adapter *adapter, struct em_dma_alloc *dma)
2537{
2538	if (dma->dma_tag == NULL)
2539		return;
2540	if (dma->dma_map != NULL) {
2541		bus_dmamap_sync(dma->dma_tag, dma->dma_map,
2542		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
2543		bus_dmamap_unload(dma->dma_tag, dma->dma_map);
2544		bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
2545		dma->dma_map = NULL;
2546	}
2547	bus_dma_tag_destroy(dma->dma_tag);
2548	dma->dma_tag = NULL;
2549}
2550
2552/*********************************************************************
2553 *
2554 *  Allocate memory for tx_buffer structures. The tx_buffer stores all
2555 *  the information needed to transmit a packet on the wire.
2556 *
2557 **********************************************************************/
2558static int
2559em_allocate_transmit_structures(struct adapter *adapter)
2560{
	adapter->tx_buffer_area = malloc(sizeof(struct em_buffer) *
	    adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO);
	if (adapter->tx_buffer_area == NULL) {
		device_printf(adapter->dev, "Unable to allocate tx_buffer memory\n");
		return (ENOMEM);
	}
2569
2570	return (0);
2571}
2572
2573/*********************************************************************
2574 *
2575 *  Allocate and initialize transmit structures.
2576 *
2577 **********************************************************************/
2578static int
2579em_setup_transmit_structures(struct adapter *adapter)
2580{
2581	struct ifnet   *ifp = adapter->ifp;
2582	device_t dev = adapter->dev;
2583	struct em_buffer *tx_buffer;
2584	bus_size_t size, segsize;
2585	int error, i;
2586
2587	/*
2588	 * Setup DMA descriptor areas.
2589	 */
2590	segsize = size = roundup2(adapter->hw.max_frame_size, MCLBYTES);
2591
	/*
	 * Overrides for TSO: a single TSO transmit may be as large as
	 * EM_TSO_SIZE bytes, mapped in PAGE_SIZE segments.
	 */
2593	if (ifp->if_hwassist & EM_TCPSEG_FEATURES) {
2594		size = EM_TSO_SIZE;
2595		segsize = PAGE_SIZE;
2596	}
2597
2598	if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),	/* parent */
2599				1, 0,			/* alignment, bounds */
2600				BUS_SPACE_MAXADDR,	/* lowaddr */
2601				BUS_SPACE_MAXADDR,	/* highaddr */
2602				NULL, NULL,		/* filter, filterarg */
2603				size,			/* maxsize */
2604				EM_MAX_SCATTER,		/* nsegments */
2605				segsize,		/* maxsegsize */
2606				0,			/* flags */
2607				NULL,		/* lockfunc */
2608				NULL,		/* lockarg */
2609				&adapter->txtag)) != 0) {
2610		device_printf(dev, "Unable to allocate TX DMA tag\n");
2611		goto fail;
2612	}
2613
2614	if ((error = em_allocate_transmit_structures(adapter)) != 0)
2615		goto fail;
2616
	bzero(adapter->tx_desc_base, sizeof(struct em_tx_desc) * adapter->num_tx_desc);
2618	tx_buffer = adapter->tx_buffer_area;
2619	for (i = 0; i < adapter->num_tx_desc; i++) {
2620		error = bus_dmamap_create(adapter->txtag, 0, &tx_buffer->map);
2621		if (error != 0) {
2622			device_printf(dev, "Unable to create TX DMA map\n");
2623			goto fail;
2624		}
2625		tx_buffer++;
2626	}
2627
2628	adapter->next_avail_tx_desc = 0;
2629	adapter->oldest_used_tx_desc = 0;
2630
2631	/* Set number of descriptors available */
2632	adapter->num_tx_desc_avail = adapter->num_tx_desc;
2633
2634	/* Set checksum context */
2635	adapter->active_checksum_context = OFFLOAD_NONE;
2636	bus_dmamap_sync(adapter->txdma.dma_tag, adapter->txdma.dma_map,
2637	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
2638
2639	return (0);
2640
2641fail:
2642	em_free_transmit_structures(adapter);
2643	return (error);
2644}
2645
2646/*********************************************************************
2647 *
2648 *  Enable transmit unit.
2649 *
2650 **********************************************************************/
2651static void
2652em_initialize_transmit_unit(struct adapter *adapter)
2653{
2654	uint32_t	reg_tctl, reg_tarc;
2655	uint32_t	reg_tipg = 0;
2656	uint64_t	bus_addr;
2657
	INIT_DEBUGOUT("em_initialize_transmit_unit: begin");
2659	/* Setup the Base and Length of the Tx Descriptor Ring */
2660	bus_addr = adapter->txdma.dma_paddr;
2661	E1000_WRITE_REG(&adapter->hw, TDLEN,
2662	    adapter->num_tx_desc * sizeof(struct em_tx_desc));
2663	E1000_WRITE_REG(&adapter->hw, TDBAH, (uint32_t)(bus_addr >> 32));
2664	E1000_WRITE_REG(&adapter->hw, TDBAL, (uint32_t)bus_addr);
2665
2666	/* Setup the HW Tx Head and Tail descriptor pointers */
2667	E1000_WRITE_REG(&adapter->hw, TDT, 0);
2668	E1000_WRITE_REG(&adapter->hw, TDH, 0);
2669
2671	HW_DEBUGOUT2("Base = %x, Length = %x\n", E1000_READ_REG(&adapter->hw, TDBAL),
2672	    E1000_READ_REG(&adapter->hw, TDLEN));
2673
2674	/* Set the default values for the Tx Inter Packet Gap timer */
2675	switch (adapter->hw.mac_type) {
2676	case em_82542_rev2_0:
2677	case em_82542_rev2_1:
2678		reg_tipg = DEFAULT_82542_TIPG_IPGT;
2679		reg_tipg |= DEFAULT_82542_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT;
2680		reg_tipg |= DEFAULT_82542_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT;
2681		break;
2682	case em_80003es2lan:
2683		reg_tipg = DEFAULT_82543_TIPG_IPGR1;
2684		reg_tipg |= DEFAULT_80003ES2LAN_TIPG_IPGR2 <<
2685		    E1000_TIPG_IPGR2_SHIFT;
2686		break;
2687	default:
2688		if ((adapter->hw.media_type == em_media_type_fiber) ||
2689		    (adapter->hw.media_type == em_media_type_internal_serdes))
2690			reg_tipg = DEFAULT_82543_TIPG_IPGT_FIBER;
2691		else
2692			reg_tipg = DEFAULT_82543_TIPG_IPGT_COPPER;
2693		reg_tipg |= DEFAULT_82543_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT;
2694		reg_tipg |= DEFAULT_82543_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT;
2695	}
2696
2697	E1000_WRITE_REG(&adapter->hw, TIPG, reg_tipg);
2698	E1000_WRITE_REG(&adapter->hw, TIDV, adapter->tx_int_delay.value);
	if (adapter->hw.mac_type >= em_82540)
2700		E1000_WRITE_REG(&adapter->hw, TADV, adapter->tx_abs_int_delay.value);
2701
2702	/* Do adapter specific tweaks before we enable the transmitter. */
2703	if (adapter->hw.mac_type == em_82571 || adapter->hw.mac_type == em_82572) {
2704		reg_tarc = E1000_READ_REG(&adapter->hw, TARC0);
2705		reg_tarc |= (1 << 25);
2706		E1000_WRITE_REG(&adapter->hw, TARC0, reg_tarc);
2707		reg_tarc = E1000_READ_REG(&adapter->hw, TARC1);
2708		reg_tarc |= (1 << 25);
2709		reg_tarc &= ~(1 << 28);
2710		E1000_WRITE_REG(&adapter->hw, TARC1, reg_tarc);
2711	} else if (adapter->hw.mac_type == em_80003es2lan) {
2712		reg_tarc = E1000_READ_REG(&adapter->hw, TARC0);
2713		reg_tarc |= 1;
2714		E1000_WRITE_REG(&adapter->hw, TARC0, reg_tarc);
2715		reg_tarc = E1000_READ_REG(&adapter->hw, TARC1);
2716		reg_tarc |= 1;
2717		E1000_WRITE_REG(&adapter->hw, TARC1, reg_tarc);
2718	}
2719
2720	/* Program the Transmit Control Register */
2721	reg_tctl = E1000_TCTL_PSP | E1000_TCTL_EN |
2722		   (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT);
2723	if (adapter->hw.mac_type >= em_82571)
2724		reg_tctl |= E1000_TCTL_MULR;
2725	if (adapter->link_duplex == FULL_DUPLEX) {
2726		reg_tctl |= E1000_FDX_COLLISION_DISTANCE << E1000_COLD_SHIFT;
2727	} else {
2728		reg_tctl |= E1000_HDX_COLLISION_DISTANCE << E1000_COLD_SHIFT;
2729	}
2730	/* This write will effectively turn on the transmit unit. */
2731	E1000_WRITE_REG(&adapter->hw, TCTL, reg_tctl);
2732
2733	/* Setup Transmit Descriptor Settings for this adapter */
2734	adapter->txd_cmd = E1000_TXD_CMD_IFCS | E1000_TXD_CMD_RS;
2735
2736	if (adapter->tx_int_delay.value > 0)
2737		adapter->txd_cmd |= E1000_TXD_CMD_IDE;
2738}
2739
2740/*********************************************************************
2741 *
2742 *  Free all transmit related data structures.
2743 *
2744 **********************************************************************/
2745static void
2746em_free_transmit_structures(struct adapter *adapter)
2747{
2748	struct em_buffer *tx_buffer;
2749	int i;
2750
2751	INIT_DEBUGOUT("free_transmit_structures: begin");
2752
2753	if (adapter->tx_buffer_area != NULL) {
2754		tx_buffer = adapter->tx_buffer_area;
2755		for (i = 0; i < adapter->num_tx_desc; i++, tx_buffer++) {
2756			if (tx_buffer->m_head != NULL) {
2757				bus_dmamap_sync(adapter->txtag, tx_buffer->map,
2758				    BUS_DMASYNC_POSTWRITE);
2759				bus_dmamap_unload(adapter->txtag,
2760				    tx_buffer->map);
2761				m_freem(tx_buffer->m_head);
2762				tx_buffer->m_head = NULL;
2763			} else if (tx_buffer->map != NULL)
2764				bus_dmamap_unload(adapter->txtag,
2765				    tx_buffer->map);
2766			if (tx_buffer->map != NULL) {
2767				bus_dmamap_destroy(adapter->txtag,
2768				    tx_buffer->map);
2769				tx_buffer->map = NULL;
2770			}
2771		}
2772	}
2773	if (adapter->tx_buffer_area != NULL) {
2774		free(adapter->tx_buffer_area, M_DEVBUF);
2775		adapter->tx_buffer_area = NULL;
2776	}
2777	if (adapter->txtag != NULL) {
2778		bus_dma_tag_destroy(adapter->txtag);
2779		adapter->txtag = NULL;
2780	}
2781}
2782
2783/*********************************************************************
2784 *
2785 *  The offload context needs to be set when we transfer the first
2786 *  packet of a particular protocol (TCP/UDP). We change the
2787 *  context only if the protocol type changes.
2788 *
2789 **********************************************************************/
2790static void
2791em_transmit_checksum_setup(struct adapter *adapter, struct mbuf *mp,
2792    uint32_t *txd_upper, uint32_t *txd_lower)
2793{
2794	struct em_context_desc *TXD;
2795	struct em_buffer *tx_buffer;
2796	int curr_txd;
2797
2798	if (mp->m_pkthdr.csum_flags) {
2799
2800		if (mp->m_pkthdr.csum_flags & CSUM_TCP) {
2801			*txd_upper = E1000_TXD_POPTS_TXSM << 8;
2802			*txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
2803			if (adapter->active_checksum_context == OFFLOAD_TCP_IP)
2804				return;
2805			else
2806				adapter->active_checksum_context = OFFLOAD_TCP_IP;
2807
2808		} else if (mp->m_pkthdr.csum_flags & CSUM_UDP) {
2809			*txd_upper = E1000_TXD_POPTS_TXSM << 8;
2810			*txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
2811			if (adapter->active_checksum_context == OFFLOAD_UDP_IP)
2812				return;
2813			else
2814				adapter->active_checksum_context = OFFLOAD_UDP_IP;
2815		} else {
2816			*txd_upper = 0;
2817			*txd_lower = 0;
2818			return;
2819		}
2820	} else {
2821		*txd_upper = 0;
2822		*txd_lower = 0;
2823		return;
2824	}
2825
2826	/* If we reach this point, the checksum offload context
2827	 * needs to be reset.
2828	 */
2829	curr_txd = adapter->next_avail_tx_desc;
2830	tx_buffer = &adapter->tx_buffer_area[curr_txd];
2831	TXD = (struct em_context_desc *) &adapter->tx_desc_base[curr_txd];
2832
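	/*
	 * ipcss/ipcso/ipcse give the start of the IP header, the offset of
	 * its checksum field, and its end; tucss/tucso/tucse below do the
	 * same for the TCP/UDP header.
	 */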
2833	TXD->lower_setup.ip_fields.ipcss = ETHER_HDR_LEN;
2834	TXD->lower_setup.ip_fields.ipcso =
2835		ETHER_HDR_LEN + offsetof(struct ip, ip_sum);
2836	TXD->lower_setup.ip_fields.ipcse =
2837		htole16(ETHER_HDR_LEN + sizeof(struct ip) - 1);
2838
2839	TXD->upper_setup.tcp_fields.tucss =
2840		ETHER_HDR_LEN + sizeof(struct ip);
2841	TXD->upper_setup.tcp_fields.tucse = htole16(0);
2842
2843	if (adapter->active_checksum_context == OFFLOAD_TCP_IP) {
2844		TXD->upper_setup.tcp_fields.tucso =
2845			ETHER_HDR_LEN + sizeof(struct ip) +
2846			offsetof(struct tcphdr, th_sum);
2847	} else if (adapter->active_checksum_context == OFFLOAD_UDP_IP) {
2848		TXD->upper_setup.tcp_fields.tucso =
2849			ETHER_HDR_LEN + sizeof(struct ip) +
2850			offsetof(struct udphdr, uh_sum);
2851	}
2852
2853	TXD->tcp_seg_setup.data = htole32(0);
2854	TXD->cmd_and_length = htole32(adapter->txd_cmd | E1000_TXD_CMD_DEXT);
2855
2856	tx_buffer->m_head = NULL;
2857
2858	if (++curr_txd == adapter->num_tx_desc)
2859		curr_txd = 0;
2860
2861	adapter->num_tx_desc_avail--;
2862	adapter->next_avail_tx_desc = curr_txd;
2863}
2864
2865/**********************************************************************
2866 *
2867 *  Setup work for hardware segmentation offload (TSO)
2868 *
2869 **********************************************************************/
2870static boolean_t
2871em_tso_setup(struct adapter *adapter, struct mbuf *mp, uint32_t *txd_upper,
2872   uint32_t *txd_lower)
2873{
2874	struct em_context_desc *TXD;
2875	struct em_buffer *tx_buffer;
2876	struct ip *ip;
2877	struct tcphdr *th;
2878	int curr_txd, hdr_len, ip_hlen, tcp_hlen;
2879
2880	if (((mp->m_pkthdr.csum_flags & CSUM_TSO) == 0) ||
2881	    (mp->m_pkthdr.len <= E1000_TX_BUFFER_SIZE)) {
		return (FALSE);
2883	}
2884
2885	*txd_lower = (E1000_TXD_CMD_DEXT |
2886		      E1000_TXD_DTYP_D |
2887		      E1000_TXD_CMD_TSE);
2888
2889	*txd_upper = (E1000_TXD_POPTS_IXSM |
2890		      E1000_TXD_POPTS_TXSM) << 8;
2891
2892	curr_txd = adapter->next_avail_tx_desc;
2893	tx_buffer = &adapter->tx_buffer_area[curr_txd];
2894	TXD = (struct em_context_desc *) &adapter->tx_desc_base[curr_txd];
2895
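	/*
	 * Step past the Ethernet header so the IP and TCP headers can be
	 * patched in place; m_data is restored below before the context
	 * descriptor is filled in.
	 */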
2896	mp->m_data += sizeof(struct ether_header);
2897	ip = mtod(mp, struct ip *);
2898	ip->ip_len = 0;
2899	ip->ip_sum = 0;
	ip_hlen = ip->ip_hl << 2;
2901	th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
2902	tcp_hlen = th->th_off << 2;
2903
2904	hdr_len = ETHER_HDR_LEN + ip_hlen + tcp_hlen;
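	/*
	 * Seed th_sum with the pseudo-header checksum, deliberately leaving
	 * out the length: the hardware folds in the correct length for each
	 * segment it generates.
	 */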
2905	th->th_sum = in_pseudo(ip->ip_src.s_addr,
2906	    ip->ip_dst.s_addr, htons(IPPROTO_TCP));
2907
2908	mp->m_data -= sizeof(struct ether_header);
2909	TXD->lower_setup.ip_fields.ipcss = ETHER_HDR_LEN;
2910	TXD->lower_setup.ip_fields.ipcso =
2911		ETHER_HDR_LEN + offsetof(struct ip, ip_sum);
2912	TXD->lower_setup.ip_fields.ipcse =
2913		htole16(ETHER_HDR_LEN + ip_hlen - 1);
2914
2915	TXD->upper_setup.tcp_fields.tucss =
2916		ETHER_HDR_LEN + ip_hlen;
2917	TXD->upper_setup.tcp_fields.tucse = 0;
2918	TXD->upper_setup.tcp_fields.tucso =
2919		ETHER_HDR_LEN + ip_hlen +
2920		offsetof(struct tcphdr, th_sum);
2921	TXD->tcp_seg_setup.fields.mss = htole16(mp->m_pkthdr.tso_segsz);
2922	TXD->tcp_seg_setup.fields.hdr_len = hdr_len;
2923	TXD->cmd_and_length = htole32(adapter->txd_cmd |
2924				E1000_TXD_CMD_DEXT |
2925				E1000_TXD_CMD_TSE |
2926				E1000_TXD_CMD_IP | E1000_TXD_CMD_TCP |
2927				(mp->m_pkthdr.len - (hdr_len)));
2928
2929	tx_buffer->m_head = NULL;
2930
2931	if (++curr_txd == adapter->num_tx_desc)
2932		curr_txd = 0;
2933
2934	adapter->num_tx_desc_avail--;
2935	adapter->next_avail_tx_desc = curr_txd;
2936	adapter->tx_tso = TRUE;
2937
	return (TRUE);
2939}
2940
2941/**********************************************************************
2942 *
2943 *  Examine each tx_buffer in the used queue. If the hardware is done
2944 *  processing the packet then free associated resources. The
2945 *  tx_buffer is put back on the free queue.
2946 *
2947 **********************************************************************/
2948static void
2949em_txeof(struct adapter *adapter)
2950{
2951	int i, num_avail;
2952	struct em_buffer *tx_buffer;
2953	struct em_tx_desc   *tx_desc;
2954	struct ifnet   *ifp = adapter->ifp;
2955
2956	EM_LOCK_ASSERT(adapter);
2957
2958	if (adapter->num_tx_desc_avail == adapter->num_tx_desc)
2959		return;
2960
2961	num_avail = adapter->num_tx_desc_avail;
2962	i = adapter->oldest_used_tx_desc;
2963
2964	tx_buffer = &adapter->tx_buffer_area[i];
2965	tx_desc = &adapter->tx_desc_base[i];
2966
2967	bus_dmamap_sync(adapter->txdma.dma_tag, adapter->txdma.dma_map,
2968	    BUS_DMASYNC_POSTREAD);
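	/*
	 * Walk forward from the oldest used descriptor, reclaiming each
	 * descriptor whose Descriptor Done (DD) status bit the hardware
	 * has set.
	 */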
2969	while (tx_desc->upper.fields.status & E1000_TXD_STAT_DD) {
2970
2971		tx_desc->upper.data = 0;
2972		num_avail++;
2973
2974		if (tx_buffer->m_head) {
2975			ifp->if_opackets++;
2976			bus_dmamap_sync(adapter->txtag, tx_buffer->map,
2977			    BUS_DMASYNC_POSTWRITE);
2978			bus_dmamap_unload(adapter->txtag, tx_buffer->map);
2979
2980			m_freem(tx_buffer->m_head);
2981			tx_buffer->m_head = NULL;
2982		}
2983
2984		if (++i == adapter->num_tx_desc)
2985			i = 0;
2986
2987		tx_buffer = &adapter->tx_buffer_area[i];
2988		tx_desc = &adapter->tx_desc_base[i];
2989	}
2990	bus_dmamap_sync(adapter->txdma.dma_tag, adapter->txdma.dma_map,
2991	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
2992
2993	adapter->oldest_used_tx_desc = i;
2994
2995	/*
2996	 * If we have enough room, clear IFF_DRV_OACTIVE to tell the stack
2997	 * that it is OK to send packets.
2998	 * If there are no pending descriptors, clear the timeout. Otherwise,
2999	 * if some descriptors have been freed, restart the timeout.
3000	 */
3001	if (num_avail > EM_TX_CLEANUP_THRESHOLD) {
3002		ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
3003		if (num_avail == adapter->num_tx_desc)
3004			ifp->if_timer = 0;
3005		else if (num_avail != adapter->num_tx_desc_avail)
3006			ifp->if_timer = EM_TX_TIMEOUT;
3007	}
3008	adapter->num_tx_desc_avail = num_avail;
3009}
3010
3011/*********************************************************************
3012 *
3013 *  Get a buffer from system mbuf buffer pool.
3014 *
3015 **********************************************************************/
3016static int
3017em_get_buf(struct adapter *adapter, int i)
3018{
3019	struct mbuf		*m;
3020	bus_dma_segment_t	segs[1];
3021	bus_dmamap_t		map;
3022	struct em_buffer	*rx_buffer;
3023	int			error, nsegs;
3024
3025	m = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR);
3026	if (m == NULL) {
3027		adapter->mbuf_cluster_failed++;
3028		return (ENOBUFS);
3029	}
3030	m->m_len = m->m_pkthdr.len = MCLBYTES;
3031	if (adapter->hw.max_frame_size <= (MCLBYTES - ETHER_ALIGN))
3032		m_adj(m, ETHER_ALIGN);
3033
3034	/*
3035	 * Using memory from the mbuf cluster pool, invoke the
3036	 * bus_dma machinery to arrange the memory mapping.
3037	 */
3038	error = bus_dmamap_load_mbuf_sg(adapter->rxtag, adapter->rx_sparemap,
3039	    m, segs, &nsegs, BUS_DMA_NOWAIT);
3040	if (error != 0) {
3041		m_free(m);
3042		return (error);
3043	}
3044	/* If nsegs is wrong then the stack is corrupt. */
3045	KASSERT(nsegs == 1, ("Too many segments returned!"));
3046
3047	rx_buffer = &adapter->rx_buffer_area[i];
3048	if (rx_buffer->m_head != NULL)
3049		bus_dmamap_unload(adapter->rxtag, rx_buffer->map);
3050
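	/*
	 * Swap the freshly loaded spare map into the ring slot; the slot's
	 * old map becomes the new spare for the next refill.
	 */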
3051	map = rx_buffer->map;
3052	rx_buffer->map = adapter->rx_sparemap;
3053	adapter->rx_sparemap = map;
3054	bus_dmamap_sync(adapter->rxtag, rx_buffer->map, BUS_DMASYNC_PREREAD);
3055	rx_buffer->m_head = m;
3056
3057	adapter->rx_desc_base[i].buffer_addr = htole64(segs[0].ds_addr);
3058
3059	return (0);
3060}
3061
3062/*********************************************************************
3063 *
3064 *  Allocate memory for rx_buffer structures. Since we use one
3065 *  rx_buffer per received packet, the maximum number of rx_buffer's
3066 *  that we'll need is equal to the number of receive descriptors
3067 *  that we've allocated.
3068 *
3069 **********************************************************************/
3070static int
3071em_allocate_receive_structures(struct adapter *adapter)
3072{
3073	device_t dev = adapter->dev;
3074	struct em_buffer *rx_buffer;
3075	int i, error;
3076
	adapter->rx_buffer_area = malloc(sizeof(struct em_buffer) *
	    adapter->num_rx_desc, M_DEVBUF, M_NOWAIT | M_ZERO);
	if (adapter->rx_buffer_area == NULL) {
		device_printf(dev, "Unable to allocate rx_buffer memory\n");
		return (ENOMEM);
	}
3085
3086	error = bus_dma_tag_create(bus_get_dma_tag(dev),	/* parent */
3087				1, 0,			/* alignment, bounds */
3088				BUS_SPACE_MAXADDR,	/* lowaddr */
3089				BUS_SPACE_MAXADDR,	/* highaddr */
3090				NULL, NULL,		/* filter, filterarg */
3091				MCLBYTES,		/* maxsize */
3092				1,			/* nsegments */
3093				MCLBYTES,		/* maxsegsize */
3094				0,			/* flags */
3095				NULL,			/* lockfunc */
3096				NULL,			/* lockarg */
3097				&adapter->rxtag);
3098	if (error) {
3099		device_printf(dev, "%s: bus_dma_tag_create failed %d\n",
3100		    __func__, error);
3101		goto fail;
3102	}
3103
3104	error = bus_dmamap_create(adapter->rxtag, BUS_DMA_NOWAIT,
3105	    &adapter->rx_sparemap);
3106	if (error) {
3107		device_printf(dev, "%s: bus_dmamap_create failed: %d\n",
3108		    __func__, error);
3109		goto fail;
3110	}
3111	rx_buffer = adapter->rx_buffer_area;
3112	for (i = 0; i < adapter->num_rx_desc; i++, rx_buffer++) {
3113		error = bus_dmamap_create(adapter->rxtag, BUS_DMA_NOWAIT,
3114		    &rx_buffer->map);
3115		if (error) {
3116			device_printf(dev, "%s: bus_dmamap_create failed: %d\n",
3117			    __func__, error);
3118			goto fail;
3119		}
3120	}
3121
3122	for (i = 0; i < adapter->num_rx_desc; i++) {
3123		error = em_get_buf(adapter, i);
3124		if (error)
3125			goto fail;
3126	}
3127	bus_dmamap_sync(adapter->rxdma.dma_tag, adapter->rxdma.dma_map,
3128	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3129
3130	return (0);
3131
3132fail:
3133	em_free_receive_structures(adapter);
3134	return (error);
3135}
3136
3137/*********************************************************************
3138 *
3139 *  Allocate and initialize receive structures.
3140 *
3141 **********************************************************************/
3142static int
3143em_setup_receive_structures(struct adapter *adapter)
3144{
3145	int error;
3146
	bzero(adapter->rx_desc_base, sizeof(struct em_rx_desc) * adapter->num_rx_desc);
3148
3149	if ((error = em_allocate_receive_structures(adapter)) != 0)
3150		return (error);
3151
3152	/* Setup our descriptor pointers */
3153	adapter->next_rx_desc_to_check = 0;
3154
3155	return (0);
3156}
3157
3158/*********************************************************************
3159 *
3160 *  Enable receive unit.
3161 *
3162 **********************************************************************/
3163static void
3164em_initialize_receive_unit(struct adapter *adapter)
3165{
3166	struct ifnet	*ifp = adapter->ifp;
3167	uint64_t	bus_addr;
3168	uint32_t	reg_rctl;
3169	uint32_t	reg_rxcsum;
3170
3171	INIT_DEBUGOUT("em_initialize_receive_unit: begin");
3172
3173	/*
3174	 * Make sure receives are disabled while setting
3175	 * up the descriptor ring
3176	 */
3177	E1000_WRITE_REG(&adapter->hw, RCTL, 0);
3178
3179	/* Set the Receive Delay Timer Register */
3180	E1000_WRITE_REG(&adapter->hw, RDTR, adapter->rx_int_delay.value | E1000_RDT_FPDB);
3181
	if (adapter->hw.mac_type >= em_82540) {
3183		E1000_WRITE_REG(&adapter->hw, RADV, adapter->rx_abs_int_delay.value);
3184
3185		/*
3186		 * Set the interrupt throttling rate. Value is calculated
3187		 * as DEFAULT_ITR = 1/(MAX_INTS_PER_SEC * 256ns)
3188		 */
3189#define MAX_INTS_PER_SEC	8000
#define DEFAULT_ITR		(1000000000 / (MAX_INTS_PER_SEC * 256))
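		/*
		 * With 8000 interrupts/s this evaluates to 1000000000 /
		 * 2048000 = 488 units of 256 ns, i.e. at most one
		 * interrupt every ~125 us.
		 */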
3191		E1000_WRITE_REG(&adapter->hw, ITR, DEFAULT_ITR);
3192	}
3193
3194	/* Setup the Base and Length of the Rx Descriptor Ring */
3195	bus_addr = adapter->rxdma.dma_paddr;
3196	E1000_WRITE_REG(&adapter->hw, RDLEN, adapter->num_rx_desc *
3197			sizeof(struct em_rx_desc));
3198	E1000_WRITE_REG(&adapter->hw, RDBAH, (uint32_t)(bus_addr >> 32));
3199	E1000_WRITE_REG(&adapter->hw, RDBAL, (uint32_t)bus_addr);
3200
3201	/* Setup the HW Rx Head and Tail Descriptor Pointers */
3202	E1000_WRITE_REG(&adapter->hw, RDT, adapter->num_rx_desc - 1);
3203	E1000_WRITE_REG(&adapter->hw, RDH, 0);
3204
3205	/* Setup the Receive Control Register */
3206	reg_rctl = E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_LBM_NO |
3207		   E1000_RCTL_RDMTS_HALF |
3208		   (adapter->hw.mc_filter_type << E1000_RCTL_MO_SHIFT);
3209
3210	if (adapter->hw.tbi_compatibility_on == TRUE)
3211		reg_rctl |= E1000_RCTL_SBP;
3212
3214	switch (adapter->rx_buffer_len) {
3215	default:
3216	case EM_RXBUFFER_2048:
3217		reg_rctl |= E1000_RCTL_SZ_2048;
3218		break;
3219	case EM_RXBUFFER_4096:
3220		reg_rctl |= E1000_RCTL_SZ_4096 | E1000_RCTL_BSEX | E1000_RCTL_LPE;
3221		break;
3222	case EM_RXBUFFER_8192:
3223		reg_rctl |= E1000_RCTL_SZ_8192 | E1000_RCTL_BSEX | E1000_RCTL_LPE;
3224		break;
3225	case EM_RXBUFFER_16384:
3226		reg_rctl |= E1000_RCTL_SZ_16384 | E1000_RCTL_BSEX | E1000_RCTL_LPE;
3227		break;
3228	}
3229
3230	if (ifp->if_mtu > ETHERMTU)
3231		reg_rctl |= E1000_RCTL_LPE;
3232
3233	/* Enable 82543 Receive Checksum Offload for TCP and UDP */
3234	if ((adapter->hw.mac_type >= em_82543) &&
3235	    (ifp->if_capenable & IFCAP_RXCSUM)) {
3236		reg_rxcsum = E1000_READ_REG(&adapter->hw, RXCSUM);
3237		reg_rxcsum |= (E1000_RXCSUM_IPOFL | E1000_RXCSUM_TUOFL);
3238		E1000_WRITE_REG(&adapter->hw, RXCSUM, reg_rxcsum);
3239	}
3240
3241	/* Enable Receives */
3242	E1000_WRITE_REG(&adapter->hw, RCTL, reg_rctl);
3243}
3244
3245/*********************************************************************
3246 *
3247 *  Free receive related data structures.
3248 *
3249 **********************************************************************/
3250static void
3251em_free_receive_structures(struct adapter *adapter)
3252{
3253	struct em_buffer *rx_buffer;
3254	int i;
3255
3256	INIT_DEBUGOUT("free_receive_structures: begin");
3257
3258	if (adapter->rx_sparemap) {
3259		bus_dmamap_destroy(adapter->rxtag, adapter->rx_sparemap);
3260		adapter->rx_sparemap = NULL;
3261	}
3262	if (adapter->rx_buffer_area != NULL) {
3263		rx_buffer = adapter->rx_buffer_area;
3264		for (i = 0; i < adapter->num_rx_desc; i++, rx_buffer++) {
3265			if (rx_buffer->m_head != NULL) {
3266				bus_dmamap_sync(adapter->rxtag, rx_buffer->map,
3267				    BUS_DMASYNC_POSTREAD);
3268				bus_dmamap_unload(adapter->rxtag,
3269				    rx_buffer->map);
3270				m_freem(rx_buffer->m_head);
3271				rx_buffer->m_head = NULL;
3272			} else if (rx_buffer->map != NULL)
3273				bus_dmamap_unload(adapter->rxtag,
3274				    rx_buffer->map);
3275			if (rx_buffer->map != NULL) {
3276				bus_dmamap_destroy(adapter->rxtag,
3277				    rx_buffer->map);
3278				rx_buffer->map = NULL;
3279			}
3280		}
3281	}
3282	if (adapter->rx_buffer_area != NULL) {
3283		free(adapter->rx_buffer_area, M_DEVBUF);
3284		adapter->rx_buffer_area = NULL;
3285	}
3286	if (adapter->rxtag != NULL) {
3287		bus_dma_tag_destroy(adapter->rxtag);
3288		adapter->rxtag = NULL;
3289	}
3290}
3291
3292/*********************************************************************
3293 *
 *  This routine executes in interrupt context. It replenishes
 *  the mbufs in the descriptor ring and passes data which has
 *  been dma'ed into host memory up to the upper layer.
3297 *
3298 *  We loop at most count times if count is > 0, or until done if
3299 *  count < 0.
3300 *
3301 *********************************************************************/
3302static int
3303em_rxeof(struct adapter *adapter, int count)
3304{
3305	struct ifnet	*ifp;
3306	struct mbuf	*mp;
3307	uint8_t		accept_frame = 0;
3308	uint8_t		eop = 0;
3309	uint16_t 	len, desc_len, prev_len_adj;
3310	int		i;
3311
3312	/* Pointer to the receive descriptor being examined. */
3313	struct em_rx_desc   *current_desc;
3314	uint8_t		status;
3315
3316	ifp = adapter->ifp;
3317	i = adapter->next_rx_desc_to_check;
3318	current_desc = &adapter->rx_desc_base[i];
3319	bus_dmamap_sync(adapter->rxdma.dma_tag, adapter->rxdma.dma_map,
3320	    BUS_DMASYNC_POSTREAD);
3321
3322	if (!((current_desc->status) & E1000_RXD_STAT_DD))
3323		return (0);
3324
3325	while ((current_desc->status & E1000_RXD_STAT_DD) &&
3326	    (count != 0) &&
3327	    (ifp->if_drv_flags & IFF_DRV_RUNNING)) {
3328		struct mbuf *m = NULL;
3329
3330		mp = adapter->rx_buffer_area[i].m_head;
3331		/*
3332		 * Can't defer bus_dmamap_sync(9) because TBI_ACCEPT
3333		 * needs to access the last received byte in the mbuf.
3334		 */
3335		bus_dmamap_sync(adapter->rxtag, adapter->rx_buffer_area[i].map,
3336		    BUS_DMASYNC_POSTREAD);
3337
3338		accept_frame = 1;
3339		prev_len_adj = 0;
3340		desc_len = le16toh(current_desc->length);
3341		status = current_desc->status;
3342		if (status & E1000_RXD_STAT_EOP) {
3343			count--;
3344			eop = 1;
3345			if (desc_len < ETHER_CRC_LEN) {
3346				len = 0;
3347				prev_len_adj = ETHER_CRC_LEN - desc_len;
3348			} else
3349				len = desc_len - ETHER_CRC_LEN;
3350		} else {
3351			eop = 0;
3352			len = desc_len;
3353		}
3354
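		/*
		 * In TBI mode the chip can flag an otherwise good frame
		 * that merely ends in a carrier-extension symbol; TBI_ACCEPT
		 * recognizes that case, and we keep the frame minus its
		 * bogus last byte.
		 */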
3355		if (current_desc->errors & E1000_RXD_ERR_FRAME_ERR_MASK) {
3356			uint8_t		last_byte;
3357			uint32_t	pkt_len = desc_len;
3358
3359			if (adapter->fmp != NULL)
3360				pkt_len += adapter->fmp->m_pkthdr.len;
3361
3362			last_byte = *(mtod(mp, caddr_t) + desc_len - 1);
3363			if (TBI_ACCEPT(&adapter->hw, status,
3364			    current_desc->errors, pkt_len, last_byte)) {
3365				em_tbi_adjust_stats(&adapter->hw,
3366				    &adapter->stats, pkt_len,
3367				    adapter->hw.mac_addr);
3368				if (len > 0)
3369					len--;
3370			} else
3371				accept_frame = 0;
3372		}
3373
3374		if (accept_frame) {
3375			if (em_get_buf(adapter, i) != 0) {
3376				ifp->if_iqdrops++;
3377				goto discard;
3378			}
3379
3380			/* Assign correct length to the current fragment */
3381			mp->m_len = len;
3382
3383			if (adapter->fmp == NULL) {
3384				mp->m_pkthdr.len = len;
3385				adapter->fmp = mp; /* Store the first mbuf */
3386				adapter->lmp = mp;
3387			} else {
3388				/* Chain mbuf's together */
3389				mp->m_flags &= ~M_PKTHDR;
3390				/*
3391				 * Adjust length of previous mbuf in chain if
3392				 * we received less than 4 bytes in the last
3393				 * descriptor.
3394				 */
3395				if (prev_len_adj > 0) {
3396					adapter->lmp->m_len -= prev_len_adj;
3397					adapter->fmp->m_pkthdr.len -=
3398					    prev_len_adj;
3399				}
3400				adapter->lmp->m_next = mp;
3401				adapter->lmp = adapter->lmp->m_next;
3402				adapter->fmp->m_pkthdr.len += len;
3403			}
3404
3405			if (eop) {
3406				adapter->fmp->m_pkthdr.rcvif = ifp;
3407				ifp->if_ipackets++;
3408				em_receive_checksum(adapter, current_desc,
3409				    adapter->fmp);
3410#ifndef __NO_STRICT_ALIGNMENT
3411				if (adapter->hw.max_frame_size >
3412				    (MCLBYTES - ETHER_ALIGN) &&
3413				    em_fixup_rx(adapter) != 0)
3414					goto skip;
3415#endif
3416				if (status & E1000_RXD_STAT_VP) {
3417					adapter->fmp->m_pkthdr.ether_vtag =
3418					    (le16toh(current_desc->special) &
3419					    E1000_RXD_SPC_VLAN_MASK);
3420					adapter->fmp->m_flags |= M_VLANTAG;
3421				}
3422#ifndef __NO_STRICT_ALIGNMENT
3423skip:
3424#endif
3425				m = adapter->fmp;
3426				adapter->fmp = NULL;
3427				adapter->lmp = NULL;
3428			}
3429		} else {
3430			ifp->if_ierrors++;
3431discard:
			/* Reuse the loaded DMA map and just update the mbuf chain */
3433			mp = adapter->rx_buffer_area[i].m_head;
3434			mp->m_len = mp->m_pkthdr.len = MCLBYTES;
3435			mp->m_data = mp->m_ext.ext_buf;
3436			mp->m_next = NULL;
3437			if (adapter->hw.max_frame_size <= (MCLBYTES - ETHER_ALIGN))
3438				m_adj(mp, ETHER_ALIGN);
3439			if (adapter->fmp != NULL) {
3440				m_freem(adapter->fmp);
3441				adapter->fmp = NULL;
3442				adapter->lmp = NULL;
3443			}
3444			m = NULL;
3445		}
3446
		/* Zero out the receive descriptor's status. */
		current_desc->status = 0;
		bus_dmamap_sync(adapter->rxdma.dma_tag, adapter->rxdma.dma_map,
		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);

		/* Advance our pointers to the next descriptor. */
		if (++i == adapter->num_rx_desc)
			i = 0;
		if (m != NULL) {
			adapter->next_rx_desc_to_check = i;
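			/*
			 * Pass the frame up the stack.  With DEVICE_POLLING
			 * the driver lock is dropped around if_input(9),
			 * presumably so the stack can re-enter the driver
			 * (e.g. to transmit a reply) without recursing on
			 * EM_LOCK.
			 */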
#ifdef DEVICE_POLLING
			EM_UNLOCK(adapter);
			(*ifp->if_input)(ifp, m);
			EM_LOCK(adapter);
#else
			(*ifp->if_input)(ifp, m);
#endif
			i = adapter->next_rx_desc_to_check;
		}
		current_desc = &adapter->rx_desc_base[i];
	}
	adapter->next_rx_desc_to_check = i;

	/* Advance the E1000's Receive Queue #0 "Tail Pointer". */
	if (--i < 0)
		i = adapter->num_rx_desc - 1;
	E1000_WRITE_REG(&adapter->hw, RDT, i);
	if (!((current_desc->status) & E1000_RXD_STAT_DD))
		return (0);

	return (1);
}

#ifndef __NO_STRICT_ALIGNMENT
/*
 * When jumbo frames are enabled we must realign the entire payload on
 * architectures with strict alignment requirements.  This is a serious
 * design flaw of the 8254x, as it largely defeats the benefit of DMA:
 * the 8254x only allows RX buffer sizes of 2048/4096/8192/16384 bytes,
 * whereas a size of 2048 - ETHER_ALIGN is what would actually keep the
 * payload aligned.  Even on architectures without strict alignment
 * restrictions, the resulting unaligned memory accesses reduce
 * performance.  To avoid copying the entire frame into alignment, we
 * allocate a new mbuf, copy only the ethernet header into it, and
 * prepend the new mbuf to the existing chain.
 *
 * Be aware that the 8254x performs best on strict-alignment
 * architectures when jumbo frames are not used at all.
 */
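/*
 * Worked example (illustrative, assuming the cluster starts on a
 * 4-byte boundary): a frame DMA'd to offset 0 puts the 14-byte
 * ethernet header at offsets 0-13 and the IP header at offset 14,
 * which is only 2-byte aligned.  When the frame fits in the first
 * mbuf, em_fixup_rx() below shifts it forward by ETHER_HDR_LEN so the
 * IP header lands at offset 28, a 4-byte boundary; otherwise the
 * header is copied into a freshly allocated mbuf that is prepended to
 * the chain.
 */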
static int
em_fixup_rx(struct adapter *adapter)
{
	struct mbuf *m, *n;
	int error;

	error = 0;
	m = adapter->fmp;
	if (m->m_len <= (MCLBYTES - ETHER_HDR_LEN)) {
		bcopy(m->m_data, m->m_data + ETHER_HDR_LEN, m->m_len);
		m->m_data += ETHER_HDR_LEN;
	} else {
		MGETHDR(n, M_DONTWAIT, MT_DATA);
		if (n != NULL) {
			bcopy(m->m_data, n->m_data, ETHER_HDR_LEN);
			m->m_data += ETHER_HDR_LEN;
			m->m_len -= ETHER_HDR_LEN;
			n->m_len = ETHER_HDR_LEN;
			M_MOVE_PKTHDR(n, m);
			n->m_next = m;
			adapter->fmp = n;
		} else {
			adapter->ifp->if_iqdrops++;
			adapter->mbuf_alloc_failed++;
			m_freem(adapter->fmp);
			adapter->fmp = NULL;
			adapter->lmp = NULL;
			error = ENOBUFS;
		}
	}

	return (error);
}
#endif

/*********************************************************************
 *
 *  Verify that the hardware indicated that the checksum is valid.
 *  Inform the stack about the status of the checksum so that the
 *  stack doesn't spend time verifying it.
 *
 *********************************************************************/
static void
em_receive_checksum(struct adapter *adapter, struct em_rx_desc *rx_desc,
		    struct mbuf *mp)
{
	/* 82543 or newer only */
	if ((adapter->hw.mac_type < em_82543) ||
	    /* Ignore Checksum bit is set */
	    (rx_desc->status & E1000_RXD_STAT_IXSM)) {
		mp->m_pkthdr.csum_flags = 0;
		return;
	}

	if (rx_desc->status & E1000_RXD_STAT_IPCS) {
		/* Did it pass? */
		if (!(rx_desc->errors & E1000_RXD_ERR_IPE)) {
			/* IP Checksum Good */
			mp->m_pkthdr.csum_flags = CSUM_IP_CHECKED;
			mp->m_pkthdr.csum_flags |= CSUM_IP_VALID;
		} else {
			mp->m_pkthdr.csum_flags = 0;
		}
	}

	if (rx_desc->status & E1000_RXD_STAT_TCPCS) {
		/* Did it pass? */
		if (!(rx_desc->errors & E1000_RXD_ERR_TCPE)) {
			mp->m_pkthdr.csum_flags |=
			    (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
			mp->m_pkthdr.csum_data = htons(0xffff);
		}
	}
}

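/*
 * How the stack consumes the flags set by em_receive_checksum():
 * CSUM_IP_CHECKED | CSUM_IP_VALID means the IP header checksum was
 * verified by the hardware.  CSUM_DATA_VALID says csum_data holds the
 * hardware-computed TCP/UDP checksum, CSUM_PSEUDO_HDR says that value
 * already covers the pseudo-header, and 0xffff is the "checksum good"
 * value, so the protocol input path skips its software verification.
 */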

static void
em_enable_vlans(struct adapter *adapter)
{
	uint32_t ctrl;

	E1000_WRITE_REG(&adapter->hw, VET, ETHERTYPE_VLAN);

	ctrl = E1000_READ_REG(&adapter->hw, CTRL);
	ctrl |= E1000_CTRL_VME;
	E1000_WRITE_REG(&adapter->hw, CTRL, ctrl);
}

static void
em_disable_vlans(struct adapter *adapter)
{
	uint32_t ctrl;

	ctrl = E1000_READ_REG(&adapter->hw, CTRL);
	ctrl &= ~E1000_CTRL_VME;
	E1000_WRITE_REG(&adapter->hw, CTRL, ctrl);
}

static void
em_enable_intr(struct adapter *adapter)
{
	E1000_WRITE_REG(&adapter->hw, IMS, (IMS_ENABLE_MASK));
}

static void
em_disable_intr(struct adapter *adapter)
{
	/*
	 * The first version of the 82542 had an erratum where, when link
	 * was forced, it would stay up even if the cable was disconnected.
	 * Sequence errors were used to detect the disconnect and the
	 * driver would then unforce the link.  This code is in the ISR.
	 * For it to work correctly, the Sequence error interrupt had to
	 * be enabled all the time.
	 */
	if (adapter->hw.mac_type == em_82542_rev2_0)
		E1000_WRITE_REG(&adapter->hw, IMC,
		    (0xffffffff & ~E1000_IMC_RXSEQ));
	else
		E1000_WRITE_REG(&adapter->hw, IMC, 0xffffffff);
}

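/*
 * A valid station address must not have the multicast/group bit
 * (bit 0 of the first octet) set and must not be all zeros.  For
 * example, 00:a0:c9:12:34:56 is accepted, while 01:00:5e:00:00:01
 * (multicast) and 00:00:00:00:00:00 are rejected.
 */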
static int
em_is_valid_ether_addr(uint8_t *addr)
{
	char zero_addr[ETHER_ADDR_LEN] = { 0, 0, 0, 0, 0, 0 };

	if ((addr[0] & 1) || (!bcmp(addr, zero_addr, ETHER_ADDR_LEN)))
		return (FALSE);

	return (TRUE);
}

void
em_write_pci_cfg(struct em_hw *hw, uint32_t reg, uint16_t *value)
{
	pci_write_config(((struct em_osdep *)hw->back)->dev, reg, *value, 2);
}

void
em_read_pci_cfg(struct em_hw *hw, uint32_t reg, uint16_t *value)
{
	*value = pci_read_config(((struct em_osdep *)hw->back)->dev, reg, 2);
}

void
em_pci_set_mwi(struct em_hw *hw)
{
	pci_write_config(((struct em_osdep *)hw->back)->dev, PCIR_COMMAND,
	    (hw->pci_cmd_word | CMD_MEM_WRT_INVALIDATE), 2);
}

void
em_pci_clear_mwi(struct em_hw *hw)
{
	pci_write_config(((struct em_osdep *)hw->back)->dev, PCIR_COMMAND,
	    (hw->pci_cmd_word & ~CMD_MEM_WRT_INVALIDATE), 2);
}

/*********************************************************************
 *  82544 Coexistence issue workaround.
 *    There are two issues:
 *       1. Transmit hang issue.
 *          To detect this issue, the following equation can be used:
 *          SIZE[3:0] + ADDR[2:0] = SUM[3:0].
 *          If SUM[3:0] is between 1 and 4, we will have this issue.
 *
 *       2. DAC issue.
 *          To detect this issue, the following equation can be used:
 *          SIZE[3:0] + ADDR[2:0] = SUM[3:0].
 *          If SUM[3:0] is between 9 and c, we will have this issue.
 *
 *    WORKAROUND:
 *          Make sure we do not have an ending address whose low nibble
 *          is 1, 2, 3, 4 (hang) or 9, a, b, c (DAC).
 *********************************************************************/
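/*
 * Worked example (illustrative values): for a segment with
 * address = 0x...2 (ADDR[2:0] = 2) and length = 0x...f (SIZE[3:0] = f),
 * SUM[3:0] = (2 + 0xf) & 0xf = 1, which falls in the 1-4 hang range,
 * so em_fill_descriptors() below splits the segment into a descriptor
 * of length - 4 bytes followed by a 4-byte descriptor covering the
 * final bytes.
 */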
static uint32_t
em_fill_descriptors(bus_addr_t address, uint32_t length,
		PDESC_ARRAY desc_array)
{
	uint32_t safe_terminator;

	/*
	 * The issue is sensitive to both the length and the address, so
	 * check the address first.  Very short segments are always safe
	 * in a single descriptor.
	 */
	if (length <= 4) {
		desc_array->descriptor[0].address = address;
		desc_array->descriptor[0].length = length;
		desc_array->elements = 1;
		return (desc_array->elements);
	}
	safe_terminator =
	    (uint32_t)((((uint32_t)address & 0x7) + (length & 0xF)) & 0xF);
	/* If it does not fall in 0x1-0x4 or 0x9-0xC, one descriptor is safe. */
	if (safe_terminator == 0 ||
	    (safe_terminator > 4 && safe_terminator < 9) ||
	    (safe_terminator > 0xC && safe_terminator <= 0xF)) {
		desc_array->descriptor[0].address = address;
		desc_array->descriptor[0].length = length;
		desc_array->elements = 1;
		return (desc_array->elements);
	}

	/* Otherwise split off the last 4 bytes into their own descriptor. */
	desc_array->descriptor[0].address = address;
	desc_array->descriptor[0].length = length - 4;
	desc_array->descriptor[1].address = address + (length - 4);
	desc_array->descriptor[1].length = 4;
	desc_array->elements = 2;
	return (desc_array->elements);
}

/**********************************************************************
 *
 *  Update the board statistics counters.
 *
 **********************************************************************/
static void
em_update_stats_counters(struct adapter *adapter)
{
	struct ifnet   *ifp;

	if (adapter->hw.media_type == em_media_type_copper ||
	    (E1000_READ_REG(&adapter->hw, STATUS) & E1000_STATUS_LU)) {
		adapter->stats.symerrs += E1000_READ_REG(&adapter->hw, SYMERRS);
		adapter->stats.sec += E1000_READ_REG(&adapter->hw, SEC);
	}
	adapter->stats.crcerrs += E1000_READ_REG(&adapter->hw, CRCERRS);
	adapter->stats.mpc += E1000_READ_REG(&adapter->hw, MPC);
	adapter->stats.scc += E1000_READ_REG(&adapter->hw, SCC);
	adapter->stats.ecol += E1000_READ_REG(&adapter->hw, ECOL);

	adapter->stats.mcc += E1000_READ_REG(&adapter->hw, MCC);
	adapter->stats.latecol += E1000_READ_REG(&adapter->hw, LATECOL);
	adapter->stats.colc += E1000_READ_REG(&adapter->hw, COLC);
	adapter->stats.dc += E1000_READ_REG(&adapter->hw, DC);
	adapter->stats.rlec += E1000_READ_REG(&adapter->hw, RLEC);
	adapter->stats.xonrxc += E1000_READ_REG(&adapter->hw, XONRXC);
	adapter->stats.xontxc += E1000_READ_REG(&adapter->hw, XONTXC);
	adapter->stats.xoffrxc += E1000_READ_REG(&adapter->hw, XOFFRXC);
	adapter->stats.xofftxc += E1000_READ_REG(&adapter->hw, XOFFTXC);
	adapter->stats.fcruc += E1000_READ_REG(&adapter->hw, FCRUC);
	adapter->stats.prc64 += E1000_READ_REG(&adapter->hw, PRC64);
	adapter->stats.prc127 += E1000_READ_REG(&adapter->hw, PRC127);
	adapter->stats.prc255 += E1000_READ_REG(&adapter->hw, PRC255);
	adapter->stats.prc511 += E1000_READ_REG(&adapter->hw, PRC511);
	adapter->stats.prc1023 += E1000_READ_REG(&adapter->hw, PRC1023);
	adapter->stats.prc1522 += E1000_READ_REG(&adapter->hw, PRC1522);
	adapter->stats.gprc += E1000_READ_REG(&adapter->hw, GPRC);
	adapter->stats.bprc += E1000_READ_REG(&adapter->hw, BPRC);
	adapter->stats.mprc += E1000_READ_REG(&adapter->hw, MPRC);
	adapter->stats.gptc += E1000_READ_REG(&adapter->hw, GPTC);

	/*
	 * For the 64-bit byte counters the low dword must be read first;
	 * both registers clear on the read of the high dword.
	 */
	adapter->stats.gorcl += E1000_READ_REG(&adapter->hw, GORCL);
	adapter->stats.gorch += E1000_READ_REG(&adapter->hw, GORCH);
	adapter->stats.gotcl += E1000_READ_REG(&adapter->hw, GOTCL);
	adapter->stats.gotch += E1000_READ_REG(&adapter->hw, GOTCH);

	adapter->stats.rnbc += E1000_READ_REG(&adapter->hw, RNBC);
	adapter->stats.ruc += E1000_READ_REG(&adapter->hw, RUC);
	adapter->stats.rfc += E1000_READ_REG(&adapter->hw, RFC);
	adapter->stats.roc += E1000_READ_REG(&adapter->hw, ROC);
	adapter->stats.rjc += E1000_READ_REG(&adapter->hw, RJC);

	adapter->stats.torl += E1000_READ_REG(&adapter->hw, TORL);
	adapter->stats.torh += E1000_READ_REG(&adapter->hw, TORH);
	adapter->stats.totl += E1000_READ_REG(&adapter->hw, TOTL);
	adapter->stats.toth += E1000_READ_REG(&adapter->hw, TOTH);

	adapter->stats.tpr += E1000_READ_REG(&adapter->hw, TPR);
	adapter->stats.tpt += E1000_READ_REG(&adapter->hw, TPT);
	adapter->stats.ptc64 += E1000_READ_REG(&adapter->hw, PTC64);
	adapter->stats.ptc127 += E1000_READ_REG(&adapter->hw, PTC127);
	adapter->stats.ptc255 += E1000_READ_REG(&adapter->hw, PTC255);
	adapter->stats.ptc511 += E1000_READ_REG(&adapter->hw, PTC511);
	adapter->stats.ptc1023 += E1000_READ_REG(&adapter->hw, PTC1023);
	adapter->stats.ptc1522 += E1000_READ_REG(&adapter->hw, PTC1522);
	adapter->stats.mptc += E1000_READ_REG(&adapter->hw, MPTC);
	adapter->stats.bptc += E1000_READ_REG(&adapter->hw, BPTC);

	if (adapter->hw.mac_type >= em_82543) {
		adapter->stats.algnerrc += E1000_READ_REG(&adapter->hw, ALGNERRC);
		adapter->stats.rxerrc += E1000_READ_REG(&adapter->hw, RXERRC);
		adapter->stats.tncrs += E1000_READ_REG(&adapter->hw, TNCRS);
		adapter->stats.cexterr += E1000_READ_REG(&adapter->hw, CEXTERR);
		adapter->stats.tsctc += E1000_READ_REG(&adapter->hw, TSCTC);
		adapter->stats.tsctfc += E1000_READ_REG(&adapter->hw, TSCTFC);
	}
	ifp = adapter->ifp;

	ifp->if_collisions = adapter->stats.colc;

	/* Rx Errors */
	ifp->if_ierrors = adapter->stats.rxerrc + adapter->stats.crcerrs +
	    adapter->stats.algnerrc + adapter->stats.ruc + adapter->stats.roc +
	    adapter->stats.mpc + adapter->stats.cexterr;

	/* Tx Errors */
	ifp->if_oerrors = adapter->stats.ecol + adapter->stats.latecol +
	    adapter->watchdog_events;
}


/**********************************************************************
 *
 *  This routine is called only when em_display_debug_stats is enabled.
 *  This routine provides a way to take a look at important statistics
 *  maintained by the driver and hardware.
 *
 **********************************************************************/
static void
em_print_debug_info(struct adapter *adapter)
{
	device_t dev = adapter->dev;
	uint8_t *hw_addr = adapter->hw.hw_addr;

	device_printf(dev, "Adapter hardware address = %p\n", hw_addr);
	device_printf(dev, "CTRL = 0x%x RCTL = 0x%x\n",
	    E1000_READ_REG(&adapter->hw, CTRL),
	    E1000_READ_REG(&adapter->hw, RCTL));
	device_printf(dev, "Packet buffer = Tx=%dk Rx=%dk\n",
	    ((E1000_READ_REG(&adapter->hw, PBA) & 0xffff0000) >> 16),
	    (E1000_READ_REG(&adapter->hw, PBA) & 0xffff));
	device_printf(dev, "Flow control watermarks high = %d low = %d\n",
	    adapter->hw.fc_high_water,
	    adapter->hw.fc_low_water);
	device_printf(dev, "tx_int_delay = %d, tx_abs_int_delay = %d\n",
	    E1000_READ_REG(&adapter->hw, TIDV),
	    E1000_READ_REG(&adapter->hw, TADV));
	device_printf(dev, "rx_int_delay = %d, rx_abs_int_delay = %d\n",
	    E1000_READ_REG(&adapter->hw, RDTR),
	    E1000_READ_REG(&adapter->hw, RADV));
	device_printf(dev, "fifo workaround = %lld, fifo_reset_count = %lld\n",
	    (long long)adapter->tx_fifo_wrk_cnt,
	    (long long)adapter->tx_fifo_reset_cnt);
	device_printf(dev, "hw tdh = %d, hw tdt = %d\n",
	    E1000_READ_REG(&adapter->hw, TDH),
	    E1000_READ_REG(&adapter->hw, TDT));
	device_printf(dev, "Num Tx descriptors avail = %d\n",
	    adapter->num_tx_desc_avail);
	device_printf(dev, "Tx Descriptors not avail1 = %ld\n",
	    adapter->no_tx_desc_avail1);
	device_printf(dev, "Tx Descriptors not avail2 = %ld\n",
	    adapter->no_tx_desc_avail2);
	device_printf(dev, "Std mbuf failed = %ld\n",
	    adapter->mbuf_alloc_failed);
	device_printf(dev, "Std mbuf cluster failed = %ld\n",
	    adapter->mbuf_cluster_failed);
}

static void
em_print_hw_stats(struct adapter *adapter)
{
	device_t dev = adapter->dev;

	device_printf(dev, "Excessive collisions = %lld\n",
	    (long long)adapter->stats.ecol);
	device_printf(dev, "Symbol errors = %lld\n",
	    (long long)adapter->stats.symerrs);
	device_printf(dev, "Sequence errors = %lld\n",
	    (long long)adapter->stats.sec);
	device_printf(dev, "Defer count = %lld\n",
	    (long long)adapter->stats.dc);

	device_printf(dev, "Missed Packets = %lld\n",
	    (long long)adapter->stats.mpc);
	device_printf(dev, "Receive No Buffers = %lld\n",
	    (long long)adapter->stats.rnbc);
	/* RLEC is inaccurate on some hardware, so calculate our own. */
	device_printf(dev, "Receive Length Errors = %lld\n",
	    ((long long)adapter->stats.roc + (long long)adapter->stats.ruc));
	device_printf(dev, "Receive errors = %lld\n",
	    (long long)adapter->stats.rxerrc);
	device_printf(dev, "CRC errors = %lld\n",
	    (long long)adapter->stats.crcerrs);
	device_printf(dev, "Alignment errors = %lld\n",
	    (long long)adapter->stats.algnerrc);
	device_printf(dev, "Carrier extension errors = %lld\n",
	    (long long)adapter->stats.cexterr);
	device_printf(dev, "RX overruns = %ld\n", adapter->rx_overruns);
	device_printf(dev, "Watchdog timeouts = %ld\n",
	    adapter->watchdog_events);

	device_printf(dev, "XON Rcvd = %lld\n", (long long)adapter->stats.xonrxc);
	device_printf(dev, "XON Xmtd = %lld\n", (long long)adapter->stats.xontxc);
	device_printf(dev, "XOFF Rcvd = %lld\n", (long long)adapter->stats.xoffrxc);
	device_printf(dev, "XOFF Xmtd = %lld\n", (long long)adapter->stats.xofftxc);

	device_printf(dev, "Good Packets Rcvd = %lld\n",
	    (long long)adapter->stats.gprc);
	device_printf(dev, "Good Packets Xmtd = %lld\n",
	    (long long)adapter->stats.gptc);
	device_printf(dev, "TSO Contexts Xmtd = %lld\n",
	    (long long)adapter->stats.tsctc);
	device_printf(dev, "TSO Contexts Failed = %lld\n",
	    (long long)adapter->stats.tsctfc);
}

static int
em_sysctl_debug_info(SYSCTL_HANDLER_ARGS)
{
	struct adapter *adapter;
	int error;
	int result;

	result = -1;
	error = sysctl_handle_int(oidp, &result, 0, req);

	if (error || !req->newptr)
		return (error);

	if (result == 1) {
		adapter = (struct adapter *)arg1;
		em_print_debug_info(adapter);
	}

	return (error);
}

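/*
 * Both em_sysctl_debug_info() above and em_sysctl_stats() below act as
 * write-only triggers: writing 1 to the corresponding sysctl node dumps
 * the debug info or the hardware statistics to the console.  A sketch
 * of the usage from userland, assuming the OIDs are registered under
 * the device's sysctl tree elsewhere in the driver (node names and the
 * unit number are illustrative):
 *
 *	sysctl dev.em.0.debug_info=1
 *	sysctl dev.em.0.stats=1
 */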

static int
em_sysctl_stats(SYSCTL_HANDLER_ARGS)
{
	struct adapter *adapter;
	int error;
	int result;

	result = -1;
	error = sysctl_handle_int(oidp, &result, 0, req);

	if (error || !req->newptr)
		return (error);

	if (result == 1) {
		adapter = (struct adapter *)arg1;
		em_print_hw_stats(adapter);
	}

	return (error);
}

static int
em_sysctl_int_delay(SYSCTL_HANDLER_ARGS)
{
	struct em_int_delay_info *info;
	struct adapter *adapter;
	uint32_t regval;
	int error;
	int usecs;
	int ticks;

	info = (struct em_int_delay_info *)arg1;
	usecs = info->value;
	error = sysctl_handle_int(oidp, &usecs, 0, req);
	if (error != 0 || req->newptr == NULL)
		return (error);
	if (usecs < 0 || usecs > E1000_TICKS_TO_USECS(65535))
		return (EINVAL);
	info->value = usecs;
	ticks = E1000_USECS_TO_TICKS(usecs);

	adapter = info->adapter;

	EM_LOCK(adapter);
	regval = E1000_READ_OFFSET(&adapter->hw, info->offset);
	regval = (regval & ~0xffff) | (ticks & 0xffff);
	/* Handle a few special cases. */
	switch (info->offset) {
	case E1000_RDTR:
	case E1000_82542_RDTR:
		regval |= E1000_RDT_FPDB;
		break;
	case E1000_TIDV:
	case E1000_82542_TIDV:
		if (ticks == 0) {
			adapter->txd_cmd &= ~E1000_TXD_CMD_IDE;
			/* Don't write 0 into the TIDV register. */
			regval++;
		} else
			adapter->txd_cmd |= E1000_TXD_CMD_IDE;
		break;
	}
	E1000_WRITE_OFFSET(&adapter->hw, info->offset, regval);
	EM_UNLOCK(adapter);
	return (0);
}

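/*
 * The interrupt delay timers count in units of 1.024 microseconds,
 * which is what the E1000_USECS_TO_TICKS()/E1000_TICKS_TO_USECS()
 * macros are assumed to convert between.  As a worked example with
 * illustrative numbers: writing usecs = 100 through the handler above
 * yields roughly 100 / 1.024 = 97 ticks (subject to the macros' exact
 * rounding), and that value is merged into the low 16 bits of the
 * delay register.
 */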
static void
em_add_int_delay_sysctl(struct adapter *adapter, const char *name,
	const char *description, struct em_int_delay_info *info,
	int offset, int value)
{
	info->adapter = adapter;
	info->offset = offset;
	info->value = value;
	SYSCTL_ADD_PROC(device_get_sysctl_ctx(adapter->dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
	    OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW,
	    info, 0, em_sysctl_int_delay, "I", description);
}

#ifndef DEVICE_POLLING
static void
em_add_int_process_limit(struct adapter *adapter, const char *name,
	const char *description, int *limit, int value)
{
	*limit = value;
	SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
	    OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW, limit, value, description);
}
#endif
