/**************************************************************************

Copyright (c) 2001-2006, Intel Corporation
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

 1. Redistributions of source code must retain the above copyright notice,
    this list of conditions and the following disclaimer.

 2. Redistributions in binary form must reproduce the above copyright
    notice, this list of conditions and the following disclaimer in the
    documentation and/or other materials provided with the distribution.

 3. Neither the name of the Intel Corporation nor the names of its
    contributors may be used to endorse or promote products derived from
    this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.

***************************************************************************/

/*$FreeBSD: head/sys/dev/em/if_em.c 162171 2006-09-09 06:19:20Z pdeuskar $*/

#ifdef HAVE_KERNEL_OPTION_HEADERS
#include "opt_device_polling.h"
#endif

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/bus.h>
#include <sys/endian.h>
#include <sys/kernel.h>
#include <sys/kthread.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/module.h>
#include <sys/rman.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/sysctl.h>
#include <sys/taskqueue.h>

#include <machine/bus.h>
#include <machine/resource.h>

#include <net/bpf.h>
#include <net/ethernet.h>
#include <net/if.h>
#include <net/if_arp.h>
#include <net/if_dl.h>
#include <net/if_media.h>

#include <net/if_types.h>
#include <net/if_vlan_var.h>

#include <netinet/in_systm.h>
#include <netinet/in.h>
#include <netinet/if_ether.h>
#include <netinet/ip.h>
#include <netinet/tcp.h>
#include <netinet/udp.h>

#include <dev/pci/pcivar.h>
#include <dev/pci/pcireg.h>
#include <dev/em/if_em_hw.h>
#include <dev/em/if_em.h>

/*********************************************************************
 *  Set this to one to display debug statistics
 *********************************************************************/
int	em_display_debug_stats = 0;

/*********************************************************************
 *  Driver version
 *********************************************************************/

char em_driver_version[] = "Version - 6.1.4 - TSO";


/*********************************************************************
 *  PCI Device ID Table
 *
 *  Used by em_probe to select which devices the driver loads on.
 *  The last field stores an index into em_strings.
 *  The last entry must be all 0s.
 *
 *  { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index }
 *********************************************************************/

static em_vendor_info_t em_vendor_info_array[] =
{
	/* Intel(R) PRO/1000 Network Connection */
	{ 0x8086, E1000_DEV_ID_82540EM,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82540EM_LOM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82540EP,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82540EP_LOM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82540EP_LP,	PCI_ANY_ID, PCI_ANY_ID, 0},

	{ 0x8086, E1000_DEV_ID_82541EI,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82541ER,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82541ER_LOM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82541EI_MOBILE,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82541GI,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82541GI_LF,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82541GI_MOBILE,	PCI_ANY_ID, PCI_ANY_ID, 0},

	{ 0x8086, E1000_DEV_ID_82542,		PCI_ANY_ID, PCI_ANY_ID, 0},

	{ 0x8086, E1000_DEV_ID_82543GC_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82543GC_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},

	{ 0x8086, E1000_DEV_ID_82544EI_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82544EI_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82544GC_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82544GC_LOM,	PCI_ANY_ID, PCI_ANY_ID, 0},

	{ 0x8086, E1000_DEV_ID_82545EM_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82545EM_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82545GM_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82545GM_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82545GM_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},

	{ 0x8086, E1000_DEV_ID_82546EB_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82546EB_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82546EB_QUAD_COPPER, PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82546GB_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82546GB_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82546GB_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82546GB_PCIE,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82546GB_QUAD_COPPER, PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82546GB_QUAD_COPPER_KSP3,
						PCI_ANY_ID, PCI_ANY_ID, 0},

	{ 0x8086, E1000_DEV_ID_82547EI,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82547EI_MOBILE,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82547GI,		PCI_ANY_ID, PCI_ANY_ID, 0},

	{ 0x8086, E1000_DEV_ID_82571EB_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER,
						PCI_ANY_ID, PCI_ANY_ID, 0},

	{ 0x8086, E1000_DEV_ID_82572EI_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82572EI_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82572EI_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82572EI,		PCI_ANY_ID, PCI_ANY_ID, 0},

	{ 0x8086, E1000_DEV_ID_82573E,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82573E_IAMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82573L,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_SPT,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_SPT,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_DPT,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_DPT,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IGP_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IGP_C,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IFE,	PCI_ANY_ID, PCI_ANY_ID, 0},

	/* required last entry */
	{ 0, 0, 0, 0, 0}
};

/*********************************************************************
 *  Table of branding strings for all supported NICs.
 *********************************************************************/

static char *em_strings[] = {
	"Intel(R) PRO/1000 Network Connection"
};

/*********************************************************************
 *  Function prototypes
 *********************************************************************/
static int	em_probe(device_t);
static int	em_attach(device_t);
static int	em_detach(device_t);
static int	em_shutdown(device_t);
static int	em_suspend(device_t);
static int	em_resume(device_t);
static void	em_start(struct ifnet *);
static void	em_start_locked(struct ifnet *ifp);
static int	em_ioctl(struct ifnet *, u_long, caddr_t);
static void	em_watchdog(struct ifnet *);
static void	em_init(void *);
static void	em_init_locked(struct adapter *);
static void	em_stop(void *);
static void	em_media_status(struct ifnet *, struct ifmediareq *);
static int	em_media_change(struct ifnet *);
static void	em_identify_hardware(struct adapter *);
static int	em_allocate_pci_resources(struct adapter *);
static int	em_allocate_intr(struct adapter *);
static void	em_free_intr(struct adapter *);
static void	em_free_pci_resources(struct adapter *);
static void	em_local_timer(void *);
static int	em_hardware_init(struct adapter *);
static void	em_setup_interface(device_t, struct adapter *);
static int	em_setup_transmit_structures(struct adapter *);
static void	em_initialize_transmit_unit(struct adapter *);
static int	em_setup_receive_structures(struct adapter *);
static void	em_initialize_receive_unit(struct adapter *);
static void	em_enable_intr(struct adapter *);
static void	em_disable_intr(struct adapter *);
static void	em_free_transmit_structures(struct adapter *);
static void	em_free_receive_structures(struct adapter *);
static void	em_update_stats_counters(struct adapter *);
static void	em_txeof(struct adapter *);
static int	em_allocate_receive_structures(struct adapter *);
static int	em_allocate_transmit_structures(struct adapter *);
static int	em_rxeof(struct adapter *, int);
#ifndef __NO_STRICT_ALIGNMENT
static int	em_fixup_rx(struct adapter *);
#endif
static void	em_receive_checksum(struct adapter *, struct em_rx_desc *,
		    struct mbuf *);
static void	em_transmit_checksum_setup(struct adapter *, struct mbuf *,
		    uint32_t *, uint32_t *);
static boolean_t em_tso_setup(struct adapter *, struct mbuf *,
		    uint32_t *, uint32_t *);
static void	em_set_promisc(struct adapter *);
static void	em_disable_promisc(struct adapter *);
static void	em_set_multi(struct adapter *);
static void	em_print_hw_stats(struct adapter *);
static void	em_update_link_status(struct adapter *);
static int	em_get_buf(struct adapter *, int);
static void	em_enable_vlans(struct adapter *);
static void	em_disable_vlans(struct adapter *);
static int	em_encap(struct adapter *, struct mbuf **);
static void	em_smartspeed(struct adapter *);
static int	em_82547_fifo_workaround(struct adapter *, int);
static void	em_82547_update_fifo_head(struct adapter *, int);
static int	em_82547_tx_fifo_reset(struct adapter *);
static void	em_82547_move_tail(void *arg);
static void	em_82547_move_tail_locked(struct adapter *);
static int	em_dma_malloc(struct adapter *, bus_size_t,
		    struct em_dma_alloc *, int);
static void	em_dma_free(struct adapter *, struct em_dma_alloc *);
static void	em_print_debug_info(struct adapter *);
static int	em_is_valid_ether_addr(uint8_t *);
static int	em_sysctl_stats(SYSCTL_HANDLER_ARGS);
static int	em_sysctl_debug_info(SYSCTL_HANDLER_ARGS);
static uint32_t	em_fill_descriptors(bus_addr_t address, uint32_t length,
		    PDESC_ARRAY desc_array);
static int	em_sysctl_int_delay(SYSCTL_HANDLER_ARGS);
static void	em_add_int_delay_sysctl(struct adapter *, const char *,
		    const char *, struct em_int_delay_info *, int, int);

/*
 * Fast interrupt handler and legacy ithread/polling modes are
 * mutually exclusive.
 */
#ifdef DEVICE_POLLING
static poll_handler_t em_poll;
static void	em_intr(void *);
#else
static void	em_intr_fast(void *);
static void	em_add_int_process_limit(struct adapter *, const char *,
		    const char *, int *, int);
static void	em_handle_rxtx(void *context, int pending);
static void	em_handle_link(void *context, int pending);
#endif

/*********************************************************************
 *  FreeBSD Device Interface Entry Points
 *********************************************************************/

static device_method_t em_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe, em_probe),
	DEVMETHOD(device_attach, em_attach),
	DEVMETHOD(device_detach, em_detach),
	DEVMETHOD(device_shutdown, em_shutdown),
	DEVMETHOD(device_suspend, em_suspend),
	DEVMETHOD(device_resume, em_resume),
	{0, 0}
};

static driver_t em_driver = {
	"em", em_methods, sizeof(struct adapter),
};

static devclass_t em_devclass;
DRIVER_MODULE(em, pci, em_driver, em_devclass, 0, 0);
MODULE_DEPEND(em, pci, 1, 1, 1);
MODULE_DEPEND(em, ether, 1, 1, 1);

304 *  Tunable default values.
305 *********************************************************************/
306
307#define E1000_TICKS_TO_USECS(ticks)	((1024 * (ticks) + 500) / 1000)
308#define E1000_USECS_TO_TICKS(usecs)	((1000 * (usecs) + 512) / 1024)
309#define M_TSO_LEN			66
310
311static int em_tx_int_delay_dflt = E1000_TICKS_TO_USECS(EM_TIDV);
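/*
 * The interrupt delay registers count in units of 1.024 usec (1024 ns),
 * so the conversion macros above round to the nearest value in the
 * other unit.  As a worked example, assuming EM_TIDV is 64 ticks as
 * defined in if_em.h: E1000_TICKS_TO_USECS(64) =
 * (1024 * 64 + 500) / 1000 = 66 usecs, and E1000_USECS_TO_TICKS(66) =
 * (1000 * 66 + 512) / 1024 = 64 ticks again.
 */
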
312static int em_rx_int_delay_dflt = E1000_TICKS_TO_USECS(EM_RDTR);
313static int em_tx_abs_int_delay_dflt = E1000_TICKS_TO_USECS(EM_TADV);
314static int em_rx_abs_int_delay_dflt = E1000_TICKS_TO_USECS(EM_RADV);
315static int em_rxd = EM_DEFAULT_RXD;
316static int em_txd = EM_DEFAULT_TXD;
317static int em_smart_pwr_down = FALSE;
318
319TUNABLE_INT("hw.em.tx_int_delay", &em_tx_int_delay_dflt);
320TUNABLE_INT("hw.em.rx_int_delay", &em_rx_int_delay_dflt);
321TUNABLE_INT("hw.em.tx_abs_int_delay", &em_tx_abs_int_delay_dflt);
322TUNABLE_INT("hw.em.rx_abs_int_delay", &em_rx_abs_int_delay_dflt);
323TUNABLE_INT("hw.em.rxd", &em_rxd);
324TUNABLE_INT("hw.em.txd", &em_txd);
325TUNABLE_INT("hw.em.smart_pwr_down", &em_smart_pwr_down);
326#ifndef DEVICE_POLLING
327static int em_rx_process_limit = 100;
328TUNABLE_INT("hw.em.rx_process_limit", &em_rx_process_limit);
329#endif
330
331/*********************************************************************
332 *  Device identification routine
333 *
334 *  em_probe determines if the driver should be loaded on
335 *  adapter based on PCI vendor/device id of the adapter.
336 *
337 *  return BUS_PROBE_DEFAULT on success, positive on failure
338 *********************************************************************/
339
static int
em_probe(device_t dev)
{
	char		adapter_name[60];
	uint16_t	pci_vendor_id = 0;
	uint16_t	pci_device_id = 0;
	uint16_t	pci_subvendor_id = 0;
	uint16_t	pci_subdevice_id = 0;
	em_vendor_info_t *ent;

	INIT_DEBUGOUT("em_probe: begin");

	pci_vendor_id = pci_get_vendor(dev);
	if (pci_vendor_id != EM_VENDOR_ID)
		return (ENXIO);

	pci_device_id = pci_get_device(dev);
	pci_subvendor_id = pci_get_subvendor(dev);
	pci_subdevice_id = pci_get_subdevice(dev);

	ent = em_vendor_info_array;
	while (ent->vendor_id != 0) {
		if ((pci_vendor_id == ent->vendor_id) &&
		    (pci_device_id == ent->device_id) &&

		    ((pci_subvendor_id == ent->subvendor_id) ||
		    (ent->subvendor_id == PCI_ANY_ID)) &&

		    ((pci_subdevice_id == ent->subdevice_id) ||
		    (ent->subdevice_id == PCI_ANY_ID))) {
			sprintf(adapter_name, "%s %s",
				em_strings[ent->index],
				em_driver_version);
			device_set_desc_copy(dev, adapter_name);
			return (BUS_PROBE_DEFAULT);
		}
		ent++;
	}

	return (ENXIO);
}

/*********************************************************************
 *  Device initialization routine
 *
 *  The attach entry point is called when the driver is being loaded.
 *  This routine identifies the type of hardware, allocates all resources
 *  and initializes the hardware.
 *
 *  return 0 on success, positive on failure
 *********************************************************************/

static int
em_attach(device_t dev)
{
	struct adapter	*adapter;
	int		tsize, rsize;
	int		error = 0;

	INIT_DEBUGOUT("em_attach: begin");

	adapter = device_get_softc(dev);
	adapter->dev = adapter->osdep.dev = dev;
	EM_LOCK_INIT(adapter, device_get_nameunit(dev));

	/* SYSCTL stuff */
	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
	    OID_AUTO, "debug_info", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
	    em_sysctl_debug_info, "I", "Debug Information");

	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
	    OID_AUTO, "stats", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
	    em_sysctl_stats, "I", "Statistics");

	callout_init(&adapter->timer, CALLOUT_MPSAFE);
	callout_init(&adapter->tx_fifo_timer, CALLOUT_MPSAFE);

	/* Determine hardware revision */
	em_identify_hardware(adapter);

	/* Set up some sysctls for the tunable interrupt delays */
	em_add_int_delay_sysctl(adapter, "rx_int_delay",
	    "receive interrupt delay in usecs", &adapter->rx_int_delay,
	    E1000_REG_OFFSET(&adapter->hw, RDTR), em_rx_int_delay_dflt);
	em_add_int_delay_sysctl(adapter, "tx_int_delay",
	    "transmit interrupt delay in usecs", &adapter->tx_int_delay,
	    E1000_REG_OFFSET(&adapter->hw, TIDV), em_tx_int_delay_dflt);
	if (adapter->hw.mac_type >= em_82540) {
		em_add_int_delay_sysctl(adapter, "rx_abs_int_delay",
		    "receive interrupt delay limit in usecs",
		    &adapter->rx_abs_int_delay,
		    E1000_REG_OFFSET(&adapter->hw, RADV),
		    em_rx_abs_int_delay_dflt);
		em_add_int_delay_sysctl(adapter, "tx_abs_int_delay",
		    "transmit interrupt delay limit in usecs",
		    &adapter->tx_abs_int_delay,
		    E1000_REG_OFFSET(&adapter->hw, TADV),
		    em_tx_abs_int_delay_dflt);
	}

#ifndef DEVICE_POLLING
	/* Sysctls for limiting the amount of work done in the taskqueue */
	em_add_int_process_limit(adapter, "rx_processing_limit",
	    "max number of rx packets to process", &adapter->rx_process_limit,
	    em_rx_process_limit);
#endif

	/*
	 * Validate the number of transmit and receive descriptors.  It
	 * must not exceed the hardware maximum and must be a multiple
	 * of EM_DBA_ALIGN.
	 */
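	/*
	 * A sketch of the arithmetic, assuming EM_DBA_ALIGN is 128 and
	 * the 16-byte struct em_tx_desc from the companion headers: the
	 * default 256 descriptors occupy 256 * 16 = 4096 bytes, which
	 * divides evenly by 128, so the check below passes.  A request
	 * such as em_txd=100 (100 * 16 = 1600 bytes, not a multiple of
	 * 128) would fall back to EM_DEFAULT_TXD instead.
	 */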
	if (((em_txd * sizeof(struct em_tx_desc)) % EM_DBA_ALIGN) != 0 ||
	    (adapter->hw.mac_type >= em_82544 && em_txd > EM_MAX_TXD) ||
	    (adapter->hw.mac_type < em_82544 && em_txd > EM_MAX_TXD_82543) ||
	    (em_txd < EM_MIN_TXD)) {
		device_printf(dev, "Using %d TX descriptors instead of %d!\n",
		    EM_DEFAULT_TXD, em_txd);
		adapter->num_tx_desc = EM_DEFAULT_TXD;
	} else
		adapter->num_tx_desc = em_txd;
	if (((em_rxd * sizeof(struct em_rx_desc)) % EM_DBA_ALIGN) != 0 ||
	    (adapter->hw.mac_type >= em_82544 && em_rxd > EM_MAX_RXD) ||
	    (adapter->hw.mac_type < em_82544 && em_rxd > EM_MAX_RXD_82543) ||
	    (em_rxd < EM_MIN_RXD)) {
		device_printf(dev, "Using %d RX descriptors instead of %d!\n",
		    EM_DEFAULT_RXD, em_rxd);
		adapter->num_rx_desc = EM_DEFAULT_RXD;
	} else
		adapter->num_rx_desc = em_rxd;

	adapter->hw.autoneg = DO_AUTO_NEG;
	adapter->hw.wait_autoneg_complete = WAIT_FOR_AUTO_NEG_DEFAULT;
	adapter->hw.autoneg_advertised = AUTONEG_ADV_DEFAULT;
	adapter->hw.tbi_compatibility_en = TRUE;
	adapter->rx_buffer_len = EM_RXBUFFER_2048;

	adapter->hw.phy_init_script = 1;
	adapter->hw.phy_reset_disable = FALSE;

#ifndef EM_MASTER_SLAVE
	adapter->hw.master_slave = em_ms_hw_default;
#else
	adapter->hw.master_slave = EM_MASTER_SLAVE;
#endif
	/*
	 * Set the max frame size assuming standard Ethernet
	 * sized frames.
	 */
	adapter->hw.max_frame_size = ETHERMTU + ETHER_HDR_LEN + ETHER_CRC_LEN;
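	/*
	 * For the standard 1500-byte MTU this works out to
	 * 1500 + 14 + 4 = 1518 bytes.
	 */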

	adapter->hw.min_frame_size = MINIMUM_ETHERNET_PACKET_SIZE + ETHER_CRC_LEN;

	/*
	 * This controls when hardware reports transmit completion
	 * status.
	 */
	adapter->hw.report_tx_early = 1;
	if (em_allocate_pci_resources(adapter)) {
		device_printf(dev, "Allocation of PCI resources failed\n");
		error = ENXIO;
		goto err_pci;
	}

	/* Initialize eeprom parameters */
	em_init_eeprom_params(&adapter->hw);

	tsize = roundup2(adapter->num_tx_desc * sizeof(struct em_tx_desc),
	    EM_DBA_ALIGN);

	/* Allocate Transmit Descriptor ring */
	if (em_dma_malloc(adapter, tsize, &adapter->txdma, BUS_DMA_NOWAIT)) {
		device_printf(dev, "Unable to allocate tx_desc memory\n");
		error = ENOMEM;
		goto err_tx_desc;
	}
	adapter->tx_desc_base = (struct em_tx_desc *)adapter->txdma.dma_vaddr;

	rsize = roundup2(adapter->num_rx_desc * sizeof(struct em_rx_desc),
	    EM_DBA_ALIGN);

	/* Allocate Receive Descriptor ring */
	if (em_dma_malloc(adapter, rsize, &adapter->rxdma, BUS_DMA_NOWAIT)) {
		device_printf(dev, "Unable to allocate rx_desc memory\n");
		error = ENOMEM;
		goto err_rx_desc;
	}
	adapter->rx_desc_base = (struct em_rx_desc *)adapter->rxdma.dma_vaddr;

	/* Initialize the hardware */
	if (em_hardware_init(adapter)) {
		device_printf(dev, "Unable to initialize the hardware\n");
		error = EIO;
		goto err_hw_init;
	}

	/* Copy the permanent MAC address out of the EEPROM */
	if (em_read_mac_addr(&adapter->hw) < 0) {
		device_printf(dev, "EEPROM read error while reading MAC"
		    " address\n");
		error = EIO;
		goto err_hw_init;
	}

	if (!em_is_valid_ether_addr(adapter->hw.mac_addr)) {
		device_printf(dev, "Invalid MAC address\n");
		error = EIO;
		goto err_hw_init;
	}

	/* Setup OS specific network interface */
	em_setup_interface(dev, adapter);

	em_allocate_intr(adapter);

	/* Initialize statistics */
	em_clear_hw_cntrs(&adapter->hw);
	em_update_stats_counters(adapter);
	adapter->hw.get_link_status = 1;
	em_update_link_status(adapter);

	/* Indicate SOL/IDER usage */
	if (em_check_phy_reset_block(&adapter->hw))
		device_printf(dev,
		    "PHY reset is blocked due to SOL/IDER session.\n");

	/* Identify 82544 on PCIX */
	em_get_bus_info(&adapter->hw);
	if (adapter->hw.bus_type == em_bus_type_pcix &&
	    adapter->hw.mac_type == em_82544)
		adapter->pcix_82544 = TRUE;
	else
		adapter->pcix_82544 = FALSE;

	INIT_DEBUGOUT("em_attach: end");

	return (0);

err_hw_init:
	em_dma_free(adapter, &adapter->rxdma);
err_rx_desc:
	em_dma_free(adapter, &adapter->txdma);
err_tx_desc:
err_pci:
	em_free_intr(adapter);
	em_free_pci_resources(adapter);
	EM_LOCK_DESTROY(adapter);

	return (error);
}

/*********************************************************************
 *  Device removal routine
 *
 *  The detach entry point is called when the driver is being removed.
 *  This routine stops the adapter and deallocates all the resources
 *  that were allocated for driver operation.
 *
 *  return 0 on success, positive on failure
 *********************************************************************/

static int
em_detach(device_t dev)
{
	struct adapter	*adapter = device_get_softc(dev);
	struct ifnet	*ifp = adapter->ifp;

	INIT_DEBUGOUT("em_detach: begin");

#ifdef DEVICE_POLLING
	if (ifp->if_capenable & IFCAP_POLLING)
		ether_poll_deregister(ifp);
#endif

	em_free_intr(adapter);
	EM_LOCK(adapter);
	adapter->in_detach = 1;
	em_stop(adapter);
	em_phy_hw_reset(&adapter->hw);
	EM_UNLOCK(adapter);
	ether_ifdetach(adapter->ifp);

	em_free_pci_resources(adapter);
	bus_generic_detach(dev);
	if_free(ifp);

	/* Free Transmit Descriptor ring */
	if (adapter->tx_desc_base) {
		em_dma_free(adapter, &adapter->txdma);
		adapter->tx_desc_base = NULL;
	}

	/* Free Receive Descriptor ring */
	if (adapter->rx_desc_base) {
		em_dma_free(adapter, &adapter->rxdma);
		adapter->rx_desc_base = NULL;
	}

	EM_LOCK_DESTROY(adapter);

	return (0);
}

/*********************************************************************
 *
 *  Shutdown entry point
 *
 **********************************************************************/

static int
em_shutdown(device_t dev)
{
	struct adapter *adapter = device_get_softc(dev);

	EM_LOCK(adapter);
	em_stop(adapter);
	EM_UNLOCK(adapter);

	return (0);
}

/*
 * Suspend/resume device methods.
 */
static int
em_suspend(device_t dev)
{
	struct adapter *adapter = device_get_softc(dev);

	EM_LOCK(adapter);
	em_stop(adapter);
	EM_UNLOCK(adapter);

	return (bus_generic_suspend(dev));
}

static int
em_resume(device_t dev)
{
	struct adapter *adapter = device_get_softc(dev);
	struct ifnet *ifp = adapter->ifp;

	EM_LOCK(adapter);
	em_init_locked(adapter);
	if ((ifp->if_flags & IFF_UP) &&
	    (ifp->if_drv_flags & IFF_DRV_RUNNING))
		em_start_locked(ifp);
	EM_UNLOCK(adapter);

	return (bus_generic_resume(dev));
}

/*********************************************************************
 *  Transmit entry point
 *
 *  em_start is called by the stack to initiate a transmit.
 *  The driver will remain in this routine as long as there are
 *  packets to transmit and transmit resources are available.
 *  In case resources are not available, the stack is notified
 *  and the packet is requeued.
 **********************************************************************/

static void
em_start_locked(struct ifnet *ifp)
{
	struct adapter	*adapter = ifp->if_softc;
	struct mbuf	*m_head;

	EM_LOCK_ASSERT(adapter);

	if ((ifp->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) !=
	    IFF_DRV_RUNNING)
		return;
	if (!adapter->link_active)
		return;

	while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {

		IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
		if (m_head == NULL)
			break;
		/*
		 * em_encap() can modify our pointer, and/or make it
		 * NULL on failure.  In that event, we can't requeue.
		 */
		if (em_encap(adapter, &m_head)) {
			if (m_head == NULL)
				break;
			ifp->if_drv_flags |= IFF_DRV_OACTIVE;
			IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
			break;
		}

		/* Send a copy of the frame to the BPF listener */
		BPF_MTAP(ifp, m_head);

		/* Set timeout in case hardware has problems transmitting. */
		ifp->if_timer = EM_TX_TIMEOUT;
	}
}

static void
em_start(struct ifnet *ifp)
{
	struct adapter *adapter = ifp->if_softc;

	EM_LOCK(adapter);
	if (ifp->if_drv_flags & IFF_DRV_RUNNING)
		em_start_locked(ifp);
	EM_UNLOCK(adapter);
}

/*********************************************************************
 *  Ioctl entry point
 *
 *  em_ioctl is called when the user wants to configure the
 *  interface.
 *
 *  return 0 on success, positive on failure
 **********************************************************************/

static int
em_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
{
	struct adapter	*adapter = ifp->if_softc;
	struct ifreq	*ifr = (struct ifreq *)data;
	struct ifaddr	*ifa = (struct ifaddr *)data;
	int		error = 0;

	if (adapter->in_detach)
		return (error);

	switch (command) {
	case SIOCSIFADDR:
	case SIOCGIFADDR:
		if (ifa->ifa_addr->sa_family == AF_INET) {
			/*
			 * XXX
			 * Since resetting hardware takes a very long time
			 * and results in link renegotiation, we only
			 * initialize the hardware when it is absolutely
			 * required.
			 */
			ifp->if_flags |= IFF_UP;
			if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) {
				EM_LOCK(adapter);
				em_init_locked(adapter);
				EM_UNLOCK(adapter);
			}
			arp_ifinit(ifp, ifa);
		} else
			error = ether_ioctl(ifp, command, data);
		break;
	case SIOCSIFMTU:
	    {
		int max_frame_size;
		uint16_t eeprom_data = 0;

		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFMTU (Set Interface MTU)");

		EM_LOCK(adapter);
		switch (adapter->hw.mac_type) {
		case em_82573:
			/*
			 * 82573 only supports jumbo frames
			 * if ASPM is disabled.
			 */
			em_read_eeprom(&adapter->hw, EEPROM_INIT_3GIO_3, 1,
			    &eeprom_data);
			if (eeprom_data & EEPROM_WORD1A_ASPM_MASK) {
				max_frame_size = ETHER_MAX_LEN;
				break;
			}
			/* Allow Jumbo frames - fall thru */
		case em_82571:
		case em_82572:
		case em_80003es2lan:	/* Limit Jumbo Frame size */
			max_frame_size = 9234;
			break;
		case em_ich8lan:
			/* ICH8 does not support jumbo frames */
			max_frame_size = ETHER_MAX_LEN;
			break;
		default:
			max_frame_size = MAX_JUMBO_FRAME_SIZE;
		}
		if (ifr->ifr_mtu > max_frame_size - ETHER_HDR_LEN -
		    ETHER_CRC_LEN) {
			EM_UNLOCK(adapter);
			error = EINVAL;
			break;
		}

		ifp->if_mtu = ifr->ifr_mtu;
		adapter->hw.max_frame_size =
		    ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
		em_init_locked(adapter);
		EM_UNLOCK(adapter);
		break;
	    }
	case SIOCSIFFLAGS:
		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFFLAGS (Set Interface Flags)");
		EM_LOCK(adapter);
		if (ifp->if_flags & IFF_UP) {
			if ((ifp->if_drv_flags & IFF_DRV_RUNNING)) {
				if ((ifp->if_flags ^ adapter->if_flags) &
				    IFF_PROMISC) {
					em_disable_promisc(adapter);
					em_set_promisc(adapter);
				}
			} else
				em_init_locked(adapter);
		} else {
			if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
				em_stop(adapter);
			}
		}
		adapter->if_flags = ifp->if_flags;
		EM_UNLOCK(adapter);
		break;
	case SIOCADDMULTI:
	case SIOCDELMULTI:
		IOCTL_DEBUGOUT("ioctl rcv'd: SIOC(ADD|DEL)MULTI");
		if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
			EM_LOCK(adapter);
			em_disable_intr(adapter);
			em_set_multi(adapter);
			if (adapter->hw.mac_type == em_82542_rev2_0) {
				em_initialize_receive_unit(adapter);
			}
#ifdef DEVICE_POLLING
			if (!(ifp->if_capenable & IFCAP_POLLING))
#endif
				em_enable_intr(adapter);
			EM_UNLOCK(adapter);
		}
		break;
	case SIOCSIFMEDIA:
	case SIOCGIFMEDIA:
		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCxIFMEDIA (Get/Set Interface Media)");
		error = ifmedia_ioctl(ifp, ifr, &adapter->media, command);
		break;
	case SIOCSIFCAP:
	    {
		int mask, reinit;

		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFCAP (Set Capabilities)");
		reinit = 0;
		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
#ifdef DEVICE_POLLING
		if (mask & IFCAP_POLLING) {
			if (ifr->ifr_reqcap & IFCAP_POLLING) {
				error = ether_poll_register(em_poll, ifp);
				if (error)
					return (error);
				EM_LOCK(adapter);
				em_disable_intr(adapter);
				ifp->if_capenable |= IFCAP_POLLING;
				EM_UNLOCK(adapter);
			} else {
				error = ether_poll_deregister(ifp);
				/* Enable interrupt even in error case */
				EM_LOCK(adapter);
				em_enable_intr(adapter);
				ifp->if_capenable &= ~IFCAP_POLLING;
				EM_UNLOCK(adapter);
			}
		}
#endif
		if (mask & IFCAP_HWCSUM) {
			ifp->if_capenable ^= IFCAP_HWCSUM;
			reinit = 1;
		}
		if (mask & IFCAP_TSO) {
			ifp->if_capenable ^= IFCAP_TSO;
			reinit = 1;
		}
		if (mask & IFCAP_VLAN_HWTAGGING) {
			ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
			reinit = 1;
		}
		if (reinit && (ifp->if_drv_flags & IFF_DRV_RUNNING))
			em_init(adapter);
		VLAN_CAPABILITIES(ifp);
		break;
	    }
	default:
		error = ether_ioctl(ifp, command, data);
		break;
	}

	return (error);
}

/*********************************************************************
 *  Watchdog entry point
 *
 *  This routine is called whenever hardware quits transmitting.
 *
 **********************************************************************/

static void
em_watchdog(struct ifnet *ifp)
{
	struct adapter *adapter = ifp->if_softc;

	EM_LOCK(adapter);
	/*
	 * If we are in this routine because of pause frames, then
	 * don't reset the hardware.
	 */
	if (E1000_READ_REG(&adapter->hw, STATUS) & E1000_STATUS_TXOFF) {
		ifp->if_timer = EM_TX_TIMEOUT;
		EM_UNLOCK(adapter);
		return;
	}

	/*
	 * Reclaim first, as there is a possibility of losing Tx completion
	 * interrupts.  Missing Tx completion interrupts can be caused by
	 * the Tx interrupt moderation mechanism (delayed interrupts) or by
	 * a chipset bug.
	 */
	em_txeof(adapter);
	if (adapter->num_tx_desc_avail == adapter->num_tx_desc) {
		EM_UNLOCK(adapter);
		return;
	}

	if (em_check_for_link(&adapter->hw) == 0)
		device_printf(adapter->dev, "watchdog timeout -- resetting\n");

	ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
	adapter->watchdog_events++;

	em_init_locked(adapter);
	EM_UNLOCK(adapter);
}

/*********************************************************************
 *  Init entry point
 *
 *  This routine is used in two ways.  It is used by the stack as the
 *  init entry point in the network interface structure.  It is also
 *  used by the driver as a hw/sw initialization routine to get to a
 *  consistent state.
 *
 *  return 0 on success, positive on failure
 **********************************************************************/

static void
em_init_locked(struct adapter *adapter)
{
	struct ifnet	*ifp = adapter->ifp;
	device_t	dev = adapter->dev;
	uint32_t	pba;

	INIT_DEBUGOUT("em_init: begin");

	EM_LOCK_ASSERT(adapter);

	em_stop(adapter);

	/*
	 * Packet Buffer Allocation (PBA)
	 * Writing PBA sets the receive portion of the buffer;
	 * the remainder is used for the transmit buffer.
	 *
	 * Devices before the 82547 had a Packet Buffer of 64K.
	 *   Default allocation: PBA=48K for Rx, leaving 16K for Tx.
	 * After the 82547 the buffer was reduced to 40K.
	 *   Default allocation: PBA=30K for Rx, leaving 10K for Tx.
	 *   Note: the default does not leave enough room for a
	 *   Jumbo Frame >10k.
	 */
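	/*
	 * Note: the PBA register is expressed in kilobyte units; assuming
	 * the E1000_PBA_* constants from the shared e1000 header,
	 * E1000_PBA_30K is 0x1E (30 decimal), so writing it reserves 30KB
	 * of the on-chip packet buffer for receive and leaves the rest
	 * for transmit.
	 */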
	switch (adapter->hw.mac_type) {
	case em_82547:
	case em_82547_rev_2: /* 82547: Total Packet Buffer is 40K */
		if (adapter->hw.max_frame_size > EM_RXBUFFER_8192)
			pba = E1000_PBA_22K; /* 22K for Rx, 18K for Tx */
		else
			pba = E1000_PBA_30K; /* 30K for Rx, 10K for Tx */
		adapter->tx_fifo_head = 0;
		adapter->tx_head_addr = pba << EM_TX_HEAD_ADDR_SHIFT;
		adapter->tx_fifo_size =
		    (E1000_PBA_40K - pba) << EM_PBA_BYTES_SHIFT;
		break;
	case em_80003es2lan: /* 80003es2lan: Total Packet Buffer is 48K */
	case em_82571: /* 82571: Total Packet Buffer is 48K */
	case em_82572: /* 82572: Total Packet Buffer is 48K */
		pba = E1000_PBA_32K; /* 32K for Rx, 16K for Tx */
		break;
	case em_82573: /* 82573: Total Packet Buffer is 32K */
		/* Jumbo frames not supported */
		pba = E1000_PBA_12K; /* 12K for Rx, 20K for Tx */
		break;
	case em_ich8lan:
		pba = E1000_PBA_8K;
		break;
	default:
		/* Devices before 82547 had a Packet Buffer of 64K. */
		if (adapter->hw.max_frame_size > EM_RXBUFFER_8192)
			pba = E1000_PBA_40K; /* 40K for Rx, 24K for Tx */
		else
			pba = E1000_PBA_48K; /* 48K for Rx, 16K for Tx */
	}

	INIT_DEBUGOUT1("em_init: pba=%dK", pba);
	E1000_WRITE_REG(&adapter->hw, PBA, pba);

	/* Get the latest mac address; the user can use a LAA */
	bcopy(IF_LLADDR(adapter->ifp), adapter->hw.mac_addr, ETHER_ADDR_LEN);

	/* Initialize the hardware */
	if (em_hardware_init(adapter)) {
		device_printf(dev, "Unable to initialize the hardware\n");
		return;
	}
	em_update_link_status(adapter);

	if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING)
		em_enable_vlans(adapter);

	/* Prepare transmit descriptors and buffers */
	if (em_setup_transmit_structures(adapter)) {
		device_printf(dev, "Could not setup transmit structures\n");
		em_stop(adapter);
		return;
	}
	em_initialize_transmit_unit(adapter);

	/* Setup Multicast table */
	em_set_multi(adapter);

	/* Prepare receive descriptors and buffers */
	if (em_setup_receive_structures(adapter)) {
		device_printf(dev, "Could not setup receive structures\n");
		em_stop(adapter);
		return;
	}
	em_initialize_receive_unit(adapter);

	/* Don't lose promiscuous settings */
	em_set_promisc(adapter);

	ifp->if_drv_flags |= IFF_DRV_RUNNING;
	ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;

	ifp->if_hwassist = 0;
	if (adapter->hw.mac_type >= em_82543) {
		if (ifp->if_capenable & IFCAP_TXCSUM)
			ifp->if_hwassist = EM_CHECKSUM_FEATURES;
		if (ifp->if_capenable & IFCAP_TSO)
			ifp->if_hwassist |= EM_TCPSEG_FEATURES;
	}

	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
	em_clear_hw_cntrs(&adapter->hw);
#ifdef DEVICE_POLLING
	/*
	 * Only enable interrupts if we are not polling; make sure
	 * they are off otherwise.
	 */
	if (ifp->if_capenable & IFCAP_POLLING)
		em_disable_intr(adapter);
	else
#endif /* DEVICE_POLLING */
		em_enable_intr(adapter);

	/* Don't reset the phy next time init gets called */
	adapter->hw.phy_reset_disable = TRUE;
}

static void
em_init(void *arg)
{
	struct adapter *adapter = arg;

	EM_LOCK(adapter);
	em_init_locked(adapter);
	EM_UNLOCK(adapter);
}

#ifdef DEVICE_POLLING
/*********************************************************************
 *
 *  Legacy polling routine
 *
 *********************************************************************/
static void
em_poll(struct ifnet *ifp, enum poll_cmd cmd, int count)
{
	struct adapter *adapter = ifp->if_softc;
	uint32_t reg_icr;

	EM_LOCK(adapter);
	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
		EM_UNLOCK(adapter);
		return;
	}

	if (cmd == POLL_AND_CHECK_STATUS) {
		reg_icr = E1000_READ_REG(&adapter->hw, ICR);
		if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
			callout_stop(&adapter->timer);
			adapter->hw.get_link_status = 1;
			em_check_for_link(&adapter->hw);
			em_update_link_status(adapter);
			callout_reset(&adapter->timer, hz,
			    em_local_timer, adapter);
		}
	}
	em_rxeof(adapter, count);
	em_txeof(adapter);

	if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
		em_start_locked(ifp);
	EM_UNLOCK(adapter);
}

/*********************************************************************
 *
 *  Legacy Interrupt Service routine
 *
 *********************************************************************/
static void
em_intr(void *arg)
{
	struct adapter	*adapter = arg;
	struct ifnet	*ifp;
	uint32_t	reg_icr;

	EM_LOCK(adapter);

	ifp = adapter->ifp;

	if (ifp->if_capenable & IFCAP_POLLING) {
		EM_UNLOCK(adapter);
		return;
	}

	for (;;) {
		reg_icr = E1000_READ_REG(&adapter->hw, ICR);
		if (adapter->hw.mac_type >= em_82571 &&
		    (reg_icr & E1000_ICR_INT_ASSERTED) == 0)
			break;
		else if (reg_icr == 0)
			break;

		/*
		 * XXX: some laptops trigger several spurious interrupts
		 * on em(4) when in the resume cycle. The ICR register
		 * reports all-ones value in this case. Processing such
		 * interrupts would lead to a freeze. I don't know why.
		 */
		if (reg_icr == 0xffffffff)
			break;

		if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
			em_rxeof(adapter, -1);
			em_txeof(adapter);
		}

		/* Link status change */
		if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
			callout_stop(&adapter->timer);
			adapter->hw.get_link_status = 1;
			em_check_for_link(&adapter->hw);
			em_update_link_status(adapter);
			callout_reset(&adapter->timer, hz,
			    em_local_timer, adapter);
		}

		if (reg_icr & E1000_ICR_RXO)
			adapter->rx_overruns++;
	}

	if (ifp->if_drv_flags & IFF_DRV_RUNNING &&
	    !IFQ_DRV_IS_EMPTY(&ifp->if_snd))
		em_start_locked(ifp);

	EM_UNLOCK(adapter);
}

#else  /* if not DEVICE_POLLING, then fast interrupt routines only */

static void
em_handle_link(void *context, int pending)
{
	struct adapter	*adapter = context;
	struct ifnet	*ifp;

	ifp = adapter->ifp;

	EM_LOCK(adapter);

	callout_stop(&adapter->timer);
	adapter->hw.get_link_status = 1;
	em_check_for_link(&adapter->hw);
	em_update_link_status(adapter);
	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
	EM_UNLOCK(adapter);
}

static void
em_handle_rxtx(void *context, int pending)
{
	struct adapter	*adapter = context;
	struct ifnet	*ifp;

	NET_LOCK_GIANT();
	ifp = adapter->ifp;

	/*
	 * TODO:
	 * It should be possible to run the tx clean loop without the lock.
	 */
	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
		if (em_rxeof(adapter, adapter->rx_process_limit) != 0)
			taskqueue_enqueue(adapter->tq, &adapter->rxtx_task);
		EM_LOCK(adapter);
		em_txeof(adapter);

		if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
			em_start_locked(ifp);
		EM_UNLOCK(adapter);
	}

	em_enable_intr(adapter);
	NET_UNLOCK_GIANT();
}

/*********************************************************************
 *
 *  Fast Interrupt Service routine
 *
 *********************************************************************/
static void
em_intr_fast(void *arg)
{
	struct adapter	*adapter = arg;
	struct ifnet	*ifp;
	uint32_t	reg_icr;

	ifp = adapter->ifp;

	reg_icr = E1000_READ_REG(&adapter->hw, ICR);

	/* Hot eject? */
	if (reg_icr == 0xffffffff)
		return;

	/* Definitely not our interrupt. */
	if (reg_icr == 0x0)
		return;

	/*
	 * Starting with the 82571 chip, bit 31 should be used to
	 * determine whether the interrupt belongs to us.
	 */
	if (adapter->hw.mac_type >= em_82571 &&
	    (reg_icr & E1000_ICR_INT_ASSERTED) == 0)
		return;

	/*
	 * Mask interrupts until the taskqueue is finished running.  This is
	 * cheap, just assume that it is needed.  This also works around the
	 * MSI message reordering errata on certain systems.
	 */
	em_disable_intr(adapter);
	taskqueue_enqueue(adapter->tq, &adapter->rxtx_task);

	/* Link status change */
	if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC))
		taskqueue_enqueue(taskqueue_fast, &adapter->link_task);

	if (reg_icr & E1000_ICR_RXO)
		adapter->rx_overruns++;
}
#endif /* ! DEVICE_POLLING */

/*********************************************************************
 *
 *  Media Ioctl callback
 *
 *  This routine is called whenever the user queries the status of
 *  the interface using ifconfig.
 *
 **********************************************************************/
static void
em_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
{
	struct adapter *adapter = ifp->if_softc;

	INIT_DEBUGOUT("em_media_status: begin");

	EM_LOCK(adapter);
	em_check_for_link(&adapter->hw);
	em_update_link_status(adapter);

	ifmr->ifm_status = IFM_AVALID;
	ifmr->ifm_active = IFM_ETHER;

	if (!adapter->link_active) {
		EM_UNLOCK(adapter);
		return;
	}

	ifmr->ifm_status |= IFM_ACTIVE;

	if ((adapter->hw.media_type == em_media_type_fiber) ||
	    (adapter->hw.media_type == em_media_type_internal_serdes)) {
		if (adapter->hw.mac_type == em_82545)
			ifmr->ifm_active |= IFM_1000_LX | IFM_FDX;
		else
			ifmr->ifm_active |= IFM_1000_SX | IFM_FDX;
	} else {
		switch (adapter->link_speed) {
		case 10:
			ifmr->ifm_active |= IFM_10_T;
			break;
		case 100:
			ifmr->ifm_active |= IFM_100_TX;
			break;
		case 1000:
			ifmr->ifm_active |= IFM_1000_T;
			break;
		}
		if (adapter->link_duplex == FULL_DUPLEX)
			ifmr->ifm_active |= IFM_FDX;
		else
			ifmr->ifm_active |= IFM_HDX;
	}
	EM_UNLOCK(adapter);
}

/*********************************************************************
 *
 *  Media Ioctl callback
 *
 *  This routine is called when the user changes speed/duplex using
 *  the media/mediaopt option with ifconfig.
 *
 **********************************************************************/
static int
em_media_change(struct ifnet *ifp)
{
	struct adapter	*adapter = ifp->if_softc;
	struct ifmedia	*ifm = &adapter->media;

	INIT_DEBUGOUT("em_media_change: begin");

	if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
		return (EINVAL);

	EM_LOCK(adapter);
	switch (IFM_SUBTYPE(ifm->ifm_media)) {
	case IFM_AUTO:
		adapter->hw.autoneg = DO_AUTO_NEG;
		adapter->hw.autoneg_advertised = AUTONEG_ADV_DEFAULT;
		break;
	case IFM_1000_LX:
	case IFM_1000_SX:
	case IFM_1000_T:
		adapter->hw.autoneg = DO_AUTO_NEG;
		adapter->hw.autoneg_advertised = ADVERTISE_1000_FULL;
		break;
	case IFM_100_TX:
		adapter->hw.autoneg = FALSE;
		adapter->hw.autoneg_advertised = 0;
		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
			adapter->hw.forced_speed_duplex = em_100_full;
		else
			adapter->hw.forced_speed_duplex = em_100_half;
		break;
	case IFM_10_T:
		adapter->hw.autoneg = FALSE;
		adapter->hw.autoneg_advertised = 0;
		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
			adapter->hw.forced_speed_duplex = em_10_full;
		else
			adapter->hw.forced_speed_duplex = em_10_half;
		break;
	default:
		device_printf(adapter->dev, "Unsupported media type\n");
	}

	/*
	 * As the speed/duplex settings may have changed, we need to
	 * reset the PHY.
	 */
	adapter->hw.phy_reset_disable = FALSE;

	em_init_locked(adapter);
	EM_UNLOCK(adapter);

	return (0);
}

/*********************************************************************
 *
 *  This routine maps the mbufs to tx descriptors.
 *
 *  return 0 on success, positive on failure
 **********************************************************************/
static int
em_encap(struct adapter *adapter, struct mbuf **m_headp)
{
	struct ifnet		*ifp = adapter->ifp;
	bus_dma_segment_t	segs[EM_MAX_SCATTER];
	bus_dmamap_t		map;
	struct em_buffer	*tx_buffer, *tx_buffer_last;
	struct em_tx_desc	*current_tx_desc;
	struct mbuf		*m_head;
	struct m_tag		*mtag;
	uint32_t		txd_upper, txd_lower, txd_used, txd_saved;
	int			nsegs, i, j;
	int			error, do_tso, tso_desc = 0;

	m_head = *m_headp;
	current_tx_desc = NULL;
	txd_upper = txd_lower = txd_used = txd_saved = 0;

	do_tso = ((m_head->m_pkthdr.csum_flags & CSUM_TSO) != 0);

	/*
	 * Force a cleanup if the number of TX descriptors
	 * available hits the threshold.
	 */
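	/*
	 * In the if_em.h that accompanies this driver,
	 * EM_TX_CLEANUP_THRESHOLD is (at least in this era) defined as
	 * num_tx_desc / 8, so with the default 256-descriptor ring a
	 * cleanup is forced once 32 or fewer descriptors remain free.
	 */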
	if (adapter->num_tx_desc_avail <= EM_TX_CLEANUP_THRESHOLD) {
		em_txeof(adapter);
		if (adapter->num_tx_desc_avail <= EM_TX_CLEANUP_THRESHOLD) {
			adapter->no_tx_desc_avail1++;
			return (ENOBUFS);
		}
	}

	/* Find out if we are in vlan mode. */
	mtag = VLAN_OUTPUT_TAG(ifp, m_head);

	/*
	 * When operating in promiscuous mode, hardware encapsulation for
	 * packets is disabled.  This means we have to add the vlan
	 * encapsulation in the driver, since it will have come down from the
	 * VLAN layer with a tag instead of a VLAN header.
	 */
	if (mtag != NULL && adapter->em_insert_vlan_header) {
		struct ether_vlan_header *evl;
		struct ether_header eh;

		m_head = m_pullup(m_head, sizeof(eh));
		if (m_head == NULL) {
			*m_headp = NULL;
			return (ENOBUFS);
		}
		eh = *mtod(m_head, struct ether_header *);
		M_PREPEND(m_head, sizeof(*evl), M_DONTWAIT);
		if (m_head == NULL) {
			*m_headp = NULL;
			return (ENOBUFS);
		}
		m_head = m_pullup(m_head, sizeof(*evl));
		if (m_head == NULL) {
			*m_headp = NULL;
			return (ENOBUFS);
		}
		evl = mtod(m_head, struct ether_vlan_header *);
		bcopy(&eh, evl, sizeof(*evl));
		evl->evl_proto = evl->evl_encap_proto;
		evl->evl_encap_proto = htons(ETHERTYPE_VLAN);
		evl->evl_tag = htons(VLAN_TAG_VALUE(mtag));
		m_tag_delete(m_head, mtag);
		mtag = NULL;
		*m_headp = m_head;
	}

	/*
	 * TSO workaround:
	 *  If an mbuf contains only a header, we need
	 *  to pull 4 bytes of data into it.
	 */
	if (do_tso && (m_head->m_len <= M_TSO_LEN)) {
		m_head = m_pullup(m_head, M_TSO_LEN + 4);
		if (m_head == NULL) {
			*m_headp = NULL;
			return (ENOBUFS);
		}
		*m_headp = m_head;
	}

	/*
	 * Map the packet for DMA.
	 */
	tx_buffer = &adapter->tx_buffer_area[adapter->next_avail_tx_desc];
	tx_buffer_last = tx_buffer;
	map = tx_buffer->map;
	error = bus_dmamap_load_mbuf_sg(adapter->txtag, map, *m_headp, segs,
	    &nsegs, BUS_DMA_NOWAIT);
	if (error == EFBIG) {
		struct mbuf *m;

		m = m_defrag(*m_headp, M_DONTWAIT);
		if (m == NULL) {
			/* Assume m_defrag(9) used only m_get(9). */
			adapter->mbuf_alloc_failed++;
			m_freem(*m_headp);
			*m_headp = NULL;
			return (ENOBUFS);
		}
		*m_headp = m;
		error = bus_dmamap_load_mbuf_sg(adapter->txtag, map, *m_headp,
		    segs, &nsegs, BUS_DMA_NOWAIT);
		if (error != 0) {
			adapter->no_tx_dma_setup++;
			m_freem(*m_headp);
			*m_headp = NULL;
			return (error);
		}
	} else if (error != 0) {
		adapter->no_tx_dma_setup++;
		return (error);
	}
	if (nsegs == 0) {
		m_freem(*m_headp);
		*m_headp = NULL;
		return (EIO);
	}

	/*
	 * TSO Hardware workaround: if this packet is not TSO,
	 * is only a single descriptor long, and follows a TSO
	 * burst, then we need to add a sentinel descriptor to
	 * prevent premature writeback.
	 */
	if ((do_tso == 0) && (adapter->tx_tso == TRUE)) {
		if (nsegs == 1)
			tso_desc = TRUE;
		adapter->tx_tso = FALSE;
	}

	if (nsegs > adapter->num_tx_desc_avail - 2) {
		adapter->no_tx_desc_avail2++;
		bus_dmamap_unload(adapter->txtag, map);
		return (ENOBUFS);
	}

	/* Do hardware assists */
	m_head = *m_headp;
	if (ifp->if_hwassist > 0) {
		if (em_tso_setup(adapter, m_head, &txd_upper, &txd_lower)) {
			/* we need to make a final sentinel transmit desc */
			tso_desc = TRUE;
		} else
			em_transmit_checksum_setup(adapter, m_head,
			    &txd_upper, &txd_lower);
	}

	i = adapter->next_avail_tx_desc;
	if (adapter->pcix_82544)
		txd_saved = i;

	for (j = 0; j < nsegs; j++) {
		bus_size_t seg_len;
		bus_addr_t seg_addr;

		/* If adapter is 82544 and on PCIX bus. */
		if (adapter->pcix_82544) {
			DESC_ARRAY	desc_array;
			uint32_t	array_elements, counter;

			/*
			 * Check the Address and Length combination and
			 * split the data accordingly
			 */
			array_elements = em_fill_descriptors(segs[j].ds_addr,
			    segs[j].ds_len, &desc_array);
			for (counter = 0; counter < array_elements; counter++) {
				if (txd_used == adapter->num_tx_desc_avail) {
					adapter->next_avail_tx_desc = txd_saved;
					adapter->no_tx_desc_avail2++;
					bus_dmamap_unload(adapter->txtag, map);
					return (ENOBUFS);
				}
				tx_buffer = &adapter->tx_buffer_area[i];
				current_tx_desc = &adapter->tx_desc_base[i];
				current_tx_desc->buffer_addr = htole64(
				    desc_array.descriptor[counter].address);
				current_tx_desc->lower.data = htole32(
				    adapter->txd_cmd | txd_lower |
				    (uint16_t)desc_array.descriptor[counter].length);
				current_tx_desc->upper.data = htole32(txd_upper);
				if (++i == adapter->num_tx_desc)
					i = 0;

				tx_buffer->m_head = NULL;
				txd_used++;
			}
		} else {
			tx_buffer = &adapter->tx_buffer_area[i];
			current_tx_desc = &adapter->tx_desc_base[i];
			seg_addr = htole64(segs[j].ds_addr);
			seg_len = segs[j].ds_len;
			/*
			 * TSO Workaround:
			 * If this is the last descriptor, we want to
			 * split it so we have a small final sentinel
			 */
			if (tso_desc && (j == (nsegs - 1)) && (seg_len > 8)) {
				seg_len -= 4;
				current_tx_desc->buffer_addr = seg_addr;
				current_tx_desc->lower.data = htole32(
				    adapter->txd_cmd | txd_lower | seg_len);
				current_tx_desc->upper.data =
				    htole32(txd_upper);
				if (++i == adapter->num_tx_desc)
					i = 0;
				/* Now make the sentinel */
				++txd_used; /* using an extra txd */
				current_tx_desc = &adapter->tx_desc_base[i];
				tx_buffer = &adapter->tx_buffer_area[i];
				current_tx_desc->buffer_addr =
				    seg_addr + seg_len;
				current_tx_desc->lower.data = htole32(
				    adapter->txd_cmd | txd_lower | 4);
				current_tx_desc->upper.data =
				    htole32(txd_upper);
				if (++i == adapter->num_tx_desc)
					i = 0;
			} else {
				current_tx_desc->buffer_addr = seg_addr;
				current_tx_desc->lower.data = htole32(
				    adapter->txd_cmd | txd_lower | seg_len);
				current_tx_desc->upper.data =
				    htole32(txd_upper);
				if (++i == adapter->num_tx_desc)
					i = 0;
			}
			tx_buffer->m_head = NULL;
		}
	}

	adapter->next_avail_tx_desc = i;
	if (adapter->pcix_82544)
		adapter->num_tx_desc_avail -= txd_used;
	else {
		adapter->num_tx_desc_avail -= nsegs;
		if (tso_desc) /* TSO used an extra for sentinel */
			adapter->num_tx_desc_avail -= txd_used;
	}

	if (mtag != NULL) {
		/* Set the vlan id. */
		current_tx_desc->upper.fields.special =
		    htole16(VLAN_TAG_VALUE(mtag));

		/* Tell hardware to add tag. */
		current_tx_desc->lower.data |= htole32(E1000_TXD_CMD_VLE);
	}

	tx_buffer->m_head = m_head;
	tx_buffer_last->map = tx_buffer->map;
	tx_buffer->map = map;
	bus_dmamap_sync(adapter->txtag, map, BUS_DMASYNC_PREWRITE);

	/*
	 * Last Descriptor of Packet needs End Of Packet (EOP).
	 */
	current_tx_desc->lower.data |= htole32(E1000_TXD_CMD_EOP);

	/*
	 * Advance the Transmit Descriptor Tail (Tdt), this tells the E1000
	 * that this frame is available to transmit.
	 */
	bus_dmamap_sync(adapter->txdma.dma_tag, adapter->txdma.dma_map,
	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
	if (adapter->hw.mac_type == em_82547 &&
	    adapter->link_duplex == HALF_DUPLEX)
		em_82547_move_tail_locked(adapter);
	else {
		E1000_WRITE_REG(&adapter->hw, TDT, i);
		if (adapter->hw.mac_type == em_82547)
			em_82547_update_fifo_head(adapter,
			    m_head->m_pkthdr.len);
	}

	return (0);
}
1716
1717/*********************************************************************
1718 *
1719 * 82547 workaround to avoid controller hang in a half-duplex environment.
1720 * The workaround is to avoid queuing a large packet that would span
1721 * the internal Tx FIFO ring boundary; the FIFO pointers must be reset
1722 * in that case, which we do only when the FIFO is quiescent.
1723 *
1724 **********************************************************************/
1725static void
1726em_82547_move_tail_locked(struct adapter *adapter)
1727{
1728	uint16_t hw_tdt;
1729	uint16_t sw_tdt;
1730	struct em_tx_desc *tx_desc;
1731	uint16_t length = 0;
1732	boolean_t eop = 0;
1733
1734	EM_LOCK_ASSERT(adapter);
1735
1736	hw_tdt = E1000_READ_REG(&adapter->hw, TDT);
1737	sw_tdt = adapter->next_avail_tx_desc;
1738
1739	while (hw_tdt != sw_tdt) {
1740		tx_desc = &adapter->tx_desc_base[hw_tdt];
1741		length += tx_desc->lower.flags.length;
1742		eop = tx_desc->lower.data & E1000_TXD_CMD_EOP;
1743		if (++hw_tdt == adapter->num_tx_desc)
1744			hw_tdt = 0;
1745
1746		if (eop) {
1747			if (em_82547_fifo_workaround(adapter, length)) {
1748				adapter->tx_fifo_wrk_cnt++;
1749				callout_reset(&adapter->tx_fifo_timer, 1,
1750					em_82547_move_tail, adapter);
1751				break;
1752			}
1753			E1000_WRITE_REG(&adapter->hw, TDT, hw_tdt);
1754			em_82547_update_fifo_head(adapter, length);
1755			length = 0;
1756		}
1757	}
1758}
1759
1760static void
1761em_82547_move_tail(void *arg)
1762{
1763	struct adapter *adapter = arg;
1764
1765	EM_LOCK(adapter);
1766	em_82547_move_tail_locked(adapter);
1767	EM_UNLOCK(adapter);
1768}
1769
1770static int
1771em_82547_fifo_workaround(struct adapter *adapter, int len)
1772{
1773	int fifo_space, fifo_pkt_len;
1774
1775	fifo_pkt_len = roundup2(len + EM_FIFO_HDR, EM_FIFO_HDR);
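	/*
	 * Each packet consumes its own length plus a FIFO header in the
	 * internal Tx FIFO, rounded up to the EM_FIFO_HDR granularity.
	 */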
1776
1777	if (adapter->link_duplex == HALF_DUPLEX) {
1778		fifo_space = adapter->tx_fifo_size - adapter->tx_fifo_head;
1779
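		/*
		 * If this packet would wrap past the end of the FIFO,
		 * try to reset the FIFO pointers; report failure (so the
		 * caller defers the packet) while the FIFO is still draining.
		 */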
1780		if (fifo_pkt_len >= (EM_82547_PKT_THRESH + fifo_space)) {
1781			if (em_82547_tx_fifo_reset(adapter))
1782				return (0);
1783			else
1784				return (1);
1785		}
1786	}
1787
1788	return (0);
1789}
1790
1791static void
1792em_82547_update_fifo_head(struct adapter *adapter, int len)
1793{
1794	int fifo_pkt_len = roundup2(len + EM_FIFO_HDR, EM_FIFO_HDR);
1795
1796	/* tx_fifo_head is always 16 byte aligned */
1797	adapter->tx_fifo_head += fifo_pkt_len;
1798	if (adapter->tx_fifo_head >= adapter->tx_fifo_size) {
1799		adapter->tx_fifo_head -= adapter->tx_fifo_size;
1800	}
1801}
1802
1803
1804static int
1805em_82547_tx_fifo_reset(struct adapter *adapter)
1806{
1807	uint32_t tctl;
1808
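	/*
	 * The FIFO may only be reset when it is quiescent: descriptor
	 * head and tail match, the internal FIFO read/write pointers
	 * have met, and no packet data remains in the FIFO (TDFPC == 0).
	 */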
1809	if ((E1000_READ_REG(&adapter->hw, TDT) == E1000_READ_REG(&adapter->hw, TDH)) &&
1810	    (E1000_READ_REG(&adapter->hw, TDFT) == E1000_READ_REG(&adapter->hw, TDFH)) &&
1811	    (E1000_READ_REG(&adapter->hw, TDFTS) == E1000_READ_REG(&adapter->hw, TDFHS))&&
1812	    (E1000_READ_REG(&adapter->hw, TDFPC) == 0)) {
1813
1814		/* Disable TX unit */
1815		tctl = E1000_READ_REG(&adapter->hw, TCTL);
1816		E1000_WRITE_REG(&adapter->hw, TCTL, tctl & ~E1000_TCTL_EN);
1817
1818		/* Reset FIFO pointers */
1819		E1000_WRITE_REG(&adapter->hw, TDFT,  adapter->tx_head_addr);
1820		E1000_WRITE_REG(&adapter->hw, TDFH,  adapter->tx_head_addr);
1821		E1000_WRITE_REG(&adapter->hw, TDFTS, adapter->tx_head_addr);
1822		E1000_WRITE_REG(&adapter->hw, TDFHS, adapter->tx_head_addr);
1823
1824		/* Re-enable TX unit */
1825		E1000_WRITE_REG(&adapter->hw, TCTL, tctl);
1826		E1000_WRITE_FLUSH(&adapter->hw);
1827
1828		adapter->tx_fifo_head = 0;
1829		adapter->tx_fifo_reset_cnt++;
1830
1831		return (TRUE);
1832	}
1833	else {
1834		return (FALSE);
1835	}
1836}
1837
1838static void
1839em_set_promisc(struct adapter *adapter)
1840{
1841	struct ifnet	*ifp = adapter->ifp;
1842	uint32_t	reg_rctl;
1843
1844	reg_rctl = E1000_READ_REG(&adapter->hw, RCTL);
1845
1846	if (ifp->if_flags & IFF_PROMISC) {
1847		reg_rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
1848		E1000_WRITE_REG(&adapter->hw, RCTL, reg_rctl);
1849		/* Disable VLAN stripping in promiscuous mode.
1850		 * This allows bridging of VLAN-tagged frames and
1851		 * also lets the tags be seen in tcpdump.
1852		 */
1853		if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING)
1854			em_disable_vlans(adapter);
1855		adapter->em_insert_vlan_header = 1;
1856	} else if (ifp->if_flags & IFF_ALLMULTI) {
1857		reg_rctl |= E1000_RCTL_MPE;
1858		reg_rctl &= ~E1000_RCTL_UPE;
1859		E1000_WRITE_REG(&adapter->hw, RCTL, reg_rctl);
1860		adapter->em_insert_vlan_header = 0;
1861	} else
1862		adapter->em_insert_vlan_header = 0;
1863}
1864
1865static void
1866em_disable_promisc(struct adapter *adapter)
1867{
1868	struct ifnet	*ifp = adapter->ifp;
1869	uint32_t	reg_rctl;
1870
1871	reg_rctl = E1000_READ_REG(&adapter->hw, RCTL);
1872
1873	reg_rctl &=  (~E1000_RCTL_UPE);
1874	reg_rctl &=  (~E1000_RCTL_MPE);
1875	E1000_WRITE_REG(&adapter->hw, RCTL, reg_rctl);
1876
1877	if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING)
1878		em_enable_vlans(adapter);
1879	adapter->em_insert_vlan_header = 0;
1880}
1881
1882
1883/*********************************************************************
1884 *  Multicast Update
1885 *
1886 *  This routine is called whenever the multicast address list is updated.
1887 *
1888 **********************************************************************/
1889
1890static void
1891em_set_multi(struct adapter *adapter)
1892{
1893	struct ifnet	*ifp = adapter->ifp;
1894	struct ifmultiaddr *ifma;
1895	uint32_t reg_rctl = 0;
1896	uint8_t  mta[MAX_NUM_MULTICAST_ADDRESSES * ETH_LENGTH_OF_ADDRESS];
1897	int mcnt = 0;
1898
1899	IOCTL_DEBUGOUT("em_set_multi: begin");
1900
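	/*
	 * For the 82542 rev 2.0, the receiver is held in reset (and MWI
	 * temporarily disabled) while the multicast table array is
	 * rewritten; it is released again below.
	 */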
1901	if (adapter->hw.mac_type == em_82542_rev2_0) {
1902		reg_rctl = E1000_READ_REG(&adapter->hw, RCTL);
1903		if (adapter->hw.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
1904			em_pci_clear_mwi(&adapter->hw);
1905		reg_rctl |= E1000_RCTL_RST;
1906		E1000_WRITE_REG(&adapter->hw, RCTL, reg_rctl);
1907		msec_delay(5);
1908	}
1909
1910	IF_ADDR_LOCK(ifp);
1911	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
1912		if (ifma->ifma_addr->sa_family != AF_LINK)
1913			continue;
1914
1915		if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
1916			break;
1917
1918		bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
1919		    &mta[mcnt*ETH_LENGTH_OF_ADDRESS], ETH_LENGTH_OF_ADDRESS);
1920		mcnt++;
1921	}
1922	IF_ADDR_UNLOCK(ifp);
1923
1924	if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES) {
1925		reg_rctl = E1000_READ_REG(&adapter->hw, RCTL);
1926		reg_rctl |= E1000_RCTL_MPE;
1927		E1000_WRITE_REG(&adapter->hw, RCTL, reg_rctl);
1928	} else
1929		em_mc_addr_list_update(&adapter->hw, mta, mcnt, 0, 1);
1930
1931	if (adapter->hw.mac_type == em_82542_rev2_0) {
1932		reg_rctl = E1000_READ_REG(&adapter->hw, RCTL);
1933		reg_rctl &= ~E1000_RCTL_RST;
1934		E1000_WRITE_REG(&adapter->hw, RCTL, reg_rctl);
1935		msec_delay(5);
1936		if (adapter->hw.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
1937			em_pci_set_mwi(&adapter->hw);
1938	}
1939}
1940
1941
1942/*********************************************************************
1943 *  Timer routine
1944 *
1945 *  This routine checks for link status and updates statistics.
1946 *
1947 **********************************************************************/
1948
1949static void
1950em_local_timer(void *arg)
1951{
1952	struct adapter	*adapter = arg;
1953	struct ifnet	*ifp = adapter->ifp;
1954
1955	EM_LOCK(adapter);
1956
1957	em_check_for_link(&adapter->hw);
1958	em_update_link_status(adapter);
1959	em_update_stats_counters(adapter);
1960	if (em_display_debug_stats && (ifp->if_drv_flags & IFF_DRV_RUNNING))
1961		em_print_hw_stats(adapter);
1962	em_smartspeed(adapter);
1963
1964	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
1965
1966	EM_UNLOCK(adapter);
1967}
1968
1969static void
1970em_update_link_status(struct adapter *adapter)
1971{
1972	struct ifnet *ifp = adapter->ifp;
1973	device_t dev = adapter->dev;
1974
1975	if (E1000_READ_REG(&adapter->hw, STATUS) & E1000_STATUS_LU) {
1976		if (adapter->link_active == 0) {
1977			em_get_speed_and_duplex(&adapter->hw, &adapter->link_speed,
1978			    &adapter->link_duplex);
1979			/* Check if we may set SPEED_MODE bit on PCI-E */
1980			if ((adapter->link_speed == SPEED_1000) &&
1981			    ((adapter->hw.mac_type == em_82571) ||
1982			    (adapter->hw.mac_type == em_82572))) {
1983				int tarc0;
1984
1985				tarc0 = E1000_READ_REG(&adapter->hw, TARC0);
1986				tarc0 |= SPEED_MODE_BIT;
1987				E1000_WRITE_REG(&adapter->hw, TARC0, tarc0);
1988			}
1989			if (bootverbose)
1990				device_printf(dev, "Link is up %d Mbps %s\n",
1991				    adapter->link_speed,
1992				    ((adapter->link_duplex == FULL_DUPLEX) ?
1993				    "Full Duplex" : "Half Duplex"));
1994			adapter->link_active = 1;
1995			adapter->smartspeed = 0;
1996			ifp->if_baudrate = adapter->link_speed * 1000000;
1997			if_link_state_change(ifp, LINK_STATE_UP);
1998		}
1999	} else {
2000		if (adapter->link_active == 1) {
2001			ifp->if_baudrate = adapter->link_speed = 0;
2002			adapter->link_duplex = 0;
2003			if (bootverbose)
2004				device_printf(dev, "Link is Down\n");
2005			adapter->link_active = 0;
2006			if_link_state_change(ifp, LINK_STATE_DOWN);
2007		}
2008	}
2009}
2010
2011/*********************************************************************
2012 *
2013 *  This routine disables all traffic on the adapter by issuing a
2014 *  global reset on the MAC and deallocates TX/RX buffers.
2015 *
2016 **********************************************************************/
2017
2018static void
2019em_stop(void *arg)
2020{
2021	struct adapter	*adapter = arg;
2022	struct ifnet	*ifp = adapter->ifp;
2023
2024	EM_LOCK_ASSERT(adapter);
2025
2026	INIT_DEBUGOUT("em_stop: begin");
2027
2028	em_disable_intr(adapter);
2029	em_reset_hw(&adapter->hw);
2030	callout_stop(&adapter->timer);
2031	callout_stop(&adapter->tx_fifo_timer);
2032	em_free_transmit_structures(adapter);
2033	em_free_receive_structures(adapter);
2034
2035	/* Tell the stack that the interface is no longer active */
2036	ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
2037}
2038
2039
2040/********************************************************************
2041 *
2042 *  Determine hardware revision.
2043 *
2044 **********************************************************************/
2045static void
2046em_identify_hardware(struct adapter *adapter)
2047{
2048	device_t dev = adapter->dev;
2049
2050	/* Make sure bus mastering and memory-mapped I/O are enabled */
2051	pci_enable_busmaster(dev);
2052	pci_enable_io(dev, SYS_RES_MEMORY);
2053	adapter->hw.pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2);
2054
2055	/* Save off the information about this board */
2056	adapter->hw.vendor_id = pci_get_vendor(dev);
2057	adapter->hw.device_id = pci_get_device(dev);
2058	adapter->hw.revision_id = pci_get_revid(dev);
2059	adapter->hw.subsystem_vendor_id = pci_get_subvendor(dev);
2060	adapter->hw.subsystem_id = pci_get_subdevice(dev);
2061
2062	/* Identify the MAC */
2063	if (em_set_mac_type(&adapter->hw))
2064		device_printf(dev, "Unknown MAC Type\n");
2065
2066	if (adapter->hw.mac_type == em_82541 || adapter->hw.mac_type == em_82541_rev_2 ||
2067	    adapter->hw.mac_type == em_82547 || adapter->hw.mac_type == em_82547_rev_2)
2068		adapter->hw.phy_init_script = TRUE;
2069}
2070
2071static int
2072em_allocate_pci_resources(struct adapter *adapter)
2073{
2074	device_t	dev = adapter->dev;
2075	int		val, rid;
2076
2077	rid = PCIR_BAR(0);
2078	adapter->res_memory = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
2079	    &rid, RF_ACTIVE);
2080	if (adapter->res_memory == NULL) {
2081		device_printf(dev, "Unable to allocate bus resource: memory\n");
2082		return (ENXIO);
2083	}
2084	adapter->osdep.mem_bus_space_tag =
2085	    rman_get_bustag(adapter->res_memory);
2086	adapter->osdep.mem_bus_space_handle = rman_get_bushandle(adapter->res_memory);
2087	adapter->hw.hw_addr = (uint8_t *)&adapter->osdep.mem_bus_space_handle;
2088
2089	if (adapter->hw.mac_type > em_82543) {
2090		/* Figure out where our IO BAR is. */
2091		for (rid = PCIR_BAR(0); rid < PCIR_CIS;) {
2092			val = pci_read_config(dev, rid, 4);
2093			if (E1000_BAR_TYPE(val) == E1000_BAR_TYPE_IO) {
2094				adapter->io_rid = rid;
2095				break;
2096			}
2097			rid += 4;
2098			/* check for 64bit BAR */
2099			if (E1000_BAR_MEM_TYPE(val) == E1000_BAR_MEM_TYPE_64BIT)
2100				rid += 4;
2101		}
2102		if (rid >= PCIR_CIS) {
2103			device_printf(dev, "Unable to locate IO BAR\n");
2104			return (ENXIO);
2105		}
2106		adapter->res_ioport = bus_alloc_resource_any(dev, SYS_RES_IOPORT,
2107		    &adapter->io_rid, RF_ACTIVE);
2108		if (adapter->res_ioport == NULL) {
2109			device_printf(dev, "Unable to allocate bus resource: "
2110			    "ioport\n");
2111			return (ENXIO);
2112		}
2113		adapter->hw.io_base = 0;
2114		adapter->osdep.io_bus_space_tag = rman_get_bustag(adapter->res_ioport);
2115		adapter->osdep.io_bus_space_handle =
2116		    rman_get_bushandle(adapter->res_ioport);
2117	}
2118
2119	/* For ICH8 we need to find the flash memory. */
2120	if (adapter->hw.mac_type == em_ich8lan) {
2121		rid = EM_FLASH;
2122
2123		adapter->flash_mem = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
2124		    &rid, RF_ACTIVE);
2125		adapter->osdep.flash_bus_space_tag = rman_get_bustag(adapter->flash_mem);
2126		adapter->osdep.flash_bus_space_handle =
2127		    rman_get_bushandle(adapter->flash_mem);
2128	}
2129
2130	rid = 0x0;
2131	adapter->res_interrupt = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid,
2132	    RF_SHAREABLE | RF_ACTIVE);
2133	if (adapter->res_interrupt == NULL) {
2134		device_printf(dev, "Unable to allocate bus resource: "
2135		    "interrupt\n");
2136		return (ENXIO);
2137	}
2138
2139	adapter->hw.back = &adapter->osdep;
2140
2141	return (0);
2142}
2143
2144int
2145em_allocate_intr(struct adapter *adapter)
2146{
2147	device_t dev = adapter->dev;
2148	int error;
2149
2150	/* Manually turn off all interrupts */
2151	E1000_WRITE_REG(&adapter->hw, IMC, 0xffffffff);
2152
2153#ifdef DEVICE_POLLING
2154	if (adapter->int_handler_tag == NULL && (error = bus_setup_intr(dev,
2155	    adapter->res_interrupt, INTR_TYPE_NET | INTR_MPSAFE, em_intr, adapter,
2156	    &adapter->int_handler_tag)) != 0) {
2157		device_printf(dev, "Failed to register interrupt handler\n");
2158		return (error);
2159	}
2160#else
2161	/*
2162	 * Try allocating a fast interrupt and the associated deferred
2163	 * processing contexts.
2164	 */
2165	TASK_INIT(&adapter->rxtx_task, 0, em_handle_rxtx, adapter);
2166	TASK_INIT(&adapter->link_task, 0, em_handle_link, adapter);
2167	adapter->tq = taskqueue_create_fast("em_taskq", M_NOWAIT,
2168	    taskqueue_thread_enqueue, &adapter->tq);
2169	taskqueue_start_threads(&adapter->tq, 1, PI_NET, "%s taskq",
2170	    device_get_nameunit(adapter->dev));
2171	if ((error = bus_setup_intr(dev, adapter->res_interrupt,
2172	    INTR_TYPE_NET | INTR_FAST, em_intr_fast, adapter,
2173	    &adapter->int_handler_tag)) != 0) {
2174		device_printf(dev, "Failed to register fast interrupt "
2175			    "handler: %d\n", error);
2176		taskqueue_free(adapter->tq);
2177		adapter->tq = NULL;
2178		return (error);
2179	}
2180#endif
2181
2182	em_enable_intr(adapter);
2183	return (0);
2184}
2185
2186static void
2187em_free_intr(struct adapter *adapter)
2188{
2189	device_t dev = adapter->dev;
2190
2191	if (adapter->int_handler_tag != NULL) {
2192		bus_teardown_intr(dev, adapter->res_interrupt, adapter->int_handler_tag);
2193		adapter->int_handler_tag = NULL;
2194	}
2195	if (adapter->tq != NULL) {
2196		taskqueue_drain(adapter->tq, &adapter->rxtx_task);
2197		taskqueue_drain(taskqueue_fast, &adapter->link_task);
2198		taskqueue_free(adapter->tq);
2199		adapter->tq = NULL;
2200	}
2201}
2202
2203static void
2204em_free_pci_resources(struct adapter *adapter)
2205{
2206	device_t dev = adapter->dev;
2207
2208	if (adapter->res_interrupt != NULL)
2209		bus_release_resource(dev, SYS_RES_IRQ, 0, adapter->res_interrupt);
2210
2211	if (adapter->res_memory != NULL)
2212		bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BAR(0),
2213		    adapter->res_memory);
2214
2215	if (adapter->flash_mem != NULL)
2216		bus_release_resource(dev, SYS_RES_MEMORY, EM_FLASH,
2217		    adapter->flash_mem);
2218
2219	if (adapter->res_ioport != NULL)
2220		bus_release_resource(dev, SYS_RES_IOPORT, adapter->io_rid,
2221		    adapter->res_ioport);
2222}
2223
2224/*********************************************************************
2225 *
2226 *  Initialize the hardware to a configuration as specified by the
2227 *  adapter structure. The controller is reset, the EEPROM is
2228 *  verified, the MAC address is set, then the shared initialization
2229 *  routines are called.
2230 *
2231 **********************************************************************/
2232static int
2233em_hardware_init(struct adapter *adapter)
2234{
2235	device_t dev = adapter->dev;
2236	uint16_t rx_buffer_size;
2237
2238	INIT_DEBUGOUT("em_hardware_init: begin");
2239	/* Issue a global reset */
2240	em_reset_hw(&adapter->hw);
2241
2242	/* When hardware is reset, fifo_head is also reset */
2243	adapter->tx_fifo_head = 0;
2244
2245	/* Make sure we have a good EEPROM before we read from it */
2246	if (em_validate_eeprom_checksum(&adapter->hw) < 0) {
2247		device_printf(dev, "The EEPROM Checksum Is Not Valid\n");
2248		return (EIO);
2249	}
2250
2251	if (em_read_part_num(&adapter->hw, &(adapter->part_num)) < 0) {
2252		device_printf(dev, "EEPROM read error while reading part "
2253		    "number\n");
2254		return (EIO);
2255	}
2256
2257	/* Set up smart power down as default off on newer adapters. */
2258	if (!em_smart_pwr_down &&
2259	    (adapter->hw.mac_type == em_82571 || adapter->hw.mac_type == em_82572)) {
2260		uint16_t phy_tmp = 0;
2261
2262		/* Speed up time to link by disabling smart power down. */
2263		em_read_phy_reg(&adapter->hw, IGP02E1000_PHY_POWER_MGMT, &phy_tmp);
2264		phy_tmp &= ~IGP02E1000_PM_SPD;
2265		em_write_phy_reg(&adapter->hw, IGP02E1000_PHY_POWER_MGMT, phy_tmp);
2266	}
2267
2268	/*
2269	 * These parameters control the automatic generation (Tx) and
2270	 * response (Rx) to Ethernet PAUSE frames.
2271	 * - High water mark should allow for at least two frames to be
2272	 *   received after sending an XOFF.
2273	 * - Low water mark works best when it is very near the high water mark.
2274	 *   This allows the receiver to restart by sending XON when it has
2275	 *   drained a bit. Here we use an arbitrary value of 1500 which will
2276	 *   restart after one full frame is pulled from the buffer. There
2277	 *   could be several smaller frames in the buffer and if so they will
2278	 *   not trigger the XON until their total number reduces the buffer
2279	 *   by 1500.
2280	 * - The pause time is fairly large at 1000 x 512ns = 512 usec.
2281	 */
2282	rx_buffer_size = ((E1000_READ_REG(&adapter->hw, PBA) & 0xffff) << 10);
2283
2284	adapter->hw.fc_high_water = rx_buffer_size -
2285	    roundup2(adapter->hw.max_frame_size, 1024);
2286	adapter->hw.fc_low_water = adapter->hw.fc_high_water - 1500;
2287	if (adapter->hw.mac_type == em_80003es2lan)
2288		adapter->hw.fc_pause_time = 0xFFFF;
2289	else
2290		adapter->hw.fc_pause_time = 0x1000;
2291	adapter->hw.fc_send_xon = TRUE;
2292	adapter->hw.fc = em_fc_full;
2293
2294	if (em_init_hw(&adapter->hw) < 0) {
2295		device_printf(dev, "Hardware Initialization Failed\n");
2296		return (EIO);
2297	}
2298
2299	em_check_for_link(&adapter->hw);
2300
2301	return (0);
2302}
2303
2304/*********************************************************************
2305 *
2306 *  Setup networking device structure and register an interface.
2307 *
2308 **********************************************************************/
2309static void
2310em_setup_interface(device_t dev, struct adapter *adapter)
2311{
2312	struct ifnet   *ifp;
2313	INIT_DEBUGOUT("em_setup_interface: begin");
2314
2315	ifp = adapter->ifp = if_alloc(IFT_ETHER);
2316	if (ifp == NULL)
2317		panic("%s: can not if_alloc()", device_get_nameunit(dev));
2318	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
2319	ifp->if_mtu = ETHERMTU;
2320	ifp->if_init =  em_init;
2321	ifp->if_softc = adapter;
2322	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
2323	ifp->if_ioctl = em_ioctl;
2324	ifp->if_start = em_start;
2325	ifp->if_watchdog = em_watchdog;
2326	IFQ_SET_MAXLEN(&ifp->if_snd, adapter->num_tx_desc - 1);
2327	ifp->if_snd.ifq_drv_maxlen = adapter->num_tx_desc - 1;
2328	IFQ_SET_READY(&ifp->if_snd);
2329
2330	ether_ifattach(ifp, adapter->hw.mac_addr);
2331
2332	ifp->if_capabilities = ifp->if_capenable = 0;
2333
2334	if (adapter->hw.mac_type >= em_82543) {
2335		ifp->if_capabilities |= IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM;
2336		ifp->if_capenable |= IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM;
2337	}
2338
2339	/* Enable TSO if available */
2340	if ((adapter->hw.mac_type > em_82544) &&
2341	    (adapter->hw.mac_type != em_82547)) {
2342		ifp->if_capabilities |= IFCAP_TSO;
2343		ifp->if_capenable |= IFCAP_TSO;
2344	}
2345
2346	/*
2347	 * Tell the upper layer(s) we support long frames.
2348	 */
2349	ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header);
2350	ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU;
2351	ifp->if_capenable |= IFCAP_VLAN_MTU;
2352
2353#ifdef DEVICE_POLLING
2354	ifp->if_capabilities |= IFCAP_POLLING;
2355#endif
2356
2357	/*
2358	 * Specify the media types supported by this adapter and register
2359	 * callbacks to update media and link information
2360	 */
2361	ifmedia_init(&adapter->media, IFM_IMASK, em_media_change,
2362	    em_media_status);
2363	if ((adapter->hw.media_type == em_media_type_fiber) ||
2364	    (adapter->hw.media_type == em_media_type_internal_serdes)) {
2365		u_char fiber_type = IFM_1000_SX;	/* default type */
2366
2367		if (adapter->hw.mac_type == em_82545)
2368			fiber_type = IFM_1000_LX;
2369		ifmedia_add(&adapter->media, IFM_ETHER | fiber_type | IFM_FDX,
2370		    0, NULL);
2371		ifmedia_add(&adapter->media, IFM_ETHER | fiber_type, 0, NULL);
2372	} else {
2373		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T, 0, NULL);
2374		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T | IFM_FDX,
2375			    0, NULL);
2376		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX,
2377			    0, NULL);
2378		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX | IFM_FDX,
2379			    0, NULL);
2380		if (adapter->hw.phy_type != em_phy_ife) {
2381			ifmedia_add(&adapter->media,
2382				IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
2383			ifmedia_add(&adapter->media,
2384				IFM_ETHER | IFM_1000_T, 0, NULL);
2385		}
2386	}
2387	ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL);
2388	ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO);
2389}
2390
2391
2392/*********************************************************************
2393 *
2394 *  Workaround for SmartSpeed on 82541 and 82547 controllers
2395 *
2396 **********************************************************************/
2397static void
2398em_smartspeed(struct adapter *adapter)
2399{
2400	uint16_t phy_tmp;
2401
2402	if (adapter->link_active || (adapter->hw.phy_type != em_phy_igp) ||
2403	    adapter->hw.autoneg == 0 ||
2404	    (adapter->hw.autoneg_advertised & ADVERTISE_1000_FULL) == 0)
2405		return;
2406
2407	if (adapter->smartspeed == 0) {
2408		/* If a Master/Slave config fault is asserted twice in
2409		 * a row, we assume the fault is genuine */
2410		em_read_phy_reg(&adapter->hw, PHY_1000T_STATUS, &phy_tmp);
2411		if (!(phy_tmp & SR_1000T_MS_CONFIG_FAULT))
2412			return;
2413		em_read_phy_reg(&adapter->hw, PHY_1000T_STATUS, &phy_tmp);
2414		if (phy_tmp & SR_1000T_MS_CONFIG_FAULT) {
2415			em_read_phy_reg(&adapter->hw, PHY_1000T_CTRL, &phy_tmp);
2416			if (phy_tmp & CR_1000T_MS_ENABLE) {
2417				phy_tmp &= ~CR_1000T_MS_ENABLE;
2418				em_write_phy_reg(&adapter->hw, PHY_1000T_CTRL,
2419				    phy_tmp);
2420				adapter->smartspeed++;
2421				if (adapter->hw.autoneg &&
2422				   !em_phy_setup_autoneg(&adapter->hw) &&
2423				   !em_read_phy_reg(&adapter->hw, PHY_CTRL,
2424				    &phy_tmp)) {
2425					phy_tmp |= (MII_CR_AUTO_NEG_EN |
2426						    MII_CR_RESTART_AUTO_NEG);
2427					em_write_phy_reg(&adapter->hw, PHY_CTRL,
2428					    phy_tmp);
2429				}
2430			}
2431		}
2432		return;
2433	} else if (adapter->smartspeed == EM_SMARTSPEED_DOWNSHIFT) {
2434		/* If still no link, perhaps using 2/3 pair cable */
2435		em_read_phy_reg(&adapter->hw, PHY_1000T_CTRL, &phy_tmp);
2436		phy_tmp |= CR_1000T_MS_ENABLE;
2437		em_write_phy_reg(&adapter->hw, PHY_1000T_CTRL, phy_tmp);
2438		if (adapter->hw.autoneg &&
2439		   !em_phy_setup_autoneg(&adapter->hw) &&
2440		   !em_read_phy_reg(&adapter->hw, PHY_CTRL, &phy_tmp)) {
2441			phy_tmp |= (MII_CR_AUTO_NEG_EN |
2442				    MII_CR_RESTART_AUTO_NEG);
2443			em_write_phy_reg(&adapter->hw, PHY_CTRL, phy_tmp);
2444		}
2445	}
2446	/* Restart process after EM_SMARTSPEED_MAX iterations */
2447	if (adapter->smartspeed++ == EM_SMARTSPEED_MAX)
2448		adapter->smartspeed = 0;
2449}
2450
2451
2452/*
2453 * Manage DMA'able memory.
2454 */
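/* bus_dmamap_load(9) callback: record the address of the single segment. */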
2455static void
2456em_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
2457{
2458	if (error)
2459		return;
2460	*(bus_addr_t *) arg = segs[0].ds_addr;
2461}
2462
2463static int
2464em_dma_malloc(struct adapter *adapter, bus_size_t size, struct em_dma_alloc *dma,
2465	int mapflags)
2466{
2467	int error;
2468
2469	error = bus_dma_tag_create(bus_get_dma_tag(adapter->dev), /* parent */
2470				EM_DBA_ALIGN, 0,	/* alignment, bounds */
2471				BUS_SPACE_MAXADDR,	/* lowaddr */
2472				BUS_SPACE_MAXADDR,	/* highaddr */
2473				NULL, NULL,		/* filter, filterarg */
2474				size,			/* maxsize */
2475				1,			/* nsegments */
2476				size,			/* maxsegsize */
2477				0,			/* flags */
2478				NULL,			/* lockfunc */
2479				NULL,			/* lockarg */
2480				&dma->dma_tag);
2481	if (error) {
2482		device_printf(adapter->dev, "%s: bus_dma_tag_create failed: %d\n",
2483		    __func__, error);
2484		goto fail_0;
2485	}
2486
2487	error = bus_dmamem_alloc(dma->dma_tag, (void**) &dma->dma_vaddr,
2488	    BUS_DMA_NOWAIT, &dma->dma_map);
2489	if (error) {
2490		device_printf(adapter->dev, "%s: bus_dmamem_alloc(%ju) failed: %d\n",
2491		    __func__, (uintmax_t)size, error);
2492		goto fail_2;
2493	}
2494
2495	dma->dma_paddr = 0;
2496	error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
2497	    size, em_dmamap_cb, &dma->dma_paddr, mapflags | BUS_DMA_NOWAIT);
2498	if (error || dma->dma_paddr == 0) {
2499		device_printf(adapter->dev, "%s: bus_dmamap_load failed: %d\n",
2500		    __func__, error);
2501		goto fail_3;
2502	}
2503
2504	return (0);
2505
2506fail_3:
2507	bus_dmamap_unload(dma->dma_tag, dma->dma_map);
2508fail_2:
2509	bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
2510	bus_dma_tag_destroy(dma->dma_tag);
2511fail_0:
2512	dma->dma_map = NULL;
2513	dma->dma_tag = NULL;
2514
2515	return (error);
2516}
2517
2518static void
2519em_dma_free(struct adapter *adapter, struct em_dma_alloc *dma)
2520{
2521	if (dma->dma_tag == NULL)
2522		return;
2523	if (dma->dma_map != NULL) {
2524		bus_dmamap_sync(dma->dma_tag, dma->dma_map,
2525		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
2526		bus_dmamap_unload(dma->dma_tag, dma->dma_map);
2527		bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
2528		dma->dma_map = NULL;
2529	}
2530	bus_dma_tag_destroy(dma->dma_tag);
2531	dma->dma_tag = NULL;
2532}
2533
2534
2535/*********************************************************************
2536 *
2537 *  Allocate memory for tx_buffer structures. The tx_buffer stores all
2538 *  the information needed to transmit a packet on the wire.
2539 *
2540 **********************************************************************/
2541static int
2542em_allocate_transmit_structures(struct adapter *adapter)
2543{
2544	adapter->tx_buffer_area =  malloc(sizeof(struct em_buffer) *
2545	    adapter->num_tx_desc, M_DEVBUF, M_NOWAIT);
2546	if (adapter->tx_buffer_area == NULL) {
2547		device_printf(adapter->dev, "Unable to allocate tx_buffer memory\n");
2548		return (ENOMEM);
2549	}
2550
2551	bzero(adapter->tx_buffer_area, sizeof(struct em_buffer) * adapter->num_tx_desc);
2552
2553	return (0);
2554}
2555
2556/*********************************************************************
2557 *
2558 *  Allocate and initialize transmit structures.
2559 *
2560 **********************************************************************/
2561static int
2562em_setup_transmit_structures(struct adapter *adapter)
2563{
2564	struct ifnet	*ifp = adapter->ifp;
2565	device_t dev = adapter->dev;
2566	struct em_buffer *tx_buffer;
2567	bus_size_t size, segsize;
2568	int error, i;
2569
2570	/*
2571	 * Setup DMA descriptor areas.
2572	 */
2573	segsize = size = roundup2(adapter->hw.max_frame_size, MCLBYTES);
2574
2575	/* Overrides for TSO - want large sizes */
2576	if (ifp->if_hwassist & EM_TCPSEG_FEATURES) {
2577		size = EM_TSO_SIZE;
2578		segsize = PAGE_SIZE;
2579	}
2580
2581	if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),	/* parent */
2582				1, 0,			/* alignment, bounds */
2583				BUS_SPACE_MAXADDR,	/* lowaddr */
2584				BUS_SPACE_MAXADDR,	/* highaddr */
2585				NULL, NULL,		/* filter, filterarg */
2586				size,			/* maxsize */
2587				EM_MAX_SCATTER,		/* nsegments */
2588				segsize,		/* maxsegsize */
2589				0,			/* flags */
2590				NULL,		/* lockfunc */
2591				NULL,		/* lockarg */
2592				&adapter->txtag)) != 0) {
2593		device_printf(dev, "Unable to allocate TX DMA tag\n");
2594		goto fail;
2595	}
2596
2597	if ((error = em_allocate_transmit_structures(adapter)) != 0)
2598		goto fail;
2599
2600	bzero(adapter->tx_desc_base, (sizeof(struct em_tx_desc)) * adapter->num_tx_desc);
2601	tx_buffer = adapter->tx_buffer_area;
2602	for (i = 0; i < adapter->num_tx_desc; i++) {
2603		error = bus_dmamap_create(adapter->txtag, 0, &tx_buffer->map);
2604		if (error != 0) {
2605			device_printf(dev, "Unable to create TX DMA map\n");
2606			goto fail;
2607		}
2608		tx_buffer++;
2609	}
2610
2611	adapter->next_avail_tx_desc = 0;
2612	adapter->oldest_used_tx_desc = 0;
2613
2614	/* Set number of descriptors available */
2615	adapter->num_tx_desc_avail = adapter->num_tx_desc;
2616
2617	/* Set checksum context */
2618	adapter->active_checksum_context = OFFLOAD_NONE;
2619	bus_dmamap_sync(adapter->txdma.dma_tag, adapter->txdma.dma_map,
2620	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
2621
2622	return (0);
2623
2624fail:
2625	em_free_transmit_structures(adapter);
2626	return (error);
2627}
2628
2629/*********************************************************************
2630 *
2631 *  Enable transmit unit.
2632 *
2633 **********************************************************************/
2634static void
2635em_initialize_transmit_unit(struct adapter *adapter)
2636{
2637	uint32_t	reg_tctl, reg_tarc;
2638	uint32_t	reg_tipg = 0;
2639	uint64_t	bus_addr;
2640
2641	INIT_DEBUGOUT("em_initialize_transmit_unit: begin");
2642	/* Setup the Base and Length of the Tx Descriptor Ring */
2643	bus_addr = adapter->txdma.dma_paddr;
2644	E1000_WRITE_REG(&adapter->hw, TDLEN,
2645	    adapter->num_tx_desc * sizeof(struct em_tx_desc));
2646	E1000_WRITE_REG(&adapter->hw, TDBAH, (uint32_t)(bus_addr >> 32));
2647	E1000_WRITE_REG(&adapter->hw, TDBAL, (uint32_t)bus_addr);
2648
2649	/* Setup the HW Tx Head and Tail descriptor pointers */
2650	E1000_WRITE_REG(&adapter->hw, TDT, 0);
2651	E1000_WRITE_REG(&adapter->hw, TDH, 0);
2652
2653
2654	HW_DEBUGOUT2("Base = %x, Length = %x\n", E1000_READ_REG(&adapter->hw, TDBAL),
2655	    E1000_READ_REG(&adapter->hw, TDLEN));
2656
2657	/* Set the default values for the Tx Inter Packet Gap timer */
2658	switch (adapter->hw.mac_type) {
2659	case em_82542_rev2_0:
2660	case em_82542_rev2_1:
2661		reg_tipg = DEFAULT_82542_TIPG_IPGT;
2662		reg_tipg |= DEFAULT_82542_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT;
2663		reg_tipg |= DEFAULT_82542_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT;
2664		break;
2665	case em_80003es2lan:
2666		reg_tipg = DEFAULT_82543_TIPG_IPGR1;
2667		reg_tipg |= DEFAULT_80003ES2LAN_TIPG_IPGR2 <<
2668		    E1000_TIPG_IPGR2_SHIFT;
2669		break;
2670	default:
2671		if ((adapter->hw.media_type == em_media_type_fiber) ||
2672		    (adapter->hw.media_type == em_media_type_internal_serdes))
2673			reg_tipg = DEFAULT_82543_TIPG_IPGT_FIBER;
2674		else
2675			reg_tipg = DEFAULT_82543_TIPG_IPGT_COPPER;
2676		reg_tipg |= DEFAULT_82543_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT;
2677		reg_tipg |= DEFAULT_82543_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT;
2678	}
2679
2680	E1000_WRITE_REG(&adapter->hw, TIPG, reg_tipg);
2681	E1000_WRITE_REG(&adapter->hw, TIDV, adapter->tx_int_delay.value);
2682	if (adapter->hw.mac_type >= em_82540)
2683		E1000_WRITE_REG(&adapter->hw, TADV, adapter->tx_abs_int_delay.value);
2684
2685	/* Do adapter specific tweaks before we enable the transmitter. */
2686	if (adapter->hw.mac_type == em_82571 || adapter->hw.mac_type == em_82572) {
2687		reg_tarc = E1000_READ_REG(&adapter->hw, TARC0);
2688		reg_tarc |= (1 << 25);
2689		E1000_WRITE_REG(&adapter->hw, TARC0, reg_tarc);
2690		reg_tarc = E1000_READ_REG(&adapter->hw, TARC1);
2691		reg_tarc |= (1 << 25);
2692		reg_tarc &= ~(1 << 28);
2693		E1000_WRITE_REG(&adapter->hw, TARC1, reg_tarc);
2694	} else if (adapter->hw.mac_type == em_80003es2lan) {
2695		reg_tarc = E1000_READ_REG(&adapter->hw, TARC0);
2696		reg_tarc |= 1;
2697		E1000_WRITE_REG(&adapter->hw, TARC0, reg_tarc);
2698		reg_tarc = E1000_READ_REG(&adapter->hw, TARC1);
2699		reg_tarc |= 1;
2700		E1000_WRITE_REG(&adapter->hw, TARC1, reg_tarc);
2701	}
2702
2703	/* Program the Transmit Control Register */
2704	reg_tctl = E1000_TCTL_PSP | E1000_TCTL_EN |
2705		   (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT);
2706	if (adapter->hw.mac_type >= em_82571)
2707		reg_tctl |= E1000_TCTL_MULR;
2708	if (adapter->link_duplex == FULL_DUPLEX) {
2709		reg_tctl |= E1000_FDX_COLLISION_DISTANCE << E1000_COLD_SHIFT;
2710	} else {
2711		reg_tctl |= E1000_HDX_COLLISION_DISTANCE << E1000_COLD_SHIFT;
2712	}
2713	/* This write will effectively turn on the transmit unit. */
2714	E1000_WRITE_REG(&adapter->hw, TCTL, reg_tctl);
2715
2716	/* Setup Transmit Descriptor Settings for this adapter */
2717	adapter->txd_cmd = E1000_TXD_CMD_IFCS | E1000_TXD_CMD_RS;
2718
2719	if (adapter->tx_int_delay.value > 0)
2720		adapter->txd_cmd |= E1000_TXD_CMD_IDE;
2721}
2722
2723/*********************************************************************
2724 *
2725 *  Free all transmit related data structures.
2726 *
2727 **********************************************************************/
2728static void
2729em_free_transmit_structures(struct adapter *adapter)
2730{
2731	struct em_buffer *tx_buffer;
2732	int i;
2733
2734	INIT_DEBUGOUT("free_transmit_structures: begin");
2735
2736	if (adapter->tx_buffer_area != NULL) {
2737		tx_buffer = adapter->tx_buffer_area;
2738		for (i = 0; i < adapter->num_tx_desc; i++, tx_buffer++) {
2739			if (tx_buffer->m_head != NULL) {
2740				bus_dmamap_sync(adapter->txtag, tx_buffer->map,
2741				    BUS_DMASYNC_POSTWRITE);
2742				bus_dmamap_unload(adapter->txtag,
2743				    tx_buffer->map);
2744				m_freem(tx_buffer->m_head);
2745				tx_buffer->m_head = NULL;
2746			} else if (tx_buffer->map != NULL)
2747				bus_dmamap_unload(adapter->txtag,
2748				    tx_buffer->map);
2749			if (tx_buffer->map != NULL) {
2750				bus_dmamap_destroy(adapter->txtag,
2751				    tx_buffer->map);
2752				tx_buffer->map = NULL;
2753			}
2754		}
2755	}
2756	if (adapter->tx_buffer_area != NULL) {
2757		free(adapter->tx_buffer_area, M_DEVBUF);
2758		adapter->tx_buffer_area = NULL;
2759	}
2760	if (adapter->txtag != NULL) {
2761		bus_dma_tag_destroy(adapter->txtag);
2762		adapter->txtag = NULL;
2763	}
2764}
2765
2766/*********************************************************************
2767 *
2768 *  The offload context needs to be set when we transfer the first
2769 *  packet of a particular protocol (TCP/UDP). We change the
2770 *  context only if the protocol type changes.
2771 *
2772 **********************************************************************/
2773static void
2774em_transmit_checksum_setup(struct adapter *adapter, struct mbuf *mp,
2775    uint32_t *txd_upper, uint32_t *txd_lower)
2776{
2777	struct em_context_desc *TXD;
2778	struct em_buffer *tx_buffer;
2779	int curr_txd;
2780
2781	if (mp->m_pkthdr.csum_flags) {
2782
2783		if (mp->m_pkthdr.csum_flags & CSUM_TCP) {
2784			*txd_upper = E1000_TXD_POPTS_TXSM << 8;
2785			*txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
2786			if (adapter->active_checksum_context == OFFLOAD_TCP_IP)
2787				return;
2788			else
2789				adapter->active_checksum_context = OFFLOAD_TCP_IP;
2790
2791		} else if (mp->m_pkthdr.csum_flags & CSUM_UDP) {
2792			*txd_upper = E1000_TXD_POPTS_TXSM << 8;
2793			*txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
2794			if (adapter->active_checksum_context == OFFLOAD_UDP_IP)
2795				return;
2796			else
2797				adapter->active_checksum_context = OFFLOAD_UDP_IP;
2798		} else {
2799			*txd_upper = 0;
2800			*txd_lower = 0;
2801			return;
2802		}
2803	} else {
2804		*txd_upper = 0;
2805		*txd_lower = 0;
2806		return;
2807	}
2808
2809	/* If we reach this point, the checksum offload context
2810	 * needs to be reset.
2811	 */
2812	curr_txd = adapter->next_avail_tx_desc;
2813	tx_buffer = &adapter->tx_buffer_area[curr_txd];
2814	TXD = (struct em_context_desc *) &adapter->tx_desc_base[curr_txd];
2815
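	/*
	 * ipcss/ipcso/ipcse give the start, offset and end used for the
	 * IP header checksum; tucss/tucso/tucse do the same for the
	 * TCP/UDP checksum (a tucse of 0 means "to end of packet").
	 */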
2816	TXD->lower_setup.ip_fields.ipcss = ETHER_HDR_LEN;
2817	TXD->lower_setup.ip_fields.ipcso =
2818		ETHER_HDR_LEN + offsetof(struct ip, ip_sum);
2819	TXD->lower_setup.ip_fields.ipcse =
2820		htole16(ETHER_HDR_LEN + sizeof(struct ip) - 1);
2821
2822	TXD->upper_setup.tcp_fields.tucss =
2823		ETHER_HDR_LEN + sizeof(struct ip);
2824	TXD->upper_setup.tcp_fields.tucse = htole16(0);
2825
2826	if (adapter->active_checksum_context == OFFLOAD_TCP_IP) {
2827		TXD->upper_setup.tcp_fields.tucso =
2828			ETHER_HDR_LEN + sizeof(struct ip) +
2829			offsetof(struct tcphdr, th_sum);
2830	} else if (adapter->active_checksum_context == OFFLOAD_UDP_IP) {
2831		TXD->upper_setup.tcp_fields.tucso =
2832			ETHER_HDR_LEN + sizeof(struct ip) +
2833			offsetof(struct udphdr, uh_sum);
2834	}
2835
2836	TXD->tcp_seg_setup.data = htole32(0);
2837	TXD->cmd_and_length = htole32(adapter->txd_cmd | E1000_TXD_CMD_DEXT);
2838
2839	tx_buffer->m_head = NULL;
2840
2841	if (++curr_txd == adapter->num_tx_desc)
2842		curr_txd = 0;
2843
2844	adapter->num_tx_desc_avail--;
2845	adapter->next_avail_tx_desc = curr_txd;
2846}
2847
2848/**********************************************************************
2849 *
2850 *  Setup work for hardware segmentation offload (TSO)
2851 *
2852 **********************************************************************/
2853static boolean_t
2854em_tso_setup(struct adapter *adapter,
2855             struct mbuf *mp,
2856             uint32_t *txd_upper,
2857             uint32_t *txd_lower)
2858{
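	/*
	 * Prime the headers for hardware TSO: ip_len and ip_sum are
	 * zeroed and th_sum is seeded with a pseudo-header checksum
	 * that omits the length, which the hardware inserts for each
	 * generated segment.
	 */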
2859	struct em_context_desc *TXD;
2860	struct em_buffer *tx_buffer;
2861	struct ip *ip;
2862	struct tcphdr *th;
2863	int curr_txd, hdr_len, ip_hlen, tcp_hlen;
2864
2865	if (((mp->m_pkthdr.csum_flags & CSUM_TSO) == 0) ||
2866	    (mp->m_pkthdr.len <= E1000_TX_BUFFER_SIZE)) {
2867		return (FALSE);
2868	}
2869
2870	*txd_lower = (E1000_TXD_CMD_DEXT |
2871	    E1000_TXD_DTYP_D |
2872	    E1000_TXD_CMD_TSE);
2873
2874	*txd_upper = (E1000_TXD_POPTS_IXSM |
2875	    E1000_TXD_POPTS_TXSM) << 8;
2876
2877	curr_txd = adapter->next_avail_tx_desc;
2878	tx_buffer = &adapter->tx_buffer_area[curr_txd];
2879	TXD = (struct em_context_desc *) &adapter->tx_desc_base[curr_txd];
2880
2881	mp->m_data += sizeof(struct ether_header);
2882	ip = mtod(mp, struct ip *);
2883	ip->ip_len = 0;
2884	ip->ip_sum = 0;
2885	ip_hlen = ip->ip_hl << 2;
2886	th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
2887	tcp_hlen = th->th_off << 2;
2888
2889	hdr_len = ETHER_HDR_LEN + ip_hlen + tcp_hlen;
2890	th->th_sum = in_pseudo(ip->ip_src.s_addr,
2891	    ip->ip_dst.s_addr, htons(IPPROTO_TCP));
2892
2893	mp->m_data -= sizeof(struct ether_header);
2894	TXD->lower_setup.ip_fields.ipcss = ETHER_HDR_LEN;
2895	TXD->lower_setup.ip_fields.ipcso =
2896	    ETHER_HDR_LEN + offsetof(struct ip, ip_sum);
2897	TXD->lower_setup.ip_fields.ipcse =
2898	    htole16(ETHER_HDR_LEN + ip_hlen - 1);
2899
2900	TXD->upper_setup.tcp_fields.tucss =
2901	    ETHER_HDR_LEN + ip_hlen;
2902	TXD->upper_setup.tcp_fields.tucse = 0;
2903	TXD->upper_setup.tcp_fields.tucso =
2904	    ETHER_HDR_LEN + ip_hlen +
2905	    offsetof(struct tcphdr, th_sum);
2906	TXD->tcp_seg_setup.fields.mss = htole16(mp->m_pkthdr.tso_segsz);
2907	TXD->tcp_seg_setup.fields.hdr_len = hdr_len;
2908	TXD->cmd_and_length = htole32(adapter->txd_cmd |
2909	    E1000_TXD_CMD_DEXT |
2910	    E1000_TXD_CMD_TSE |
2911	    E1000_TXD_CMD_IP | E1000_TXD_CMD_TCP |
2912	    (mp->m_pkthdr.len - hdr_len));
2913
2914	tx_buffer->m_head = NULL;
2915
2916	if (++curr_txd == adapter->num_tx_desc)
2917		curr_txd = 0;
2918
2919	adapter->num_tx_desc_avail--;
2920	adapter->next_avail_tx_desc = curr_txd;
2921	adapter->tx_tso = TRUE;
2922
2923	return (TRUE);
2924}
2925
2926/**********************************************************************
2927 *
2928 *  Examine each tx_buffer in the used queue. If the hardware is done
2929 *  processing the packet then free associated resources. The
2930 *  tx_buffer is put back on the free queue.
2931 *
2932 **********************************************************************/
2933static void
2934em_txeof(struct adapter *adapter)
2935{
2936	int i, num_avail;
2937	struct em_buffer *tx_buffer;
2938	struct em_tx_desc   *tx_desc;
2939	struct ifnet   *ifp = adapter->ifp;
2940
2941	EM_LOCK_ASSERT(adapter);
2942
2943	if (adapter->num_tx_desc_avail == adapter->num_tx_desc)
2944		return;
2945
2946	num_avail = adapter->num_tx_desc_avail;
2947	i = adapter->oldest_used_tx_desc;
2948
2949	tx_buffer = &adapter->tx_buffer_area[i];
2950	tx_desc = &adapter->tx_desc_base[i];
2951
2952	bus_dmamap_sync(adapter->txdma.dma_tag, adapter->txdma.dma_map,
2953	    BUS_DMASYNC_POSTREAD);
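	/*
	 * Walk the ring from the oldest used descriptor, reclaiming
	 * buffers for every descriptor the hardware has completed
	 * (DD bit set).
	 */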
2954	while (tx_desc->upper.fields.status & E1000_TXD_STAT_DD) {
2955
2956		tx_desc->upper.data = 0;
2957		num_avail++;
2958
2959		if (tx_buffer->m_head) {
2960			ifp->if_opackets++;
2961			bus_dmamap_sync(adapter->txtag, tx_buffer->map,
2962			    BUS_DMASYNC_POSTWRITE);
2963			bus_dmamap_unload(adapter->txtag, tx_buffer->map);
2964
2965			m_freem(tx_buffer->m_head);
2966			tx_buffer->m_head = NULL;
2967		}
2968
2969		if (++i == adapter->num_tx_desc)
2970			i = 0;
2971
2972		tx_buffer = &adapter->tx_buffer_area[i];
2973		tx_desc = &adapter->tx_desc_base[i];
2974	}
2975	bus_dmamap_sync(adapter->txdma.dma_tag, adapter->txdma.dma_map,
2976	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
2977
2978	adapter->oldest_used_tx_desc = i;
2979
2980	/*
2981	 * If we have enough room, clear IFF_DRV_OACTIVE to tell the stack
2982	 * that it is OK to send packets.
2983	 * If there are no pending descriptors, clear the timeout. Otherwise,
2984	 * if some descriptors have been freed, restart the timeout.
2985	 */
2986	if (num_avail > EM_TX_CLEANUP_THRESHOLD) {
2987		ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
2988		if (num_avail == adapter->num_tx_desc)
2989			ifp->if_timer = 0;
2990		else if (num_avail != adapter->num_tx_desc_avail)
2991			ifp->if_timer = EM_TX_TIMEOUT;
2992	}
2993	adapter->num_tx_desc_avail = num_avail;
2994}
2995
2996/*********************************************************************
2997 *
2998 *  Get a buffer from system mbuf buffer pool.
2999 *
3000 **********************************************************************/
3001static int
3002em_get_buf(struct adapter *adapter, int i)
3003{
3004	struct mbuf		*m;
3005	bus_dma_segment_t	segs[1];
3006	bus_dmamap_t		map;
3007	struct em_buffer	*rx_buffer;
3008	int			error, nsegs;
3009
3010	m = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR);
3011	if (m == NULL) {
3012		adapter->mbuf_cluster_failed++;
3013		return (ENOBUFS);
3014	}
3015	m->m_len = m->m_pkthdr.len = MCLBYTES;
3016	if (adapter->hw.max_frame_size <= (MCLBYTES - ETHER_ALIGN))
3017		m_adj(m, ETHER_ALIGN);
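	/*
	 * The ETHER_ALIGN adjustment above shifts the payload so the IP
	 * header lands on a 4-byte boundary; it is only safe while the
	 * largest expected frame still fits in the cluster.
	 */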
3018
3019	/*
3020	 * Using memory from the mbuf cluster pool, invoke the
3021	 * bus_dma machinery to arrange the memory mapping.
3022	 */
3023	error = bus_dmamap_load_mbuf_sg(adapter->rxtag, adapter->rx_sparemap,
3024	    m, segs, &nsegs, BUS_DMA_NOWAIT);
3025	if (error != 0) {
3026		m_free(m);
3027		return (error);
3028	}
3029	/* If nsegs is wrong then the stack is corrupt. */
3030	KASSERT(nsegs == 1, ("Too many segments returned!"));
3031
3032	rx_buffer = &adapter->rx_buffer_area[i];
3033	if (rx_buffer->m_head != NULL)
3034		bus_dmamap_unload(adapter->rxtag, rx_buffer->map);
3035
3036	map = rx_buffer->map;
3037	rx_buffer->map = adapter->rx_sparemap;
3038	adapter->rx_sparemap = map;
3039	bus_dmamap_sync(adapter->rxtag, rx_buffer->map, BUS_DMASYNC_PREREAD);
3040	rx_buffer->m_head = m;
3041
3042	adapter->rx_desc_base[i].buffer_addr = htole64(segs[0].ds_addr);
3043
3044	return (0);
3045}
3046
3047/*********************************************************************
3048 *
3049 *  Allocate memory for rx_buffer structures. Since we use one
3050 *  rx_buffer per received packet, the maximum number of rx_buffer's
3051 *  rx_buffer per received packet, the maximum number of rx_buffers
3052 *  that we've allocated.
3053 *
3054 **********************************************************************/
3055static int
3056em_allocate_receive_structures(struct adapter *adapter)
3057{
3058	device_t dev = adapter->dev;
3059	struct em_buffer *rx_buffer;
3060	int i, error;
3061
3062	adapter->rx_buffer_area = malloc(sizeof(struct em_buffer) * adapter->num_rx_desc,
3063	    M_DEVBUF, M_NOWAIT);
3064	if (adapter->rx_buffer_area == NULL) {
3065		device_printf(dev, "Unable to allocate rx_buffer memory\n");
3066		return (ENOMEM);
3067	}
3068
3069	bzero(adapter->rx_buffer_area, sizeof(struct em_buffer) * adapter->num_rx_desc);
3070
3071	error = bus_dma_tag_create(bus_get_dma_tag(dev),	/* parent */
3072				1, 0,			/* alignment, bounds */
3073				BUS_SPACE_MAXADDR,	/* lowaddr */
3074				BUS_SPACE_MAXADDR,	/* highaddr */
3075				NULL, NULL,		/* filter, filterarg */
3076				MCLBYTES,		/* maxsize */
3077				1,			/* nsegments */
3078				MCLBYTES,		/* maxsegsize */
3079				0,			/* flags */
3080				NULL,			/* lockfunc */
3081				NULL,			/* lockarg */
3082				&adapter->rxtag);
3083	if (error) {
3084		device_printf(dev, "%s: bus_dma_tag_create failed %d\n",
3085		    __func__, error);
3086		goto fail;
3087	}
3088
3089	error = bus_dmamap_create(adapter->rxtag, BUS_DMA_NOWAIT,
3090	    &adapter->rx_sparemap);
3091	if (error) {
3092		device_printf(dev, "%s: bus_dmamap_create failed: %d\n",
3093		    __func__, error);
3094		goto fail;
3095	}
3096	rx_buffer = adapter->rx_buffer_area;
3097	for (i = 0; i < adapter->num_rx_desc; i++, rx_buffer++) {
3098		error = bus_dmamap_create(adapter->rxtag, BUS_DMA_NOWAIT,
3099		    &rx_buffer->map);
3100		if (error) {
3101			device_printf(dev, "%s: bus_dmamap_create failed: %d\n",
3102			    __func__, error);
3103			goto fail;
3104		}
3105	}
3106
3107	for (i = 0; i < adapter->num_rx_desc; i++) {
3108		error = em_get_buf(adapter, i);
3109		if (error)
3110			goto fail;
3111	}
3112	bus_dmamap_sync(adapter->rxdma.dma_tag, adapter->rxdma.dma_map,
3113	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3114
3115	return (0);
3116
3117fail:
3118	em_free_receive_structures(adapter);
3119	return (error);
3120}
3121
3122/*********************************************************************
3123 *
3124 *  Allocate and initialize receive structures.
3125 *
3126 **********************************************************************/
3127static int
3128em_setup_receive_structures(struct adapter *adapter)
3129{
3130	int error;
3131
3132	bzero(adapter->rx_desc_base, (sizeof(struct em_rx_desc)) * adapter->num_rx_desc);
3133
3134	if ((error = em_allocate_receive_structures(adapter)) != 0)
3135		return (error);
3136
3137	/* Setup our descriptor pointers */
3138	adapter->next_rx_desc_to_check = 0;
3139
3140	return (0);
3141}
3142
3143/*********************************************************************
3144 *
3145 *  Enable receive unit.
3146 *
3147 **********************************************************************/
3148static void
3149em_initialize_receive_unit(struct adapter *adapter)
3150{
3151	struct ifnet	*ifp = adapter->ifp;
3152	uint64_t	bus_addr;
3153	uint32_t	reg_rctl;
3154	uint32_t	reg_rxcsum;
3155
3156	INIT_DEBUGOUT("em_initialize_receive_unit: begin");
3157
3158	/*
3159	 * Make sure receives are disabled while setting
3160	 * up the descriptor ring
3161	 */
3162	E1000_WRITE_REG(&adapter->hw, RCTL, 0);
3163
3164	/* Set the Receive Delay Timer Register */
3165	E1000_WRITE_REG(&adapter->hw, RDTR, adapter->rx_int_delay.value | E1000_RDT_FPDB);
3166
3167	if (adapter->hw.mac_type >= em_82540) {
3168		E1000_WRITE_REG(&adapter->hw, RADV, adapter->rx_abs_int_delay.value);
3169
3170		/*
3171		 * Set the interrupt throttling rate. Value is calculated
3172		 * as DEFAULT_ITR = 1/(MAX_INTS_PER_SEC * 256ns)
3173		 */
3174#define MAX_INTS_PER_SEC	8000
3175#define DEFAULT_ITR	(1000000000/(MAX_INTS_PER_SEC * 256))
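		/* e.g. 8000 ints/s -> 1000000000 / (8000 * 256) = 488 */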
3176		E1000_WRITE_REG(&adapter->hw, ITR, DEFAULT_ITR);
3177	}
3178
3179	/* Setup the Base and Length of the Rx Descriptor Ring */
3180	bus_addr = adapter->rxdma.dma_paddr;
3181	E1000_WRITE_REG(&adapter->hw, RDLEN, adapter->num_rx_desc *
3182			sizeof(struct em_rx_desc));
3183	E1000_WRITE_REG(&adapter->hw, RDBAH, (uint32_t)(bus_addr >> 32));
3184	E1000_WRITE_REG(&adapter->hw, RDBAL, (uint32_t)bus_addr);
3185
3186	/* Setup the HW Rx Head and Tail Descriptor Pointers */
3187	E1000_WRITE_REG(&adapter->hw, RDT, adapter->num_rx_desc - 1);
3188	E1000_WRITE_REG(&adapter->hw, RDH, 0);
3189
3190	/* Setup the Receive Control Register */
3191	reg_rctl = E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_LBM_NO |
3192		   E1000_RCTL_RDMTS_HALF |
3193		   (adapter->hw.mc_filter_type << E1000_RCTL_MO_SHIFT);
3194
3195	if (adapter->hw.tbi_compatibility_on == TRUE)
3196		reg_rctl |= E1000_RCTL_SBP;
3197
3198
3199	switch (adapter->rx_buffer_len) {
3200	default:
3201	case EM_RXBUFFER_2048:
3202		reg_rctl |= E1000_RCTL_SZ_2048;
3203		break;
3204	case EM_RXBUFFER_4096:
3205		reg_rctl |= E1000_RCTL_SZ_4096 | E1000_RCTL_BSEX | E1000_RCTL_LPE;
3206		break;
3207	case EM_RXBUFFER_8192:
3208		reg_rctl |= E1000_RCTL_SZ_8192 | E1000_RCTL_BSEX | E1000_RCTL_LPE;
3209		break;
3210	case EM_RXBUFFER_16384:
3211		reg_rctl |= E1000_RCTL_SZ_16384 | E1000_RCTL_BSEX | E1000_RCTL_LPE;
3212		break;
3213	}
3214
3215	if (ifp->if_mtu > ETHERMTU)
3216		reg_rctl |= E1000_RCTL_LPE;
3217
3218	/* Enable 82543 Receive Checksum Offload for TCP and UDP */
3219	if ((adapter->hw.mac_type >= em_82543) &&
3220	    (ifp->if_capenable & IFCAP_RXCSUM)) {
3221		reg_rxcsum = E1000_READ_REG(&adapter->hw, RXCSUM);
3222		reg_rxcsum |= (E1000_RXCSUM_IPOFL | E1000_RXCSUM_TUOFL);
3223		E1000_WRITE_REG(&adapter->hw, RXCSUM, reg_rxcsum);
3224	}
3225
3226	/* Enable Receives */
3227	E1000_WRITE_REG(&adapter->hw, RCTL, reg_rctl);
3228}
3229
3230/*********************************************************************
3231 *
3232 *  Free receive related data structures.
3233 *
3234 **********************************************************************/
3235static void
3236em_free_receive_structures(struct adapter *adapter)
3237{
3238	struct em_buffer *rx_buffer;
3239	int i;
3240
3241	INIT_DEBUGOUT("free_receive_structures: begin");
3242
3243	if (adapter->rx_sparemap) {
3244		bus_dmamap_destroy(adapter->rxtag, adapter->rx_sparemap);
3245		adapter->rx_sparemap = NULL;
3246	}
3247	if (adapter->rx_buffer_area != NULL) {
3248		rx_buffer = adapter->rx_buffer_area;
3249		for (i = 0; i < adapter->num_rx_desc; i++, rx_buffer++) {
3250			if (rx_buffer->m_head != NULL) {
3251				bus_dmamap_sync(adapter->rxtag, rx_buffer->map,
3252				    BUS_DMASYNC_POSTREAD);
3253				bus_dmamap_unload(adapter->rxtag,
3254				    rx_buffer->map);
3255				m_freem(rx_buffer->m_head);
3256				rx_buffer->m_head = NULL;
3257			} else if (rx_buffer->map != NULL)
3258				bus_dmamap_unload(adapter->rxtag,
3259				    rx_buffer->map);
3260			if (rx_buffer->map != NULL) {
3261				bus_dmamap_destroy(adapter->rxtag,
3262				    rx_buffer->map);
3263				rx_buffer->map = NULL;
3264			}
3265		}
3266	}
3267	if (adapter->rx_buffer_area != NULL) {
3268		free(adapter->rx_buffer_area, M_DEVBUF);
3269		adapter->rx_buffer_area = NULL;
3270	}
3271	if (adapter->rxtag != NULL) {
3272		bus_dma_tag_destroy(adapter->rxtag);
3273		adapter->rxtag = NULL;
3274	}
3275}
3276
3277/*********************************************************************
3278 *
3279 *  This routine executes in interrupt context. It replenishes
3280 *  the mbufs in the descriptor and sends data which has been
3281 *  the mbufs in the descriptor ring and sends data which has been
3282 *  DMA'ed into host memory to the upper layer.
3283 *  We loop at most count times if count is > 0, or until done if
3284 *  count < 0.
3285 *
3286 *********************************************************************/
3287static int
3288em_rxeof(struct adapter *adapter, int count)
3289{
3290	struct ifnet	*ifp;
3291	struct mbuf	*mp;
3292	uint8_t		accept_frame = 0;
3293	uint8_t		eop = 0;
3294	uint16_t 	len, desc_len, prev_len_adj;
3295	int		i;
3296
3297	/* Pointer to the receive descriptor being examined. */
3298	struct em_rx_desc   *current_desc;
3299	uint8_t		status;
3300
3301	ifp = adapter->ifp;
3302	i = adapter->next_rx_desc_to_check;
3303	current_desc = &adapter->rx_desc_base[i];
3304	bus_dmamap_sync(adapter->rxdma.dma_tag, adapter->rxdma.dma_map,
3305	    BUS_DMASYNC_POSTREAD);
3306
3307	if (!((current_desc->status) & E1000_RXD_STAT_DD))
3308		return (0);
3309
3310	while ((current_desc->status & E1000_RXD_STAT_DD) &&
3311	    (count != 0) &&
3312	    (ifp->if_drv_flags & IFF_DRV_RUNNING)) {
3313		struct mbuf *m = NULL;
3314
3315		mp = adapter->rx_buffer_area[i].m_head;
3316		/*
3317		 * Can't defer bus_dmamap_sync(9) because TBI_ACCEPT
3318		 * needs to access the last received byte in the mbuf.
3319		 */
3320		bus_dmamap_sync(adapter->rxtag, adapter->rx_buffer_area[i].map,
3321		    BUS_DMASYNC_POSTREAD);
3322
3323		accept_frame = 1;
3324		prev_len_adj = 0;
3325		desc_len = le16toh(current_desc->length);
3326		status = current_desc->status;
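		/*
		 * The hardware length on the final descriptor of a frame
		 * includes the CRC, which is stripped here; if the final
		 * descriptor is shorter than the CRC, the difference is
		 * trimmed from the previous mbuf via prev_len_adj below.
		 */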
3327		if (status & E1000_RXD_STAT_EOP) {
3328			count--;
3329			eop = 1;
3330			if (desc_len < ETHER_CRC_LEN) {
3331				len = 0;
3332				prev_len_adj = ETHER_CRC_LEN - desc_len;
3333			} else
3334				len = desc_len - ETHER_CRC_LEN;
3335		} else {
3336			eop = 0;
3337			len = desc_len;
3338		}
3339
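		/*
		 * TBI workaround: frames ending in a carrier-extend symbol
		 * are flagged in error but may still be valid, so let
		 * TBI_ACCEPT decide and drop the trailing extension byte.
		 */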
3340		if (current_desc->errors & E1000_RXD_ERR_FRAME_ERR_MASK) {
3341			uint8_t		last_byte;
3342			uint32_t	pkt_len = desc_len;
3343
3344			if (adapter->fmp != NULL)
3345				pkt_len += adapter->fmp->m_pkthdr.len;
3346
3347			last_byte = *(mtod(mp, caddr_t) + desc_len - 1);
3348			if (TBI_ACCEPT(&adapter->hw, status,
3349			    current_desc->errors, pkt_len, last_byte)) {
3350				em_tbi_adjust_stats(&adapter->hw,
3351				    &adapter->stats, pkt_len,
3352				    adapter->hw.mac_addr);
3353				if (len > 0)
3354					len--;
3355			} else
3356				accept_frame = 0;
3357		}
3358
3359		if (accept_frame) {
3360			if (em_get_buf(adapter, i) != 0) {
3361				ifp->if_iqdrops++;
3362				goto discard;
3363			}
3364
3365			/* Assign correct length to the current fragment */
3366			mp->m_len = len;
3367
3368			if (adapter->fmp == NULL) {
3369				mp->m_pkthdr.len = len;
3370				adapter->fmp = mp; /* Store the first mbuf */
3371				adapter->lmp = mp;
3372			} else {
3373				/* Chain mbufs together */
3374				mp->m_flags &= ~M_PKTHDR;
3375				/*
3376				 * Adjust length of previous mbuf in chain if
3377				 * we received less than 4 bytes in the last
3378				 * descriptor.
3379				 */
3380				if (prev_len_adj > 0) {
3381					adapter->lmp->m_len -= prev_len_adj;
3382					adapter->fmp->m_pkthdr.len -=
3383					    prev_len_adj;
3384				}
3385				adapter->lmp->m_next = mp;
3386				adapter->lmp = adapter->lmp->m_next;
3387				adapter->fmp->m_pkthdr.len += len;
3388			}
3389
3390			if (eop) {
3391				adapter->fmp->m_pkthdr.rcvif = ifp;
3392				ifp->if_ipackets++;
3393				em_receive_checksum(adapter, current_desc,
3394				    adapter->fmp);
3395#ifndef __NO_STRICT_ALIGNMENT
3396				if (adapter->hw.max_frame_size >
3397				    (MCLBYTES - ETHER_ALIGN) &&
3398				    em_fixup_rx(adapter) != 0)
3399					goto skip;
3400#endif
3401				if (status & E1000_RXD_STAT_VP)
3402					VLAN_INPUT_TAG(ifp, adapter->fmp,
3403					    (le16toh(current_desc->special) &
3404					    E1000_RXD_SPC_VLAN_MASK));
3405#ifndef __NO_STRICT_ALIGNMENT
3406skip:
3407#endif
3408				m = adapter->fmp;
3409				adapter->fmp = NULL;
3410				adapter->lmp = NULL;
3411			}
		} else {
			ifp->if_ierrors++;
discard:
			/* Reuse loaded DMA map and just update mbuf chain */
			mp = adapter->rx_buffer_area[i].m_head;
			mp->m_len = mp->m_pkthdr.len = MCLBYTES;
			mp->m_data = mp->m_ext.ext_buf;
			mp->m_next = NULL;
			if (adapter->hw.max_frame_size <= (MCLBYTES - ETHER_ALIGN))
				m_adj(mp, ETHER_ALIGN);
			if (adapter->fmp != NULL) {
				m_freem(adapter->fmp);
				adapter->fmp = NULL;
				adapter->lmp = NULL;
			}
			m = NULL;
		}

		/* Zero out the receive descriptor's status. */
		current_desc->status = 0;
		bus_dmamap_sync(adapter->rxdma.dma_tag, adapter->rxdma.dma_map,
		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);

		/* Advance our pointers to the next descriptor. */
		if (++i == adapter->num_rx_desc)
			i = 0;
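		/*
		 * Hand the completed frame to the stack. Under
		 * DEVICE_POLLING the driver lock is dropped around
		 * if_input() so the stack never runs with it held;
		 * next_rx_desc_to_check is saved beforehand and
		 * reloaded afterwards in case it moved while the
		 * lock was released.
		 */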
		if (m != NULL) {
			adapter->next_rx_desc_to_check = i;
#ifdef DEVICE_POLLING
			EM_UNLOCK(adapter);
			(*ifp->if_input)(ifp, m);
			EM_LOCK(adapter);
#else
			(*ifp->if_input)(ifp, m);
#endif
			i = adapter->next_rx_desc_to_check;
		}
		current_desc = &adapter->rx_desc_base[i];
	}
	adapter->next_rx_desc_to_check = i;

	/* Advance the E1000's Receive Queue #0 "Tail Pointer". */
	if (--i < 0)
		i = adapter->num_rx_desc - 1;
	E1000_WRITE_REG(&adapter->hw, RDT, i);
	if (!(current_desc->status & E1000_RXD_STAT_DD))
		return (0);

	return (1);
}

#ifndef __NO_STRICT_ALIGNMENT
/*
 * When jumbo frames are enabled we should realign the entire payload on
 * architectures with strict alignment. This is a serious design mistake of the
 * 8254x, as it nullifies DMA operations: the 8254x only allows the RX buffer
 * size to be 2048/4096/8192/16384, while what we really want is 2048 -
 * ETHER_ALIGN so that the payload comes out aligned. On architectures without
 * strict alignment restrictions the 8254x still performs unaligned memory
 * accesses, which reduce performance as well. To avoid copying an entire frame
 * just to realign it, we allocate a new mbuf and copy only the ethernet header
 * into it. The new mbuf is then prepended to the existing mbuf chain.
 *
 * Be aware that the best performance of the 8254x is achieved only when jumbo
 * frames are not used at all on architectures with strict alignment.
 */
static int
em_fixup_rx(struct adapter *adapter)
{
	struct mbuf *m, *n;
	int error;

	error = 0;
	m = adapter->fmp;
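	/*
	 * If the cluster has ETHER_HDR_LEN bytes to spare, slide the
	 * whole frame forward in place; the payload that follows the
	 * 14-byte ethernet header then ends up 32-bit aligned.
	 * Otherwise, move the header into a freshly allocated mbuf.
	 */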
	if (m->m_len <= (MCLBYTES - ETHER_HDR_LEN)) {
		bcopy(m->m_data, m->m_data + ETHER_HDR_LEN, m->m_len);
		m->m_data += ETHER_HDR_LEN;
	} else {
		MGETHDR(n, M_DONTWAIT, MT_DATA);
		if (n != NULL) {
			bcopy(m->m_data, n->m_data, ETHER_HDR_LEN);
			m->m_data += ETHER_HDR_LEN;
			m->m_len -= ETHER_HDR_LEN;
			n->m_len = ETHER_HDR_LEN;
			M_MOVE_PKTHDR(n, m);
			n->m_next = m;
			adapter->fmp = n;
		} else {
			adapter->ifp->if_iqdrops++;
			adapter->mbuf_alloc_failed++;
			m_freem(adapter->fmp);
			adapter->fmp = NULL;
			adapter->lmp = NULL;
			error = ENOBUFS;
		}
	}

	return (error);
}
#endif

/*********************************************************************
 *
 *  Verify that the hardware indicated that the checksum is valid.
 *  Inform the stack about the status of the checksum so that it
 *  doesn't spend time verifying the checksum again.
 *
 *********************************************************************/
static void
em_receive_checksum(struct adapter *adapter, struct em_rx_desc *rx_desc,
		    struct mbuf *mp)
{
	/*
	 * Checksum offloading is only available on the 82543 and newer;
	 * also skip it when the Ignore Checksum Indication bit is set.
	 */
	if ((adapter->hw.mac_type < em_82543) ||
	    (rx_desc->status & E1000_RXD_STAT_IXSM)) {
		mp->m_pkthdr.csum_flags = 0;
		return;
	}

	if (rx_desc->status & E1000_RXD_STAT_IPCS) {
		/* Did it pass? */
		if (!(rx_desc->errors & E1000_RXD_ERR_IPE)) {
			/* IP Checksum Good */
			mp->m_pkthdr.csum_flags = CSUM_IP_CHECKED;
			mp->m_pkthdr.csum_flags |= CSUM_IP_VALID;
		} else {
			mp->m_pkthdr.csum_flags = 0;
		}
	}

	if (rx_desc->status & E1000_RXD_STAT_TCPCS) {
		/* Did it pass? */
		if (!(rx_desc->errors & E1000_RXD_ERR_TCPE)) {
			mp->m_pkthdr.csum_flags |=
			    (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
			mp->m_pkthdr.csum_data = htons(0xffff);
		}
	}
}

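/*
 * For reference, the receiving stack consumes these flags roughly as
 * follows (a sketch of the m_pkthdr.csum_flags convention, not code
 * from this driver):
 *
 *	CSUM_IP_CHECKED		hardware checked the IP header checksum;
 *				trust CSUM_IP_VALID instead of verifying.
 *	CSUM_IP_VALID		the IP header checksum was good.
 *	CSUM_DATA_VALID		csum_data holds the verified checksum;
 *				with CSUM_PSEUDO_HDR also set, 0xffff
 *				means the TCP/UDP checksum passed.
 */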

static void
em_enable_vlans(struct adapter *adapter)
{
	uint32_t ctrl;

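	/*
	 * Program the VLAN ethertype and set CTRL.VME so the MAC
	 * recognizes 802.1Q tags.
	 */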
	E1000_WRITE_REG(&adapter->hw, VET, ETHERTYPE_VLAN);

	ctrl = E1000_READ_REG(&adapter->hw, CTRL);
	ctrl |= E1000_CTRL_VME;
	E1000_WRITE_REG(&adapter->hw, CTRL, ctrl);
}

static void
em_disable_vlans(struct adapter *adapter)
{
	uint32_t ctrl;

	ctrl = E1000_READ_REG(&adapter->hw, CTRL);
	ctrl &= ~E1000_CTRL_VME;
	E1000_WRITE_REG(&adapter->hw, CTRL, ctrl);
}

static void
em_enable_intr(struct adapter *adapter)
{
	E1000_WRITE_REG(&adapter->hw, IMS, (IMS_ENABLE_MASK));
}

static void
em_disable_intr(struct adapter *adapter)
{
	/*
	 * The first revision of the 82542 had an erratum: when link was
	 * forced, it would stay up even if the cable was disconnected.
	 * Sequence errors were used to detect the disconnect, and the
	 * driver would then unforce the link. That code lived in the ISR,
	 * so for it to work correctly the sequence error interrupt had to
	 * be enabled at all times.
	 */
	if (adapter->hw.mac_type == em_82542_rev2_0)
		E1000_WRITE_REG(&adapter->hw, IMC,
		    (0xffffffff & ~E1000_IMC_RXSEQ));
	else
		E1000_WRITE_REG(&adapter->hw, IMC, 0xffffffff);
}

static int
em_is_valid_ether_addr(uint8_t *addr)
{
	char zero_addr[6] = { 0, 0, 0, 0, 0, 0 };

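	/*
	 * Reject multicast addresses (low bit of the first octet set)
	 * and the all-zero address.
	 */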
	if ((addr[0] & 1) || (!bcmp(addr, zero_addr, ETHER_ADDR_LEN))) {
		return (FALSE);
	}

	return (TRUE);
}

void
em_write_pci_cfg(struct em_hw *hw, uint32_t reg, uint16_t *value)
{
	pci_write_config(((struct em_osdep *)hw->back)->dev, reg, *value, 2);
}

void
em_read_pci_cfg(struct em_hw *hw, uint32_t reg, uint16_t *value)
{
	*value = pci_read_config(((struct em_osdep *)hw->back)->dev, reg, 2);
}

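/*
 * Helpers for the shared hardware-layer code to toggle the PCI
 * Memory Write and Invalidate command bit (used, for example, while
 * working around 82542 MWI restrictions).
 */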
void
em_pci_set_mwi(struct em_hw *hw)
{
	pci_write_config(((struct em_osdep *)hw->back)->dev, PCIR_COMMAND,
	    (hw->pci_cmd_word | CMD_MEM_WRT_INVALIDATE), 2);
}

void
em_pci_clear_mwi(struct em_hw *hw)
{
	pci_write_config(((struct em_osdep *)hw->back)->dev, PCIR_COMMAND,
	    (hw->pci_cmd_word & ~CMD_MEM_WRT_INVALIDATE), 2);
}

/*********************************************************************
 * 82544 coexistence issue workaround.
 *    There are two issues:
 *       1. Transmit hang.
 *          To detect it, use the following equation:
 *          SIZE[3:0] + ADDR[2:0] = SUM[3:0].
 *          If SUM[3:0] is between 1 and 4, we will have this issue.
 *
 *       2. DAC (64-bit addressing).
 *          To detect it, use the same equation:
 *          SIZE[3:0] + ADDR[2:0] = SUM[3:0].
 *          If SUM[3:0] is between 9 and 0xc, we will have this issue.
 *
 *    WORKAROUND:
 *          Make sure a buffer does not end with SUM[3:0] equal to
 *          1-4 (hang) or 9-0xc (DAC).
 *********************************************************************/
static uint32_t
em_fill_descriptors (bus_addr_t address, uint32_t length,
		PDESC_ARRAY desc_array)
{
	uint32_t safe_terminator;

	/*
	 * The issue is sensitive to both length and address, so check
	 * the address first. A buffer of 4 bytes or less always goes
	 * out as a single descriptor.
	 */
	if (length <= 4) {
		desc_array->descriptor[0].address = address;
		desc_array->descriptor[0].length = length;
		desc_array->elements = 1;
		return (desc_array->elements);
	}
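	/* Compute SUM[3:0] = ADDR[2:0] + SIZE[3:0] from the errata above. */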
	safe_terminator = (uint32_t)((((uint32_t)address & 0x7) + (length & 0xF)) & 0xF);
	/*
	 * If the terminator does not fall within 0x1-0x4 or 0x9-0xC,
	 * a single descriptor is safe.
	 */
	if (safe_terminator == 0 ||
	    (safe_terminator > 4 && safe_terminator < 9) ||
	    (safe_terminator > 0xC && safe_terminator <= 0xF)) {
		desc_array->descriptor[0].address = address;
		desc_array->descriptor[0].length = length;
		desc_array->elements = 1;
		return (desc_array->elements);
	}

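	/*
	 * Unsafe ending address: split the buffer so that the last
	 * 4 bytes go out in their own descriptor.
	 */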
	desc_array->descriptor[0].address = address;
	desc_array->descriptor[0].length = length - 4;
	desc_array->descriptor[1].address = address + (length - 4);
	desc_array->descriptor[1].length = 4;
	desc_array->elements = 2;
	return (desc_array->elements);
}

/**********************************************************************
 *
 *  Update the board statistics counters.
 *
 **********************************************************************/
static void
em_update_stats_counters(struct adapter *adapter)
{
	struct ifnet   *ifp;

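	/*
	 * Only read the symbol and sequence error counters on copper
	 * links or while link is up.
	 */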
	if (adapter->hw.media_type == em_media_type_copper ||
	    (E1000_READ_REG(&adapter->hw, STATUS) & E1000_STATUS_LU)) {
		adapter->stats.symerrs += E1000_READ_REG(&adapter->hw, SYMERRS);
		adapter->stats.sec += E1000_READ_REG(&adapter->hw, SEC);
	}
	adapter->stats.crcerrs += E1000_READ_REG(&adapter->hw, CRCERRS);
	adapter->stats.mpc += E1000_READ_REG(&adapter->hw, MPC);
	adapter->stats.scc += E1000_READ_REG(&adapter->hw, SCC);
	adapter->stats.ecol += E1000_READ_REG(&adapter->hw, ECOL);

	adapter->stats.mcc += E1000_READ_REG(&adapter->hw, MCC);
	adapter->stats.latecol += E1000_READ_REG(&adapter->hw, LATECOL);
	adapter->stats.colc += E1000_READ_REG(&adapter->hw, COLC);
	adapter->stats.dc += E1000_READ_REG(&adapter->hw, DC);
	adapter->stats.rlec += E1000_READ_REG(&adapter->hw, RLEC);
	adapter->stats.xonrxc += E1000_READ_REG(&adapter->hw, XONRXC);
	adapter->stats.xontxc += E1000_READ_REG(&adapter->hw, XONTXC);
	adapter->stats.xoffrxc += E1000_READ_REG(&adapter->hw, XOFFRXC);
	adapter->stats.xofftxc += E1000_READ_REG(&adapter->hw, XOFFTXC);
	adapter->stats.fcruc += E1000_READ_REG(&adapter->hw, FCRUC);
	adapter->stats.prc64 += E1000_READ_REG(&adapter->hw, PRC64);
	adapter->stats.prc127 += E1000_READ_REG(&adapter->hw, PRC127);
	adapter->stats.prc255 += E1000_READ_REG(&adapter->hw, PRC255);
	adapter->stats.prc511 += E1000_READ_REG(&adapter->hw, PRC511);
	adapter->stats.prc1023 += E1000_READ_REG(&adapter->hw, PRC1023);
	adapter->stats.prc1522 += E1000_READ_REG(&adapter->hw, PRC1522);
	adapter->stats.gprc += E1000_READ_REG(&adapter->hw, GPRC);
	adapter->stats.bprc += E1000_READ_REG(&adapter->hw, BPRC);
	adapter->stats.mprc += E1000_READ_REG(&adapter->hw, MPRC);
	adapter->stats.gptc += E1000_READ_REG(&adapter->hw, GPTC);

	/*
	 * For the 64-bit byte counters the low dword must be read first;
	 * both registers clear on the read of the high dword.
	 */
	adapter->stats.gorcl += E1000_READ_REG(&adapter->hw, GORCL);
	adapter->stats.gorch += E1000_READ_REG(&adapter->hw, GORCH);
	adapter->stats.gotcl += E1000_READ_REG(&adapter->hw, GOTCL);
	adapter->stats.gotch += E1000_READ_REG(&adapter->hw, GOTCH);

	adapter->stats.rnbc += E1000_READ_REG(&adapter->hw, RNBC);
	adapter->stats.ruc += E1000_READ_REG(&adapter->hw, RUC);
	adapter->stats.rfc += E1000_READ_REG(&adapter->hw, RFC);
	adapter->stats.roc += E1000_READ_REG(&adapter->hw, ROC);
	adapter->stats.rjc += E1000_READ_REG(&adapter->hw, RJC);

	adapter->stats.torl += E1000_READ_REG(&adapter->hw, TORL);
	adapter->stats.torh += E1000_READ_REG(&adapter->hw, TORH);
	adapter->stats.totl += E1000_READ_REG(&adapter->hw, TOTL);
	adapter->stats.toth += E1000_READ_REG(&adapter->hw, TOTH);

	adapter->stats.tpr += E1000_READ_REG(&adapter->hw, TPR);
	adapter->stats.tpt += E1000_READ_REG(&adapter->hw, TPT);
	adapter->stats.ptc64 += E1000_READ_REG(&adapter->hw, PTC64);
	adapter->stats.ptc127 += E1000_READ_REG(&adapter->hw, PTC127);
	adapter->stats.ptc255 += E1000_READ_REG(&adapter->hw, PTC255);
	adapter->stats.ptc511 += E1000_READ_REG(&adapter->hw, PTC511);
	adapter->stats.ptc1023 += E1000_READ_REG(&adapter->hw, PTC1023);
	adapter->stats.ptc1522 += E1000_READ_REG(&adapter->hw, PTC1522);
	adapter->stats.mptc += E1000_READ_REG(&adapter->hw, MPTC);
	adapter->stats.bptc += E1000_READ_REG(&adapter->hw, BPTC);

	if (adapter->hw.mac_type >= em_82543) {
		adapter->stats.algnerrc += E1000_READ_REG(&adapter->hw, ALGNERRC);
		adapter->stats.rxerrc += E1000_READ_REG(&adapter->hw, RXERRC);
		adapter->stats.tncrs += E1000_READ_REG(&adapter->hw, TNCRS);
		adapter->stats.cexterr += E1000_READ_REG(&adapter->hw, CEXTERR);
		adapter->stats.tsctc += E1000_READ_REG(&adapter->hw, TSCTC);
		adapter->stats.tsctfc += E1000_READ_REG(&adapter->hw, TSCTFC);
	}
	ifp = adapter->ifp;

	ifp->if_collisions = adapter->stats.colc;

	/* Rx Errors */
	ifp->if_ierrors = adapter->stats.rxerrc + adapter->stats.crcerrs +
	    adapter->stats.algnerrc + adapter->stats.ruc + adapter->stats.roc +
	    adapter->stats.mpc + adapter->stats.cexterr;

	/* Tx Errors */
	ifp->if_oerrors = adapter->stats.ecol + adapter->stats.latecol +
	    adapter->watchdog_events;
}

/**********************************************************************
 *
 *  This routine is called only when em_display_debug_stats is enabled.
 *  It provides a way to take a look at important statistics
 *  maintained by the driver and hardware.
 *
 **********************************************************************/
static void
em_print_debug_info(struct adapter *adapter)
{
	device_t dev = adapter->dev;
	uint8_t *hw_addr = adapter->hw.hw_addr;

	device_printf(dev, "Adapter hardware address = %p\n", hw_addr);
	device_printf(dev, "CTRL = 0x%x RCTL = 0x%x\n",
	    E1000_READ_REG(&adapter->hw, CTRL),
	    E1000_READ_REG(&adapter->hw, RCTL));
	device_printf(dev, "Packet buffer = Tx=%dk Rx=%dk\n",
	    ((E1000_READ_REG(&adapter->hw, PBA) & 0xffff0000) >> 16),
	    (E1000_READ_REG(&adapter->hw, PBA) & 0xffff));
	device_printf(dev, "Flow control watermarks high = %d low = %d\n",
	    adapter->hw.fc_high_water,
	    adapter->hw.fc_low_water);
	device_printf(dev, "tx_int_delay = %d, tx_abs_int_delay = %d\n",
	    E1000_READ_REG(&adapter->hw, TIDV),
	    E1000_READ_REG(&adapter->hw, TADV));
	device_printf(dev, "rx_int_delay = %d, rx_abs_int_delay = %d\n",
	    E1000_READ_REG(&adapter->hw, RDTR),
	    E1000_READ_REG(&adapter->hw, RADV));
	device_printf(dev, "fifo workaround = %lld, fifo_reset_count = %lld\n",
	    (long long)adapter->tx_fifo_wrk_cnt,
	    (long long)adapter->tx_fifo_reset_cnt);
	device_printf(dev, "hw tdh = %d, hw tdt = %d\n",
	    E1000_READ_REG(&adapter->hw, TDH),
	    E1000_READ_REG(&adapter->hw, TDT));
	device_printf(dev, "Num Tx descriptors avail = %d\n",
	    adapter->num_tx_desc_avail);
	device_printf(dev, "Tx Descriptors not avail1 = %ld\n",
	    adapter->no_tx_desc_avail1);
	device_printf(dev, "Tx Descriptors not avail2 = %ld\n",
	    adapter->no_tx_desc_avail2);
	device_printf(dev, "Std mbuf failed = %ld\n",
	    adapter->mbuf_alloc_failed);
	device_printf(dev, "Std mbuf cluster failed = %ld\n",
	    adapter->mbuf_cluster_failed);
}

static void
em_print_hw_stats(struct adapter *adapter)
{
	device_t dev = adapter->dev;

	device_printf(dev, "Excessive collisions = %lld\n",
	    (long long)adapter->stats.ecol);
	device_printf(dev, "Symbol errors = %lld\n",
	    (long long)adapter->stats.symerrs);
	device_printf(dev, "Sequence errors = %lld\n",
	    (long long)adapter->stats.sec);
	device_printf(dev, "Defer count = %lld\n", (long long)adapter->stats.dc);

	device_printf(dev, "Missed Packets = %lld\n", (long long)adapter->stats.mpc);
	device_printf(dev, "Receive No Buffers = %lld\n",
	    (long long)adapter->stats.rnbc);
	/* RLEC is inaccurate on some hardware, calculate our own. */
	device_printf(dev, "Receive Length Errors = %lld\n",
	    ((long long)adapter->stats.roc + (long long)adapter->stats.ruc));
	device_printf(dev, "Receive errors = %lld\n",
	    (long long)adapter->stats.rxerrc);
	device_printf(dev, "Crc errors = %lld\n", (long long)adapter->stats.crcerrs);
	device_printf(dev, "Alignment errors = %lld\n",
	    (long long)adapter->stats.algnerrc);
	device_printf(dev, "Carrier extension errors = %lld\n",
	    (long long)adapter->stats.cexterr);
	device_printf(dev, "RX overruns = %ld\n", adapter->rx_overruns);
	device_printf(dev, "watchdog timeouts = %ld\n", adapter->watchdog_events);

	device_printf(dev, "XON Rcvd = %lld\n", (long long)adapter->stats.xonrxc);
	device_printf(dev, "XON Xmtd = %lld\n", (long long)adapter->stats.xontxc);
	device_printf(dev, "XOFF Rcvd = %lld\n", (long long)adapter->stats.xoffrxc);
	device_printf(dev, "XOFF Xmtd = %lld\n", (long long)adapter->stats.xofftxc);

	device_printf(dev, "Good Packets Rcvd = %lld\n",
	    (long long)adapter->stats.gprc);
	device_printf(dev, "Good Packets Xmtd = %lld\n",
	    (long long)adapter->stats.gptc);
	device_printf(dev, "TSO Contexts Xmtd = %lld\n",
	    (long long)adapter->stats.tsctc);
	device_printf(dev, "TSO Contexts Failed = %lld\n",
	    (long long)adapter->stats.tsctfc);
}

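/*
 * Sysctl handlers for the debug nodes: writing 1 to a node dumps the
 * corresponding state to the console; reading it just returns -1.
 * Assuming the attach code registers these under the device's sysctl
 * tree as "debug_info" and "stats", a dump can be triggered with,
 * e.g., sysctl dev.em.0.stats=1.
 */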
static int
em_sysctl_debug_info(SYSCTL_HANDLER_ARGS)
{
	struct adapter *adapter;
	int error;
	int result;

	result = -1;
	error = sysctl_handle_int(oidp, &result, 0, req);

	if (error || !req->newptr)
		return (error);

	if (result == 1) {
		adapter = (struct adapter *)arg1;
		em_print_debug_info(adapter);
	}

	return (error);
}

static int
em_sysctl_stats(SYSCTL_HANDLER_ARGS)
{
	struct adapter *adapter;
	int error;
	int result;

	result = -1;
	error = sysctl_handle_int(oidp, &result, 0, req);

	if (error || !req->newptr)
		return (error);

	if (result == 1) {
		adapter = (struct adapter *)arg1;
		em_print_hw_stats(adapter);
	}

	return (error);
}

static int
em_sysctl_int_delay(SYSCTL_HANDLER_ARGS)
{
	struct em_int_delay_info *info;
	struct adapter *adapter;
	uint32_t regval;
	int error;
	int usecs;
	int ticks;

	info = (struct em_int_delay_info *)arg1;
	usecs = info->value;
	error = sysctl_handle_int(oidp, &usecs, 0, req);
	if (error != 0 || req->newptr == NULL)
		return (error);
	if (usecs < 0 || usecs > E1000_TICKS_TO_USECS(65535))
		return (EINVAL);
	info->value = usecs;
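	/*
	 * The hardware delay timers count in 1.024 usec ticks;
	 * translate the sysctl's microsecond value into the 16-bit
	 * register field.
	 */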
	ticks = E1000_USECS_TO_TICKS(usecs);

	adapter = info->adapter;

	EM_LOCK(adapter);
	regval = E1000_READ_OFFSET(&adapter->hw, info->offset);
	regval = (regval & ~0xffff) | (ticks & 0xffff);
	/* Handle a few special cases. */
	switch (info->offset) {
	case E1000_RDTR:
	case E1000_82542_RDTR:
		regval |= E1000_RDT_FPDB;
		break;
	case E1000_TIDV:
	case E1000_82542_TIDV:
		if (ticks == 0) {
			adapter->txd_cmd &= ~E1000_TXD_CMD_IDE;
			/* Don't write 0 into the TIDV register. */
			regval++;
		} else
			adapter->txd_cmd |= E1000_TXD_CMD_IDE;
		break;
	}
	E1000_WRITE_OFFSET(&adapter->hw, info->offset, regval);
	EM_UNLOCK(adapter);
	return (0);
}

static void
em_add_int_delay_sysctl(struct adapter *adapter, const char *name,
	const char *description, struct em_int_delay_info *info,
	int offset, int value)
{
	info->adapter = adapter;
	info->offset = offset;
	info->value = value;
	SYSCTL_ADD_PROC(device_get_sysctl_ctx(adapter->dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
	    OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW,
	    info, 0, em_sysctl_int_delay, "I", description);
}

#ifndef DEVICE_POLLING
static void
em_add_int_process_limit(struct adapter *adapter, const char *name,
	const char *description, int *limit, int value)
{
	*limit = value;
	SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
	    OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW, limit, value, description);
}
#endif
