/* if_em.c revision 160956 */
/**************************************************************************

Copyright (c) 2001-2006, Intel Corporation
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

 1. Redistributions of source code must retain the above copyright notice,
    this list of conditions and the following disclaimer.

 2. Redistributions in binary form must reproduce the above copyright
    notice, this list of conditions and the following disclaimer in the
    documentation and/or other materials provided with the distribution.

 3. Neither the name of the Intel Corporation nor the names of its
    contributors may be used to endorse or promote products derived from
    this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.

***************************************************************************/

/*$FreeBSD: head/sys/dev/em/if_em.c 160956 2006-08-03 19:05:04Z pdeuskar $*/
35191806Sjamie
#ifdef HAVE_KERNEL_OPTION_HEADERS
#include "opt_device_polling.h"
#endif

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/bus.h>
#include <sys/endian.h>
#include <sys/kernel.h>
#include <sys/kthread.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/module.h>
#include <sys/rman.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/sysctl.h>
#include <sys/taskqueue.h>

#include <machine/bus.h>
#include <machine/resource.h>

#include <net/bpf.h>
#include <net/ethernet.h>
#include <net/if.h>
#include <net/if_arp.h>
#include <net/if_dl.h>
#include <net/if_media.h>

#include <net/if_types.h>
#include <net/if_vlan_var.h>

#include <netinet/in_systm.h>
#include <netinet/in.h>
#include <netinet/if_ether.h>
#include <netinet/ip.h>
#include <netinet/tcp.h>
#include <netinet/udp.h>

#include <dev/pci/pcivar.h>
#include <dev/pci/pcireg.h>
#include <dev/em/if_em_hw.h>
#include <dev/em/if_em.h>
79185029Spjd
/*********************************************************************
 *  Set this to one to display debug statistics
 *********************************************************************/
int	em_display_debug_stats = 0;

/*********************************************************************
 *  Driver version
 *********************************************************************/

char em_driver_version[] = "Version - 6.0.5";
90185029Spjd
91188894Sjamie
92185029Spjd/*********************************************************************
93185029Spjd *  PCI Device ID Table
94188894Sjamie *
95185029Spjd *  Used by probe to select devices to load on
96185029Spjd *  Last field stores an index into em_strings
97185029Spjd *  Last entry must be all 0s
98185029Spjd *
99185029Spjd *  { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index }
100185029Spjd *********************************************************************/
101185029Spjd
102185029Spjdstatic em_vendor_info_t em_vendor_info_array[] =
103185029Spjd{
104185029Spjd	/* Intel(R) PRO/1000 Network Connection */
105188894Sjamie	{ 0x8086, E1000_DEV_ID_82540EM,		PCI_ANY_ID, PCI_ANY_ID, 0},
106185029Spjd	{ 0x8086, E1000_DEV_ID_82540EM_LOM,	PCI_ANY_ID, PCI_ANY_ID, 0},
107185029Spjd	{ 0x8086, E1000_DEV_ID_82540EP,		PCI_ANY_ID, PCI_ANY_ID, 0},
108185029Spjd	{ 0x8086, E1000_DEV_ID_82540EP_LOM,	PCI_ANY_ID, PCI_ANY_ID, 0},
109185029Spjd	{ 0x8086, E1000_DEV_ID_82540EP_LP,	PCI_ANY_ID, PCI_ANY_ID, 0},
110185029Spjd
111185029Spjd	{ 0x8086, E1000_DEV_ID_82541EI,		PCI_ANY_ID, PCI_ANY_ID, 0},
112185029Spjd	{ 0x8086, E1000_DEV_ID_82541ER,		PCI_ANY_ID, PCI_ANY_ID, 0},
113185029Spjd	{ 0x8086, E1000_DEV_ID_82541ER_LOM,	PCI_ANY_ID, PCI_ANY_ID, 0},
114185029Spjd	{ 0x8086, E1000_DEV_ID_82541EI_MOBILE,	PCI_ANY_ID, PCI_ANY_ID, 0},
115185029Spjd	{ 0x8086, E1000_DEV_ID_82541GI,		PCI_ANY_ID, PCI_ANY_ID, 0},
116185029Spjd	{ 0x8086, E1000_DEV_ID_82541GI_LF,	PCI_ANY_ID, PCI_ANY_ID, 0},
117185029Spjd	{ 0x8086, E1000_DEV_ID_82541GI_MOBILE,	PCI_ANY_ID, PCI_ANY_ID, 0},
118185029Spjd
119185029Spjd	{ 0x8086, E1000_DEV_ID_82542,		PCI_ANY_ID, PCI_ANY_ID, 0},
120185029Spjd
121188894Sjamie	{ 0x8086, E1000_DEV_ID_82543GC_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
122188894Sjamie	{ 0x8086, E1000_DEV_ID_82543GC_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
123188894Sjamie
124188894Sjamie	{ 0x8086, E1000_DEV_ID_82544EI_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
125188894Sjamie	{ 0x8086, E1000_DEV_ID_82544EI_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
126188894Sjamie	{ 0x8086, E1000_DEV_ID_82544GC_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
127188894Sjamie	{ 0x8086, E1000_DEV_ID_82544GC_LOM,	PCI_ANY_ID, PCI_ANY_ID, 0},
128188894Sjamie
129188894Sjamie	{ 0x8086, E1000_DEV_ID_82545EM_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
130188894Sjamie	{ 0x8086, E1000_DEV_ID_82545EM_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
131185029Spjd	{ 0x8086, E1000_DEV_ID_82545GM_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
132188894Sjamie	{ 0x8086, E1000_DEV_ID_82545GM_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
133185029Spjd	{ 0x8086, E1000_DEV_ID_82545GM_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
134185029Spjd
135185029Spjd	{ 0x8086, E1000_DEV_ID_82546EB_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
136188894Sjamie	{ 0x8086, E1000_DEV_ID_82546EB_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
137185029Spjd	{ 0x8086, E1000_DEV_ID_82546EB_QUAD_COPPER, PCI_ANY_ID, PCI_ANY_ID, 0},
138188894Sjamie	{ 0x8086, E1000_DEV_ID_82546GB_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
139188894Sjamie	{ 0x8086, E1000_DEV_ID_82546GB_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
140188894Sjamie	{ 0x8086, E1000_DEV_ID_82546GB_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
141188894Sjamie	{ 0x8086, E1000_DEV_ID_82546GB_PCIE,	PCI_ANY_ID, PCI_ANY_ID, 0},
142188894Sjamie	{ 0x8086, E1000_DEV_ID_82546GB_QUAD_COPPER, PCI_ANY_ID, PCI_ANY_ID, 0},
143188894Sjamie	{ 0x8086, E1000_DEV_ID_82546GB_QUAD_COPPER_KSP3,
144185029Spjd						PCI_ANY_ID, PCI_ANY_ID, 0},
145185029Spjd
146185029Spjd	{ 0x8086, E1000_DEV_ID_82547EI,		PCI_ANY_ID, PCI_ANY_ID, 0},
147185029Spjd	{ 0x8086, E1000_DEV_ID_82547EI_MOBILE,	PCI_ANY_ID, PCI_ANY_ID, 0},
148185029Spjd	{ 0x8086, E1000_DEV_ID_82547GI,		PCI_ANY_ID, PCI_ANY_ID, 0},
149185029Spjd
150185029Spjd	{ 0x8086, E1000_DEV_ID_82571EB_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
151185029Spjd	{ 0x8086, E1000_DEV_ID_82571EB_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
152185029Spjd	{ 0x8086, E1000_DEV_ID_82571EB_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
153185029Spjd
154185029Spjd	{ 0x8086, E1000_DEV_ID_82572EI_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
155185029Spjd	{ 0x8086, E1000_DEV_ID_82572EI_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
156188894Sjamie	{ 0x8086, E1000_DEV_ID_82572EI_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
157188894Sjamie	{ 0x8086, E1000_DEV_ID_82572EI,		PCI_ANY_ID, PCI_ANY_ID, 0},
158185029Spjd
159185029Spjd	{ 0x8086, E1000_DEV_ID_82573E,		PCI_ANY_ID, PCI_ANY_ID, 0},
160185029Spjd	{ 0x8086, E1000_DEV_ID_82573E_IAMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
161188894Sjamie	{ 0x8086, E1000_DEV_ID_82573L,		PCI_ANY_ID, PCI_ANY_ID, 0},
162188894Sjamie	{ 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_SPT,
163188894Sjamie						PCI_ANY_ID, PCI_ANY_ID, 0},
164188894Sjamie	{ 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_SPT,
165185029Spjd						PCI_ANY_ID, PCI_ANY_ID, 0},
166185029Spjd	{ 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_DPT,
167185029Spjd						PCI_ANY_ID, PCI_ANY_ID, 0},
168185029Spjd	{ 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_DPT,
169185029Spjd						PCI_ANY_ID, PCI_ANY_ID, 0},
170185029Spjd	{ 0x8086, E1000_DEV_ID_ICH8_IGP_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
171185029Spjd	{ 0x8086, E1000_DEV_ID_ICH8_IGP_C,	PCI_ANY_ID, PCI_ANY_ID, 0},
172185029Spjd	{ 0x8086, E1000_DEV_ID_ICH8_IFE,	PCI_ANY_ID, PCI_ANY_ID, 0},
173185029Spjd
174188894Sjamie	/* required last entry */
175188894Sjamie	{ 0, 0, 0, 0, 0}
176188894Sjamie};
177188894Sjamie
/*********************************************************************
 *  Table of branding strings for all supported NICs.
 *  Indexed by the last field of em_vendor_info_array entries.
 *********************************************************************/

static char *em_strings[] = {
	"Intel(R) PRO/1000 Network Connection"
};
185185029Spjd
186185029Spjd/*********************************************************************
187185029Spjd *  Function prototypes
188185029Spjd *********************************************************************/
189185029Spjdstatic int	em_probe(device_t);
190185029Spjdstatic int	em_attach(device_t);
191188894Sjamiestatic int	em_detach(device_t);
192188894Sjamiestatic int	em_shutdown(device_t);
193185029Spjdstatic int	em_suspend(device_t);
194185029Spjdstatic int	em_resume(device_t);
195185029Spjdstatic void	em_start(struct ifnet *);
196185029Spjdstatic void	em_start_locked(struct ifnet *ifp);
197185029Spjdstatic int	em_ioctl(struct ifnet *, u_long, caddr_t);
198188894Sjamiestatic void	em_watchdog(struct ifnet *);
199185029Spjdstatic void	em_init(void *);
200185029Spjdstatic void	em_init_locked(struct adapter *);
201185029Spjdstatic void	em_stop(void *);
202185029Spjdstatic void	em_media_status(struct ifnet *, struct ifmediareq *);
203185029Spjdstatic int	em_media_change(struct ifnet *);
204188894Sjamiestatic void	em_identify_hardware(struct adapter *);
205188894Sjamiestatic int	em_allocate_pci_resources(struct adapter *);
206188894Sjamiestatic int	em_allocate_intr(struct adapter *);
207188894Sjamiestatic void	em_free_intr(struct adapter *);
208188894Sjamiestatic void	em_free_pci_resources(struct adapter *);
209188894Sjamiestatic void	em_local_timer(void *);
210188894Sjamiestatic int	em_hardware_init(struct adapter *);
211188894Sjamiestatic void	em_setup_interface(device_t, struct adapter *);
212188894Sjamiestatic int	em_setup_transmit_structures(struct adapter *);
213188894Sjamiestatic void	em_initialize_transmit_unit(struct adapter *);
214188894Sjamiestatic int	em_setup_receive_structures(struct adapter *);
215188894Sjamiestatic void	em_initialize_receive_unit(struct adapter *);
216188894Sjamiestatic void	em_enable_intr(struct adapter *);
217188894Sjamiestatic void	em_disable_intr(struct adapter *);
218188894Sjamiestatic void	em_free_transmit_structures(struct adapter *);
219188894Sjamiestatic void	em_free_receive_structures(struct adapter *);
220188894Sjamiestatic void	em_update_stats_counters(struct adapter *);
221188894Sjamiestatic void	em_txeof(struct adapter *);
222188894Sjamiestatic int	em_allocate_receive_structures(struct adapter *);
223188894Sjamiestatic int	em_allocate_transmit_structures(struct adapter *);
224188894Sjamiestatic int	em_rxeof(struct adapter *, int);
225188894Sjamie#ifndef __NO_STRICT_ALIGNMENT
226188894Sjamiestatic int	em_fixup_rx(struct adapter *);
227188894Sjamie#endif
228188894Sjamiestatic void	em_receive_checksum(struct adapter *, struct em_rx_desc *,
229188894Sjamie		    struct mbuf *);
230185029Spjdstatic void	em_transmit_checksum_setup(struct adapter *, struct mbuf *,
231188894Sjamie		    uint32_t *, uint32_t *);
232188894Sjamiestatic void	em_set_promisc(struct adapter *);
233188894Sjamiestatic void	em_disable_promisc(struct adapter *);
234188894Sjamiestatic void	em_set_multi(struct adapter *);
235188894Sjamiestatic void	em_print_hw_stats(struct adapter *);
236188894Sjamiestatic void	em_update_link_status(struct adapter *);
237188894Sjamiestatic int	em_get_buf(int i, struct adapter *, struct mbuf *);
238188894Sjamiestatic void	em_enable_vlans(struct adapter *);
239188894Sjamiestatic void	em_disable_vlans(struct adapter *);
240188894Sjamiestatic int	em_encap(struct adapter *, struct mbuf **);
241188894Sjamiestatic void	em_smartspeed(struct adapter *);
242188894Sjamiestatic int	em_82547_fifo_workaround(struct adapter *, int);
243188894Sjamiestatic void	em_82547_update_fifo_head(struct adapter *, int);
244188894Sjamiestatic int	em_82547_tx_fifo_reset(struct adapter *);
245185029Spjdstatic void	em_82547_move_tail(void *arg);
246185029Spjdstatic void	em_82547_move_tail_locked(struct adapter *);
247185029Spjdstatic int	em_dma_malloc(struct adapter *, bus_size_t,
248185029Spjd		struct em_dma_alloc *, int);
249188894Sjamiestatic void	em_dma_free(struct adapter *, struct em_dma_alloc *);
250185029Spjdstatic void	em_print_debug_info(struct adapter *);
251185029Spjdstatic int 	em_is_valid_ether_addr(uint8_t *);
252185029Spjdstatic int	em_sysctl_stats(SYSCTL_HANDLER_ARGS);
253185029Spjdstatic int	em_sysctl_debug_info(SYSCTL_HANDLER_ARGS);
254185029Spjdstatic uint32_t	em_fill_descriptors (bus_addr_t address, uint32_t length,
255185029Spjd		    PDESC_ARRAY desc_array);
256188894Sjamiestatic int	em_sysctl_int_delay(SYSCTL_HANDLER_ARGS);
257188894Sjamiestatic void	em_add_int_delay_sysctl(struct adapter *, const char *,
258185029Spjd		const char *, struct em_int_delay_info *, int, int);
259185029Spjd
260185029Spjd/*
261185029Spjd * Fast interrupt handler and legacy ithread/polling modes are
262185029Spjd * mutually exclusive.
263188894Sjamie */
264185029Spjd#ifdef DEVICE_POLLING
265188894Sjamiestatic poll_handler_t em_poll;
266185029Spjdstatic void	em_intr(void *);
267188894Sjamie#else
268188894Sjamiestatic void	em_intr_fast(void *);
269188894Sjamiestatic void	em_add_int_process_limit(struct adapter *, const char *,
270188894Sjamie		const char *, int *, int);
271185029Spjdstatic void	em_handle_rxtx(void *context, int pending);
272188894Sjamiestatic void	em_handle_link(void *context, int pending);
273188894Sjamie#endif
274185029Spjd
275185029Spjd/*********************************************************************
276185029Spjd *  FreeBSD Device Interface Entry Points
277185029Spjd *********************************************************************/
278185029Spjd
279188894Sjamiestatic device_method_t em_methods[] = {
280188894Sjamie	/* Device interface */
281188894Sjamie	DEVMETHOD(device_probe, em_probe),
282188894Sjamie	DEVMETHOD(device_attach, em_attach),
283188894Sjamie	DEVMETHOD(device_detach, em_detach),
284188894Sjamie	DEVMETHOD(device_shutdown, em_shutdown),
285188894Sjamie	DEVMETHOD(device_suspend, em_suspend),
286188894Sjamie	DEVMETHOD(device_resume, em_resume),
287188894Sjamie	{0, 0}
288188894Sjamie};
289185029Spjd
290185029Spjdstatic driver_t em_driver = {
291185029Spjd	"em", em_methods, sizeof(struct adapter),
292185029Spjd};
293185029Spjd
294185029Spjdstatic devclass_t em_devclass;
295185029SpjdDRIVER_MODULE(em, pci, em_driver, em_devclass, 0, 0);
296185029SpjdMODULE_DEPEND(em, pci, 1, 1, 1);
297185029SpjdMODULE_DEPEND(em, ether, 1, 1, 1);
298185029Spjd
299185029Spjd/*********************************************************************
300185029Spjd *  Tunable default values.
301191711Sjamie *********************************************************************/
302191711Sjamie
303191711Sjamie#define E1000_TICKS_TO_USECS(ticks)	((1024 * (ticks) + 500) / 1000)
304191711Sjamie#define E1000_USECS_TO_TICKS(usecs)	((1000 * (usecs) + 512) / 1024)
305185029Spjd
306185029Spjdstatic int em_tx_int_delay_dflt = E1000_TICKS_TO_USECS(EM_TIDV);
307188894Sjamiestatic int em_rx_int_delay_dflt = E1000_TICKS_TO_USECS(EM_RDTR);
308188894Sjamiestatic int em_tx_abs_int_delay_dflt = E1000_TICKS_TO_USECS(EM_TADV);
309185029Spjdstatic int em_rx_abs_int_delay_dflt = E1000_TICKS_TO_USECS(EM_RADV);
310185029Spjdstatic int em_rxd = EM_DEFAULT_RXD;
311185029Spjdstatic int em_txd = EM_DEFAULT_TXD;
312185029Spjdstatic int em_smart_pwr_down = FALSE;
313185029Spjd
314185029SpjdTUNABLE_INT("hw.em.tx_int_delay", &em_tx_int_delay_dflt);
315188894SjamieTUNABLE_INT("hw.em.rx_int_delay", &em_rx_int_delay_dflt);
316188894SjamieTUNABLE_INT("hw.em.tx_abs_int_delay", &em_tx_abs_int_delay_dflt);
317185029SpjdTUNABLE_INT("hw.em.rx_abs_int_delay", &em_rx_abs_int_delay_dflt);
318188894SjamieTUNABLE_INT("hw.em.rxd", &em_rxd);
319188894SjamieTUNABLE_INT("hw.em.txd", &em_txd);
320185029SpjdTUNABLE_INT("hw.em.smart_pwr_down", &em_smart_pwr_down);
321185029Spjd#ifndef DEVICE_POLLING
322185029Spjdstatic int em_rx_process_limit = 100;
323185029SpjdTUNABLE_INT("hw.em.rx_process_limit", &em_rx_process_limit);
324185029Spjd#endif
325185029Spjd
326185029Spjd/*********************************************************************
327185029Spjd *  Device identification routine
328185029Spjd *
329185029Spjd *  em_probe determines if the driver should be loaded on
330185029Spjd *  adapter based on PCI vendor/device id of the adapter.
331185029Spjd *
332185029Spjd *  return BUS_PROBE_DEFAULT on success, positive on failure
333185029Spjd *********************************************************************/
334185029Spjd
335185029Spjdstatic int
336185029Spjdem_probe(device_t dev)
337185029Spjd{
338188894Sjamie	char		adapter_name[60];
339188894Sjamie	uint16_t	pci_vendor_id = 0;
340188894Sjamie	uint16_t	pci_device_id = 0;
341188894Sjamie	uint16_t	pci_subvendor_id = 0;
342188894Sjamie	uint16_t	pci_subdevice_id = 0;
343188894Sjamie	em_vendor_info_t *ent;
344188894Sjamie
345188894Sjamie	INIT_DEBUGOUT("em_probe: begin");
346188894Sjamie
347188894Sjamie	pci_vendor_id = pci_get_vendor(dev);
348188894Sjamie	if (pci_vendor_id != EM_VENDOR_ID)
349188894Sjamie		return (ENXIO);
350188894Sjamie
351188894Sjamie	pci_device_id = pci_get_device(dev);
352188894Sjamie	pci_subvendor_id = pci_get_subvendor(dev);
353191673Sjamie	pci_subdevice_id = pci_get_subdevice(dev);
354188894Sjamie
355191673Sjamie	ent = em_vendor_info_array;
356188894Sjamie	while (ent->vendor_id != 0) {
357188894Sjamie		if ((pci_vendor_id == ent->vendor_id) &&
358188894Sjamie		    (pci_device_id == ent->device_id) &&
359188894Sjamie
360188894Sjamie		    ((pci_subvendor_id == ent->subvendor_id) ||
361188894Sjamie		    (ent->subvendor_id == PCI_ANY_ID)) &&
362188894Sjamie
363185029Spjd		    ((pci_subdevice_id == ent->subdevice_id) ||
364185029Spjd		    (ent->subdevice_id == PCI_ANY_ID))) {
365185029Spjd			sprintf(adapter_name, "%s %s",
366188894Sjamie				em_strings[ent->index],
367185029Spjd				em_driver_version);
368185029Spjd			device_set_desc_copy(dev, adapter_name);
369185029Spjd			return (BUS_PROBE_DEFAULT);
370185029Spjd		}
371185029Spjd		ent++;
372185029Spjd	}
373185029Spjd
374185029Spjd	return (ENXIO);
375185029Spjd}
376185029Spjd
377188894Sjamie/*********************************************************************
378188894Sjamie *  Device initialization routine
379188894Sjamie *
380188894Sjamie *  The attach entry point is called when the driver is being loaded.
381188894Sjamie *  This routine identifies the type of hardware, allocates all resources
382188894Sjamie *  and initializes the hardware.
383188894Sjamie *
384188894Sjamie *  return 0 on success, positive on failure
385185029Spjd *********************************************************************/
386185029Spjd
387185029Spjdstatic int
388185029Spjdem_attach(device_t dev)
389185029Spjd{
390185029Spjd	struct adapter	*adapter;
391185029Spjd	int		tsize, rsize;
392185029Spjd	int		error = 0;
393185029Spjd
394185029Spjd	INIT_DEBUGOUT("em_attach: begin");
395185029Spjd
396188894Sjamie	adapter = device_get_softc(dev);
397193030Srwatson	adapter->dev = adapter->osdep.dev = dev;
398188894Sjamie	EM_LOCK_INIT(adapter, device_get_nameunit(dev));
399185029Spjd
400188894Sjamie	/* SYSCTL stuff */
401185029Spjd	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
402185029Spjd	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
403185029Spjd	    OID_AUTO, "debug_info", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
404	    em_sysctl_debug_info, "I", "Debug Information");
405
406	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
407	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
408	    OID_AUTO, "stats", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
409	    em_sysctl_stats, "I", "Statistics");
410
411	callout_init(&adapter->timer, CALLOUT_MPSAFE);
412	callout_init(&adapter->tx_fifo_timer, CALLOUT_MPSAFE);
413
414	/* Determine hardware revision */
415	em_identify_hardware(adapter);
416
417	/* Set up some sysctls for the tunable interrupt delays */
418	em_add_int_delay_sysctl(adapter, "rx_int_delay",
419	    "receive interrupt delay in usecs", &adapter->rx_int_delay,
420	    E1000_REG_OFFSET(&adapter->hw, RDTR), em_rx_int_delay_dflt);
421	em_add_int_delay_sysctl(adapter, "tx_int_delay",
422	    "transmit interrupt delay in usecs", &adapter->tx_int_delay,
423	    E1000_REG_OFFSET(&adapter->hw, TIDV), em_tx_int_delay_dflt);
424	if (adapter->hw.mac_type >= em_82540) {
425		em_add_int_delay_sysctl(adapter, "rx_abs_int_delay",
426		    "receive interrupt delay limit in usecs",
427		    &adapter->rx_abs_int_delay,
428		    E1000_REG_OFFSET(&adapter->hw, RADV),
429		    em_rx_abs_int_delay_dflt);
430		em_add_int_delay_sysctl(adapter, "tx_abs_int_delay",
431		    "transmit interrupt delay limit in usecs",
432		    &adapter->tx_abs_int_delay,
433		    E1000_REG_OFFSET(&adapter->hw, TADV),
434		    em_tx_abs_int_delay_dflt);
435	}
436
437#ifndef DEVICE_POLLING
438	/* Sysctls for limiting the amount of work done in the taskqueue */
439	em_add_int_process_limit(adapter, "rx_processing_limit",
440	    "max number of rx packets to process", &adapter->rx_process_limit,
441	    em_rx_process_limit);
442#endif
443
444	/*
445	 * Validate number of transmit and receive descriptors. It
446	 * must not exceed hardware maximum, and must be multiple
447	 * of EM_DBA_ALIGN.
448	 */
449	if (((em_txd * sizeof(struct em_tx_desc)) % EM_DBA_ALIGN) != 0 ||
450	    (adapter->hw.mac_type >= em_82544 && em_txd > EM_MAX_TXD) ||
451	    (adapter->hw.mac_type < em_82544 && em_txd > EM_MAX_TXD_82543) ||
452	    (em_txd < EM_MIN_TXD)) {
453		device_printf(dev, "Using %d TX descriptors instead of %d!\n",
454		    EM_DEFAULT_TXD, em_txd);
455		adapter->num_tx_desc = EM_DEFAULT_TXD;
456	} else
457		adapter->num_tx_desc = em_txd;
458	if (((em_rxd * sizeof(struct em_rx_desc)) % EM_DBA_ALIGN) != 0 ||
459	    (adapter->hw.mac_type >= em_82544 && em_rxd > EM_MAX_RXD) ||
460	    (adapter->hw.mac_type < em_82544 && em_rxd > EM_MAX_RXD_82543) ||
461	    (em_rxd < EM_MIN_RXD)) {
462		device_printf(dev, "Using %d RX descriptors instead of %d!\n",
463		    EM_DEFAULT_RXD, em_rxd);
464		adapter->num_rx_desc = EM_DEFAULT_RXD;
465	} else
466		adapter->num_rx_desc = em_rxd;
467
468	adapter->hw.autoneg = DO_AUTO_NEG;
469	adapter->hw.wait_autoneg_complete = WAIT_FOR_AUTO_NEG_DEFAULT;
470	adapter->hw.autoneg_advertised = AUTONEG_ADV_DEFAULT;
471	adapter->hw.tbi_compatibility_en = TRUE;
472	adapter->rx_buffer_len = EM_RXBUFFER_2048;
473
474	adapter->hw.phy_init_script = 1;
475	adapter->hw.phy_reset_disable = FALSE;
476
477#ifndef EM_MASTER_SLAVE
478	adapter->hw.master_slave = em_ms_hw_default;
479#else
480	adapter->hw.master_slave = EM_MASTER_SLAVE;
481#endif
482	/*
483	 * Set the max frame size assuming standard ethernet
484	 * sized frames.
485	 */
486	adapter->hw.max_frame_size = ETHERMTU + ETHER_HDR_LEN + ETHER_CRC_LEN;
487
488	adapter->hw.min_frame_size = MINIMUM_ETHERNET_PACKET_SIZE + ETHER_CRC_LEN;
489
490	/*
491	 * This controls when hardware reports transmit completion
492	 * status.
493	 */
494	adapter->hw.report_tx_early = 1;
495	if (em_allocate_pci_resources(adapter)) {
496		device_printf(dev, "Allocation of PCI resources failed\n");
497		error = ENXIO;
498		goto err_pci;
499	}
500
501	/* Initialize eeprom parameters */
502	em_init_eeprom_params(&adapter->hw);
503
504	tsize = roundup2(adapter->num_tx_desc * sizeof(struct em_tx_desc),
505	    EM_DBA_ALIGN);
506
507	/* Allocate Transmit Descriptor ring */
508	if (em_dma_malloc(adapter, tsize, &adapter->txdma, BUS_DMA_NOWAIT)) {
509		device_printf(dev, "Unable to allocate tx_desc memory\n");
510		error = ENOMEM;
511		goto err_tx_desc;
512	}
513	adapter->tx_desc_base = (struct em_tx_desc *)adapter->txdma.dma_vaddr;
514
515	rsize = roundup2(adapter->num_rx_desc * sizeof(struct em_rx_desc),
516	    EM_DBA_ALIGN);
517
518	/* Allocate Receive Descriptor ring */
519	if (em_dma_malloc(adapter, rsize, &adapter->rxdma, BUS_DMA_NOWAIT)) {
520		device_printf(dev, "Unable to allocate rx_desc memory\n");
521		error = ENOMEM;
522		goto err_rx_desc;
523	}
524	adapter->rx_desc_base = (struct em_rx_desc *)adapter->rxdma.dma_vaddr;
525
526	/* Initialize the hardware */
527	if (em_hardware_init(adapter)) {
528		device_printf(dev, "Unable to initialize the hardware\n");
529		error = EIO;
530		goto err_hw_init;
531	}
532
533	/* Copy the permanent MAC address out of the EEPROM */
534	if (em_read_mac_addr(&adapter->hw) < 0) {
535		device_printf(dev, "EEPROM read error while reading MAC"
536		    " address\n");
537		error = EIO;
538		goto err_hw_init;
539	}
540
541	if (!em_is_valid_ether_addr(adapter->hw.mac_addr)) {
542		device_printf(dev, "Invalid MAC address\n");
543		error = EIO;
544		goto err_hw_init;
545	}
546
547	/* Setup OS specific network interface */
548	em_setup_interface(dev, adapter);
549
550	em_allocate_intr(adapter);
551
552	/* Initialize statistics */
553	em_clear_hw_cntrs(&adapter->hw);
554	em_update_stats_counters(adapter);
555	adapter->hw.get_link_status = 1;
556	em_update_link_status(adapter);
557
558	/* Indicate SOL/IDER usage */
559	if (em_check_phy_reset_block(&adapter->hw))
560		device_printf(dev,
561		    "PHY reset is blocked due to SOL/IDER session.\n");
562
563	/* Identify 82544 on PCIX */
564	em_get_bus_info(&adapter->hw);
565	if(adapter->hw.bus_type == em_bus_type_pcix && adapter->hw.mac_type == em_82544)
566		adapter->pcix_82544 = TRUE;
567	else
568		adapter->pcix_82544 = FALSE;
569
570	INIT_DEBUGOUT("em_attach: end");
571
572	return (0);
573
574err_hw_init:
575	em_dma_free(adapter, &adapter->rxdma);
576err_rx_desc:
577	em_dma_free(adapter, &adapter->txdma);
578err_tx_desc:
579err_pci:
580	em_free_intr(adapter);
581	em_free_pci_resources(adapter);
582	EM_LOCK_DESTROY(adapter);
583
584	return (error);
585}
586
587/*********************************************************************
588 *  Device removal routine
589 *
590 *  The detach entry point is called when the driver is being removed.
591 *  This routine stops the adapter and deallocates all the resources
592 *  that were allocated for driver operation.
593 *
594 *  return 0 on success, positive on failure
595 *********************************************************************/
596
597static int
598em_detach(device_t dev)
599{
600	struct adapter	*adapter = device_get_softc(dev);
601	struct ifnet	*ifp = adapter->ifp;
602
603	INIT_DEBUGOUT("em_detach: begin");
604
605#ifdef DEVICE_POLLING
606	if (ifp->if_capenable & IFCAP_POLLING)
607		ether_poll_deregister(ifp);
608#endif
609
610	em_free_intr(adapter);
611	EM_LOCK(adapter);
612	adapter->in_detach = 1;
613	em_stop(adapter);
614	em_phy_hw_reset(&adapter->hw);
615	EM_UNLOCK(adapter);
616	ether_ifdetach(adapter->ifp);
617
618	em_free_pci_resources(adapter);
619	bus_generic_detach(dev);
620	if_free(ifp);
621
622	/* Free Transmit Descriptor ring */
623	if (adapter->tx_desc_base) {
624		em_dma_free(adapter, &adapter->txdma);
625		adapter->tx_desc_base = NULL;
626	}
627
628	/* Free Receive Descriptor ring */
629	if (adapter->rx_desc_base) {
630		em_dma_free(adapter, &adapter->rxdma);
631		adapter->rx_desc_base = NULL;
632	}
633
634	EM_LOCK_DESTROY(adapter);
635
636	return (0);
637}
638
639/*********************************************************************
640 *
641 *  Shutdown entry point
642 *
643 **********************************************************************/
644
645static int
646em_shutdown(device_t dev)
647{
648	struct adapter *adapter = device_get_softc(dev);
649	EM_LOCK(adapter);
650	em_stop(adapter);
651	EM_UNLOCK(adapter);
652	return (0);
653}
654
655/*
656 * Suspend/resume device methods.
657 */
658static int
659em_suspend(device_t dev)
660{
661	struct adapter *adapter = device_get_softc(dev);
662
663	EM_LOCK(adapter);
664	em_stop(adapter);
665	EM_UNLOCK(adapter);
666
667	return bus_generic_suspend(dev);
668}
669
670static int
671em_resume(device_t dev)
672{
673	struct adapter *adapter = device_get_softc(dev);
674	struct ifnet *ifp = adapter->ifp;
675
676	EM_LOCK(adapter);
677	em_init_locked(adapter);
678	if ((ifp->if_flags & IFF_UP) &&
679	    (ifp->if_drv_flags & IFF_DRV_RUNNING))
680		em_start_locked(ifp);
681	EM_UNLOCK(adapter);
682
683	return bus_generic_resume(dev);
684}
685
686
687/*********************************************************************
688 *  Transmit entry point
689 *
690 *  em_start is called by the stack to initiate a transmit.
691 *  The driver will remain in this routine as long as there are
692 *  packets to transmit and transmit resources are available.
693 *  In case resources are not available stack is notified and
694 *  the packet is requeued.
695 **********************************************************************/
696
697static void
698em_start_locked(struct ifnet *ifp)
699{
700	struct adapter	*adapter = ifp->if_softc;
701	struct mbuf	*m_head;
702
703	EM_LOCK_ASSERT(adapter);
704
705	if ((ifp->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) !=
706	    IFF_DRV_RUNNING)
707		return;
708	if (!adapter->link_active)
709		return;
710
711	while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
712
713		IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
714		if (m_head == NULL)
715			break;
716		/*
717		 * em_encap() can modify our pointer, and or make it NULL on
718		 * failure.  In that event, we can't requeue.
719		 */
720		if (em_encap(adapter, &m_head)) {
721			if (m_head == NULL)
722				break;
723			ifp->if_drv_flags |= IFF_DRV_OACTIVE;
724			IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
725			break;
726		}
727
728		/* Send a copy of the frame to the BPF listener */
729		BPF_MTAP(ifp, m_head);
730
731		/* Set timeout in case hardware has problems transmitting. */
732		ifp->if_timer = EM_TX_TIMEOUT;
733	}
734}
735
736static void
737em_start(struct ifnet *ifp)
738{
739	struct adapter *adapter = ifp->if_softc;
740
741	EM_LOCK(adapter);
742	if (ifp->if_drv_flags & IFF_DRV_RUNNING)
743		em_start_locked(ifp);
744	EM_UNLOCK(adapter);
745}
746
747/*********************************************************************
748 *  Ioctl entry point
749 *
750 *  em_ioctl is called when the user wants to configure the
751 *  interface.
752 *
753 *  return 0 on success, positive on failure
754 **********************************************************************/
755
/*
 * em_ioctl - if_ioctl entry point.
 *
 * Handles interface configuration requests (address, MTU, flags,
 * multicast membership, media, capabilities).  Unhandled commands are
 * passed through to ether_ioctl().  Returns 0 on success, errno on
 * failure.  Requests arriving while the device is detaching are
 * silently accepted and ignored.
 */
static int
em_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
{
	struct adapter	*adapter = ifp->if_softc;
	struct ifreq *ifr = (struct ifreq *)data;
	struct ifaddr *ifa = (struct ifaddr *)data;
	int error = 0;

	if (adapter->in_detach)
		return (error);

	switch (command) {
	case SIOCSIFADDR:
	case SIOCGIFADDR:
		if (ifa->ifa_addr->sa_family == AF_INET) {
			/*
			 * XXX
			 * Since resetting hardware takes a very long time
			 * and results in link renegotiation, we only
			 * initialize the hardware when it is absolutely
			 * required.
			 */
			ifp->if_flags |= IFF_UP;
			if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) {
				EM_LOCK(adapter);
				em_init_locked(adapter);
				EM_UNLOCK(adapter);
			}
			arp_ifinit(ifp, ifa);
		} else
			error = ether_ioctl(ifp, command, data);
		break;
	case SIOCSIFMTU:
	    {
		int max_frame_size;
		uint16_t eeprom_data = 0;

		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFMTU (Set Interface MTU)");

		EM_LOCK(adapter);
		/* Per-MAC jumbo frame limits. */
		switch (adapter->hw.mac_type) {
		case em_82573:
			/*
			 * 82573 only supports jumbo frames
			 * if ASPM is disabled.
			 */
			em_read_eeprom(&adapter->hw, EEPROM_INIT_3GIO_3, 1,
			    &eeprom_data);
			if (eeprom_data & EEPROM_WORD1A_ASPM_MASK) {
				max_frame_size = ETHER_MAX_LEN;
				break;
			}
			/* Allow Jumbo frames - fall thru */
		case em_82571:
		case em_82572:
		case em_80003es2lan:	/* Limit Jumbo Frame size */
			max_frame_size = 9234;
			break;
		case em_ich8lan:
			/* ICH8 does not support jumbo frames */
			max_frame_size = ETHER_MAX_LEN;
			break;
		default:
			max_frame_size = MAX_JUMBO_FRAME_SIZE;
		}
		if (ifr->ifr_mtu > max_frame_size - ETHER_HDR_LEN -
		    ETHER_CRC_LEN) {
			EM_UNLOCK(adapter);
			error = EINVAL;
			break;
		}

		/* New MTU requires a full reinitialization. */
		ifp->if_mtu = ifr->ifr_mtu;
		adapter->hw.max_frame_size =
		ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
		em_init_locked(adapter);
		EM_UNLOCK(adapter);
		break;
	    }
	case SIOCSIFFLAGS:
		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFFLAGS (Set Interface Flags)");
		EM_LOCK(adapter);
		if (ifp->if_flags & IFF_UP) {
			if ((ifp->if_drv_flags & IFF_DRV_RUNNING)) {
				/*
				 * Only a promiscuity change requires work;
				 * avoid a full reinit for other flag flips.
				 */
				if ((ifp->if_flags ^ adapter->if_flags) &
				    IFF_PROMISC) {
					em_disable_promisc(adapter);
					em_set_promisc(adapter);
				}
			} else
				em_init_locked(adapter);
		} else {
			if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
				em_stop(adapter);
			}
		}
		/* Remember flags so future deltas can be detected. */
		adapter->if_flags = ifp->if_flags;
		EM_UNLOCK(adapter);
		break;
	case SIOCADDMULTI:
	case SIOCDELMULTI:
		IOCTL_DEBUGOUT("ioctl rcv'd: SIOC(ADD|DEL)MULTI");
		if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
			EM_LOCK(adapter);
			em_disable_intr(adapter);
			em_set_multi(adapter);
			/* 82542 rev 2.0 needs its RX unit re-primed. */
			if (adapter->hw.mac_type == em_82542_rev2_0) {
				em_initialize_receive_unit(adapter);
			}
#ifdef DEVICE_POLLING
			if (!(ifp->if_capenable & IFCAP_POLLING))
#endif
				em_enable_intr(adapter);
			EM_UNLOCK(adapter);
		}
		break;
	case SIOCSIFMEDIA:
	case SIOCGIFMEDIA:
		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCxIFMEDIA (Get/Set Interface Media)");
		error = ifmedia_ioctl(ifp, ifr, &adapter->media, command);
		break;
	case SIOCSIFCAP:
	    {
		int mask, reinit;

		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFCAP (Set Capabilities)");
		reinit = 0;
		/* mask holds only the capability bits being toggled. */
		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
#ifdef DEVICE_POLLING
		if (mask & IFCAP_POLLING) {
			if (ifr->ifr_reqcap & IFCAP_POLLING) {
				error = ether_poll_register(em_poll, ifp);
				if (error)
					return (error);
				EM_LOCK(adapter);
				em_disable_intr(adapter);
				ifp->if_capenable |= IFCAP_POLLING;
				EM_UNLOCK(adapter);
			} else {
				error = ether_poll_deregister(ifp);
				/* Enable interrupt even in error case */
				EM_LOCK(adapter);
				em_enable_intr(adapter);
				ifp->if_capenable &= ~IFCAP_POLLING;
				EM_UNLOCK(adapter);
			}
		}
#endif
		if (mask & IFCAP_HWCSUM) {
			ifp->if_capenable ^= IFCAP_HWCSUM;
			reinit = 1;
		}
		if (mask & IFCAP_VLAN_HWTAGGING) {
			ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
			reinit = 1;
		}
		/* Checksum/VLAN offload changes take effect via reinit. */
		if (reinit && (ifp->if_drv_flags & IFF_DRV_RUNNING))
			em_init(adapter);
		VLAN_CAPABILITIES(ifp);
		break;
	    }
	default:
		error = ether_ioctl(ifp, command, data);
		break;
	}

	return (error);
}
924
925/*********************************************************************
926 *  Watchdog entry point
927 *
928 *  This routine is called whenever hardware quits transmitting.
929 *
930 **********************************************************************/
931
/*
 * em_watchdog - if_watchdog entry point; runs when a transmit times out.
 *
 * If the MAC is merely paused by flow control (STATUS.TXOFF set), the
 * timer is re-armed and no reset happens.  Otherwise the interface is
 * marked down and fully reinitialized to recover the transmit unit.
 */
static void
em_watchdog(struct ifnet *ifp)
{
	struct adapter *adapter = ifp->if_softc;

	EM_LOCK(adapter);
	/* If we are in this routine because of pause frames, then
	 * don't reset the hardware.
	 */
	if (E1000_READ_REG(&adapter->hw, STATUS) & E1000_STATUS_TXOFF) {
		ifp->if_timer = EM_TX_TIMEOUT;
		EM_UNLOCK(adapter);
		return;
	}

	/*
	 * NOTE(review): the timeout message is printed only when
	 * em_check_for_link() returns 0 (success); presumably the intent
	 * is to suppress noise while the link itself is in flux -- confirm.
	 */
	if (em_check_for_link(&adapter->hw) == 0)
		device_printf(adapter->dev, "watchdog timeout -- resetting\n");

	ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
	adapter->watchdog_events++;

	/* em_init_locked() performs the stop/restart cycle. */
	em_init_locked(adapter);
	EM_UNLOCK(adapter);
}
956
957/*********************************************************************
958 *  Init entry point
959 *
960 *  This routine is used in two ways. It is used by the stack as
961 *  init entry point in network interface structure. It is also used
962 *  by the driver as a hw/sw initialization routine to get to a
963 *  consistent state.
964 *
 *  Returns nothing; failures are logged and the adapter is stopped.
966 **********************************************************************/
967
/*
 * em_init_locked - bring the adapter to a consistent running state.
 *
 * Adapter lock must be held.  Stops the hardware, programs the packet
 * buffer split, reinitializes TX/RX units, restores promiscuity and
 * multicast filters, restarts the timer and re-enables interrupts
 * (unless polling).  On hardware/setup failure the adapter is left
 * stopped and an error is logged.
 */
static void
em_init_locked(struct adapter *adapter)
{
	struct ifnet	*ifp = adapter->ifp;
	device_t	dev = adapter->dev;
	uint32_t	pba;

	INIT_DEBUGOUT("em_init: begin");

	EM_LOCK_ASSERT(adapter);

	em_stop(adapter);

	/*
	 * Packet Buffer Allocation (PBA)
	 * Writing PBA sets the receive portion of the buffer
	 * the remainder is used for the transmit buffer.
	 *
	 * Devices before the 82547 had a Packet Buffer of 64K.
	 *   Default allocation: PBA=48K for Rx, leaving 16K for Tx.
	 * After the 82547 the buffer was reduced to 40K.
	 *   Default allocation: PBA=30K for Rx, leaving 10K for Tx.
	 *   Note: default does not leave enough room for Jumbo Frame >10k.
	 */
	switch (adapter->hw.mac_type) {
	case em_82547:
	case em_82547_rev_2: /* 82547: Total Packet Buffer is 40K */
		if (adapter->hw.max_frame_size > EM_RXBUFFER_8192)
			pba = E1000_PBA_22K; /* 22K for Rx, 18K for Tx */
		else
			pba = E1000_PBA_30K; /* 30K for Rx, 10K for Tx */
		/* Software FIFO state for the 82547 half-duplex workaround. */
		adapter->tx_fifo_head = 0;
		adapter->tx_head_addr = pba << EM_TX_HEAD_ADDR_SHIFT;
		adapter->tx_fifo_size = (E1000_PBA_40K - pba) << EM_PBA_BYTES_SHIFT;
		break;
	case em_80003es2lan: /* 80003es2lan: Total Packet Buffer is 48K */
	case em_82571: /* 82571: Total Packet Buffer is 48K */
	case em_82572: /* 82572: Total Packet Buffer is 48K */
			pba = E1000_PBA_32K; /* 32K for Rx, 16K for Tx */
		break;
	case em_82573: /* 82573: Total Packet Buffer is 32K */
		/* Jumbo frames not supported */
			pba = E1000_PBA_12K; /* 12K for Rx, 20K for Tx */
		break;
	case em_ich8lan:
		pba = E1000_PBA_8K;
		break;
	default:
		/* Devices before 82547 had a Packet Buffer of 64K.   */
		if(adapter->hw.max_frame_size > EM_RXBUFFER_8192)
			pba = E1000_PBA_40K; /* 40K for Rx, 24K for Tx */
		else
			pba = E1000_PBA_48K; /* 48K for Rx, 16K for Tx */
	}

	INIT_DEBUGOUT1("em_init: pba=%dK",pba);
	E1000_WRITE_REG(&adapter->hw, PBA, pba);

	/* Get the latest mac address, User can use a LAA */
	bcopy(IF_LLADDR(adapter->ifp), adapter->hw.mac_addr, ETHER_ADDR_LEN);

	/* Initialize the hardware */
	if (em_hardware_init(adapter)) {
		device_printf(dev, "Unable to initialize the hardware\n");
		return;
	}
	em_update_link_status(adapter);

	if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING)
		em_enable_vlans(adapter);

	/* Prepare transmit descriptors and buffers */
	if (em_setup_transmit_structures(adapter)) {
		device_printf(dev, "Could not setup transmit structures\n");
		em_stop(adapter);
		return;
	}
	em_initialize_transmit_unit(adapter);

	/* Setup Multicast table */
	em_set_multi(adapter);

	/* Prepare receive descriptors and buffers */
	if (em_setup_receive_structures(adapter)) {
		device_printf(dev, "Could not setup receive structures\n");
		em_stop(adapter);
		return;
	}
	em_initialize_receive_unit(adapter);

	/* Don't lose promiscuous settings */
	em_set_promisc(adapter);

	ifp->if_drv_flags |= IFF_DRV_RUNNING;
	ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;

	/* Hardware checksum offload is available from the 82543 on. */
	if (adapter->hw.mac_type >= em_82543) {
		if (ifp->if_capenable & IFCAP_TXCSUM)
			ifp->if_hwassist = EM_CHECKSUM_FEATURES;
		else
			ifp->if_hwassist = 0;
	}

	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
	em_clear_hw_cntrs(&adapter->hw);
#ifdef DEVICE_POLLING
	/*
	 * Only enable interrupts if we are not polling, make sure
	 * they are off otherwise.
	 */
	if (ifp->if_capenable & IFCAP_POLLING)
		em_disable_intr(adapter);
	else
#endif /* DEVICE_POLLING */
		em_enable_intr(adapter);

	/* Don't reset the phy next time init gets called */
	adapter->hw.phy_reset_disable = TRUE;
}
1087
/*
 * em_init - if_init entry point.
 *
 * Locked wrapper around em_init_locked(); callable from the stack.
 */
static void
em_init(void *arg)
{
	struct adapter *sc = arg;

	EM_LOCK(sc);
	em_init_locked(sc);
	EM_UNLOCK(sc);
}
1097
1098
1099#ifdef DEVICE_POLLING
1100/*********************************************************************
1101 *
1102 *  Legacy polling routine
1103 *
1104 *********************************************************************/
/*
 * em_poll - legacy DEVICE_POLLING entry point.
 *
 * Drains up to 'count' received frames, reaps completed transmit
 * descriptors, and restarts transmission if the send queue is
 * non-empty.  POLL_AND_CHECK_STATUS additionally reads ICR to catch
 * link events while interrupts are disabled.
 */
static void
em_poll(struct ifnet *ifp, enum poll_cmd cmd, int count)
{
	struct adapter *adapter = ifp->if_softc;
	uint32_t reg_icr;

	EM_LOCK(adapter);
	/* The interface may have gone down since we were scheduled. */
	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
		EM_UNLOCK(adapter);
		return;
	}

	if (cmd == POLL_AND_CHECK_STATUS) {
		reg_icr = E1000_READ_REG(&adapter->hw, ICR);
		if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
			/* Link event: refresh state, hold the timer meanwhile. */
			callout_stop(&adapter->timer);
			adapter->hw.get_link_status = 1;
			em_check_for_link(&adapter->hw);
			em_update_link_status(adapter);
			callout_reset(&adapter->timer, hz, em_local_timer, adapter);
		}
	}
	em_rxeof(adapter, count);
	em_txeof(adapter);

	/* Kick the transmitter while we still hold the lock. */
	if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
		em_start_locked(ifp);
	EM_UNLOCK(adapter);
}
1134
1135/*********************************************************************
1136 *
1137 *  Legacy Interrupt Service routine
1138 *
1139 *********************************************************************/
/*
 * em_intr - legacy interrupt service routine (DEVICE_POLLING builds).
 *
 * Loops reading ICR (reading acknowledges the causes) and services
 * RX/TX and link events until no interrupt cause remains.  Returns
 * immediately when the interface is in polling mode.
 */
static void
em_intr(void *arg)
{
	struct adapter	*adapter = arg;
	struct ifnet	*ifp;
	uint32_t	reg_icr;

	EM_LOCK(adapter);

	ifp = adapter->ifp;

	/* In polling mode em_poll() does all the work. */
	if (ifp->if_capenable & IFCAP_POLLING) {
		EM_UNLOCK(adapter);
		return;
	}

	for (;;) {
		reg_icr = E1000_READ_REG(&adapter->hw, ICR);
		/* On 82571+ bit 31 tells whether the interrupt is ours. */
		if (adapter->hw.mac_type >= em_82571 &&
		    (reg_icr & E1000_ICR_INT_ASSERTED) == 0)
			break;
		else if (reg_icr == 0)
			break;

		/*
		 * XXX: some laptops trigger several spurious interrupts
		 * on em(4) when in the resume cycle. The ICR register
		 * reports all-ones value in this case. Processing such
		 * interrupts would lead to a freeze. I don't know why.
		 */
		if (reg_icr == 0xffffffff)
			break;

		if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
			em_rxeof(adapter, -1);
			em_txeof(adapter);
		}

		/* Link status change */
		if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
			callout_stop(&adapter->timer);
			adapter->hw.get_link_status = 1;
			em_check_for_link(&adapter->hw);
			em_update_link_status(adapter);
			callout_reset(&adapter->timer, hz, em_local_timer, adapter);
		}

		/* Receiver overrun: count it for statistics. */
		if (reg_icr & E1000_ICR_RXO)
			adapter->rx_overruns++;
	}

	/* Restart transmission if work is pending. */
	if (ifp->if_drv_flags & IFF_DRV_RUNNING &&
	    !IFQ_DRV_IS_EMPTY(&ifp->if_snd))
		em_start_locked(ifp);

	EM_UNLOCK(adapter);
}
1197
1198#else  /* if not DEVICE_POLLING, then fast interrupt routines only */
1199
1200static void
1201em_handle_link(void *context, int pending)
1202{
1203	struct adapter	*adapter = context;
1204	struct ifnet *ifp;
1205
1206	ifp = adapter->ifp;
1207
1208	EM_LOCK(adapter);
1209
1210	callout_stop(&adapter->timer);
1211	adapter->hw.get_link_status = 1;
1212	em_check_for_link(&adapter->hw);
1213	em_update_link_status(adapter);
1214	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
1215	EM_UNLOCK(adapter);
1216}
1217
/*
 * em_handle_rxtx - deferred RX/TX task (queued by em_intr_fast()).
 *
 * Runs under Giant (for the network stack paths em_rxeof() may enter).
 * If em_rxeof() indicates more work remains it re-enqueues itself, then
 * reaps TX descriptors and restarts transmission under the adapter
 * lock.  Interrupts are re-enabled on the way out.
 */
static void
em_handle_rxtx(void *context, int pending)
{
	struct adapter	*adapter = context;
	struct ifnet	*ifp;

	NET_LOCK_GIANT();
	ifp = adapter->ifp;

	/*
	 * TODO:
	 * It should be possible to run the tx clean loop without the lock.
	 */
	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
		/* Non-zero return: RX limit hit, more frames pending. */
		if (em_rxeof(adapter, adapter->rx_process_limit) != 0)
			taskqueue_enqueue(adapter->tq, &adapter->rxtx_task);
		EM_LOCK(adapter);
		em_txeof(adapter);

		if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
			em_start_locked(ifp);
		EM_UNLOCK(adapter);
	}

	/* em_intr_fast() masked interrupts; unmask now that we're done. */
	em_enable_intr(adapter);
	NET_UNLOCK_GIANT();
}
1245
1246/*********************************************************************
1247 *
1248 *  Fast Interrupt Service routine
1249 *
1250 *********************************************************************/
1251static void
1252em_intr_fast(void *arg)
1253{
1254	struct adapter	*adapter = arg;
1255	struct ifnet	*ifp;
1256	uint32_t	reg_icr;
1257
1258	ifp = adapter->ifp;
1259
1260	reg_icr = E1000_READ_REG(&adapter->hw, ICR);
1261
1262	/* Hot eject?  */
1263	if (reg_icr == 0xffffffff)
1264		return;
1265
1266	/* Definitely not our interrupt.  */
1267	if (reg_icr == 0x0)
1268		return;
1269
1270	/*
1271	 * Starting with the 82571 chip, bit 31 should be used to
1272	 * determine whether the interrupt belongs to us.
1273	 */
1274	if (adapter->hw.mac_type >= em_82571 &&
1275	    (reg_icr & E1000_ICR_INT_ASSERTED) == 0)
1276		return;
1277
1278	/*
1279	 * Mask interrupts until the taskqueue is finished running.  This is
1280	 * cheap, just assume that it is needed.  This also works around the
1281	 * MSI message reordering errata on certain systems.
1282	 */
1283	em_disable_intr(adapter);
1284	taskqueue_enqueue(adapter->tq, &adapter->rxtx_task);
1285
1286	/* Link status change */
1287	if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC))
1288		taskqueue_enqueue(taskqueue_fast, &adapter->link_task);
1289
1290	if (reg_icr & E1000_ICR_RXO)
1291		adapter->rx_overruns++;
1292}
1293#endif /* ! DEVICE_POLLING */
1294
1295/*********************************************************************
1296 *
1297 *  Media Ioctl callback
1298 *
1299 *  This routine is called whenever the user queries the status of
1300 *  the interface using ifconfig.
1301 *
1302 **********************************************************************/
/*
 * em_media_status - ifmedia status callback (SIOCGIFMEDIA).
 *
 * Refreshes link state and reports validity, active/inactive status,
 * and the current speed/duplex as ifmedia flags.
 */
static void
em_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
{
	struct adapter *adapter = ifp->if_softc;

	INIT_DEBUGOUT("em_media_status: begin");

	em_check_for_link(&adapter->hw);
	em_update_link_status(adapter);

	ifmr->ifm_status = IFM_AVALID;
	ifmr->ifm_active = IFM_ETHER;

	/* No link: report only "valid but inactive". */
	if (!adapter->link_active)
		return;

	ifmr->ifm_status |= IFM_ACTIVE;

	if (adapter->hw.media_type == em_media_type_fiber) {
		/* Fiber is always 1000SX full duplex. */
		ifmr->ifm_active |= IFM_1000_SX | IFM_FDX;
	} else {
		switch (adapter->link_speed) {
		case 10:
			ifmr->ifm_active |= IFM_10_T;
			break;
		case 100:
			ifmr->ifm_active |= IFM_100_TX;
			break;
		case 1000:
			ifmr->ifm_active |= IFM_1000_T;
			break;
		}
		if (adapter->link_duplex == FULL_DUPLEX)
			ifmr->ifm_active |= IFM_FDX;
		else
			ifmr->ifm_active |= IFM_HDX;
	}
}
1341
1342/*********************************************************************
1343 *
1344 *  Media Ioctl callback
1345 *
1346 *  This routine is called when the user changes speed/duplex using
 *  media/mediaopt option with ifconfig.
1348 *
1349 **********************************************************************/
/*
 * em_media_change - ifmedia change callback (SIOCSIFMEDIA).
 *
 * Translates the requested ifmedia word into autonegotiation or forced
 * speed/duplex settings, clears phy_reset_disable so the PHY is reset,
 * and reinitializes the adapter.  Returns 0 on success, EINVAL for a
 * non-Ethernet media word.
 */
static int
em_media_change(struct ifnet *ifp)
{
	struct adapter *adapter = ifp->if_softc;
	struct ifmedia  *ifm = &adapter->media;

	INIT_DEBUGOUT("em_media_change: begin");

	if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
		return (EINVAL);

	switch (IFM_SUBTYPE(ifm->ifm_media)) {
	case IFM_AUTO:
		adapter->hw.autoneg = DO_AUTO_NEG;
		adapter->hw.autoneg_advertised = AUTONEG_ADV_DEFAULT;
		break;
	case IFM_1000_SX:
	case IFM_1000_T:
		/* Gigabit requires autonegotiation per 802.3. */
		adapter->hw.autoneg = DO_AUTO_NEG;
		adapter->hw.autoneg_advertised = ADVERTISE_1000_FULL;
		break;
	case IFM_100_TX:
		adapter->hw.autoneg = FALSE;
		adapter->hw.autoneg_advertised = 0;
		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
			adapter->hw.forced_speed_duplex = em_100_full;
		else
			adapter->hw.forced_speed_duplex = em_100_half;
		break;
	case IFM_10_T:
		adapter->hw.autoneg = FALSE;
		adapter->hw.autoneg_advertised = 0;
		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
			adapter->hw.forced_speed_duplex = em_10_full;
		else
			adapter->hw.forced_speed_duplex = em_10_half;
		break;
	default:
		/*
		 * NOTE(review): unsupported subtypes only log a message and
		 * fall through to the reinit below with settings unchanged;
		 * returning EINVAL here may be more correct -- confirm.
		 */
		device_printf(adapter->dev, "Unsupported media type\n");
	}

	/* As the speed/duplex settings may have changed we need to
	 * reset the PHY.
	 */
	adapter->hw.phy_reset_disable = FALSE;

	em_init(adapter);

	return (0);
}
1400
1401/*********************************************************************
1402 *
1403 *  This routine maps the mbufs to tx descriptors.
1404 *
1405 *  return 0 on success, positive on failure
1406 **********************************************************************/
/*
 * em_encap - map an mbuf chain onto transmit descriptors.
 *
 * On success returns 0 and the frame is handed to the hardware (TDT
 * advanced, or routed through the 82547 half-duplex FIFO workaround).
 * On failure returns errno; *m_headp may be replaced (VLAN header
 * insertion) or set to NULL (allocation failure), in which case the
 * caller must not requeue.  Adapter lock is held by the caller.
 */
static int
em_encap(struct adapter *adapter, struct mbuf **m_headp)
{
	struct ifnet		*ifp = adapter->ifp;
	bus_dma_segment_t	segs[EM_MAX_SCATTER];
	bus_dmamap_t		map;
	struct em_buffer	*tx_buffer, *tx_buffer_last;
	struct em_tx_desc	*current_tx_desc;
	struct mbuf		*m_head;
	struct m_tag		*mtag;
	uint32_t		txd_upper, txd_lower, txd_used, txd_saved;
	int			nsegs, i, j;
	int			error;

	m_head = *m_headp;
	current_tx_desc = NULL;
	txd_used = txd_saved = 0;

	/*
	 * Force a cleanup if number of TX descriptors
	 * available hits the threshold.
	 */
	if (adapter->num_tx_desc_avail <= EM_TX_CLEANUP_THRESHOLD) {
		em_txeof(adapter);
		if (adapter->num_tx_desc_avail <= EM_TX_CLEANUP_THRESHOLD) {
			adapter->no_tx_desc_avail1++;
			return (ENOBUFS);
		}
	}

	/* Find out if we are in vlan mode. */
	mtag = VLAN_OUTPUT_TAG(ifp, m_head);

	/*
	 * When operating in promiscuous mode, hardware encapsulation for
	 * packets is disabled.  This means we have to add the vlan
	 * encapsulation in the driver, since it will have come down from the
	 * VLAN layer with a tag instead of a VLAN header.
	 */
	if (mtag != NULL && adapter->em_insert_vlan_header) {
		struct ether_vlan_header *evl;
		struct ether_header eh;

		/* Save the Ethernet header, then grow the mbuf by 4 bytes. */
		m_head = m_pullup(m_head, sizeof(eh));
		if (m_head == NULL) {
			*m_headp = NULL;
			return (ENOBUFS);
		}
		eh = *mtod(m_head, struct ether_header *);
		M_PREPEND(m_head, sizeof(*evl), M_DONTWAIT);
		if (m_head == NULL) {
			*m_headp = NULL;
			return (ENOBUFS);
		}
		m_head = m_pullup(m_head, sizeof(*evl));
		if (m_head == NULL) {
			*m_headp = NULL;
			return (ENOBUFS);
		}
		evl = mtod(m_head, struct ether_vlan_header *);
		/*
		 * NOTE(review): this copies sizeof(*evl) (18) bytes from a
		 * 14-byte ether_header on the stack.  The 4 over-read bytes
		 * are overwritten just below, but the read itself is
		 * dubious -- confirm against later driver revisions.
		 */
		bcopy(&eh, evl, sizeof(*evl));
		evl->evl_proto = evl->evl_encap_proto;
		evl->evl_encap_proto = htons(ETHERTYPE_VLAN);
		evl->evl_tag = htons(VLAN_TAG_VALUE(mtag));
		m_tag_delete(m_head, mtag);
		mtag = NULL;
		*m_headp = m_head;
	}

	/*
	 * Map the packet for DMA.
	 */
	tx_buffer = &adapter->tx_buffer_area[adapter->next_avail_tx_desc];
	tx_buffer_last = tx_buffer;
	map = tx_buffer->map;
	error = bus_dmamap_load_mbuf_sg(adapter->txtag, map, m_head, segs, &nsegs,
	    BUS_DMA_NOWAIT);
	if (error != 0) {
		adapter->no_tx_dma_setup++;
		return (error);
	}
	KASSERT(nsegs != 0, ("em_encap: empty packet"));

	if (nsegs > adapter->num_tx_desc_avail) {
		adapter->no_tx_desc_avail2++;
		error = ENOBUFS;
		goto encap_fail;
	}

	if (ifp->if_hwassist > 0)
		em_transmit_checksum_setup(adapter,  m_head, &txd_upper, &txd_lower);
	else
		txd_upper = txd_lower = 0;

	i = adapter->next_avail_tx_desc;
	/* Remember the starting slot so the 82544 path can roll back. */
	if (adapter->pcix_82544) {
		txd_saved = i;
		txd_used = 0;
	}
	for (j = 0; j < nsegs; j++) {
		/* If adapter is 82544 and on PCIX bus. */
		if(adapter->pcix_82544) {
			DESC_ARRAY	desc_array;
			uint32_t	array_elements, counter;

			/*
			 * Check the Address and Length combination and
			 * split the data accordingly
			 */
			array_elements = em_fill_descriptors(segs[j].ds_addr,
			    segs[j].ds_len, &desc_array);
			for (counter = 0; counter < array_elements; counter++) {
				if (txd_used == adapter->num_tx_desc_avail) {
					/* Roll back and fail: ring exhausted. */
					adapter->next_avail_tx_desc = txd_saved;
					adapter->no_tx_desc_avail2++;
					error = ENOBUFS;
					goto encap_fail;
				}
				tx_buffer = &adapter->tx_buffer_area[i];
				current_tx_desc = &adapter->tx_desc_base[i];
				current_tx_desc->buffer_addr = htole64(
					desc_array.descriptor[counter].address);
				current_tx_desc->lower.data = htole32(
					(adapter->txd_cmd | txd_lower |
					(uint16_t)desc_array.descriptor[counter].length));
				current_tx_desc->upper.data = htole32((txd_upper));
				if (++i == adapter->num_tx_desc)
					i = 0;

				tx_buffer->m_head = NULL;
				txd_used++;
			}
		} else {
			tx_buffer = &adapter->tx_buffer_area[i];
			current_tx_desc = &adapter->tx_desc_base[i];

			current_tx_desc->buffer_addr = htole64(segs[j].ds_addr);
			current_tx_desc->lower.data = htole32(
				adapter->txd_cmd | txd_lower | segs[j].ds_len);
			current_tx_desc->upper.data = htole32(txd_upper);

			if (++i == adapter->num_tx_desc)
				i = 0;

			tx_buffer->m_head = NULL;
		}
	}

	adapter->next_avail_tx_desc = i;
	if (adapter->pcix_82544)
		adapter->num_tx_desc_avail -= txd_used;
	else
		adapter->num_tx_desc_avail -= nsegs;

	if (mtag != NULL) {
		/* Set the vlan id. */
		current_tx_desc->upper.fields.special =
		    htole16(VLAN_TAG_VALUE(mtag));

		/* Tell hardware to add tag. */
		current_tx_desc->lower.data |= htole32(E1000_TXD_CMD_VLE);
	}

	/*
	 * Attach the mbuf to the LAST descriptor's buffer and swap the DMA
	 * map back to the first slot so em_txeof() can unload it there.
	 */
	tx_buffer->m_head = m_head;
	tx_buffer_last->map = tx_buffer->map;
	tx_buffer->map = map;
	bus_dmamap_sync(adapter->txtag, map, BUS_DMASYNC_PREWRITE);

	/*
	 * Last Descriptor of Packet needs End Of Packet (EOP).
	 */
	current_tx_desc->lower.data |= htole32(E1000_TXD_CMD_EOP);

	/*
	 * Advance the Transmit Descriptor Tail (Tdt), this tells the E1000
	 * that this frame is available to transmit.
	 */
	bus_dmamap_sync(adapter->txdma.dma_tag, adapter->txdma.dma_map,
	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
	if (adapter->hw.mac_type == em_82547 && adapter->link_duplex == HALF_DUPLEX)
		em_82547_move_tail_locked(adapter);
	else {
		E1000_WRITE_REG(&adapter->hw, TDT, i);
		if (adapter->hw.mac_type == em_82547)
			em_82547_update_fifo_head(adapter, m_head->m_pkthdr.len);
	}

	return (0);

encap_fail:
	bus_dmamap_unload(adapter->txtag, map);
	return (error);
}
1600
1601/*********************************************************************
1602 *
1603 * 82547 workaround to avoid controller hang in half-duplex environment.
1604 * The workaround is to avoid queuing a large packet that would span
1605 * the internal Tx FIFO ring boundary. We need to reset the FIFO pointers
1606 * in this case. We do that only when FIFO is quiescent.
1607 *
1608 **********************************************************************/
/*
 * em_82547_move_tail_locked - 82547 half-duplex Tx FIFO hang workaround.
 *
 * Walks descriptors between the hardware tail (TDT) and the software
 * tail, advancing TDT one whole frame (EOP boundary) at a time.  When a
 * frame would span the internal FIFO ring boundary and the FIFO cannot
 * yet be reset, a 1-tick callout retries via em_82547_move_tail().
 * Adapter lock must be held.
 */
static void
em_82547_move_tail_locked(struct adapter *adapter)
{
	uint16_t hw_tdt;
	uint16_t sw_tdt;
	struct em_tx_desc *tx_desc;
	uint16_t length = 0;
	boolean_t eop = 0;

	EM_LOCK_ASSERT(adapter);

	hw_tdt = E1000_READ_REG(&adapter->hw, TDT);
	sw_tdt = adapter->next_avail_tx_desc;

	while (hw_tdt != sw_tdt) {
		/* Accumulate frame length until an EOP descriptor is seen. */
		tx_desc = &adapter->tx_desc_base[hw_tdt];
		length += tx_desc->lower.flags.length;
		eop = tx_desc->lower.data & E1000_TXD_CMD_EOP;
		if(++hw_tdt == adapter->num_tx_desc)
			hw_tdt = 0;

		if (eop) {
			if (em_82547_fifo_workaround(adapter, length)) {
				/* FIFO not quiescent yet: retry next tick. */
				adapter->tx_fifo_wrk_cnt++;
				callout_reset(&adapter->tx_fifo_timer, 1,
					em_82547_move_tail, adapter);
				break;
			}
			E1000_WRITE_REG(&adapter->hw, TDT, hw_tdt);
			em_82547_update_fifo_head(adapter, length);
			length = 0;
		}
	}
}
1643
/*
 * Callout wrapper: take the adapter lock and retry advancing the
 * 82547 hardware tail pointer.
 */
static void
em_82547_move_tail(void *arg)
{
	struct adapter *sc = arg;

	EM_LOCK(sc);
	em_82547_move_tail_locked(sc);
	EM_UNLOCK(sc);
}
1653
1654static int
1655em_82547_fifo_workaround(struct adapter *adapter, int len)
1656{
1657	int fifo_space, fifo_pkt_len;
1658
1659	fifo_pkt_len = roundup2(len + EM_FIFO_HDR, EM_FIFO_HDR);
1660
1661	if (adapter->link_duplex == HALF_DUPLEX) {
1662		fifo_space = adapter->tx_fifo_size - adapter->tx_fifo_head;
1663
1664		if (fifo_pkt_len >= (EM_82547_PKT_THRESH + fifo_space)) {
1665			if (em_82547_tx_fifo_reset(adapter))
1666				return (0);
1667			else
1668				return (1);
1669		}
1670	}
1671
1672	return (0);
1673}
1674
1675static void
1676em_82547_update_fifo_head(struct adapter *adapter, int len)
1677{
1678	int fifo_pkt_len = roundup2(len + EM_FIFO_HDR, EM_FIFO_HDR);
1679
1680	/* tx_fifo_head is always 16 byte aligned */
1681	adapter->tx_fifo_head += fifo_pkt_len;
1682	if (adapter->tx_fifo_head >= adapter->tx_fifo_size) {
1683		adapter->tx_fifo_head -= adapter->tx_fifo_size;
1684	}
1685}
1686
1687
/*
 * em_82547_tx_fifo_reset - reset the 82547 internal Tx FIFO pointers.
 *
 * Only safe when the transmit path is completely quiescent: descriptor
 * head == tail, FIFO head == tail, saved pointers equal, and the FIFO
 * packet count is zero.  Returns TRUE if the reset was performed,
 * FALSE otherwise.
 */
static int
em_82547_tx_fifo_reset(struct adapter *adapter)
{
	uint32_t tctl;

	if ((E1000_READ_REG(&adapter->hw, TDT) == E1000_READ_REG(&adapter->hw, TDH)) &&
	    (E1000_READ_REG(&adapter->hw, TDFT) == E1000_READ_REG(&adapter->hw, TDFH)) &&
	    (E1000_READ_REG(&adapter->hw, TDFTS) == E1000_READ_REG(&adapter->hw, TDFHS))&&
	    (E1000_READ_REG(&adapter->hw, TDFPC) == 0)) {

		/* Disable TX unit */
		tctl = E1000_READ_REG(&adapter->hw, TCTL);
		E1000_WRITE_REG(&adapter->hw, TCTL, tctl & ~E1000_TCTL_EN);

		/* Reset FIFO pointers */
		E1000_WRITE_REG(&adapter->hw, TDFT,  adapter->tx_head_addr);
		E1000_WRITE_REG(&adapter->hw, TDFH,  adapter->tx_head_addr);
		E1000_WRITE_REG(&adapter->hw, TDFTS, adapter->tx_head_addr);
		E1000_WRITE_REG(&adapter->hw, TDFHS, adapter->tx_head_addr);

		/* Re-enable TX unit */
		E1000_WRITE_REG(&adapter->hw, TCTL, tctl);
		E1000_WRITE_FLUSH(&adapter->hw);

		/* Keep the software FIFO model in sync. */
		adapter->tx_fifo_head = 0;
		adapter->tx_fifo_reset_cnt++;

		return (TRUE);
	}
	else {
		return (FALSE);
	}
}
1721
/*
 * em_set_promisc - program RCTL according to IFF_PROMISC/IFF_ALLMULTI.
 *
 * Promiscuous mode sets both unicast (UPE) and multicast (MPE)
 * promiscuous bits, disables hardware VLAN stripping, and switches the
 * driver to software VLAN header insertion.  ALLMULTI sets MPE only.
 */
static void
em_set_promisc(struct adapter *adapter)
{
	struct ifnet	*ifp = adapter->ifp;
	uint32_t	reg_rctl;

	reg_rctl = E1000_READ_REG(&adapter->hw, RCTL);

	if (ifp->if_flags & IFF_PROMISC) {
		reg_rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
		E1000_WRITE_REG(&adapter->hw, RCTL, reg_rctl);
		/* Disable VLAN stripping in promiscuous mode
		 * This enables bridging of vlan tagged frames to occur
		 * and also allows vlan tags to be seen in tcpdump
		 */
		if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING)
			em_disable_vlans(adapter);
		adapter->em_insert_vlan_header = 1;
	} else if (ifp->if_flags & IFF_ALLMULTI) {
		reg_rctl |= E1000_RCTL_MPE;
		reg_rctl &= ~E1000_RCTL_UPE;
		E1000_WRITE_REG(&adapter->hw, RCTL, reg_rctl);
		adapter->em_insert_vlan_header = 0;
	} else
		adapter->em_insert_vlan_header = 0;
}
1748
1749static void
1750em_disable_promisc(struct adapter *adapter)
1751{
1752	struct ifnet	*ifp = adapter->ifp;
1753	uint32_t	reg_rctl;
1754
1755	reg_rctl = E1000_READ_REG(&adapter->hw, RCTL);
1756
1757	reg_rctl &=  (~E1000_RCTL_UPE);
1758	reg_rctl &=  (~E1000_RCTL_MPE);
1759	E1000_WRITE_REG(&adapter->hw, RCTL, reg_rctl);
1760
1761	if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING)
1762		em_enable_vlans(adapter);
1763	adapter->em_insert_vlan_header = 0;
1764}
1765
1766
1767/*********************************************************************
1768 *  Multicast Update
1769 *
1770 *  This routine is called whenever multicast address list is updated.
1771 *
1772 **********************************************************************/
1773
/*
 * em_set_multi - reprogram the multicast filter table.
 *
 * Collects the interface's AF_LINK multicast addresses into a flat
 * array and loads them into the hardware.  If there are more addresses
 * than the hardware can filter, multicast-promiscuous mode (MPE) is
 * enabled instead.  The 82542 rev 2.0 requires the receiver to be held
 * in reset (and MWI disabled) around the update.
 */
static void
em_set_multi(struct adapter *adapter)
{
	struct ifnet	*ifp = adapter->ifp;
	struct ifmultiaddr *ifma;
	uint32_t reg_rctl = 0;
	uint8_t  mta[MAX_NUM_MULTICAST_ADDRESSES * ETH_LENGTH_OF_ADDRESS];
	int mcnt = 0;

	IOCTL_DEBUGOUT("em_set_multi: begin");

	if (adapter->hw.mac_type == em_82542_rev2_0) {
		/* Hold the receiver in reset while the table changes. */
		reg_rctl = E1000_READ_REG(&adapter->hw, RCTL);
		if (adapter->hw.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
			em_pci_clear_mwi(&adapter->hw);
		reg_rctl |= E1000_RCTL_RST;
		E1000_WRITE_REG(&adapter->hw, RCTL, reg_rctl);
		msec_delay(5);
	}

	IF_ADDR_LOCK(ifp);
	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
		if (ifma->ifma_addr->sa_family != AF_LINK)
			continue;

		if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
			break;

		bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
		    &mta[mcnt*ETH_LENGTH_OF_ADDRESS], ETH_LENGTH_OF_ADDRESS);
		mcnt++;
	}
	IF_ADDR_UNLOCK(ifp);

	if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES) {
		/* Too many groups: fall back to multicast promiscuous. */
		reg_rctl = E1000_READ_REG(&adapter->hw, RCTL);
		reg_rctl |= E1000_RCTL_MPE;
		E1000_WRITE_REG(&adapter->hw, RCTL, reg_rctl);
	} else
		em_mc_addr_list_update(&adapter->hw, mta, mcnt, 0, 1);

	if (adapter->hw.mac_type == em_82542_rev2_0) {
		/* Release receiver reset and restore MWI. */
		reg_rctl = E1000_READ_REG(&adapter->hw, RCTL);
		reg_rctl &= ~E1000_RCTL_RST;
		E1000_WRITE_REG(&adapter->hw, RCTL, reg_rctl);
		msec_delay(5);
		if (adapter->hw.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
			em_pci_set_mwi(&adapter->hw);
	}
}
1824
1825
1826/*********************************************************************
1827 *  Timer routine
1828 *
1829 *  This routine checks for link status and updates statistics.
1830 *
1831 **********************************************************************/
1832
static void
em_local_timer(void *arg)
{
	struct adapter	*adapter = arg;
	struct ifnet	*ifp = adapter->ifp;

	EM_LOCK(adapter);

	/* Poll link state and refresh the statistics counters. */
	em_check_for_link(&adapter->hw);
	em_update_link_status(adapter);
	em_update_stats_counters(adapter);
	/* Dump stats only when debugging is on and the interface is running. */
	if (em_display_debug_stats && ifp->if_drv_flags & IFF_DRV_RUNNING)
		em_print_hw_stats(adapter);
	em_smartspeed(adapter);

	/* Re-arm ourselves to fire again in one second. */
	callout_reset(&adapter->timer, hz, em_local_timer, adapter);

	EM_UNLOCK(adapter);
}
1852
1853static void
1854em_update_link_status(struct adapter *adapter)
1855{
1856	struct ifnet *ifp = adapter->ifp;
1857	device_t dev = adapter->dev;
1858
1859	if (E1000_READ_REG(&adapter->hw, STATUS) & E1000_STATUS_LU) {
1860		if (adapter->link_active == 0) {
1861			em_get_speed_and_duplex(&adapter->hw, &adapter->link_speed,
1862			    &adapter->link_duplex);
1863			/* Check if we may set SPEED_MODE bit on PCI-E */
1864			if ((adapter->link_speed == SPEED_1000) &&
1865			    ((adapter->hw.mac_type == em_82571) ||
1866			    (adapter->hw.mac_type == em_82572))) {
1867				int tarc0;
1868
1869				tarc0 = E1000_READ_REG(&adapter->hw, TARC0);
1870				tarc0 |= SPEED_MODE_BIT;
1871				E1000_WRITE_REG(&adapter->hw, TARC0, tarc0);
1872			}
1873			if (bootverbose)
1874				device_printf(dev, "Link is up %d Mbps %s\n",
1875				    adapter->link_speed,
1876				    ((adapter->link_duplex == FULL_DUPLEX) ?
1877				    "Full Duplex" : "Half Duplex"));
1878			adapter->link_active = 1;
1879			adapter->smartspeed = 0;
1880			ifp->if_baudrate = adapter->link_speed * 1000000;
1881			if_link_state_change(ifp, LINK_STATE_UP);
1882		}
1883	} else {
1884		if (adapter->link_active == 1) {
1885			ifp->if_baudrate = adapter->link_speed = 0;
1886			adapter->link_duplex = 0;
1887			if (bootverbose)
1888				device_printf(dev, "Link is Down\n");
1889			adapter->link_active = 0;
1890			if_link_state_change(ifp, LINK_STATE_DOWN);
1891		}
1892	}
1893}
1894
1895/*********************************************************************
1896 *
1897 *  This routine disables all traffic on the adapter by issuing a
1898 *  global reset on the MAC and deallocates TX/RX buffers.
1899 *
1900 **********************************************************************/
1901
static void
em_stop(void *arg)
{
	struct adapter	*adapter = arg;
	struct ifnet	*ifp = adapter->ifp;

	/* Caller must hold the adapter lock. */
	EM_LOCK_ASSERT(adapter);

	INIT_DEBUGOUT("em_stop: begin");

	/* Quiesce the hardware first, then stop the software timers. */
	em_disable_intr(adapter);
	em_reset_hw(&adapter->hw);
	callout_stop(&adapter->timer);
	callout_stop(&adapter->tx_fifo_timer);
	/* Release all TX/RX descriptor buffers and DMA maps. */
	em_free_transmit_structures(adapter);
	em_free_receive_structures(adapter);

	/* Tell the stack that the interface is no longer active */
	ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
}
1922
1923
1924/*********************************************************************
1925 *
1926 *  Determine hardware revision.
1927 *
1928 **********************************************************************/
1929static void
1930em_identify_hardware(struct adapter *adapter)
1931{
1932	device_t dev = adapter->dev;
1933
1934	/* Make sure our PCI config space has the necessary stuff set */
1935	adapter->hw.pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2);
1936	if ((adapter->hw.pci_cmd_word & PCIM_CMD_BUSMASTEREN) == 0 &&
1937	    (adapter->hw.pci_cmd_word & PCIM_CMD_MEMEN)) {
1938		device_printf(dev, "Memory Access and/or Bus Master bits "
1939		    "were not set!\n");
1940		adapter->hw.pci_cmd_word |=
1941		(PCIM_CMD_BUSMASTEREN | PCIM_CMD_MEMEN);
1942		pci_write_config(dev, PCIR_COMMAND, adapter->hw.pci_cmd_word, 2);
1943	}
1944
1945	/* Save off the information about this board */
1946	adapter->hw.vendor_id = pci_get_vendor(dev);
1947	adapter->hw.device_id = pci_get_device(dev);
1948	adapter->hw.revision_id = pci_read_config(dev, PCIR_REVID, 1);
1949	adapter->hw.subsystem_vendor_id = pci_read_config(dev, PCIR_SUBVEND_0, 2);
1950	adapter->hw.subsystem_id = pci_read_config(dev, PCIR_SUBDEV_0, 2);
1951
1952	/* Identify the MAC */
1953	if (em_set_mac_type(&adapter->hw))
1954		device_printf(dev, "Unknown MAC Type\n");
1955
1956	if(adapter->hw.mac_type == em_82541 || adapter->hw.mac_type == em_82541_rev_2 ||
1957	   adapter->hw.mac_type == em_82547 || adapter->hw.mac_type == em_82547_rev_2)
1958		adapter->hw.phy_init_script = TRUE;
1959}
1960
1961static int
1962em_allocate_pci_resources(struct adapter *adapter)
1963{
1964	device_t	dev = adapter->dev;
1965	int		val, rid;
1966
1967	rid = PCIR_BAR(0);
1968	adapter->res_memory = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
1969	    &rid, RF_ACTIVE);
1970	if (adapter->res_memory == NULL) {
1971		device_printf(dev, "Unable to allocate bus resource: memory\n");
1972		return (ENXIO);
1973	}
1974	adapter->osdep.mem_bus_space_tag =
1975	rman_get_bustag(adapter->res_memory);
1976	adapter->osdep.mem_bus_space_handle = rman_get_bushandle(adapter->res_memory);
1977	adapter->hw.hw_addr = (uint8_t *)&adapter->osdep.mem_bus_space_handle;
1978
1979	if (adapter->hw.mac_type > em_82543) {
1980		/* Figure our where our IO BAR is ? */
1981		for (rid = PCIR_BAR(0); rid < PCIR_CIS;) {
1982			val = pci_read_config(dev, rid, 4);
1983			if (E1000_BAR_TYPE(val) == E1000_BAR_TYPE_IO) {
1984				adapter->io_rid = rid;
1985				break;
1986			}
1987			rid += 4;
1988			/* check for 64bit BAR */
1989			if (E1000_BAR_MEM_TYPE(val) == E1000_BAR_MEM_TYPE_64BIT)
1990				rid += 4;
1991		}
1992		if (rid >= PCIR_CIS) {
1993			device_printf(dev, "Unable to locate IO BAR\n");
1994			return (ENXIO);
1995		}
1996		adapter->res_ioport = bus_alloc_resource_any(dev, SYS_RES_IOPORT,
1997		    &adapter->io_rid, RF_ACTIVE);
1998		if (adapter->res_ioport == NULL) {
1999			device_printf(dev, "Unable to allocate bus resource: "
2000			    "ioport\n");
2001			return (ENXIO);
2002		}
2003		adapter->hw.io_base = 0;
2004		adapter->osdep.io_bus_space_tag = rman_get_bustag(adapter->res_ioport);
2005		adapter->osdep.io_bus_space_handle =
2006		    rman_get_bushandle(adapter->res_ioport);
2007	}
2008
2009	/* For ICH8 we need to find the flash memory. */
2010	if (adapter->hw.mac_type == em_ich8lan) {
2011		rid = EM_FLASH;
2012
2013		adapter->flash_mem = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
2014		    &rid, RF_ACTIVE);
2015		adapter->osdep.flash_bus_space_tag = rman_get_bustag(adapter->flash_mem);
2016		adapter->osdep.flash_bus_space_handle =
2017		    rman_get_bushandle(adapter->flash_mem);
2018	}
2019
2020	rid = 0x0;
2021	adapter->res_interrupt = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid,
2022	    RF_SHAREABLE | RF_ACTIVE);
2023	if (adapter->res_interrupt == NULL) {
2024		device_printf(dev, "Unable to allocate bus resource: "
2025		    "interrupt\n");
2026		return (ENXIO);
2027	}
2028
2029	adapter->hw.back = &adapter->osdep;
2030
2031	return (0);
2032}
2033
int
em_allocate_intr(struct adapter *adapter)
{
	device_t dev = adapter->dev;
	int error;

	/* Manually turn off all interrupts */
	E1000_WRITE_REG(&adapter->hw, IMC, 0xffffffff);

#ifdef DEVICE_POLLING
	/*
	 * With DEVICE_POLLING use an ordinary (ithread) handler; the tag
	 * check makes this safe to call again after em_free_intr().
	 */
	if (adapter->int_handler_tag == NULL && (error = bus_setup_intr(dev,
	    adapter->res_interrupt, INTR_TYPE_NET | INTR_MPSAFE, em_intr, adapter,
	    &adapter->int_handler_tag)) != 0) {
		device_printf(dev, "Failed to register interrupt handler");
		return (error);
	}
#else
	/*
	 * Try allocating a fast interrupt and the associated deferred
	 * processing contexts.
	 */
	TASK_INIT(&adapter->rxtx_task, 0, em_handle_rxtx, adapter);
	TASK_INIT(&adapter->link_task, 0, em_handle_link, adapter);
	adapter->tq = taskqueue_create_fast("em_taskq", M_NOWAIT,
	    taskqueue_thread_enqueue, &adapter->tq);
	taskqueue_start_threads(&adapter->tq, 1, PI_NET, "%s taskq",
	    device_get_nameunit(adapter->dev));
	if ((error = bus_setup_intr(dev, adapter->res_interrupt,
	    INTR_TYPE_NET | INTR_FAST, em_intr_fast, adapter,
	    &adapter->int_handler_tag)) != 0) {
		device_printf(dev, "Failed to register fast interrupt "
			    "handler: %d\n", error);
		/* Tear down the taskqueue we just created. */
		taskqueue_free(adapter->tq);
		adapter->tq = NULL;
		return (error);
	}
#endif

	em_enable_intr(adapter);
	return (0);
}
2075
static void
em_free_intr(struct adapter *adapter)
{
	device_t dev = adapter->dev;

	/* Detach the interrupt handler first so no new work is enqueued. */
	if (adapter->res_interrupt != NULL) {
		bus_teardown_intr(dev, adapter->res_interrupt, adapter->int_handler_tag);
		adapter->int_handler_tag = NULL;
	}
	/*
	 * Drain and destroy the deferred-processing contexts.  rxtx_task
	 * runs on our private queue; link_task is enqueued to the global
	 * fast taskqueue by the fast interrupt handler.
	 */
	if (adapter->tq != NULL) {
		taskqueue_drain(adapter->tq, &adapter->rxtx_task);
		taskqueue_drain(taskqueue_fast, &adapter->link_task);
		taskqueue_free(adapter->tq);
		adapter->tq = NULL;
	}
}
2092
2093static void
2094em_free_pci_resources(struct adapter *adapter)
2095{
2096	device_t dev = adapter->dev;
2097
2098	if (adapter->res_interrupt != NULL)
2099		bus_release_resource(dev, SYS_RES_IRQ, 0, adapter->res_interrupt);
2100
2101	if (adapter->res_memory != NULL)
2102		bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BAR(0),
2103		    adapter->res_memory);
2104
2105	if (adapter->flash_mem != NULL)
2106		bus_release_resource(dev, SYS_RES_MEMORY, EM_FLASH,
2107		    adapter->flash_mem);
2108
2109	if (adapter->res_ioport != NULL)
2110		bus_release_resource(dev, SYS_RES_IOPORT, adapter->io_rid,
2111		    adapter->res_ioport);
2112}
2113
2114/*********************************************************************
2115 *
2116 *  Initialize the hardware to a configuration as specified by the
2117 *  adapter structure. The controller is reset, the EEPROM is
2118 *  verified, the MAC address is set, then the shared initialization
2119 *  routines are called.
2120 *
2121 **********************************************************************/
static int
em_hardware_init(struct adapter *adapter)
{
	device_t dev = adapter->dev;
	uint16_t rx_buffer_size;

	INIT_DEBUGOUT("em_hardware_init: begin");
	/* Issue a global reset */
	em_reset_hw(&adapter->hw);

	/* When hardware is reset, fifo_head is also reset */
	adapter->tx_fifo_head = 0;

	/* Make sure we have a good EEPROM before we read from it */
	if (em_validate_eeprom_checksum(&adapter->hw) < 0) {
		device_printf(dev, "The EEPROM Checksum Is Not Valid\n");
		return (EIO);
	}

	if (em_read_part_num(&adapter->hw, &(adapter->part_num)) < 0) {
		device_printf(dev, "EEPROM read error while reading part "
		    "number\n");
		return (EIO);
	}

	/* Set up smart power down as default off on newer adapters. */
	if (!em_smart_pwr_down &&
	    (adapter->hw.mac_type == em_82571 || adapter->hw.mac_type == em_82572)) {
		uint16_t phy_tmp = 0;

		/* Speed up time to link by disabling smart power down. */
		em_read_phy_reg(&adapter->hw, IGP02E1000_PHY_POWER_MGMT, &phy_tmp);
		phy_tmp &= ~IGP02E1000_PM_SPD;
		em_write_phy_reg(&adapter->hw, IGP02E1000_PHY_POWER_MGMT, phy_tmp);
	}

	/*
	 * These parameters control the automatic generation (Tx) and
	 * response (Rx) to Ethernet PAUSE frames.
	 * - High water mark should allow for at least two frames to be
	 *   received after sending an XOFF.
	 * - Low water mark works best when it is very near the high water mark.
	 *   This allows the receiver to restart by sending XON when it has
	 *   drained a bit. Here we use an arbitary value of 1500 which will
	 *   restart after one full frame is pulled from the buffer. There
	 *   could be several smaller frames in the buffer and if so they will
	 *   not trigger the XON until their total number reduces the buffer
	 *   by 1500.
	 * - The pause time is fairly large at 1000 x 512ns = 512 usec.
	 */
	/* PBA register holds the packet buffer size in KB; convert to bytes. */
	rx_buffer_size = ((E1000_READ_REG(&adapter->hw, PBA) & 0xffff) << 10 );

	adapter->hw.fc_high_water = rx_buffer_size -
	    roundup2(adapter->hw.max_frame_size, 1024);
	adapter->hw.fc_low_water = adapter->hw.fc_high_water - 1500;
	if (adapter->hw.mac_type == em_80003es2lan)
		adapter->hw.fc_pause_time = 0xFFFF;
	else
		adapter->hw.fc_pause_time = 0x1000;
	adapter->hw.fc_send_xon = TRUE;
	adapter->hw.fc = em_fc_full;

	/* Run the shared-code initialization (MAC, PHY, flow control). */
	if (em_init_hw(&adapter->hw) < 0) {
		device_printf(dev, "Hardware Initialization Failed");
		return (EIO);
	}

	em_check_for_link(&adapter->hw);

	return (0);
}
2193
2194/*********************************************************************
2195 *
2196 *  Setup networking device structure and register an interface.
2197 *
2198 **********************************************************************/
static void
em_setup_interface(device_t dev, struct adapter *adapter)
{
	struct ifnet   *ifp;
	INIT_DEBUGOUT("em_setup_interface: begin");

	/* Allocate and populate the ifnet, then attach to the ether layer. */
	ifp = adapter->ifp = if_alloc(IFT_ETHER);
	if (ifp == NULL)
		panic("%s: can not if_alloc()", device_get_nameunit(dev));
	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
	ifp->if_mtu = ETHERMTU;
	ifp->if_init =  em_init;
	ifp->if_softc = adapter;
	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
	ifp->if_ioctl = em_ioctl;
	ifp->if_start = em_start;
	ifp->if_watchdog = em_watchdog;
	/* Size the send queue to the TX descriptor ring (minus one slot). */
	IFQ_SET_MAXLEN(&ifp->if_snd, adapter->num_tx_desc - 1);
	ifp->if_snd.ifq_drv_maxlen = adapter->num_tx_desc - 1;
	IFQ_SET_READY(&ifp->if_snd);

	ether_ifattach(ifp, adapter->hw.mac_addr);

	ifp->if_capabilities = ifp->if_capenable = 0;

	/* Hardware checksum offload is available on 82543 and later. */
	if (adapter->hw.mac_type >= em_82543) {
		ifp->if_capabilities |= IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM;
		ifp->if_capenable |= IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM;
	}

	/*
	 * Tell the upper layer(s) we support long frames.
	 */
	ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header);
	ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU;
	ifp->if_capenable |= IFCAP_VLAN_MTU;

#ifdef DEVICE_POLLING
	ifp->if_capabilities |= IFCAP_POLLING;
#endif

	/*
	 * Specify the media types supported by this adapter and register
	 * callbacks to update media and link information
	 */
	ifmedia_init(&adapter->media, IFM_IMASK, em_media_change, em_media_status);
	if (adapter->hw.media_type == em_media_type_fiber) {
		ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_SX | IFM_FDX,
			    0, NULL);
		ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_SX,
			    0, NULL);
	} else {
		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T, 0, NULL);
		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T | IFM_FDX,
			    0, NULL);
		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX,
			    0, NULL);
		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX | IFM_FDX,
			    0, NULL);
		ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_T | IFM_FDX,
			    0, NULL);
		ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_T, 0, NULL);
	}
	/* Default to autoselect. */
	ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL);
	ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO);
}
2265
2266
2267/*********************************************************************
2268 *
2269 *  Workaround for SmartSpeed on 82541 and 82547 controllers
2270 *
2271 **********************************************************************/
static void
em_smartspeed(struct adapter *adapter)
{
	uint16_t phy_tmp;

	/*
	 * Only relevant while the link is down, on IGP PHYs, with
	 * autonegotiation advertising 1000FDX.
	 */
	if (adapter->link_active || (adapter->hw.phy_type != em_phy_igp) ||
	    adapter->hw.autoneg == 0 ||
	    (adapter->hw.autoneg_advertised & ADVERTISE_1000_FULL) == 0)
		return;

	if (adapter->smartspeed == 0) {
		/* If Master/Slave config fault is asserted twice,
		 * we assume back-to-back */
		em_read_phy_reg(&adapter->hw, PHY_1000T_STATUS, &phy_tmp);
		if (!(phy_tmp & SR_1000T_MS_CONFIG_FAULT))
			return;
		em_read_phy_reg(&adapter->hw, PHY_1000T_STATUS, &phy_tmp);
		if (phy_tmp & SR_1000T_MS_CONFIG_FAULT) {
			em_read_phy_reg(&adapter->hw, PHY_1000T_CTRL, &phy_tmp);
			/* Drop manual master/slave and restart autoneg. */
			if(phy_tmp & CR_1000T_MS_ENABLE) {
				phy_tmp &= ~CR_1000T_MS_ENABLE;
				em_write_phy_reg(&adapter->hw, PHY_1000T_CTRL,
				    phy_tmp);
				adapter->smartspeed++;
				if(adapter->hw.autoneg &&
				   !em_phy_setup_autoneg(&adapter->hw) &&
				   !em_read_phy_reg(&adapter->hw, PHY_CTRL,
				    &phy_tmp)) {
					phy_tmp |= (MII_CR_AUTO_NEG_EN |
						    MII_CR_RESTART_AUTO_NEG);
					em_write_phy_reg(&adapter->hw, PHY_CTRL,
					    phy_tmp);
				}
			}
		}
		return;
	} else if(adapter->smartspeed == EM_SMARTSPEED_DOWNSHIFT) {
		/* If still no link, perhaps using 2/3 pair cable */
		em_read_phy_reg(&adapter->hw, PHY_1000T_CTRL, &phy_tmp);
		phy_tmp |= CR_1000T_MS_ENABLE;
		em_write_phy_reg(&adapter->hw, PHY_1000T_CTRL, phy_tmp);
		if(adapter->hw.autoneg &&
		   !em_phy_setup_autoneg(&adapter->hw) &&
		   !em_read_phy_reg(&adapter->hw, PHY_CTRL, &phy_tmp)) {
			phy_tmp |= (MII_CR_AUTO_NEG_EN |
				    MII_CR_RESTART_AUTO_NEG);
			em_write_phy_reg(&adapter->hw, PHY_CTRL, phy_tmp);
		}
	}
	/* Restart process after EM_SMARTSPEED_MAX iterations */
	if(adapter->smartspeed++ == EM_SMARTSPEED_MAX)
		adapter->smartspeed = 0;
}
2325
2326
2327/*
2328 * Manage DMA'able memory.
2329 */
2330static void
2331em_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
2332{
2333	if (error)
2334		return;
2335	*(bus_addr_t *) arg = segs[0].ds_addr;
2336}
2337
2338static int
2339em_dma_malloc(struct adapter *adapter, bus_size_t size, struct em_dma_alloc *dma,
2340	int mapflags)
2341{
2342	int error;
2343
2344	error = bus_dma_tag_create(NULL,		/* parent */
2345				EM_DBA_ALIGN, 0,	/* alignment, bounds */
2346				BUS_SPACE_MAXADDR,	/* lowaddr */
2347				BUS_SPACE_MAXADDR,	/* highaddr */
2348				NULL, NULL,		/* filter, filterarg */
2349				size,			/* maxsize */
2350				1,			/* nsegments */
2351				size,			/* maxsegsize */
2352				0,			/* flags */
2353				NULL,			/* lockfunc */
2354				NULL,			/* lockarg */
2355				&dma->dma_tag);
2356	if (error) {
2357		device_printf(adapter->dev, "%s: bus_dma_tag_create failed: %d\n",
2358		    __func__, error);
2359		goto fail_0;
2360	}
2361
2362	error = bus_dmamem_alloc(dma->dma_tag, (void**) &dma->dma_vaddr,
2363	    BUS_DMA_NOWAIT, &dma->dma_map);
2364	if (error) {
2365		device_printf(adapter->dev, "%s: bus_dmamem_alloc(%ju) failed: %d\n",
2366		    __func__, (uintmax_t)size, error);
2367		goto fail_2;
2368	}
2369
2370	dma->dma_paddr = 0;
2371	error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
2372	    size, em_dmamap_cb, &dma->dma_paddr, mapflags | BUS_DMA_NOWAIT);
2373	if (error || dma->dma_paddr == 0) {
2374		device_printf(adapter->dev, "%s: bus_dmamap_load failed: %d\n",
2375		    __func__, error);
2376		goto fail_3;
2377	}
2378
2379	return (0);
2380
2381fail_3:
2382	bus_dmamap_unload(dma->dma_tag, dma->dma_map);
2383fail_2:
2384	bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
2385	bus_dma_tag_destroy(dma->dma_tag);
2386fail_0:
2387	dma->dma_map = NULL;
2388	dma->dma_tag = NULL;
2389
2390	return (error);
2391}
2392
static void
em_dma_free(struct adapter *adapter, struct em_dma_alloc *dma)
{
	/* Nothing to do if em_dma_malloc() never succeeded (or was undone). */
	if (dma->dma_tag == NULL)
		return;
	if (dma->dma_map != NULL) {
		/* Sync, unload and free the backing memory before the tag. */
		bus_dmamap_sync(dma->dma_tag, dma->dma_map,
		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
		bus_dmamap_unload(dma->dma_tag, dma->dma_map);
		bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
		dma->dma_map = NULL;
	}
	bus_dma_tag_destroy(dma->dma_tag);
	dma->dma_tag = NULL;
}
2408
2409
2410/*********************************************************************
2411 *
2412 *  Allocate memory for tx_buffer structures. The tx_buffer stores all
2413 *  the information needed to transmit a packet on the wire.
2414 *
2415 **********************************************************************/
2416static int
2417em_allocate_transmit_structures(struct adapter *adapter)
2418{
2419	adapter->tx_buffer_area =  malloc(sizeof(struct em_buffer) *
2420	    adapter->num_tx_desc, M_DEVBUF, M_NOWAIT);
2421	if (adapter->tx_buffer_area == NULL) {
2422		device_printf(adapter->dev, "Unable to allocate tx_buffer memory\n");
2423		return (ENOMEM);
2424	}
2425
2426	bzero(adapter->tx_buffer_area, sizeof(struct em_buffer) * adapter->num_tx_desc);
2427
2428	return (0);
2429}
2430
2431/*********************************************************************
2432 *
2433 *  Allocate and initialize transmit structures.
2434 *
2435 **********************************************************************/
static int
em_setup_transmit_structures(struct adapter *adapter)
{
	device_t dev = adapter->dev;
	struct em_buffer *tx_buffer;
	bus_size_t size;
	int error, i;

	/*
	 * Setup DMA descriptor areas.
	 */
	size = roundup2(adapter->hw.max_frame_size, MCLBYTES);
	if ((error = bus_dma_tag_create(NULL,		/* parent */
				1, 0,			/* alignment, bounds */
				BUS_SPACE_MAXADDR,	/* lowaddr */
				BUS_SPACE_MAXADDR,	/* highaddr */
				NULL, NULL,		/* filter, filterarg */
				size,			/* maxsize */
				EM_MAX_SCATTER,		/* nsegments */
				size,			/* maxsegsize */
				0,			/* flags */
				NULL,		/* lockfunc */
				NULL,		/* lockarg */
				&adapter->txtag)) != 0) {
		device_printf(dev, "Unable to allocate TX DMA tag\n");
		goto fail;
	}

	/* Allocate the per-descriptor software state. */
	if ((error = em_allocate_transmit_structures(adapter)) != 0)
		goto fail;

	/* Clear the descriptor ring and create a DMA map per descriptor. */
	bzero(adapter->tx_desc_base, (sizeof(struct em_tx_desc)) * adapter->num_tx_desc);
	tx_buffer = adapter->tx_buffer_area;
	for (i = 0; i < adapter->num_tx_desc; i++) {
		error = bus_dmamap_create(adapter->txtag, 0, &tx_buffer->map);
		if (error != 0) {
			device_printf(dev, "Unable to create TX DMA map\n");
			goto fail;
		}
		tx_buffer++;
	}

	/* Reset the ring indices. */
	adapter->next_avail_tx_desc = 0;
	adapter->oldest_used_tx_desc = 0;

	/* Set number of descriptors available */
	adapter->num_tx_desc_avail = adapter->num_tx_desc;

	/* Set checksum context */
	adapter->active_checksum_context = OFFLOAD_NONE;
	bus_dmamap_sync(adapter->txdma.dma_tag, adapter->txdma.dma_map,
	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);

	return (0);

fail:
	/* em_free_transmit_structures() copes with partial initialization. */
	em_free_transmit_structures(adapter);
	return (error);
}
2495
2496/*********************************************************************
2497 *
2498 *  Enable transmit unit.
2499 *
2500 **********************************************************************/
static void
em_initialize_transmit_unit(struct adapter *adapter)
{
	uint32_t	reg_tctl, reg_tarc;
	uint32_t	reg_tipg = 0;
	uint64_t	bus_addr;

	 INIT_DEBUGOUT("em_initialize_transmit_unit: begin");
	/* Setup the Base and Length of the Tx Descriptor Ring */
	bus_addr = adapter->txdma.dma_paddr;
	E1000_WRITE_REG(&adapter->hw, TDLEN,
	    adapter->num_tx_desc * sizeof(struct em_tx_desc));
	E1000_WRITE_REG(&adapter->hw, TDBAH, (uint32_t)(bus_addr >> 32));
	E1000_WRITE_REG(&adapter->hw, TDBAL, (uint32_t)bus_addr);

	/* Setup the HW Tx Head and Tail descriptor pointers */
	E1000_WRITE_REG(&adapter->hw, TDT, 0);
	E1000_WRITE_REG(&adapter->hw, TDH, 0);


	HW_DEBUGOUT2("Base = %x, Length = %x\n", E1000_READ_REG(&adapter->hw, TDBAL),
	    E1000_READ_REG(&adapter->hw, TDLEN));

	/* Set the default values for the Tx Inter Packet Gap timer */
	switch (adapter->hw.mac_type) {
	case em_82542_rev2_0:
	case em_82542_rev2_1:
		reg_tipg = DEFAULT_82542_TIPG_IPGT;
		reg_tipg |= DEFAULT_82542_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT;
		reg_tipg |= DEFAULT_82542_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT;
		break;
	case em_80003es2lan:
		reg_tipg = DEFAULT_82543_TIPG_IPGR1;
		reg_tipg |= DEFAULT_80003ES2LAN_TIPG_IPGR2 <<
		    E1000_TIPG_IPGR2_SHIFT;
		break;
	default:
		if (adapter->hw.media_type == em_media_type_fiber)
			reg_tipg = DEFAULT_82543_TIPG_IPGT_FIBER;
		else
			reg_tipg = DEFAULT_82543_TIPG_IPGT_COPPER;
		reg_tipg |= DEFAULT_82543_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT;
		reg_tipg |= DEFAULT_82543_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT;
	}

	E1000_WRITE_REG(&adapter->hw, TIPG, reg_tipg);
	/* Program the TX interrupt delay (and absolute delay on 82540+). */
	E1000_WRITE_REG(&adapter->hw, TIDV, adapter->tx_int_delay.value);
	if(adapter->hw.mac_type >= em_82540)
		E1000_WRITE_REG(&adapter->hw, TADV, adapter->tx_abs_int_delay.value);

	/* Do adapter specific tweaks before we enable the transmitter. */
	if (adapter->hw.mac_type == em_82571 || adapter->hw.mac_type == em_82572) {
		reg_tarc = E1000_READ_REG(&adapter->hw, TARC0);
		reg_tarc |= (1 << 25);
		E1000_WRITE_REG(&adapter->hw, TARC0, reg_tarc);
		reg_tarc = E1000_READ_REG(&adapter->hw, TARC1);
		reg_tarc |= (1 << 25);
		reg_tarc &= ~(1 << 28);
		E1000_WRITE_REG(&adapter->hw, TARC1, reg_tarc);
	} else if (adapter->hw.mac_type == em_80003es2lan) {
		reg_tarc = E1000_READ_REG(&adapter->hw, TARC0);
		reg_tarc |= 1;
		if (adapter->hw.media_type == em_media_type_internal_serdes)
		    reg_tarc |= (1 << 20);
		E1000_WRITE_REG(&adapter->hw, TARC0, reg_tarc);
		reg_tarc = E1000_READ_REG(&adapter->hw, TARC1);
		reg_tarc |= 1;
		E1000_WRITE_REG(&adapter->hw, TARC1, reg_tarc);
	}

	/* Program the Transmit Control Register */
	reg_tctl = E1000_TCTL_PSP | E1000_TCTL_EN |
		   (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT);
	if (adapter->hw.mac_type >= em_82571)
		reg_tctl |= E1000_TCTL_MULR;
	/*
	 * NOTE(review): comparing link_duplex against the literal 1 looks
	 * inconsistent with the FULL_DUPLEX/HALF_DUPLEX comparisons used
	 * elsewhere in this file — confirm which value denotes full duplex
	 * before changing anything here.
	 */
	if (adapter->link_duplex == 1) {
		reg_tctl |= E1000_FDX_COLLISION_DISTANCE << E1000_COLD_SHIFT;
	} else {
		reg_tctl |= E1000_HDX_COLLISION_DISTANCE << E1000_COLD_SHIFT;
	}
	/* This write will effectively turn on the transmit unit. */
	E1000_WRITE_REG(&adapter->hw, TCTL, reg_tctl);

	/* Setup Transmit Descriptor Settings for this adapter */
	adapter->txd_cmd = E1000_TXD_CMD_IFCS | E1000_TXD_CMD_RS;

	if (adapter->tx_int_delay.value > 0)
		adapter->txd_cmd |= E1000_TXD_CMD_IDE;
}
2590
2591/*********************************************************************
2592 *
2593 *  Free all transmit related data structures.
2594 *
2595 **********************************************************************/
static void
em_free_transmit_structures(struct adapter *adapter)
{
	struct em_buffer *tx_buffer;
	int i;

	INIT_DEBUGOUT("free_transmit_structures: begin");

	/* Release any mbufs and DMA maps still attached to descriptors. */
	if (adapter->tx_buffer_area != NULL) {
		tx_buffer = adapter->tx_buffer_area;
		for (i = 0; i < adapter->num_tx_desc; i++, tx_buffer++) {
			if (tx_buffer->m_head != NULL) {
				bus_dmamap_sync(adapter->txtag, tx_buffer->map,
				    BUS_DMASYNC_POSTWRITE);
				bus_dmamap_unload(adapter->txtag,
				    tx_buffer->map);
				m_freem(tx_buffer->m_head);
				tx_buffer->m_head = NULL;
			} else if (tx_buffer->map != NULL)
				/* No mbuf loaded, but unload defensively. */
				bus_dmamap_unload(adapter->txtag,
				    tx_buffer->map);
			if (tx_buffer->map != NULL) {
				bus_dmamap_destroy(adapter->txtag,
				    tx_buffer->map);
				tx_buffer->map = NULL;
			}
		}
	}
	if (adapter->tx_buffer_area != NULL) {
		free(adapter->tx_buffer_area, M_DEVBUF);
		adapter->tx_buffer_area = NULL;
	}
	if (adapter->txtag != NULL) {
		bus_dma_tag_destroy(adapter->txtag);
		adapter->txtag = NULL;
	}
}
2633
2634/*********************************************************************
2635 *
2636 *  The offload context needs to be set when we transfer the first
2637 *  packet of a particular protocol (TCP/UDP). We change the
2638 *  context only if the protocol type changes.
2639 *
2640 **********************************************************************/
static void
em_transmit_checksum_setup(struct adapter *adapter, struct mbuf *mp,
    uint32_t *txd_upper, uint32_t *txd_lower)
{
	struct em_context_desc *TXD;
	struct em_buffer *tx_buffer;
	int curr_txd;

	if (mp->m_pkthdr.csum_flags) {

		if (mp->m_pkthdr.csum_flags & CSUM_TCP) {
			*txd_upper = E1000_TXD_POPTS_TXSM << 8;
			*txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
			/* Same protocol as last time: reuse the context. */
			if (adapter->active_checksum_context == OFFLOAD_TCP_IP)
				return;
			else
				adapter->active_checksum_context = OFFLOAD_TCP_IP;

		} else if (mp->m_pkthdr.csum_flags & CSUM_UDP) {
			*txd_upper = E1000_TXD_POPTS_TXSM << 8;
			*txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
			if (adapter->active_checksum_context == OFFLOAD_UDP_IP)
				return;
			else
				adapter->active_checksum_context = OFFLOAD_UDP_IP;
		} else {
			/* Unsupported offload flag: no offload options set. */
			*txd_upper = 0;
			*txd_lower = 0;
			return;
		}
	} else {
		*txd_upper = 0;
		*txd_lower = 0;
		return;
	}

	/* If we reach this point, the checksum offload context
	 * needs to be reset.
	 */
	/*
	 * NOTE(review): the offsets below assume an untagged Ethernet frame
	 * with a fixed-length (no options) IP header — confirm callers never
	 * request offload on VLAN-tagged or IP-options packets.
	 */
	curr_txd = adapter->next_avail_tx_desc;
	tx_buffer = &adapter->tx_buffer_area[curr_txd];
	TXD = (struct em_context_desc *) &adapter->tx_desc_base[curr_txd];

	TXD->lower_setup.ip_fields.ipcss = ETHER_HDR_LEN;
	TXD->lower_setup.ip_fields.ipcso =
		ETHER_HDR_LEN + offsetof(struct ip, ip_sum);
	TXD->lower_setup.ip_fields.ipcse =
		htole16(ETHER_HDR_LEN + sizeof(struct ip) - 1);

	TXD->upper_setup.tcp_fields.tucss =
		ETHER_HDR_LEN + sizeof(struct ip);
	TXD->upper_setup.tcp_fields.tucse = htole16(0);

	if (adapter->active_checksum_context == OFFLOAD_TCP_IP) {
		TXD->upper_setup.tcp_fields.tucso =
			ETHER_HDR_LEN + sizeof(struct ip) +
			offsetof(struct tcphdr, th_sum);
	} else if (adapter->active_checksum_context == OFFLOAD_UDP_IP) {
		TXD->upper_setup.tcp_fields.tucso =
			ETHER_HDR_LEN + sizeof(struct ip) +
			offsetof(struct udphdr, uh_sum);
	}

	TXD->tcp_seg_setup.data = htole32(0);
	TXD->cmd_and_length = htole32(adapter->txd_cmd | E1000_TXD_CMD_DEXT);

	/* The context descriptor consumes a slot but carries no mbuf. */
	tx_buffer->m_head = NULL;

	if (++curr_txd == adapter->num_tx_desc)
		curr_txd = 0;

	adapter->num_tx_desc_avail--;
	adapter->next_avail_tx_desc = curr_txd;
}
2715
2716/**********************************************************************
2717 *
2718 *  Examine each tx_buffer in the used queue. If the hardware is done
2719 *  processing the packet then free associated resources. The
2720 *  tx_buffer is put back on the free queue.
2721 *
2722 **********************************************************************/
static void
em_txeof(struct adapter *adapter)
{
	int i, num_avail;
	struct em_buffer *tx_buffer;
	struct em_tx_desc   *tx_desc;
	struct ifnet   *ifp = adapter->ifp;

	EM_LOCK_ASSERT(adapter);

	/* Fast path: nothing outstanding. */
	if (adapter->num_tx_desc_avail == adapter->num_tx_desc)
		return;

	num_avail = adapter->num_tx_desc_avail;
	i = adapter->oldest_used_tx_desc;

	tx_buffer = &adapter->tx_buffer_area[i];
	tx_desc = &adapter->tx_desc_base[i];

	/* Pull in the hardware's writeback status before reading it. */
	bus_dmamap_sync(adapter->txdma.dma_tag, adapter->txdma.dma_map,
	    BUS_DMASYNC_POSTREAD);
	/* Walk forward while descriptors report Descriptor Done. */
	while (tx_desc->upper.fields.status & E1000_TXD_STAT_DD) {

		tx_desc->upper.data = 0;
		num_avail++;

		/* Slots holding a context descriptor have no mbuf to free. */
		if (tx_buffer->m_head) {
			ifp->if_opackets++;
			bus_dmamap_sync(adapter->txtag, tx_buffer->map,
			    BUS_DMASYNC_POSTWRITE);
			bus_dmamap_unload(adapter->txtag, tx_buffer->map);

			m_freem(tx_buffer->m_head);
			tx_buffer->m_head = NULL;
		}

		if (++i == adapter->num_tx_desc)
			i = 0;

		tx_buffer = &adapter->tx_buffer_area[i];
		tx_desc = &adapter->tx_desc_base[i];
	}
	bus_dmamap_sync(adapter->txdma.dma_tag, adapter->txdma.dma_map,
	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);

	adapter->oldest_used_tx_desc = i;

	/*
	 * If we have enough room, clear IFF_DRV_OACTIVE to tell the stack
	 * that it is OK to send packets.
	 * If there are no pending descriptors, clear the timeout. Otherwise,
	 * if some descriptors have been freed, restart the timeout.
	 */
	if (num_avail > EM_TX_CLEANUP_THRESHOLD) {
		ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
		if (num_avail == adapter->num_tx_desc)
			ifp->if_timer = 0;
		else if (num_avail != adapter->num_tx_desc_avail)
			ifp->if_timer = EM_TX_TIMEOUT;
	}
	adapter->num_tx_desc_avail = num_avail;
}
2785
2786/*********************************************************************
2787 *
2788 *  Get a buffer from system mbuf buffer pool.
2789 *
2790 **********************************************************************/
/*
 * Attach a receive mbuf cluster to ring slot 'i'.
 *
 * If 'mp' is NULL a fresh cluster is allocated; otherwise the caller's
 * mbuf is recycled in place (length and data pointer reset).  The
 * cluster is DMA-mapped and its bus address programmed into the RX
 * descriptor.
 *
 * Returns 0 on success, ENOBUFS if no cluster could be allocated, or
 * the bus_dmamap_load_mbuf_sg() error code (the mbuf is freed then).
 */
static int
em_get_buf(int i, struct adapter *adapter, struct mbuf *mp)
{
	struct ifnet		*ifp = adapter->ifp;
	bus_dma_segment_t	segs[1];
	struct em_buffer	*rx_buffer;
	int			error, nsegs;

	if (mp == NULL) {
		mp = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR);
		if (mp == NULL) {
			adapter->mbuf_cluster_failed++;
			return (ENOBUFS);
		}
		mp->m_len = mp->m_pkthdr.len = MCLBYTES;
	} else {
		/* Recycle: reset to a pristine, full-size, unchained cluster. */
		mp->m_len = mp->m_pkthdr.len = MCLBYTES;
		mp->m_data = mp->m_ext.ext_buf;
		mp->m_next = NULL;
	}

	/*
	 * For standard frames, shift the payload by ETHER_ALIGN so the
	 * headers following the Ethernet header land better aligned.
	 */
	if (ifp->if_mtu <= ETHERMTU)
		m_adj(mp, ETHER_ALIGN);

	rx_buffer = &adapter->rx_buffer_area[i];

	/*
	 * Using memory from the mbuf cluster pool, invoke the
	 * bus_dma machinery to arrange the memory mapping.
	 */
	error = bus_dmamap_load_mbuf_sg(adapter->rxtag, rx_buffer->map,
	    mp, segs, &nsegs, 0);
	if (error != 0) {
		m_free(mp);
		return (error);
	}
	/* If nsegs is wrong then the stack is corrupt. */
	KASSERT(nsegs == 1, ("Too many segments returned!"));
	rx_buffer->m_head = mp;
	adapter->rx_desc_base[i].buffer_addr = htole64(segs[0].ds_addr);
	bus_dmamap_sync(adapter->rxtag, rx_buffer->map, BUS_DMASYNC_PREREAD);

	return (0);
}
2835
2836/*********************************************************************
2837 *
2838 *  Allocate memory for rx_buffer structures. Since we use one
2839 *  rx_buffer per received packet, the maximum number of rx_buffer's
2840 *  that we'll need is equal to the number of receive descriptors
2841 *  that we've allocated.
2842 *
2843 **********************************************************************/
2844static int
2845em_allocate_receive_structures(struct adapter *adapter)
2846{
2847	device_t dev = adapter->dev;
2848	struct em_buffer *rx_buffer;
2849	int i, error;
2850
2851	adapter->rx_buffer_area = malloc(sizeof(struct em_buffer) * adapter->num_rx_desc,
2852	    M_DEVBUF, M_NOWAIT);
2853	if (adapter->rx_buffer_area == NULL) {
2854		device_printf(dev, "Unable to allocate rx_buffer memory\n");
2855		return (ENOMEM);
2856	}
2857
2858	bzero(adapter->rx_buffer_area, sizeof(struct em_buffer) * adapter->num_rx_desc);
2859
2860	error = bus_dma_tag_create(NULL,		/* parent */
2861				1, 0,			/* alignment, bounds */
2862				BUS_SPACE_MAXADDR,	/* lowaddr */
2863				BUS_SPACE_MAXADDR,	/* highaddr */
2864				NULL, NULL,		/* filter, filterarg */
2865				MCLBYTES,		/* maxsize */
2866				1,			/* nsegments */
2867				MCLBYTES,		/* maxsegsize */
2868				0,			/* flags */
2869				NULL,			/* lockfunc */
2870				NULL,			/* lockarg */
2871				&adapter->rxtag);
2872	if (error) {
2873		device_printf(dev, "%s: bus_dma_tag_create failed %d\n",
2874		    __func__, error);
2875		goto fail;
2876	}
2877
2878	rx_buffer = adapter->rx_buffer_area;
2879	for (i = 0; i < adapter->num_rx_desc; i++, rx_buffer++) {
2880		error = bus_dmamap_create(adapter->rxtag, BUS_DMA_NOWAIT,
2881		    &rx_buffer->map);
2882		if (error) {
2883			device_printf(dev, "%s: bus_dmamap_create failed: %d\n",
2884			    __func__, error);
2885			goto fail;
2886		}
2887	}
2888
2889	for (i = 0; i < adapter->num_rx_desc; i++) {
2890		error = em_get_buf(i, adapter, NULL);
2891		if (error)
2892			goto fail;
2893	}
2894	bus_dmamap_sync(adapter->rxdma.dma_tag, adapter->rxdma.dma_map,
2895	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
2896
2897	return (0);
2898
2899fail:
2900	em_free_receive_structures(adapter);
2901	return (error);
2902}
2903
2904/*********************************************************************
2905 *
2906 *  Allocate and initialize receive structures.
2907 *
2908 **********************************************************************/
2909static int
2910em_setup_receive_structures(struct adapter *adapter)
2911{
2912	int error;
2913
2914	bzero(adapter->rx_desc_base, (sizeof(struct em_rx_desc)) * adapter->num_rx_desc);
2915
2916	if ((error = em_allocate_receive_structures(adapter)) != 0)
2917		return (error);
2918
2919	/* Setup our descriptor pointers */
2920	adapter->next_rx_desc_to_check = 0;
2921
2922	return (0);
2923}
2924
2925/*********************************************************************
2926 *
2927 *  Enable receive unit.
2928 *
2929 **********************************************************************/
static void
em_initialize_receive_unit(struct adapter *adapter)
{
	struct ifnet	*ifp = adapter->ifp;
	uint64_t	bus_addr;
	uint32_t	reg_rctl;
	uint32_t	reg_rxcsum;

	INIT_DEBUGOUT("em_initialize_receive_unit: begin");

	/*
	 * Make sure receives are disabled while setting
	 * up the descriptor ring
	 */
	E1000_WRITE_REG(&adapter->hw, RCTL, 0);

	/* Set the Receive Delay Timer Register */
	E1000_WRITE_REG(&adapter->hw, RDTR, adapter->rx_int_delay.value | E1000_RDT_FPDB);

	/* RADV/ITR only exist on 82540 and newer parts. */
	if(adapter->hw.mac_type >= em_82540) {
		E1000_WRITE_REG(&adapter->hw, RADV, adapter->rx_abs_int_delay.value);

		/*
		 * Set the interrupt throttling rate. Value is calculated
		 * as DEFAULT_ITR = 1/(MAX_INTS_PER_SEC * 256ns)
		 */
#define MAX_INTS_PER_SEC	8000
#define DEFAULT_ITR	     1000000000/(MAX_INTS_PER_SEC * 256)
		E1000_WRITE_REG(&adapter->hw, ITR, DEFAULT_ITR);
	}

	/* Setup the Base and Length of the Rx Descriptor Ring */
	bus_addr = adapter->rxdma.dma_paddr;
	E1000_WRITE_REG(&adapter->hw, RDLEN, adapter->num_rx_desc *
			sizeof(struct em_rx_desc));
	E1000_WRITE_REG(&adapter->hw, RDBAH, (uint32_t)(bus_addr >> 32));
	E1000_WRITE_REG(&adapter->hw, RDBAL, (uint32_t)bus_addr);

	/* Setup the HW Rx Head and Tail Descriptor Pointers */
	E1000_WRITE_REG(&adapter->hw, RDT, adapter->num_rx_desc - 1);
	E1000_WRITE_REG(&adapter->hw, RDH, 0);

	/* Setup the Receive Control Register */
	reg_rctl = E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_LBM_NO |
		   E1000_RCTL_RDMTS_HALF |
		   (adapter->hw.mc_filter_type << E1000_RCTL_MO_SHIFT);

	/* Store-bad-packets is required when TBI compatibility is on. */
	if (adapter->hw.tbi_compatibility_on == TRUE)
		reg_rctl |= E1000_RCTL_SBP;


	/* Buffer-size bits; sizes above 2048 need BSEX and long packets. */
	switch (adapter->rx_buffer_len) {
	default:
	case EM_RXBUFFER_2048:
		reg_rctl |= E1000_RCTL_SZ_2048;
		break;
	case EM_RXBUFFER_4096:
		reg_rctl |= E1000_RCTL_SZ_4096 | E1000_RCTL_BSEX | E1000_RCTL_LPE;
		break;
	case EM_RXBUFFER_8192:
		reg_rctl |= E1000_RCTL_SZ_8192 | E1000_RCTL_BSEX | E1000_RCTL_LPE;
		break;
	case EM_RXBUFFER_16384:
		reg_rctl |= E1000_RCTL_SZ_16384 | E1000_RCTL_BSEX | E1000_RCTL_LPE;
		break;
	}

	/* Jumbo MTU requires accepting long packets. */
	if (ifp->if_mtu > ETHERMTU)
		reg_rctl |= E1000_RCTL_LPE;

	/* Enable 82543 Receive Checksum Offload for TCP and UDP */
	if ((adapter->hw.mac_type >= em_82543) &&
	    (ifp->if_capenable & IFCAP_RXCSUM)) {
		reg_rxcsum = E1000_READ_REG(&adapter->hw, RXCSUM);
		reg_rxcsum |= (E1000_RXCSUM_IPOFL | E1000_RXCSUM_TUOFL);
		E1000_WRITE_REG(&adapter->hw, RXCSUM, reg_rxcsum);
	}

	/* Enable Receives */
	E1000_WRITE_REG(&adapter->hw, RCTL, reg_rctl);
}
3011
3012/*********************************************************************
3013 *
3014 *  Free receive related data structures.
3015 *
3016 **********************************************************************/
3017static void
3018em_free_receive_structures(struct adapter *adapter)
3019{
3020	struct em_buffer *rx_buffer;
3021	int i;
3022
3023	INIT_DEBUGOUT("free_receive_structures: begin");
3024
3025	if (adapter->rx_buffer_area != NULL) {
3026		rx_buffer = adapter->rx_buffer_area;
3027		for (i = 0; i < adapter->num_rx_desc; i++, rx_buffer++) {
3028			if (rx_buffer->m_head != NULL) {
3029				bus_dmamap_sync(adapter->rxtag, rx_buffer->map,
3030				    BUS_DMASYNC_POSTREAD);
3031				bus_dmamap_unload(adapter->rxtag,
3032				    rx_buffer->map);
3033				m_freem(rx_buffer->m_head);
3034				rx_buffer->m_head = NULL;
3035			} else if (rx_buffer->map != NULL)
3036				bus_dmamap_unload(adapter->rxtag,
3037				    rx_buffer->map);
3038			if (rx_buffer->map != NULL) {
3039				bus_dmamap_destroy(adapter->rxtag,
3040				    rx_buffer->map);
3041				rx_buffer->map = NULL;
3042			}
3043		}
3044	}
3045	if (adapter->rx_buffer_area != NULL) {
3046		free(adapter->rx_buffer_area, M_DEVBUF);
3047		adapter->rx_buffer_area = NULL;
3048	}
3049	if (adapter->rxtag != NULL) {
3050		bus_dma_tag_destroy(adapter->rxtag);
3051		adapter->rxtag = NULL;
3052	}
3053}
3054
3055/*********************************************************************
3056 *
3057 *  This routine executes in interrupt context. It replenishes
3058 *  the mbufs in the descriptor and sends data which has been
3059 *  dma'ed into host memory to upper layer.
3060 *
3061 *  We loop at most count times if count is > 0, or until done if
3062 *  count < 0.
3063 *
3064 *********************************************************************/
static int
em_rxeof(struct adapter *adapter, int count)
{
	struct ifnet	*ifp;
	struct mbuf	*mp;
	uint8_t		accept_frame = 0;
	uint8_t		eop = 0;
	uint16_t 	len, desc_len, prev_len_adj;
	int		i;

	/* Pointer to the receive descriptor being examined. */
	struct em_rx_desc   *current_desc;

	ifp = adapter->ifp;
	i = adapter->next_rx_desc_to_check;
	current_desc = &adapter->rx_desc_base[i];
	bus_dmamap_sync(adapter->rxdma.dma_tag, adapter->rxdma.dma_map,
	    BUS_DMASYNC_POSTREAD);

	/* Hardware still owns the next descriptor: nothing to do. */
	if (!((current_desc->status) & E1000_RXD_STAT_DD))
		return (0);

	while ((current_desc->status & E1000_RXD_STAT_DD) &&
	    (count != 0) &&
	    (ifp->if_drv_flags & IFF_DRV_RUNNING)) {
		struct mbuf *m = NULL;

		mp = adapter->rx_buffer_area[i].m_head;
		bus_dmamap_sync(adapter->rxtag, adapter->rx_buffer_area[i].map,
		    BUS_DMASYNC_POSTREAD);
		bus_dmamap_unload(adapter->rxtag,
		    adapter->rx_buffer_area[i].map);

		accept_frame = 1;
		prev_len_adj = 0;
		desc_len = le16toh(current_desc->length);
		if (current_desc->status & E1000_RXD_STAT_EOP) {
			/* Only completed frames count against 'count'. */
			count--;
			eop = 1;
			/*
			 * Strip the CRC from the length; if this final
			 * descriptor is shorter than the CRC, part of
			 * the CRC sits in the previous descriptor and
			 * prev_len_adj records how much to trim there.
			 */
			if (desc_len < ETHER_CRC_LEN) {
				len = 0;
				prev_len_adj = ETHER_CRC_LEN - desc_len;
			} else
				len = desc_len - ETHER_CRC_LEN;
		} else {
			eop = 0;
			len = desc_len;
		}

		if (current_desc->errors & E1000_RXD_ERR_FRAME_ERR_MASK) {
			uint8_t		last_byte;
			uint32_t	pkt_len = desc_len;

			if (adapter->fmp != NULL)
				pkt_len += adapter->fmp->m_pkthdr.len;

			/*
			 * TBI compatibility workaround: the shared code
			 * may still accept certain errored frames
			 * (TBI_ACCEPT); adjust statistics and trim one
			 * byte if so, otherwise drop the frame.
			 */
			last_byte = *(mtod(mp, caddr_t) + desc_len - 1);
			if (TBI_ACCEPT(&adapter->hw, current_desc->status,
			    current_desc->errors,
			    pkt_len, last_byte)) {
				em_tbi_adjust_stats(&adapter->hw,
				    &adapter->stats, pkt_len,
				    adapter->hw.mac_addr);
				if (len > 0)
					len--;
			} else
				accept_frame = 0;
		}

		if (accept_frame) {
			/*
			 * Refill this ring slot with a fresh cluster; on
			 * failure recycle the old mbuf back into the slot
			 * and drop the partially assembled frame.
			 */
			if (em_get_buf(i, adapter, NULL) == ENOBUFS) {
				adapter->dropped_pkts++;
				em_get_buf(i, adapter, mp);
				if (adapter->fmp != NULL)
					m_freem(adapter->fmp);
				adapter->fmp = NULL;
				adapter->lmp = NULL;
				break;
			}

			/* Assign correct length to the current fragment */
			mp->m_len = len;

			if (adapter->fmp == NULL) {
				mp->m_pkthdr.len = len;
				adapter->fmp = mp; /* Store the first mbuf */
				adapter->lmp = mp;
			} else {
				/* Chain mbuf's together */
				mp->m_flags &= ~M_PKTHDR;
				/*
				 * Adjust length of previous mbuf in chain if
				 * we received less than 4 bytes in the last
				 * descriptor.
				 */
				if (prev_len_adj > 0) {
					adapter->lmp->m_len -= prev_len_adj;
					adapter->fmp->m_pkthdr.len -=
					    prev_len_adj;
				}
				adapter->lmp->m_next = mp;
				adapter->lmp = adapter->lmp->m_next;
				adapter->fmp->m_pkthdr.len += len;
			}

			if (eop) {
				/* Frame complete: prepare to hand it up. */
				adapter->fmp->m_pkthdr.rcvif = ifp;
				ifp->if_ipackets++;
				em_receive_checksum(adapter, current_desc,
				    adapter->fmp);
#ifndef __NO_STRICT_ALIGNMENT
				if (ifp->if_mtu > ETHERMTU &&
				    em_fixup_rx(adapter) != 0)
					goto skip;
#endif
				if (current_desc->status & E1000_RXD_STAT_VP)
					VLAN_INPUT_TAG(ifp, adapter->fmp,
					    (le16toh(current_desc->special) &
					    E1000_RXD_SPC_VLAN_MASK));
#ifndef __NO_STRICT_ALIGNMENT
skip:
#endif
				m = adapter->fmp;
				adapter->fmp = NULL;
				adapter->lmp = NULL;
			}
		} else {
			/* Errored frame: recycle buffer, drop chain. */
			adapter->dropped_pkts++;
			em_get_buf(i, adapter, mp);
			if (adapter->fmp != NULL)
				m_freem(adapter->fmp);
			adapter->fmp = NULL;
			adapter->lmp = NULL;
		}

		/* Zero out the receive descriptors status. */
		current_desc->status = 0;
		bus_dmamap_sync(adapter->rxdma.dma_tag, adapter->rxdma.dma_map,
		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);

		/* Advance our pointers to the next descriptor. */
		if (++i == adapter->num_rx_desc)
			i = 0;
		if (m != NULL) {
			adapter->next_rx_desc_to_check = i;
#ifdef DEVICE_POLLING
			/*
			 * Drop the lock around if_input() and re-read the
			 * ring index afterwards, since it may have been
			 * changed while the lock was released.
			 */
			EM_UNLOCK(adapter);
			(*ifp->if_input)(ifp, m);
			EM_LOCK(adapter);
#else
			(*ifp->if_input)(ifp, m);
#endif
			i = adapter->next_rx_desc_to_check;
		}
		current_desc = &adapter->rx_desc_base[i];
	}
	adapter->next_rx_desc_to_check = i;

	/* Advance the E1000's Receive Queue #0  "Tail Pointer". */
	if (--i < 0)
		i = adapter->num_rx_desc - 1;
	E1000_WRITE_REG(&adapter->hw, RDT, i);
	if (!((current_desc->status) & E1000_RXD_STAT_DD))
		return (0);

	return (1);
}
3232
3233#ifndef __NO_STRICT_ALIGNMENT
/*
 * When jumbo frames are enabled we must realign the entire payload on
 * architectures with strict alignment requirements. This is a serious
 * design mistake of the 8254x, as it nullifies the benefit of DMA. The
 * 8254x only allows RX buffer sizes of 2048/4096/8192/16384; what we
 * really want is 2048 - ETHER_ALIGN so the payload itself is aligned.
 * Even on architectures without strict alignment restrictions, the
 * 8254x's unaligned memory accesses reduce performance as well.
 * To avoid copying an entire frame just to realign it, we allocate a
 * new mbuf, copy the Ethernet header into it, and prepend it to the
 * existing mbuf chain.
 *
 * Be aware that the best performance of the 8254x is achieved only
 * when jumbo frames are not used at all on architectures with strict
 * alignment.
 */
static int
em_fixup_rx(struct adapter *adapter)
{
	struct mbuf *m, *n;
	int error;

	error = 0;
	m = adapter->fmp;
	if (m->m_len <= (MCLBYTES - ETHER_HDR_LEN)) {
		/*
		 * Room left in the cluster: slide the whole fragment
		 * forward by ETHER_HDR_LEN in place (bcopy handles the
		 * overlap), which shifts the payload's alignment.
		 */
		bcopy(m->m_data, m->m_data + ETHER_HDR_LEN, m->m_len);
		m->m_data += ETHER_HDR_LEN;
	} else {
		/*
		 * No room: copy the Ethernet header into a new mbuf,
		 * move the packet header there, and prepend it to the
		 * chain, advancing the original mbuf past the header.
		 */
		MGETHDR(n, M_DONTWAIT, MT_DATA);
		if (n != NULL) {
			bcopy(m->m_data, n->m_data, ETHER_HDR_LEN);
			m->m_data += ETHER_HDR_LEN;
			m->m_len -= ETHER_HDR_LEN;
			n->m_len = ETHER_HDR_LEN;
			M_MOVE_PKTHDR(n, m);
			n->m_next = m;
			adapter->fmp = n;
		} else {
			/* Allocation failed: drop the whole frame. */
			adapter->dropped_pkts++;
			m_freem(adapter->fmp);
			adapter->fmp = NULL;
			error = ENOMEM;
		}
	}

	return (error);
}
3279#endif
3280
3281/*********************************************************************
3282 *
3283 *  Verify that the hardware indicated that the checksum is valid.
3284 *  Inform the stack about the status of checksum so that stack
3285 *  doesn't spend time verifying the checksum.
3286 *
3287 *********************************************************************/
3288static void
3289em_receive_checksum(struct adapter *adapter, struct em_rx_desc *rx_desc,
3290		    struct mbuf *mp)
3291{
3292	/* 82543 or newer only */
3293	if ((adapter->hw.mac_type < em_82543) ||
3294	    /* Ignore Checksum bit is set */
3295	    (rx_desc->status & E1000_RXD_STAT_IXSM)) {
3296		mp->m_pkthdr.csum_flags = 0;
3297		return;
3298	}
3299
3300	if (rx_desc->status & E1000_RXD_STAT_IPCS) {
3301		/* Did it pass? */
3302		if (!(rx_desc->errors & E1000_RXD_ERR_IPE)) {
3303			/* IP Checksum Good */
3304			mp->m_pkthdr.csum_flags = CSUM_IP_CHECKED;
3305			mp->m_pkthdr.csum_flags |= CSUM_IP_VALID;
3306
3307		} else {
3308			mp->m_pkthdr.csum_flags = 0;
3309		}
3310	}
3311
3312	if (rx_desc->status & E1000_RXD_STAT_TCPCS) {
3313		/* Did it pass? */
3314		if (!(rx_desc->errors & E1000_RXD_ERR_TCPE)) {
3315			mp->m_pkthdr.csum_flags |=
3316			(CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
3317			mp->m_pkthdr.csum_data = htons(0xffff);
3318		}
3319	}
3320}
3321
3322
3323static void
3324em_enable_vlans(struct adapter *adapter)
3325{
3326	uint32_t ctrl;
3327
3328	E1000_WRITE_REG(&adapter->hw, VET, ETHERTYPE_VLAN);
3329
3330	ctrl = E1000_READ_REG(&adapter->hw, CTRL);
3331	ctrl |= E1000_CTRL_VME;
3332	E1000_WRITE_REG(&adapter->hw, CTRL, ctrl);
3333}
3334
3335static void
3336em_disable_vlans(struct adapter *adapter)
3337{
3338	uint32_t ctrl;
3339
3340	ctrl = E1000_READ_REG(&adapter->hw, CTRL);
3341	ctrl &= ~E1000_CTRL_VME;
3342	E1000_WRITE_REG(&adapter->hw, CTRL, ctrl);
3343}
3344
3345static void
3346em_enable_intr(struct adapter *adapter)
3347{
3348	E1000_WRITE_REG(&adapter->hw, IMS, (IMS_ENABLE_MASK));
3349}
3350
3351static void
3352em_disable_intr(struct adapter *adapter)
3353{
3354	/*
3355	 * The first version of 82542 had an errata where when link was forced
3356	 * it would stay up even up even if the cable was disconnected.
3357	 * Sequence errors were used to detect the disconnect and then the
3358	 * driver would unforce the link. This code in the in the ISR. For this
3359	 * to work correctly the Sequence error interrupt had to be enabled
3360	 * all the time.
3361	 */
3362
3363	if (adapter->hw.mac_type == em_82542_rev2_0)
3364	    E1000_WRITE_REG(&adapter->hw, IMC,
3365		(0xffffffff & ~E1000_IMC_RXSEQ));
3366	else
3367	    E1000_WRITE_REG(&adapter->hw, IMC,
3368		0xffffffff);
3369}
3370
3371static int
3372em_is_valid_ether_addr(uint8_t *addr)
3373{
3374	char zero_addr[6] = { 0, 0, 0, 0, 0, 0 };
3375
3376	if ((addr[0] & 1) || (!bcmp(addr, zero_addr, ETHER_ADDR_LEN))) {
3377		return (FALSE);
3378	}
3379
3380	return (TRUE);
3381}
3382
/* Write a 16-bit value to the device's PCI configuration space. */
void
em_write_pci_cfg(struct em_hw *hw, uint32_t reg, uint16_t *value)
{
	pci_write_config(((struct em_osdep *)hw->back)->dev, reg, *value, 2);
}
3388
/* Read a 16-bit value from the device's PCI configuration space. */
void
em_read_pci_cfg(struct em_hw *hw, uint32_t reg, uint16_t *value)
{
	*value = pci_read_config(((struct em_osdep *)hw->back)->dev, reg, 2);
}
3394
/* Set the Memory Write and Invalidate bit in the PCI command register. */
void
em_pci_set_mwi(struct em_hw *hw)
{
	pci_write_config(((struct em_osdep *)hw->back)->dev, PCIR_COMMAND,
	    (hw->pci_cmd_word | CMD_MEM_WRT_INVALIDATE), 2);
}
3401
/* Clear the Memory Write and Invalidate bit in the PCI command register. */
void
em_pci_clear_mwi(struct em_hw *hw)
{
	pci_write_config(((struct em_osdep *)hw->back)->dev, PCIR_COMMAND,
	    (hw->pci_cmd_word & ~CMD_MEM_WRT_INVALIDATE), 2);
}
3408
3409/*********************************************************************
3410* 82544 Coexistence issue workaround.
3411*    There are 2 issues.
3412*       1. Transmit Hang issue.
3413*    To detect this issue, following equation can be used...
3414*	  SIZE[3:0] + ADDR[2:0] = SUM[3:0].
3415*	  If SUM[3:0] is in between 1 to 4, we will have this issue.
3416*
3417*       2. DAC issue.
3418*    To detect this issue, following equation can be used...
3419*	  SIZE[3:0] + ADDR[2:0] = SUM[3:0].
3420*	  If SUM[3:0] is in between 9 to c, we will have this issue.
3421*
3422*
*    WORKAROUND:
*	  Make sure the ending address does not fall on 1,2,3,4 (hang)
*	  or 9,a,b,c (DAC).
*
**************************************************************************/
3427static uint32_t
3428em_fill_descriptors (bus_addr_t address, uint32_t length,
3429		PDESC_ARRAY desc_array)
3430{
3431	/* Since issue is sensitive to length and address.*/
3432	/* Let us first check the address...*/
3433	uint32_t safe_terminator;
3434	if (length <= 4) {
3435		desc_array->descriptor[0].address = address;
3436		desc_array->descriptor[0].length = length;
3437		desc_array->elements = 1;
3438		return (desc_array->elements);
3439	}
3440	safe_terminator = (uint32_t)((((uint32_t)address & 0x7) + (length & 0xF)) & 0xF);
3441	/* if it does not fall between 0x1 to 0x4 and 0x9 to 0xC then return */
3442	if (safe_terminator == 0   ||
3443	(safe_terminator > 4   &&
3444	safe_terminator < 9)   ||
3445	(safe_terminator > 0xC &&
3446	safe_terminator <= 0xF)) {
3447		desc_array->descriptor[0].address = address;
3448		desc_array->descriptor[0].length = length;
3449		desc_array->elements = 1;
3450		return (desc_array->elements);
3451	}
3452
3453	desc_array->descriptor[0].address = address;
3454	desc_array->descriptor[0].length = length - 4;
3455	desc_array->descriptor[1].address = address + (length - 4);
3456	desc_array->descriptor[1].length = 4;
3457	desc_array->elements = 2;
3458	return (desc_array->elements);
3459}
3460
3461/**********************************************************************
3462 *
3463 *  Update the board statistics counters.
3464 *
3465 **********************************************************************/
static void
em_update_stats_counters(struct adapter *adapter)
{
	struct ifnet   *ifp;

	/*
	 * Symbol/sequence error counters are read only for copper media
	 * or when the STATUS register reports link up.
	 */
	if(adapter->hw.media_type == em_media_type_copper ||
	   (E1000_READ_REG(&adapter->hw, STATUS) & E1000_STATUS_LU)) {
		adapter->stats.symerrs += E1000_READ_REG(&adapter->hw, SYMERRS);
		adapter->stats.sec += E1000_READ_REG(&adapter->hw, SEC);
	}
	adapter->stats.crcerrs += E1000_READ_REG(&adapter->hw, CRCERRS);
	adapter->stats.mpc += E1000_READ_REG(&adapter->hw, MPC);
	adapter->stats.scc += E1000_READ_REG(&adapter->hw, SCC);
	adapter->stats.ecol += E1000_READ_REG(&adapter->hw, ECOL);

	adapter->stats.mcc += E1000_READ_REG(&adapter->hw, MCC);
	adapter->stats.latecol += E1000_READ_REG(&adapter->hw, LATECOL);
	adapter->stats.colc += E1000_READ_REG(&adapter->hw, COLC);
	adapter->stats.dc += E1000_READ_REG(&adapter->hw, DC);
	adapter->stats.rlec += E1000_READ_REG(&adapter->hw, RLEC);
	adapter->stats.xonrxc += E1000_READ_REG(&adapter->hw, XONRXC);
	adapter->stats.xontxc += E1000_READ_REG(&adapter->hw, XONTXC);
	adapter->stats.xoffrxc += E1000_READ_REG(&adapter->hw, XOFFRXC);
	adapter->stats.xofftxc += E1000_READ_REG(&adapter->hw, XOFFTXC);
	adapter->stats.fcruc += E1000_READ_REG(&adapter->hw, FCRUC);
	adapter->stats.prc64 += E1000_READ_REG(&adapter->hw, PRC64);
	adapter->stats.prc127 += E1000_READ_REG(&adapter->hw, PRC127);
	adapter->stats.prc255 += E1000_READ_REG(&adapter->hw, PRC255);
	adapter->stats.prc511 += E1000_READ_REG(&adapter->hw, PRC511);
	adapter->stats.prc1023 += E1000_READ_REG(&adapter->hw, PRC1023);
	adapter->stats.prc1522 += E1000_READ_REG(&adapter->hw, PRC1522);
	adapter->stats.gprc += E1000_READ_REG(&adapter->hw, GPRC);
	adapter->stats.bprc += E1000_READ_REG(&adapter->hw, BPRC);
	adapter->stats.mprc += E1000_READ_REG(&adapter->hw, MPRC);
	adapter->stats.gptc += E1000_READ_REG(&adapter->hw, GPTC);

	/* For the 64-bit byte counters the low dword must be read first. */
	/* Both registers clear on the read of the high dword */

	adapter->stats.gorcl += E1000_READ_REG(&adapter->hw, GORCL);
	adapter->stats.gorch += E1000_READ_REG(&adapter->hw, GORCH);
	adapter->stats.gotcl += E1000_READ_REG(&adapter->hw, GOTCL);
	adapter->stats.gotch += E1000_READ_REG(&adapter->hw, GOTCH);

	adapter->stats.rnbc += E1000_READ_REG(&adapter->hw, RNBC);
	adapter->stats.ruc += E1000_READ_REG(&adapter->hw, RUC);
	adapter->stats.rfc += E1000_READ_REG(&adapter->hw, RFC);
	adapter->stats.roc += E1000_READ_REG(&adapter->hw, ROC);
	adapter->stats.rjc += E1000_READ_REG(&adapter->hw, RJC);

	adapter->stats.torl += E1000_READ_REG(&adapter->hw, TORL);
	adapter->stats.torh += E1000_READ_REG(&adapter->hw, TORH);
	adapter->stats.totl += E1000_READ_REG(&adapter->hw, TOTL);
	adapter->stats.toth += E1000_READ_REG(&adapter->hw, TOTH);

	adapter->stats.tpr += E1000_READ_REG(&adapter->hw, TPR);
	adapter->stats.tpt += E1000_READ_REG(&adapter->hw, TPT);
	adapter->stats.ptc64 += E1000_READ_REG(&adapter->hw, PTC64);
	adapter->stats.ptc127 += E1000_READ_REG(&adapter->hw, PTC127);
	adapter->stats.ptc255 += E1000_READ_REG(&adapter->hw, PTC255);
	adapter->stats.ptc511 += E1000_READ_REG(&adapter->hw, PTC511);
	adapter->stats.ptc1023 += E1000_READ_REG(&adapter->hw, PTC1023);
	adapter->stats.ptc1522 += E1000_READ_REG(&adapter->hw, PTC1522);
	adapter->stats.mptc += E1000_READ_REG(&adapter->hw, MPTC);
	adapter->stats.bptc += E1000_READ_REG(&adapter->hw, BPTC);

	/* These counters exist only on 82543 and newer parts. */
	if (adapter->hw.mac_type >= em_82543) {
		adapter->stats.algnerrc += E1000_READ_REG(&adapter->hw, ALGNERRC);
		adapter->stats.rxerrc += E1000_READ_REG(&adapter->hw, RXERRC);
		adapter->stats.tncrs += E1000_READ_REG(&adapter->hw, TNCRS);
		adapter->stats.cexterr += E1000_READ_REG(&adapter->hw, CEXTERR);
		adapter->stats.tsctc += E1000_READ_REG(&adapter->hw, TSCTC);
		adapter->stats.tsctfc += E1000_READ_REG(&adapter->hw, TSCTFC);
	}
	ifp = adapter->ifp;

	/* Fold the hardware counters into the interface statistics. */
	ifp->if_collisions = adapter->stats.colc;

	/* Rx Errors */
	ifp->if_ierrors = adapter->dropped_pkts + adapter->stats.rxerrc +
	    adapter->stats.crcerrs + adapter->stats.algnerrc + adapter->stats.ruc +
	    adapter->stats.roc + adapter->stats.mpc + adapter->stats.cexterr;

	/* Tx Errors */
	ifp->if_oerrors = adapter->stats.ecol + adapter->stats.latecol +
	    adapter->watchdog_events;
}
3553
3554
3555/**********************************************************************
3556 *
3557 *  This routine is called only when em_display_debug_stats is enabled.
3558 *  This routine provides a way to take a look at important statistics
3559 *  maintained by the driver and hardware.
3560 *
3561 **********************************************************************/
/* Dump register and driver state; invoked via the debug_info sysctl. */
static void
em_print_debug_info(struct adapter *adapter)
{
	device_t dev = adapter->dev;
	uint8_t *hw_addr = adapter->hw.hw_addr;

	device_printf(dev, "Adapter hardware address = %p \n", hw_addr);
	device_printf(dev, "CTRL = 0x%x RCTL = 0x%x \n",
	    E1000_READ_REG(&adapter->hw, CTRL),
	    E1000_READ_REG(&adapter->hw, RCTL));
	/* PBA: upper 16 bits = TX packet buffer KB, lower 16 = RX. */
	device_printf(dev, "Packet buffer = Tx=%dk Rx=%dk \n",
	    ((E1000_READ_REG(&adapter->hw, PBA) & 0xffff0000) >> 16),\
	    (E1000_READ_REG(&adapter->hw, PBA) & 0xffff) );
	device_printf(dev, "Flow control watermarks high = %d low = %d\n",
	    adapter->hw.fc_high_water,
	    adapter->hw.fc_low_water);
	device_printf(dev, "tx_int_delay = %d, tx_abs_int_delay = %d\n",
	    E1000_READ_REG(&adapter->hw, TIDV),
	    E1000_READ_REG(&adapter->hw, TADV));
	device_printf(dev, "rx_int_delay = %d, rx_abs_int_delay = %d\n",
	    E1000_READ_REG(&adapter->hw, RDTR),
	    E1000_READ_REG(&adapter->hw, RADV));
	device_printf(dev, "fifo workaround = %lld, fifo_reset_count = %lld\n",
	    (long long)adapter->tx_fifo_wrk_cnt,
	    (long long)adapter->tx_fifo_reset_cnt);
	device_printf(dev, "hw tdh = %d, hw tdt = %d\n",
	    E1000_READ_REG(&adapter->hw, TDH),
	    E1000_READ_REG(&adapter->hw, TDT));
	device_printf(dev, "Num Tx descriptors avail = %d\n",
	    adapter->num_tx_desc_avail);
	device_printf(dev, "Tx Descriptors not avail1 = %ld\n",
	    adapter->no_tx_desc_avail1);
	device_printf(dev, "Tx Descriptors not avail2 = %ld\n",
	    adapter->no_tx_desc_avail2);
	device_printf(dev, "Std mbuf failed = %ld\n",
	    adapter->mbuf_alloc_failed);
	device_printf(dev, "Std mbuf cluster failed = %ld\n",
	    adapter->mbuf_cluster_failed);
	device_printf(dev, "Driver dropped packets = %ld\n",
	    adapter->dropped_pkts);
}
3603
/* Dump accumulated hardware statistics; invoked via the stats sysctl. */
static void
em_print_hw_stats(struct adapter *adapter)
{
	device_t dev = adapter->dev;

	device_printf(dev, "Excessive collisions = %lld\n",
	    (long long)adapter->stats.ecol);
	device_printf(dev, "Symbol errors = %lld\n",
	    (long long)adapter->stats.symerrs);
	device_printf(dev, "Sequence errors = %lld\n",
	    (long long)adapter->stats.sec);
	device_printf(dev, "Defer count = %lld\n", (long long)adapter->stats.dc);

	device_printf(dev, "Missed Packets = %lld\n", (long long)adapter->stats.mpc);
	device_printf(dev, "Receive No Buffers = %lld\n",
	    (long long)adapter->stats.rnbc);
	/* RLEC is inaccurate on some hardware, calculate our own. */
	device_printf(dev, "Receive Length Errors = %lld\n",
	    ((long long)adapter->stats.roc + (long long)adapter->stats.ruc));
	device_printf(dev, "Receive errors = %lld\n",
	    (long long)adapter->stats.rxerrc);
	device_printf(dev, "Crc errors = %lld\n", (long long)adapter->stats.crcerrs);
	device_printf(dev, "Alignment errors = %lld\n",
	    (long long)adapter->stats.algnerrc);
	device_printf(dev, "Carrier extension errors = %lld\n",
	    (long long)adapter->stats.cexterr);
	device_printf(dev, "RX overruns = %ld\n", adapter->rx_overruns);
	device_printf(dev, "watchdog timeouts = %ld\n", adapter->watchdog_events);

	device_printf(dev, "XON Rcvd = %lld\n", (long long)adapter->stats.xonrxc);
	device_printf(dev, "XON Xmtd = %lld\n", (long long)adapter->stats.xontxc);
	device_printf(dev, "XOFF Rcvd = %lld\n", (long long)adapter->stats.xoffrxc);
	device_printf(dev, "XOFF Xmtd = %lld\n", (long long)adapter->stats.xofftxc);

	device_printf(dev, "Good Packets Rcvd = %lld\n",
	    (long long)adapter->stats.gprc);
	device_printf(dev, "Good Packets Xmtd = %lld\n",
	    (long long)adapter->stats.gptc);
}
3643
3644static int
3645em_sysctl_debug_info(SYSCTL_HANDLER_ARGS)
3646{
3647	struct adapter *adapter;
3648	int error;
3649	int result;
3650
3651	result = -1;
3652	error = sysctl_handle_int(oidp, &result, 0, req);
3653
3654	if (error || !req->newptr)
3655		return (error);
3656
3657	if (result == 1) {
3658		adapter = (struct adapter *)arg1;
3659		em_print_debug_info(adapter);
3660	}
3661
3662	return (error);
3663}
3664
3665
3666static int
3667em_sysctl_stats(SYSCTL_HANDLER_ARGS)
3668{
3669	struct adapter *adapter;
3670	int error;
3671	int result;
3672
3673	result = -1;
3674	error = sysctl_handle_int(oidp, &result, 0, req);
3675
3676	if (error || !req->newptr)
3677		return (error);
3678
3679	if (result == 1) {
3680		adapter = (struct adapter *)arg1;
3681		em_print_hw_stats(adapter);
3682	}
3683
3684	return (error);
3685}
3686
/*
 * Sysctl handler for the interrupt-delay tunables.  Reports the current
 * delay in microseconds; on write, validates the new value, converts it
 * to device ticks, and updates the low 16 bits of the associated
 * hardware register under the adapter lock.  Returns 0 on success,
 * EINVAL for an out-of-range value, or the sysctl_handle_int() error.
 */
static int
em_sysctl_int_delay(SYSCTL_HANDLER_ARGS)
{
	struct em_int_delay_info *info;
	struct adapter *adapter;
	uint32_t regval;
	int error;
	int usecs;
	int ticks;

	info = (struct em_int_delay_info *)arg1;
	usecs = info->value;
	error = sysctl_handle_int(oidp, &usecs, 0, req);
	if (error != 0 || req->newptr == NULL)
		return (error);
	/* The register's delay field is only 16 bits wide (in ticks). */
	if (usecs < 0 || usecs > E1000_TICKS_TO_USECS(65535))
		return (EINVAL);
	info->value = usecs;
	ticks = E1000_USECS_TO_TICKS(usecs);

	adapter = info->adapter;

	EM_LOCK(adapter);
	/* Read-modify-write: preserve the register's upper bits. */
	regval = E1000_READ_OFFSET(&adapter->hw, info->offset);
	regval = (regval & ~0xffff) | (ticks & 0xffff);
	/* Handle a few special cases. */
	switch (info->offset) {
	case E1000_RDTR:
	case E1000_82542_RDTR:
		/*
		 * NOTE(review): FPDB is presumably the "flush partial
		 * descriptor block" bit — confirm against the 8254x
		 * manual before relying on this.
		 */
		regval |= E1000_RDT_FPDB;
		break;
	case E1000_TIDV:
	case E1000_82542_TIDV:
		/* TXD_CMD_IDE toggles per-descriptor delay reporting. */
		if (ticks == 0) {
			adapter->txd_cmd &= ~E1000_TXD_CMD_IDE;
			/* Don't write 0 into the TIDV register. */
			regval++;
		} else
			adapter->txd_cmd |= E1000_TXD_CMD_IDE;
		break;
	}
	E1000_WRITE_OFFSET(&adapter->hw, info->offset, regval);
	EM_UNLOCK(adapter);
	return (0);
}
3732
3733static void
3734em_add_int_delay_sysctl(struct adapter *adapter, const char *name,
3735	const char *description, struct em_int_delay_info *info,
3736	int offset, int value)
3737{
3738	info->adapter = adapter;
3739	info->offset = offset;
3740	info->value = value;
3741	SYSCTL_ADD_PROC(device_get_sysctl_ctx(adapter->dev),
3742	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
3743	    OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW,
3744	    info, 0, em_sysctl_int_delay, "I", description);
3745}
3746
3747#ifndef DEVICE_POLLING
3748static void
3749em_add_int_process_limit(struct adapter *adapter, const char *name,
3750	const char *description, int *limit, int value)
3751{
3752	*limit = value;
3753	SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev),
3754	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
3755	    OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW, limit, value, description);
3756}
3757#endif
3758