/**************************************************************************

Copyright (c) 2001-2007, Intel Corporation
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

 1. Redistributions of source code must retain the above copyright notice,
    this list of conditions and the following disclaimer.

 2. Redistributions in binary form must reproduce the above copyright
    notice, this list of conditions and the following disclaimer in the
    documentation and/or other materials provided with the distribution.

 3. Neither the name of the Intel Corporation nor the names of its
    contributors may be used to endorse or promote products derived from
    this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.

***************************************************************************/

/*$FreeBSD: head/sys/dev/em/if_em.c 172138 2007-09-10 21:50:40Z jfv $*/

#ifdef HAVE_KERNEL_OPTION_HEADERS
#include "opt_device_polling.h"
#endif

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/bus.h>
#include <sys/endian.h>
#include <sys/kernel.h>
#include <sys/kthread.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/module.h>
#include <sys/rman.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/sysctl.h>
#include <sys/taskqueue.h>

#include <machine/bus.h>
#include <machine/resource.h>

#include <net/bpf.h>
#include <net/ethernet.h>
#include <net/if.h>
#include <net/if_arp.h>
#include <net/if_dl.h>
#include <net/if_media.h>

#include <net/if_types.h>
#include <net/if_vlan_var.h>

#include <netinet/in_systm.h>
#include <netinet/in.h>
#include <netinet/if_ether.h>
#include <netinet/ip.h>
#include <netinet/ip6.h>
#include <netinet/tcp.h>
#include <netinet/udp.h>

#include <machine/in_cksum.h>
#include <dev/pci/pcivar.h>
#include <dev/pci/pcireg.h>

#include "e1000_api.h"
#include "e1000_82575.h"
#include "if_em.h"

/*********************************************************************
 *  Set this to one to display debug statistics
 *********************************************************************/
int	em_display_debug_stats = 0;

/*********************************************************************
 *  Driver version:
 *********************************************************************/
char em_driver_version[] = "Version - 6.5.3";


/*********************************************************************
 *  PCI Device ID Table
 *
 *  Used by probe to select devices to load on.
 *  The last field stores an index into em_strings.
 *  The last entry must be all 0s.
 *
 *  { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index }
 *********************************************************************/

static em_vendor_info_t em_vendor_info_array[] =
{
	/* Intel(R) PRO/1000 Network Connection */
	{ 0x8086, E1000_DEV_ID_82540EM,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82540EM_LOM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82540EP,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82540EP_LOM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82540EP_LP,	PCI_ANY_ID, PCI_ANY_ID, 0},

	{ 0x8086, E1000_DEV_ID_82541EI,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82541ER,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82541ER_LOM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82541EI_MOBILE,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82541GI,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82541GI_LF,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82541GI_MOBILE,	PCI_ANY_ID, PCI_ANY_ID, 0},

	{ 0x8086, E1000_DEV_ID_82542,		PCI_ANY_ID, PCI_ANY_ID, 0},

	{ 0x8086, E1000_DEV_ID_82543GC_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82543GC_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},

	{ 0x8086, E1000_DEV_ID_82544EI_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82544EI_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82544GC_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82544GC_LOM,	PCI_ANY_ID, PCI_ANY_ID, 0},

	{ 0x8086, E1000_DEV_ID_82545EM_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82545EM_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82545GM_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82545GM_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82545GM_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},

	{ 0x8086, E1000_DEV_ID_82546EB_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82546EB_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82546EB_QUAD_COPPER, PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82546GB_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82546GB_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82546GB_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82546GB_PCIE,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82546GB_QUAD_COPPER, PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82546GB_QUAD_COPPER_KSP3,
						PCI_ANY_ID, PCI_ANY_ID, 0},

	{ 0x8086, E1000_DEV_ID_82547EI,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82547EI_MOBILE,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82547GI,		PCI_ANY_ID, PCI_ANY_ID, 0},

	{ 0x8086, E1000_DEV_ID_82571EB_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER_LP,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_QUAD_FIBER,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82572EI_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82572EI_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82572EI_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82572EI,		PCI_ANY_ID, PCI_ANY_ID, 0},

	{ 0x8086, E1000_DEV_ID_82573E,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82573E_IAMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82573L,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_SPT,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_SPT,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_DPT,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_DPT,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IGP_M_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IGP_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IGP_C,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IFE,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IFE_GT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IFE_G,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IGP_M,	PCI_ANY_ID, PCI_ANY_ID, 0},

	{ 0x8086, E1000_DEV_ID_ICH9_IGP_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IGP_C,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IFE,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IFE_GT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IFE_G,	PCI_ANY_ID, PCI_ANY_ID, 0},

	{ 0x8086, E1000_DEV_ID_82575EB_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82575EB_FIBER_SERDES,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82575EM_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82575EM_FIBER_SERDES,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82575GB_QUAD_COPPER,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	/* required last entry */
	{ 0, 0, 0, 0, 0}
};

/*********************************************************************
 *  Table of branding strings for all supported NICs.
 *********************************************************************/

static char *em_strings[] = {
	"Intel(R) PRO/1000 Network Connection"
};
211
212/*********************************************************************
213 *  Function prototypes
214 *********************************************************************/
215static int	em_probe(device_t);
216static int	em_attach(device_t);
217static int	em_detach(device_t);
218static int	em_shutdown(device_t);
219static int	em_suspend(device_t);
220static int	em_resume(device_t);
221static void	em_start(struct ifnet *);
222static void	em_start_locked(struct ifnet *ifp);
223static int	em_ioctl(struct ifnet *, u_long, caddr_t);
224static void	em_watchdog(struct adapter *);
225static void	em_init(void *);
226static void	em_init_locked(struct adapter *);
227static void	em_stop(void *);
228static void	em_media_status(struct ifnet *, struct ifmediareq *);
229static int	em_media_change(struct ifnet *);
230static void	em_identify_hardware(struct adapter *);
231static int	em_allocate_pci_resources(struct adapter *);
232static int	em_allocate_intr(struct adapter *);
233static void	em_free_intr(struct adapter *);
234static void	em_free_pci_resources(struct adapter *);
235static void	em_local_timer(void *);
236static int	em_hardware_init(struct adapter *);
237static void	em_setup_interface(device_t, struct adapter *);
238static int	em_setup_transmit_structures(struct adapter *);
239static void	em_initialize_transmit_unit(struct adapter *);
240static int	em_setup_receive_structures(struct adapter *);
241static void	em_initialize_receive_unit(struct adapter *);
242static void	em_enable_intr(struct adapter *);
243static void	em_disable_intr(struct adapter *);
244static void	em_free_transmit_structures(struct adapter *);
245static void	em_free_receive_structures(struct adapter *);
246static void	em_update_stats_counters(struct adapter *);
247static void	em_txeof(struct adapter *);
248static int	em_allocate_receive_structures(struct adapter *);
249static int	em_allocate_transmit_structures(struct adapter *);
250static int	em_rxeof(struct adapter *, int);
251#ifndef __NO_STRICT_ALIGNMENT
252static int	em_fixup_rx(struct adapter *);
253#endif
254static void	em_receive_checksum(struct adapter *, struct e1000_rx_desc *,
255		    struct mbuf *);
256static void	em_transmit_checksum_setup(struct adapter *, struct mbuf *,
257		    uint32_t *, uint32_t *);
258static boolean_t em_tx_adv_ctx_setup(struct adapter *, struct mbuf *);
259static boolean_t em_tso_setup(struct adapter *, struct mbuf *, uint32_t *,
260		    uint32_t *);
261static boolean_t em_tso_adv_setup(struct adapter *, struct mbuf *, uint32_t *);
262static void	em_set_promisc(struct adapter *);
263static void	em_disable_promisc(struct adapter *);
264static void	em_set_multi(struct adapter *);
265static void	em_print_hw_stats(struct adapter *);
266static void	em_update_link_status(struct adapter *);
267static int	em_get_buf(struct adapter *, int);
268static void	em_enable_vlans(struct adapter *);
269static int	em_encap(struct adapter *, struct mbuf **);
270static int	em_adv_encap(struct adapter *, struct mbuf **);
271static void	em_smartspeed(struct adapter *);
272static int	em_82547_fifo_workaround(struct adapter *, int);
273static void	em_82547_update_fifo_head(struct adapter *, int);
274static int	em_82547_tx_fifo_reset(struct adapter *);
275static void	em_82547_move_tail(void *);
276static int	em_dma_malloc(struct adapter *, bus_size_t,
277		    struct em_dma_alloc *, int);
278static void	em_dma_free(struct adapter *, struct em_dma_alloc *);
279static void	em_print_debug_info(struct adapter *);
280static int 	em_is_valid_ether_addr(uint8_t *);
281static int	em_sysctl_stats(SYSCTL_HANDLER_ARGS);
282static int	em_sysctl_debug_info(SYSCTL_HANDLER_ARGS);
283static uint32_t	em_fill_descriptors (bus_addr_t address, uint32_t length,
284		    PDESC_ARRAY desc_array);
285static int	em_sysctl_int_delay(SYSCTL_HANDLER_ARGS);
286static void	em_add_int_delay_sysctl(struct adapter *, const char *,
287		    const char *, struct em_int_delay_info *, int, int);
288/* Management and WOL Support */
289static void	em_init_manageability(struct adapter *);
290static void	em_release_manageability(struct adapter *);
291static void     em_get_hw_control(struct adapter *);
292static void     em_release_hw_control(struct adapter *);
293static void     em_enable_wakeup(device_t);
294
295#ifdef DEVICE_POLLING
296static poll_handler_t em_poll;
297static void	em_intr(void *);
298#else
299static int	em_intr_fast(void *);
300static void	em_add_rx_process_limit(struct adapter *, const char *,
301		    const char *, int *, int);
302static void	em_handle_rxtx(void *context, int pending);
303static void	em_handle_link(void *context, int pending);
304#endif

/*********************************************************************
 *  FreeBSD Device Interface Entry Points
 *********************************************************************/

static device_method_t em_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe, em_probe),
	DEVMETHOD(device_attach, em_attach),
	DEVMETHOD(device_detach, em_detach),
	DEVMETHOD(device_shutdown, em_shutdown),
	DEVMETHOD(device_suspend, em_suspend),
	DEVMETHOD(device_resume, em_resume),
	{0, 0}
};

static driver_t em_driver = {
	"em", em_methods, sizeof(struct adapter),
};

static devclass_t em_devclass;
DRIVER_MODULE(em, pci, em_driver, em_devclass, 0, 0);
MODULE_DEPEND(em, pci, 1, 1, 1);
MODULE_DEPEND(em, ether, 1, 1, 1);

/*********************************************************************
 *  Tunable default values.
 *********************************************************************/

#define EM_TICKS_TO_USECS(ticks)	((1024 * (ticks) + 500) / 1000)
#define EM_USECS_TO_TICKS(usecs)	((1000 * (usecs) + 512) / 1024)
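/*
 * Worked example (illustrative note, not from the original source): the
 * interrupt delay registers count in units of 1.024 usecs, which is what
 * these rounding macros encode, e.g.
 *   EM_TICKS_TO_USECS(64) = (1024 * 64 + 500) / 1000 = 66 usecs
 *   EM_USECS_TO_TICKS(66) = (1000 * 66 + 512) / 1024 = 64 ticks
 * so the two conversions round-trip a register value of 64.
 */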
#define M_TSO_LEN			66

static int em_tx_int_delay_dflt = EM_TICKS_TO_USECS(EM_TIDV);
static int em_rx_int_delay_dflt = EM_TICKS_TO_USECS(EM_RDTR);
static int em_tx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_TADV);
static int em_rx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_RADV);
static int em_rxd = EM_DEFAULT_RXD;
static int em_txd = EM_DEFAULT_TXD;
static int em_smart_pwr_down = FALSE;

TUNABLE_INT("hw.em.tx_int_delay", &em_tx_int_delay_dflt);
TUNABLE_INT("hw.em.rx_int_delay", &em_rx_int_delay_dflt);
TUNABLE_INT("hw.em.tx_abs_int_delay", &em_tx_abs_int_delay_dflt);
TUNABLE_INT("hw.em.rx_abs_int_delay", &em_rx_abs_int_delay_dflt);
TUNABLE_INT("hw.em.rxd", &em_rxd);
TUNABLE_INT("hw.em.txd", &em_txd);
TUNABLE_INT("hw.em.smart_pwr_down", &em_smart_pwr_down);
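/*
 * Usage note (added): TUNABLE_INT() fetches these values from the kernel
 * environment at module load, so they can be set from loader.conf(5).
 * The values below are illustrative examples only:
 *   hw.em.rxd=1024
 *   hw.em.tx_int_delay=66
 */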
#ifndef DEVICE_POLLING
/* How many packets rxeof tries to clean at a time */
static int em_rx_process_limit = 100;
TUNABLE_INT("hw.em.rx_process_limit", &em_rx_process_limit);
#endif
/* Global used in WOL setup with multiport cards */
static int global_quad_port_a = 0;

/*********************************************************************
 *  Device identification routine
 *
 *  em_probe determines if the driver should be loaded on the
 *  adapter, based on the adapter's PCI vendor/device ID.
 *
 *  return BUS_PROBE_DEFAULT on success, positive on failure
 *********************************************************************/

static int
em_probe(device_t dev)
{
	char		adapter_name[60];
	uint16_t	pci_vendor_id = 0;
	uint16_t	pci_device_id = 0;
	uint16_t	pci_subvendor_id = 0;
	uint16_t	pci_subdevice_id = 0;
	em_vendor_info_t *ent;

	INIT_DEBUGOUT("em_probe: begin");

	pci_vendor_id = pci_get_vendor(dev);
	if (pci_vendor_id != EM_VENDOR_ID)
		return (ENXIO);

	pci_device_id = pci_get_device(dev);
	pci_subvendor_id = pci_get_subvendor(dev);
	pci_subdevice_id = pci_get_subdevice(dev);

	ent = em_vendor_info_array;
	while (ent->vendor_id != 0) {
		if ((pci_vendor_id == ent->vendor_id) &&
		    (pci_device_id == ent->device_id) &&
		    ((pci_subvendor_id == ent->subvendor_id) ||
		    (ent->subvendor_id == PCI_ANY_ID)) &&
		    ((pci_subdevice_id == ent->subdevice_id) ||
		    (ent->subdevice_id == PCI_ANY_ID))) {
			sprintf(adapter_name, "%s %s",
				em_strings[ent->index],
				em_driver_version);
			device_set_desc_copy(dev, adapter_name);
			return (BUS_PROBE_DEFAULT);
		}
		ent++;
	}

	return (ENXIO);
}

/*********************************************************************
 *  Device initialization routine
 *
 *  The attach entry point is called when the driver is being loaded.
 *  This routine identifies the type of hardware, allocates all resources
 *  and initializes the hardware.
 *
 *  return 0 on success, positive on failure
 *********************************************************************/

static int
em_attach(device_t dev)
{
	struct adapter	*adapter;
	int		tsize, rsize;
	int		error = 0;
	u16		eeprom_data, device_id;

	INIT_DEBUGOUT("em_attach: begin");

	adapter = device_get_softc(dev);
	adapter->dev = adapter->osdep.dev = dev;
	EM_LOCK_INIT(adapter, device_get_nameunit(dev));

	/* SYSCTL stuff */
	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
	    OID_AUTO, "debug_info", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
	    em_sysctl_debug_info, "I", "Debug Information");

	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
	    OID_AUTO, "stats", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
	    em_sysctl_stats, "I", "Statistics");

	callout_init_mtx(&adapter->timer, &adapter->mtx, 0);
	callout_init_mtx(&adapter->tx_fifo_timer, &adapter->mtx, 0);

	/* Determine hardware and mac info */
	em_identify_hardware(adapter);

	/* Setup PCI resources */
	if (em_allocate_pci_resources(adapter)) {
		device_printf(dev, "Allocation of PCI resources failed\n");
		error = ENXIO;
		goto err_pci;
	}

	/*
	** For ICH8 and family we need to
	** map the flash memory, and this
	** must happen after the MAC is
	** identified
	*/
	if ((adapter->hw.mac.type == e1000_ich8lan) ||
	    (adapter->hw.mac.type == e1000_ich9lan)) {
		int rid = EM_BAR_TYPE_FLASH;
		adapter->flash_mem = bus_alloc_resource_any(dev,
		    SYS_RES_MEMORY, &rid, RF_ACTIVE);
		/* This is used in the shared code */
		adapter->hw.flash_address = (u8 *)adapter->flash_mem;
		adapter->osdep.flash_bus_space_tag =
		    rman_get_bustag(adapter->flash_mem);
		adapter->osdep.flash_bus_space_handle =
		    rman_get_bushandle(adapter->flash_mem);
	}

	/* Do Shared Code initialization */
	if (e1000_setup_init_funcs(&adapter->hw, TRUE)) {
		device_printf(dev, "Setup of Shared code failed\n");
		error = ENXIO;
		goto err_pci;
	}

	e1000_get_bus_info(&adapter->hw);

	/* Set up some sysctls for the tunable interrupt delays */
	em_add_int_delay_sysctl(adapter, "rx_int_delay",
	    "receive interrupt delay in usecs", &adapter->rx_int_delay,
	    E1000_REGISTER(&adapter->hw, E1000_RDTR), em_rx_int_delay_dflt);
	em_add_int_delay_sysctl(adapter, "tx_int_delay",
	    "transmit interrupt delay in usecs", &adapter->tx_int_delay,
	    E1000_REGISTER(&adapter->hw, E1000_TIDV), em_tx_int_delay_dflt);
	if (adapter->hw.mac.type >= e1000_82540) {
		em_add_int_delay_sysctl(adapter, "rx_abs_int_delay",
		    "receive interrupt delay limit in usecs",
		    &adapter->rx_abs_int_delay,
		    E1000_REGISTER(&adapter->hw, E1000_RADV),
		    em_rx_abs_int_delay_dflt);
		em_add_int_delay_sysctl(adapter, "tx_abs_int_delay",
		    "transmit interrupt delay limit in usecs",
		    &adapter->tx_abs_int_delay,
		    E1000_REGISTER(&adapter->hw, E1000_TADV),
		    em_tx_abs_int_delay_dflt);
	}

#ifndef DEVICE_POLLING
	/* Sysctls for limiting the amount of work done in the taskqueue */
	em_add_rx_process_limit(adapter, "rx_processing_limit",
	    "max number of rx packets to process", &adapter->rx_process_limit,
	    em_rx_process_limit);
#endif

	/*
	 * Validate the number of transmit and receive descriptors. It
	 * must not exceed the hardware maximum, and must be a multiple
	 * of EM_DBA_ALIGN.
	 */
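	/*
	 * Worked example (illustrative, not from the original source):
	 * assuming the usual EM_DBA_ALIGN of 128 and 16-byte legacy
	 * descriptors, a tunable of hw.em.txd=1024 passes the alignment
	 * test below, since 1024 * sizeof(struct e1000_tx_desc) = 16384
	 * is a multiple of 128.
	 */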
	if (((em_txd * sizeof(struct e1000_tx_desc)) % EM_DBA_ALIGN) != 0 ||
	    (adapter->hw.mac.type >= e1000_82544 && em_txd > EM_MAX_TXD) ||
	    (adapter->hw.mac.type < e1000_82544 && em_txd > EM_MAX_TXD_82543) ||
	    (em_txd < EM_MIN_TXD)) {
		device_printf(dev, "Using %d TX descriptors instead of %d!\n",
		    EM_DEFAULT_TXD, em_txd);
		adapter->num_tx_desc = EM_DEFAULT_TXD;
	} else
		adapter->num_tx_desc = em_txd;
	if (((em_rxd * sizeof(struct e1000_rx_desc)) % EM_DBA_ALIGN) != 0 ||
	    (adapter->hw.mac.type >= e1000_82544 && em_rxd > EM_MAX_RXD) ||
	    (adapter->hw.mac.type < e1000_82544 && em_rxd > EM_MAX_RXD_82543) ||
	    (em_rxd < EM_MIN_RXD)) {
		device_printf(dev, "Using %d RX descriptors instead of %d!\n",
		    EM_DEFAULT_RXD, em_rxd);
		adapter->num_rx_desc = EM_DEFAULT_RXD;
	} else
		adapter->num_rx_desc = em_rxd;

	adapter->hw.mac.autoneg = DO_AUTO_NEG;
	adapter->hw.phy.wait_for_link = FALSE;
	adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
	adapter->rx_buffer_len = 2048;

	e1000_init_script_state_82541(&adapter->hw, TRUE);
	e1000_set_tbi_compatibility_82543(&adapter->hw, TRUE);

	/* Copper options */
	if (adapter->hw.media_type == e1000_media_type_copper) {
		adapter->hw.phy.mdix = AUTO_ALL_MODES;
		adapter->hw.phy.disable_polarity_correction = FALSE;
		adapter->hw.phy.ms_type = EM_MASTER_SLAVE;
	}

	/*
	 * Set the max frame size assuming standard ethernet
	 * sized frames.
	 */
	adapter->hw.mac.max_frame_size =
	    ETHERMTU + ETHER_HDR_LEN + ETHERNET_FCS_SIZE;

	adapter->hw.mac.min_frame_size = ETH_ZLEN + ETHERNET_FCS_SIZE;

	/*
	 * This controls when hardware reports transmit completion
	 * status.
	 */
	adapter->hw.mac.report_tx_early = 1;

	tsize = roundup2(adapter->num_tx_desc * sizeof(struct e1000_tx_desc),
	    EM_DBA_ALIGN);

	/* Allocate Transmit Descriptor ring */
	if (em_dma_malloc(adapter, tsize, &adapter->txdma, BUS_DMA_NOWAIT)) {
		device_printf(dev, "Unable to allocate tx_desc memory\n");
		error = ENOMEM;
		goto err_tx_desc;
	}
	adapter->tx_desc_base =
	    (struct e1000_tx_desc *)adapter->txdma.dma_vaddr;

	rsize = roundup2(adapter->num_rx_desc * sizeof(struct e1000_rx_desc),
	    EM_DBA_ALIGN);

	/* Allocate Receive Descriptor ring */
	if (em_dma_malloc(adapter, rsize, &adapter->rxdma, BUS_DMA_NOWAIT)) {
		device_printf(dev, "Unable to allocate rx_desc memory\n");
		error = ENOMEM;
		goto err_rx_desc;
	}
	adapter->rx_desc_base =
	    (struct e1000_rx_desc *)adapter->rxdma.dma_vaddr;

	/* Make sure we have a good EEPROM before we read from it */
	if (e1000_validate_nvm_checksum(&adapter->hw) < 0) {
		/*
		** Some PCI-E parts fail the first check due to
		** the link being in a sleep state; call it again,
		** and if it fails a second time it's a real issue.
		*/
		if (e1000_validate_nvm_checksum(&adapter->hw) < 0) {
			device_printf(dev,
			    "The EEPROM Checksum Is Not Valid\n");
			error = EIO;
			goto err_hw_init;
		}
	}

	if (e1000_read_part_num(&adapter->hw, &(adapter->part_num)) < 0) {
		device_printf(dev, "EEPROM read error "
		    "reading part number\n");
		error = EIO;
		goto err_hw_init;
	}

	/* Initialize the hardware */
	if (em_hardware_init(adapter)) {
		device_printf(dev, "Unable to initialize the hardware\n");
		error = EIO;
		goto err_hw_init;
	}

	/* Copy the permanent MAC address out of the EEPROM */
	if (e1000_read_mac_addr(&adapter->hw) < 0) {
		device_printf(dev, "EEPROM read error while reading MAC"
		    " address\n");
		error = EIO;
		goto err_hw_init;
	}

	if (!em_is_valid_ether_addr(adapter->hw.mac.addr)) {
		device_printf(dev, "Invalid MAC address\n");
		error = EIO;
		goto err_hw_init;
	}

	/* Setup OS specific network interface */
	em_setup_interface(dev, adapter);

	em_allocate_intr(adapter);

	/* Initialize statistics */
	em_update_stats_counters(adapter);

	adapter->hw.mac.get_link_status = 1;
	em_update_link_status(adapter);

	/* Indicate SOL/IDER usage */
	if (e1000_check_reset_block(&adapter->hw))
		device_printf(dev,
		    "PHY reset is blocked due to SOL/IDER session.\n");

	/* Determine if we have to control management hardware */
	adapter->has_manage = e1000_enable_mng_pass_thru(&adapter->hw);

	/*
	 * Setup Wake-on-Lan
	 */
	switch (adapter->hw.mac.type) {

	case e1000_82542:
	case e1000_82543:
		break;
	case e1000_82546:
	case e1000_82546_rev_3:
	case e1000_82571:
	case e1000_80003es2lan:
		if (adapter->hw.bus.func == 1)
			e1000_read_nvm(&adapter->hw,
			    NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data);
		else
			e1000_read_nvm(&adapter->hw,
			    NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
		eeprom_data &= EM_EEPROM_APME;
		break;
	default:
		/* APME bit in EEPROM is mapped to WUC.APME */
		eeprom_data = E1000_READ_REG(&adapter->hw, E1000_WUC) &
		    E1000_WUC_APME;
		break;
	}
	if (eeprom_data)
		adapter->wol = E1000_WUFC_MAG;
	/*
	 * We have the eeprom settings, now apply the special cases
	 * where the eeprom may be wrong or the board won't support
	 * wake on lan on a particular port.
	 */
	device_id = pci_get_device(dev);
	switch (device_id) {
	case E1000_DEV_ID_82546GB_PCIE:
		adapter->wol = 0;
		break;
	case E1000_DEV_ID_82546EB_FIBER:
	case E1000_DEV_ID_82546GB_FIBER:
	case E1000_DEV_ID_82571EB_FIBER:
		/* Wake events only supported on port A for dual fiber
		 * regardless of eeprom setting */
		if (E1000_READ_REG(&adapter->hw, E1000_STATUS) &
		    E1000_STATUS_FUNC_1)
			adapter->wol = 0;
		break;
	case E1000_DEV_ID_82546GB_QUAD_COPPER_KSP3:
	case E1000_DEV_ID_82571EB_QUAD_COPPER:
	case E1000_DEV_ID_82571EB_QUAD_FIBER:
	case E1000_DEV_ID_82571EB_QUAD_COPPER_LP:
		/* if quad port adapter, disable WoL on all but port A */
		if (global_quad_port_a != 0)
			adapter->wol = 0;
		/* Reset for multiple quad port adapters */
		if (++global_quad_port_a == 4)
			global_quad_port_a = 0;
		break;
	}

	/* Do we need workaround for 82544 PCI-X adapter? */
	if (adapter->hw.bus.type == e1000_bus_type_pcix &&
	    adapter->hw.mac.type == e1000_82544)
		adapter->pcix_82544 = TRUE;
	else
		adapter->pcix_82544 = FALSE;

	/* Tell the stack that the interface is not active */
	adapter->ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);

	INIT_DEBUGOUT("em_attach: end");

	return (0);

err_hw_init:
	em_release_hw_control(adapter);
	e1000_remove_device(&adapter->hw);
	em_dma_free(adapter, &adapter->rxdma);
err_rx_desc:
	em_dma_free(adapter, &adapter->txdma);
err_tx_desc:
err_pci:
	em_free_intr(adapter);
	em_free_pci_resources(adapter);
	EM_LOCK_DESTROY(adapter);

	return (error);
}
743
744/*********************************************************************
745 *  Device removal routine
746 *
747 *  The detach entry point is called when the driver is being removed.
748 *  This routine stops the adapter and deallocates all the resources
749 *  that were allocated for driver operation.
750 *
751 *  return 0 on success, positive on failure
752 *********************************************************************/
753
754static int
755em_detach(device_t dev)
756{
757	struct adapter	*adapter = device_get_softc(dev);
758	struct ifnet	*ifp = adapter->ifp;
759
760	INIT_DEBUGOUT("em_detach: begin");
761
762#ifdef DEVICE_POLLING
763	if (ifp->if_capenable & IFCAP_POLLING)
764		ether_poll_deregister(ifp);
765#endif
766
767	em_disable_intr(adapter);
768	em_free_intr(adapter);
769	EM_LOCK(adapter);
770	adapter->in_detach = 1;
771	em_stop(adapter);
772	e1000_phy_hw_reset(&adapter->hw);
773
774	em_release_manageability(adapter);
775
776	if (((adapter->hw.mac.type == e1000_82573) ||
777	    (adapter->hw.mac.type == e1000_ich8lan) ||
778	    (adapter->hw.mac.type == e1000_ich9lan)) &&
779	    e1000_check_mng_mode(&adapter->hw))
780		em_release_hw_control(adapter);
781
782	if (adapter->wol) {
783		E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
784		E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
785		em_enable_wakeup(dev);
786	}
787
788	EM_UNLOCK(adapter);
789	ether_ifdetach(adapter->ifp);
790
791	callout_drain(&adapter->timer);
792	callout_drain(&adapter->tx_fifo_timer);
793
794	em_free_pci_resources(adapter);
795	bus_generic_detach(dev);
796	if_free(ifp);
797
798	e1000_remove_device(&adapter->hw);
799	em_free_transmit_structures(adapter);
800	em_free_receive_structures(adapter);
801
802	/* Free Transmit Descriptor ring */
803	if (adapter->tx_desc_base) {
804		em_dma_free(adapter, &adapter->txdma);
805		adapter->tx_desc_base = NULL;
806	}
807
808	/* Free Receive Descriptor ring */
809	if (adapter->rx_desc_base) {
810		em_dma_free(adapter, &adapter->rxdma);
811		adapter->rx_desc_base = NULL;
812	}
813
814	EM_LOCK_DESTROY(adapter);
815
816	return (0);
817}
818
819/*********************************************************************
820 *
821 *  Shutdown entry point
822 *
823 **********************************************************************/
824
825static int
826em_shutdown(device_t dev)
827{
828	return em_suspend(dev);
829}
830
831/*
832 * Suspend/resume device methods.
833 */
834static int
835em_suspend(device_t dev)
836{
837	struct adapter *adapter = device_get_softc(dev);
838
839	EM_LOCK(adapter);
840	em_stop(adapter);
841
842        em_release_manageability(adapter);
843
844        if (((adapter->hw.mac.type == e1000_82573) ||
845            (adapter->hw.mac.type == e1000_ich8lan) ||
846            (adapter->hw.mac.type == e1000_ich9lan)) &&
847            e1000_check_mng_mode(&adapter->hw))
848                em_release_hw_control(adapter);
849
850        if (adapter->wol) {
851                E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
852                E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
853                em_enable_wakeup(dev);
854        }
855
856	EM_UNLOCK(adapter);
857
858	return bus_generic_suspend(dev);
859}
860
861static int
862em_resume(device_t dev)
863{
864	struct adapter *adapter = device_get_softc(dev);
865	struct ifnet *ifp = adapter->ifp;
866
867	EM_LOCK(adapter);
868	em_init_locked(adapter);
869	em_init_manageability(adapter);
870
871	if ((ifp->if_flags & IFF_UP) &&
872	    (ifp->if_drv_flags & IFF_DRV_RUNNING))
873		em_start_locked(ifp);
874
875	EM_UNLOCK(adapter);
876
877	return bus_generic_resume(dev);
878}
879
880
/*********************************************************************
 *  Transmit entry point
 *
 *  em_start is called by the stack to initiate a transmit.
 *  The driver will remain in this routine as long as there are
 *  packets to transmit and transmit resources are available.
 *  If resources are not available, the stack is notified and
 *  the packet is requeued.
 **********************************************************************/

static void
em_start_locked(struct ifnet *ifp)
{
	struct adapter	*adapter = ifp->if_softc;
	struct mbuf	*m_head;

	EM_LOCK_ASSERT(adapter);

	if ((ifp->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) !=
	    IFF_DRV_RUNNING)
		return;
	if (!adapter->link_active)
		return;

	while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {

		IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
		if (m_head == NULL)
			break;
		/*
		 *  Encapsulation can modify our pointer, and/or make it
		 *  NULL on failure.  In that event, we can't requeue.
		 *
		 *  We now use a pointer to accommodate legacy and
		 *  advanced transmit functions.
		 */
		if (adapter->em_xmit(adapter, &m_head)) {
			if (m_head == NULL)
				break;
			ifp->if_drv_flags |= IFF_DRV_OACTIVE;
			IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
			break;
		}

		/* Send a copy of the frame to the BPF listener */
		ETHER_BPF_MTAP(ifp, m_head);

		/* Set timeout in case hardware has problems transmitting. */
		adapter->watchdog_timer = EM_TX_TIMEOUT;
	}
}

static void
em_start(struct ifnet *ifp)
{
	struct adapter *adapter = ifp->if_softc;

	EM_LOCK(adapter);
	if (ifp->if_drv_flags & IFF_DRV_RUNNING)
		em_start_locked(ifp);
	EM_UNLOCK(adapter);
}

/*********************************************************************
 *  Ioctl entry point
 *
 *  em_ioctl is called when the user wants to configure the
 *  interface.
 *
 *  return 0 on success, positive on failure
 **********************************************************************/

static int
em_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
{
	struct adapter	*adapter = ifp->if_softc;
	struct ifreq *ifr = (struct ifreq *)data;
	struct ifaddr *ifa = (struct ifaddr *)data;
	int error = 0;

	if (adapter->in_detach)
		return (error);

	switch (command) {
	case SIOCSIFADDR:
		if (ifa->ifa_addr->sa_family == AF_INET) {
			/*
			 * XXX
			 * Since resetting the hardware takes a very long
			 * time and results in link renegotiation, we only
			 * initialize the hardware when it is absolutely
			 * required.
			 */
			ifp->if_flags |= IFF_UP;
			if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) {
				EM_LOCK(adapter);
				em_init_locked(adapter);
				EM_UNLOCK(adapter);
			}
			arp_ifinit(ifp, ifa);
		} else
			error = ether_ioctl(ifp, command, data);
		break;
	case SIOCSIFMTU:
	    {
		int max_frame_size;
		uint16_t eeprom_data = 0;

		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFMTU (Set Interface MTU)");

		EM_LOCK(adapter);
		switch (adapter->hw.mac.type) {
		case e1000_82573:
			/*
			 * 82573 only supports jumbo frames
			 * if ASPM is disabled.
			 */
			e1000_read_nvm(&adapter->hw,
			    NVM_INIT_3GIO_3, 1, &eeprom_data);
			if (eeprom_data & NVM_WORD1A_ASPM_MASK) {
				max_frame_size = ETHER_MAX_LEN;
				break;
			}
			/* Allow Jumbo frames - fall thru */
		case e1000_82571:
		case e1000_82572:
		case e1000_ich9lan:
		case e1000_82575:
		case e1000_80003es2lan:	/* Limit Jumbo Frame size */
			max_frame_size = 9234;
			break;
			/* Adapters that do not support jumbo frames */
		case e1000_82542:
		case e1000_ich8lan:
			max_frame_size = ETHER_MAX_LEN;
			break;
		default:
			max_frame_size = MAX_JUMBO_FRAME_SIZE;
		}
		if (ifr->ifr_mtu > max_frame_size - ETHER_HDR_LEN -
		    ETHER_CRC_LEN) {
			EM_UNLOCK(adapter);
			error = EINVAL;
			break;
		}

		ifp->if_mtu = ifr->ifr_mtu;
		adapter->hw.mac.max_frame_size =
		    ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
		em_init_locked(adapter);
		EM_UNLOCK(adapter);
		break;
	    }
	case SIOCSIFFLAGS:
		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFFLAGS "
		    "(Set Interface Flags)");
		EM_LOCK(adapter);
		if (ifp->if_flags & IFF_UP) {
			if ((ifp->if_drv_flags & IFF_DRV_RUNNING)) {
				if ((ifp->if_flags ^ adapter->if_flags) &
				    IFF_PROMISC) {
					em_disable_promisc(adapter);
					em_set_promisc(adapter);
				}
			} else
				em_init_locked(adapter);
		} else
			if (ifp->if_drv_flags & IFF_DRV_RUNNING)
				em_stop(adapter);
		adapter->if_flags = ifp->if_flags;
		EM_UNLOCK(adapter);
		break;
	case SIOCADDMULTI:
	case SIOCDELMULTI:
		IOCTL_DEBUGOUT("ioctl rcv'd: SIOC(ADD|DEL)MULTI");
		if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
			EM_LOCK(adapter);
			em_disable_intr(adapter);
			em_set_multi(adapter);
			if (adapter->hw.mac.type == e1000_82542 &&
			    adapter->hw.revision_id == E1000_REVISION_2) {
				em_initialize_receive_unit(adapter);
			}
#ifdef DEVICE_POLLING
			if (!(ifp->if_capenable & IFCAP_POLLING))
#endif
				em_enable_intr(adapter);
			EM_UNLOCK(adapter);
		}
		break;
	case SIOCSIFMEDIA:
		/* Check SOL/IDER usage */
		EM_LOCK(adapter);
		if (e1000_check_reset_block(&adapter->hw)) {
			EM_UNLOCK(adapter);
			device_printf(adapter->dev, "Media change is"
			    " blocked due to SOL/IDER session.\n");
			break;
		}
		EM_UNLOCK(adapter);
	case SIOCGIFMEDIA:
		IOCTL_DEBUGOUT("ioctl rcv'd: "
		    "SIOCxIFMEDIA (Get/Set Interface Media)");
		error = ifmedia_ioctl(ifp, ifr, &adapter->media, command);
		break;
	case SIOCSIFCAP:
	    {
		int mask, reinit;

		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFCAP (Set Capabilities)");
		reinit = 0;
		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
#ifdef DEVICE_POLLING
		if (mask & IFCAP_POLLING) {
			if (ifr->ifr_reqcap & IFCAP_POLLING) {
				error = ether_poll_register(em_poll, ifp);
				if (error)
					return (error);
				EM_LOCK(adapter);
				em_disable_intr(adapter);
				ifp->if_capenable |= IFCAP_POLLING;
				EM_UNLOCK(adapter);
			} else {
				error = ether_poll_deregister(ifp);
				/* Enable interrupt even in error case */
				EM_LOCK(adapter);
				em_enable_intr(adapter);
				ifp->if_capenable &= ~IFCAP_POLLING;
				EM_UNLOCK(adapter);
			}
		}
#endif
		if (mask & IFCAP_HWCSUM) {
			ifp->if_capenable ^= IFCAP_HWCSUM;
			reinit = 1;
		}
		if (mask & IFCAP_TSO4) {
			ifp->if_capenable ^= IFCAP_TSO4;
			reinit = 1;
		}
		if (mask & IFCAP_VLAN_HWTAGGING) {
			ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
			reinit = 1;
		}
		if (reinit && (ifp->if_drv_flags & IFF_DRV_RUNNING))
			em_init(adapter);
		VLAN_CAPABILITIES(ifp);
		break;
	    }
	default:
		error = ether_ioctl(ifp, command, data);
		break;
	}

	return (error);
}

/*********************************************************************
 *  Watchdog timer:
 *
 *  This routine is called from the local timer every second.
 *  As long as transmit descriptors are being cleaned the value
 *  is non-zero and we do nothing. Reaching 0 indicates a tx hang
 *  and we then reset the device.
 *
 **********************************************************************/

static void
em_watchdog(struct adapter *adapter)
{

	EM_LOCK_ASSERT(adapter);

	/*
	** The timer is set to EM_TX_TIMEOUT (5 seconds) every time
	** start queues a packet. Then txeof keeps resetting it to
	** EM_TX_TIMEOUT as long as it cleans at least one descriptor.
	** Finally, anytime all descriptors are clean the timer is
	** set to 0.
	*/
	if (adapter->watchdog_timer == 0 || --adapter->watchdog_timer)
		return;

	/* If we are in this routine because of pause frames, then
	 * don't reset the hardware.
	 */
	if (E1000_READ_REG(&adapter->hw, E1000_STATUS) &
	    E1000_STATUS_TXOFF) {
		adapter->watchdog_timer = EM_TX_TIMEOUT;
		return;
	}

	if (e1000_check_for_link(&adapter->hw) == 0)
		device_printf(adapter->dev, "watchdog timeout -- resetting\n");
	adapter->ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
	adapter->watchdog_events++;

	em_init_locked(adapter);
}

/*********************************************************************
 *  Init entry point
 *
 *  This routine is used in two ways. It is used by the stack as
 *  init entry point in network interface structure. It is also used
 *  by the driver as a hw/sw initialization routine to get to a
 *  consistent state.
 *
 *  return 0 on success, positive on failure
 **********************************************************************/

static void
em_init_locked(struct adapter *adapter)
{
	struct ifnet	*ifp = adapter->ifp;
	device_t	dev = adapter->dev;
	uint32_t	pba;

	INIT_DEBUGOUT("em_init: begin");

	EM_LOCK_ASSERT(adapter);

	em_stop(adapter);

	/*
	 * Packet Buffer Allocation (PBA)
	 * Writing PBA sets the receive portion of the buffer;
	 * the remainder is used for the transmit buffer.
	 *
	 * Devices before the 82547 had a Packet Buffer of 64K.
	 *   Default allocation: PBA=48K for Rx, leaving 16K for Tx.
	 * After the 82547 the buffer was reduced to 40K.
	 *   Default allocation: PBA=30K for Rx, leaving 10K for Tx.
	 *   Note: default does not leave enough room for Jumbo Frame >10k.
	 */
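	/*
	 * Worked example (illustrative, not from the original source):
	 * on an 82547 carrying jumbo frames, pba = E1000_PBA_22K, so 22K
	 * of the 40K packet buffer goes to Rx and the code below sizes
	 * the Tx FIFO from the remainder,
	 *   tx_fifo_size = (E1000_PBA_40K - pba) << EM_PBA_BYTES_SHIFT,
	 * assuming EM_PBA_BYTES_SHIFT scales the KB-granularity PBA
	 * units up to bytes.
	 */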
	switch (adapter->hw.mac.type) {
	case e1000_82547:
	case e1000_82547_rev_2: /* 82547: Total Packet Buffer is 40K */
		if (adapter->hw.mac.max_frame_size > 8192)
			pba = E1000_PBA_22K; /* 22K for Rx, 18K for Tx */
		else
			pba = E1000_PBA_30K; /* 30K for Rx, 10K for Tx */
		adapter->tx_fifo_head = 0;
		adapter->tx_head_addr = pba << EM_TX_HEAD_ADDR_SHIFT;
		adapter->tx_fifo_size =
		    (E1000_PBA_40K - pba) << EM_PBA_BYTES_SHIFT;
		break;
	/* Total Packet Buffer on these is 48K */
	case e1000_82571:
	case e1000_82572:
	case e1000_82575:
	case e1000_80003es2lan:
		pba = E1000_PBA_32K; /* 32K for Rx, 16K for Tx */
		break;
	case e1000_82573: /* 82573: Total Packet Buffer is 32K */
		pba = E1000_PBA_12K; /* 12K for Rx, 20K for Tx */
		break;
	case e1000_ich9lan:
#define E1000_PBA_10K	0x000A
		pba = E1000_PBA_10K;
		break;
	case e1000_ich8lan:
		pba = E1000_PBA_8K;
		break;
	default:
		/* Devices before 82547 had a Packet Buffer of 64K. */
		if (adapter->hw.mac.max_frame_size > 8192)
			pba = E1000_PBA_40K; /* 40K for Rx, 24K for Tx */
		else
			pba = E1000_PBA_48K; /* 48K for Rx, 16K for Tx */
	}

	INIT_DEBUGOUT1("em_init: pba=%dK", pba);
	E1000_WRITE_REG(&adapter->hw, E1000_PBA, pba);

	/* Get the latest mac address, User can use a LAA */
	bcopy(IF_LLADDR(adapter->ifp), adapter->hw.mac.addr,
	    ETHER_ADDR_LEN);

	/* Put the address into the Receive Address Array */
	e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);

	/*
	 * With 82571 controllers, LAA may be overwritten
	 * due to controller reset from the other port.
	 */
	if (adapter->hw.mac.type == e1000_82571)
		e1000_set_laa_state_82571(&adapter->hw, TRUE);

	/* Initialize the hardware */
	if (em_hardware_init(adapter)) {
		device_printf(dev, "Unable to initialize the hardware\n");
		return;
	}
	em_update_link_status(adapter);

	if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING)
		em_enable_vlans(adapter);

	/* Set hardware offload abilities */
	ifp->if_hwassist = 0;
	if (adapter->hw.mac.type >= e1000_82543) {
		if (ifp->if_capenable & IFCAP_TXCSUM)
			ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP);
		if (ifp->if_capenable & IFCAP_TSO4)
			ifp->if_hwassist |= CSUM_TSO;
	}

	/* Configure for OS presence */
	em_init_manageability(adapter);

	/* Prepare transmit descriptors and buffers */
	if (em_setup_transmit_structures(adapter)) {
		device_printf(dev, "Could not setup transmit structures\n");
		em_stop(adapter);
		return;
	}
	em_initialize_transmit_unit(adapter);

	/* Setup Multicast table */
	em_set_multi(adapter);

	/* Prepare receive descriptors and buffers */
	if (em_setup_receive_structures(adapter)) {
		device_printf(dev, "Could not setup receive structures\n");
		em_stop(adapter);
		return;
	}
	em_initialize_receive_unit(adapter);

	/* Don't lose promiscuous settings */
	em_set_promisc(adapter);

	ifp->if_drv_flags |= IFF_DRV_RUNNING;
	ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;

	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
	e1000_clear_hw_cntrs_base_generic(&adapter->hw);

#ifdef DEVICE_POLLING
	/*
	 * Only enable interrupts if we are not polling, make sure
	 * they are off otherwise.
	 */
	if (ifp->if_capenable & IFCAP_POLLING)
		em_disable_intr(adapter);
	else
#endif /* DEVICE_POLLING */
		em_enable_intr(adapter);

	/* Don't reset the phy next time init gets called */
	adapter->hw.phy.reset_disable = TRUE;
}

static void
em_init(void *arg)
{
	struct adapter *adapter = arg;

	EM_LOCK(adapter);
	em_init_locked(adapter);
	EM_UNLOCK(adapter);
}


#ifdef DEVICE_POLLING
/*********************************************************************
 *
 *  Legacy polling routine
 *
 *********************************************************************/
static void
em_poll(struct ifnet *ifp, enum poll_cmd cmd, int count)
{
	struct adapter *adapter = ifp->if_softc;
	uint32_t reg_icr;

	EM_LOCK(adapter);
	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
		EM_UNLOCK(adapter);
		return;
	}

	if (cmd == POLL_AND_CHECK_STATUS) {
		reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
		if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
			callout_stop(&adapter->timer);
			adapter->hw.mac.get_link_status = 1;
			e1000_check_for_link(&adapter->hw);
			em_update_link_status(adapter);
			callout_reset(&adapter->timer, hz,
			    em_local_timer, adapter);
		}
	}
	em_rxeof(adapter, count);
	em_txeof(adapter);

	if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
		em_start_locked(ifp);
	EM_UNLOCK(adapter);
}

/*********************************************************************
 *
 *  Legacy Interrupt Service routine
 *
 *********************************************************************/

static void
em_intr(void *arg)
{
	struct adapter	*adapter = arg;
	struct ifnet	*ifp;
	uint32_t	reg_icr;

	EM_LOCK(adapter);
	ifp = adapter->ifp;

	if (ifp->if_capenable & IFCAP_POLLING) {
		EM_UNLOCK(adapter);
		return;
	}

	for (;;) {
		reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);

		if (adapter->hw.mac.type >= e1000_82571 &&
		    (reg_icr & E1000_ICR_INT_ASSERTED) == 0)
			break;
		else if (reg_icr == 0)
			break;

		/*
		 * XXX: some laptops trigger several spurious interrupts
		 * on em(4) when in the resume cycle. The ICR register
		 * reports all-ones value in this case. Processing such
		 * interrupts would lead to a freeze. I don't know why.
		 */
		if (reg_icr == 0xffffffff)
			break;

		if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
			em_rxeof(adapter, -1);
			em_txeof(adapter);
		}

		/* Link status change */
		if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
			callout_stop(&adapter->timer);
			adapter->hw.mac.get_link_status = 1;
			e1000_check_for_link(&adapter->hw);
			em_update_link_status(adapter);
			callout_reset(&adapter->timer, hz,
			    em_local_timer, adapter);
		}

		if (reg_icr & E1000_ICR_RXO)
			adapter->rx_overruns++;
	}

	if (ifp->if_drv_flags & IFF_DRV_RUNNING &&
	    !IFQ_DRV_IS_EMPTY(&ifp->if_snd))
		em_start_locked(ifp);
	EM_UNLOCK(adapter);
}
#else /* if not DEVICE_POLLING, then fast interrupt routines only */

static void
em_handle_link(void *context, int pending)
{
	struct adapter	*adapter = context;
	struct ifnet *ifp;

	ifp = adapter->ifp;

	EM_LOCK(adapter);
	if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) {
		EM_UNLOCK(adapter);
		return;
	}

	callout_stop(&adapter->timer);
	adapter->hw.mac.get_link_status = 1;
	e1000_check_for_link(&adapter->hw);
	em_update_link_status(adapter);
	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
	EM_UNLOCK(adapter);
}

static void
em_handle_rxtx(void *context, int pending)
{
	struct adapter	*adapter = context;
	struct ifnet	*ifp;

	ifp = adapter->ifp;

	/*
	 * TODO:
	 * It should be possible to run the tx clean loop without the lock.
	 */
	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
		if (em_rxeof(adapter, adapter->rx_process_limit) != 0)
			taskqueue_enqueue(adapter->tq, &adapter->rxtx_task);
		EM_LOCK(adapter);
		em_txeof(adapter);

		if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
			em_start_locked(ifp);
		EM_UNLOCK(adapter);
	}

	em_enable_intr(adapter);
}

/*********************************************************************
 *
 *  Fast Interrupt Service routine
 *
 *********************************************************************/
static int
em_intr_fast(void *arg)
{
	struct adapter	*adapter = arg;
	struct ifnet	*ifp;
	uint32_t	reg_icr;

	ifp = adapter->ifp;

	reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);

	/* Hot eject?  */
	if (reg_icr == 0xffffffff)
		return (FILTER_STRAY);

	/* Definitely not our interrupt.  */
	if (reg_icr == 0x0)
		return (FILTER_STRAY);

	/*
	 * Starting with the 82571 chip, bit 31 should be used to
	 * determine whether the interrupt belongs to us.
	 */
	if (adapter->hw.mac.type >= e1000_82571 &&
	    (reg_icr & E1000_ICR_INT_ASSERTED) == 0)
		return (FILTER_STRAY);

	/*
	 * Mask interrupts until the taskqueue is finished running.  This is
	 * cheap, just assume that it is needed.  This also works around the
	 * MSI message reordering errata on certain systems.
	 */
	em_disable_intr(adapter);
	taskqueue_enqueue(adapter->tq, &adapter->rxtx_task);

	/* Link status change */
	if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC))
		taskqueue_enqueue(taskqueue_fast, &adapter->link_task);

	if (reg_icr & E1000_ICR_RXO)
		adapter->rx_overruns++;
	return (FILTER_HANDLED);
}
#endif /* ! DEVICE_POLLING */
/*********************************************************************
 *
 *  Media Ioctl callback
 *
 *  This routine is called whenever the user queries the status of
 *  the interface using ifconfig.
 *
 **********************************************************************/
static void
em_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
{
	struct adapter *adapter = ifp->if_softc;
	u_char fiber_type = IFM_1000_SX;

	INIT_DEBUGOUT("em_media_status: begin");

	EM_LOCK(adapter);
	e1000_check_for_link(&adapter->hw);
	em_update_link_status(adapter);

	ifmr->ifm_status = IFM_AVALID;
	ifmr->ifm_active = IFM_ETHER;

	if (!adapter->link_active) {
		EM_UNLOCK(adapter);
		return;
	}

	ifmr->ifm_status |= IFM_ACTIVE;

	if ((adapter->hw.media_type == e1000_media_type_fiber) ||
	    (adapter->hw.media_type == e1000_media_type_internal_serdes)) {
		if (adapter->hw.mac.type == e1000_82545)
			fiber_type = IFM_1000_LX;
		ifmr->ifm_active |= fiber_type | IFM_FDX;
	} else {
		switch (adapter->link_speed) {
		case 10:
			ifmr->ifm_active |= IFM_10_T;
			break;
		case 100:
			ifmr->ifm_active |= IFM_100_TX;
			break;
		case 1000:
			ifmr->ifm_active |= IFM_1000_T;
			break;
		}
		if (adapter->link_duplex == FULL_DUPLEX)
			ifmr->ifm_active |= IFM_FDX;
		else
			ifmr->ifm_active |= IFM_HDX;
	}
	EM_UNLOCK(adapter);
}
1601
1602/*********************************************************************
1603 *
1604 *  Media Ioctl callback
1605 *
1606 *  This routine is called when the user changes speed/duplex using
1607 *  media/mediopt option with ifconfig.
1608 *
1609 **********************************************************************/
static int
em_media_change(struct ifnet *ifp)
{
	struct adapter *adapter = ifp->if_softc;
	struct ifmedia  *ifm = &adapter->media;

	INIT_DEBUGOUT("em_media_change: begin");

	if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
		return (EINVAL);

	EM_LOCK(adapter);
	switch (IFM_SUBTYPE(ifm->ifm_media)) {
	case IFM_AUTO:
		adapter->hw.mac.autoneg = DO_AUTO_NEG;
		adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
		break;
	case IFM_1000_LX:
	case IFM_1000_SX:
	case IFM_1000_T:
		adapter->hw.mac.autoneg = DO_AUTO_NEG;
		adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
		break;
	case IFM_100_TX:
		adapter->hw.mac.autoneg = FALSE;
		adapter->hw.phy.autoneg_advertised = 0;
		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
			adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_FULL;
		else
			adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_HALF;
		break;
	case IFM_10_T:
		adapter->hw.mac.autoneg = FALSE;
		adapter->hw.phy.autoneg_advertised = 0;
		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
			adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_FULL;
		else
			adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_HALF;
		break;
	default:
		device_printf(adapter->dev, "Unsupported media type\n");
	}

	/* As the speed/duplex settings may have changed we need to
	 * reset the PHY.
	 */
	adapter->hw.phy.reset_disable = FALSE;

	em_init_locked(adapter);
	EM_UNLOCK(adapter);

	return (0);
}

/*********************************************************************
 *
 *  This routine maps the mbufs to tx descriptors.
 *
 *  return 0 on success, positive on failure
 **********************************************************************/

static int
em_encap(struct adapter *adapter, struct mbuf **m_headp)
{
	bus_dma_segment_t	segs[EM_MAX_SCATTER];
	bus_dmamap_t		map;
	struct em_buffer	*tx_buffer, *tx_buffer_mapped;
	struct e1000_tx_desc	*ctxd = NULL;
	struct mbuf		*m_head;
	uint32_t		txd_upper, txd_lower, txd_used, txd_saved;
	int			nsegs, i, j, first, last = 0;
	int			error, do_tso, tso_desc = 0;

	m_head = *m_headp;
	txd_upper = txd_lower = txd_used = txd_saved = 0;

	do_tso = ((m_head->m_pkthdr.csum_flags & CSUM_TSO) != 0);
1688        /*
1689         * Force a cleanup if number of TX descriptors
1690         * available hits the threshold
1691         */
1692	if (adapter->num_tx_desc_avail <= EM_TX_CLEANUP_THRESHOLD) {
1693		em_txeof(adapter);
1694		/* Now do we at least have a minimal? */
1695		if (adapter->num_tx_desc_avail <= EM_TX_OP_THRESHOLD) {
1696			adapter->no_tx_desc_avail1++;
1697			return (ENOBUFS);
1698		}
1699	}
1700
1701
1702	/*
1703	 * TSO workaround:
1704	 *  If an mbuf is only header we need
1705	 *     to pull 4 bytes of data into it.
1706	 */
1707	if (do_tso && (m_head->m_len <= M_TSO_LEN)) {
1708		m_head = m_pullup(m_head, M_TSO_LEN + 4);
1709		*m_headp = m_head;
1710		if (m_head == NULL)
1711			return (ENOBUFS);
1712	}
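
	/*
	 * The pullup above is a safety margin: assuming M_TSO_LEN spans
	 * the Ethernet, IP and TCP headers, it leaves at least 4 bytes
	 * of TCP payload contiguous with those headers, so the hardware
	 * never sees a header-only first descriptor.
	 */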

	/*
	 * Map the packet for DMA
	 *
	 * Capture the first descriptor index,
	 * this descriptor will have the index
	 * of the EOP which is the only one that
	 * now gets a DONE bit writeback.
	 */
	first = adapter->next_avail_tx_desc;
	tx_buffer = &adapter->tx_buffer_area[first];
	tx_buffer_mapped = tx_buffer;
	map = tx_buffer->map;

	error = bus_dmamap_load_mbuf_sg(adapter->txtag, map,
	    *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);

	/*
	 * There are two types of errors we can (try) to handle:
	 * - EFBIG means the mbuf chain was too long and bus_dma ran
	 *   out of segments.  Defragment the mbuf chain and try again.
	 * - ENOMEM means bus_dma could not obtain enough bounce buffers
	 *   at this point in time.  Defer sending and try again later.
	 * All other errors, in particular EINVAL, are fatal and prevent the
	 * mbuf chain from ever going through.  Drop it and report error.
	 */
	if (error == EFBIG) {
		struct mbuf *m;

		m = m_defrag(*m_headp, M_DONTWAIT);
		if (m == NULL) {
			adapter->mbuf_alloc_failed++;
			m_freem(*m_headp);
			*m_headp = NULL;
			return (ENOBUFS);
		}
		*m_headp = m;

		/* Try it again */
		error = bus_dmamap_load_mbuf_sg(adapter->txtag, map,
		    *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);

		if (error == ENOMEM) {
			adapter->no_tx_dma_setup++;
			return (error);
		} else if (error != 0) {
			adapter->no_tx_dma_setup++;
			m_freem(*m_headp);
			*m_headp = NULL;
			return (error);
		}
	} else if (error == ENOMEM) {
		adapter->no_tx_dma_setup++;
		return (error);
	} else if (error != 0) {
		adapter->no_tx_dma_setup++;
		m_freem(*m_headp);
		*m_headp = NULL;
		return (error);
	}

	/*
	 * TSO Hardware workaround: if this packet is not
	 * TSO, is only a single descriptor long, and
	 * follows a TSO burst, then we need to add a
	 * sentinel descriptor to prevent premature writeback.
	 */
	if ((do_tso == 0) && (adapter->tx_tso == TRUE)) {
		if (nsegs == 1)
			tso_desc = TRUE;
		adapter->tx_tso = FALSE;
	}
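
	/*
	 * Concretely: a one-segment, non-TSO frame queued right after a
	 * TSO burst is split further below into a data descriptor plus
	 * a small 4-byte sentinel descriptor, so its EOP writeback
	 * cannot complete ahead of the TSO descriptors still in flight.
	 */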

	if (nsegs > (adapter->num_tx_desc_avail - 2)) {
		adapter->no_tx_desc_avail2++;
		bus_dmamap_unload(adapter->txtag, map);
		return (ENOBUFS);
	}
	m_head = *m_headp;

	/* Do hardware assists */
	if (em_tso_setup(adapter, m_head, &txd_upper, &txd_lower))
		/* we need to make a final sentinel transmit desc */
		tso_desc = TRUE;
	else if (m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD)
		em_transmit_checksum_setup(adapter, m_head,
		    &txd_upper, &txd_lower);

	i = adapter->next_avail_tx_desc;
	if (adapter->pcix_82544)
		txd_saved = i;

	/* Set up our transmit descriptors */
	for (j = 0; j < nsegs; j++) {
		bus_size_t seg_len;
		bus_addr_t seg_addr;
		/* If adapter is 82544 and on PCIX bus */
		if (adapter->pcix_82544) {
			DESC_ARRAY	desc_array;
			uint32_t	array_elements, counter;
			/*
			 * Check the Address and Length combination and
			 * split the data accordingly
			 */
			array_elements = em_fill_descriptors(segs[j].ds_addr,
			    segs[j].ds_len, &desc_array);
			for (counter = 0; counter < array_elements; counter++) {
				if (txd_used == adapter->num_tx_desc_avail) {
					adapter->next_avail_tx_desc = txd_saved;
					adapter->no_tx_desc_avail2++;
					bus_dmamap_unload(adapter->txtag, map);
					return (ENOBUFS);
				}
				tx_buffer = &adapter->tx_buffer_area[i];
				ctxd = &adapter->tx_desc_base[i];
				ctxd->buffer_addr = htole64(
				    desc_array.descriptor[counter].address);
				ctxd->lower.data = htole32(
				    (adapter->txd_cmd | txd_lower | (uint16_t)
				    desc_array.descriptor[counter].length));
				ctxd->upper.data = htole32(txd_upper);
				last = i;
				if (++i == adapter->num_tx_desc)
					i = 0;
				tx_buffer->m_head = NULL;
				tx_buffer->next_eop = -1;
				txd_used++;
			}
		} else {
			tx_buffer = &adapter->tx_buffer_area[i];
			ctxd = &adapter->tx_desc_base[i];
			seg_addr = segs[j].ds_addr;
			seg_len  = segs[j].ds_len;
			/*
			 * TSO Workaround:
			 * If this is the last descriptor, we want to
			 * split it so we have a small final sentinel.
			 */
			if (tso_desc && (j == (nsegs - 1)) && (seg_len > 8)) {
				seg_len -= 4;
				ctxd->buffer_addr = htole64(seg_addr);
				ctxd->lower.data = htole32(
				    adapter->txd_cmd | txd_lower | seg_len);
				ctxd->upper.data = htole32(txd_upper);
				if (++i == adapter->num_tx_desc)
					i = 0;
				/* Now make the sentinel */
				++txd_used; /* using an extra txd */
				ctxd = &adapter->tx_desc_base[i];
				tx_buffer = &adapter->tx_buffer_area[i];
				ctxd->buffer_addr =
				    htole64(seg_addr + seg_len);
				ctxd->lower.data = htole32(
				    adapter->txd_cmd | txd_lower | 4);
				ctxd->upper.data = htole32(txd_upper);
				last = i;
				if (++i == adapter->num_tx_desc)
					i = 0;
			} else {
				ctxd->buffer_addr = htole64(seg_addr);
				ctxd->lower.data = htole32(
				    adapter->txd_cmd | txd_lower | seg_len);
				ctxd->upper.data = htole32(txd_upper);
				last = i;
				if (++i == adapter->num_tx_desc)
					i = 0;
			}
			tx_buffer->m_head = NULL;
			tx_buffer->next_eop = -1;
		}
	}

	adapter->next_avail_tx_desc = i;
	if (adapter->pcix_82544)
		adapter->num_tx_desc_avail -= txd_used;
	else {
		adapter->num_tx_desc_avail -= nsegs;
		if (tso_desc) /* TSO used an extra descriptor for the sentinel */
			adapter->num_tx_desc_avail -= txd_used;
	}

	if (m_head->m_flags & M_VLANTAG) {
		/* Set the vlan id. */
		ctxd->upper.fields.special =
		    htole16(m_head->m_pkthdr.ether_vtag);
		/* Tell hardware to add tag */
		ctxd->lower.data |= htole32(E1000_TXD_CMD_VLE);
	}

	tx_buffer->m_head = m_head;
	tx_buffer_mapped->map = tx_buffer->map;
	tx_buffer->map = map;
	bus_dmamap_sync(adapter->txtag, map, BUS_DMASYNC_PREWRITE);

	/*
	 * The last descriptor of the packet needs
	 * End Of Packet (EOP) and Report Status (RS).
	 */
	ctxd->lower.data |=
	    htole32(E1000_TXD_CMD_EOP | E1000_TXD_CMD_RS);
	/*
	 * Keep track in the first buffer which
	 * descriptor will be written back.
	 */
	tx_buffer = &adapter->tx_buffer_area[first];
	tx_buffer->next_eop = last;

	/*
	 * Advance the Transmit Descriptor Tail (TDT), this tells the E1000
	 * that this frame is available to transmit.
	 */
	bus_dmamap_sync(adapter->txdma.dma_tag, adapter->txdma.dma_map,
	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
	if (adapter->hw.mac.type == e1000_82547 &&
	    adapter->link_duplex == HALF_DUPLEX)
		em_82547_move_tail(adapter);
	else {
		E1000_WRITE_REG(&adapter->hw, E1000_TDT, i);
		if (adapter->hw.mac.type == e1000_82547)
			em_82547_update_fifo_head(adapter,
			    m_head->m_pkthdr.len);
	}

	return (0);
}

/*********************************************************************
 *
 *  This routine maps the mbufs to Advanced TX descriptors, as used
 *  by the 82575 adapter; it needs no workarounds.
 *
 **********************************************************************/

static int
em_adv_encap(struct adapter *adapter, struct mbuf **m_headp)
{
	bus_dma_segment_t	segs[EM_MAX_SCATTER];
	bus_dmamap_t		map;
	struct em_buffer	*tx_buffer, *tx_buffer_mapped;
	union e1000_adv_tx_desc	*txd = NULL;
	struct mbuf		*m_head;
	u32			olinfo_status = 0, cmd_type_len = 0;
	u32			paylen = 0;
	int			nsegs, i, j, error, first, last = 0;

	m_head = *m_headp;

	/* Set basic descriptor constants */
	cmd_type_len |= E1000_ADVTXD_DTYP_DATA;
	cmd_type_len |= E1000_ADVTXD_DCMD_IFCS | E1000_ADVTXD_DCMD_DEXT;

	/*
	 * Force a cleanup if the number of TX descriptors
	 * available hits the threshold.
	 */
	if (adapter->num_tx_desc_avail <= EM_TX_CLEANUP_THRESHOLD) {
		em_txeof(adapter);
		/* Do we now have at least a minimal number? */
		if (adapter->num_tx_desc_avail <= EM_TX_OP_THRESHOLD) {
			adapter->no_tx_desc_avail1++;
			return (ENOBUFS);
		}
	}

	/*
	 * Map the packet for DMA.
	 *
	 * Capture the first descriptor index,
	 * this descriptor will have the index
	 * of the EOP which is the only one that
	 * now gets a DONE bit writeback.
	 */
	first = adapter->next_avail_tx_desc;
	tx_buffer = &adapter->tx_buffer_area[first];
	tx_buffer_mapped = tx_buffer;
	map = tx_buffer->map;

	error = bus_dmamap_load_mbuf_sg(adapter->txtag, map,
	    *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);

	if (error == EFBIG) {
		struct mbuf *m;

		m = m_defrag(*m_headp, M_DONTWAIT);
		if (m == NULL) {
			adapter->mbuf_alloc_failed++;
			m_freem(*m_headp);
			*m_headp = NULL;
			return (ENOBUFS);
		}
		*m_headp = m;

		/* Try it again */
		error = bus_dmamap_load_mbuf_sg(adapter->txtag, map,
		    *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);

		if (error == ENOMEM) {
			adapter->no_tx_dma_setup++;
			return (error);
		} else if (error != 0) {
			adapter->no_tx_dma_setup++;
			m_freem(*m_headp);
			*m_headp = NULL;
			return (error);
		}
	} else if (error == ENOMEM) {
		adapter->no_tx_dma_setup++;
		return (error);
	} else if (error != 0) {
		adapter->no_tx_dma_setup++;
		m_freem(*m_headp);
		*m_headp = NULL;
		return (error);
	}

	/* Check again to be sure we have enough descriptors */
	if (nsegs > (adapter->num_tx_desc_avail - 2)) {
		adapter->no_tx_desc_avail2++;
		bus_dmamap_unload(adapter->txtag, map);
		return (ENOBUFS);
	}
	m_head = *m_headp;

	/*
	 * Set up the context descriptor:
	 * used when any hardware offload is done.
	 * This includes CSUM, VLAN, and TSO. It
	 * will use the first descriptor.
	 */
	/* First try TSO */
	if (em_tso_adv_setup(adapter, m_head, &paylen)) {
		cmd_type_len |= E1000_ADVTXD_DCMD_TSE;
		olinfo_status |= E1000_TXD_POPTS_IXSM << 8;
		olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
		olinfo_status |= paylen << E1000_ADVTXD_PAYLEN_SHIFT;
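		/*
		 * olinfo_status now carries both the checksum insertion
		 * flags (POPTS bits shifted into the second byte) and
		 * the total TSO payload length in the PAYLEN field,
		 * e.g. a 16384-byte payload is encoded as
		 * 16384 << E1000_ADVTXD_PAYLEN_SHIFT.
		 */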
	} else if (m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD) {
		if (em_tx_adv_ctx_setup(adapter, m_head))
			olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
	}

	/* Set up our transmit descriptors */
	i = adapter->next_avail_tx_desc;
	for (j = 0; j < nsegs; j++) {
		bus_size_t seg_len;
		bus_addr_t seg_addr;

		tx_buffer = &adapter->tx_buffer_area[i];
		txd = (union e1000_adv_tx_desc *)&adapter->tx_desc_base[i];
		seg_addr = segs[j].ds_addr;
		seg_len  = segs[j].ds_len;

		txd->read.buffer_addr = htole64(seg_addr);
		txd->read.cmd_type_len = htole32(
		    adapter->txd_cmd | cmd_type_len | seg_len);
		txd->read.olinfo_status = htole32(olinfo_status);
		last = i;
		if (++i == adapter->num_tx_desc)
			i = 0;
		tx_buffer->m_head = NULL;
		tx_buffer->next_eop = -1;
	}

	adapter->next_avail_tx_desc = i;
	adapter->num_tx_desc_avail -= nsegs;

	tx_buffer->m_head = m_head;
	tx_buffer_mapped->map = tx_buffer->map;
	tx_buffer->map = map;
	bus_dmamap_sync(adapter->txtag, map, BUS_DMASYNC_PREWRITE);

	/*
	 * The last descriptor of the packet needs
	 * End Of Packet (EOP) and Report Status (RS).
	 */
	txd->read.cmd_type_len |=
	    htole32(E1000_TXD_CMD_EOP | E1000_TXD_CMD_RS);
	/*
	 * Keep track in the first buffer which
	 * descriptor will be written back.
	 */
	tx_buffer = &adapter->tx_buffer_area[first];
	tx_buffer->next_eop = last;

	/*
	 * Advance the Transmit Descriptor Tail (TDT), this tells the E1000
	 * that this frame is available to transmit.
	 */
	bus_dmamap_sync(adapter->txdma.dma_tag, adapter->txdma.dma_map,
	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
	E1000_WRITE_REG(&adapter->hw, E1000_TDT, i);

	return (0);
}

/*********************************************************************
 *
 * 82547 workaround to avoid controller hang in a half-duplex
 * environment. The workaround is to avoid queuing a large packet that
 * would span the internal Tx FIFO ring boundary. We need to reset the
 * FIFO pointers in this case. We do that only when the FIFO is quiescent.
 *
 **********************************************************************/
static void
em_82547_move_tail(void *arg)
{
	struct adapter *adapter = arg;
	uint16_t hw_tdt;
	uint16_t sw_tdt;
	struct e1000_tx_desc *tx_desc;
	uint16_t length = 0;
	boolean_t eop = 0;

	EM_LOCK_ASSERT(adapter);

	hw_tdt = E1000_READ_REG(&adapter->hw, E1000_TDT);
	sw_tdt = adapter->next_avail_tx_desc;

	while (hw_tdt != sw_tdt) {
		tx_desc = &adapter->tx_desc_base[hw_tdt];
		length += tx_desc->lower.flags.length;
		eop = tx_desc->lower.data & E1000_TXD_CMD_EOP;
		if (++hw_tdt == adapter->num_tx_desc)
			hw_tdt = 0;

		if (eop) {
			if (em_82547_fifo_workaround(adapter, length)) {
				adapter->tx_fifo_wrk_cnt++;
				callout_reset(&adapter->tx_fifo_timer, 1,
				    em_82547_move_tail, adapter);
				break;
			}
			E1000_WRITE_REG(&adapter->hw, E1000_TDT, hw_tdt);
			em_82547_update_fifo_head(adapter, length);
			length = 0;
		}
	}
}

static int
em_82547_fifo_workaround(struct adapter *adapter, int len)
{
	int fifo_space, fifo_pkt_len;

	fifo_pkt_len = roundup2(len + EM_FIFO_HDR, EM_FIFO_HDR);
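
	/*
	 * Worked example, assuming EM_FIFO_HDR is 16: a 1514-byte frame
	 * occupies roundup2(1514 + 16, 16) = 1536 bytes of FIFO space,
	 * since each packet is stored with a FIFO header and padded to
	 * a 16-byte boundary.
	 */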

	if (adapter->link_duplex == HALF_DUPLEX) {
		fifo_space = adapter->tx_fifo_size - adapter->tx_fifo_head;

		if (fifo_pkt_len >= (EM_82547_PKT_THRESH + fifo_space)) {
			if (em_82547_tx_fifo_reset(adapter))
				return (0);
			else
				return (1);
		}
	}

	return (0);
}

static void
em_82547_update_fifo_head(struct adapter *adapter, int len)
{
	int fifo_pkt_len = roundup2(len + EM_FIFO_HDR, EM_FIFO_HDR);

	/* tx_fifo_head is always 16 byte aligned */
	adapter->tx_fifo_head += fifo_pkt_len;
	if (adapter->tx_fifo_head >= adapter->tx_fifo_size) {
		adapter->tx_fifo_head -= adapter->tx_fifo_size;
	}
}
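
/*
 * The head thus follows the FIFO as a ring; e.g. with a hypothetical
 * 8 KB TX FIFO and the head at 7680, a 1536-byte packet advances it
 * to 9216, which wraps to 9216 - 8192 = 1024.
 */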

static int
em_82547_tx_fifo_reset(struct adapter *adapter)
{
	uint32_t tctl;

	if ((E1000_READ_REG(&adapter->hw, E1000_TDT) ==
	    E1000_READ_REG(&adapter->hw, E1000_TDH)) &&
	    (E1000_READ_REG(&adapter->hw, E1000_TDFT) ==
	    E1000_READ_REG(&adapter->hw, E1000_TDFH)) &&
	    (E1000_READ_REG(&adapter->hw, E1000_TDFTS) ==
	    E1000_READ_REG(&adapter->hw, E1000_TDFHS)) &&
	    (E1000_READ_REG(&adapter->hw, E1000_TDFPC) == 0)) {
		/* Disable TX unit */
		tctl = E1000_READ_REG(&adapter->hw, E1000_TCTL);
		E1000_WRITE_REG(&adapter->hw, E1000_TCTL,
		    tctl & ~E1000_TCTL_EN);

		/* Reset FIFO pointers */
		E1000_WRITE_REG(&adapter->hw, E1000_TDFT,
		    adapter->tx_head_addr);
		E1000_WRITE_REG(&adapter->hw, E1000_TDFH,
		    adapter->tx_head_addr);
		E1000_WRITE_REG(&adapter->hw, E1000_TDFTS,
		    adapter->tx_head_addr);
		E1000_WRITE_REG(&adapter->hw, E1000_TDFHS,
		    adapter->tx_head_addr);

		/* Re-enable TX unit */
		E1000_WRITE_REG(&adapter->hw, E1000_TCTL, tctl);
		E1000_WRITE_FLUSH(&adapter->hw);

		adapter->tx_fifo_head = 0;
		adapter->tx_fifo_reset_cnt++;

		return (TRUE);
	} else {
		return (FALSE);
	}
}

static void
em_set_promisc(struct adapter *adapter)
{
	struct ifnet	*ifp = adapter->ifp;
	uint32_t	reg_rctl;

	reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);

	if (ifp->if_flags & IFF_PROMISC) {
		reg_rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
	} else if (ifp->if_flags & IFF_ALLMULTI) {
		reg_rctl |= E1000_RCTL_MPE;
		reg_rctl &= ~E1000_RCTL_UPE;
		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
	}
}

static void
em_disable_promisc(struct adapter *adapter)
{
	uint32_t	reg_rctl;

	reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);

	reg_rctl &= (~E1000_RCTL_UPE);
	reg_rctl &= (~E1000_RCTL_MPE);
	E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
}


/*********************************************************************
 *  Multicast Update
 *
 *  This routine is called whenever the multicast address list is updated.
 *
 **********************************************************************/

static void
em_set_multi(struct adapter *adapter)
{
	struct ifnet	*ifp = adapter->ifp;
	struct ifmultiaddr *ifma;
	uint32_t reg_rctl = 0;
	uint8_t  mta[512]; /* Largest MTS is 4096 bits */
	int mcnt = 0;

	IOCTL_DEBUGOUT("em_set_multi: begin");

	if (adapter->hw.mac.type == e1000_82542 &&
	    adapter->hw.revision_id == E1000_REVISION_2) {
		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
		if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
			e1000_pci_clear_mwi(&adapter->hw);
		reg_rctl |= E1000_RCTL_RST;
		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
		msec_delay(5);
	}

	IF_ADDR_LOCK(ifp);
	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
		if (ifma->ifma_addr->sa_family != AF_LINK)
			continue;

		if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
			break;

		bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
		    &mta[mcnt * ETH_ADDR_LEN], ETH_ADDR_LEN);
		mcnt++;
	}
	IF_ADDR_UNLOCK(ifp);

	if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES) {
		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
		reg_rctl |= E1000_RCTL_MPE;
		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
	} else
		e1000_mc_addr_list_update(&adapter->hw, mta,
		    mcnt, 1, adapter->hw.mac.rar_entry_count);

	if (adapter->hw.mac.type == e1000_82542 &&
	    adapter->hw.revision_id == E1000_REVISION_2) {
		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
		reg_rctl &= ~E1000_RCTL_RST;
		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
		msec_delay(5);
		if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
			e1000_pci_set_mwi(&adapter->hw);
	}
}


/*********************************************************************
 *  Timer routine
 *
 *  This routine checks for link status and updates statistics.
 *
 **********************************************************************/

static void
em_local_timer(void *arg)
{
	struct adapter	*adapter = arg;
	struct ifnet	*ifp = adapter->ifp;

	EM_LOCK_ASSERT(adapter);

	e1000_check_for_link(&adapter->hw);
	em_update_link_status(adapter);
	em_update_stats_counters(adapter);

	/* Check for 82571 LAA reset by other port */
	if (e1000_get_laa_state_82571(&adapter->hw) == TRUE)
		e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);

	if (em_display_debug_stats && ifp->if_drv_flags & IFF_DRV_RUNNING)
		em_print_hw_stats(adapter);

	em_smartspeed(adapter);

	/*
	 * Each second we check the watchdog to
	 * protect against hardware hangs.
	 */
	em_watchdog(adapter);

	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
}

static void
em_update_link_status(struct adapter *adapter)
{
	struct ifnet *ifp = adapter->ifp;
	device_t dev = adapter->dev;

	if (E1000_READ_REG(&adapter->hw, E1000_STATUS) &
	    E1000_STATUS_LU) {
		if (adapter->link_active == 0) {
			e1000_get_speed_and_duplex(&adapter->hw,
			    &adapter->link_speed, &adapter->link_duplex);
			/* Check if we must disable SPEED_MODE bit on PCI-E */
			if ((adapter->link_speed != SPEED_1000) &&
			    ((adapter->hw.mac.type == e1000_82571) ||
			    (adapter->hw.mac.type == e1000_82572))) {
				int tarc0;

				tarc0 = E1000_READ_REG(&adapter->hw,
				    E1000_TARC0);
				tarc0 &= ~SPEED_MODE_BIT;
				E1000_WRITE_REG(&adapter->hw,
				    E1000_TARC0, tarc0);
			}
			if (bootverbose)
				device_printf(dev, "Link is up %d Mbps %s\n",
				    adapter->link_speed,
				    ((adapter->link_duplex == FULL_DUPLEX) ?
				    "Full Duplex" : "Half Duplex"));
			adapter->link_active = 1;
			adapter->smartspeed = 0;
			ifp->if_baudrate = adapter->link_speed * 1000000;
			if_link_state_change(ifp, LINK_STATE_UP);
		}
	} else {
		if (adapter->link_active == 1) {
			ifp->if_baudrate = adapter->link_speed = 0;
			adapter->link_duplex = 0;
			if (bootverbose)
				device_printf(dev, "Link is Down\n");
			adapter->link_active = 0;
			if_link_state_change(ifp, LINK_STATE_DOWN);
		}
	}
}

/*********************************************************************
 *
 *  This routine disables all traffic on the adapter by issuing a
 *  global reset on the MAC and deallocates TX/RX buffers.
 *
 **********************************************************************/

static void
em_stop(void *arg)
{
	struct adapter	*adapter = arg;
	struct ifnet	*ifp = adapter->ifp;

	EM_LOCK_ASSERT(adapter);

	INIT_DEBUGOUT("em_stop: begin");

	em_disable_intr(adapter);
	callout_stop(&adapter->timer);
	callout_stop(&adapter->tx_fifo_timer);
	em_free_transmit_structures(adapter);
	em_free_receive_structures(adapter);

	/* Tell the stack that the interface is no longer active */
	ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);

	e1000_reset_hw(&adapter->hw);
	if (adapter->hw.mac.type >= e1000_82544)
		E1000_WRITE_REG(&adapter->hw, E1000_WUC, 0);
}


/*********************************************************************
 *
 *  Determine hardware revision.
 *
 **********************************************************************/
static void
em_identify_hardware(struct adapter *adapter)
{
	device_t dev = adapter->dev;

	/* Make sure our PCI config space has the necessary stuff set */
	adapter->hw.bus.pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2);
	if (!((adapter->hw.bus.pci_cmd_word & PCIM_CMD_BUSMASTEREN) &&
	    (adapter->hw.bus.pci_cmd_word & PCIM_CMD_MEMEN))) {
		device_printf(dev, "Memory Access and/or Bus Master bits "
		    "were not set!\n");
		adapter->hw.bus.pci_cmd_word |=
		    (PCIM_CMD_BUSMASTEREN | PCIM_CMD_MEMEN);
		pci_write_config(dev, PCIR_COMMAND,
		    adapter->hw.bus.pci_cmd_word, 2);
	}

	/* Save off the information about this board */
	adapter->hw.vendor_id = pci_get_vendor(dev);
	adapter->hw.device_id = pci_get_device(dev);
	adapter->hw.revision_id = pci_read_config(dev, PCIR_REVID, 1);
	adapter->hw.subsystem_vendor_id =
	    pci_read_config(dev, PCIR_SUBVEND_0, 2);
	adapter->hw.subsystem_device_id =
	    pci_read_config(dev, PCIR_SUBDEV_0, 2);

	/* Do Shared Code Init and Setup */
	if (e1000_set_mac_type(&adapter->hw)) {
		device_printf(dev, "Setup init failure\n");
		return;
	}
}

static int
em_allocate_pci_resources(struct adapter *adapter)
{
	device_t	dev = adapter->dev;
	int		val, rid;

	rid = PCIR_BAR(0);
	adapter->res_memory = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
	    &rid, RF_ACTIVE);
	if (adapter->res_memory == NULL) {
		device_printf(dev, "Unable to allocate bus resource: memory\n");
		return (ENXIO);
	}
	adapter->osdep.mem_bus_space_tag =
	    rman_get_bustag(adapter->res_memory);
	adapter->osdep.mem_bus_space_handle =
	    rman_get_bushandle(adapter->res_memory);
	adapter->hw.hw_addr = (uint8_t *)&adapter->osdep.mem_bus_space_handle;

	/* Only older adapters use IO mapping */
	if ((adapter->hw.mac.type > e1000_82543) &&
	    (adapter->hw.mac.type < e1000_82571)) {
		/* Figure out where our IO BAR is */
		for (rid = PCIR_BAR(0); rid < PCIR_CIS;) {
			val = pci_read_config(dev, rid, 4);
			if (EM_BAR_TYPE(val) == EM_BAR_TYPE_IO) {
				adapter->io_rid = rid;
				break;
			}
			rid += 4;
			/* check for 64bit BAR */
			if (EM_BAR_MEM_TYPE(val) == EM_BAR_MEM_TYPE_64BIT)
				rid += 4;
		}
		if (rid >= PCIR_CIS) {
			device_printf(dev, "Unable to locate IO BAR\n");
			return (ENXIO);
		}
		adapter->res_ioport = bus_alloc_resource_any(dev,
		    SYS_RES_IOPORT, &adapter->io_rid, RF_ACTIVE);
		if (adapter->res_ioport == NULL) {
			device_printf(dev, "Unable to allocate bus resource: "
			    "ioport\n");
			return (ENXIO);
		}
		adapter->hw.io_base = 0;
		adapter->osdep.io_bus_space_tag =
		    rman_get_bustag(adapter->res_ioport);
		adapter->osdep.io_bus_space_handle =
		    rman_get_bushandle(adapter->res_ioport);
	}

	/*
	 * Setup MSI/X or MSI if PCI Express;
	 * only the latest adapters can use MSI/X and
	 * real support for it is forthcoming.
	 */
	adapter->msi = 0; /* Set defaults */
	rid = 0x0;
	if (adapter->hw.mac.type >= e1000_82575) {
		/*
		 * Setup MSI/X
		 */
		rid = PCIR_BAR(EM_MSIX_BAR);
		adapter->msix_mem = bus_alloc_resource_any(dev,
		    SYS_RES_MEMORY, &rid, RF_ACTIVE);
		if (!adapter->msix_mem) {
			device_printf(dev, "Unable to map MSIX table\n");
			return (ENXIO);
		}
		/*
		 * Eventually this may be used
		 * for Multiqueue, for now we will
		 * just use one vector.
		 *
		 * val = pci_msix_count(dev);
		 */
		val = 1;
		if (val && pci_alloc_msix(dev, &val) == 0) {
			rid = 1;
			adapter->msi = 1;
		}
	} else if (adapter->hw.mac.type >= e1000_82571) {
		val = pci_msi_count(dev);
		if (val == 1 && pci_alloc_msi(dev, &val) == 0) {
			rid = 1;
			adapter->msi = 1;
		}
	}
	adapter->res_interrupt = bus_alloc_resource_any(dev,
	    SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
	if (adapter->res_interrupt == NULL) {
		device_printf(dev, "Unable to allocate bus resource: "
		    "interrupt\n");
		return (ENXIO);
	}

	adapter->hw.back = &adapter->osdep;

	return (0);
}

/*********************************************************************
 *
 *  Setup the appropriate Interrupt handlers.
 *
 **********************************************************************/
int
em_allocate_intr(struct adapter *adapter)
{
	device_t dev = adapter->dev;
	int error;

	/* Manually turn off all interrupts */
	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);

#ifdef DEVICE_POLLING
	/* We do Legacy setup */
	if (adapter->int_handler_tag == NULL &&
	    (error = bus_setup_intr(dev, adapter->res_interrupt,
	    INTR_TYPE_NET | INTR_MPSAFE, NULL, em_intr, adapter,
	    &adapter->int_handler_tag)) != 0) {
		device_printf(dev, "Failed to register interrupt handler\n");
		return (error);
	}

#else
	/*
	 * Try allocating a fast interrupt and the associated deferred
	 * processing contexts.
	 */
	TASK_INIT(&adapter->rxtx_task, 0, em_handle_rxtx, adapter);
	TASK_INIT(&adapter->link_task, 0, em_handle_link, adapter);
	adapter->tq = taskqueue_create_fast("em_taskq", M_NOWAIT,
	    taskqueue_thread_enqueue, &adapter->tq);
	taskqueue_start_threads(&adapter->tq, 1, PI_NET, "%s taskq",
	    device_get_nameunit(adapter->dev));
	if ((error = bus_setup_intr(dev, adapter->res_interrupt,
	    INTR_TYPE_NET, em_intr_fast, NULL, adapter,
	    &adapter->int_handler_tag)) != 0) {
		device_printf(dev, "Failed to register fast interrupt "
		    "handler: %d\n", error);
		taskqueue_free(adapter->tq);
		adapter->tq = NULL;
		return (error);
	}
#endif

	em_enable_intr(adapter);
	return (0);
}

static void
em_free_intr(struct adapter *adapter)
{
	device_t dev = adapter->dev;

	if (adapter->res_interrupt != NULL) {
		bus_teardown_intr(dev, adapter->res_interrupt,
		    adapter->int_handler_tag);
		adapter->int_handler_tag = NULL;
	}
	if (adapter->tq != NULL) {
		taskqueue_drain(adapter->tq, &adapter->rxtx_task);
		taskqueue_drain(taskqueue_fast, &adapter->link_task);
		taskqueue_free(adapter->tq);
		adapter->tq = NULL;
	}
}

static void
em_free_pci_resources(struct adapter *adapter)
{
	device_t dev = adapter->dev;

	if (adapter->res_interrupt != NULL)
		bus_release_resource(dev, SYS_RES_IRQ,
		    adapter->msi ? 1 : 0, adapter->res_interrupt);

	if (adapter->msix_mem != NULL)
		bus_release_resource(dev, SYS_RES_MEMORY,
		    PCIR_BAR(EM_MSIX_BAR), adapter->msix_mem);

	if (adapter->msi)
		pci_release_msi(dev);

	if (adapter->res_memory != NULL)
		bus_release_resource(dev, SYS_RES_MEMORY,
		    PCIR_BAR(0), adapter->res_memory);

	if (adapter->flash_mem != NULL)
		bus_release_resource(dev, SYS_RES_MEMORY,
		    EM_FLASH, adapter->flash_mem);

	if (adapter->res_ioport != NULL)
		bus_release_resource(dev, SYS_RES_IOPORT,
		    adapter->io_rid, adapter->res_ioport);
}

/*********************************************************************
 *
 *  Initialize the hardware to a configuration
 *  as specified by the adapter structure.
 *
 **********************************************************************/
static int
em_hardware_init(struct adapter *adapter)
{
	device_t dev = adapter->dev;
	uint16_t rx_buffer_size;

	INIT_DEBUGOUT("em_hardware_init: begin");

	/* Issue a global reset */
	e1000_reset_hw(&adapter->hw);

	/* Get control from any management/hw control */
	if (((adapter->hw.mac.type == e1000_82573) ||
	    (adapter->hw.mac.type == e1000_ich8lan) ||
	    (adapter->hw.mac.type == e1000_ich9lan)) &&
	    e1000_check_mng_mode(&adapter->hw))
		em_get_hw_control(adapter);

	/* When hardware is reset, fifo_head is also reset */
	adapter->tx_fifo_head = 0;

	/* Set up smart power down as default off on newer adapters. */
	if (!em_smart_pwr_down && (adapter->hw.mac.type == e1000_82571 ||
	    adapter->hw.mac.type == e1000_82572)) {
		uint16_t phy_tmp = 0;

		/* Speed up time to link by disabling smart power down. */
		e1000_read_phy_reg(&adapter->hw,
		    IGP02E1000_PHY_POWER_MGMT, &phy_tmp);
		phy_tmp &= ~IGP02E1000_PM_SPD;
		e1000_write_phy_reg(&adapter->hw,
		    IGP02E1000_PHY_POWER_MGMT, phy_tmp);
	}

	/*
	 * These parameters control the automatic generation (Tx) and
	 * response (Rx) to Ethernet PAUSE frames.
	 * - High water mark should allow for at least two frames to be
	 *   received after sending an XOFF.
	 * - Low water mark works best when it is very near the high water mark.
	 *   This allows the receiver to restart by sending XON when it has
	 *   drained a bit. Here we use an arbitrary value of 1500 which will
	 *   restart after one full frame is pulled from the buffer. There
	 *   could be several smaller frames in the buffer and if so they will
	 *   not trigger the XON until their total number reduces the buffer
	 *   by 1500.
	 * - The pause time is fairly large at 1000 x 512ns = 512 usec.
	 */
	rx_buffer_size = ((E1000_READ_REG(&adapter->hw, E1000_PBA) &
	    0xffff) << 10);
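
	/*
	 * For example, if the PBA low word reads 0x0030 (48 KB of RX
	 * packet buffer), rx_buffer_size = 48 << 10 = 49152 bytes; with
	 * a 1522-byte max frame rounded up to 2048, fc_high_water below
	 * becomes 49152 - 2048 = 47104 and fc_low_water 45604.
	 */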

	adapter->hw.mac.fc_high_water = rx_buffer_size -
	    roundup2(adapter->hw.mac.max_frame_size, 1024);
	adapter->hw.mac.fc_low_water = adapter->hw.mac.fc_high_water - 1500;
	if (adapter->hw.mac.type == e1000_80003es2lan)
		adapter->hw.mac.fc_pause_time = 0xFFFF;
	else
		adapter->hw.mac.fc_pause_time = EM_FC_PAUSE_TIME;
	adapter->hw.mac.fc_send_xon = TRUE;
	adapter->hw.mac.fc = e1000_fc_full;

	if (e1000_init_hw(&adapter->hw) < 0) {
		device_printf(dev, "Hardware Initialization Failed\n");
		return (EIO);
	}

	e1000_check_for_link(&adapter->hw);

	return (0);
}

/*********************************************************************
 *
 *  Setup networking device structure and register an interface.
 *
 **********************************************************************/
static void
em_setup_interface(device_t dev, struct adapter *adapter)
{
	struct ifnet   *ifp;

	INIT_DEBUGOUT("em_setup_interface: begin");

	ifp = adapter->ifp = if_alloc(IFT_ETHER);
	if (ifp == NULL)
		panic("%s: can not if_alloc()", device_get_nameunit(dev));
	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
	ifp->if_mtu = ETHERMTU;
	ifp->if_init = em_init;
	ifp->if_softc = adapter;
	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
	ifp->if_ioctl = em_ioctl;
	ifp->if_start = em_start;
	IFQ_SET_MAXLEN(&ifp->if_snd, adapter->num_tx_desc - 1);
	ifp->if_snd.ifq_drv_maxlen = adapter->num_tx_desc - 1;
	IFQ_SET_READY(&ifp->if_snd);

	ether_ifattach(ifp, adapter->hw.mac.addr);

	ifp->if_capabilities = ifp->if_capenable = 0;

	if (adapter->hw.mac.type >= e1000_82543) {
		ifp->if_capabilities |= IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM;
		ifp->if_capenable |= IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM;
	}

	/* Identify TSO capable adapters */
	if ((adapter->hw.mac.type > e1000_82544) &&
	    (adapter->hw.mac.type != e1000_82547))
		ifp->if_capabilities |= IFCAP_TSO4;
	/*
	 * By default only enable on PCI-E, this
	 * can be overridden by ifconfig.
	 */
	if (adapter->hw.mac.type >= e1000_82571)
		ifp->if_capenable |= IFCAP_TSO4;

	/*
	 * Tell the upper layer(s) we support long frames.
	 */
	ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header);
	ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU;
	ifp->if_capenable |= IFCAP_VLAN_MTU;

#ifdef DEVICE_POLLING
	ifp->if_capabilities |= IFCAP_POLLING;
#endif

	/*
	 * Specify the media types supported by this adapter and register
	 * callbacks to update media and link information
	 */
	ifmedia_init(&adapter->media, IFM_IMASK,
	    em_media_change, em_media_status);
	if ((adapter->hw.media_type == e1000_media_type_fiber) ||
	    (adapter->hw.media_type == e1000_media_type_internal_serdes)) {
		u_char fiber_type = IFM_1000_SX;	/* default type */

		if (adapter->hw.mac.type == e1000_82545)
			fiber_type = IFM_1000_LX;
		ifmedia_add(&adapter->media, IFM_ETHER | fiber_type | IFM_FDX,
		    0, NULL);
		ifmedia_add(&adapter->media, IFM_ETHER | fiber_type, 0, NULL);
	} else {
		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T, 0, NULL);
		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T | IFM_FDX,
		    0, NULL);
		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX,
		    0, NULL);
		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX | IFM_FDX,
		    0, NULL);
		if (adapter->hw.phy.type != e1000_phy_ife) {
			ifmedia_add(&adapter->media,
			    IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
			ifmedia_add(&adapter->media,
			    IFM_ETHER | IFM_1000_T, 0, NULL);
		}
	}
	ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL);
	ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO);
}


/*********************************************************************
 *
 *  Workaround for SmartSpeed on 82541 and 82547 controllers
 *
 **********************************************************************/
static void
em_smartspeed(struct adapter *adapter)
{
	uint16_t phy_tmp;

	if (adapter->link_active || (adapter->hw.phy.type != e1000_phy_igp) ||
	    adapter->hw.mac.autoneg == 0 ||
	    (adapter->hw.phy.autoneg_advertised & ADVERTISE_1000_FULL) == 0)
		return;

	if (adapter->smartspeed == 0) {
		/*
		 * If the Master/Slave config fault is asserted twice,
		 * we assume back-to-back faults.
		 */
		e1000_read_phy_reg(&adapter->hw, PHY_1000T_STATUS, &phy_tmp);
		if (!(phy_tmp & SR_1000T_MS_CONFIG_FAULT))
			return;
		e1000_read_phy_reg(&adapter->hw, PHY_1000T_STATUS, &phy_tmp);
		if (phy_tmp & SR_1000T_MS_CONFIG_FAULT) {
			e1000_read_phy_reg(&adapter->hw,
			    PHY_1000T_CTRL, &phy_tmp);
			if (phy_tmp & CR_1000T_MS_ENABLE) {
				phy_tmp &= ~CR_1000T_MS_ENABLE;
				e1000_write_phy_reg(&adapter->hw,
				    PHY_1000T_CTRL, phy_tmp);
				adapter->smartspeed++;
				if (adapter->hw.mac.autoneg &&
				    !e1000_phy_setup_autoneg(&adapter->hw) &&
				    !e1000_read_phy_reg(&adapter->hw,
				    PHY_CONTROL, &phy_tmp)) {
					phy_tmp |= (MII_CR_AUTO_NEG_EN |
					    MII_CR_RESTART_AUTO_NEG);
					e1000_write_phy_reg(&adapter->hw,
					    PHY_CONTROL, phy_tmp);
				}
			}
		}
		return;
	} else if (adapter->smartspeed == EM_SMARTSPEED_DOWNSHIFT) {
		/* If still no link, perhaps using 2/3 pair cable */
		e1000_read_phy_reg(&adapter->hw, PHY_1000T_CTRL, &phy_tmp);
		phy_tmp |= CR_1000T_MS_ENABLE;
		e1000_write_phy_reg(&adapter->hw, PHY_1000T_CTRL, phy_tmp);
		if (adapter->hw.mac.autoneg &&
		    !e1000_phy_setup_autoneg(&adapter->hw) &&
		    !e1000_read_phy_reg(&adapter->hw, PHY_CONTROL, &phy_tmp)) {
			phy_tmp |= (MII_CR_AUTO_NEG_EN |
			    MII_CR_RESTART_AUTO_NEG);
			e1000_write_phy_reg(&adapter->hw, PHY_CONTROL, phy_tmp);
		}
	}
	/* Restart process after EM_SMARTSPEED_MAX iterations */
	if (adapter->smartspeed++ == EM_SMARTSPEED_MAX)
		adapter->smartspeed = 0;
}


/*
 * Manage DMA'able memory.
 */
static void
em_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
{
	if (error)
		return;
	*(bus_addr_t *) arg = segs[0].ds_addr;
}

static int
em_dma_malloc(struct adapter *adapter, bus_size_t size,
    struct em_dma_alloc *dma, int mapflags)
{
	int error;

	error = bus_dma_tag_create(bus_get_dma_tag(adapter->dev), /* parent */
				EM_DBA_ALIGN, 0,	/* alignment, bounds */
				BUS_SPACE_MAXADDR,	/* lowaddr */
				BUS_SPACE_MAXADDR,	/* highaddr */
				NULL, NULL,		/* filter, filterarg */
				size,			/* maxsize */
				1,			/* nsegments */
				size,			/* maxsegsize */
				0,			/* flags */
				NULL,			/* lockfunc */
				NULL,			/* lockarg */
				&dma->dma_tag);
	if (error) {
		device_printf(adapter->dev,
		    "%s: bus_dma_tag_create failed: %d\n",
		    __func__, error);
		goto fail_0;
	}

#ifdef __arm__
	error = bus_dmamem_alloc(dma->dma_tag, (void**) &dma->dma_vaddr,
	    BUS_DMA_NOWAIT | BUS_DMA_COHERENT, &dma->dma_map);
#else
	error = bus_dmamem_alloc(dma->dma_tag, (void**) &dma->dma_vaddr,
	    BUS_DMA_NOWAIT, &dma->dma_map);
#endif
	if (error) {
		device_printf(adapter->dev,
		    "%s: bus_dmamem_alloc(%ju) failed: %d\n",
		    __func__, (uintmax_t)size, error);
		goto fail_2;
	}

	dma->dma_paddr = 0;
	error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
	    size, em_dmamap_cb, &dma->dma_paddr, mapflags | BUS_DMA_NOWAIT);
	if (error || dma->dma_paddr == 0) {
		device_printf(adapter->dev,
		    "%s: bus_dmamap_load failed: %d\n",
		    __func__, error);
		goto fail_3;
	}

	return (0);

fail_3:
	bus_dmamap_unload(dma->dma_tag, dma->dma_map);
fail_2:
	bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
	bus_dma_tag_destroy(dma->dma_tag);
fail_0:
	dma->dma_map = NULL;
	dma->dma_tag = NULL;

	return (error);
}
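
/*
 * Typical usage elsewhere in this driver looks roughly like the
 * following sketch (tsize being the ring size in bytes):
 *
 *	if (em_dma_malloc(adapter, tsize, &adapter->txdma, BUS_DMA_NOWAIT))
 *		return (ENOMEM);
 *	adapter->tx_desc_base =
 *	    (struct e1000_tx_desc *)adapter->txdma.dma_vaddr;
 *
 * On success the caller has a wired buffer with its bus address in
 * dma->dma_paddr; em_dma_free() below undoes all three setup steps.
 */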

static void
em_dma_free(struct adapter *adapter, struct em_dma_alloc *dma)
{
	if (dma->dma_tag == NULL)
		return;
	if (dma->dma_map != NULL) {
		bus_dmamap_sync(dma->dma_tag, dma->dma_map,
		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
		bus_dmamap_unload(dma->dma_tag, dma->dma_map);
		bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
		dma->dma_map = NULL;
	}
	bus_dma_tag_destroy(dma->dma_tag);
	dma->dma_tag = NULL;
}


/*********************************************************************
 *
 *  Allocate memory for tx_buffer structures. The tx_buffer stores all
 *  the information needed to transmit a packet on the wire.
 *
 **********************************************************************/
static int
em_allocate_transmit_structures(struct adapter *adapter)
{
	device_t dev = adapter->dev;

	/* M_ZERO already clears the allocation, no separate bzero needed */
	adapter->tx_buffer_area = malloc(sizeof(struct em_buffer) *
	    adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO);
	if (adapter->tx_buffer_area == NULL) {
		device_printf(dev, "Unable to allocate tx_buffer memory\n");
		return (ENOMEM);
	}

	return (0);
}

/*********************************************************************
 *
 *  Initialize transmit structures.
 *
 **********************************************************************/
static int
em_setup_transmit_structures(struct adapter *adapter)
{
	device_t dev = adapter->dev;
	struct em_buffer *tx_buffer;
	int error, i;

	/*
	 * Create DMA tags for tx descriptors
	 */
	if ((error = bus_dma_tag_create(bus_get_dma_tag(dev), /* parent */
				1, 0,			/* alignment, bounds */
				BUS_SPACE_MAXADDR,	/* lowaddr */
				BUS_SPACE_MAXADDR,	/* highaddr */
				NULL, NULL,		/* filter, filterarg */
				EM_TSO_SIZE,		/* maxsize */
				EM_MAX_SCATTER,		/* nsegments */
				EM_TSO_SEG_SIZE,	/* maxsegsize */
				0,			/* flags */
				NULL,			/* lockfunc */
				NULL,			/* lockarg */
				&adapter->txtag)) != 0) {
		device_printf(dev, "Unable to allocate TX DMA tag\n");
		goto fail;
	}

	if ((error = em_allocate_transmit_structures(adapter)) != 0)
		goto fail;

	/* Clear the old ring contents */
	bzero(adapter->tx_desc_base,
	    (sizeof(struct e1000_tx_desc)) * adapter->num_tx_desc);

	/* Create the descriptor buffer dma maps */
	tx_buffer = adapter->tx_buffer_area;
	for (i = 0; i < adapter->num_tx_desc; i++) {
		error = bus_dmamap_create(adapter->txtag, 0, &tx_buffer->map);
		if (error != 0) {
			device_printf(dev, "Unable to create TX DMA map\n");
			goto fail;
		}
		tx_buffer->next_eop = -1;
		tx_buffer++;
	}

	adapter->next_avail_tx_desc = 0;
	adapter->next_tx_to_clean = 0;

	/* Set number of descriptors available */
	adapter->num_tx_desc_avail = adapter->num_tx_desc;

	bus_dmamap_sync(adapter->txdma.dma_tag, adapter->txdma.dma_map,
	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);

	return (0);

fail:
	em_free_transmit_structures(adapter);
	return (error);
}

/*********************************************************************
 *
 *  Enable transmit unit.
 *
 **********************************************************************/
static void
em_initialize_transmit_unit(struct adapter *adapter)
{
	uint32_t	tctl, tarc, tipg = 0;
	uint64_t	bus_addr;

	INIT_DEBUGOUT("em_initialize_transmit_unit: begin");
	/* Setup the Base and Length of the Tx Descriptor Ring */
	bus_addr = adapter->txdma.dma_paddr;
	E1000_WRITE_REG(&adapter->hw, E1000_TDLEN,
	    adapter->num_tx_desc * sizeof(struct e1000_tx_desc));
	E1000_WRITE_REG(&adapter->hw, E1000_TDBAH, (uint32_t)(bus_addr >> 32));
	E1000_WRITE_REG(&adapter->hw, E1000_TDBAL, (uint32_t)bus_addr);
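	/*
	 * Each legacy TX descriptor is 16 bytes, so e.g. a 256-entry
	 * ring programs TDLEN with 256 * 16 = 4096 bytes; TDBAH/TDBAL
	 * take the high and low 32 bits of the ring's bus address.
	 */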

	/* Setup the HW Tx Head and Tail descriptor pointers */
	E1000_WRITE_REG(&adapter->hw, E1000_TDT, 0);
	E1000_WRITE_REG(&adapter->hw, E1000_TDH, 0);

	HW_DEBUGOUT2("Base = %x, Length = %x\n",
	    E1000_READ_REG(&adapter->hw, E1000_TDBAL),
	    E1000_READ_REG(&adapter->hw, E1000_TDLEN));

	/* Set the default values for the Tx Inter Packet Gap timer */
	switch (adapter->hw.mac.type) {
	case e1000_82542:
		tipg = DEFAULT_82542_TIPG_IPGT;
		tipg |= DEFAULT_82542_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT;
		tipg |= DEFAULT_82542_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT;
		break;
	case e1000_80003es2lan:
		tipg = DEFAULT_82543_TIPG_IPGR1;
		tipg |= DEFAULT_80003ES2LAN_TIPG_IPGR2 <<
		    E1000_TIPG_IPGR2_SHIFT;
		break;
	default:
		if ((adapter->hw.media_type == e1000_media_type_fiber) ||
		    (adapter->hw.media_type ==
		    e1000_media_type_internal_serdes))
			tipg = DEFAULT_82543_TIPG_IPGT_FIBER;
		else
			tipg = DEFAULT_82543_TIPG_IPGT_COPPER;
		tipg |= DEFAULT_82543_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT;
		tipg |= DEFAULT_82543_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT;
	}

	E1000_WRITE_REG(&adapter->hw, E1000_TIPG, tipg);
	E1000_WRITE_REG(&adapter->hw, E1000_TIDV, adapter->tx_int_delay.value);
	if (adapter->hw.mac.type >= e1000_82540)
		E1000_WRITE_REG(&adapter->hw, E1000_TADV,
		    adapter->tx_abs_int_delay.value);

	if ((adapter->hw.mac.type == e1000_82571) ||
	    (adapter->hw.mac.type == e1000_82572)) {
		tarc = E1000_READ_REG(&adapter->hw, E1000_TARC0);
		tarc |= SPEED_MODE_BIT;
		E1000_WRITE_REG(&adapter->hw, E1000_TARC0, tarc);
	} else if (adapter->hw.mac.type == e1000_80003es2lan) {
		tarc = E1000_READ_REG(&adapter->hw, E1000_TARC0);
		tarc |= 1;
		E1000_WRITE_REG(&adapter->hw, E1000_TARC0, tarc);
		tarc = E1000_READ_REG(&adapter->hw, E1000_TARC1);
		tarc |= 1;
		E1000_WRITE_REG(&adapter->hw, E1000_TARC1, tarc);
	}

	/* Program the Transmit Control Register */
	tctl = E1000_READ_REG(&adapter->hw, E1000_TCTL);
	tctl &= ~E1000_TCTL_CT;
	tctl |= (E1000_TCTL_PSP | E1000_TCTL_RTLC | E1000_TCTL_EN |
	    (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT));

	if (adapter->hw.mac.type >= e1000_82571)
		tctl |= E1000_TCTL_MULR;

	/* This write will effectively turn on the transmit unit. */
	E1000_WRITE_REG(&adapter->hw, E1000_TCTL, tctl);

	/* Setup Transmit Descriptor Base Settings */
	adapter->txd_cmd = E1000_TXD_CMD_IFCS;

	if ((adapter->tx_int_delay.value > 0) &&
	    (adapter->hw.mac.type != e1000_82575))
		adapter->txd_cmd |= E1000_TXD_CMD_IDE;

	/* Set the function pointer for the transmit routine */
	if (adapter->hw.mac.type >= e1000_82575)
		adapter->em_xmit = em_adv_encap;
	else
		adapter->em_xmit = em_encap;
}

/*********************************************************************
 *
 *  Free all transmit related data structures.
 *
 **********************************************************************/
static void
em_free_transmit_structures(struct adapter *adapter)
{
	struct em_buffer *tx_buffer;
	int i;

	INIT_DEBUGOUT("free_transmit_structures: begin");

	if (adapter->tx_buffer_area != NULL) {
		tx_buffer = adapter->tx_buffer_area;
		for (i = 0; i < adapter->num_tx_desc; i++, tx_buffer++) {
			if (tx_buffer->m_head != NULL) {
				bus_dmamap_sync(adapter->txtag, tx_buffer->map,
				    BUS_DMASYNC_POSTWRITE);
				bus_dmamap_unload(adapter->txtag,
				    tx_buffer->map);
				m_freem(tx_buffer->m_head);
				tx_buffer->m_head = NULL;
			} else if (tx_buffer->map != NULL)
				bus_dmamap_unload(adapter->txtag,
				    tx_buffer->map);
			if (tx_buffer->map != NULL) {
				bus_dmamap_destroy(adapter->txtag,
				    tx_buffer->map);
				tx_buffer->map = NULL;
			}
		}
	}
	if (adapter->tx_buffer_area != NULL) {
		free(adapter->tx_buffer_area, M_DEVBUF);
		adapter->tx_buffer_area = NULL;
	}
	if (adapter->txtag != NULL) {
		bus_dma_tag_destroy(adapter->txtag);
		adapter->txtag = NULL;
	}
}

/*********************************************************************
 *
 *  The offload context needs to be set when we transfer the first
 *  packet of a particular protocol (TCP/UDP). This routine has been
 *  enhanced to deal with inserted VLAN headers, and IPv6 (not complete).
 *
 **********************************************************************/
static void
em_transmit_checksum_setup(struct adapter *adapter, struct mbuf *mp,
    uint32_t *txd_upper, uint32_t *txd_lower)
{
	struct e1000_context_desc *TXD;
	struct em_buffer *tx_buffer;
	struct ether_vlan_header *eh;
	struct ip *ip;
	struct ip6_hdr *ip6;
	struct tcp_hdr *th;
	int curr_txd, ehdrlen, hdr_len, ip_hlen;
	uint32_t cmd = 0;
	uint16_t etype;
	uint8_t ipproto;

	/* Setup checksum offload context. */
	curr_txd = adapter->next_avail_tx_desc;
	tx_buffer = &adapter->tx_buffer_area[curr_txd];
	TXD = (struct e1000_context_desc *) &adapter->tx_desc_base[curr_txd];

	*txd_lower = E1000_TXD_CMD_DEXT |	/* Extended descr type */
		     E1000_TXD_DTYP_D;		/* Data descr */

	/*
	 * Determine where frame payload starts.
	 * Jump over vlan headers if already present,
	 * helpful for QinQ too.
	 */
	eh = mtod(mp, struct ether_vlan_header *);
	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
		etype = ntohs(eh->evl_proto);
		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
	} else {
		etype = ntohs(eh->evl_encap_proto);
		ehdrlen = ETHER_HDR_LEN;
	}

	/*
	 * We only support TCP/UDP for IPv4 and IPv6 for the moment.
	 * TODO: Support SCTP too when it hits the tree.
	 */
	switch (etype) {
	case ETHERTYPE_IP:
		ip = (struct ip *)(mp->m_data + ehdrlen);
		ip_hlen = ip->ip_hl << 2;

		/* Setup of IP header checksum. */
		if (mp->m_pkthdr.csum_flags & CSUM_IP) {
			/*
			 * Start offset for header checksum calculation.
			 * End offset for header checksum calculation.
			 * Offset of place to put the checksum.
			 */
			TXD->lower_setup.ip_fields.ipcss = ehdrlen;
			TXD->lower_setup.ip_fields.ipcse =
			    htole16(ehdrlen + ip_hlen);
			TXD->lower_setup.ip_fields.ipcso =
			    ehdrlen + offsetof(struct ip, ip_sum);
			cmd |= E1000_TXD_CMD_IP;
			*txd_upper |= E1000_TXD_POPTS_IXSM << 8;
		}
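		/*
		 * For an untagged IPv4 frame with a 20-byte IP header
		 * this gives ipcss = 14, ipcse = 34 and ipcso = 14 + 10
		 * = 24, since ip_sum sits 10 bytes into struct ip.
		 */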
3294
3295		if (mp->m_len < ehdrlen + ip_hlen)
3296			return;	/* failure */
3297
3298		hdr_len = ehdrlen + ip_hlen;
3299		ipproto = ip->ip_p;
3300
3301		break;
3302	case ETHERTYPE_IPV6:
3303		ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
3304		ip_hlen = sizeof(struct ip6_hdr); /* XXX: No header stacking. */
3305
3306		if (mp->m_len < ehdrlen + ip_hlen)
3307			return;	/* failure */
3308
3309		/* IPv6 doesn't have a header checksum. */
3310
3311		hdr_len = ehdrlen + ip_hlen;
3312		ipproto = ip6->ip6_nxt;
3313
3314		break;
3315	default:
3316		*txd_upper = 0;
3317		*txd_lower = 0;
3318		return;
3319	}
3320
3321	switch (ipproto) {
3322	case IPPROTO_TCP:
3323		if (mp->m_pkthdr.csum_flags & CSUM_TCP) {
3324			/*
3325			 * Start offset for payload checksum calculation.
3326			 * End offset for payload checksum calculation.
3327			 * Offset of place to put the checksum.
3328			 */
3329			th = (struct tcp_hdr *)(mp->m_data + hdr_len);
3330			TXD->upper_setup.tcp_fields.tucss = hdr_len;
3331			TXD->upper_setup.tcp_fields.tucse = htole16(0);
3332			TXD->upper_setup.tcp_fields.tucso =
3333			    hdr_len + offsetof(struct tcphdr, th_sum);
3334			cmd |= E1000_TXD_CMD_TCP;
3335			*txd_upper |= E1000_TXD_POPTS_TXSM << 8;
3336		}
3337		break;
3338	case IPPROTO_UDP:
3339		if (mp->m_pkthdr.csum_flags & CSUM_UDP) {
3340			/*
3341			 * Start offset for header checksum calculation.
3342			 * End offset for header checksum calculation.
3343			 * Offset of place to put the checksum.
3344			 */
3345			TXD->upper_setup.tcp_fields.tucss = hdr_len;
3346			TXD->upper_setup.tcp_fields.tucse = htole16(0);
3347			TXD->upper_setup.tcp_fields.tucso =
3348			    hdr_len + offsetof(struct udphdr, uh_sum);
3349			*txd_upper |= E1000_TXD_POPTS_TXSM << 8;
3350		}
3351		break;
3352	default:
3353		break;
3354	}
3355
3356	TXD->tcp_seg_setup.data = htole32(0);
3357	TXD->cmd_and_length =
3358	    htole32(adapter->txd_cmd | E1000_TXD_CMD_DEXT | cmd);
3359	tx_buffer->m_head = NULL;
3360	tx_buffer->next_eop = -1;
3361
3362	if (++curr_txd == adapter->num_tx_desc)
3363		curr_txd = 0;
3364
3365	adapter->num_tx_desc_avail--;
3366	adapter->next_avail_tx_desc = curr_txd;
3367}
3368
3369/**********************************************************************
3370 *
3371 *  Setup work for hardware segmentation offload (TSO)
3372 *
3373 **********************************************************************/
3374static boolean_t
3375em_tso_setup(struct adapter *adapter, struct mbuf *mp, uint32_t *txd_upper,
3376   uint32_t *txd_lower)
3377{
3378	struct e1000_context_desc *TXD;
3379	struct em_buffer *tx_buffer;
3380	struct ether_vlan_header *eh;
3381	struct ip *ip;
3382	struct ip6_hdr *ip6;
3383	struct tcphdr *th;
3384	int curr_txd, ehdrlen, hdr_len, ip_hlen, isip6;
3385	uint16_t etype;
3386
3387	/*
3388	 * XXX: This is not really correct as the stack would not have
3389	 * set up all checksums.
3390	 * XXX: Returning FALSE is not sufficient, as we may also have
3391	 * to signal true failure cases.  Should return -1 (failure),
3392	 * 0 (no TSO) and 1 (success).
3393	 */
3394	if (((mp->m_pkthdr.csum_flags & CSUM_TSO) == 0) ||
3395	     (mp->m_pkthdr.len <= EM_TX_BUFFER_SIZE))
3396		return FALSE;
3397
3398	/*
3399	 * This function could/should be extended to support IP/IPv6
3400	 * fragmentation as well.  But as they say, one step at a time.
3401	 */
3402
3403	/*
3404	 * Determine where frame payload starts.
3405	 * Jump over vlan headers if already present,
3406	 * helpful for QinQ too.
3407	 */
3408	eh = mtod(mp, struct ether_vlan_header *);
3409	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
3410		etype = ntohs(eh->evl_proto);
3411		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3412	} else {
3413		etype = ntohs(eh->evl_encap_proto);
3414		ehdrlen = ETHER_HDR_LEN;
3415	}
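	/*
	 * Worked example (editor's note): for an untagged frame
	 * ehdrlen = ETHER_HDR_LEN = 14, so the IP header starts at
	 * offset 14; with one 802.1Q tag (or the outer tag of a QinQ
	 * frame) ehdrlen = 14 + ETHER_VLAN_ENCAP_LEN = 18.
	 */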
3416
3417	/* Ensure we have at least the IP+TCP header in the first mbuf. */
3418	if (mp->m_len < ehdrlen + sizeof(struct ip) + sizeof(struct tcphdr))
3419		return FALSE;	/* -1 */
3420
3421	/*
3422	 * We only support TCP over IPv4 for the moment; IPv6 is not yet handled.
3423	 * TODO: Support SCTP too when it hits the tree.
3424	 */
3425	switch (etype) {
3426	case ETHERTYPE_IP:
3427		isip6 = 0;
3428		ip = (struct ip *)(mp->m_data + ehdrlen);
3429		if (ip->ip_p != IPPROTO_TCP)
3430			return FALSE;	/* 0 */
3431		ip->ip_len = 0;
3432		ip->ip_sum = 0;
3433		ip_hlen = ip->ip_hl << 2;
3434		if (mp->m_len < ehdrlen + ip_hlen + sizeof(struct tcphdr))
3435			return FALSE;	/* -1 */
3436		th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
3437#if 1
3438		th->th_sum = in_pseudo(ip->ip_src.s_addr,
3439		    ip->ip_dst.s_addr, htons(IPPROTO_TCP));
3440#else
3441		th->th_sum = mp->m_pkthdr.csum_data;
3442#endif
3443		break;
3444	case ETHERTYPE_IPV6:
3445		isip6 = 1;
3446		return FALSE;			/* Not supported yet. */
3447		ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
3448		if (ip6->ip6_nxt != IPPROTO_TCP)
3449			return FALSE;	/* 0 */
3450		ip6->ip6_plen = 0;
3451		ip_hlen = sizeof(struct ip6_hdr); /* XXX: no header stacking. */
3452		if (mp->m_len < ehdrlen + ip_hlen + sizeof(struct tcphdr))
3453			return FALSE;	/* -1 */
3454		th = (struct tcphdr *)((caddr_t)ip6 + ip_hlen);
3455#if 0
3456		th->th_sum = in6_pseudo(ip6->ip6_src, ip6->ip6_dst,
3457		    htons(IPPROTO_TCP));	/* XXX: function not yet available. */
3458#else
3459		th->th_sum = mp->m_pkthdr.csum_data;
3460#endif
3461		break;
3462	default:
3463		return FALSE;
3464	}
3465	hdr_len = ehdrlen + ip_hlen + (th->th_off << 2);
3466
3467	*txd_lower = (E1000_TXD_CMD_DEXT |	/* Extended descr type */
3468		      E1000_TXD_DTYP_D |	/* Data descr type */
3469		      E1000_TXD_CMD_TSE);	/* Do TSE on this packet */
3470
3471	/* IP and/or TCP header checksum calculation and insertion. */
3472	*txd_upper = ((isip6 ? 0 : E1000_TXD_POPTS_IXSM) |
3473		      E1000_TXD_POPTS_TXSM) << 8;
3474
3475	curr_txd = adapter->next_avail_tx_desc;
3476	tx_buffer = &adapter->tx_buffer_area[curr_txd];
3477	TXD = (struct e1000_context_desc *) &adapter->tx_desc_base[curr_txd];
3478
3479	/* IPv6 doesn't have a header checksum. */
3480	if (!isip6) {
3481		/*
3482		 * Start offset for header checksum calculation.
3483		 * End offset for header checksum calculation.
3484		 * Offset of place to put the checksum.
3485		 */
3486		TXD->lower_setup.ip_fields.ipcss = ehdrlen;
3487		TXD->lower_setup.ip_fields.ipcse =
3488		    htole16(ehdrlen + ip_hlen - 1);
3489		TXD->lower_setup.ip_fields.ipcso =
3490		    ehdrlen + offsetof(struct ip, ip_sum);
3491	}
3492	/*
3493	 * Start offset for payload checksum calculation.
3494	 * End offset for payload checksum calculation.
3495	 * Offset of place to put the checksum.
3496	 */
3497	TXD->upper_setup.tcp_fields.tucss =
3498	    ehdrlen + ip_hlen;
3499	TXD->upper_setup.tcp_fields.tucse = 0;
3500	TXD->upper_setup.tcp_fields.tucso =
3501	    ehdrlen + ip_hlen + offsetof(struct tcphdr, th_sum);
3502	/*
3503	 * Payload size per packet w/o any headers.
3504	 * Length of all headers up to payload.
3505	 */
3506	TXD->tcp_seg_setup.fields.mss = htole16(mp->m_pkthdr.tso_segsz);
3507	TXD->tcp_seg_setup.fields.hdr_len = hdr_len;
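	/*
	 * Worked example (editor's note): for a standard Ethernet
	 * frame with no IP or TCP options, hdr_len = 14 + 20 + 20 = 54;
	 * with a 1500-byte MTU the stack would typically hand us
	 * tso_segsz = 1460, and the hardware then emits 1460-byte
	 * payload segments, each with a 54-byte header prepended.
	 */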
3508
3509	TXD->cmd_and_length = htole32(adapter->txd_cmd |
3510				E1000_TXD_CMD_DEXT |	/* Extended descr */
3511				E1000_TXD_CMD_TSE |	/* TSE context */
3512				(isip6 ? 0 : E1000_TXD_CMD_IP) | /* Do IP csum */
3513				E1000_TXD_CMD_TCP |	/* Do TCP checksum */
3514				(mp->m_pkthdr.len - (hdr_len))); /* Total len */
3515
3516	tx_buffer->m_head = NULL;
3517	tx_buffer->next_eop = -1;
3518
3519	if (++curr_txd == adapter->num_tx_desc)
3520		curr_txd = 0;
3521
3522	adapter->num_tx_desc_avail--;
3523	adapter->next_avail_tx_desc = curr_txd;
3524	adapter->tx_tso = TRUE;
3525
3526	return TRUE;
3527}
3528
3529
3530/**********************************************************************
3531 *
3532 *  Setup work for hardware segmentation offload (TSO) on
3533 *  adapters using advanced tx descriptors
3534 *
3535 **********************************************************************/
3536static boolean_t
3537em_tso_adv_setup(struct adapter *adapter, struct mbuf *mp, u32 *paylen)
3538{
3539	struct e1000_adv_tx_context_desc *TXD;
3540	struct em_buffer        *tx_buffer;
3541	u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0;
3542	u32 mss_l4len_idx = 0;
3543	u16 vtag = 0;
3544	int ctxd, ehdrlen, hdrlen, ip_hlen, tcp_hlen;
3545	struct ether_vlan_header *eh;
3546	struct ip *ip;
3547	struct tcphdr *th;
3548
3549	if (((mp->m_pkthdr.csum_flags & CSUM_TSO) == 0) ||
3550	     (mp->m_pkthdr.len <= EM_TX_BUFFER_SIZE))
3551		return FALSE;
3552
3553	/*
3554	 * Determine where frame payload starts.
3555	 * Jump over vlan headers if already present
3556	 */
3557	eh = mtod(mp, struct ether_vlan_header *);
3558	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN))
3559		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3560	else
3561		ehdrlen = ETHER_HDR_LEN;
3562
3563	/* Ensure we have at least the IP+TCP header in the first mbuf. */
3564	if (mp->m_len < ehdrlen + sizeof(struct ip) + sizeof(struct tcphdr))
3565		return FALSE;
3566
3567	/* Only IPv4 is supported for now */
3568	ctxd = adapter->next_avail_tx_desc;
3569	tx_buffer = &adapter->tx_buffer_area[ctxd];
3570	TXD = (struct e1000_adv_tx_context_desc *) &adapter->tx_desc_base[ctxd];
3571
3572	ip = (struct ip *)(mp->m_data + ehdrlen);
3573	if (ip->ip_p != IPPROTO_TCP)
3574		return FALSE;	/* 0 */
3575	ip->ip_len = 0;
3576	ip->ip_sum = 0;
3577	ip_hlen = ip->ip_hl << 2;
3578	th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
3579	th->th_sum = in_pseudo(ip->ip_src.s_addr,
3580	    ip->ip_dst.s_addr, htons(IPPROTO_TCP));
3581	tcp_hlen = th->th_off << 2;
3582	hdrlen = ehdrlen + ip_hlen + tcp_hlen;
3583	/* Calculate payload, this is used in the transmit desc in encap */
3584	*paylen = mp->m_pkthdr.len - hdrlen;
3585
3586	/* VLAN MACLEN IPLEN */
3587	if (mp->m_flags & M_VLANTAG) {
3588		vtag = htole16(mp->m_pkthdr.ether_vtag);
3589		vlan_macip_lens |= (vtag << E1000_ADVTXD_VLAN_SHIFT);
3590	}
3591	vlan_macip_lens |= (ehdrlen << E1000_ADVTXD_MACLEN_SHIFT);
3592	vlan_macip_lens |= ip_hlen;
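	/*
	 * Editor's note: vlan_macip_lens packs three fields into one
	 * 32-bit word; assuming the usual advanced-descriptor layout,
	 * the VLAN tag is shifted up by E1000_ADVTXD_VLAN_SHIFT, the
	 * MAC header length by E1000_ADVTXD_MACLEN_SHIFT, and the IP
	 * header length occupies the low-order bits.
	 */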
3593	TXD->vlan_macip_lens |= htole32(vlan_macip_lens);
3594
3595	/* ADV DTYPE TUCMD */
3596	type_tucmd_mlhl |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_CTXT;
3597	type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP;
3598	type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV4;
3599	TXD->type_tucmd_mlhl |= htole32(type_tucmd_mlhl);
3600
3601	/* MSS L4LEN IDX */
3602	mss_l4len_idx |= (mp->m_pkthdr.tso_segsz << E1000_ADVTXD_MSS_SHIFT);
3603	mss_l4len_idx |= (tcp_hlen << E1000_ADVTXD_L4LEN_SHIFT);
3604	TXD->mss_l4len_idx = htole32(mss_l4len_idx);
3605
3606	TXD->seqnum_seed = htole32(0);
3607	tx_buffer->m_head = NULL;
3608	tx_buffer->next_eop = -1;
3609
3610	if (++ctxd == adapter->num_tx_desc)
3611		ctxd = 0;
3612
3613	adapter->num_tx_desc_avail--;
3614	adapter->next_avail_tx_desc = ctxd;
3615	return TRUE;
3616}
3617
3618
3619/*********************************************************************
3620 *
3621 *  Advanced Context Descriptor setup for VLAN or CSUM
3622 *
3623 **********************************************************************/
3624
3625static boolean_t
3626em_tx_adv_ctx_setup(struct adapter *adapter, struct mbuf *mp)
3627{
3628	struct e1000_adv_tx_context_desc *TXD;
3629	struct em_buffer        *tx_buffer;
3630	uint32_t vlan_macip_lens = 0, type_tucmd_mlhl = 0;
3631	struct ether_vlan_header *eh;
3632	struct ip *ip;
3633	struct ip6_hdr *ip6;
3634	int  ehdrlen, ip_hlen;
3635	u16	etype;
3636	u8	ipproto;
3637
3638	int ctxd = adapter->next_avail_tx_desc;
3639	u16 vtag = 0;
3640
3641	tx_buffer = &adapter->tx_buffer_area[ctxd];
3642	TXD = (struct e1000_adv_tx_context_desc *) &adapter->tx_desc_base[ctxd];
3643
3644	/*
3645	** In advanced descriptors the vlan tag must
3646	** be placed into the descriptor itself.
3647	*/
3648	if (mp->m_flags & M_VLANTAG) {
3649		vtag = htole16(mp->m_pkthdr.ether_vtag);
3650		vlan_macip_lens |= (vtag << E1000_ADVTXD_VLAN_SHIFT);
3651	}
3652
3653	/*
3654	 * Determine where frame payload starts.
3655	 * Jump over vlan headers if already present,
3656	 * helpful for QinQ too.
3657	 */
3658	eh = mtod(mp, struct ether_vlan_header *);
3659	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
3660		etype = ntohs(eh->evl_proto);
3661		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3662	} else {
3663		etype = ntohs(eh->evl_encap_proto);
3664		ehdrlen = ETHER_HDR_LEN;
3665	}
3666
3667	/* Set the ether header length */
3668	vlan_macip_lens |= ehdrlen << E1000_ADVTXD_MACLEN_SHIFT;
3669
3670	switch (etype) {
3671		case ETHERTYPE_IP:
3672			ip = (struct ip *)(mp->m_data + ehdrlen);
3673			ip_hlen = ip->ip_hl << 2;
3674			if (mp->m_len < ehdrlen + ip_hlen)
3675				return FALSE; /* failure */
3676			ipproto = ip->ip_p;
3677			type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV4;
3678			break;
3679		case ETHERTYPE_IPV6:
3680			ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
3681			ip_hlen = sizeof(struct ip6_hdr);
3682			if (mp->m_len < ehdrlen + ip_hlen)
3683				return FALSE; /* failure */
3684			ipproto = ip6->ip6_nxt;
3685			type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV6;
3686			break;
3687		default:
3688			return FALSE;
3689	}
3690
3691	vlan_macip_lens |= ip_hlen;
3692	type_tucmd_mlhl |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_CTXT;
3693
3694	switch (ipproto) {
3695		case IPPROTO_TCP:
3696			if (mp->m_pkthdr.csum_flags & CSUM_TCP)
3697				type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP;
3698			break;
3699		case IPPROTO_UDP:
3700			if (mp->m_pkthdr.csum_flags & CSUM_UDP)
3701				type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_UDP;
3702			break;
3703	}
3704
3705	/* Now copy bits into descriptor */
3706	TXD->vlan_macip_lens |= htole32(vlan_macip_lens);
3707	TXD->type_tucmd_mlhl |= htole32(type_tucmd_mlhl);
3708	TXD->seqnum_seed = htole32(0);
3709	TXD->mss_l4len_idx = htole32(0);
3710
3711	tx_buffer->m_head = NULL;
3712	tx_buffer->next_eop = -1;
3713
3714	/* We've consumed the first desc, adjust counters */
3715	if (++ctxd == adapter->num_tx_desc)
3716		ctxd = 0;
3717	adapter->next_avail_tx_desc = ctxd;
3718	--adapter->num_tx_desc_avail;
3719
3720	return TRUE;
3721}
3722
3723
3724/**********************************************************************
3725 *
3726 *  Examine each tx_buffer in the used queue. If the hardware is done
3727 *  processing the packet then free associated resources. The
3728 *  tx_buffer is put back on the free queue.
3729 *
3730 **********************************************************************/
3731static void
3732em_txeof(struct adapter *adapter)
3733{
3734	int first, last, done, num_avail;
3735	struct em_buffer *tx_buffer;
3736	struct e1000_tx_desc *tx_desc, *eop_desc;
3737	struct ifnet   *ifp = adapter->ifp;
3738
3739	EM_LOCK_ASSERT(adapter);
3740
3741	if (adapter->num_tx_desc_avail == adapter->num_tx_desc)
3742		return;
3743
3744	num_avail = adapter->num_tx_desc_avail;
3745	first = adapter->next_tx_to_clean;
3746	tx_desc = &adapter->tx_desc_base[first];
3747	tx_buffer = &adapter->tx_buffer_area[first];
3748	last = tx_buffer->next_eop;
3749	eop_desc = &adapter->tx_desc_base[last];
3750
3751	/*
3752	 * Get the index of the first descriptor AFTER
3753	 * the EOP of the first packet, so that we can
3754	 * do a simple comparison in the inner while
3755	 * loop below.
3756	 */
3757	if (++last == adapter->num_tx_desc)
3758		last = 0;
3759	done = last;
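	/*
	 * Worked example (editor's note): with an 8-descriptor ring,
	 * if the first packet's EOP is at index 5 then done = 6; the
	 * inner loop below cleans indices first..5 and stops as soon
	 * as first == 6.
	 */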
3760
3761	bus_dmamap_sync(adapter->txdma.dma_tag, adapter->txdma.dma_map,
3762	    BUS_DMASYNC_POSTREAD);
3763
3764	while (eop_desc->upper.fields.status & E1000_TXD_STAT_DD) {
3765		/* We clean the range of the packet */
3766		while (first != done) {
3767			tx_desc->upper.data = 0;
3768			tx_desc->lower.data = 0;
3769			tx_desc->buffer_addr = 0;
3770			num_avail++;
3771
3772			if (tx_buffer->m_head) {
3773				ifp->if_opackets++;
3774				bus_dmamap_sync(adapter->txtag,
3775				    tx_buffer->map,
3776				    BUS_DMASYNC_POSTWRITE);
3777				bus_dmamap_unload(adapter->txtag,
3778				    tx_buffer->map);
3779
3780				m_freem(tx_buffer->m_head);
3781				tx_buffer->m_head = NULL;
3782			}
3783			tx_buffer->next_eop = -1;
3784
3785			if (++first == adapter->num_tx_desc)
3786				first = 0;
3787
3788			tx_buffer = &adapter->tx_buffer_area[first];
3789			tx_desc = &adapter->tx_desc_base[first];
3790		}
3791		/* See if we can continue to the next packet */
3792		last = tx_buffer->next_eop;
3793		if (last != -1) {
3794        		eop_desc = &adapter->tx_desc_base[last];
3795			/* Get new done point */
3796			if (++last == adapter->num_tx_desc)
				last = 0;
3797			done = last;
3798		} else
3799			break;
3800	}
3801	bus_dmamap_sync(adapter->txdma.dma_tag, adapter->txdma.dma_map,
3802	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3803
3804	adapter->next_tx_to_clean = first;
3805
3806	/*
3807	 * If we have enough room, clear IFF_DRV_OACTIVE to tell the stack
3808	 * that it is OK to send packets.
3809	 * If there are no pending descriptors, clear the timeout.  Otherwise,
3810	 * if some descriptors have been freed, restart the timeout.
3811	 */
3812	if (num_avail > EM_TX_CLEANUP_THRESHOLD) {
3813		ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
3814		/* All clean, turn off the timer */
3815		if (num_avail == adapter->num_tx_desc)
3816			adapter->watchdog_timer = 0;
3817		/* Some cleaned, reset the timer */
3818		else if (num_avail != adapter->num_tx_desc_avail)
3819			adapter->watchdog_timer = EM_TX_TIMEOUT;
3820	}
3821	adapter->num_tx_desc_avail = num_avail;
3822	return;
3823}
3824
3825/*********************************************************************
3826 *
3827 *  Get a buffer from system mbuf buffer pool.
3828 *
3829 **********************************************************************/
3830static int
3831em_get_buf(struct adapter *adapter, int i)
3832{
3833	struct mbuf		*m;
3834	bus_dma_segment_t	segs[1];
3835	bus_dmamap_t		map;
3836	struct em_buffer	*rx_buffer;
3837	int			error, nsegs;
3838
3839	m = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR);
3840	if (m == NULL) {
3841		adapter->mbuf_cluster_failed++;
3842		return (ENOBUFS);
3843	}
3844	m->m_len = m->m_pkthdr.len = MCLBYTES;
3845
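	/*
	 * Editor's note: ETHER_ALIGN is 2, so m_adj() below shifts the
	 * start of the frame by two bytes; with a 14-byte Ethernet
	 * header this leaves the IP header 4-byte aligned.
	 */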
3846	if (adapter->hw.mac.max_frame_size <= (MCLBYTES - ETHER_ALIGN))
3847		m_adj(m, ETHER_ALIGN);
3848
3849	/*
3850	 * Using memory from the mbuf cluster pool, invoke the
3851	 * bus_dma machinery to arrange the memory mapping.
3852	 */
3853	error = bus_dmamap_load_mbuf_sg(adapter->rxtag,
3854	    adapter->rx_sparemap, m, segs, &nsegs, BUS_DMA_NOWAIT);
3855	if (error != 0) {
3856		m_free(m);
3857		return (error);
3858	}
3859
3860	/* If nsegs is wrong then the stack is corrupt. */
3861	KASSERT(nsegs == 1, ("Too many segments returned!"));
3862
3863	rx_buffer = &adapter->rx_buffer_area[i];
3864	if (rx_buffer->m_head != NULL)
3865		bus_dmamap_unload(adapter->rxtag, rx_buffer->map);
3866
3867	map = rx_buffer->map;
3868	rx_buffer->map = adapter->rx_sparemap;
3869	adapter->rx_sparemap = map;
3870	bus_dmamap_sync(adapter->rxtag, rx_buffer->map, BUS_DMASYNC_PREREAD);
3871	rx_buffer->m_head = m;
3872
3873	adapter->rx_desc_base[i].buffer_addr = htole64(segs[0].ds_addr);
3874	return (0);
3875}
3876
3877/*********************************************************************
3878 *
3879 *  Allocate memory for rx_buffer structures. Since we use one
3880 *  rx_buffer per received packet, the maximum number of rx_buffer's
3881 *  that we'll need is equal to the number of receive descriptors
3882 *  that we've allocated.
3883 *
3884 **********************************************************************/
3885static int
3886em_allocate_receive_structures(struct adapter *adapter)
3887{
3888	device_t dev = adapter->dev;
3889	struct em_buffer *rx_buffer;
3890	int i, error;
3891
3892	adapter->rx_buffer_area = malloc(sizeof(struct em_buffer) *
3893	    adapter->num_rx_desc, M_DEVBUF, M_NOWAIT);
3894	if (adapter->rx_buffer_area == NULL) {
3895		device_printf(dev, "Unable to allocate rx_buffer memory\n");
3896		return (ENOMEM);
3897	}
3898
3899	bzero(adapter->rx_buffer_area,
3900	    sizeof(struct em_buffer) * adapter->num_rx_desc);
3901
3902	error = bus_dma_tag_create(bus_get_dma_tag(dev),        /* parent */
3903				1, 0,			/* alignment, bounds */
3904				BUS_SPACE_MAXADDR,	/* lowaddr */
3905				BUS_SPACE_MAXADDR,	/* highaddr */
3906				NULL, NULL,		/* filter, filterarg */
3907				MCLBYTES,		/* maxsize */
3908				1,			/* nsegments */
3909				MCLBYTES,		/* maxsegsize */
3910				0,			/* flags */
3911				NULL,			/* lockfunc */
3912				NULL,			/* lockarg */
3913				&adapter->rxtag);
3914	if (error) {
3915		device_printf(dev, "%s: bus_dma_tag_create failed %d\n",
3916		    __func__, error);
3917		goto fail;
3918	}
3919
3920	/* Create the spare map (used by getbuf) */
3921	error = bus_dmamap_create(adapter->rxtag, BUS_DMA_NOWAIT,
3922	     &adapter->rx_sparemap);
3923	if (error) {
3924		device_printf(dev, "%s: bus_dmamap_create failed: %d\n",
3925		    __func__, error);
3926		goto fail;
3927	}
3928
3929	rx_buffer = adapter->rx_buffer_area;
3930	for (i = 0; i < adapter->num_rx_desc; i++, rx_buffer++) {
3931		error = bus_dmamap_create(adapter->rxtag, BUS_DMA_NOWAIT,
3932		    &rx_buffer->map);
3933		if (error) {
3934			device_printf(dev, "%s: bus_dmamap_create failed: %d\n",
3935			    __func__, error);
3936			goto fail;
3937		}
3938	}
3939
3940	/* Setup the initial buffers */
3941	for (i = 0; i < adapter->num_rx_desc; i++) {
3942		error = em_get_buf(adapter, i);
3943		if (error)
3944			goto fail;
3945	}
3946	bus_dmamap_sync(adapter->rxdma.dma_tag, adapter->rxdma.dma_map,
3947	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3948
3949	return (0);
3950
3951fail:
3952	em_free_receive_structures(adapter);
3953	return (error);
3954}
3955
3956/*********************************************************************
3957 *
3958 *  Allocate and initialize receive structures.
3959 *
3960 **********************************************************************/
3961static int
3962em_setup_receive_structures(struct adapter *adapter)
3963{
3964	int error;
3965
3966	bzero(adapter->rx_desc_base,
3967	    (sizeof(struct e1000_rx_desc)) * adapter->num_rx_desc);
3968
3969	if ((error = em_allocate_receive_structures(adapter)) !=0)
3970		return (error);
3971
3972	/* Setup our descriptor pointers */
3973	adapter->next_rx_desc_to_check = 0;
3974
3975	return (0);
3976}
3977
3978/*********************************************************************
3979 *
3980 *  Enable receive unit.
3981 *
3982 **********************************************************************/
3983static void
3984em_initialize_receive_unit(struct adapter *adapter)
3985{
3986	struct ifnet	*ifp = adapter->ifp;
3987	uint64_t	bus_addr;
3988	uint32_t	reg_rctl;
3989	uint32_t	reg_rxcsum;
3990
3991	INIT_DEBUGOUT("em_initialize_receive_unit: begin");
3992
3993	/*
3994	 * Make sure receives are disabled while setting
3995	 * up the descriptor ring
3996	 */
3997	reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
3998	E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl & ~E1000_RCTL_EN);
3999
4000	if (adapter->hw.mac.type >= e1000_82540) {
4001		E1000_WRITE_REG(&adapter->hw, E1000_RADV,
4002		    adapter->rx_abs_int_delay.value);
4003		/*
4004		 * Set the interrupt throttling rate. Value is calculated
4005		 * as DEFAULT_ITR = 1/(MAX_INTS_PER_SEC * 256ns)
4006		 */
4007#define MAX_INTS_PER_SEC	8000
4008#define DEFAULT_ITR	     1000000000/(MAX_INTS_PER_SEC * 256)
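		/*
		 * Worked example (editor's note): with MAX_INTS_PER_SEC
		 * = 8000 this works out to 1000000000 / (8000 * 256) =
		 * 488, i.e. a minimum inter-interrupt interval of about
		 * 488 * 256ns = 125us, or ~8000 interrupts per second.
		 */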
4009		E1000_WRITE_REG(&adapter->hw, E1000_ITR, DEFAULT_ITR);
4010	}
4011
4012	/* Setup the Base and Length of the Rx Descriptor Ring */
4013	bus_addr = adapter->rxdma.dma_paddr;
4014	E1000_WRITE_REG(&adapter->hw, E1000_RDLEN, adapter->num_rx_desc *
4015			sizeof(struct e1000_rx_desc));
4016	E1000_WRITE_REG(&adapter->hw, E1000_RDBAH, (uint32_t)(bus_addr >> 32));
4017	E1000_WRITE_REG(&adapter->hw, E1000_RDBAL, (uint32_t)bus_addr);
4018
4019	/* Setup the Receive Control Register */
4020	reg_rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
4021	reg_rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_LBM_NO |
4022		   E1000_RCTL_RDMTS_HALF |
4023		   (adapter->hw.mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
4024
4025	if (e1000_tbi_sbp_enabled_82543(&adapter->hw))
4026		reg_rctl |= E1000_RCTL_SBP;
4027	else
4028		reg_rctl &= ~E1000_RCTL_SBP;
4029
4030	switch (adapter->rx_buffer_len) {
4031	default:
4032	case 2048:
4033		reg_rctl |= E1000_RCTL_SZ_2048;
4034		break;
4035	case 4096:
4036		reg_rctl |= E1000_RCTL_SZ_4096 |
4037		    E1000_RCTL_BSEX | E1000_RCTL_LPE;
4038		break;
4039	case 8192:
4040		reg_rctl |= E1000_RCTL_SZ_8192 |
4041		    E1000_RCTL_BSEX | E1000_RCTL_LPE;
4042		break;
4043	case 16384:
4044		reg_rctl |= E1000_RCTL_SZ_16384 |
4045		    E1000_RCTL_BSEX | E1000_RCTL_LPE;
4046		break;
4047	}
4048
4049	if (ifp->if_mtu > ETHERMTU)
4050		reg_rctl |= E1000_RCTL_LPE;
4051	else
4052		reg_rctl &= ~E1000_RCTL_LPE;
4053
4054	/* Enable 82543 Receive Checksum Offload for TCP and UDP */
4055	if ((adapter->hw.mac.type >= e1000_82543) &&
4056	    (ifp->if_capenable & IFCAP_RXCSUM)) {
4057		reg_rxcsum = E1000_READ_REG(&adapter->hw, E1000_RXCSUM);
4058		reg_rxcsum |= (E1000_RXCSUM_IPOFL | E1000_RXCSUM_TUOFL);
4059		E1000_WRITE_REG(&adapter->hw, E1000_RXCSUM, reg_rxcsum);
4060	}
4061
4062	/*
4063	** XXX TEMPORARY WORKAROUND: on some systems with the 82573,
4064	** such as the Lenovo X60, long latencies are observed. This
4065	** change eliminates the problem, but since having positive
4066	** values in RDTR is a known source of problems on other
4067	** platforms, another solution is being sought.
4068	*/
4069	if (adapter->hw.mac.type == e1000_82573)
4070		E1000_WRITE_REG(&adapter->hw, E1000_RDTR, 0x20);
4071
4072	/* Enable Receives */
4073	E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
4074
4075	/*
4076	 * Setup the HW Rx Head and
4077	 * Tail Descriptor Pointers
4078	 */
4079	E1000_WRITE_REG(&adapter->hw, E1000_RDH, 0);
4080	E1000_WRITE_REG(&adapter->hw, E1000_RDT, adapter->num_rx_desc - 1);
4081
4082	return;
4083}
4084
4085/*********************************************************************
4086 *
4087 *  Free receive related data structures.
4088 *
4089 **********************************************************************/
4090static void
4091em_free_receive_structures(struct adapter *adapter)
4092{
4093	struct em_buffer *rx_buffer;
4094	int i;
4095
4096	INIT_DEBUGOUT("free_receive_structures: begin");
4097
4098	if (adapter->rx_sparemap) {
4099		bus_dmamap_destroy(adapter->rxtag, adapter->rx_sparemap);
4100		adapter->rx_sparemap = NULL;
4101	}
4102
4103	/* Cleanup any existing buffers */
4104	if (adapter->rx_buffer_area != NULL) {
4105		rx_buffer = adapter->rx_buffer_area;
4106		for (i = 0; i < adapter->num_rx_desc; i++, rx_buffer++) {
4107			if (rx_buffer->m_head != NULL) {
4108				bus_dmamap_sync(adapter->rxtag, rx_buffer->map,
4109				    BUS_DMASYNC_POSTREAD);
4110				bus_dmamap_unload(adapter->rxtag,
4111				    rx_buffer->map);
4112				m_freem(rx_buffer->m_head);
4113				rx_buffer->m_head = NULL;
4114			} else if (rx_buffer->map != NULL)
4115				bus_dmamap_unload(adapter->rxtag,
4116				    rx_buffer->map);
4117			if (rx_buffer->map != NULL) {
4118				bus_dmamap_destroy(adapter->rxtag,
4119				    rx_buffer->map);
4120				rx_buffer->map = NULL;
4121			}
4122		}
4123	}
4124
4125	if (adapter->rx_buffer_area != NULL) {
4126		free(adapter->rx_buffer_area, M_DEVBUF);
4127		adapter->rx_buffer_area = NULL;
4128	}
4129
4130	if (adapter->rxtag != NULL) {
4131		bus_dma_tag_destroy(adapter->rxtag);
4132		adapter->rxtag = NULL;
4133	}
4134}
4135
4136/*********************************************************************
4137 *
4138 *  This routine executes in interrupt context. It replenishes
4139 *  the mbufs in the descriptor ring and passes data which has been
4140 *  dma'ed into host memory up to the upper layer.
4141 *
4142 *  We loop at most count times if count is > 0, or until done if
4143 *  count < 0.
4144 *
4145 *********************************************************************/
4146static int
4147em_rxeof(struct adapter *adapter, int count)
4148{
4149	struct ifnet	*ifp;
4150	struct mbuf	*mp;
4151	uint8_t		accept_frame = 0;
4152	uint8_t		eop = 0;
4153	uint16_t 	len, desc_len, prev_len_adj;
4154	int		i;
4155
4156	/* Pointer to the receive descriptor being examined. */
4157	struct e1000_rx_desc   *current_desc;
4158	uint8_t		status;
4159
4160	ifp = adapter->ifp;
4161	i = adapter->next_rx_desc_to_check;
4162	current_desc = &adapter->rx_desc_base[i];
4163	bus_dmamap_sync(adapter->rxdma.dma_tag, adapter->rxdma.dma_map,
4164	    BUS_DMASYNC_POSTREAD);
4165
4166	if (!((current_desc->status) & E1000_RXD_STAT_DD))
4167		return (0);
4168
4169	while ((current_desc->status & E1000_RXD_STAT_DD) &&
4170	    (count != 0) &&
4171	    (ifp->if_drv_flags & IFF_DRV_RUNNING)) {
4172		struct mbuf *m = NULL;
4173
4174		mp = adapter->rx_buffer_area[i].m_head;
4175		/*
4176		 * Can't defer bus_dmamap_sync(9) because TBI_ACCEPT
4177		 * needs to access the last received byte in the mbuf.
4178		 */
4179		bus_dmamap_sync(adapter->rxtag, adapter->rx_buffer_area[i].map,
4180		    BUS_DMASYNC_POSTREAD);
4181
4182		accept_frame = 1;
4183		prev_len_adj = 0;
4184		desc_len = le16toh(current_desc->length);
4185		status = current_desc->status;
4186		if (status & E1000_RXD_STAT_EOP) {
4187			count--;
4188			eop = 1;
4189			if (desc_len < ETHER_CRC_LEN) {
4190				len = 0;
4191				prev_len_adj = ETHER_CRC_LEN - desc_len;
4192			} else
4193				len = desc_len - ETHER_CRC_LEN;
4194		} else {
4195			eop = 0;
4196			len = desc_len;
4197		}
4198
4199		if (current_desc->errors & E1000_RXD_ERR_FRAME_ERR_MASK) {
4200			uint8_t		last_byte;
4201			uint32_t	pkt_len = desc_len;
4202
4203			if (adapter->fmp != NULL)
4204				pkt_len += adapter->fmp->m_pkthdr.len;
4205
4206			last_byte = *(mtod(mp, caddr_t) + desc_len - 1);
4207			if (TBI_ACCEPT(&adapter->hw, status,
4208			    current_desc->errors, pkt_len, last_byte)) {
4209				e1000_tbi_adjust_stats_82543(&adapter->hw,
4210				    &adapter->stats, pkt_len,
4211				    adapter->hw.mac.addr);
4212				if (len > 0)
4213					len--;
4214			} else
4215				accept_frame = 0;
4216		}
4217
4218		if (accept_frame) {
4219			if (em_get_buf(adapter, i) != 0) {
4220				ifp->if_iqdrops++;
4221				goto discard;
4222			}
4223
4224			/* Assign correct length to the current fragment */
4225			mp->m_len = len;
4226
4227			if (adapter->fmp == NULL) {
4228				mp->m_pkthdr.len = len;
4229				adapter->fmp = mp; /* Store the first mbuf */
4230				adapter->lmp = mp;
4231			} else {
4232				/* Chain mbuf's together */
4233				mp->m_flags &= ~M_PKTHDR;
4234				/*
4235				 * Adjust length of previous mbuf in chain if
4236				 * we received less than 4 bytes in the last
4237				 * descriptor.
4238				 */
4239				if (prev_len_adj > 0) {
4240					adapter->lmp->m_len -= prev_len_adj;
4241					adapter->fmp->m_pkthdr.len -=
4242					    prev_len_adj;
4243				}
4244				adapter->lmp->m_next = mp;
4245				adapter->lmp = adapter->lmp->m_next;
4246				adapter->fmp->m_pkthdr.len += len;
4247			}
4248
4249			if (eop) {
4250				adapter->fmp->m_pkthdr.rcvif = ifp;
4251				ifp->if_ipackets++;
4252				em_receive_checksum(adapter, current_desc,
4253				    adapter->fmp);
4254#ifndef __NO_STRICT_ALIGNMENT
4255				if (adapter->hw.mac.max_frame_size >
4256				    (MCLBYTES - ETHER_ALIGN) &&
4257				    em_fixup_rx(adapter) != 0)
4258					goto skip;
4259#endif
4260				if (status & E1000_RXD_STAT_VP) {
4261					adapter->fmp->m_pkthdr.ether_vtag =
4262					    (le16toh(current_desc->special) &
4263					    E1000_RXD_SPC_VLAN_MASK);
4264					adapter->fmp->m_flags |= M_VLANTAG;
4265				}
4266#ifndef __NO_STRICT_ALIGNMENT
4267skip:
4268#endif
4269				m = adapter->fmp;
4270				adapter->fmp = NULL;
4271				adapter->lmp = NULL;
4272			}
4273		} else {
4274			ifp->if_ierrors++;
4275discard:
4276			/* Reuse loaded DMA map and just update mbuf chain */
4277			mp = adapter->rx_buffer_area[i].m_head;
4278			mp->m_len = mp->m_pkthdr.len = MCLBYTES;
4279			mp->m_data = mp->m_ext.ext_buf;
4280			mp->m_next = NULL;
4281			if (adapter->hw.mac.max_frame_size <=
4282			    (MCLBYTES - ETHER_ALIGN))
4283				m_adj(mp, ETHER_ALIGN);
4284			if (adapter->fmp != NULL) {
4285				m_freem(adapter->fmp);
4286				adapter->fmp = NULL;
4287				adapter->lmp = NULL;
4288			}
4289			m = NULL;
4290		}
4291
4292		/* Zero out the receive descriptors status. */
4293		current_desc->status = 0;
4294		bus_dmamap_sync(adapter->rxdma.dma_tag, adapter->rxdma.dma_map,
4295		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4296
4297		/* Advance our pointers to the next descriptor. */
4298		if (++i == adapter->num_rx_desc)
4299			i = 0;
4300		if (m != NULL) {
4301			adapter->next_rx_desc_to_check = i;
4302#ifdef DEVICE_POLLING
4303			EM_UNLOCK(adapter);
4304			(*ifp->if_input)(ifp, m);
4305			EM_LOCK(adapter);
4306#else
4307			/* Already running unlocked */
4308			(*ifp->if_input)(ifp, m);
4309#endif
4310			i = adapter->next_rx_desc_to_check;
4311		}
4312		current_desc = &adapter->rx_desc_base[i];
4313	}
4314	adapter->next_rx_desc_to_check = i;
4315
4316	/* Advance the E1000's Receive Queue #0  "Tail Pointer". */
4317	if (--i < 0)
4318		i = adapter->num_rx_desc - 1;
4319	E1000_WRITE_REG(&adapter->hw, E1000_RDT, i);
4320	if (!((current_desc->status) & E1000_RXD_STAT_DD))
4321		return (0);
4322
4323	return (1);
4324}
4325
4326#ifndef __NO_STRICT_ALIGNMENT
4327/*
4328 * When jumbo frames are enabled we should realign the entire payload on
4329 * architectures with strict alignment.  This is a serious design mistake of
4330 * the 8254x, as it defeats the purpose of DMA operations.  The 8254x only
4331 * allows RX buffer sizes of 2048/4096/8192/16384; what we really want is
4332 * 2048 - ETHER_ALIGN so that the payload ends up aligned.  On architectures
4333 * without strict alignment restrictions the 8254x still performs unaligned
4334 * memory accesses, which reduces performance as well.  To avoid copying an
4335 * entire frame just to align it, we allocate a new mbuf, copy the ethernet
4336 * header into it, and prepend the new mbuf to the existing mbuf chain.
4337 *
4338 * Be aware that the best performance of the 8254x is achieved only when
4339 * jumbo frames are not used at all on architectures with strict alignment.
4340 */
4341static int
4342em_fixup_rx(struct adapter *adapter)
4343{
4344	struct mbuf *m, *n;
4345	int error;
4346
4347	error = 0;
4348	m = adapter->fmp;
4349	if (m->m_len <= (MCLBYTES - ETHER_HDR_LEN)) {
4350		bcopy(m->m_data, m->m_data + ETHER_HDR_LEN, m->m_len);
4351		m->m_data += ETHER_HDR_LEN;
4352	} else {
4353		MGETHDR(n, M_DONTWAIT, MT_DATA);
4354		if (n != NULL) {
4355			bcopy(m->m_data, n->m_data, ETHER_HDR_LEN);
4356			m->m_data += ETHER_HDR_LEN;
4357			m->m_len -= ETHER_HDR_LEN;
4358			n->m_len = ETHER_HDR_LEN;
4359			M_MOVE_PKTHDR(n, m);
4360			n->m_next = m;
4361			adapter->fmp = n;
4362		} else {
4363			adapter->dropped_pkts++;
4364			m_freem(adapter->fmp);
4365			adapter->fmp = NULL;
4366			error = ENOMEM;
4367		}
4368	}
4369
4370	return (error);
4371}
4372#endif
4373
4374/*********************************************************************
4375 *
4376 *  Verify that the hardware indicated that the checksum is valid.
4377 *  Inform the stack about the status of checksum so that stack
4378 *  doesn't spend time verifying the checksum.
4379 *
4380 *********************************************************************/
4381static void
4382em_receive_checksum(struct adapter *adapter,
4383	    struct e1000_rx_desc *rx_desc, struct mbuf *mp)
4384{
4385	/* 82543 or newer only */
4386	if ((adapter->hw.mac.type < e1000_82543) ||
4387	    /* Ignore Checksum bit is set */
4388	    (rx_desc->status & E1000_RXD_STAT_IXSM)) {
4389		mp->m_pkthdr.csum_flags = 0;
4390		return;
4391	}
4392
4393	if (rx_desc->status & E1000_RXD_STAT_IPCS) {
4394		/* Did it pass? */
4395		if (!(rx_desc->errors & E1000_RXD_ERR_IPE)) {
4396			/* IP Checksum Good */
4397			mp->m_pkthdr.csum_flags = CSUM_IP_CHECKED;
4398			mp->m_pkthdr.csum_flags |= CSUM_IP_VALID;
4399
4400		} else {
4401			mp->m_pkthdr.csum_flags = 0;
4402		}
4403	}
4404
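	/*
	 * Editor's note: setting csum_data to 0xffff together with
	 * CSUM_DATA_VALID | CSUM_PSEUDO_HDR is the FreeBSD convention
	 * for "hardware verified the full TCP/UDP checksum"; the
	 * stack then skips its own verification.
	 */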
4405	if (rx_desc->status & E1000_RXD_STAT_TCPCS) {
4406		/* Did it pass? */
4407		if (!(rx_desc->errors & E1000_RXD_ERR_TCPE)) {
4408			mp->m_pkthdr.csum_flags |=
4409			    (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
4410			mp->m_pkthdr.csum_data = htons(0xffff);
4411		}
4412	}
4413}
4414
4415
4416static void
4417em_enable_vlans(struct adapter *adapter)
4418{
4419	uint32_t ctrl;
4420
4421	E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);
4422
4423	ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL);
4424	ctrl |= E1000_CTRL_VME;
4425	E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl);
4426}
4427
4428static void
4429em_enable_intr(struct adapter *adapter)
4430{
4431	E1000_WRITE_REG(&adapter->hw, E1000_IMS,
4432	    (IMS_ENABLE_MASK));
4433}
4434
4435static void
4436em_disable_intr(struct adapter *adapter)
4437{
4438	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
4439}
4440
4441/*
4442 * A bit of a misnomer: what this really means is
4443 * to enable OS management of the system, i.e. to
4444 * disable the special hardware management features.
4445 */
4446static void
4447em_init_manageability(struct adapter *adapter)
4448{
4449	/* A shared code workaround */
4450#define E1000_82542_MANC2H E1000_MANC2H
4451	if (adapter->has_manage) {
4452		int manc2h = E1000_READ_REG(&adapter->hw, E1000_MANC2H);
4453		int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
4454
4455		/* disable hardware interception of ARP */
4456		manc &= ~(E1000_MANC_ARP_EN);
4457
4458                /* enable receiving management packets to the host */
4459                if (adapter->hw.mac.type >= e1000_82571) {
4460			manc |= E1000_MANC_EN_MNG2HOST;
4461#define E1000_MNG2HOST_PORT_623 (1 << 5)
4462#define E1000_MNG2HOST_PORT_664 (1 << 6)
4463			manc2h |= E1000_MNG2HOST_PORT_623;
4464			manc2h |= E1000_MNG2HOST_PORT_664;
4465			E1000_WRITE_REG(&adapter->hw, E1000_MANC2H, manc2h);
4466		}
4467
4468		E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
4469	}
4470}
4471
4472/*
4473 * Give control back to hardware management
4474 * controller if there is one.
4475 */
4476static void
4477em_release_manageability(struct adapter *adapter)
4478{
4479	if (adapter->has_manage) {
4480		int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
4481
4482		/* re-enable hardware interception of ARP */
4483		manc |= E1000_MANC_ARP_EN;
4484
4485		if (adapter->hw.mac.type >= e1000_82571)
4486			manc &= ~E1000_MANC_EN_MNG2HOST;
4487
4488		E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
4489	}
4490}
4491
4492/*
4493 * em_get_hw_control sets {CTRL_EXT|FWSM}:DRV_LOAD bit.
4494 * For ASF and Pass Through versions of f/w this means that
4495 * the driver is loaded.  For the AMT version (only with 82573)
4496 * of the f/w this means that the network i/f is open.
4497 *
4498 */
4499static void
4500em_get_hw_control(struct adapter *adapter)
4501{
4502	u32 ctrl_ext, swsm;
4503
4504	/* Let firmware know the driver has taken over */
4505	switch (adapter->hw.mac.type) {
4506	case e1000_82573:
4507		swsm = E1000_READ_REG(&adapter->hw, E1000_SWSM);
4508		E1000_WRITE_REG(&adapter->hw, E1000_SWSM,
4509		    swsm | E1000_SWSM_DRV_LOAD);
4510		break;
4511	case e1000_82571:
4512	case e1000_82572:
4513	case e1000_80003es2lan:
4514	case e1000_ich8lan:
4515	case e1000_ich9lan:
4516		ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4517		E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
4518		    ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
4519		break;
4520	default:
4521		break;
4522	}
4523}
4524
4525/*
4526 * em_release_hw_control resets {CTRL_EXT|FWSM}:DRV_LOAD bit.
4527 * For ASF and Pass Through versions of f/w this means that the
4528 * driver is no longer loaded.  For the AMT version (only with 82573)
4529 * of the f/w this means that the network i/f is closed.
4530 *
4531 */
4532static void
4533em_release_hw_control(struct adapter *adapter)
4534{
4535	u32 ctrl_ext, swsm;
4536
4537	/* Let the firmware take over control of the h/w */
4538	switch (adapter->hw.mac.type) {
4539	case e1000_82573:
4540		swsm = E1000_READ_REG(&adapter->hw, E1000_SWSM);
4541		E1000_WRITE_REG(&adapter->hw, E1000_SWSM,
4542		    swsm & ~E1000_SWSM_DRV_LOAD);
4543		break;
4544	case e1000_82571:
4545	case e1000_82572:
4546	case e1000_80003es2lan:
4547	case e1000_ich8lan:
4548	case e1000_ich9lan:
4549		ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4550		E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
4551		    ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
4552		break;
4553	default:
4554		break;
4555
4556	}
4557}
4558
4559static int
4560em_is_valid_ether_addr(uint8_t *addr)
4561{
4562	char zero_addr[6] = { 0, 0, 0, 0, 0, 0 };
4563
4564	if ((addr[0] & 1) || (!bcmp(addr, zero_addr, ETHER_ADDR_LEN))) {
4565		return (FALSE);
4566	}
4567
4568	return (TRUE);
4569}
4570
4571/*
4572 * NOTE: the following routines using the e1000
4573 * 	naming style are provided for the shared
4574 *	code, which expects that style rather than 'em'.
4575 */
4576
4577void
4578e1000_write_pci_cfg(struct e1000_hw *hw, uint32_t reg, uint16_t *value)
4579{
4580	pci_write_config(((struct e1000_osdep *)hw->back)->dev, reg, *value, 2);
4581}
4582
4583void
4584e1000_read_pci_cfg(struct e1000_hw *hw, uint32_t reg, uint16_t *value)
4585{
4586	*value = pci_read_config(((struct e1000_osdep *)hw->back)->dev, reg, 2);
4587}
4588
4589void
4590e1000_pci_set_mwi(struct e1000_hw *hw)
4591{
4592	pci_write_config(((struct e1000_osdep *)hw->back)->dev, PCIR_COMMAND,
4593	    (hw->bus.pci_cmd_word | CMD_MEM_WRT_INVALIDATE), 2);
4594}
4595
4596void
4597e1000_pci_clear_mwi(struct e1000_hw *hw)
4598{
4599	pci_write_config(((struct e1000_osdep *)hw->back)->dev, PCIR_COMMAND,
4600	    (hw->bus.pci_cmd_word & ~CMD_MEM_WRT_INVALIDATE), 2);
4601}
4602
4603/*
4604 * Read the PCI Express capabilities
4605 */
4606int32_t
4607e1000_read_pcie_cap_reg(struct e1000_hw *hw, uint32_t reg, uint16_t *value)
4608{
4609	int32_t		error = E1000_SUCCESS;
4610	uint16_t	cap_off;
4611
4612	switch (hw->mac.type) {
4613
4614		case e1000_82571:
4615		case e1000_82572:
4616		case e1000_82573:
4617		case e1000_80003es2lan:
4618			cap_off = 0xE0;
4619			e1000_read_pci_cfg(hw, cap_off + reg, value);
4620			break;
4621		default:
4622			error = E1000_NOT_IMPLEMENTED;
4623			break;
4624	}
4625
4626	return (error);
4627}
4628
4629int32_t
4630e1000_alloc_zeroed_dev_spec_struct(struct e1000_hw *hw, uint32_t size)
4631{
4632	int32_t error = 0;
4633
4634	hw->dev_spec = malloc(size, M_DEVBUF, M_NOWAIT | M_ZERO);
4635	if (hw->dev_spec == NULL)
4636		error = ENOMEM;
4637
4638	return (error);
4639}
4640
4641void
4642e1000_free_dev_spec_struct(struct e1000_hw *hw)
4643{
4644	if (hw->dev_spec != NULL)
4645		free(hw->dev_spec, M_DEVBUF);
4646	return;
4647}
4648
4649/*
4650 * Enable the PCI Wake On LAN capability
4651 */
4652void
4653em_enable_wakeup(device_t dev)
4654{
4655	u16     cap, status;
4656	u8      id;
4657
4658	/* First find the capabilities pointer */
4659	cap = pci_read_config(dev, PCIR_CAP_PTR, 2);
4660	/* Read the PM Capabilities */
4661	id = pci_read_config(dev, cap, 1);
4662	if (id != PCIY_PMG)     /* Something wrong */
4663		return;
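	/*
	 * Editor's note: in the PCI power-management capability the
	 * capability ID is at offset 0 and the PMCSR at offset
	 * PCIR_POWER_STATUS (4), so if the list were to start at,
	 * say, 0xC8, the status register would be read at 0xCC.
	 */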
4664	/* OK, we have the power capabilities, so
4665	   now get the status register */
4666	cap += PCIR_POWER_STATUS;
4667	status = pci_read_config(dev, cap, 2);
4668	status |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE;
4669	pci_write_config(dev, cap, status, 2);
4670	return;
4671}
4672
4673
4674/*********************************************************************
4675* 82544 Coexistence issue workaround.
4676*    There are two issues.
4677*       1. Transmit Hang issue.
4678*    To detect this issue, the following equation can be used:
4679*	  SIZE[3:0] + ADDR[2:0] = SUM[3:0].
4680*	  If SUM[3:0] is between 1 and 4, we will have this issue.
4681*
4682*       2. DAC issue.
4683*    To detect this issue, the following equation can be used:
4684*	  SIZE[3:0] + ADDR[2:0] = SUM[3:0].
4685*	  If SUM[3:0] is between 9 and c, we will have this issue.
4686*
4687*
4688*    WORKAROUND:
4689*	  Make sure the buffer's ending address (SUM[3:0]) is not
4690*	  1,2,3,4 (Hang) or 9,a,b,c (DAC).
4691*
4692*************************************************************************/
4693static uint32_t
4694em_fill_descriptors (bus_addr_t address, uint32_t length,
4695		PDESC_ARRAY desc_array)
4696{
4697	/* Since the issue is sensitive to both length and address, */
4698	/* let us first check the address... */
4699	uint32_t safe_terminator;
4700	if (length <= 4) {
4701		desc_array->descriptor[0].address = address;
4702		desc_array->descriptor[0].length = length;
4703		desc_array->elements = 1;
4704		return (desc_array->elements);
4705	}
4706	safe_terminator = (uint32_t)((((uint32_t)address & 0x7) +
4707	    (length & 0xF)) & 0xF);
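	/*
	 * Worked example (editor's note): a buffer whose address ends
	 * in 0x6 (ADDR[2:0] = 6) with length 0xE (SIZE[3:0] = 0xE)
	 * gives (6 + 0xE) & 0xF = 4, which falls in the 1-4 hang
	 * range, so the buffer is split below into a leading piece
	 * and a separate 4-byte tail.
	 */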
4708	/* If it does not fall between 0x1-0x4 or 0x9-0xC, one descriptor is safe. */
4709	if (safe_terminator == 0 ||
4710	    (safe_terminator > 4 && safe_terminator < 9) ||
4711	    (safe_terminator > 0xC && safe_terminator <= 0xF)) {
4714		desc_array->descriptor[0].address = address;
4715		desc_array->descriptor[0].length = length;
4716		desc_array->elements = 1;
4717		return (desc_array->elements);
4718	}
4719
4720	desc_array->descriptor[0].address = address;
4721	desc_array->descriptor[0].length = length - 4;
4722	desc_array->descriptor[1].address = address + (length - 4);
4723	desc_array->descriptor[1].length = 4;
4724	desc_array->elements = 2;
4725	return (desc_array->elements);
4726}
4727
4728/**********************************************************************
4729 *
4730 *  Update the board statistics counters.
4731 *
4732 **********************************************************************/
4733static void
4734em_update_stats_counters(struct adapter *adapter)
4735{
4736	struct ifnet   *ifp;
4737
4738	if (adapter->hw.media_type == e1000_media_type_copper ||
4739	   (E1000_READ_REG(&adapter->hw, E1000_STATUS) & E1000_STATUS_LU)) {
4740		adapter->stats.symerrs += E1000_READ_REG(&adapter->hw, E1000_SYMERRS);
4741		adapter->stats.sec += E1000_READ_REG(&adapter->hw, E1000_SEC);
4742	}
4743	adapter->stats.crcerrs += E1000_READ_REG(&adapter->hw, E1000_CRCERRS);
4744	adapter->stats.mpc += E1000_READ_REG(&adapter->hw, E1000_MPC);
4745	adapter->stats.scc += E1000_READ_REG(&adapter->hw, E1000_SCC);
4746	adapter->stats.ecol += E1000_READ_REG(&adapter->hw, E1000_ECOL);
4747
4748	adapter->stats.mcc += E1000_READ_REG(&adapter->hw, E1000_MCC);
4749	adapter->stats.latecol += E1000_READ_REG(&adapter->hw, E1000_LATECOL);
4750	adapter->stats.colc += E1000_READ_REG(&adapter->hw, E1000_COLC);
4751	adapter->stats.dc += E1000_READ_REG(&adapter->hw, E1000_DC);
4752	adapter->stats.rlec += E1000_READ_REG(&adapter->hw, E1000_RLEC);
4753	adapter->stats.xonrxc += E1000_READ_REG(&adapter->hw, E1000_XONRXC);
4754	adapter->stats.xontxc += E1000_READ_REG(&adapter->hw, E1000_XONTXC);
4755	adapter->stats.xoffrxc += E1000_READ_REG(&adapter->hw, E1000_XOFFRXC);
4756	adapter->stats.xofftxc += E1000_READ_REG(&adapter->hw, E1000_XOFFTXC);
4757	adapter->stats.fcruc += E1000_READ_REG(&adapter->hw, E1000_FCRUC);
4758	adapter->stats.prc64 += E1000_READ_REG(&adapter->hw, E1000_PRC64);
4759	adapter->stats.prc127 += E1000_READ_REG(&adapter->hw, E1000_PRC127);
4760	adapter->stats.prc255 += E1000_READ_REG(&adapter->hw, E1000_PRC255);
4761	adapter->stats.prc511 += E1000_READ_REG(&adapter->hw, E1000_PRC511);
4762	adapter->stats.prc1023 += E1000_READ_REG(&adapter->hw, E1000_PRC1023);
4763	adapter->stats.prc1522 += E1000_READ_REG(&adapter->hw, E1000_PRC1522);
4764	adapter->stats.gprc += E1000_READ_REG(&adapter->hw, E1000_GPRC);
4765	adapter->stats.bprc += E1000_READ_REG(&adapter->hw, E1000_BPRC);
4766	adapter->stats.mprc += E1000_READ_REG(&adapter->hw, E1000_MPRC);
4767	adapter->stats.gptc += E1000_READ_REG(&adapter->hw, E1000_GPTC);
4768
4769	/* For the 64-bit byte counters the low dword must be read first. */
4770	/* Both registers clear on the read of the high dword */
4771
4772	adapter->stats.gorcl += E1000_READ_REG(&adapter->hw, E1000_GORCL);
4773	adapter->stats.gorch += E1000_READ_REG(&adapter->hw, E1000_GORCH);
4774	adapter->stats.gotcl += E1000_READ_REG(&adapter->hw, E1000_GOTCL);
4775	adapter->stats.gotch += E1000_READ_REG(&adapter->hw, E1000_GOTCH);
4776
4777	adapter->stats.rnbc += E1000_READ_REG(&adapter->hw, E1000_RNBC);
4778	adapter->stats.ruc += E1000_READ_REG(&adapter->hw, E1000_RUC);
4779	adapter->stats.rfc += E1000_READ_REG(&adapter->hw, E1000_RFC);
4780	adapter->stats.roc += E1000_READ_REG(&adapter->hw, E1000_ROC);
4781	adapter->stats.rjc += E1000_READ_REG(&adapter->hw, E1000_RJC);
4782
4783	adapter->stats.torl += E1000_READ_REG(&adapter->hw, E1000_TORL);
4784	adapter->stats.torh += E1000_READ_REG(&adapter->hw, E1000_TORH);
4785	adapter->stats.totl += E1000_READ_REG(&adapter->hw, E1000_TOTL);
4786	adapter->stats.toth += E1000_READ_REG(&adapter->hw, E1000_TOTH);
4787
4788	adapter->stats.tpr += E1000_READ_REG(&adapter->hw, E1000_TPR);
4789	adapter->stats.tpt += E1000_READ_REG(&adapter->hw, E1000_TPT);
4790	adapter->stats.ptc64 += E1000_READ_REG(&adapter->hw, E1000_PTC64);
4791	adapter->stats.ptc127 += E1000_READ_REG(&adapter->hw, E1000_PTC127);
4792	adapter->stats.ptc255 += E1000_READ_REG(&adapter->hw, E1000_PTC255);
4793	adapter->stats.ptc511 += E1000_READ_REG(&adapter->hw, E1000_PTC511);
4794	adapter->stats.ptc1023 += E1000_READ_REG(&adapter->hw, E1000_PTC1023);
4795	adapter->stats.ptc1522 += E1000_READ_REG(&adapter->hw, E1000_PTC1522);
4796	adapter->stats.mptc += E1000_READ_REG(&adapter->hw, E1000_MPTC);
4797	adapter->stats.bptc += E1000_READ_REG(&adapter->hw, E1000_BPTC);
4798
4799	if (adapter->hw.mac.type >= e1000_82543) {
4800		adapter->stats.algnerrc +=
4801		    E1000_READ_REG(&adapter->hw, E1000_ALGNERRC);
4802		adapter->stats.rxerrc +=
4803		    E1000_READ_REG(&adapter->hw, E1000_RXERRC);
4804		adapter->stats.tncrs +=
4805		    E1000_READ_REG(&adapter->hw, E1000_TNCRS);
4806		adapter->stats.cexterr +=
4807		    E1000_READ_REG(&adapter->hw, E1000_CEXTERR);
4808		adapter->stats.tsctc +=
4809		    E1000_READ_REG(&adapter->hw, E1000_TSCTC);
4810		adapter->stats.tsctfc +=
4811		    E1000_READ_REG(&adapter->hw, E1000_TSCTFC);
4812	}
4813	ifp = adapter->ifp;
4814
4815	ifp->if_collisions = adapter->stats.colc;
4816
4817	/* Rx Errors */
4818	ifp->if_ierrors = adapter->dropped_pkts + adapter->stats.rxerrc +
4819	    adapter->stats.crcerrs + adapter->stats.algnerrc +
4820	    adapter->stats.ruc + adapter->stats.roc +
4821	    adapter->stats.mpc + adapter->stats.cexterr;
4822
4823	/* Tx Errors */
4824	ifp->if_oerrors = adapter->stats.ecol +
4825	    adapter->stats.latecol + adapter->watchdog_events;
4826}
4827
4828
4829/**********************************************************************
4830 *
4831 *  This routine is called only when em_display_debug_stats is enabled.
4832 *  This routine provides a way to take a look at important statistics
4833 *  maintained by the driver and hardware.
4834 *
4835 **********************************************************************/
4836static void
4837em_print_debug_info(struct adapter *adapter)
4838{
4839	device_t dev = adapter->dev;
4840	uint8_t *hw_addr = adapter->hw.hw_addr;
4841
4842	device_printf(dev, "Adapter hardware address = %p \n", hw_addr);
4843	device_printf(dev, "CTRL = 0x%x RCTL = 0x%x \n",
4844	    E1000_READ_REG(&adapter->hw, E1000_CTRL),
4845	    E1000_READ_REG(&adapter->hw, E1000_RCTL));
4846	device_printf(dev, "Packet buffer = Tx=%dk Rx=%dk \n",
4847	    ((E1000_READ_REG(&adapter->hw, E1000_PBA) & 0xffff0000) >> 16),
4848	    (E1000_READ_REG(&adapter->hw, E1000_PBA) & 0xffff));
4849	device_printf(dev, "Flow control watermarks high = %d low = %d\n",
4850	    adapter->hw.mac.fc_high_water,
4851	    adapter->hw.mac.fc_low_water);
4852	device_printf(dev, "tx_int_delay = %d, tx_abs_int_delay = %d\n",
4853	    E1000_READ_REG(&adapter->hw, E1000_TIDV),
4854	    E1000_READ_REG(&adapter->hw, E1000_TADV));
4855	device_printf(dev, "rx_int_delay = %d, rx_abs_int_delay = %d\n",
4856	    E1000_READ_REG(&adapter->hw, E1000_RDTR),
4857	    E1000_READ_REG(&adapter->hw, E1000_RADV));
4858	device_printf(dev, "fifo workaround = %lld, fifo_reset_count = %lld\n",
4859	    (long long)adapter->tx_fifo_wrk_cnt,
4860	    (long long)adapter->tx_fifo_reset_cnt);
4861	device_printf(dev, "hw tdh = %d, hw tdt = %d\n",
4862	    E1000_READ_REG(&adapter->hw, E1000_TDH),
4863	    E1000_READ_REG(&adapter->hw, E1000_TDT));
4864	device_printf(dev, "hw rdh = %d, hw rdt = %d\n",
4865	    E1000_READ_REG(&adapter->hw, E1000_RDH),
4866	    E1000_READ_REG(&adapter->hw, E1000_RDT));
4867	device_printf(dev, "Num Tx descriptors avail = %d\n",
4868	    adapter->num_tx_desc_avail);
4869	device_printf(dev, "Tx Descriptors not avail1 = %ld\n",
4870	    adapter->no_tx_desc_avail1);
4871	device_printf(dev, "Tx Descriptors not avail2 = %ld\n",
4872	    adapter->no_tx_desc_avail2);
4873	device_printf(dev, "Std mbuf failed = %ld\n",
4874	    adapter->mbuf_alloc_failed);
4875	device_printf(dev, "Std mbuf cluster failed = %ld\n",
4876	    adapter->mbuf_cluster_failed);
4877	device_printf(dev, "Driver dropped packets = %ld\n",
4878	    adapter->dropped_pkts);
4879	device_printf(dev, "Driver tx dma failure in encap = %ld\n",
4880		adapter->no_tx_dma_setup);
4881}
4882
4883static void
4884em_print_hw_stats(struct adapter *adapter)
4885{
4886	device_t dev = adapter->dev;
4887
4888	device_printf(dev, "Excessive collisions = %lld\n",
4889	    (long long)adapter->stats.ecol);
4890#if	(DEBUG_HW > 0)  /* Don't output these errors normally */
4891	device_printf(dev, "Symbol errors = %lld\n",
4892	    (long long)adapter->stats.symerrs);
4893#endif
4894	device_printf(dev, "Sequence errors = %lld\n",
4895	    (long long)adapter->stats.sec);
4896	device_printf(dev, "Defer count = %lld\n",
4897	    (long long)adapter->stats.dc);
4898	device_printf(dev, "Missed Packets = %lld\n",
4899	    (long long)adapter->stats.mpc);
4900	device_printf(dev, "Receive No Buffers = %lld\n",
4901	    (long long)adapter->stats.rnbc);
4902	/* RLEC is inaccurate on some hardware, so calculate our own. */
4903	device_printf(dev, "Receive Length Errors = %lld\n",
4904	    ((long long)adapter->stats.roc + (long long)adapter->stats.ruc));
4905	device_printf(dev, "Receive errors = %lld\n",
4906	    (long long)adapter->stats.rxerrc);
4907	device_printf(dev, "Crc errors = %lld\n",
4908	    (long long)adapter->stats.crcerrs);
4909	device_printf(dev, "Alignment errors = %lld\n",
4910	    (long long)adapter->stats.algnerrc);
4911	device_printf(dev, "Carrier extension errors = %lld\n",
4912	    (long long)adapter->stats.cexterr);
4913	device_printf(dev, "RX overruns = %ld\n", adapter->rx_overruns);
4914	device_printf(dev, "watchdog timeouts = %ld\n",
4915	    adapter->watchdog_events);
4916	device_printf(dev, "XON Rcvd = %lld\n",
4917	    (long long)adapter->stats.xonrxc);
4918	device_printf(dev, "XON Xmtd = %lld\n",
4919	    (long long)adapter->stats.xontxc);
4920	device_printf(dev, "XOFF Rcvd = %lld\n",
4921	    (long long)adapter->stats.xoffrxc);
4922	device_printf(dev, "XOFF Xmtd = %lld\n",
4923	    (long long)adapter->stats.xofftxc);
4924	device_printf(dev, "Good Packets Rcvd = %lld\n",
4925	    (long long)adapter->stats.gprc);
4926	device_printf(dev, "Good Packets Xmtd = %lld\n",
4927	    (long long)adapter->stats.gptc);
4928	device_printf(dev, "TSO Contexts Xmtd = %lld\n",
4929	    (long long)adapter->stats.tsctc);
4930	device_printf(dev, "TSO Contexts Failed = %lld\n",
4931	    (long long)adapter->stats.tsctfc);
4932}
4933
4934static int
4935em_sysctl_debug_info(SYSCTL_HANDLER_ARGS)
4936{
4937	struct adapter *adapter;
4938	int error;
4939	int result;
4940
4941	result = -1;
4942	error = sysctl_handle_int(oidp, &result, 0, req);
4943
4944	if (error || !req->newptr)
4945		return (error);
4946
4947	if (result == 1) {
4948		adapter = (struct adapter *)arg1;
4949		em_print_debug_info(adapter);
4950	}
4951
4952	return (error);
4953}
4954
4955
4956static int
4957em_sysctl_stats(SYSCTL_HANDLER_ARGS)
4958{
4959	struct adapter *adapter;
4960	int error;
4961	int result;
4962
4963	result = -1;
4964	error = sysctl_handle_int(oidp, &result, 0, req);
4965
4966	if (error || !req->newptr)
4967		return (error);
4968
4969	if (result == 1) {
4970		adapter = (struct adapter *)arg1;
4971		em_print_hw_stats(adapter);
4972	}
4973
4974	return (error);
4975}
4976
4977static int
4978em_sysctl_int_delay(SYSCTL_HANDLER_ARGS)
4979{
4980	struct em_int_delay_info *info;
4981	struct adapter *adapter;
4982	uint32_t regval;
4983	int error;
4984	int usecs;
4985	int ticks;
4986
4987	info = (struct em_int_delay_info *)arg1;
4988	usecs = info->value;
4989	error = sysctl_handle_int(oidp, &usecs, 0, req);
4990	if (error != 0 || req->newptr == NULL)
4991		return (error);
4992	if (usecs < 0 || usecs > EM_TICKS_TO_USECS(65535))
4993		return (EINVAL);
4994	info->value = usecs;
4995	ticks = EM_USECS_TO_TICKS(usecs);
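	/*
	 * Editor's note: the delay registers count in units of
	 * 1.024us, so EM_USECS_TO_TICKS() is assumed to scale
	 * accordingly; e.g. a request for 128us would program
	 * roughly 125 ticks.
	 */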
4996
4997	adapter = info->adapter;
4998
4999	EM_LOCK(adapter);
5000	regval = E1000_READ_OFFSET(&adapter->hw, info->offset);
5001	regval = (regval & ~0xffff) | (ticks & 0xffff);
5002	/* Handle a few special cases. */
5003	switch (info->offset) {
5004	case E1000_RDTR:
5005		break;
5006	case E1000_TIDV:
5007		if (ticks == 0) {
5008			adapter->txd_cmd &= ~E1000_TXD_CMD_IDE;
5009			/* Don't write 0 into the TIDV register. */
5010			regval++;
5011		} else
5012			if (adapter->hw.mac.type != e1000_82575)
5013				adapter->txd_cmd |= E1000_TXD_CMD_IDE;
5014		break;
5015	}
5016	E1000_WRITE_OFFSET(&adapter->hw, info->offset, regval);
5017	EM_UNLOCK(adapter);
5018	return (0);
5019}
5020
5021static void
5022em_add_int_delay_sysctl(struct adapter *adapter, const char *name,
5023	const char *description, struct em_int_delay_info *info,
5024	int offset, int value)
5025{
5026	info->adapter = adapter;
5027	info->offset = offset;
5028	info->value = value;
5029	SYSCTL_ADD_PROC(device_get_sysctl_ctx(adapter->dev),
5030	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
5031	    OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW,
5032	    info, 0, em_sysctl_int_delay, "I", description);
5033}
5034
5035#ifndef DEVICE_POLLING
5036static void
5037em_add_rx_process_limit(struct adapter *adapter, const char *name,
5038	const char *description, int *limit, int value)
5039{
5040	*limit = value;
5041	SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev),
5042	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
5043	    OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW, limit, value, description);
5044}
5045#endif
5046