/**************************************************************************

Copyright (c) 2001-2007, Intel Corporation
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

 1. Redistributions of source code must retain the above copyright notice,
    this list of conditions and the following disclaimer.

 2. Redistributions in binary form must reproduce the above copyright
    notice, this list of conditions and the following disclaimer in the
    documentation and/or other materials provided with the distribution.

 3. Neither the name of the Intel Corporation nor the names of its
    contributors may be used to endorse or promote products derived from
    this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.

***************************************************************************/

/*$FreeBSD: head/sys/dev/em/if_em.c 169637 2007-05-17 00:14:03Z jfv $*/

#ifdef HAVE_KERNEL_OPTION_HEADERS
#include "opt_device_polling.h"
#endif

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/bus.h>
#include <sys/endian.h>
#include <sys/kernel.h>
#include <sys/kthread.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/module.h>
#include <sys/rman.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/sysctl.h>
#include <sys/taskqueue.h>

#include <machine/bus.h>
#include <machine/resource.h>

#include <net/bpf.h>
#include <net/ethernet.h>
#include <net/if.h>
#include <net/if_arp.h>
#include <net/if_dl.h>
#include <net/if_media.h>

#include <net/if_types.h>
#include <net/if_vlan_var.h>

#include <netinet/in_systm.h>
#include <netinet/in.h>
#include <netinet/if_ether.h>
#include <netinet/ip.h>
#include <netinet/ip6.h>
#include <netinet/tcp.h>
#include <netinet/udp.h>

#include <machine/in_cksum.h>
#include <dev/pci/pcivar.h>
#include <dev/pci/pcireg.h>

#include "e1000_api.h"
#include "e1000_82575.h"
#include "if_em.h"

/*********************************************************************
 *  Set this to one to display debug statistics
 *********************************************************************/
int	em_display_debug_stats = 0;

/*********************************************************************
 *  Driver version:
 *********************************************************************/
char em_driver_version[] = "Version - 6.5.2";


/*********************************************************************
 *  PCI Device ID Table
 *
 *  Used by probe to select devices to load on
 *  Last field stores an index into e1000_strings
 *  Last entry must be all 0s
 *
 *  { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index }
 *********************************************************************/

static em_vendor_info_t em_vendor_info_array[] =
{
	/* Intel(R) PRO/1000 Network Connection */
	{ 0x8086, E1000_DEV_ID_82540EM,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82540EM_LOM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82540EP,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82540EP_LOM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82540EP_LP,	PCI_ANY_ID, PCI_ANY_ID, 0},

	{ 0x8086, E1000_DEV_ID_82541EI,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82541ER,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82541ER_LOM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82541EI_MOBILE,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82541GI,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82541GI_LF,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82541GI_MOBILE,	PCI_ANY_ID, PCI_ANY_ID, 0},

	{ 0x8086, E1000_DEV_ID_82542,		PCI_ANY_ID, PCI_ANY_ID, 0},

	{ 0x8086, E1000_DEV_ID_82543GC_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82543GC_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},

	{ 0x8086, E1000_DEV_ID_82544EI_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82544EI_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82544GC_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82544GC_LOM,	PCI_ANY_ID, PCI_ANY_ID, 0},

	{ 0x8086, E1000_DEV_ID_82545EM_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82545EM_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82545GM_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82545GM_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82545GM_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},

	{ 0x8086, E1000_DEV_ID_82546EB_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82546EB_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82546EB_QUAD_COPPER, PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82546GB_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82546GB_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82546GB_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82546GB_PCIE,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82546GB_QUAD_COPPER, PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82546GB_QUAD_COPPER_KSP3,
						PCI_ANY_ID, PCI_ANY_ID, 0},

	{ 0x8086, E1000_DEV_ID_82547EI,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82547EI_MOBILE,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82547GI,		PCI_ANY_ID, PCI_ANY_ID, 0},

	{ 0x8086, E1000_DEV_ID_82571EB_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER_LP,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_QUAD_FIBER,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82572EI_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82572EI_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82572EI_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82572EI,		PCI_ANY_ID, PCI_ANY_ID, 0},

	{ 0x8086, E1000_DEV_ID_82573E,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82573E_IAMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82573L,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_SPT,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_SPT,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_DPT,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_DPT,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IGP_M_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IGP_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IGP_C,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IFE,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IFE_GT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IFE_G,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IGP_M,	PCI_ANY_ID, PCI_ANY_ID, 0},

	{ 0x8086, E1000_DEV_ID_ICH9_IGP_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IGP_C,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IFE,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IFE_GT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IFE_G,	PCI_ANY_ID, PCI_ANY_ID, 0},

	{ 0x8086, E1000_DEV_ID_82575EB_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82575EB_FIBER_SERDES,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82575EM_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82575EM_FIBER_SERDES,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82575GB_QUAD_COPPER,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	/* required last entry */
	{ 0, 0, 0, 0, 0}
};

/*********************************************************************
 *  Table of branding strings for all supported NICs.
 *********************************************************************/

static char *em_strings[] = {
	"Intel(R) PRO/1000 Network Connection"
};

/*********************************************************************
 *  Function prototypes
 *********************************************************************/
static int	em_probe(device_t);
static int	em_attach(device_t);
static int	em_detach(device_t);
static int	em_shutdown(device_t);
static int	em_suspend(device_t);
static int	em_resume(device_t);
static void	em_start(struct ifnet *);
static void	em_start_locked(struct ifnet *ifp);
static int	em_ioctl(struct ifnet *, u_long, caddr_t);
static void	em_watchdog(struct adapter *);
static void	em_init(void *);
static void	em_init_locked(struct adapter *);
static void	em_stop(void *);
static void	em_media_status(struct ifnet *, struct ifmediareq *);
static int	em_media_change(struct ifnet *);
static void	em_identify_hardware(struct adapter *);
static int	em_allocate_pci_resources(struct adapter *);
static int	em_allocate_intr(struct adapter *);
static void	em_free_intr(struct adapter *);
static void	em_free_pci_resources(struct adapter *);
static void	em_local_timer(void *);
static int	em_hardware_init(struct adapter *);
static void	em_setup_interface(device_t, struct adapter *);
static int	em_setup_transmit_structures(struct adapter *);
static void	em_initialize_transmit_unit(struct adapter *);
static int	em_setup_receive_structures(struct adapter *);
static void	em_initialize_receive_unit(struct adapter *);
static void	em_enable_intr(struct adapter *);
static void	em_disable_intr(struct adapter *);
static void	em_free_transmit_structures(struct adapter *);
static void	em_free_receive_structures(struct adapter *);
static void	em_update_stats_counters(struct adapter *);
static void	em_txeof(struct adapter *);
static int	em_allocate_receive_structures(struct adapter *);
static int	em_allocate_transmit_structures(struct adapter *);
static int	em_rxeof(struct adapter *, int);
#ifndef __NO_STRICT_ALIGNMENT
static int	em_fixup_rx(struct adapter *);
#endif
static void	em_receive_checksum(struct adapter *, struct e1000_rx_desc *,
		    struct mbuf *);
static void	em_transmit_checksum_setup(struct adapter *, struct mbuf *,
		    uint32_t *, uint32_t *);
static boolean_t em_tx_adv_ctx_setup(struct adapter *, struct mbuf *);
static boolean_t em_tso_setup(struct adapter *, struct mbuf *, uint32_t *,
		    uint32_t *);
static boolean_t em_tso_adv_setup(struct adapter *, struct mbuf *, uint32_t *);
static void	em_set_promisc(struct adapter *);
static void	em_disable_promisc(struct adapter *);
static void	em_set_multi(struct adapter *);
static void	em_print_hw_stats(struct adapter *);
static void	em_update_link_status(struct adapter *);
static int	em_get_buf(struct adapter *, int);
static void	em_enable_vlans(struct adapter *);
static int	em_encap(struct adapter *, struct mbuf **);
static int	em_adv_encap(struct adapter *, struct mbuf **);
static void	em_smartspeed(struct adapter *);
static int	em_82547_fifo_workaround(struct adapter *, int);
static void	em_82547_update_fifo_head(struct adapter *, int);
static int	em_82547_tx_fifo_reset(struct adapter *);
static void	em_82547_move_tail(void *);
static int	em_dma_malloc(struct adapter *, bus_size_t,
		    struct em_dma_alloc *, int);
static void	em_dma_free(struct adapter *, struct em_dma_alloc *);
static void	em_print_debug_info(struct adapter *);
static int	em_is_valid_ether_addr(uint8_t *);
static int	em_sysctl_stats(SYSCTL_HANDLER_ARGS);
static int	em_sysctl_debug_info(SYSCTL_HANDLER_ARGS);
static uint32_t	em_fill_descriptors (bus_addr_t address, uint32_t length,
		    PDESC_ARRAY desc_array);
static int	em_sysctl_int_delay(SYSCTL_HANDLER_ARGS);
static void	em_add_int_delay_sysctl(struct adapter *, const char *,
		    const char *, struct em_int_delay_info *, int, int);
/* Management and WOL Support */
static void	em_init_manageability(struct adapter *);
static void	em_release_manageability(struct adapter *);
static void	em_get_hw_control(struct adapter *);
static void	em_release_hw_control(struct adapter *);
static void	em_enable_wakeup(device_t);

#ifdef DEVICE_POLLING
static poll_handler_t em_poll;
static void	em_intr(void *);
#else
static int	em_intr_fast(void *);
static void	em_add_rx_process_limit(struct adapter *, const char *,
		    const char *, int *, int);
static void	em_handle_rxtx(void *context, int pending);
static void	em_handle_link(void *context, int pending);
#endif

/*********************************************************************
 *  FreeBSD Device Interface Entry Points
 *********************************************************************/

static device_method_t em_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe, em_probe),
	DEVMETHOD(device_attach, em_attach),
	DEVMETHOD(device_detach, em_detach),
	DEVMETHOD(device_shutdown, em_shutdown),
	DEVMETHOD(device_suspend, em_suspend),
	DEVMETHOD(device_resume, em_resume),
	{0, 0}
};

static driver_t em_driver = {
	"em", em_methods, sizeof(struct adapter),
};

static devclass_t em_devclass;
DRIVER_MODULE(em, pci, em_driver, em_devclass, 0, 0);
MODULE_DEPEND(em, pci, 1, 1, 1);
MODULE_DEPEND(em, ether, 1, 1, 1);

/*********************************************************************
 *  Tunable default values.
 *********************************************************************/

#define EM_TICKS_TO_USECS(ticks)	((1024 * (ticks) + 500) / 1000)
#define EM_USECS_TO_TICKS(usecs)	((1000 * (usecs) + 512) / 1024)
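
/*
 * The hardware interrupt delay registers tick in units of 1.024
 * microseconds, so the two macros above convert with rounding; for
 * example, EM_USECS_TO_TICKS(128) = (1000 * 128 + 512) / 1024 = 125.
 */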
#define M_TSO_LEN			66

static int em_tx_int_delay_dflt = EM_TICKS_TO_USECS(EM_TIDV);
static int em_rx_int_delay_dflt = EM_TICKS_TO_USECS(EM_RDTR);
static int em_tx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_TADV);
static int em_rx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_RADV);
static int em_rxd = EM_DEFAULT_RXD;
static int em_txd = EM_DEFAULT_TXD;
static int em_smart_pwr_down = FALSE;

TUNABLE_INT("hw.em.tx_int_delay", &em_tx_int_delay_dflt);
TUNABLE_INT("hw.em.rx_int_delay", &em_rx_int_delay_dflt);
TUNABLE_INT("hw.em.tx_abs_int_delay", &em_tx_abs_int_delay_dflt);
TUNABLE_INT("hw.em.rx_abs_int_delay", &em_rx_abs_int_delay_dflt);
TUNABLE_INT("hw.em.rxd", &em_rxd);
TUNABLE_INT("hw.em.txd", &em_txd);
TUNABLE_INT("hw.em.smart_pwr_down", &em_smart_pwr_down);
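
/*
 * Each of the tunables above may be overridden at boot from
 * /boot/loader.conf, for example:
 *	hw.em.rxd="1024"
 *	hw.em.rx_int_delay="0"
 */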
#ifndef DEVICE_POLLING
/* How many packets rxeof tries to clean at a time */
static int em_rx_process_limit = 100;
TUNABLE_INT("hw.em.rx_process_limit", &em_rx_process_limit);
#endif
/* Global used in WOL setup with multiport cards */
static int global_quad_port_a = 0;

/*********************************************************************
 *  Device identification routine
 *
 *  em_probe determines if the driver should be loaded on the
 *  adapter, based on the PCI vendor/device ID of the adapter.
 *
 *  return BUS_PROBE_DEFAULT on success, positive on failure
 *********************************************************************/

static int
em_probe(device_t dev)
{
	char		adapter_name[60];
	uint16_t	pci_vendor_id = 0;
	uint16_t	pci_device_id = 0;
	uint16_t	pci_subvendor_id = 0;
	uint16_t	pci_subdevice_id = 0;
	em_vendor_info_t *ent;

	INIT_DEBUGOUT("em_probe: begin");

	pci_vendor_id = pci_get_vendor(dev);
	if (pci_vendor_id != EM_VENDOR_ID)
		return (ENXIO);

	pci_device_id = pci_get_device(dev);
	pci_subvendor_id = pci_get_subvendor(dev);
	pci_subdevice_id = pci_get_subdevice(dev);

	ent = em_vendor_info_array;
	while (ent->vendor_id != 0) {
		if ((pci_vendor_id == ent->vendor_id) &&
		    (pci_device_id == ent->device_id) &&

		    ((pci_subvendor_id == ent->subvendor_id) ||
		    (ent->subvendor_id == PCI_ANY_ID)) &&

		    ((pci_subdevice_id == ent->subdevice_id) ||
		    (ent->subdevice_id == PCI_ANY_ID))) {
			sprintf(adapter_name, "%s %s",
				em_strings[ent->index],
				em_driver_version);
			device_set_desc_copy(dev, adapter_name);
			return (BUS_PROBE_DEFAULT);
		}
		ent++;
	}

	return (ENXIO);
}

/*********************************************************************
 *  Device initialization routine
 *
 *  The attach entry point is called when the driver is being loaded.
 *  This routine identifies the type of hardware, allocates all resources
 *  and initializes the hardware.
 *
 *  return 0 on success, positive on failure
 *********************************************************************/

static int
em_attach(device_t dev)
{
	struct adapter	*adapter;
	int		tsize, rsize;
	int		error = 0;
	u16		eeprom_data, device_id;

	INIT_DEBUGOUT("em_attach: begin");

	adapter = device_get_softc(dev);
	adapter->dev = adapter->osdep.dev = dev;
	EM_LOCK_INIT(adapter, device_get_nameunit(dev));

	/* SYSCTL stuff */
	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
	    OID_AUTO, "debug_info", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
	    em_sysctl_debug_info, "I", "Debug Information");

	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
	    OID_AUTO, "stats", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
	    em_sysctl_stats, "I", "Statistics");

	callout_init_mtx(&adapter->timer, &adapter->mtx, 0);
	callout_init_mtx(&adapter->tx_fifo_timer, &adapter->mtx, 0);

	/* Determine hardware and mac info */
	em_identify_hardware(adapter);

	/* Setup PCI resources */
	if (em_allocate_pci_resources(adapter)) {
		device_printf(dev, "Allocation of PCI resources failed\n");
		error = ENXIO;
		goto err_pci;
	}

	/*
	** For ICH8 and family we need to
	** map the flash memory, and this
	** must happen after the MAC is
	** identified
	*/
	if ((adapter->hw.mac.type == e1000_ich8lan) ||
	    (adapter->hw.mac.type == e1000_ich9lan)) {
		int rid = EM_BAR_TYPE_FLASH;
		adapter->flash_mem = bus_alloc_resource_any(dev,
		    SYS_RES_MEMORY, &rid, RF_ACTIVE);
		/* This is used in the shared code */
		adapter->hw.flash_address = (u8 *)adapter->flash_mem;
		adapter->osdep.flash_bus_space_tag =
		    rman_get_bustag(adapter->flash_mem);
		adapter->osdep.flash_bus_space_handle =
		    rman_get_bushandle(adapter->flash_mem);
	}

	/* Do Shared Code initialization */
	if (e1000_setup_init_funcs(&adapter->hw, TRUE)) {
		device_printf(dev, "Setup of Shared code failed\n");
		error = ENXIO;
		goto err_pci;
	}

	e1000_get_bus_info(&adapter->hw);

	/* Set up some sysctls for the tunable interrupt delays */
	em_add_int_delay_sysctl(adapter, "rx_int_delay",
	    "receive interrupt delay in usecs", &adapter->rx_int_delay,
	    E1000_REGISTER(&adapter->hw, E1000_RDTR), em_rx_int_delay_dflt);
	em_add_int_delay_sysctl(adapter, "tx_int_delay",
	    "transmit interrupt delay in usecs", &adapter->tx_int_delay,
	    E1000_REGISTER(&adapter->hw, E1000_TIDV), em_tx_int_delay_dflt);
	if (adapter->hw.mac.type >= e1000_82540) {
		em_add_int_delay_sysctl(adapter, "rx_abs_int_delay",
		    "receive interrupt delay limit in usecs",
		    &adapter->rx_abs_int_delay,
		    E1000_REGISTER(&adapter->hw, E1000_RADV),
		    em_rx_abs_int_delay_dflt);
		em_add_int_delay_sysctl(adapter, "tx_abs_int_delay",
		    "transmit interrupt delay limit in usecs",
		    &adapter->tx_abs_int_delay,
		    E1000_REGISTER(&adapter->hw, E1000_TADV),
		    em_tx_abs_int_delay_dflt);
	}

#ifndef DEVICE_POLLING
	/* Sysctls for limiting the amount of work done in the taskqueue */
	em_add_rx_process_limit(adapter, "rx_processing_limit",
	    "max number of rx packets to process", &adapter->rx_process_limit,
	    em_rx_process_limit);
#endif

	/*
	 * Validate the number of transmit and receive descriptors. They
	 * must not exceed the hardware maximum, and the ring size in
	 * bytes must be a multiple of EM_DBA_ALIGN.
	 */
	if (((em_txd * sizeof(struct e1000_tx_desc)) % EM_DBA_ALIGN) != 0 ||
	    (adapter->hw.mac.type >= e1000_82544 && em_txd > EM_MAX_TXD) ||
	    (adapter->hw.mac.type < e1000_82544 && em_txd > EM_MAX_TXD_82543) ||
	    (em_txd < EM_MIN_TXD)) {
		device_printf(dev, "Using %d TX descriptors instead of %d!\n",
		    EM_DEFAULT_TXD, em_txd);
		adapter->num_tx_desc = EM_DEFAULT_TXD;
	} else
		adapter->num_tx_desc = em_txd;
	if (((em_rxd * sizeof(struct e1000_rx_desc)) % EM_DBA_ALIGN) != 0 ||
	    (adapter->hw.mac.type >= e1000_82544 && em_rxd > EM_MAX_RXD) ||
	    (adapter->hw.mac.type < e1000_82544 && em_rxd > EM_MAX_RXD_82543) ||
	    (em_rxd < EM_MIN_RXD)) {
		device_printf(dev, "Using %d RX descriptors instead of %d!\n",
		    EM_DEFAULT_RXD, em_rxd);
		adapter->num_rx_desc = EM_DEFAULT_RXD;
	} else
		adapter->num_rx_desc = em_rxd;

	adapter->hw.mac.autoneg = DO_AUTO_NEG;
	adapter->hw.phy.wait_for_link = FALSE;
	adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
	adapter->rx_buffer_len = 2048;

	e1000_init_script_state_82541(&adapter->hw, TRUE);
	e1000_set_tbi_compatibility_82543(&adapter->hw, TRUE);

	/* Copper options */
	if (adapter->hw.media_type == e1000_media_type_copper) {
		adapter->hw.phy.mdix = AUTO_ALL_MODES;
		adapter->hw.phy.disable_polarity_correction = FALSE;
		adapter->hw.phy.ms_type = EM_MASTER_SLAVE;
	}

	/*
	 * Set the max frame size assuming standard ethernet
	 * sized frames.
	 */
	adapter->hw.mac.max_frame_size =
	    ETHERMTU + ETHER_HDR_LEN + ETHERNET_FCS_SIZE;

	adapter->hw.mac.min_frame_size = ETH_ZLEN + ETHERNET_FCS_SIZE;

	/*
	 * This controls when hardware reports transmit completion
	 * status.
	 */
	adapter->hw.mac.report_tx_early = 1;

	tsize = roundup2(adapter->num_tx_desc * sizeof(struct e1000_tx_desc),
	    EM_DBA_ALIGN);

	/* Allocate Transmit Descriptor ring */
	if (em_dma_malloc(adapter, tsize, &adapter->txdma, BUS_DMA_NOWAIT)) {
		device_printf(dev, "Unable to allocate tx_desc memory\n");
		error = ENOMEM;
		goto err_tx_desc;
	}
	adapter->tx_desc_base =
	    (struct e1000_tx_desc *)adapter->txdma.dma_vaddr;

	rsize = roundup2(adapter->num_rx_desc * sizeof(struct e1000_rx_desc),
	    EM_DBA_ALIGN);

	/* Allocate Receive Descriptor ring */
	if (em_dma_malloc(adapter, rsize, &adapter->rxdma, BUS_DMA_NOWAIT)) {
		device_printf(dev, "Unable to allocate rx_desc memory\n");
		error = ENOMEM;
		goto err_rx_desc;
	}
	adapter->rx_desc_base =
	    (struct e1000_rx_desc *)adapter->rxdma.dma_vaddr;

	/* Make sure we have a good EEPROM before we read from it */
	if (e1000_validate_nvm_checksum(&adapter->hw) < 0) {
		/*
		** Some PCI-E parts fail the first check due to
		** the link being in a sleep state; call it again,
		** and if it fails a second time it's a real issue.
		*/
		if (e1000_validate_nvm_checksum(&adapter->hw) < 0) {
			device_printf(dev,
			    "The EEPROM Checksum Is Not Valid\n");
			error = EIO;
			goto err_hw_init;
		}
	}

	if (e1000_read_part_num(&adapter->hw, &(adapter->part_num)) < 0) {
		device_printf(dev, "EEPROM read error "
		    "reading part number\n");
		error = EIO;
		goto err_hw_init;
	}

	/* Initialize the hardware */
	if (em_hardware_init(adapter)) {
		device_printf(dev, "Unable to initialize the hardware\n");
		error = EIO;
		goto err_hw_init;
	}

	/* Copy the permanent MAC address out of the EEPROM */
	if (e1000_read_mac_addr(&adapter->hw) < 0) {
		device_printf(dev, "EEPROM read error while reading MAC"
		    " address\n");
		error = EIO;
		goto err_hw_init;
	}

	if (!em_is_valid_ether_addr(adapter->hw.mac.addr)) {
		device_printf(dev, "Invalid MAC address\n");
		error = EIO;
		goto err_hw_init;
	}

	/* Setup OS specific network interface */
	em_setup_interface(dev, adapter);

	em_allocate_intr(adapter);

	/* Initialize statistics */
	em_update_stats_counters(adapter);

	adapter->hw.mac.get_link_status = 1;
	em_update_link_status(adapter);

	/* Indicate SOL/IDER usage */
	if (e1000_check_reset_block(&adapter->hw))
		device_printf(dev,
		    "PHY reset is blocked due to SOL/IDER session.\n");

	/* Determine if we have to control management hardware */
	adapter->has_manage = e1000_enable_mng_pass_thru(&adapter->hw);

	/*
	 * Setup Wake-on-Lan
	 */
	switch (adapter->hw.mac.type) {

	case e1000_82542:
	case e1000_82543:
		break;
	case e1000_82546:
	case e1000_82546_rev_3:
	case e1000_82571:
	case e1000_80003es2lan:
		if (adapter->hw.bus.func == 1)
			e1000_read_nvm(&adapter->hw,
			    NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data);
		else
			e1000_read_nvm(&adapter->hw,
			    NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
		eeprom_data &= EM_EEPROM_APME;
		break;
	default:
		/* APME bit in EEPROM is mapped to WUC.APME */
		eeprom_data = E1000_READ_REG(&adapter->hw, E1000_WUC) &
		    E1000_WUC_APME;
		break;
	}
	if (eeprom_data)
		adapter->wol = E1000_WUFC_MAG;
	/*
	 * We have the EEPROM settings, now apply the special cases
	 * where the EEPROM may be wrong or the board won't support
	 * Wake on LAN on a particular port.
	 */
	device_id = pci_get_device(dev);
	switch (device_id) {
	case E1000_DEV_ID_82546GB_PCIE:
		adapter->wol = 0;
		break;
	case E1000_DEV_ID_82546EB_FIBER:
	case E1000_DEV_ID_82546GB_FIBER:
	case E1000_DEV_ID_82571EB_FIBER:
		/* Wake events only supported on port A for dual fiber
		 * regardless of eeprom setting */
		if (E1000_READ_REG(&adapter->hw, E1000_STATUS) &
		    E1000_STATUS_FUNC_1)
			adapter->wol = 0;
		break;
	case E1000_DEV_ID_82546GB_QUAD_COPPER_KSP3:
	case E1000_DEV_ID_82571EB_QUAD_COPPER:
	case E1000_DEV_ID_82571EB_QUAD_FIBER:
	case E1000_DEV_ID_82571EB_QUAD_COPPER_LP:
		/* if quad port adapter, disable WoL on all but port A */
		if (global_quad_port_a != 0)
			adapter->wol = 0;
		/* Reset for multiple quad port adapters */
		if (++global_quad_port_a == 4)
			global_quad_port_a = 0;
		break;
	}

	/* Do we need workaround for 82544 PCI-X adapter? */
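	/*
	 * When pcix_82544 is set, the transmit path avoids an 82544
	 * PCI-X DMA erratum by splitting problem segments across extra
	 * descriptors (see em_fill_descriptors()).
	 */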
	if (adapter->hw.bus.type == e1000_bus_type_pcix &&
	    adapter->hw.mac.type == e1000_82544)
		adapter->pcix_82544 = TRUE;
	else
		adapter->pcix_82544 = FALSE;

	/* Get control from any management/hw control */
	if (((adapter->hw.mac.type != e1000_82573) &&
	    (adapter->hw.mac.type != e1000_ich8lan) &&
	    (adapter->hw.mac.type != e1000_ich9lan)) ||
	    !e1000_check_mng_mode(&adapter->hw))
		em_get_hw_control(adapter);

	/* Tell the stack that the interface is not active */
	adapter->ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);

	INIT_DEBUGOUT("em_attach: end");

	return (0);

err_hw_init:
	em_release_hw_control(adapter);
	e1000_remove_device(&adapter->hw);
	em_dma_free(adapter, &adapter->rxdma);
err_rx_desc:
	em_dma_free(adapter, &adapter->txdma);
err_tx_desc:
err_pci:
	em_free_intr(adapter);
	em_free_pci_resources(adapter);
	EM_LOCK_DESTROY(adapter);

	return (error);
}

/*********************************************************************
 *  Device removal routine
 *
 *  The detach entry point is called when the driver is being removed.
 *  This routine stops the adapter and deallocates all the resources
 *  that were allocated for driver operation.
 *
 *  return 0 on success, positive on failure
 *********************************************************************/

static int
em_detach(device_t dev)
{
	struct adapter	*adapter = device_get_softc(dev);
	struct ifnet	*ifp = adapter->ifp;

	INIT_DEBUGOUT("em_detach: begin");

#ifdef DEVICE_POLLING
	if (ifp->if_capenable & IFCAP_POLLING)
		ether_poll_deregister(ifp);
#endif

	em_disable_intr(adapter);
	em_free_intr(adapter);
	EM_LOCK(adapter);
	adapter->in_detach = 1;
	em_stop(adapter);
	e1000_phy_hw_reset(&adapter->hw);

	em_release_manageability(adapter);
	if (((adapter->hw.mac.type != e1000_82573) &&
	    (adapter->hw.mac.type != e1000_ich8lan) &&
	    (adapter->hw.mac.type != e1000_ich9lan)) ||
	    !e1000_check_mng_mode(&adapter->hw))
		em_release_hw_control(adapter);
	if (adapter->wol) {
		E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
		E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
		em_enable_wakeup(dev);
	}

	EM_UNLOCK(adapter);
	ether_ifdetach(adapter->ifp);

	callout_drain(&adapter->timer);
	callout_drain(&adapter->tx_fifo_timer);

	em_free_pci_resources(adapter);
	bus_generic_detach(dev);
	if_free(ifp);

	e1000_remove_device(&adapter->hw);
	em_free_transmit_structures(adapter);
	em_free_receive_structures(adapter);

	/* Free Transmit Descriptor ring */
	if (adapter->tx_desc_base) {
		em_dma_free(adapter, &adapter->txdma);
		adapter->tx_desc_base = NULL;
	}

	/* Free Receive Descriptor ring */
	if (adapter->rx_desc_base) {
		em_dma_free(adapter, &adapter->rxdma);
		adapter->rx_desc_base = NULL;
	}

	EM_LOCK_DESTROY(adapter);

	return (0);
}

/*********************************************************************
 *
 *  Shutdown entry point
 *
 **********************************************************************/

static int
em_shutdown(device_t dev)
{
	return em_suspend(dev);
}

/*
 * Suspend/resume device methods.
 */
static int
em_suspend(device_t dev)
{
	struct adapter *adapter = device_get_softc(dev);

	EM_LOCK(adapter);
	em_stop(adapter);

	em_release_manageability(adapter);
	if (((adapter->hw.mac.type != e1000_82573) &&
	    (adapter->hw.mac.type != e1000_ich8lan) &&
	    (adapter->hw.mac.type != e1000_ich9lan)) ||
	    !e1000_check_mng_mode(&adapter->hw))
		em_release_hw_control(adapter);
	if (adapter->wol) {
		E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
		E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
		em_enable_wakeup(dev);
	}

	EM_UNLOCK(adapter);

	return bus_generic_suspend(dev);
}

static int
em_resume(device_t dev)
{
	struct adapter *adapter = device_get_softc(dev);
	struct ifnet *ifp = adapter->ifp;

	EM_LOCK(adapter);
	em_init_locked(adapter);

	/* Get control from any management/hw control */
	if (((adapter->hw.mac.type != e1000_82573) &&
	    (adapter->hw.mac.type != e1000_ich8lan) &&
	    (adapter->hw.mac.type != e1000_ich9lan)) ||
	    !e1000_check_mng_mode(&adapter->hw))
		em_get_hw_control(adapter);
	em_init_manageability(adapter);

	if ((ifp->if_flags & IFF_UP) &&
	    (ifp->if_drv_flags & IFF_DRV_RUNNING))
		em_start_locked(ifp);

	EM_UNLOCK(adapter);

	return bus_generic_resume(dev);
}


/*********************************************************************
 *  Transmit entry point
 *
 *  em_start is called by the stack to initiate a transmit.
 *  The driver will remain in this routine as long as there are
 *  packets to transmit and transmit resources are available.
 *  In case resources are not available, the stack is notified
 *  and the packet is requeued.
 **********************************************************************/

static void
em_start_locked(struct ifnet *ifp)
{
	struct adapter	*adapter = ifp->if_softc;
	struct mbuf	*m_head;

	EM_LOCK_ASSERT(adapter);

	if ((ifp->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) !=
	    IFF_DRV_RUNNING)
		return;
	if (!adapter->link_active)
		return;

	while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {

		IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
		if (m_head == NULL)
			break;
		/*
		 *  Encapsulation can modify our pointer, and/or make it
		 *  NULL on failure.  In that event, we can't requeue.
		 *
		 *  We now use a function pointer to accommodate both the
		 *  legacy and advanced transmit functions.
		 */
		if (adapter->em_xmit(adapter, &m_head)) {
			if (m_head == NULL)
				break;
			ifp->if_drv_flags |= IFF_DRV_OACTIVE;
			IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
			break;
		}

		/* Send a copy of the frame to the BPF listener */
		ETHER_BPF_MTAP(ifp, m_head);

		/* Set timeout in case hardware has problems transmitting. */
		adapter->watchdog_timer = EM_TX_TIMEOUT;
	}
}

static void
em_start(struct ifnet *ifp)
{
	struct adapter *adapter = ifp->if_softc;

	EM_LOCK(adapter);
	if (ifp->if_drv_flags & IFF_DRV_RUNNING)
		em_start_locked(ifp);
	EM_UNLOCK(adapter);
}

/*********************************************************************
 *  Ioctl entry point
 *
 *  em_ioctl is called when the user wants to configure the
 *  interface.
 *
 *  return 0 on success, positive on failure
 **********************************************************************/

static int
em_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
{
	struct adapter	*adapter = ifp->if_softc;
	struct ifreq *ifr = (struct ifreq *)data;
	struct ifaddr *ifa = (struct ifaddr *)data;
	int error = 0;

	if (adapter->in_detach)
		return (error);

	switch (command) {
	case SIOCSIFADDR:
	case SIOCGIFADDR:
		if (ifa->ifa_addr->sa_family == AF_INET) {
			/*
			 * XXX
			 * Since resetting hardware takes a very long time
			 * and results in link renegotiation, we only
			 * initialize the hardware when it is absolutely
			 * required.
			 */
			ifp->if_flags |= IFF_UP;
			if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) {
				EM_LOCK(adapter);
				em_init_locked(adapter);
				EM_UNLOCK(adapter);
			}
			arp_ifinit(ifp, ifa);
		} else
			error = ether_ioctl(ifp, command, data);
		break;
	case SIOCSIFMTU:
	    {
		int max_frame_size;
		uint16_t eeprom_data = 0;

		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFMTU (Set Interface MTU)");

		EM_LOCK(adapter);
		switch (adapter->hw.mac.type) {
		case e1000_82573:
			/*
			 * 82573 only supports jumbo frames
			 * if ASPM is disabled.
			 */
			e1000_read_nvm(&adapter->hw,
			    NVM_INIT_3GIO_3, 1, &eeprom_data);
			if (eeprom_data & NVM_WORD1A_ASPM_MASK) {
				max_frame_size = ETHER_MAX_LEN;
				break;
			}
			/* Allow Jumbo frames - fall thru */
		case e1000_82571:
		case e1000_82572:
		case e1000_ich9lan:
		case e1000_82575:
		case e1000_80003es2lan:	/* Limit Jumbo Frame size */
			max_frame_size = 9234;
			break;
		case e1000_ich8lan:
			/* ICH8 does not support jumbo frames */
			max_frame_size = ETHER_MAX_LEN;
			break;
		default:
			max_frame_size = MAX_JUMBO_FRAME_SIZE;
		}
		if (ifr->ifr_mtu > max_frame_size - ETHER_HDR_LEN -
		    ETHER_CRC_LEN) {
			EM_UNLOCK(adapter);
			error = EINVAL;
			break;
		}

		ifp->if_mtu = ifr->ifr_mtu;
		adapter->hw.mac.max_frame_size =
		    ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
		em_init_locked(adapter);
		EM_UNLOCK(adapter);
		break;
	    }
	case SIOCSIFFLAGS:
		IOCTL_DEBUGOUT("ioctl rcv'd: "
		    "SIOCSIFFLAGS (Set Interface Flags)");
		EM_LOCK(adapter);
		if (ifp->if_flags & IFF_UP) {
			if ((ifp->if_drv_flags & IFF_DRV_RUNNING)) {
				if ((ifp->if_flags ^ adapter->if_flags) &
				    IFF_PROMISC) {
					em_disable_promisc(adapter);
					em_set_promisc(adapter);
				}
			} else
				em_init_locked(adapter);
		} else
			if (ifp->if_drv_flags & IFF_DRV_RUNNING)
				em_stop(adapter);
		adapter->if_flags = ifp->if_flags;
		EM_UNLOCK(adapter);
		break;
	case SIOCADDMULTI:
	case SIOCDELMULTI:
		IOCTL_DEBUGOUT("ioctl rcv'd: SIOC(ADD|DEL)MULTI");
		if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
			EM_LOCK(adapter);
			em_disable_intr(adapter);
			em_set_multi(adapter);
			if (adapter->hw.mac.type == e1000_82542 &&
			    adapter->hw.revision_id == E1000_REVISION_2) {
				em_initialize_receive_unit(adapter);
			}
#ifdef DEVICE_POLLING
			if (!(ifp->if_capenable & IFCAP_POLLING))
#endif
				em_enable_intr(adapter);
			EM_UNLOCK(adapter);
		}
		break;
	case SIOCSIFMEDIA:
		/* Check SOL/IDER usage */
		EM_LOCK(adapter);
		if (e1000_check_reset_block(&adapter->hw)) {
			EM_UNLOCK(adapter);
			device_printf(adapter->dev, "Media change is"
			    " blocked due to SOL/IDER session.\n");
			break;
		}
		EM_UNLOCK(adapter);
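		/* FALLTHROUGH */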
	case SIOCGIFMEDIA:
		IOCTL_DEBUGOUT("ioctl rcv'd: "
		    "SIOCxIFMEDIA (Get/Set Interface Media)");
		error = ifmedia_ioctl(ifp, ifr, &adapter->media, command);
		break;
	case SIOCSIFCAP:
	    {
		int mask, reinit;

		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFCAP (Set Capabilities)");
		reinit = 0;
		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
#ifdef DEVICE_POLLING
		if (mask & IFCAP_POLLING) {
			if (ifr->ifr_reqcap & IFCAP_POLLING) {
				error = ether_poll_register(em_poll, ifp);
				if (error)
					return (error);
				EM_LOCK(adapter);
				em_disable_intr(adapter);
				ifp->if_capenable |= IFCAP_POLLING;
				EM_UNLOCK(adapter);
			} else {
				error = ether_poll_deregister(ifp);
				/* Enable interrupt even in error case */
				EM_LOCK(adapter);
				em_enable_intr(adapter);
				ifp->if_capenable &= ~IFCAP_POLLING;
				EM_UNLOCK(adapter);
			}
		}
#endif
		if (mask & IFCAP_HWCSUM) {
			ifp->if_capenable ^= IFCAP_HWCSUM;
			reinit = 1;
		}
		if (mask & IFCAP_TSO4) {
			ifp->if_capenable ^= IFCAP_TSO4;
			reinit = 1;
		}
		if (mask & IFCAP_VLAN_HWTAGGING) {
			ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
			reinit = 1;
		}
		if (reinit && (ifp->if_drv_flags & IFF_DRV_RUNNING))
			em_init(adapter);
		VLAN_CAPABILITIES(ifp);
		break;
	    }
	default:
		error = ether_ioctl(ifp, command, data);
		break;
	}

	return (error);
}

/*********************************************************************
 *  Watchdog timer:
 *
 *  This routine is called from the local timer every second.
 *  As long as transmit descriptors are being cleaned the value
 *  is non-zero and we do nothing. Reaching 0 indicates a tx hang
 *  and we then reset the device.
 *
 **********************************************************************/

static void
em_watchdog(struct adapter *adapter)
{

	EM_LOCK_ASSERT(adapter);

	/*
	** The timer is set to EM_TX_TIMEOUT every time start queues
	** a packet.  Then txeof keeps resetting it as long as it
	** cleans at least one descriptor.
	** Finally, anytime all descriptors are clean the timer is
	** set to 0.
	*/
	if (adapter->watchdog_timer == 0 || --adapter->watchdog_timer)
		return;

	/* If we are in this routine because of pause frames, then
	 * don't reset the hardware.
	 */
	if (E1000_READ_REG(&adapter->hw, E1000_STATUS) &
	    E1000_STATUS_TXOFF) {
		adapter->watchdog_timer = EM_TX_TIMEOUT;
		return;
	}

	if (e1000_check_for_link(&adapter->hw) == 0)
		device_printf(adapter->dev, "watchdog timeout -- resetting\n");
	adapter->ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
	adapter->watchdog_events++;

	em_init_locked(adapter);
}

/*********************************************************************
 *  Init entry point
 *
 *  This routine is used in two ways. It is used by the stack as
 *  init entry point in network interface structure. It is also used
 *  by the driver as a hw/sw initialization routine to get to a
 *  consistent state.
 *
 *  return 0 on success, positive on failure
 **********************************************************************/

static void
em_init_locked(struct adapter *adapter)
{
	struct ifnet	*ifp = adapter->ifp;
	device_t	dev = adapter->dev;
	uint32_t	pba;

	INIT_DEBUGOUT("em_init: begin");

	EM_LOCK_ASSERT(adapter);

	em_stop(adapter);

	/*
	 * Packet Buffer Allocation (PBA)
	 * Writing PBA sets the receive portion of the buffer;
	 * the remainder is used for the transmit buffer.
	 *
	 * Devices before the 82547 had a Packet Buffer of 64K.
	 *   Default allocation: PBA=48K for Rx, leaving 16K for Tx.
	 * After the 82547 the buffer was reduced to 40K.
	 *   Default allocation: PBA=30K for Rx, leaving 10K for Tx.
	 *   Note: default does not leave enough room for Jumbo Frame >10k.
	 */
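	/*
	 * The PBA register counts in kilobyte units, so for example
	 * E1000_PBA_30K reserves 30 KB of packet buffer for receive;
	 * EM_PBA_BYTES_SHIFT in the 82547 case below converts the
	 * remaining KB count into bytes for the TX FIFO bookkeeping.
	 */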
	switch (adapter->hw.mac.type) {
	case e1000_82547:
	case e1000_82547_rev_2: /* 82547: Total Packet Buffer is 40K */
		if (adapter->hw.mac.max_frame_size > 8192)
			pba = E1000_PBA_22K; /* 22K for Rx, 18K for Tx */
		else
			pba = E1000_PBA_30K; /* 30K for Rx, 10K for Tx */
		adapter->tx_fifo_head = 0;
		adapter->tx_head_addr = pba << EM_TX_HEAD_ADDR_SHIFT;
		adapter->tx_fifo_size =
		    (E1000_PBA_40K - pba) << EM_PBA_BYTES_SHIFT;
		break;
	/* Total Packet Buffer on these is 48K */
	case e1000_82571:
	case e1000_82572:
	case e1000_82575:
	case e1000_80003es2lan:
		pba = E1000_PBA_32K; /* 32K for Rx, 16K for Tx */
		break;
	case e1000_82573: /* 82573: Total Packet Buffer is 32K */
		pba = E1000_PBA_12K; /* 12K for Rx, 20K for Tx */
		break;
	case e1000_ich9lan:
#define E1000_PBA_10K	0x000A
		pba = E1000_PBA_10K;
		break;
	case e1000_ich8lan:
		pba = E1000_PBA_8K;
		break;
	default:
		/* Devices before 82547 had a Packet Buffer of 64K. */
		if (adapter->hw.mac.max_frame_size > 8192)
			pba = E1000_PBA_40K; /* 40K for Rx, 24K for Tx */
		else
			pba = E1000_PBA_48K; /* 48K for Rx, 16K for Tx */
	}

	INIT_DEBUGOUT1("em_init: pba=%dK", pba);
	E1000_WRITE_REG(&adapter->hw, E1000_PBA, pba);

	/* Get the latest mac address, user can use a LAA */
	bcopy(IF_LLADDR(adapter->ifp), adapter->hw.mac.addr,
	    ETHER_ADDR_LEN);

	/* Put the address into the Receive Address Array */
	e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);

	/*
	 * With 82571 controllers, LAA may be overwritten
	 * due to controller reset from the other port.
	 */
	if (adapter->hw.mac.type == e1000_82571)
		e1000_set_laa_state_82571(&adapter->hw, TRUE);

	/* Initialize the hardware */
	if (em_hardware_init(adapter)) {
		device_printf(dev, "Unable to initialize the hardware\n");
		return;
	}
	em_update_link_status(adapter);

	if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING)
		em_enable_vlans(adapter);

	/* Set hardware offload abilities */
	ifp->if_hwassist = 0;
	if (adapter->hw.mac.type >= e1000_82543) {
		if (ifp->if_capenable & IFCAP_TXCSUM)
			ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP);
		if (ifp->if_capenable & IFCAP_TSO4)
			ifp->if_hwassist |= CSUM_TSO;
	}

	/* Configure for OS presence */
	em_init_manageability(adapter);

	/* Prepare transmit descriptors and buffers */
	if (em_setup_transmit_structures(adapter)) {
		device_printf(dev, "Could not setup transmit structures\n");
		em_stop(adapter);
		return;
	}
	em_initialize_transmit_unit(adapter);

	/* Setup Multicast table */
	em_set_multi(adapter);

	/* Prepare receive descriptors and buffers */
	if (em_setup_receive_structures(adapter)) {
		device_printf(dev, "Could not setup receive structures\n");
		em_stop(adapter);
		return;
	}
	em_initialize_receive_unit(adapter);

	/* Don't lose promiscuous settings */
	em_set_promisc(adapter);

	ifp->if_drv_flags |= IFF_DRV_RUNNING;
	ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;

	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
	e1000_clear_hw_cntrs_base_generic(&adapter->hw);

#ifdef DEVICE_POLLING
	/*
	 * Only enable interrupts if we are not polling, make sure
	 * they are off otherwise.
	 */
	if (ifp->if_capenable & IFCAP_POLLING)
		em_disable_intr(adapter);
	else
#endif /* DEVICE_POLLING */
		em_enable_intr(adapter);

	/* Don't reset the phy next time init gets called */
	adapter->hw.phy.reset_disable = TRUE;
}

static void
em_init(void *arg)
{
	struct adapter *adapter = arg;

	EM_LOCK(adapter);
	em_init_locked(adapter);
	EM_UNLOCK(adapter);
}


#ifdef DEVICE_POLLING
/*********************************************************************
 *
 *  Legacy polling routine
 *
 *********************************************************************/
static void
em_poll(struct ifnet *ifp, enum poll_cmd cmd, int count)
{
	struct adapter *adapter = ifp->if_softc;
	uint32_t reg_icr;

	EM_LOCK(adapter);
	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
		EM_UNLOCK(adapter);
		return;
	}

	if (cmd == POLL_AND_CHECK_STATUS) {
		reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
		if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
			callout_stop(&adapter->timer);
			adapter->hw.mac.get_link_status = 1;
			e1000_check_for_link(&adapter->hw);
			em_update_link_status(adapter);
			callout_reset(&adapter->timer, hz,
			    em_local_timer, adapter);
		}
	}
	em_rxeof(adapter, count);
	em_txeof(adapter);

	if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
		em_start_locked(ifp);
	EM_UNLOCK(adapter);
}

/*********************************************************************
 *
 *  Legacy Interrupt Service routine
 *
 *********************************************************************/

static void
em_intr(void *arg)
{
	struct adapter	*adapter = arg;
	struct ifnet	*ifp;
	uint32_t	reg_icr;

	EM_LOCK(adapter);
	ifp = adapter->ifp;

	if (ifp->if_capenable & IFCAP_POLLING) {
		EM_UNLOCK(adapter);
		return;
	}

	for (;;) {
		reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);

		if (adapter->hw.mac.type >= e1000_82571 &&
		    (reg_icr & E1000_ICR_INT_ASSERTED) == 0)
			break;
		else if (reg_icr == 0)
			break;

		/*
		 * XXX: some laptops trigger several spurious interrupts
		 * on em(4) when in the resume cycle. The ICR register
		 * reports all-ones value in this case. Processing such
		 * interrupts would lead to a freeze. I don't know why.
		 */
		if (reg_icr == 0xffffffff)
			break;

		if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
			em_rxeof(adapter, -1);
			em_txeof(adapter);
		}

		/* Link status change */
		if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
			callout_stop(&adapter->timer);
			adapter->hw.mac.get_link_status = 1;
			e1000_check_for_link(&adapter->hw);
			em_update_link_status(adapter);
			callout_reset(&adapter->timer, hz,
			    em_local_timer, adapter);
		}

		if (reg_icr & E1000_ICR_RXO)
			adapter->rx_overruns++;
	}

	if (ifp->if_drv_flags & IFF_DRV_RUNNING &&
	    !IFQ_DRV_IS_EMPTY(&ifp->if_snd))
		em_start_locked(ifp);
	EM_UNLOCK(adapter);
}

#else /* if not DEVICE_POLLING, then fast interrupt routines only */

static void
em_handle_link(void *context, int pending)
{
	struct adapter	*adapter = context;
	struct ifnet *ifp;

	ifp = adapter->ifp;

	EM_LOCK(adapter);
	if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) {
		EM_UNLOCK(adapter);
		return;
	}

	callout_stop(&adapter->timer);
	adapter->hw.mac.get_link_status = 1;
	e1000_check_for_link(&adapter->hw);
	em_update_link_status(adapter);
	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
	EM_UNLOCK(adapter);
}

static void
em_handle_rxtx(void *context, int pending)
{
	struct adapter	*adapter = context;
	struct ifnet	*ifp;

	NET_LOCK_GIANT();
	ifp = adapter->ifp;

	/*
	 * TODO:
	 * It should be possible to run the tx clean loop without the lock.
	 */
	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
		if (em_rxeof(adapter, adapter->rx_process_limit) != 0)
			taskqueue_enqueue(adapter->tq, &adapter->rxtx_task);
		EM_LOCK(adapter);
		em_txeof(adapter);

		if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
			em_start_locked(ifp);
		EM_UNLOCK(adapter);
	}

	em_enable_intr(adapter);
	NET_UNLOCK_GIANT();
}

/*********************************************************************
 *
 *  Fast Interrupt Service routine
 *
 *********************************************************************/
static int
em_intr_fast(void *arg)
{
	struct adapter	*adapter = arg;
	struct ifnet	*ifp;
	uint32_t	reg_icr;

	ifp = adapter->ifp;

	reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);

	/* Hot eject?  */
	if (reg_icr == 0xffffffff)
		return (FILTER_STRAY);

	/* Definitely not our interrupt.  */
	if (reg_icr == 0x0)
		return (FILTER_STRAY);

	/*
	 * Starting with the 82571 chip, bit 31 should be used to
	 * determine whether the interrupt belongs to us.
	 */
	if (adapter->hw.mac.type >= e1000_82571 &&
	    (reg_icr & E1000_ICR_INT_ASSERTED) == 0)
		return (FILTER_STRAY);

	/*
	 * Mask interrupts until the taskqueue is finished running.  This is
	 * cheap, just assume that it is needed.  This also works around the
	 * MSI message reordering errata on certain systems.
	 */
	em_disable_intr(adapter);
	taskqueue_enqueue(adapter->tq, &adapter->rxtx_task);
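	/* em_handle_rxtx() re-enables the interrupt when it finishes. */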

	/* Link status change */
	if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC))
		taskqueue_enqueue(taskqueue_fast, &adapter->link_task);

	if (reg_icr & E1000_ICR_RXO)
		adapter->rx_overruns++;
	return (FILTER_HANDLED);
}
#endif /* ! DEVICE_POLLING */

/*********************************************************************
 *
 *  Media Ioctl callback
 *
 *  This routine is called whenever the user queries the status of
 *  the interface using ifconfig.
 *
 **********************************************************************/
static void
em_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
{
	struct adapter *adapter = ifp->if_softc;
	u_char fiber_type = IFM_1000_SX;

	INIT_DEBUGOUT("em_media_status: begin");

	EM_LOCK(adapter);
	e1000_check_for_link(&adapter->hw);
	em_update_link_status(adapter);

	ifmr->ifm_status = IFM_AVALID;
	ifmr->ifm_active = IFM_ETHER;

	if (!adapter->link_active) {
		EM_UNLOCK(adapter);
		return;
	}

	ifmr->ifm_status |= IFM_ACTIVE;

	if ((adapter->hw.media_type == e1000_media_type_fiber) ||
	    (adapter->hw.media_type == e1000_media_type_internal_serdes)) {
		if (adapter->hw.mac.type == e1000_82545)
			fiber_type = IFM_1000_LX;
		ifmr->ifm_active |= fiber_type | IFM_FDX;
	} else {
		switch (adapter->link_speed) {
		case 10:
			ifmr->ifm_active |= IFM_10_T;
			break;
		case 100:
			ifmr->ifm_active |= IFM_100_TX;
			break;
		case 1000:
			ifmr->ifm_active |= IFM_1000_T;
			break;
		}
		if (adapter->link_duplex == FULL_DUPLEX)
			ifmr->ifm_active |= IFM_FDX;
		else
			ifmr->ifm_active |= IFM_HDX;
	}
	EM_UNLOCK(adapter);
}

/*********************************************************************
 *
 *  Media Ioctl callback
 *
 *  This routine is called when the user changes speed/duplex using
 *  the media/mediaopt options with ifconfig.
 *
 **********************************************************************/
static int
em_media_change(struct ifnet *ifp)
{
	struct adapter *adapter = ifp->if_softc;
	struct ifmedia  *ifm = &adapter->media;

	INIT_DEBUGOUT("em_media_change: begin");

	if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
		return (EINVAL);

	EM_LOCK(adapter);
	switch (IFM_SUBTYPE(ifm->ifm_media)) {
	case IFM_AUTO:
		adapter->hw.mac.autoneg = DO_AUTO_NEG;
		adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
		break;
	case IFM_1000_LX:
	case IFM_1000_SX:
	case IFM_1000_T:
		adapter->hw.mac.autoneg = DO_AUTO_NEG;
		adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
		break;
	case IFM_100_TX:
		adapter->hw.mac.autoneg = FALSE;
		adapter->hw.phy.autoneg_advertised = 0;
		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
			adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_FULL;
		else
			adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_HALF;
		break;
	case IFM_10_T:
		adapter->hw.mac.autoneg = FALSE;
		adapter->hw.phy.autoneg_advertised = 0;
		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
			adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_FULL;
		else
			adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_HALF;
		break;
	default:
		device_printf(adapter->dev, "Unsupported media type\n");
	}

	/* As the speed/duplex settings may have changed we need to
	 * reset the PHY.
	 */
	adapter->hw.phy.reset_disable = FALSE;

	em_init_locked(adapter);
	EM_UNLOCK(adapter);

	return (0);
}

/*********************************************************************
 *
 *  This routine maps the mbufs to tx descriptors.
 *
 *  return 0 on success, positive on failure
 **********************************************************************/

static int
em_encap(struct adapter *adapter, struct mbuf **m_headp)
{
	bus_dma_segment_t	segs[EM_MAX_SCATTER];
	bus_dmamap_t		map;
	struct em_buffer	*tx_buffer, *tx_buffer_mapped;
	struct e1000_tx_desc	*ctxd = NULL;
	struct mbuf		*m_head;
	uint32_t		txd_upper, txd_lower, txd_used, txd_saved;
	int			nsegs, i, j, first, last = 0;
	int			error, do_tso, tso_desc = 0;

	m_head = *m_headp;
	txd_upper = txd_lower = txd_used = txd_saved = 0;

	do_tso = ((m_head->m_pkthdr.csum_flags & CSUM_TSO) != 0);
1699
1700	/*
1701	 * Force a cleanup if the number of TX descriptors
1702	 * available hits the threshold.
1703	 */
1704	if (adapter->num_tx_desc_avail <= EM_TX_CLEANUP_THRESHOLD) {
1705		em_txeof(adapter);
1706		/* Do we now have at least a minimal number free? */
1707		if (adapter->num_tx_desc_avail <= EM_TX_OP_THRESHOLD) {
1708			adapter->no_tx_desc_avail1++;
1709			return (ENOBUFS);
1710		}
1711	}
1712
1713
1714	/*
1715	 * TSO workaround:
1716	 *  If an mbuf is only a header we need
1717	 *     to pull 4 bytes of data into it.
1718	 */
1719	if (do_tso && (m_head->m_len <= M_TSO_LEN)) {
1720		m_head = m_pullup(m_head, M_TSO_LEN + 4);
1721		*m_headp = m_head;
1722		if (m_head == NULL)
1723			return (ENOBUFS);
1724	}
1725
1726	/*
1727	 * Map the packet for DMA
1728	 *
1729	 * Capture the first descriptor index;
1730	 * this descriptor will have the index
1731	 * of the EOP, which is the only one that
1732	 * now gets a DONE bit writeback.
1733	 */
1734	first = adapter->next_avail_tx_desc;
1735	tx_buffer = &adapter->tx_buffer_area[first];
1736	tx_buffer_mapped = tx_buffer;
1737	map = tx_buffer->map;
1738
1739	error = bus_dmamap_load_mbuf_sg(adapter->txtag, map,
1740	    *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
1741
1742	/*
1743	 * There are two types of errors we can (try) to handle:
1744	 * - EFBIG means the mbuf chain was too long and bus_dma ran
1745	 *   out of segments.  Defragment the mbuf chain and try again.
1746	 * - ENOMEM means bus_dma could not obtain enough bounce buffers
1747	 *   at this point in time.  Defer sending and try again later.
1748	 * All other errors, in particular EINVAL, are fatal and prevent the
1749	 * mbuf chain from ever going through.  Drop it and report error.
1750	 */
1751	if (error == EFBIG) {
1752		struct mbuf *m;
1753
1754		m = m_defrag(*m_headp, M_DONTWAIT);
1755		if (m == NULL) {
1756			adapter->mbuf_alloc_failed++;
1757			m_freem(*m_headp);
1758			*m_headp = NULL;
1759			return (ENOBUFS);
1760		}
1761		*m_headp = m;
1762
1763		/* Try it again */
1764		error = bus_dmamap_load_mbuf_sg(adapter->txtag, map,
1765		    *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
1766
1767		if (error == ENOMEM) {
1768			adapter->no_tx_dma_setup++;
1769			return (error);
1770		} else if (error != 0) {
1771			adapter->no_tx_dma_setup++;
1772			m_freem(*m_headp);
1773			*m_headp = NULL;
1774			return (error);
1775		}
1776	} else if (error == ENOMEM) {
1777		adapter->no_tx_dma_setup++;
1778		return (error);
1779	} else if (error != 0) {
1780		adapter->no_tx_dma_setup++;
1781		m_freem(*m_headp);
1782		*m_headp = NULL;
1783		return (error);
1784	}
1785
1786	/*
1787	 * TSO Hardware workaround, if this packet is not
1788	 * TSO, and is only a single descriptor long, and
1789	 * it follows a TSO burst, then we need to add a
1790	 * sentinel descriptor to prevent premature writeback.
1791	 */
1792	if ((do_tso == 0) && (adapter->tx_tso == TRUE)) {
1793		if (nsegs == 1)
1794			tso_desc = TRUE;
1795		adapter->tx_tso = FALSE;
1796	}
1797
1798	if (nsegs > (adapter->num_tx_desc_avail - 2)) {
1799		adapter->no_tx_desc_avail2++;
1800		bus_dmamap_unload(adapter->txtag, map);
1801		return (ENOBUFS);
1802	}
1803	m_head = *m_headp;
1804
1805	/* Do hardware assists */
1806	if (em_tso_setup(adapter, m_head, &txd_upper, &txd_lower))
1807		/* we need to make a final sentinel transmit desc */
1808		tso_desc = TRUE;
1809	else if (m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD)
1810		em_transmit_checksum_setup(adapter, m_head,
1811		    &txd_upper, &txd_lower);
1812
1813	i = adapter->next_avail_tx_desc;
1814	if (adapter->pcix_82544)
1815		txd_saved = i;
1816
1817	/* Set up our transmit descriptors */
1818	for (j = 0; j < nsegs; j++) {
1819		bus_size_t seg_len;
1820		bus_addr_t seg_addr;
1821		/* If adapter is 82544 and on PCIX bus */
1822		if(adapter->pcix_82544) {
1823			DESC_ARRAY	desc_array;
1824			uint32_t	array_elements, counter;
1825			/*
1826			 * Check the Address and Length combination and
1827			 * split the data accordingly
1828			 */
1829			array_elements = em_fill_descriptors(segs[j].ds_addr,
1830			    segs[j].ds_len, &desc_array);
1831			for (counter = 0; counter < array_elements; counter++) {
1832				if (txd_used == adapter->num_tx_desc_avail) {
1833					adapter->next_avail_tx_desc = txd_saved;
1834					adapter->no_tx_desc_avail2++;
1835					bus_dmamap_unload(adapter->txtag, map);
1836					return (ENOBUFS);
1837				}
1838				tx_buffer = &adapter->tx_buffer_area[i];
1839				ctxd = &adapter->tx_desc_base[i];
1840				ctxd->buffer_addr = htole64(
1841				    desc_array.descriptor[counter].address);
1842				ctxd->lower.data = htole32(
1843				    (adapter->txd_cmd | txd_lower | (uint16_t)
1844				    desc_array.descriptor[counter].length));
1845				ctxd->upper.data =
1846				    htole32((txd_upper));
1847				last = i;
1848				if (++i == adapter->num_tx_desc)
1849					i = 0;
1850				tx_buffer->m_head = NULL;
1851				tx_buffer->next_eop = -1;
1852				txd_used++;
1853			}
1854		} else {
1855			tx_buffer = &adapter->tx_buffer_area[i];
1856			ctxd = &adapter->tx_desc_base[i];
1857			seg_addr = segs[j].ds_addr;
1858			seg_len  = segs[j].ds_len;
1859			/*
1860			** TSO Workaround:
1861			** If this is the last descriptor, we want to
1862			** split it so we have a small final sentinel
1863			*/
1864			if (tso_desc && (j == (nsegs - 1)) && (seg_len > 8)) {
1865				seg_len -= 4;
1866				ctxd->buffer_addr = htole64(seg_addr);
1867				ctxd->lower.data = htole32(
1868				adapter->txd_cmd | txd_lower | seg_len);
1869				ctxd->upper.data =
1870				    htole32(txd_upper);
1871				if (++i == adapter->num_tx_desc)
1872					i = 0;
1873				/* Now make the sentinel */
1874				++txd_used; /* using an extra txd */
1875				ctxd = &adapter->tx_desc_base[i];
1876				tx_buffer = &adapter->tx_buffer_area[i];
1877				ctxd->buffer_addr =
1878				    htole64(seg_addr + seg_len);
1879				ctxd->lower.data = htole32(
1880				adapter->txd_cmd | txd_lower | 4);
1881				ctxd->upper.data =
1882				    htole32(txd_upper);
1883				last = i;
1884				if (++i == adapter->num_tx_desc)
1885					i = 0;
1886			} else {
1887				ctxd->buffer_addr = htole64(seg_addr);
1888				ctxd->lower.data = htole32(
1889				adapter->txd_cmd | txd_lower | seg_len);
1890				ctxd->upper.data =
1891				    htole32(txd_upper);
1892				last = i;
1893				if (++i == adapter->num_tx_desc)
1894					i = 0;
1895			}
1896			tx_buffer->m_head = NULL;
1897			tx_buffer->next_eop = -1;
1898		}
1899	}
1900
1901	adapter->next_avail_tx_desc = i;
1902	if (adapter->pcix_82544)
1903		adapter->num_tx_desc_avail -= txd_used;
1904	else {
1905		adapter->num_tx_desc_avail -= nsegs;
1906		if (tso_desc) /* TSO used an extra for sentinel */
1907			adapter->num_tx_desc_avail -= txd_used;
1908	}
1909
1910	if (m_head->m_flags & M_VLANTAG) {
1911		/* Set the vlan id. */
1912		ctxd->upper.fields.special =
1913		    htole16(m_head->m_pkthdr.ether_vtag);
1914		/* Tell hardware to add tag */
1915		ctxd->lower.data |= htole32(E1000_TXD_CMD_VLE);
1916	}
1917
1918	tx_buffer->m_head = m_head;
1919	tx_buffer_mapped->map = tx_buffer->map;
1920	tx_buffer->map = map;
1921	bus_dmamap_sync(adapter->txtag, map, BUS_DMASYNC_PREWRITE);
1922
1923	/*
1924	 * Last Descriptor of Packet
1925	 * needs End Of Packet (EOP)
1926	 * and Report Status (RS)
1927	 */
1928	ctxd->lower.data |=
1929	    htole32(E1000_TXD_CMD_EOP | E1000_TXD_CMD_RS);
1930	/*
1931	 * Keep track in the first buffer which
1932	 * descriptor will be written back
1933	 */
1934	tx_buffer = &adapter->tx_buffer_area[first];
1935	tx_buffer->next_eop = last;
1936
1937	/*
1938	 * Advance the Transmit Descriptor Tail (TDT), this tells the E1000
1939	 * that this frame is available to transmit.
1940	 */
1941	bus_dmamap_sync(adapter->txdma.dma_tag, adapter->txdma.dma_map,
1942	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1943	if (adapter->hw.mac.type == e1000_82547 &&
1944	    adapter->link_duplex == HALF_DUPLEX)
1945		em_82547_move_tail(adapter);
1946	else {
1947		E1000_WRITE_REG(&adapter->hw, E1000_TDT, i);
1948		if (adapter->hw.mac.type == e1000_82547)
1949			em_82547_update_fifo_head(adapter,
1950			    m_head->m_pkthdr.len);
1951	}
1952
1953	return (0);
1954}
1955
1956/*********************************************************************
1957 *
1958 *  This routine maps the mbufs to Advanced TX descriptors,
1959 *  as used by the 82575 adapter. It needs no workarounds.
1960 *
1961 **********************************************************************/
1962
1963static int
1964em_adv_encap(struct adapter *adapter, struct mbuf **m_headp)
1965{
1966	bus_dma_segment_t	segs[EM_MAX_SCATTER];
1967	bus_dmamap_t		map;
1968	struct em_buffer	*tx_buffer, *tx_buffer_mapped;
1969	union e1000_adv_tx_desc	*txd = NULL;
1970	struct mbuf		*m_head;
1971	u32			olinfo_status = 0, cmd_type_len = 0;
1972	u32			paylen = 0;
1973	int			nsegs, i, j, error, first, last = 0;
1974
1975	m_head = *m_headp;
1976
1977
1978	/* Set basic descriptor constants */
1979	cmd_type_len |= E1000_ADVTXD_DTYP_DATA;
1980	cmd_type_len |= E1000_ADVTXD_DCMD_IFCS | E1000_ADVTXD_DCMD_DEXT;
1981
1982	/*
1983	 * Force a cleanup if the number of TX descriptors
1984	 * available hits the threshold.
1985	 */
1986	if (adapter->num_tx_desc_avail <= EM_TX_CLEANUP_THRESHOLD) {
1987		em_txeof(adapter);
1988		/* Do we now have at least a minimal number free? */
1989		if (adapter->num_tx_desc_avail <= EM_TX_OP_THRESHOLD) {
1990			adapter->no_tx_desc_avail1++;
1991			return (ENOBUFS);
1992		}
1993	}
1994
1995	/*
1996         * Map the packet for DMA.
1997	 *
1998	 * Capture the first descriptor index;
1999	 * this descriptor will have the index
2000	 * of the EOP, which is the only one that
2001	 * now gets a DONE bit writeback.
2002	 */
2003	first = adapter->next_avail_tx_desc;
2004	tx_buffer = &adapter->tx_buffer_area[first];
2005	tx_buffer_mapped = tx_buffer;
2006	map = tx_buffer->map;
2007
2008	error = bus_dmamap_load_mbuf_sg(adapter->txtag, map,
2009	    *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
2010
2011	if (error == EFBIG) {
2012		struct mbuf *m;
2013
2014		m = m_defrag(*m_headp, M_DONTWAIT);
2015		if (m == NULL) {
2016			adapter->mbuf_alloc_failed++;
2017			m_freem(*m_headp);
2018			*m_headp = NULL;
2019			return (ENOBUFS);
2020		}
2021		*m_headp = m;
2022
2023		/* Try it again */
2024		error = bus_dmamap_load_mbuf_sg(adapter->txtag, map,
2025		    *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
2026
2027		if (error == ENOMEM) {
2028			adapter->no_tx_dma_setup++;
2029			return (error);
2030		} else if (error != 0) {
2031			adapter->no_tx_dma_setup++;
2032			m_freem(*m_headp);
2033			*m_headp = NULL;
2034			return (error);
2035		}
2036	} else if (error == ENOMEM) {
2037		adapter->no_tx_dma_setup++;
2038		return (error);
2039	} else if (error != 0) {
2040		adapter->no_tx_dma_setup++;
2041		m_freem(*m_headp);
2042		*m_headp = NULL;
2043		return (error);
2044	}
2045
2046	/* Check again to be sure we have enough descriptors */
2047	if (nsegs > (adapter->num_tx_desc_avail - 2)) {
2048		adapter->no_tx_desc_avail2++;
2049		bus_dmamap_unload(adapter->txtag, map);
2050		return (ENOBUFS);
2051	}
2052	m_head = *m_headp;
2053
2054	/*
2055	 * Set up the context descriptor:
2056	 * used when any hardware offload is done.
2057	 * This includes CSUM, VLAN, and TSO. It
2058	 * will use the first descriptor.
2059	 */
2060	/* First try TSO */
2061	if (em_tso_adv_setup(adapter, m_head, &paylen)) {
2062		cmd_type_len |= E1000_ADVTXD_DCMD_TSE;
2063		olinfo_status |= E1000_TXD_POPTS_IXSM << 8;
2064		olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
2065		olinfo_status |= paylen << E1000_ADVTXD_PAYLEN_SHIFT;
2066	} else if (m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD) {
2067		if (em_tx_adv_ctx_setup(adapter, m_head))
2068			olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
2069	}
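	/*
	 * At this point, per the code above (a descriptive note, not
	 * new logic): for TSO, olinfo_status carries the IXSM/TXSM
	 * checksum-insertion flags in its low bits and the TSO payload
	 * length in the bits above E1000_ADVTXD_PAYLEN_SHIFT; for
	 * plain checksum offload only TXSM is set.
	 */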
2070
2071	/* Set up our transmit descriptors */
2072	i = adapter->next_avail_tx_desc;
2073	for (j = 0; j < nsegs; j++) {
2074		bus_size_t seg_len;
2075		bus_addr_t seg_addr;
2076
2077		tx_buffer = &adapter->tx_buffer_area[i];
2078		txd = (union e1000_adv_tx_desc *)&adapter->tx_desc_base[i];
2079		seg_addr = segs[j].ds_addr;
2080		seg_len  = segs[j].ds_len;
2081
2082		txd->read.buffer_addr = htole64(seg_addr);
2083		txd->read.cmd_type_len = htole32(
2084		    adapter->txd_cmd | cmd_type_len | seg_len);
2085		txd->read.olinfo_status = htole32(olinfo_status);
2086		last = i;
2087		if (++i == adapter->num_tx_desc)
2088			i = 0;
2089		tx_buffer->m_head = NULL;
2090		tx_buffer->next_eop = -1;
2091	}
2092
2093	adapter->next_avail_tx_desc = i;
2094	adapter->num_tx_desc_avail -= nsegs;
2095
2096	tx_buffer->m_head = m_head;
2097	tx_buffer_mapped->map = tx_buffer->map;
2098	tx_buffer->map = map;
2099	bus_dmamap_sync(adapter->txtag, map, BUS_DMASYNC_PREWRITE);
2100
2101	/*
2102	 * Last Descriptor of Packet
2103	 * needs End Of Packet (EOP)
2104	 * and Report Status (RS)
2105	 */
2106	txd->read.cmd_type_len |=
2107	    htole32(E1000_TXD_CMD_EOP | E1000_TXD_CMD_RS);
2108	/*
2109	 * Keep track in the first buffer which
2110	 * descriptor will be written back
2111	 */
2112	tx_buffer = &adapter->tx_buffer_area[first];
2113	tx_buffer->next_eop = last;
2114
2115	/*
2116	 * Advance the Transmit Descriptor Tail (TDT), this tells the E1000
2117	 * that this frame is available to transmit.
2118	 */
2119	bus_dmamap_sync(adapter->txdma.dma_tag, adapter->txdma.dma_map,
2120	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
2121	E1000_WRITE_REG(&adapter->hw, E1000_TDT, i);
2122
2123	return (0);
2124
2125}
2126
2127/*********************************************************************
2128 *
2129 * 82547 workaround to avoid controller hang in half-duplex environment.
2130 * The workaround is to avoid queuing a large packet that would span
2131 * the internal Tx FIFO ring boundary. We need to reset the FIFO pointers
2132 * in this case. We do that only when FIFO is quiescent.
2133 *
2134 **********************************************************************/
2135static void
2136em_82547_move_tail(void *arg)
2137{
2138	struct adapter *adapter = arg;
2139	uint16_t hw_tdt;
2140	uint16_t sw_tdt;
2141	struct e1000_tx_desc *tx_desc;
2142	uint16_t length = 0;
2143	boolean_t eop = 0;
2144
2145	EM_LOCK_ASSERT(adapter);
2146
2147	hw_tdt = E1000_READ_REG(&adapter->hw, E1000_TDT);
2148	sw_tdt = adapter->next_avail_tx_desc;
2149
2150	while (hw_tdt != sw_tdt) {
2151		tx_desc = &adapter->tx_desc_base[hw_tdt];
2152		length += tx_desc->lower.flags.length;
2153		eop = tx_desc->lower.data & E1000_TXD_CMD_EOP;
2154		if (++hw_tdt == adapter->num_tx_desc)
2155			hw_tdt = 0;
2156
2157		if (eop) {
2158			if (em_82547_fifo_workaround(adapter, length)) {
2159				adapter->tx_fifo_wrk_cnt++;
2160				callout_reset(&adapter->tx_fifo_timer, 1,
2161					em_82547_move_tail, adapter);
2162				break;
2163			}
2164			E1000_WRITE_REG(&adapter->hw, E1000_TDT, hw_tdt);
2165			em_82547_update_fifo_head(adapter, length);
2166			length = 0;
2167		}
2168	}
2169}
2170
2171static int
2172em_82547_fifo_workaround(struct adapter *adapter, int len)
2173{
2174	int fifo_space, fifo_pkt_len;
2175
2176	fifo_pkt_len = roundup2(len + EM_FIFO_HDR, EM_FIFO_HDR);
2177
2178	if (adapter->link_duplex == HALF_DUPLEX) {
2179		fifo_space = adapter->tx_fifo_size - adapter->tx_fifo_head;
2180
2181		if (fifo_pkt_len >= (EM_82547_PKT_THRESH + fifo_space)) {
2182			if (em_82547_tx_fifo_reset(adapter))
2183				return (0);
2184			else
2185				return (1);
2186		}
2187	}
2188
2189	return (0);
2190}
2191
2192static void
2193em_82547_update_fifo_head(struct adapter *adapter, int len)
2194{
2195	int fifo_pkt_len = roundup2(len + EM_FIFO_HDR, EM_FIFO_HDR);
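	/*
	 * Worked example (assuming EM_FIFO_HDR is 16): a 1514-byte
	 * frame occupies roundup2(1514 + 16, 16) = 1536 bytes of the
	 * internal FIFO, i.e. length plus header rounded up to the
	 * next 16-byte boundary.
	 */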
2196
2197	/* tx_fifo_head is always 16 byte aligned */
2198	adapter->tx_fifo_head += fifo_pkt_len;
2199	if (adapter->tx_fifo_head >= adapter->tx_fifo_size) {
2200		adapter->tx_fifo_head -= adapter->tx_fifo_size;
2201	}
2202}
2203
2204
2205static int
2206em_82547_tx_fifo_reset(struct adapter *adapter)
2207{
2208	uint32_t tctl;
2209
2210	if ((E1000_READ_REG(&adapter->hw, E1000_TDT) ==
2211	    E1000_READ_REG(&adapter->hw, E1000_TDH)) &&
2212	    (E1000_READ_REG(&adapter->hw, E1000_TDFT) ==
2213	    E1000_READ_REG(&adapter->hw, E1000_TDFH)) &&
2214	    (E1000_READ_REG(&adapter->hw, E1000_TDFTS) ==
2215	    E1000_READ_REG(&adapter->hw, E1000_TDFHS)) &&
2216	    (E1000_READ_REG(&adapter->hw, E1000_TDFPC) == 0)) {
2217		/* Disable TX unit */
2218		tctl = E1000_READ_REG(&adapter->hw, E1000_TCTL);
2219		E1000_WRITE_REG(&adapter->hw, E1000_TCTL,
2220		    tctl & ~E1000_TCTL_EN);
2221
2222		/* Reset FIFO pointers */
2223		E1000_WRITE_REG(&adapter->hw, E1000_TDFT,
2224		    adapter->tx_head_addr);
2225		E1000_WRITE_REG(&adapter->hw, E1000_TDFH,
2226		    adapter->tx_head_addr);
2227		E1000_WRITE_REG(&adapter->hw, E1000_TDFTS,
2228		    adapter->tx_head_addr);
2229		E1000_WRITE_REG(&adapter->hw, E1000_TDFHS,
2230		    adapter->tx_head_addr);
2231
2232		/* Re-enable TX unit */
2233		E1000_WRITE_REG(&adapter->hw, E1000_TCTL, tctl);
2234		E1000_WRITE_FLUSH(&adapter->hw);
2235
2236		adapter->tx_fifo_head = 0;
2237		adapter->tx_fifo_reset_cnt++;
2238
2239		return (TRUE);
2240	}
2241	else {
2242		return (FALSE);
2243	}
2244}
2245
2246static void
2247em_set_promisc(struct adapter *adapter)
2248{
2249	struct ifnet	*ifp = adapter->ifp;
2250	uint32_t	reg_rctl;
2251
2252	reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2253
2254	if (ifp->if_flags & IFF_PROMISC) {
2255		reg_rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
2256		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2257	} else if (ifp->if_flags & IFF_ALLMULTI) {
2258		reg_rctl |= E1000_RCTL_MPE;
2259		reg_rctl &= ~E1000_RCTL_UPE;
2260		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2261	}
2262}
2263
2264static void
2265em_disable_promisc(struct adapter *adapter)
2266{
2267	uint32_t	reg_rctl;
2268
2269	reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2270
2271	reg_rctl &=  (~E1000_RCTL_UPE);
2272	reg_rctl &=  (~E1000_RCTL_MPE);
2273	E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2274}
2275
2276
2277/*********************************************************************
2278 *  Multicast Update
2279 *
2280 *  This routine is called whenever multicast address list is updated.
2281 *
2282 **********************************************************************/
2283
2284static void
2285em_set_multi(struct adapter *adapter)
2286{
2287	struct ifnet	*ifp = adapter->ifp;
2288	struct ifmultiaddr *ifma;
2289	uint32_t reg_rctl = 0;
2290	uint8_t  mta[512]; /* Largest MTA is 4096 bits */
2291	int mcnt = 0;
2292
2293	IOCTL_DEBUGOUT("em_set_multi: begin");
2294
2295	if (adapter->hw.mac.type == e1000_82542 &&
2296	    adapter->hw.revision_id == E1000_REVISION_2) {
2297		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2298		if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
2299			e1000_pci_clear_mwi(&adapter->hw);
2300		reg_rctl |= E1000_RCTL_RST;
2301		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2302		msec_delay(5);
2303	}
2304
2305	IF_ADDR_LOCK(ifp);
2306	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
2307		if (ifma->ifma_addr->sa_family != AF_LINK)
2308			continue;
2309
2310		if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
2311			break;
2312
2313		bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
2314		    &mta[mcnt * ETH_ADDR_LEN], ETH_ADDR_LEN);
2315		mcnt++;
2316	}
2317	IF_ADDR_UNLOCK(ifp);
2318
2319	if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES) {
2320		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2321		reg_rctl |= E1000_RCTL_MPE;
2322		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2323	} else
2324		e1000_mc_addr_list_update(&adapter->hw, mta,
2325		    mcnt, 1, adapter->hw.mac.rar_entry_count);
2326
2327	if (adapter->hw.mac.type == e1000_82542 &&
2328	    adapter->hw.revision_id == E1000_REVISION_2) {
2329		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2330		reg_rctl &= ~E1000_RCTL_RST;
2331		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2332		msec_delay(5);
2333		if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
2334			e1000_pci_set_mwi(&adapter->hw);
2335	}
2336}
2337
2338
2339/*********************************************************************
2340 *  Timer routine
2341 *
2342 *  This routine checks for link status and updates statistics.
2343 *
2344 **********************************************************************/
2345
2346static void
2347em_local_timer(void *arg)
2348{
2349	struct adapter	*adapter = arg;
2350	struct ifnet	*ifp = adapter->ifp;
2351
2352	EM_LOCK_ASSERT(adapter);
2353
2354	e1000_check_for_link(&adapter->hw);
2355	em_update_link_status(adapter);
2356	em_update_stats_counters(adapter);
2357	if (em_display_debug_stats && (ifp->if_drv_flags & IFF_DRV_RUNNING))
2358		em_print_hw_stats(adapter);
2359	em_smartspeed(adapter);
2360	/*
2361	 * Each second we check the watchdog to
2362	 * protect against hardware hangs.
2363	 */
2364	em_watchdog(adapter);
2365
2366	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
2367
2368}
2369
2370static void
2371em_update_link_status(struct adapter *adapter)
2372{
2373	struct ifnet *ifp = adapter->ifp;
2374	device_t dev = adapter->dev;
2375
2376	if (E1000_READ_REG(&adapter->hw, E1000_STATUS) &
2377	    E1000_STATUS_LU) {
2378		if (adapter->link_active == 0) {
2379			e1000_get_speed_and_duplex(&adapter->hw,
2380			    &adapter->link_speed, &adapter->link_duplex);
2381			/* Check if we must disable SPEED_MODE bit on PCI-E */
2382			if ((adapter->link_speed != SPEED_1000) &&
2383			    ((adapter->hw.mac.type == e1000_82571) ||
2384			    (adapter->hw.mac.type == e1000_82572))) {
2385				int tarc0;
2386
2387				tarc0 = E1000_READ_REG(&adapter->hw,
2388				    E1000_TARC0);
2389				tarc0 &= ~SPEED_MODE_BIT;
2390				E1000_WRITE_REG(&adapter->hw,
2391				    E1000_TARC0, tarc0);
2392			}
2393			if (bootverbose)
2394				device_printf(dev, "Link is up %d Mbps %s\n",
2395				    adapter->link_speed,
2396				    ((adapter->link_duplex == FULL_DUPLEX) ?
2397				    "Full Duplex" : "Half Duplex"));
2398			adapter->link_active = 1;
2399			adapter->smartspeed = 0;
2400			ifp->if_baudrate = adapter->link_speed * 1000000;
2401			if_link_state_change(ifp, LINK_STATE_UP);
2402		}
2403	} else {
2404		if (adapter->link_active == 1) {
2405			ifp->if_baudrate = adapter->link_speed = 0;
2406			adapter->link_duplex = 0;
2407			if (bootverbose)
2408				device_printf(dev, "Link is Down\n");
2409			adapter->link_active = 0;
2410			if_link_state_change(ifp, LINK_STATE_DOWN);
2411		}
2412	}
2413}
2414
2415/*********************************************************************
2416 *
2417 *  This routine disables all traffic on the adapter by issuing a
2418 *  global reset on the MAC and deallocates TX/RX buffers.
2419 *
2420 **********************************************************************/
2421
2422static void
2423em_stop(void *arg)
2424{
2425	struct adapter	*adapter = arg;
2426	struct ifnet	*ifp = adapter->ifp;
2427
2428	EM_LOCK_ASSERT(adapter);
2429
2430	INIT_DEBUGOUT("em_stop: begin");
2431
2432	em_disable_intr(adapter);
2433	callout_stop(&adapter->timer);
2434	callout_stop(&adapter->tx_fifo_timer);
2435	em_free_transmit_structures(adapter);
2436	em_free_receive_structures(adapter);
2437
2438	/* Tell the stack that the interface is no longer active */
2439	ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
2440
2441	e1000_reset_hw(&adapter->hw);
2442	if (adapter->hw.mac.type >= e1000_82544)
2443		E1000_WRITE_REG(&adapter->hw, E1000_WUC, 0);
2444}
2445
2446
2447/*********************************************************************
2448 *
2449 *  Determine hardware revision.
2450 *
2451 **********************************************************************/
2452static void
2453em_identify_hardware(struct adapter *adapter)
2454{
2455	device_t dev = adapter->dev;
2456
2457	/* Make sure our PCI config space has the necessary stuff set */
2458	adapter->hw.bus.pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2);
2459	if ((adapter->hw.bus.pci_cmd_word & PCIM_CMD_BUSMASTEREN) == 0 ||
2460	    (adapter->hw.bus.pci_cmd_word & PCIM_CMD_MEMEN) == 0) {
2461		device_printf(dev, "Memory Access and/or Bus Master bits "
2462		    "were not set!\n");
2463		adapter->hw.bus.pci_cmd_word |=
2464		(PCIM_CMD_BUSMASTEREN | PCIM_CMD_MEMEN);
2465		pci_write_config(dev, PCIR_COMMAND,
2466		    adapter->hw.bus.pci_cmd_word, 2);
2467	}
2468
2469	/* Save off the information about this board */
2470	adapter->hw.vendor_id = pci_get_vendor(dev);
2471	adapter->hw.device_id = pci_get_device(dev);
2472	adapter->hw.revision_id = pci_read_config(dev, PCIR_REVID, 1);
2473	adapter->hw.subsystem_vendor_id =
2474	    pci_read_config(dev, PCIR_SUBVEND_0, 2);
2475	adapter->hw.subsystem_device_id = pci_read_config(dev, PCIR_SUBDEV_0, 2);
2476
2477	/* Do Shared Code Init and Setup */
2478	if (e1000_set_mac_type(&adapter->hw)) {
2479		device_printf(dev, "Setup init failure\n");
2480		return;
2481	}
2482}
2483
2484static int
2485em_allocate_pci_resources(struct adapter *adapter)
2486{
2487	device_t	dev = adapter->dev;
2488	int		val, rid;
2489
2490	rid = PCIR_BAR(0);
2491	adapter->res_memory = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
2492	    &rid, RF_ACTIVE);
2493	if (adapter->res_memory == NULL) {
2494		device_printf(dev, "Unable to allocate bus resource: memory\n");
2495		return (ENXIO);
2496	}
2497	adapter->osdep.mem_bus_space_tag =
2498	    rman_get_bustag(adapter->res_memory);
2499	adapter->osdep.mem_bus_space_handle =
2500	    rman_get_bushandle(adapter->res_memory);
2501	adapter->hw.hw_addr = (uint8_t *)&adapter->osdep.mem_bus_space_handle;
2502
2503	/* Only older adapters use IO mapping */
2504	if ((adapter->hw.mac.type > e1000_82542) &&
2505	    (adapter->hw.mac.type < e1000_82571)) {
2506		/* Figure out where our IO BAR is */
2507		for (rid = PCIR_BAR(0); rid < PCIR_CIS;) {
2508			val = pci_read_config(dev, rid, 4);
2509			if (EM_BAR_TYPE(val) == EM_BAR_TYPE_IO) {
2510				adapter->io_rid = rid;
2511				break;
2512			}
2513			rid += 4;
2514			/* check for 64bit BAR */
2515			if (EM_BAR_MEM_TYPE(val) == EM_BAR_MEM_TYPE_64BIT)
2516				rid += 4;
2517		}
2518		if (rid >= PCIR_CIS) {
2519			device_printf(dev, "Unable to locate IO BAR\n");
2520			return (ENXIO);
2521		}
2522		adapter->res_ioport = bus_alloc_resource_any(dev,
2523		    SYS_RES_IOPORT, &adapter->io_rid, RF_ACTIVE);
2524		if (adapter->res_ioport == NULL) {
2525			device_printf(dev, "Unable to allocate bus resource: "
2526			    "ioport\n");
2527			return (ENXIO);
2528		}
2529		adapter->hw.io_base = 0;
2530		adapter->osdep.io_bus_space_tag =
2531		    rman_get_bustag(adapter->res_ioport);
2532		adapter->osdep.io_bus_space_handle =
2533		    rman_get_bushandle(adapter->res_ioport);
2534	}
2535
2536	/*
2537	 * Set up MSI/X or MSI if PCI Express;
2538	 * only the newest adapters can use MSI/X,
2539	 * and real support for it is forthcoming.
2540	 */
2541	adapter->msi = 0; /* Set defaults */
2542	rid = 0x0;
2543	if (adapter->hw.mac.type >= e1000_82575) {
2544		/*
2545		 * Setup MSI/X
2546		 */
2547		rid = PCIR_BAR(EM_MSIX_BAR);
2548		adapter->msix_mem = bus_alloc_resource_any(dev,
2549		    SYS_RES_MEMORY, &rid, RF_ACTIVE);
2550		if (!adapter->msix_mem) {
2551			device_printf(dev, "Unable to map MSIX table\n");
2552			return (ENXIO);
2553		}
2554		/*
2555		 * Eventually this may be used
2556		 * for Multiqueue, for now we will
2557		 * just use one vector.
2558		 *
2559		 * val = pci_msix_count(dev);
2560		 */
2561		val = 1;
2562		if (val && pci_alloc_msix(dev, &val) == 0) {
2563			rid = 1;
2564			adapter->msi = 1;
2565		}
2566	} else if (adapter->hw.mac.type > e1000_82571) {
2567		val = pci_msi_count(dev);
2568		if (val == 1 && pci_alloc_msi(dev, &val) == 0) {
2569			rid = 1;
2570			adapter->msi = 1;
2571		}
2572	}
2573	adapter->res_interrupt = bus_alloc_resource_any(dev,
2574	    SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2575	if (adapter->res_interrupt == NULL) {
2576		device_printf(dev, "Unable to allocate bus resource: "
2577		    "interrupt\n");
2578		return (ENXIO);
2579	}
2580
2581	adapter->hw.back = &adapter->osdep;
2582
2583	return (0);
2584}
2585
2586/*********************************************************************
2587 *
2588 *  Setup the appropriate Interrupt handlers.
2589 *
2590 **********************************************************************/
2591int
2592em_allocate_intr(struct adapter *adapter)
2593{
2594	device_t dev = adapter->dev;
2595	int error;
2596
2597	/* Manually turn off all interrupts */
2598	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2599
2600#ifdef DEVICE_POLLING
2601	/* We do Legacy setup */
2602	if (adapter->int_handler_tag == NULL &&
2603	    (error = bus_setup_intr(dev, adapter->res_interrupt,
2604	    INTR_TYPE_NET | INTR_MPSAFE, NULL, em_intr, adapter,
2605	    &adapter->int_handler_tag)) != 0) {
2606		device_printf(dev, "Failed to register interrupt handler");
2607		return (error);
2608	}
2609
2610#else
2611	/*
2612	 * Try allocating a fast interrupt and the associated deferred
2613	 * processing contexts.
2614	 */
2615	TASK_INIT(&adapter->rxtx_task, 0, em_handle_rxtx, adapter);
2616	TASK_INIT(&adapter->link_task, 0, em_handle_link, adapter);
2617	adapter->tq = taskqueue_create_fast("em_taskq", M_NOWAIT,
2618	    taskqueue_thread_enqueue, &adapter->tq);
2619	taskqueue_start_threads(&adapter->tq, 1, PI_NET, "%s taskq",
2620	    device_get_nameunit(adapter->dev));
2621	if ((error = bus_setup_intr(dev, adapter->res_interrupt,
2622	    INTR_TYPE_NET, em_intr_fast, NULL, adapter,
2623	    &adapter->int_handler_tag)) != 0) {
2624		device_printf(dev, "Failed to register fast interrupt "
2625			    "handler: %d\n", error);
2626		taskqueue_free(adapter->tq);
2627		adapter->tq = NULL;
2628		return (error);
2629	}
2630#endif
2631
2632	em_enable_intr(adapter);
2633	return (0);
2634}
2635
2636static void
2637em_free_intr(struct adapter *adapter)
2638{
2639	device_t dev = adapter->dev;
2640
2641	if (adapter->res_interrupt != NULL) {
2642		bus_teardown_intr(dev, adapter->res_interrupt,
2643			adapter->int_handler_tag);
2644		adapter->int_handler_tag = NULL;
2645	}
2646	if (adapter->tq != NULL) {
2647		taskqueue_drain(adapter->tq, &adapter->rxtx_task);
2648		taskqueue_drain(taskqueue_fast, &adapter->link_task);
2649		taskqueue_free(adapter->tq);
2650		adapter->tq = NULL;
2651	}
2652}
2653
2654static void
2655em_free_pci_resources(struct adapter *adapter)
2656{
2657	device_t dev = adapter->dev;
2658
2659	if (adapter->res_interrupt != NULL)
2660		bus_release_resource(dev, SYS_RES_IRQ,
2661		    adapter->msi ? 1 : 0, adapter->res_interrupt);
2662
2663	if (adapter->msix_mem != NULL)
2664		bus_release_resource(dev, SYS_RES_MEMORY,
2665		    PCIR_BAR(EM_MSIX_BAR), adapter->msix_mem);
2666
2667	if (adapter->msi)
2668		pci_release_msi(dev);
2669
2670	if (adapter->res_memory != NULL)
2671		bus_release_resource(dev, SYS_RES_MEMORY,
2672		    PCIR_BAR(0), adapter->res_memory);
2673
2674	if (adapter->flash_mem != NULL)
2675		bus_release_resource(dev, SYS_RES_MEMORY,
2676		    EM_FLASH, adapter->flash_mem);
2677
2678	if (adapter->res_ioport != NULL)
2679		bus_release_resource(dev, SYS_RES_IOPORT,
2680		    adapter->io_rid, adapter->res_ioport);
2681}
2682
2683/*********************************************************************
2684 *
2685 *  Initialize the hardware to a configuration
2686 *  as specified by the adapter structure.
2687 *
2688 **********************************************************************/
2689static int
2690em_hardware_init(struct adapter *adapter)
2691{
2692	device_t dev = adapter->dev;
2693	uint16_t rx_buffer_size;
2694
2695	INIT_DEBUGOUT("em_hardware_init: begin");
2696
2697	/* Issue a global reset */
2698	e1000_reset_hw(&adapter->hw);
2699
2700	/* When hardware is reset, fifo_head is also reset */
2701	adapter->tx_fifo_head = 0;
2702
2703	/* Set up smart power down as default off on newer adapters. */
2704	if (!em_smart_pwr_down && (adapter->hw.mac.type == e1000_82571 ||
2705	    adapter->hw.mac.type == e1000_82572)) {
2706		uint16_t phy_tmp = 0;
2707
2708		/* Speed up time to link by disabling smart power down. */
2709		e1000_read_phy_reg(&adapter->hw,
2710		    IGP02E1000_PHY_POWER_MGMT, &phy_tmp);
2711		phy_tmp &= ~IGP02E1000_PM_SPD;
2712		e1000_write_phy_reg(&adapter->hw,
2713		    IGP02E1000_PHY_POWER_MGMT, phy_tmp);
2714	}
2715
2716	/*
2717	 * These parameters control the automatic generation (Tx) and
2718	 * response (Rx) to Ethernet PAUSE frames.
2719	 * - High water mark should allow for at least two frames to be
2720	 *   received after sending an XOFF.
2721	 * - Low water mark works best when it is very near the high water mark.
2722	 *   This allows the receiver to restart by sending XON when it has
2723	 *   drained a bit. Here we use an arbitrary value of 1500 which will
2724	 *   restart after one full frame is pulled from the buffer. There
2725	 *   could be several smaller frames in the buffer and if so they will
2726	 *   not trigger the XON until their total number reduces the buffer
2727	 *   by 1500.
2728	 * - The pause time is fairly large at 1000 x 512ns = 512 usec.
2729	 */
2730	rx_buffer_size = ((E1000_READ_REG(&adapter->hw, E1000_PBA) &
2731	    0xffff) << 10);
2732
2733	adapter->hw.mac.fc_high_water = rx_buffer_size -
2734	    roundup2(adapter->hw.mac.max_frame_size, 1024);
2735	adapter->hw.mac.fc_low_water = adapter->hw.mac.fc_high_water - 1500;
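	/*
	 * Worked example: with a 48 KB receive buffer (PBA low word
	 * 0x0030, so 0x30 << 10 = 49152 bytes) and a 1518-byte max
	 * frame, fc_high_water = 49152 - roundup2(1518, 1024) = 47104
	 * and fc_low_water = 47104 - 1500 = 45604.
	 */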
2736	if (adapter->hw.mac.type == e1000_80003es2lan)
2737		adapter->hw.mac.fc_pause_time = 0xFFFF;
2738	else
2739		adapter->hw.mac.fc_pause_time = EM_FC_PAUSE_TIME;
2740	adapter->hw.mac.fc_send_xon = TRUE;
2741	adapter->hw.mac.fc = e1000_fc_full;
2742
2743	if (e1000_init_hw(&adapter->hw) < 0) {
2744		device_printf(dev, "Hardware Initialization Failed\n");
2745		return (EIO);
2746	}
2747
2748	e1000_check_for_link(&adapter->hw);
2749
2750	return (0);
2751}
2752
2753/*********************************************************************
2754 *
2755 *  Setup networking device structure and register an interface.
2756 *
2757 **********************************************************************/
2758static void
2759em_setup_interface(device_t dev, struct adapter *adapter)
2760{
2761	struct ifnet   *ifp;
2762
2763	INIT_DEBUGOUT("em_setup_interface: begin");
2764
2765	ifp = adapter->ifp = if_alloc(IFT_ETHER);
2766	if (ifp == NULL)
2767		panic("%s: can not if_alloc()", device_get_nameunit(dev));
2768	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
2769	ifp->if_mtu = ETHERMTU;
2770	ifp->if_init =  em_init;
2771	ifp->if_softc = adapter;
2772	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
2773	ifp->if_ioctl = em_ioctl;
2774	ifp->if_start = em_start;
2775	IFQ_SET_MAXLEN(&ifp->if_snd, adapter->num_tx_desc - 1);
2776	ifp->if_snd.ifq_drv_maxlen = adapter->num_tx_desc - 1;
2777	IFQ_SET_READY(&ifp->if_snd);
2778
2779	ether_ifattach(ifp, adapter->hw.mac.addr);
2780
2781	ifp->if_capabilities = ifp->if_capenable = 0;
2782
2783	if (adapter->hw.mac.type >= e1000_82543) {
2784		ifp->if_capabilities |= IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM;
2785		ifp->if_capenable |= IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM;
2786	}
2787
2788	/* Identify TSO capable adapters */
2789	if ((adapter->hw.mac.type > e1000_82544) &&
2790	    (adapter->hw.mac.type != e1000_82547))
2791		ifp->if_capabilities |= IFCAP_TSO4;
2792	/*
2793	 * By default TSO is enabled only on PCI-E; this
2794	 * can be overridden by ifconfig.
2795	 */
2796	if (adapter->hw.mac.type >= e1000_82571)
2797		ifp->if_capenable |= IFCAP_TSO4;
2798
2799	/*
2800	 * Tell the upper layer(s) we support long frames.
2801	 */
2802	ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header);
2803	ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU;
2804	ifp->if_capenable |= IFCAP_VLAN_MTU;
2805
2806#ifdef DEVICE_POLLING
2807	ifp->if_capabilities |= IFCAP_POLLING;
2808#endif
2809
2810	/*
2811	 * Specify the media types supported by this adapter and register
2812	 * callbacks to update media and link information
2813	 */
2814	ifmedia_init(&adapter->media, IFM_IMASK,
2815	    em_media_change, em_media_status);
2816	if ((adapter->hw.media_type == e1000_media_type_fiber) ||
2817	    (adapter->hw.media_type == e1000_media_type_internal_serdes)) {
2818		u_char fiber_type = IFM_1000_SX;	/* default type */
2819
2820		if (adapter->hw.mac.type == e1000_82545)
2821			fiber_type = IFM_1000_LX;
2822		ifmedia_add(&adapter->media, IFM_ETHER | fiber_type | IFM_FDX,
2823			    0, NULL);
2824		ifmedia_add(&adapter->media, IFM_ETHER | fiber_type, 0, NULL);
2825	} else {
2826		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T, 0, NULL);
2827		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T | IFM_FDX,
2828			    0, NULL);
2829		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX,
2830			    0, NULL);
2831		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX | IFM_FDX,
2832			    0, NULL);
2833		if (adapter->hw.phy.type != e1000_phy_ife) {
2834			ifmedia_add(&adapter->media,
2835				IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
2836			ifmedia_add(&adapter->media,
2837				IFM_ETHER | IFM_1000_T, 0, NULL);
2838		}
2839	}
2840	ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL);
2841	ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO);
2842}
2843
2844
2845/*********************************************************************
2846 *
2847 *  Workaround for SmartSpeed on 82541 and 82547 controllers
2848 *
2849 **********************************************************************/
2850static void
2851em_smartspeed(struct adapter *adapter)
2852{
2853	uint16_t phy_tmp;
2854
2855	if (adapter->link_active || (adapter->hw.phy.type != e1000_phy_igp) ||
2856	    adapter->hw.mac.autoneg == 0 ||
2857	    (adapter->hw.phy.autoneg_advertised & ADVERTISE_1000_FULL) == 0)
2858		return;
2859
2860	if (adapter->smartspeed == 0) {
2861		/* If Master/Slave config fault is asserted twice,
2862		 * we assume a back-to-back connection */
2863		e1000_read_phy_reg(&adapter->hw, PHY_1000T_STATUS, &phy_tmp);
2864		if (!(phy_tmp & SR_1000T_MS_CONFIG_FAULT))
2865			return;
2866		e1000_read_phy_reg(&adapter->hw, PHY_1000T_STATUS, &phy_tmp);
2867		if (phy_tmp & SR_1000T_MS_CONFIG_FAULT) {
2868			e1000_read_phy_reg(&adapter->hw, PHY_1000T_CTRL, &phy_tmp);
2869			if(phy_tmp & CR_1000T_MS_ENABLE) {
2870				phy_tmp &= ~CR_1000T_MS_ENABLE;
2871				e1000_write_phy_reg(&adapter->hw, PHY_1000T_CTRL,
2872				    phy_tmp);
2873				adapter->smartspeed++;
2874				if(adapter->hw.mac.autoneg &&
2875				   !e1000_phy_setup_autoneg(&adapter->hw) &&
2876				   !e1000_read_phy_reg(&adapter->hw, PHY_CONTROL,
2877				    &phy_tmp)) {
2878					phy_tmp |= (MII_CR_AUTO_NEG_EN |
2879						    MII_CR_RESTART_AUTO_NEG);
2880					e1000_write_phy_reg(&adapter->hw, PHY_CONTROL,
2881					    phy_tmp);
2882				}
2883			}
2884		}
2885		return;
2886	} else if(adapter->smartspeed == EM_SMARTSPEED_DOWNSHIFT) {
2887		/* If still no link, perhaps using 2/3 pair cable */
2888		e1000_read_phy_reg(&adapter->hw, PHY_1000T_CTRL, &phy_tmp);
2889		phy_tmp |= CR_1000T_MS_ENABLE;
2890		e1000_write_phy_reg(&adapter->hw, PHY_1000T_CTRL, phy_tmp);
2891		if(adapter->hw.mac.autoneg &&
2892		   !e1000_phy_setup_autoneg(&adapter->hw) &&
2893		   !e1000_read_phy_reg(&adapter->hw, PHY_CONTROL, &phy_tmp)) {
2894			phy_tmp |= (MII_CR_AUTO_NEG_EN |
2895				    MII_CR_RESTART_AUTO_NEG);
2896			e1000_write_phy_reg(&adapter->hw, PHY_CONTROL, phy_tmp);
2897		}
2898	}
2899	/* Restart process after EM_SMARTSPEED_MAX iterations */
2900	if(adapter->smartspeed++ == EM_SMARTSPEED_MAX)
2901		adapter->smartspeed = 0;
2902}
2903
2904
2905/*
2906 * Manage DMA'able memory.
2907 */
2908static void
2909em_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
2910{
2911	if (error)
2912		return;
2913	*(bus_addr_t *) arg = segs[0].ds_addr;
2914}
2915
2916static int
2917em_dma_malloc(struct adapter *adapter, bus_size_t size,
2918        struct em_dma_alloc *dma, int mapflags)
2919{
2920	int error;
2921
2922	error = bus_dma_tag_create(bus_get_dma_tag(adapter->dev), /* parent */
2923				EM_DBA_ALIGN, 0,	/* alignment, bounds */
2924				BUS_SPACE_MAXADDR,	/* lowaddr */
2925				BUS_SPACE_MAXADDR,	/* highaddr */
2926				NULL, NULL,		/* filter, filterarg */
2927				size,			/* maxsize */
2928				1,			/* nsegments */
2929				size,			/* maxsegsize */
2930				0,			/* flags */
2931				NULL,			/* lockfunc */
2932				NULL,			/* lockarg */
2933				&dma->dma_tag);
2934	if (error) {
2935		device_printf(adapter->dev,
2936		    "%s: bus_dma_tag_create failed: %d\n",
2937		    __func__, error);
2938		goto fail_0;
2939	}
2940
2941	error = bus_dmamem_alloc(dma->dma_tag, (void**) &dma->dma_vaddr,
2942	    BUS_DMA_NOWAIT, &dma->dma_map);
2943	if (error) {
2944		device_printf(adapter->dev,
2945		    "%s: bus_dmamem_alloc(%ju) failed: %d\n",
2946		    __func__, (uintmax_t)size, error);
2947		goto fail_2;
2948	}
2949
2950	dma->dma_paddr = 0;
2951	error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
2952	    size, em_dmamap_cb, &dma->dma_paddr, mapflags | BUS_DMA_NOWAIT);
2953	if (error || dma->dma_paddr == 0) {
2954		device_printf(adapter->dev,
2955		    "%s: bus_dmamap_load failed: %d\n",
2956		    __func__, error);
2957		goto fail_3;
2958	}
2959
2960	return (0);
2961
2962fail_3:
2963	bus_dmamap_unload(dma->dma_tag, dma->dma_map);
2964fail_2:
2965	bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
2966	bus_dma_tag_destroy(dma->dma_tag);
2967fail_0:
2968	dma->dma_map = NULL;
2969	dma->dma_tag = NULL;
2970
2971	return (error);
2972}
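
/*
 * Typical use of the pair above (a sketch; the real ring allocation
 * lives in this driver's attach path):
 *
 *	size = roundup2(adapter->num_tx_desc *
 *	    sizeof(struct e1000_tx_desc), EM_DBA_ALIGN);
 *	if (em_dma_malloc(adapter, size, &adapter->txdma,
 *	    BUS_DMA_NOWAIT) != 0)
 *		goto fail;	/* hypothetical error label */
 *	adapter->tx_desc_base =
 *	    (struct e1000_tx_desc *)adapter->txdma.dma_vaddr;
 *
 * with a matching em_dma_free(adapter, &adapter->txdma) on teardown.
 */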
2973
2974static void
2975em_dma_free(struct adapter *adapter, struct em_dma_alloc *dma)
2976{
2977	if (dma->dma_tag == NULL)
2978		return;
2979	if (dma->dma_map != NULL) {
2980		bus_dmamap_sync(dma->dma_tag, dma->dma_map,
2981		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
2982		bus_dmamap_unload(dma->dma_tag, dma->dma_map);
2983		bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
2984		dma->dma_map = NULL;
2985	}
2986	bus_dma_tag_destroy(dma->dma_tag);
2987	dma->dma_tag = NULL;
2988}
2989
2990
2991/*********************************************************************
2992 *
2993 *  Allocate memory for tx_buffer structures. The tx_buffer stores all
2994 *  the information needed to transmit a packet on the wire.
2995 *
2996 **********************************************************************/
2997static int
2998em_allocate_transmit_structures(struct adapter *adapter)
2999{
3000	device_t dev = adapter->dev;
3001
3002	adapter->tx_buffer_area = malloc(sizeof(struct em_buffer) *
3003	    adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO);
3004	if (adapter->tx_buffer_area == NULL) {
3005		device_printf(dev, "Unable to allocate tx_buffer memory\n");
3006		return (ENOMEM);
3007	}
3008
3009	bzero(adapter->tx_buffer_area,
3010	    (sizeof(struct em_buffer)) * adapter->num_tx_desc);
3011
3012	return (0);
3013}
3014
3015/*********************************************************************
3016 *
3017 *  Initialize transmit structures.
3018 *
3019 **********************************************************************/
3020static int
3021em_setup_transmit_structures(struct adapter *adapter)
3022{
3023	device_t dev = adapter->dev;
3024	struct em_buffer *tx_buffer;
3025	int error, i;
3026
3027	/*
3028	 * Create DMA tags for tx descriptors
3029	 */
3030	if ((error = bus_dma_tag_create(bus_get_dma_tag(dev), /* parent */
3031				1, 0,			/* alignment, bounds */
3032				BUS_SPACE_MAXADDR,	/* lowaddr */
3033				BUS_SPACE_MAXADDR,	/* highaddr */
3034				NULL, NULL,		/* filter, filterarg */
3035				EM_TSO_SIZE,		/* maxsize */
3036				EM_MAX_SCATTER,		/* nsegments */
3037				EM_TSO_SEG_SIZE,	/* maxsegsize */
3038				0,			/* flags */
3039				NULL,		/* lockfunc */
3040				NULL,		/* lockarg */
3041				&adapter->txtag)) != 0) {
3042		device_printf(dev, "Unable to allocate TX DMA tag\n");
3043		goto fail;
3044	}
3045
3046	if ((error = em_allocate_transmit_structures(adapter)) != 0)
3047		goto fail;
3048
3049	/* Clear the old ring contents */
3050	bzero(adapter->tx_desc_base,
3051	    (sizeof(struct e1000_tx_desc)) * adapter->num_tx_desc);
3052
3053	/* Create the descriptor buffer dma maps */
3054	tx_buffer = adapter->tx_buffer_area;
3055	for (i = 0; i < adapter->num_tx_desc; i++) {
3056		error = bus_dmamap_create(adapter->txtag, 0, &tx_buffer->map);
3057		if (error != 0) {
3058			device_printf(dev, "Unable to create TX DMA map\n");
3059			goto fail;
3060		}
3061		tx_buffer->next_eop = -1;
3062		tx_buffer++;
3063	}
3064
3065	adapter->next_avail_tx_desc = 0;
3066	adapter->next_tx_to_clean = 0;
3067
3068	/* Set number of descriptors available */
3069	adapter->num_tx_desc_avail = adapter->num_tx_desc;
3070
3071	bus_dmamap_sync(adapter->txdma.dma_tag, adapter->txdma.dma_map,
3072	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3073
3074	return (0);
3075
3076fail:
3077	em_free_transmit_structures(adapter);
3078	return (error);
3079}
3080
3081/*********************************************************************
3082 *
3083 *  Enable transmit unit.
3084 *
3085 **********************************************************************/
3086static void
3087em_initialize_transmit_unit(struct adapter *adapter)
3088{
3089	uint32_t	tctl, tarc, tipg = 0;
3090	uint64_t	bus_addr;
3091
3092	INIT_DEBUGOUT("em_initialize_transmit_unit: begin");
3093	/* Setup the Base and Length of the Tx Descriptor Ring */
3094	bus_addr = adapter->txdma.dma_paddr;
3095	E1000_WRITE_REG(&adapter->hw, E1000_TDLEN,
3096	    adapter->num_tx_desc * sizeof(struct e1000_tx_desc));
3097	E1000_WRITE_REG(&adapter->hw, E1000_TDBAH, (uint32_t)(bus_addr >> 32));
3098	E1000_WRITE_REG(&adapter->hw, E1000_TDBAL, (uint32_t)bus_addr);
3099
3100	/* Setup the HW Tx Head and Tail descriptor pointers */
3101	E1000_WRITE_REG(&adapter->hw, E1000_TDT, 0);
3102	E1000_WRITE_REG(&adapter->hw, E1000_TDH, 0);
3103
3104	HW_DEBUGOUT2("Base = %x, Length = %x\n",
3105	    E1000_READ_REG(&adapter->hw, E1000_TDBAL),
3106	    E1000_READ_REG(&adapter->hw, E1000_TDLEN));
3107
3108	/* Set the default values for the Tx Inter Packet Gap timer */
3109	switch (adapter->hw.mac.type) {
3110	case e1000_82542:
3111		tipg = DEFAULT_82542_TIPG_IPGT;
3112		tipg |= DEFAULT_82542_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT;
3113		tipg |= DEFAULT_82542_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT;
3114		break;
3115	case e1000_80003es2lan:
3116		tipg = DEFAULT_82543_TIPG_IPGR1;
3117		tipg |= DEFAULT_80003ES2LAN_TIPG_IPGR2 <<
3118		    E1000_TIPG_IPGR2_SHIFT;
3119		break;
3120	default:
3121		if ((adapter->hw.media_type == e1000_media_type_fiber) ||
3122		    (adapter->hw.media_type ==
3123		    e1000_media_type_internal_serdes))
3124			tipg = DEFAULT_82543_TIPG_IPGT_FIBER;
3125		else
3126			tipg = DEFAULT_82543_TIPG_IPGT_COPPER;
3127		tipg |= DEFAULT_82543_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT;
3128		tipg |= DEFAULT_82543_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT;
3129	}
3130
3131	E1000_WRITE_REG(&adapter->hw, E1000_TIPG, tipg);
3132	E1000_WRITE_REG(&adapter->hw, E1000_TIDV, adapter->tx_int_delay.value);
3133	if(adapter->hw.mac.type >= e1000_82540)
3134		E1000_WRITE_REG(&adapter->hw, E1000_TADV,
3135		    adapter->tx_abs_int_delay.value);
3136
3137	if ((adapter->hw.mac.type == e1000_82571) ||
3138	    (adapter->hw.mac.type == e1000_82572)) {
3139		tarc = E1000_READ_REG(&adapter->hw, E1000_TARC0);
3140		tarc |= SPEED_MODE_BIT;
3141		E1000_WRITE_REG(&adapter->hw, E1000_TARC0, tarc);
3142	} else if (adapter->hw.mac.type == e1000_80003es2lan) {
3143		tarc = E1000_READ_REG(&adapter->hw, E1000_TARC0);
3144		tarc |= 1;
3145		E1000_WRITE_REG(&adapter->hw, E1000_TARC0, tarc);
3146		tarc = E1000_READ_REG(&adapter->hw, E1000_TARC1);
3147		tarc |= 1;
3148		E1000_WRITE_REG(&adapter->hw, E1000_TARC1, tarc);
3149	}
3150
3151	/* Program the Transmit Control Register */
3152	tctl = E1000_READ_REG(&adapter->hw, E1000_TCTL);
3153	tctl &= ~E1000_TCTL_CT;
3154	tctl = E1000_TCTL_PSP | E1000_TCTL_RTLC | E1000_TCTL_EN |
3155		   (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT);
3156
3157	if (adapter->hw.mac.type >= e1000_82571)
3158		tctl |= E1000_TCTL_MULR;
3159
3160	/* This write will effectively turn on the transmit unit. */
3161	E1000_WRITE_REG(&adapter->hw, E1000_TCTL, tctl);
3162
3163	/* Setup Transmit Descriptor Base Settings */
3164	adapter->txd_cmd = E1000_TXD_CMD_IFCS;
3165
3166	if ((adapter->tx_int_delay.value > 0) &&
3167	    (adapter->hw.mac.type != e1000_82575))
3168		adapter->txd_cmd |= E1000_TXD_CMD_IDE;
3169
3170	/* Set the function pointer for the transmit routine */
3171	if (adapter->hw.mac.type >= e1000_82575)
3172		adapter->em_xmit = em_adv_encap;
3173	else
3174		adapter->em_xmit = em_encap;
3175}
3176
3177/*********************************************************************
3178 *
3179 *  Free all transmit related data structures.
3180 *
3181 **********************************************************************/
3182static void
3183em_free_transmit_structures(struct adapter *adapter)
3184{
3185	struct em_buffer *tx_buffer;
3186	int i;
3187
3188	INIT_DEBUGOUT("free_transmit_structures: begin");
3189
3190	if (adapter->tx_buffer_area != NULL) {
3191		tx_buffer = adapter->tx_buffer_area;
3192		for (i = 0; i < adapter->num_tx_desc; i++, tx_buffer++) {
3193			if (tx_buffer->m_head != NULL) {
3194				bus_dmamap_sync(adapter->txtag, tx_buffer->map,
3195				    BUS_DMASYNC_POSTWRITE);
3196				bus_dmamap_unload(adapter->txtag,
3197				    tx_buffer->map);
3198				m_freem(tx_buffer->m_head);
3199				tx_buffer->m_head = NULL;
3200			} else if (tx_buffer->map != NULL)
3201				bus_dmamap_unload(adapter->txtag,
3202				    tx_buffer->map);
3203			if (tx_buffer->map != NULL) {
3204				bus_dmamap_destroy(adapter->txtag,
3205				    tx_buffer->map);
3206				tx_buffer->map = NULL;
3207			}
3208		}
3209	}
3210	if (adapter->tx_buffer_area != NULL) {
3211		free(adapter->tx_buffer_area, M_DEVBUF);
3212		adapter->tx_buffer_area = NULL;
3213	}
3214	if (adapter->txtag != NULL) {
3215		bus_dma_tag_destroy(adapter->txtag);
3216		adapter->txtag = NULL;
3217	}
3218}
3219
3220/*********************************************************************
3221 *
3222 *  The offload context needs to be set when we transfer the first
3223 *  packet of a particular protocol (TCP/UDP). This routine has been
3224 *  enhanced to deal with inserted VLAN headers, and IPV6 (not complete)
3225 *
3226 **********************************************************************/
3227static void
3228em_transmit_checksum_setup(struct adapter *adapter, struct mbuf *mp,
3229    uint32_t *txd_upper, uint32_t *txd_lower)
3230{
3231	struct e1000_context_desc *TXD;
3232	struct em_buffer *tx_buffer;
3233	struct ether_vlan_header *eh;
3234	struct ip *ip;
3235	struct ip6_hdr *ip6;
3236	struct tcphdr *th;
3237	int curr_txd, ehdrlen, hdr_len, ip_hlen;
3238	uint32_t cmd = 0;
3239	uint16_t etype;
3240	uint8_t ipproto;
3241
3242	/* Setup checksum offload context. */
3243	curr_txd = adapter->next_avail_tx_desc;
3244	tx_buffer = &adapter->tx_buffer_area[curr_txd];
3245	TXD = (struct e1000_context_desc *) &adapter->tx_desc_base[curr_txd];
3246
3247	*txd_lower = E1000_TXD_CMD_DEXT |	/* Extended descr type */
3248		     E1000_TXD_DTYP_D;		/* Data descr */
3249
3250	/*
3251	 * Determine where frame payload starts.
3252	 * Jump over vlan headers if already present,
3253	 * helpful for QinQ too.
3254	 */
3255	eh = mtod(mp, struct ether_vlan_header *);
3256	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
3257		etype = ntohs(eh->evl_proto);
3258		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3259	} else {
3260		etype = ntohs(eh->evl_encap_proto);
3261		ehdrlen = ETHER_HDR_LEN;
3262	}
3263
3264	/*
3265	 * We only support TCP/UDP for IPv4 and IPv6 for the moment.
3266	 * TODO: Support SCTP too when it hits the tree.
3267	 */
3268	switch (etype) {
3269	case ETHERTYPE_IP:
3270		ip = (struct ip *)(mp->m_data + ehdrlen);
3271		ip_hlen = ip->ip_hl << 2;
3272
3273		/* Setup of IP header checksum. */
3274		if (mp->m_pkthdr.csum_flags & CSUM_IP) {
3275			/*
3276			 * Start offset for header checksum calculation.
3277			 * End offset for header checksum calculation.
3278			 * Offset of place to put the checksum.
3279			 */
3280			TXD->lower_setup.ip_fields.ipcss = ehdrlen;
3281			TXD->lower_setup.ip_fields.ipcse =
3282			    htole16(ehdrlen + ip_hlen);
3283			TXD->lower_setup.ip_fields.ipcso =
3284			    ehdrlen + offsetof(struct ip, ip_sum);
3285			cmd |= E1000_TXD_CMD_IP;
3286			*txd_upper |= E1000_TXD_POPTS_IXSM << 8;
3287		}
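		/*
		 * Example: for an untagged IPv4 frame with a 20-byte
		 * header, the settings above give ipcss = 14,
		 * ipcse = 34 and ipcso = 14 + offsetof(struct ip,
		 * ip_sum) = 24.
		 */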
3288
3289		if (mp->m_len < ehdrlen + ip_hlen)
3290			return;	/* failure */
3291
3292		hdr_len = ehdrlen + ip_hlen;
3293		ipproto = ip->ip_p;
3294
3295		break;
3296	case ETHERTYPE_IPV6:
3297		ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
3298		ip_hlen = sizeof(struct ip6_hdr); /* XXX: No header stacking. */
3299
3300		if (mp->m_len < ehdrlen + ip_hlen)
3301			return;	/* failure */
3302
3303		/* IPv6 doesn't have a header checksum. */
3304
3305		hdr_len = ehdrlen + ip_hlen;
3306		ipproto = ip6->ip6_nxt;
3307
3308		break;
3309	default:
3310		*txd_upper = 0;
3311		*txd_lower = 0;
3312		return;
3313	}
3314
3315	switch (ipproto) {
3316	case IPPROTO_TCP:
3317		if (mp->m_pkthdr.csum_flags & CSUM_TCP) {
3318			/*
3319			 * Start offset for payload checksum calculation.
3320			 * End offset for payload checksum calculation.
3321			 * Offset of place to put the checksum.
3322			 */
3323			th = (struct tcphdr *)(mp->m_data + hdr_len);
3324			TXD->upper_setup.tcp_fields.tucss = hdr_len;
3325			TXD->upper_setup.tcp_fields.tucse = htole16(0);
3326			TXD->upper_setup.tcp_fields.tucso =
3327			    hdr_len + offsetof(struct tcphdr, th_sum);
3328			cmd |= E1000_TXD_CMD_TCP;
3329			*txd_upper |= E1000_TXD_POPTS_TXSM << 8;
3330		}
3331		break;
3332	case IPPROTO_UDP:
3333		if (mp->m_pkthdr.csum_flags & CSUM_UDP) {
3334			/*
3335			 * Start offset for payload checksum calculation.
3336			 * End offset for payload checksum calculation.
3337			 * Offset of place to put the checksum.
3338			 */
3339			TXD->upper_setup.tcp_fields.tucss = hdr_len;
3340			TXD->upper_setup.tcp_fields.tucse = htole16(0);
3341			TXD->upper_setup.tcp_fields.tucso =
3342			    hdr_len + offsetof(struct udphdr, uh_sum);
3343			*txd_upper |= E1000_TXD_POPTS_TXSM << 8;
3344		}
3345		break;
3346	default:
3347		break;
3348	}
3349
3350	TXD->tcp_seg_setup.data = htole32(0);
3351	TXD->cmd_and_length =
3352	    htole32(adapter->txd_cmd | E1000_TXD_CMD_DEXT | cmd);
3353	tx_buffer->m_head = NULL;
3354	tx_buffer->next_eop = -1;
3355
3356	if (++curr_txd == adapter->num_tx_desc)
3357		curr_txd = 0;
3358
3359	adapter->num_tx_desc_avail--;
3360	adapter->next_avail_tx_desc = curr_txd;
3361}
3362
3363/**********************************************************************
3364 *
3365 *  Setup work for hardware segmentation offload (TSO)
3366 *
3367 **********************************************************************/
3368static boolean_t
3369em_tso_setup(struct adapter *adapter, struct mbuf *mp, uint32_t *txd_upper,
3370   uint32_t *txd_lower)
3371{
3372	struct e1000_context_desc *TXD;
3373	struct em_buffer *tx_buffer;
3374	struct ether_vlan_header *eh;
3375	struct ip *ip;
3376	struct ip6_hdr *ip6;
3377	struct tcphdr *th;
3378	int curr_txd, ehdrlen, hdr_len, ip_hlen, isip6;
3379	uint16_t etype;
3380
3381	/*
3382	 * XXX: This is not really correct as the stack would not have
3383	 * set up all checksums.
3384	 * XXX: Returning FALSE is not sufficient as we may have to return
3385	 * in true failure cases as well.  Should do -1 (failure), 0 (no)
3386	 * and 1 (success).
3387	 */
3388	if (((mp->m_pkthdr.csum_flags & CSUM_TSO) == 0) ||
3389	     (mp->m_pkthdr.len <= EM_TX_BUFFER_SIZE))
3390		return FALSE;
3391
3392	/*
3393	 * This function could/should be extended to support IP/IPv6
3394	 * fragmentation as well.  But as they say, one step at a time.
3395	 */
3396
3397	/*
3398	 * Determine where frame payload starts.
3399	 * Jump over vlan headers if already present,
3400	 * helpful for QinQ too.
3401	 */
3402	eh = mtod(mp, struct ether_vlan_header *);
3403	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
3404		etype = ntohs(eh->evl_proto);
3405		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3406	} else {
3407		etype = ntohs(eh->evl_encap_proto);
3408		ehdrlen = ETHER_HDR_LEN;
3409	}
3410
3411	/* Ensure we have at least the IP+TCP header in the first mbuf. */
3412	if (mp->m_len < ehdrlen + sizeof(struct ip) + sizeof(struct tcphdr))
3413		return FALSE;	/* -1 */
3414
3415	/*
3416	 * We only support TCP over IPv4 for the moment (IPv6 notyet).
3417	 * TODO: Support SCTP too when it hits the tree.
3418	 */
3419	switch (etype) {
3420	case ETHERTYPE_IP:
3421		isip6 = 0;
3422		ip = (struct ip *)(mp->m_data + ehdrlen);
3423		if (ip->ip_p != IPPROTO_TCP)
3424			return FALSE;	/* 0 */
3425		ip->ip_len = 0;
3426		ip->ip_sum = 0;
3427		ip_hlen = ip->ip_hl << 2;
3428		if (mp->m_len < ehdrlen + ip_hlen + sizeof(struct tcphdr))
3429			return FALSE;	/* -1 */
3430		th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
3431#if 1
3432		th->th_sum = in_pseudo(ip->ip_src.s_addr,
3433		    ip->ip_dst.s_addr, htons(IPPROTO_TCP));
3434#else
3435		th->th_sum = mp->m_pkthdr.csum_data;
3436#endif
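		/*
		 * For TSO the checksum field is seeded with the pseudo-header
		 * sum over the addresses and protocol only; the length is
		 * left out because the hardware fills in the per-segment
		 * length as it carves the payload into segments.
		 */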
3437		break;
3438	case ETHERTYPE_IPV6:
3439		isip6 = 1;
3440		return FALSE;	/* Not supported yet; code below is unreachable. */
3441		ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
3442		if (ip6->ip6_nxt != IPPROTO_TCP)
3443			return FALSE;	/* 0 */
3444		ip6->ip6_plen = 0;
3445		ip_hlen = sizeof(struct ip6_hdr); /* XXX: no header stacking. */
3446		if (mp->m_len < ehdrlen + ip_hlen + sizeof(struct tcphdr))
3447			return FALSE;	/* -1 */
3448		th = (struct tcphdr *)((caddr_t)ip6 + ip_hlen);
3449#if 0
3450		th->th_sum = in6_pseudo(ip6->ip6_src, ip6->ip6_dst,
3451		    htons(IPPROTO_TCP));	/* XXX: function notyet. */
3452#else
3453		th->th_sum = mp->m_pkthdr.csum_data;
3454#endif
3455		break;
3456	default:
3457		return FALSE;
3458	}
3459	hdr_len = ehdrlen + ip_hlen + (th->th_off << 2);
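	/*
	 * Example: an untagged frame with no IP or TCP options yields
	 * hdr_len = 14 (ether) + 20 (ip) + 20 (tcp) = 54 bytes.
	 */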
3460
3461	*txd_lower = (E1000_TXD_CMD_DEXT |	/* Extended descr type */
3462		      E1000_TXD_DTYP_D |	/* Data descr type */
3463		      E1000_TXD_CMD_TSE);	/* Do TSE on this packet */
3464
3465	/* IP and/or TCP header checksum calculation and insertion. */
3466	*txd_upper = ((isip6 ? 0 : E1000_TXD_POPTS_IXSM) |
3467		      E1000_TXD_POPTS_TXSM) << 8;
3468
3469	curr_txd = adapter->next_avail_tx_desc;
3470	tx_buffer = &adapter->tx_buffer_area[curr_txd];
3471	TXD = (struct e1000_context_desc *) &adapter->tx_desc_base[curr_txd];
3472
3473	/* IPv6 doesn't have a header checksum. */
3474	if (!isip6) {
3475		/*
3476		 * Start offset for header checksum calculation.
3477		 * End offset for header checksum calculation.
3478		 * Offset of place to put the checksum.
3479		 */
3480		TXD->lower_setup.ip_fields.ipcss = ehdrlen;
3481		TXD->lower_setup.ip_fields.ipcse =
3482		    htole16(ehdrlen + ip_hlen - 1);
3483		TXD->lower_setup.ip_fields.ipcso =
3484		    ehdrlen + offsetof(struct ip, ip_sum);
3485	}
3486	/*
3487	 * Start offset for payload checksum calculation.
3488	 * End offset for payload checksum calculation.
3489	 * Offset of place to put the checksum.
3490	 */
3491	TXD->upper_setup.tcp_fields.tucss =
3492	    ehdrlen + ip_hlen;
3493	TXD->upper_setup.tcp_fields.tucse = 0;
3494	TXD->upper_setup.tcp_fields.tucso =
3495	    ehdrlen + ip_hlen + offsetof(struct tcphdr, th_sum);
3496	/*
3497	 * Payload size per packet w/o any headers.
3498	 * Length of all headers up to payload.
3499	 */
3500	TXD->tcp_seg_setup.fields.mss = htole16(mp->m_pkthdr.tso_segsz);
3501	TXD->tcp_seg_setup.fields.hdr_len = hdr_len;
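	/*
	 * With a standard 1500-byte MTU the stack typically hands down
	 * tso_segsz = 1460, i.e. the MTU minus 20-byte IP and TCP headers.
	 */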
3502
3503	TXD->cmd_and_length = htole32(adapter->txd_cmd |
3504				E1000_TXD_CMD_DEXT |	/* Extended descr */
3505				E1000_TXD_CMD_TSE |	/* TSE context */
3506				(isip6 ? 0 : E1000_TXD_CMD_IP) | /* Do IP csum */
3507				E1000_TXD_CMD_TCP |	/* Do TCP checksum */
3508				(mp->m_pkthdr.len - (hdr_len))); /* Total len */
3509
3510	tx_buffer->m_head = NULL;
3511	tx_buffer->next_eop = -1;
3512
3513	if (++curr_txd == adapter->num_tx_desc)
3514		curr_txd = 0;
3515
3516	adapter->num_tx_desc_avail--;
3517	adapter->next_avail_tx_desc = curr_txd;
3518	adapter->tx_tso = TRUE;
3519
3520	return TRUE;
3521}
3522
3523
3524/**********************************************************************
3525 *
3526 *  Setup work for hardware segmentation offload (TSO) on
3527 *  adapters using advanced tx descriptors
3528 *
3529 **********************************************************************/
3530static boolean_t
3531em_tso_adv_setup(struct adapter *adapter, struct mbuf *mp, u32 *paylen)
3532{
3533	struct e1000_adv_tx_context_desc *TXD;
3534	struct em_buffer        *tx_buffer;
3535	u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0;
3536	u32 mss_l4len_idx = 0;
3537	u16 vtag = 0;
3538	int ctxd, ehdrlen, hdrlen, ip_hlen, tcp_hlen;
3539	struct ether_vlan_header *eh;
3540	struct ip *ip;
3541	struct tcphdr *th;
3542
3543	if (((mp->m_pkthdr.csum_flags & CSUM_TSO) == 0) ||
3544	     (mp->m_pkthdr.len <= EM_TX_BUFFER_SIZE))
3545		return FALSE;
3546
3547	/*
3548	 * Determine where frame payload starts.
3549	 * Jump over vlan headers if already present
3550	 */
3551	eh = mtod(mp, struct ether_vlan_header *);
3552	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN))
3553		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3554	else
3555		ehdrlen = ETHER_HDR_LEN;
3556
3557	/* Ensure we have at least the IP+TCP header in the first mbuf. */
3558	if (mp->m_len < ehdrlen + sizeof(struct ip) + sizeof(struct tcphdr))
3559		return FALSE;
3560
3561	/* Only supports IPv4 for now */
3562	ctxd = adapter->next_avail_tx_desc;
3563	tx_buffer = &adapter->tx_buffer_area[ctxd];
3564	TXD = (struct e1000_adv_tx_context_desc *) &adapter->tx_desc_base[ctxd];
3565
3566	ip = (struct ip *)(mp->m_data + ehdrlen);
3567	if (ip->ip_p != IPPROTO_TCP)
3568		return FALSE;	/* 0 */
3569	ip->ip_len = 0;
3570	ip->ip_sum = 0;
3571	ip_hlen = ip->ip_hl << 2;
3572	th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
3573	th->th_sum = in_pseudo(ip->ip_src.s_addr,
3574	    ip->ip_dst.s_addr, htons(IPPROTO_TCP));
3575	tcp_hlen = th->th_off << 2;
3576	hdrlen = ehdrlen + ip_hlen + tcp_hlen;
3577	/* Calculate the payload; it is used in the transmit desc in encap */
3578	*paylen = mp->m_pkthdr.len - hdrlen;
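	/*
	 * The hardware carves these paylen bytes into segments of at most
	 * tso_segsz bytes each, prepending a copy of the headers to every
	 * segment.
	 */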
3579
3580	/* VLAN MACLEN IPLEN */
3581	if (mp->m_flags & M_VLANTAG) {
3582		vtag = htole16(mp->m_pkthdr.ether_vtag);
3583		vlan_macip_lens |= (vtag << E1000_ADVTXD_VLAN_SHIFT);
3584	}
3585	vlan_macip_lens |= (ehdrlen << E1000_ADVTXD_MACLEN_SHIFT);
3586	vlan_macip_lens |= ip_hlen;
3587	TXD->vlan_macip_lens |= htole32(vlan_macip_lens);
3588
3589	/* ADV DTYPE TUCMD */
3590	type_tucmd_mlhl |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_CTXT;
3591	type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP;
3592	type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV4;
3593	TXD->type_tucmd_mlhl |= htole32(type_tucmd_mlhl);
3594
3595	/* MSS L4LEN IDX */
3596	mss_l4len_idx |= (mp->m_pkthdr.tso_segsz << E1000_ADVTXD_MSS_SHIFT);
3597	mss_l4len_idx |= (tcp_hlen << E1000_ADVTXD_L4LEN_SHIFT);
3598	TXD->mss_l4len_idx = htole32(mss_l4len_idx);
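	/*
	 * Assuming the usual 82575 layout (MSS_SHIFT == 16, L4LEN_SHIFT == 8)
	 * this packs the MSS into bits 31:16 and the TCP header length into
	 * bits 15:8 of the dword.
	 */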
3599
3600	TXD->seqnum_seed = htole32(0);
3601	tx_buffer->m_head = NULL;
3602	tx_buffer->next_eop = -1;
3603
3604	if (++ctxd == adapter->num_tx_desc)
3605		ctxd = 0;
3606
3607	adapter->num_tx_desc_avail--;
3608	adapter->next_avail_tx_desc = ctxd;
3609	return TRUE;
3610}
3611
3612
3613/*********************************************************************
3614 *
3615 *  Advanced Context Descriptor setup for VLAN or CSUM
3616 *
3617 **********************************************************************/
3618
3619static boolean_t
3620em_tx_adv_ctx_setup(struct adapter *adapter, struct mbuf *mp)
3621{
3622	struct e1000_adv_tx_context_desc *TXD;
3623	struct em_buffer        *tx_buffer;
3624	uint32_t vlan_macip_lens = 0, type_tucmd_mlhl = 0;
3625	struct ether_vlan_header *eh;
3626	struct ip *ip;
3627	struct ip6_hdr *ip6;
3628	int  ehdrlen, ip_hlen;
3629	u16	etype;
3630	u8	ipproto;
3631
3632	int ctxd = adapter->next_avail_tx_desc;
3633	u16 vtag = 0;
3634
3635	tx_buffer = &adapter->tx_buffer_area[ctxd];
3636	TXD = (struct e1000_adv_tx_context_desc *) &adapter->tx_desc_base[ctxd];
3637
3638	/*
3639	** In advanced descriptors the vlan tag must
3640	** be placed into the descriptor itself.
3641	*/
3642	if (mp->m_flags & M_VLANTAG) {
3643		vtag = htole16(mp->m_pkthdr.ether_vtag);
3644		vlan_macip_lens |= (vtag << E1000_ADVTXD_VLAN_SHIFT);
3645	}
3646
3647	/*
3648	 * Determine where frame payload starts.
3649	 * Jump over vlan headers if already present,
3650	 * helpful for QinQ too.
3651	 */
3652	eh = mtod(mp, struct ether_vlan_header *);
3653	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
3654		etype = ntohs(eh->evl_proto);
3655		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3656	} else {
3657		etype = ntohs(eh->evl_encap_proto);
3658		ehdrlen = ETHER_HDR_LEN;
3659	}
3660
3661	/* Set the ether header length */
3662	vlan_macip_lens |= ehdrlen << E1000_ADVTXD_MACLEN_SHIFT;
3663
3664	switch (etype) {
3665		case ETHERTYPE_IP:
3666			ip = (struct ip *)(mp->m_data + ehdrlen);
3667			ip_hlen = ip->ip_hl << 2;
3668			if (mp->m_len < ehdrlen + ip_hlen)
3669				return FALSE; /* failure */
3670			ipproto = ip->ip_p;
3671			type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV4;
3672			break;
3673		case ETHERTYPE_IPV6:
3674			ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
3675			ip_hlen = sizeof(struct ip6_hdr);
3676			if (mp->m_len < ehdrlen + ip_hlen)
3677				return FALSE; /* failure */
3678			ipproto = ip6->ip6_nxt;
3679			type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV6;
3680			break;
3681		default:
3682			return FALSE;
3683	}
3684
3685	vlan_macip_lens |= ip_hlen;
3686	type_tucmd_mlhl |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_CTXT;
3687
3688	switch (ipproto) {
3689		case IPPROTO_TCP:
3690			if (mp->m_pkthdr.csum_flags & CSUM_TCP)
3691				type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP;
3692			break;
3693		case IPPROTO_UDP:
3694			/* UDP is the 0 value of the L4T field; do not set the TCP bit. */
3696			break;
3697	}
3698
3699	/* Now copy bits into descriptor */
3700	TXD->vlan_macip_lens |= htole32(vlan_macip_lens);
3701	TXD->type_tucmd_mlhl |= htole32(type_tucmd_mlhl);
3702	TXD->seqnum_seed = htole32(0);
3703	TXD->mss_l4len_idx = htole32(0);
3704
3705	tx_buffer->m_head = NULL;
3706	tx_buffer->next_eop = -1;
3707
3708	/* We've consumed the first desc, adjust counters */
3709	if (++ctxd == adapter->num_tx_desc)
3710		ctxd = 0;
3711	adapter->next_avail_tx_desc = ctxd;
3712	--adapter->num_tx_desc_avail;
3713
3714	return TRUE;
3715}
3716
3717
3718/**********************************************************************
3719 *
3720 *  Examine each tx_buffer in the used queue. If the hardware is done
3721 *  processing the packet then free associated resources. The
3722 *  tx_buffer is put back on the free queue.
3723 *
3724 **********************************************************************/
3725static void
3726em_txeof(struct adapter *adapter)
3727{
3728	int first, last, done, num_avail;
3729	struct em_buffer *tx_buffer;
3730	struct e1000_tx_desc *tx_desc, *eop_desc;
3731	struct ifnet *ifp = adapter->ifp;
3732
3733	EM_LOCK_ASSERT(adapter);
3734
3735	if (adapter->num_tx_desc_avail == adapter->num_tx_desc)
3736		return;
3737
3738	num_avail = adapter->num_tx_desc_avail;
3739	first = adapter->next_tx_to_clean;
3740	tx_desc = &adapter->tx_desc_base[first];
3741	tx_buffer = &adapter->tx_buffer_area[first];
3742	last = tx_buffer->next_eop;
3743	eop_desc = &adapter->tx_desc_base[last];
3744
3745	/*
3746	 * What this does is get the index of the
3747	 * first descriptor AFTER the EOP of the
3748	 * first packet, that way we can do the
3749	 * simple comparison on the inner while loop.
3750	 */
3751	if (++last == adapter->num_tx_desc)
3752		last = 0;
3753	done = last;
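	/*
	 * Example: with a 256-entry ring and the first packet's EOP at
	 * index 255, "done" wraps to 0 and the inner loop below cleans
	 * descriptors until "first" catches up with it.
	 */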
3754
3755	bus_dmamap_sync(adapter->txdma.dma_tag, adapter->txdma.dma_map,
3756	    BUS_DMASYNC_POSTREAD);
3757
3758	while (eop_desc->upper.fields.status & E1000_TXD_STAT_DD) {
3759		/* We clean the range of the packet */
3760		while (first != done) {
3761			tx_desc->upper.data = 0;
3762			tx_desc->lower.data = 0;
3763			tx_desc->buffer_addr = 0;
3764			num_avail++;
3765
3766			if (tx_buffer->m_head) {
3767				ifp->if_opackets++;
3768				bus_dmamap_sync(adapter->txtag,
3769				    tx_buffer->map,
3770				    BUS_DMASYNC_POSTWRITE);
3771				bus_dmamap_unload(adapter->txtag,
3772				    tx_buffer->map);
3773
3774				m_freem(tx_buffer->m_head);
3775				tx_buffer->m_head = NULL;
3776			}
3777			tx_buffer->next_eop = -1;
3778
3779			if (++first == adapter->num_tx_desc)
3780				first = 0;
3781
3782			tx_buffer = &adapter->tx_buffer_area[first];
3783			tx_desc = &adapter->tx_desc_base[first];
3784		}
3785		/* See if we can continue to the next packet */
3786		last = tx_buffer->next_eop;
3787		if (last != -1) {
3788        		eop_desc = &adapter->tx_desc_base[last];
3789			/* Get new done point */
3790			if (++last == adapter->num_tx_desc) last = 0;
3791			done = last;
3792		} else
3793			break;
3794	}
3795	bus_dmamap_sync(adapter->txdma.dma_tag, adapter->txdma.dma_map,
3796	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3797
3798	adapter->next_tx_to_clean = first;
3799
3800	/*
3801	 * If we have enough room, clear IFF_DRV_OACTIVE to tell the stack
3802	 * that it is OK to send packets.
3803	 * If there are no pending descriptors, clear the timeout. Otherwise,
3804	 * if some descriptors have been freed, restart the timeout.
3805	 */
3806	if (num_avail > EM_TX_CLEANUP_THRESHOLD) {
3807		ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
3808		/* All clean, turn off the timer */
3809		if (num_avail == adapter->num_tx_desc)
3810			adapter->watchdog_timer = 0;
3811		/* Some cleaned, reset the timer */
3812		else if (num_avail != adapter->num_tx_desc_avail)
3813			adapter->watchdog_timer = EM_TX_TIMEOUT;
3814	}
3815	adapter->num_tx_desc_avail = num_avail;
3816	return;
3817}
3818
3819/*********************************************************************
3820 *
3821 *  Get a buffer from system mbuf buffer pool.
3822 *
3823 **********************************************************************/
3824static int
3825em_get_buf(struct adapter *adapter, int i)
3826{
3827	struct mbuf		*m;
3828	bus_dma_segment_t	segs[1];
3829	bus_dmamap_t		map;
3830	struct em_buffer	*rx_buffer;
3831	int			error, nsegs;
3832
3833	m = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR);
3834	if (m == NULL) {
3835		adapter->mbuf_cluster_failed++;
3836		return (ENOBUFS);
3837	}
3838	m->m_len = m->m_pkthdr.len = MCLBYTES;
3839
3840	if (adapter->hw.mac.max_frame_size <= (MCLBYTES - ETHER_ALIGN))
3841		m_adj(m, ETHER_ALIGN);
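	/*
	 * ETHER_ALIGN is 2: shifting the payload keeps the IP header that
	 * follows the 14-byte ethernet header on a 4-byte boundary.  The
	 * adjustment is only made when the largest frame still fits in the
	 * cluster afterwards.
	 */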
3842
3843	/*
3844	 * Using memory from the mbuf cluster pool, invoke the
3845	 * bus_dma machinery to arrange the memory mapping.
3846	 */
3847	error = bus_dmamap_load_mbuf_sg(adapter->rxtag,
3848	    adapter->rx_sparemap, m, segs, &nsegs, BUS_DMA_NOWAIT);
3849	if (error != 0) {
3850		m_free(m);
3851		return (error);
3852	}
3853
3854	/* If nsegs is wrong then the stack is corrupt. */
3855	KASSERT(nsegs == 1, ("Too many segments returned!"));
3856
3857	rx_buffer = &adapter->rx_buffer_area[i];
3858	if (rx_buffer->m_head != NULL)
3859		bus_dmamap_unload(adapter->rxtag, rx_buffer->map);
3860
3861	map = rx_buffer->map;
3862	rx_buffer->map = adapter->rx_sparemap;
3863	adapter->rx_sparemap = map;
3864	bus_dmamap_sync(adapter->rxtag, rx_buffer->map, BUS_DMASYNC_PREREAD);
3865	rx_buffer->m_head = m;
3866
3867	adapter->rx_desc_base[i].buffer_addr = htole64(segs[0].ds_addr);
3868	return (0);
3869}
3870
3871/*********************************************************************
3872 *
3873 *  Allocate memory for rx_buffer structures. Since we use one
3874 *  rx_buffer per received packet, the maximum number of rx_buffer's
3875 *  that we'll need is equal to the number of receive descriptors
3876 *  that we've allocated.
3877 *
3878 **********************************************************************/
3879static int
3880em_allocate_receive_structures(struct adapter *adapter)
3881{
3882	device_t dev = adapter->dev;
3883	struct em_buffer *rx_buffer;
3884	int i, error;
3885
3886	adapter->rx_buffer_area = malloc(sizeof(struct em_buffer) *
3887	    adapter->num_rx_desc, M_DEVBUF, M_NOWAIT);
3888	if (adapter->rx_buffer_area == NULL) {
3889		device_printf(dev, "Unable to allocate rx_buffer memory\n");
3890		return (ENOMEM);
3891	}
3892
3893	bzero(adapter->rx_buffer_area,
3894	    sizeof(struct em_buffer) * adapter->num_rx_desc);
3895
3896	error = bus_dma_tag_create(bus_get_dma_tag(dev),        /* parent */
3897				1, 0,			/* alignment, bounds */
3898				BUS_SPACE_MAXADDR,	/* lowaddr */
3899				BUS_SPACE_MAXADDR,	/* highaddr */
3900				NULL, NULL,		/* filter, filterarg */
3901				MCLBYTES,		/* maxsize */
3902				1,			/* nsegments */
3903				MCLBYTES,		/* maxsegsize */
3904				0,			/* flags */
3905				NULL,			/* lockfunc */
3906				NULL,			/* lockarg */
3907				&adapter->rxtag);
3908	if (error) {
3909		device_printf(dev, "%s: bus_dma_tag_create failed %d\n",
3910		    __func__, error);
3911		goto fail;
3912	}
3913
3914	/* Create the spare map (used by getbuf) */
3915	error = bus_dmamap_create(adapter->rxtag, BUS_DMA_NOWAIT,
3916	     &adapter->rx_sparemap);
3917	if (error) {
3918		device_printf(dev, "%s: bus_dmamap_create failed: %d\n",
3919		    __func__, error);
3920		goto fail;
3921	}
3922
3923	rx_buffer = adapter->rx_buffer_area;
3924	for (i = 0; i < adapter->num_rx_desc; i++, rx_buffer++) {
3925		error = bus_dmamap_create(adapter->rxtag, BUS_DMA_NOWAIT,
3926		    &rx_buffer->map);
3927		if (error) {
3928			device_printf(dev, "%s: bus_dmamap_create failed: %d\n",
3929			    __func__, error);
3930			goto fail;
3931		}
3932	}
3933
3934	/* Setup the initial buffers */
3935	for (i = 0; i < adapter->num_rx_desc; i++) {
3936		error = em_get_buf(adapter, i);
3937		if (error)
3938			goto fail;
3939	}
3940	bus_dmamap_sync(adapter->rxdma.dma_tag, adapter->rxdma.dma_map,
3941	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3942
3943	return (0);
3944
3945fail:
3946	em_free_receive_structures(adapter);
3947	return (error);
3948}
3949
3950/*********************************************************************
3951 *
3952 *  Allocate and initialize receive structures.
3953 *
3954 **********************************************************************/
3955static int
3956em_setup_receive_structures(struct adapter *adapter)
3957{
3958	int error;
3959
3960	bzero(adapter->rx_desc_base,
3961	    (sizeof(struct e1000_rx_desc)) * adapter->num_rx_desc);
3962
3963	if ((error = em_allocate_receive_structures(adapter)) !=0)
3964		return (error);
3965
3966	/* Setup our descriptor pointers */
3967	adapter->next_rx_desc_to_check = 0;
3968
3969	return (0);
3970}
3971
3972/*********************************************************************
3973 *
3974 *  Enable receive unit.
3975 *
3976 **********************************************************************/
3977static void
3978em_initialize_receive_unit(struct adapter *adapter)
3979{
3980	struct ifnet	*ifp = adapter->ifp;
3981	uint64_t	bus_addr;
3982	uint32_t	reg_rctl;
3983	uint32_t	reg_rxcsum;
3984
3985	INIT_DEBUGOUT("em_initialize_receive_unit: begin");
3986
3987	/*
3988	 * Make sure receives are disabled while setting
3989	 * up the descriptor ring
3990	 */
3991	reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
3992	E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl & ~E1000_RCTL_EN);
3993
3994	if (adapter->hw.mac.type >= e1000_82540) {
3995		E1000_WRITE_REG(&adapter->hw, E1000_RADV,
3996		    adapter->rx_abs_int_delay.value);
3997		/*
3998		 * Set the interrupt throttling rate. Value is calculated
3999		 * as DEFAULT_ITR = 1/(MAX_INTS_PER_SEC * 256ns)
4000		 */
4001#define MAX_INTS_PER_SEC	8000
4002#define DEFAULT_ITR	(1000000000 / (MAX_INTS_PER_SEC * 256))
4003		E1000_WRITE_REG(&adapter->hw, E1000_ITR, DEFAULT_ITR);
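		/*
		 * With MAX_INTS_PER_SEC = 8000 this works out to
		 * 1000000000 / (8000 * 256) = 488 ITR units of 256ns each,
		 * i.e. at most one interrupt roughly every 125us.
		 */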
4004	}
4005
4006	/* Setup the Base and Length of the Rx Descriptor Ring */
4007	bus_addr = adapter->rxdma.dma_paddr;
4008	E1000_WRITE_REG(&adapter->hw, E1000_RDLEN, adapter->num_rx_desc *
4009			sizeof(struct e1000_rx_desc));
4010	E1000_WRITE_REG(&adapter->hw, E1000_RDBAH, (uint32_t)(bus_addr >> 32));
4011	E1000_WRITE_REG(&adapter->hw, E1000_RDBAL, (uint32_t)bus_addr);
4012
4013	/* Setup the Receive Control Register */
4014	reg_rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
4015	reg_rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_LBM_NO |
4016		   E1000_RCTL_RDMTS_HALF |
4017		   (adapter->hw.mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
4018
4019	if (e1000_tbi_sbp_enabled_82543(&adapter->hw))
4020		reg_rctl |= E1000_RCTL_SBP;
4021	else
4022		reg_rctl &= ~E1000_RCTL_SBP;
4023
4024	switch (adapter->rx_buffer_len) {
4025	default:
4026	case 2048:
4027		reg_rctl |= E1000_RCTL_SZ_2048;
4028		break;
4029	case 4096:
4030		reg_rctl |= E1000_RCTL_SZ_4096 |
4031		    E1000_RCTL_BSEX | E1000_RCTL_LPE;
4032		break;
4033	case 8192:
4034		reg_rctl |= E1000_RCTL_SZ_8192 |
4035		    E1000_RCTL_BSEX | E1000_RCTL_LPE;
4036		break;
4037	case 16384:
4038		reg_rctl |= E1000_RCTL_SZ_16384 |
4039		    E1000_RCTL_BSEX | E1000_RCTL_LPE;
4040		break;
4041	}
4042
4043	if (ifp->if_mtu > ETHERMTU)
4044		reg_rctl |= E1000_RCTL_LPE;
4045	else
4046		reg_rctl &= ~E1000_RCTL_LPE;
4047
4048	/* Enable 82543 Receive Checksum Offload for TCP and UDP */
4049	if ((adapter->hw.mac.type >= e1000_82543) &&
4050	    (ifp->if_capenable & IFCAP_RXCSUM)) {
4051		reg_rxcsum = E1000_READ_REG(&adapter->hw, E1000_RXCSUM);
4052		reg_rxcsum |= (E1000_RXCSUM_IPOFL | E1000_RXCSUM_TUOFL);
4053		E1000_WRITE_REG(&adapter->hw, E1000_RXCSUM, reg_rxcsum);
4054	}
4055
4056	/*
4057	** XXX TEMPORARY WORKAROUND: on some systems with 82573
4058	** long latencies are observed, like Lenovo X60. This
4059	** change eliminates the problem, but since having positive
4060	** values in RDTR is a known source of problems on other
4061	** platforms another solution is being sought.
4062	*/
4063	if (adapter->hw.mac.type == e1000_82573)
4064		E1000_WRITE_REG(&adapter->hw, E1000_RDTR, 0x20);
4065
4066	/* Enable Receives */
4067	E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
4068
4069	/*
4070	 * Setup the HW Rx Head and
4071	 * Tail Descriptor Pointers
4072	 */
4073	E1000_WRITE_REG(&adapter->hw, E1000_RDH, 0);
4074	E1000_WRITE_REG(&adapter->hw, E1000_RDT, adapter->num_rx_desc - 1);
4075
4076	return;
4077}
4078
4079/*********************************************************************
4080 *
4081 *  Free receive related data structures.
4082 *
4083 **********************************************************************/
4084static void
4085em_free_receive_structures(struct adapter *adapter)
4086{
4087	struct em_buffer *rx_buffer;
4088	int i;
4089
4090	INIT_DEBUGOUT("free_receive_structures: begin");
4091
4092	if (adapter->rx_sparemap) {
4093		bus_dmamap_destroy(adapter->rxtag, adapter->rx_sparemap);
4094		adapter->rx_sparemap = NULL;
4095	}
4096
4097	/* Cleanup any existing buffers */
4098	if (adapter->rx_buffer_area != NULL) {
4099		rx_buffer = adapter->rx_buffer_area;
4100		for (i = 0; i < adapter->num_rx_desc; i++, rx_buffer++) {
4101			if (rx_buffer->m_head != NULL) {
4102				bus_dmamap_sync(adapter->rxtag, rx_buffer->map,
4103				    BUS_DMASYNC_POSTREAD);
4104				bus_dmamap_unload(adapter->rxtag,
4105				    rx_buffer->map);
4106				m_freem(rx_buffer->m_head);
4107				rx_buffer->m_head = NULL;
4108			} else if (rx_buffer->map != NULL)
4109				bus_dmamap_unload(adapter->rxtag,
4110				    rx_buffer->map);
4111			if (rx_buffer->map != NULL) {
4112				bus_dmamap_destroy(adapter->rxtag,
4113				    rx_buffer->map);
4114				rx_buffer->map = NULL;
4115			}
4116		}
4117	}
4118
4119	if (adapter->rx_buffer_area != NULL) {
4120		free(adapter->rx_buffer_area, M_DEVBUF);
4121		adapter->rx_buffer_area = NULL;
4122	}
4123
4124	if (adapter->rxtag != NULL) {
4125		bus_dma_tag_destroy(adapter->rxtag);
4126		adapter->rxtag = NULL;
4127	}
4128}
4129
4130/*********************************************************************
4131 *
4132 *  This routine executes in interrupt context. It replenishes
4133 *  the mbufs in the descriptor and sends data which has been
4134 *  dma'ed into host memory to upper layer.
4135 *
4136 *  We loop at most count times if count is > 0, or until done if
4137 *  count < 0.
4138 *
4139 *********************************************************************/
4140static int
4141em_rxeof(struct adapter *adapter, int count)
4142{
4143	struct ifnet	*ifp;
4144	struct mbuf	*mp;
4145	uint8_t		accept_frame = 0;
4146	uint8_t		eop = 0;
4147	uint16_t 	len, desc_len, prev_len_adj;
4148	int		i;
4149
4150	/* Pointer to the receive descriptor being examined. */
4151	struct e1000_rx_desc   *current_desc;
4152	uint8_t		status;
4153
4154	ifp = adapter->ifp;
4155	i = adapter->next_rx_desc_to_check;
4156	current_desc = &adapter->rx_desc_base[i];
4157	bus_dmamap_sync(adapter->rxdma.dma_tag, adapter->rxdma.dma_map,
4158	    BUS_DMASYNC_POSTREAD);
4159
4160	if (!((current_desc->status) & E1000_RXD_STAT_DD))
4161		return (0);
4162
4163	while ((current_desc->status & E1000_RXD_STAT_DD) &&
4164	    (count != 0) &&
4165	    (ifp->if_drv_flags & IFF_DRV_RUNNING)) {
4166		struct mbuf *m = NULL;
4167
4168		mp = adapter->rx_buffer_area[i].m_head;
4169		/*
4170		 * Can't defer bus_dmamap_sync(9) because TBI_ACCEPT
4171		 * needs to access the last received byte in the mbuf.
4172		 */
4173		bus_dmamap_sync(adapter->rxtag, adapter->rx_buffer_area[i].map,
4174		    BUS_DMASYNC_POSTREAD);
4175
4176		accept_frame = 1;
4177		prev_len_adj = 0;
4178		desc_len = le16toh(current_desc->length);
4179		status = current_desc->status;
4180		if (status & E1000_RXD_STAT_EOP) {
4181			count--;
4182			eop = 1;
4183			if (desc_len < ETHER_CRC_LEN) {
4184				len = 0;
4185				prev_len_adj = ETHER_CRC_LEN - desc_len;
4186			} else
4187				len = desc_len - ETHER_CRC_LEN;
4188		} else {
4189			eop = 0;
4190			len = desc_len;
4191		}
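		/*
		 * The hardware leaves the 4-byte ethernet CRC on the frame,
		 * hence the ETHER_CRC_LEN subtraction above.  If the last
		 * descriptor held fewer than 4 bytes, part of the CRC sits
		 * in the previous buffer and prev_len_adj trims it later.
		 */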
4192
4193		if (current_desc->errors & E1000_RXD_ERR_FRAME_ERR_MASK) {
4194			uint8_t		last_byte;
4195			uint32_t	pkt_len = desc_len;
4196
4197			if (adapter->fmp != NULL)
4198				pkt_len += adapter->fmp->m_pkthdr.len;
4199
4200			last_byte = *(mtod(mp, caddr_t) + desc_len - 1);
4201			if (TBI_ACCEPT(&adapter->hw, status,
4202			    current_desc->errors, pkt_len, last_byte)) {
4203				e1000_tbi_adjust_stats_82543(&adapter->hw,
4204				    &adapter->stats, pkt_len,
4205				    adapter->hw.mac.addr);
4206				if (len > 0)
4207					len--;
4208			} else
4209				accept_frame = 0;
4210		}
4211
4212		if (accept_frame) {
4213			if (em_get_buf(adapter, i) != 0) {
4214				ifp->if_iqdrops++;
4215				goto discard;
4216			}
4217
4218			/* Assign correct length to the current fragment */
4219			mp->m_len = len;
4220
4221			if (adapter->fmp == NULL) {
4222				mp->m_pkthdr.len = len;
4223				adapter->fmp = mp; /* Store the first mbuf */
4224				adapter->lmp = mp;
4225			} else {
4226				/* Chain mbuf's together */
4227				mp->m_flags &= ~M_PKTHDR;
4228				/*
4229				 * Adjust length of previous mbuf in chain if
4230				 * we received less than 4 bytes in the last
4231				 * descriptor.
4232				 */
4233				if (prev_len_adj > 0) {
4234					adapter->lmp->m_len -= prev_len_adj;
4235					adapter->fmp->m_pkthdr.len -=
4236					    prev_len_adj;
4237				}
4238				adapter->lmp->m_next = mp;
4239				adapter->lmp = adapter->lmp->m_next;
4240				adapter->fmp->m_pkthdr.len += len;
4241			}
4242
4243			if (eop) {
4244				adapter->fmp->m_pkthdr.rcvif = ifp;
4245				ifp->if_ipackets++;
4246				em_receive_checksum(adapter, current_desc,
4247				    adapter->fmp);
4248#ifndef __NO_STRICT_ALIGNMENT
4249				if (adapter->hw.mac.max_frame_size >
4250				    (MCLBYTES - ETHER_ALIGN) &&
4251				    em_fixup_rx(adapter) != 0)
4252					goto skip;
4253#endif
4254				if (status & E1000_RXD_STAT_VP) {
4255					adapter->fmp->m_pkthdr.ether_vtag =
4256					    (le16toh(current_desc->special) &
4257					    E1000_RXD_SPC_VLAN_MASK);
4258					adapter->fmp->m_flags |= M_VLANTAG;
4259				}
4260#ifndef __NO_STRICT_ALIGNMENT
4261skip:
4262#endif
4263				m = adapter->fmp;
4264				adapter->fmp = NULL;
4265				adapter->lmp = NULL;
4266			}
4267		} else {
4268			ifp->if_ierrors++;
4269discard:
4270			/* Reuse loaded DMA map and just update mbuf chain */
4271			mp = adapter->rx_buffer_area[i].m_head;
4272			mp->m_len = mp->m_pkthdr.len = MCLBYTES;
4273			mp->m_data = mp->m_ext.ext_buf;
4274			mp->m_next = NULL;
4275			if (adapter->hw.mac.max_frame_size <=
4276			    (MCLBYTES - ETHER_ALIGN))
4277				m_adj(mp, ETHER_ALIGN);
4278			if (adapter->fmp != NULL) {
4279				m_freem(adapter->fmp);
4280				adapter->fmp = NULL;
4281				adapter->lmp = NULL;
4282			}
4283			m = NULL;
4284		}
4285
4286		/* Zero out the receive descriptors status. */
4287		current_desc->status = 0;
4288		bus_dmamap_sync(adapter->rxdma.dma_tag, adapter->rxdma.dma_map,
4289		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4290
4291		/* Advance our pointers to the next descriptor. */
4292		if (++i == adapter->num_rx_desc)
4293			i = 0;
4294		if (m != NULL) {
4295			adapter->next_rx_desc_to_check = i;
4296#ifdef DEVICE_POLLING
4297			EM_UNLOCK(adapter);
4298			(*ifp->if_input)(ifp, m);
4299			EM_LOCK(adapter);
4300#else
4301			/* Already running unlocked */
4302			(*ifp->if_input)(ifp, m);
4303#endif
4304			i = adapter->next_rx_desc_to_check;
4305		}
4306		current_desc = &adapter->rx_desc_base[i];
4307	}
4308	adapter->next_rx_desc_to_check = i;
4309
4310	/* Advance the E1000's Receive Queue #0  "Tail Pointer". */
4311	if (--i < 0)
4312		i = adapter->num_rx_desc - 1;
4313	E1000_WRITE_REG(&adapter->hw, E1000_RDT, i);
4314	if (!((current_desc->status) & E1000_RXD_STAT_DD))
4315		return (0);
4316
4317	return (1);
4318}
4319
4320#ifndef __NO_STRICT_ALIGNMENT
4321/*
4322 * When jumbo frames are enabled we should realign the entire payload on
4323 * architectures with strict alignment.  This is a serious design mistake
4324 * of the 8254x as it nullifies the DMA operations.  The 8254x only allows
4325 * the RX buffer size to be 2048/4096/8192/16384; what we really want is
4326 * 2048 - ETHER_ALIGN so that the payload is aligned.  On architectures
4327 * without strict alignment restrictions the 8254x still performs unaligned
4328 * memory accesses, which reduces performance too.  To avoid copying an
4329 * entire frame just to align it, we allocate a new mbuf, copy the ethernet
4330 * header into it, and prepend the new mbuf to the existing mbuf chain.
4331 *
4332 * Be aware that the best performance of the 8254x is achieved only when
4333 * jumbo frames are not used at all on architectures with strict alignment.
4334 */
4335static int
4336em_fixup_rx(struct adapter *adapter)
4337{
4338	struct mbuf *m, *n;
4339	int error;
4340
4341	error = 0;
4342	m = adapter->fmp;
4343	if (m->m_len <= (MCLBYTES - ETHER_HDR_LEN)) {
4344		bcopy(m->m_data, m->m_data + ETHER_HDR_LEN, m->m_len);
4345		m->m_data += ETHER_HDR_LEN;
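		/*
		 * Shifting the frame up by the 14-byte ethernet header puts
		 * the IP header at offset 28 from the aligned cluster start,
		 * which is a multiple of 4, so the payload is now aligned.
		 */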
4346	} else {
4347		MGETHDR(n, M_DONTWAIT, MT_DATA);
4348		if (n != NULL) {
4349			bcopy(m->m_data, n->m_data, ETHER_HDR_LEN);
4350			m->m_data += ETHER_HDR_LEN;
4351			m->m_len -= ETHER_HDR_LEN;
4352			n->m_len = ETHER_HDR_LEN;
4353			M_MOVE_PKTHDR(n, m);
4354			n->m_next = m;
4355			adapter->fmp = n;
4356		} else {
4357			adapter->dropped_pkts++;
4358			m_freem(adapter->fmp);
4359			adapter->fmp = NULL;
4360			error = ENOMEM;
4361		}
4362	}
4363
4364	return (error);
4365}
4366#endif
4367
4368/*********************************************************************
4369 *
4370 *  Verify that the hardware indicated that the checksum is valid.
4371 *  Inform the stack about the status of checksum so that stack
4372 *  doesn't spend time verifying the checksum.
4373 *
4374 *********************************************************************/
4375static void
4376em_receive_checksum(struct adapter *adapter,
4377	    struct e1000_rx_desc *rx_desc, struct mbuf *mp)
4378{
4379	/* 82543 or newer only */
4380	if ((adapter->hw.mac.type < e1000_82543) ||
4381	    /* Ignore Checksum bit is set */
4382	    (rx_desc->status & E1000_RXD_STAT_IXSM)) {
4383		mp->m_pkthdr.csum_flags = 0;
4384		return;
4385	}
4386
4387	if (rx_desc->status & E1000_RXD_STAT_IPCS) {
4388		/* Did it pass? */
4389		if (!(rx_desc->errors & E1000_RXD_ERR_IPE)) {
4390			/* IP Checksum Good */
4391			mp->m_pkthdr.csum_flags = CSUM_IP_CHECKED;
4392			mp->m_pkthdr.csum_flags |= CSUM_IP_VALID;
4393
4394		} else {
4395			mp->m_pkthdr.csum_flags = 0;
4396		}
4397	}
4398
4399	if (rx_desc->status & E1000_RXD_STAT_TCPCS) {
4400		/* Did it pass? */
4401		if (!(rx_desc->errors & E1000_RXD_ERR_TCPE)) {
4402			mp->m_pkthdr.csum_flags |=
4403			    (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
4404			mp->m_pkthdr.csum_data = htons(0xffff);
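			/*
			 * csum_data of 0xffff with CSUM_PSEUDO_HDR tells the
			 * stack the checksum, pseudo-header included, has
			 * already been verified by the hardware.
			 */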
4405		}
4406	}
4407}
4408
4409
4410static void
4411em_enable_vlans(struct adapter *adapter)
4412{
4413	uint32_t ctrl;
4414
4415	E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);
4416
4417	ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL);
4418	ctrl |= E1000_CTRL_VME;
4419	E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl);
4420}
4421
4422static void
4423em_enable_intr(struct adapter *adapter)
4424{
4425	E1000_WRITE_REG(&adapter->hw, E1000_IMS,
4426	    (IMS_ENABLE_MASK));
4427}
4428
4429static void
4430em_disable_intr(struct adapter *adapter)
4431{
4432	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
4433}
4434
4435/*
4436 * Bit of a misnomer: what this really means is
4437 * to enable OS management of the system... aka
4438 * to disable special hardware management features.
4439 */
4440static void
4441em_init_manageability(struct adapter *adapter)
4442{
4443	/* A shared code workaround */
4444#define E1000_82542_MANC2H E1000_MANC2H
4445	if (adapter->has_manage) {
4446		int manc2h = E1000_READ_REG(&adapter->hw, E1000_MANC2H);
4447		int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
4448
4449		/* disable hardware interception of ARP */
4450		manc &= ~(E1000_MANC_ARP_EN);
4451
4452		/* enable receiving management packets to the host */
4453		if (adapter->hw.mac.type >= e1000_82571) {
4454			manc |= E1000_MANC_EN_MNG2HOST;
4455#define E1000_MNG2HOST_PORT_623 (1 << 5)
4456#define E1000_MNG2HOST_PORT_664 (1 << 6)
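			/*
			 * Ports 623 and 664 are the RMCP and secure RMCP
			 * ports used by ASF/IPMI management traffic.
			 */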
4457			manc2h |= E1000_MNG2HOST_PORT_623;
4458			manc2h |= E1000_MNG2HOST_PORT_664;
4459			E1000_WRITE_REG(&adapter->hw, E1000_MANC2H, manc2h);
4460		}
4461
4462		E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
4463	}
4464}
4465
4466/*
4467 * Give control back to hardware management
4468 * controller if there is one.
4469 */
4470static void
4471em_release_manageability(struct adapter *adapter)
4472{
4473	if (adapter->has_manage) {
4474		int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
4475
4476		/* re-enable hardware interception of ARP */
4477		manc |= E1000_MANC_ARP_EN;
4478
4479		if (adapter->hw.mac.type >= e1000_82571)
4480			manc &= ~E1000_MANC_EN_MNG2HOST;
4481
4482		E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
4483	}
4484}
4485
4486/*
4487 * em_get_hw_control sets {CTRL_EXT|FWSM}:DRV_LOAD bit.
4488 * For ASF and Pass Through versions of f/w this means that
4489 * the driver is loaded. For AMT version (only with 82573)
4490 * of the f/w this means that the network i/f is open.
4491 *
4492 */
4493static void
4494em_get_hw_control(struct adapter *adapter)
4495{
4496	u32 ctrl_ext, swsm;
4497
4498	/* Let firmware know the driver has taken over */
4499	switch (adapter->hw.mac.type) {
4500	case e1000_82573:
4501		swsm = E1000_READ_REG(&adapter->hw, E1000_SWSM);
4502		E1000_WRITE_REG(&adapter->hw, E1000_SWSM,
4503		    swsm | E1000_SWSM_DRV_LOAD);
4504		break;
4505	case e1000_82571:
4506	case e1000_82572:
4507	case e1000_80003es2lan:
4508	case e1000_ich8lan:
4509	case e1000_ich9lan:
4510		ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4511		E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
4512		    ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
4513		break;
4514	default:
4515		break;
4516	}
4517}
4518
4519/*
4520 * em_release_hw_control resets {CTRL_EXT|FWSM}:DRV_LOAD bit.
4521 * For ASF and Pass Through versions of f/w this means that the
4522 * driver is no longer loaded. For AMT version (only with 82573)
4523 * of the f/w this means that the network i/f is closed.
4524 *
4525 */
4526static void
4527em_release_hw_control(struct adapter *adapter)
4528{
4529	u32 ctrl_ext, swsm;
4530
4531	/* Let firmware take over control of h/w */
4532	switch (adapter->hw.mac.type) {
4533	case e1000_82573:
4534		swsm = E1000_READ_REG(&adapter->hw, E1000_SWSM);
4535		E1000_WRITE_REG(&adapter->hw, E1000_SWSM,
4536		    swsm & ~E1000_SWSM_DRV_LOAD);
4537		break;
4538	case e1000_82571:
4539	case e1000_82572:
4540	case e1000_80003es2lan:
4541	case e1000_ich8lan:
4542	case e1000_ich9lan:
4543		ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4544		E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
4545		    ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
4546		break;
4547	default:
4548		break;
4549
4550	}
4551}
4552
4553static int
4554em_is_valid_ether_addr(uint8_t *addr)
4555{
4556	char zero_addr[6] = { 0, 0, 0, 0, 0, 0 };
4557
4558	if ((addr[0] & 1) || (!bcmp(addr, zero_addr, ETHER_ADDR_LEN))) {
4559		return (FALSE);
4560	}
4561
4562	return (TRUE);
4563}
4564
4565/*
4566 * NOTE: the following routines using the e1000
4567 * 	naming style are provided to the shared
4568 *	code which expects that rather than 'em'
4569 */
4570
4571void
4572e1000_write_pci_cfg(struct e1000_hw *hw, uint32_t reg, uint16_t *value)
4573{
4574	pci_write_config(((struct e1000_osdep *)hw->back)->dev, reg, *value, 2);
4575}
4576
4577void
4578e1000_read_pci_cfg(struct e1000_hw *hw, uint32_t reg, uint16_t *value)
4579{
4580	*value = pci_read_config(((struct e1000_osdep *)hw->back)->dev, reg, 2);
4581}
4582
4583void
4584e1000_pci_set_mwi(struct e1000_hw *hw)
4585{
4586	pci_write_config(((struct e1000_osdep *)hw->back)->dev, PCIR_COMMAND,
4587	    (hw->bus.pci_cmd_word | CMD_MEM_WRT_INVALIDATE), 2);
4588}
4589
4590void
4591e1000_pci_clear_mwi(struct e1000_hw *hw)
4592{
4593	pci_write_config(((struct e1000_osdep *)hw->back)->dev, PCIR_COMMAND,
4594	    (hw->bus.pci_cmd_word & ~CMD_MEM_WRT_INVALIDATE), 2);
4595}
4596
4597/*
4598 * Read the PCI Express capabilities
4599 */
4600int32_t
4601e1000_read_pcie_cap_reg(struct e1000_hw *hw, uint32_t reg, uint16_t *value)
4602{
4603	int32_t		error = E1000_SUCCESS;
4604	uint16_t	cap_off;
4605
4606	switch (hw->mac.type) {
4607
4608		case e1000_82571:
4609		case e1000_82572:
4610		case e1000_82573:
4611		case e1000_80003es2lan:
4612			cap_off = 0xE0;
4613			e1000_read_pci_cfg(hw, cap_off + reg, value);
4614			break;
4615		default:
4616			error = E1000_NOT_IMPLEMENTED;
4617			break;
4618	}
4619
4620	return (error);
4621}
4622
4623int32_t
4624e1000_alloc_zeroed_dev_spec_struct(struct e1000_hw *hw, uint32_t size)
4625{
4626	int32_t error = 0;
4627
4628	hw->dev_spec = malloc(size, M_DEVBUF, M_NOWAIT | M_ZERO);
4629	/* M_ZERO already cleared the memory, and it may be NULL; no bzero(). */
4630	if (hw->dev_spec == NULL)
4631		error = ENOMEM;
4632
4633	return (error);
4634}
4635
4636void
4637e1000_free_dev_spec_struct(struct e1000_hw *hw)
4638{
4639	if (hw->dev_spec != NULL)
4640		free(hw->dev_spec, M_DEVBUF);
4641	return;
4642}
4643
4644/*
4645 * Enable PCI Wake On Lan capability
4646 */
4647void
4648em_enable_wakeup(device_t dev)
4649{
4650	u16     cap, status;
4651	u8      id;
4652
4653	/* First find the capabilities pointer */
4654	cap = pci_read_config(dev, PCIR_CAP_PTR, 2);
4655	/* Read the PM capability ID; this assumes PM is the first capability. */
4656	id = pci_read_config(dev, cap, 1);
4657	if (id != PCIY_PMG)     /* Something wrong */
4658		return;
4659	/* OK, we have the power capability, so now get the status register */
4661	cap += PCIR_POWER_STATUS;
4662	status = pci_read_config(dev, cap, 2);
4663	status |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE;
4664	pci_write_config(dev, cap, status, 2);
4665	return;
4666}
4667
4668
4669/*********************************************************************
4670* 82544 Coexistence issue workaround.
4671*    There are 2 issues.
4672*       1. Transmit Hang issue.
4673*    To detect this issue, the following equation can be used:
4674*	  SIZE[3:0] + ADDR[2:0] = SUM[3:0].
4675*	  If SUM[3:0] is between 1 and 4, we will have this issue.
4676*
4677*       2. DAC issue.
4678*    To detect this issue, the following equation can be used:
4679*	  SIZE[3:0] + ADDR[2:0] = SUM[3:0].
4680*	  If SUM[3:0] is between 9 and 0xC, we will have this issue.
4681*
4682*
4683*    WORKAROUND:
4684*	  Make sure the buffer's ending address does not fall on
4685*	  1,2,3,4 (Hang) or 9,a,b,c (DAC).
4686*
4687*************************************************************************/
4688static uint32_t
4689em_fill_descriptors (bus_addr_t address, uint32_t length,
4690		PDESC_ARRAY desc_array)
4691{
4692	/* Since issue is sensitive to length and address.*/
4693	/* Since the issue is sensitive to length and address, */
4694	/* let us first check the address... */
4695	if (length <= 4) {
4696		desc_array->descriptor[0].address = address;
4697		desc_array->descriptor[0].length = length;
4698		desc_array->elements = 1;
4699		return (desc_array->elements);
4700	}
4701	safe_terminator = (uint32_t)((((uint32_t)address & 0x7) +
4702	    (length & 0xF)) & 0xF);
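	/*
	 * Example: a buffer whose address ends in 0x6 with length 14 gives
	 * (6 + 14) & 0xF = 4, inside the 1-4 hang range, so it will be
	 * split into a (length - 4)-byte piece and a trailing 4-byte piece.
	 */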
4703	/* If it does not fall within 0x1-0x4 or 0x9-0xC, one descriptor is enough. */
4704	if (safe_terminator == 0 ||
4705	    (safe_terminator > 4 && safe_terminator < 9) ||
4706	    (safe_terminator > 0xC && safe_terminator <= 0xF)) {
4707		desc_array->descriptor[0].address = address;
4708		desc_array->descriptor[0].length = length;
4709		desc_array->elements = 1;
4710		return (desc_array->elements);
4711	}
4714
4715	desc_array->descriptor[0].address = address;
4716	desc_array->descriptor[0].length = length - 4;
4717	desc_array->descriptor[1].address = address + (length - 4);
4718	desc_array->descriptor[1].length = 4;
4719	desc_array->elements = 2;
4720	return (desc_array->elements);
4721}
4722
4723/**********************************************************************
4724 *
4725 *  Update the board statistics counters.
4726 *
4727 **********************************************************************/
4728static void
4729em_update_stats_counters(struct adapter *adapter)
4730{
4731	struct ifnet   *ifp;
4732
4733	if (adapter->hw.media_type == e1000_media_type_copper ||
4734	   (E1000_READ_REG(&adapter->hw, E1000_STATUS) & E1000_STATUS_LU)) {
4735		adapter->stats.symerrs += E1000_READ_REG(&adapter->hw, E1000_SYMERRS);
4736		adapter->stats.sec += E1000_READ_REG(&adapter->hw, E1000_SEC);
4737	}
4738	adapter->stats.crcerrs += E1000_READ_REG(&adapter->hw, E1000_CRCERRS);
4739	adapter->stats.mpc += E1000_READ_REG(&adapter->hw, E1000_MPC);
4740	adapter->stats.scc += E1000_READ_REG(&adapter->hw, E1000_SCC);
4741	adapter->stats.ecol += E1000_READ_REG(&adapter->hw, E1000_ECOL);
4742
4743	adapter->stats.mcc += E1000_READ_REG(&adapter->hw, E1000_MCC);
4744	adapter->stats.latecol += E1000_READ_REG(&adapter->hw, E1000_LATECOL);
4745	adapter->stats.colc += E1000_READ_REG(&adapter->hw, E1000_COLC);
4746	adapter->stats.dc += E1000_READ_REG(&adapter->hw, E1000_DC);
4747	adapter->stats.rlec += E1000_READ_REG(&adapter->hw, E1000_RLEC);
4748	adapter->stats.xonrxc += E1000_READ_REG(&adapter->hw, E1000_XONRXC);
4749	adapter->stats.xontxc += E1000_READ_REG(&adapter->hw, E1000_XONTXC);
4750	adapter->stats.xoffrxc += E1000_READ_REG(&adapter->hw, E1000_XOFFRXC);
4751	adapter->stats.xofftxc += E1000_READ_REG(&adapter->hw, E1000_XOFFTXC);
4752	adapter->stats.fcruc += E1000_READ_REG(&adapter->hw, E1000_FCRUC);
4753	adapter->stats.prc64 += E1000_READ_REG(&adapter->hw, E1000_PRC64);
4754	adapter->stats.prc127 += E1000_READ_REG(&adapter->hw, E1000_PRC127);
4755	adapter->stats.prc255 += E1000_READ_REG(&adapter->hw, E1000_PRC255);
4756	adapter->stats.prc511 += E1000_READ_REG(&adapter->hw, E1000_PRC511);
4757	adapter->stats.prc1023 += E1000_READ_REG(&adapter->hw, E1000_PRC1023);
4758	adapter->stats.prc1522 += E1000_READ_REG(&adapter->hw, E1000_PRC1522);
4759	adapter->stats.gprc += E1000_READ_REG(&adapter->hw, E1000_GPRC);
4760	adapter->stats.bprc += E1000_READ_REG(&adapter->hw, E1000_BPRC);
4761	adapter->stats.mprc += E1000_READ_REG(&adapter->hw, E1000_MPRC);
4762	adapter->stats.gptc += E1000_READ_REG(&adapter->hw, E1000_GPTC);
4763
4764	/* For the 64-bit byte counters the low dword must be read first. */
4765	/* Both registers clear on the read of the high dword */
4766
4767	adapter->stats.gorcl += E1000_READ_REG(&adapter->hw, E1000_GORCL);
4768	adapter->stats.gorch += E1000_READ_REG(&adapter->hw, E1000_GORCH);
4769	adapter->stats.gotcl += E1000_READ_REG(&adapter->hw, E1000_GOTCL);
4770	adapter->stats.gotch += E1000_READ_REG(&adapter->hw, E1000_GOTCH);
4771
4772	adapter->stats.rnbc += E1000_READ_REG(&adapter->hw, E1000_RNBC);
4773	adapter->stats.ruc += E1000_READ_REG(&adapter->hw, E1000_RUC);
4774	adapter->stats.rfc += E1000_READ_REG(&adapter->hw, E1000_RFC);
4775	adapter->stats.roc += E1000_READ_REG(&adapter->hw, E1000_ROC);
4776	adapter->stats.rjc += E1000_READ_REG(&adapter->hw, E1000_RJC);
4777
4778	adapter->stats.torl += E1000_READ_REG(&adapter->hw, E1000_TORL);
4779	adapter->stats.torh += E1000_READ_REG(&adapter->hw, E1000_TORH);
4780	adapter->stats.totl += E1000_READ_REG(&adapter->hw, E1000_TOTL);
4781	adapter->stats.toth += E1000_READ_REG(&adapter->hw, E1000_TOTH);
4782
4783	adapter->stats.tpr += E1000_READ_REG(&adapter->hw, E1000_TPR);
4784	adapter->stats.tpt += E1000_READ_REG(&adapter->hw, E1000_TPT);
4785	adapter->stats.ptc64 += E1000_READ_REG(&adapter->hw, E1000_PTC64);
4786	adapter->stats.ptc127 += E1000_READ_REG(&adapter->hw, E1000_PTC127);
4787	adapter->stats.ptc255 += E1000_READ_REG(&adapter->hw, E1000_PTC255);
4788	adapter->stats.ptc511 += E1000_READ_REG(&adapter->hw, E1000_PTC511);
4789	adapter->stats.ptc1023 += E1000_READ_REG(&adapter->hw, E1000_PTC1023);
4790	adapter->stats.ptc1522 += E1000_READ_REG(&adapter->hw, E1000_PTC1522);
4791	adapter->stats.mptc += E1000_READ_REG(&adapter->hw, E1000_MPTC);
4792	adapter->stats.bptc += E1000_READ_REG(&adapter->hw, E1000_BPTC);
4793
4794	if (adapter->hw.mac.type >= e1000_82543) {
4795		adapter->stats.algnerrc +=
4796		    E1000_READ_REG(&adapter->hw, E1000_ALGNERRC);
4797		adapter->stats.rxerrc +=
4798		    E1000_READ_REG(&adapter->hw, E1000_RXERRC);
4799		adapter->stats.tncrs +=
4800		    E1000_READ_REG(&adapter->hw, E1000_TNCRS);
4801		adapter->stats.cexterr +=
4802		    E1000_READ_REG(&adapter->hw, E1000_CEXTERR);
4803		adapter->stats.tsctc +=
4804		    E1000_READ_REG(&adapter->hw, E1000_TSCTC);
4805		adapter->stats.tsctfc +=
4806		    E1000_READ_REG(&adapter->hw, E1000_TSCTFC);
4807	}
4808	ifp = adapter->ifp;
4809
4810	ifp->if_collisions = adapter->stats.colc;
4811
4812	/* Rx Errors */
4813	ifp->if_ierrors = adapter->dropped_pkts + adapter->stats.rxerrc +
4814	    adapter->stats.crcerrs + adapter->stats.algnerrc +
4815	    adapter->stats.ruc + adapter->stats.roc +
4816	    adapter->stats.mpc + adapter->stats.cexterr;
4817
4818	/* Tx Errors */
4819	ifp->if_oerrors = adapter->stats.ecol +
4820	    adapter->stats.latecol + adapter->watchdog_events;
4821}
4822
4823
4824/**********************************************************************
4825 *
4826 *  This routine is called only when em_display_debug_stats is enabled.
4827 *  This routine provides a way to take a look at important statistics
4828 *  maintained by the driver and hardware.
4829 *
4830 **********************************************************************/
4831static void
4832em_print_debug_info(struct adapter *adapter)
4833{
4834	device_t dev = adapter->dev;
4835	uint8_t *hw_addr = adapter->hw.hw_addr;
4836
4837	device_printf(dev, "Adapter hardware address = %p \n", hw_addr);
4838	device_printf(dev, "CTRL = 0x%x RCTL = 0x%x \n",
4839	    E1000_READ_REG(&adapter->hw, E1000_CTRL),
4840	    E1000_READ_REG(&adapter->hw, E1000_RCTL));
4841	device_printf(dev, "Packet buffer = Tx=%dk Rx=%dk \n",
4842	    ((E1000_READ_REG(&adapter->hw, E1000_PBA) & 0xffff0000) >> 16),
4843	    (E1000_READ_REG(&adapter->hw, E1000_PBA) & 0xffff));
4844	device_printf(dev, "Flow control watermarks high = %d low = %d\n",
4845	    adapter->hw.mac.fc_high_water,
4846	    adapter->hw.mac.fc_low_water);
4847	device_printf(dev, "tx_int_delay = %d, tx_abs_int_delay = %d\n",
4848	    E1000_READ_REG(&adapter->hw, E1000_TIDV),
4849	    E1000_READ_REG(&adapter->hw, E1000_TADV));
4850	device_printf(dev, "rx_int_delay = %d, rx_abs_int_delay = %d\n",
4851	    E1000_READ_REG(&adapter->hw, E1000_RDTR),
4852	    E1000_READ_REG(&adapter->hw, E1000_RADV));
4853	device_printf(dev, "fifo workaround = %lld, fifo_reset_count = %lld\n",
4854	    (long long)adapter->tx_fifo_wrk_cnt,
4855	    (long long)adapter->tx_fifo_reset_cnt);
4856	device_printf(dev, "hw tdh = %d, hw tdt = %d\n",
4857	    E1000_READ_REG(&adapter->hw, E1000_TDH),
4858	    E1000_READ_REG(&adapter->hw, E1000_TDT));
4859	device_printf(dev, "hw rdh = %d, hw rdt = %d\n",
4860	    E1000_READ_REG(&adapter->hw, E1000_RDH),
4861	    E1000_READ_REG(&adapter->hw, E1000_RDT));
4862	device_printf(dev, "Num Tx descriptors avail = %d\n",
4863	    adapter->num_tx_desc_avail);
4864	device_printf(dev, "Tx Descriptors not avail1 = %ld\n",
4865	    adapter->no_tx_desc_avail1);
4866	device_printf(dev, "Tx Descriptors not avail2 = %ld\n",
4867	    adapter->no_tx_desc_avail2);
4868	device_printf(dev, "Std mbuf failed = %ld\n",
4869	    adapter->mbuf_alloc_failed);
4870	device_printf(dev, "Std mbuf cluster failed = %ld\n",
4871	    adapter->mbuf_cluster_failed);
4872	device_printf(dev, "Driver dropped packets = %ld\n",
4873	    adapter->dropped_pkts);
4874	device_printf(dev, "Driver tx dma failure in encap = %ld\n",
4875		adapter->no_tx_dma_setup);
4876}
4877
4878static void
4879em_print_hw_stats(struct adapter *adapter)
4880{
4881	device_t dev = adapter->dev;
4882
4883	device_printf(dev, "Excessive collisions = %lld\n",
4884	    (long long)adapter->stats.ecol);
4885#if	(DEBUG_HW > 0)  /* Don't output these errors normally */
4886	device_printf(dev, "Symbol errors = %lld\n",
4887	    (long long)adapter->stats.symerrs);
4888#endif
4889	device_printf(dev, "Sequence errors = %lld\n",
4890	    (long long)adapter->stats.sec);
4891	device_printf(dev, "Defer count = %lld\n",
4892	    (long long)adapter->stats.dc);
4893	device_printf(dev, "Missed Packets = %lld\n",
4894	    (long long)adapter->stats.mpc);
4895	device_printf(dev, "Receive No Buffers = %lld\n",
4896	    (long long)adapter->stats.rnbc);
4897	/* RLEC is inaccurate on some hardware; calculate our own. */
4898	device_printf(dev, "Receive Length Errors = %lld\n",
4899	    ((long long)adapter->stats.roc + (long long)adapter->stats.ruc));
4900	device_printf(dev, "Receive errors = %lld\n",
4901	    (long long)adapter->stats.rxerrc);
4902	device_printf(dev, "Crc errors = %lld\n",
4903	    (long long)adapter->stats.crcerrs);
4904	device_printf(dev, "Alignment errors = %lld\n",
4905	    (long long)adapter->stats.algnerrc);
4906	device_printf(dev, "Carrier extension errors = %lld\n",
4907	    (long long)adapter->stats.cexterr);
4908	device_printf(dev, "RX overruns = %ld\n", adapter->rx_overruns);
4909	device_printf(dev, "watchdog timeouts = %ld\n",
4910	    adapter->watchdog_events);
4911	device_printf(dev, "XON Rcvd = %lld\n",
4912	    (long long)adapter->stats.xonrxc);
4913	device_printf(dev, "XON Xmtd = %lld\n",
4914	    (long long)adapter->stats.xontxc);
4915	device_printf(dev, "XOFF Rcvd = %lld\n",
4916	    (long long)adapter->stats.xoffrxc);
4917	device_printf(dev, "XOFF Xmtd = %lld\n",
4918	    (long long)adapter->stats.xofftxc);
4919	device_printf(dev, "Good Packets Rcvd = %lld\n",
4920	    (long long)adapter->stats.gprc);
4921	device_printf(dev, "Good Packets Xmtd = %lld\n",
4922	    (long long)adapter->stats.gptc);
4923	device_printf(dev, "TSO Contexts Xmtd = %lld\n",
4924	    (long long)adapter->stats.tsctc);
4925	device_printf(dev, "TSO Contexts Failed = %lld\n",
4926	    (long long)adapter->stats.tsctfc);
4927}
4928
4929static int
4930em_sysctl_debug_info(SYSCTL_HANDLER_ARGS)
4931{
4932	struct adapter *adapter;
4933	int error;
4934	int result;
4935
4936	result = -1;
4937	error = sysctl_handle_int(oidp, &result, 0, req);
4938
4939	if (error || !req->newptr)
4940		return (error);
4941
4942	if (result == 1) {
4943		adapter = (struct adapter *)arg1;
4944		em_print_debug_info(adapter);
4945	}
4946
4947	return (error);
4948}
4949
4950
4951static int
4952em_sysctl_stats(SYSCTL_HANDLER_ARGS)
4953{
4954	struct adapter *adapter;
4955	int error;
4956	int result;
4957
4958	result = -1;
4959	error = sysctl_handle_int(oidp, &result, 0, req);
4960
4961	if (error || !req->newptr)
4962		return (error);
4963
4964	if (result == 1) {
4965		adapter = (struct adapter *)arg1;
4966		em_print_hw_stats(adapter);
4967	}
4968
4969	return (error);
4970}
4971
4972static int
4973em_sysctl_int_delay(SYSCTL_HANDLER_ARGS)
4974{
4975	struct em_int_delay_info *info;
4976	struct adapter *adapter;
4977	uint32_t regval;
4978	int error;
4979	int usecs;
4980	int ticks;
4981
4982	info = (struct em_int_delay_info *)arg1;
4983	usecs = info->value;
4984	error = sysctl_handle_int(oidp, &usecs, 0, req);
4985	if (error != 0 || req->newptr == NULL)
4986		return (error);
4987	if (usecs < 0 || usecs > EM_TICKS_TO_USECS(65535))
4988		return (EINVAL);
4989	info->value = usecs;
4990	ticks = EM_USECS_TO_TICKS(usecs);
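	/*
	 * The delay registers count in 1.024us units on these MACs, which
	 * is what EM_USECS_TO_TICKS() converts to; the 65535-tick limit
	 * above is therefore roughly a 67ms ceiling.
	 */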
4991
4992	adapter = info->adapter;
4993
4994	EM_LOCK(adapter);
4995	regval = E1000_READ_OFFSET(&adapter->hw, info->offset);
4996	regval = (regval & ~0xffff) | (ticks & 0xffff);
4997	/* Handle a few special cases. */
4998	switch (info->offset) {
4999	case E1000_RDTR:
5000		break;
5001	case E1000_TIDV:
5002		if (ticks == 0) {
5003			adapter->txd_cmd &= ~E1000_TXD_CMD_IDE;
5004			/* Don't write 0 into the TIDV register. */
5005			regval++;
5006		} else
5007			if (adapter->hw.mac.type != e1000_82575)
5008				adapter->txd_cmd |= E1000_TXD_CMD_IDE;
5009		break;
5010	}
5011	E1000_WRITE_OFFSET(&adapter->hw, info->offset, regval);
5012	EM_UNLOCK(adapter);
5013	return (0);
5014}
5015
5016static void
5017em_add_int_delay_sysctl(struct adapter *adapter, const char *name,
5018	const char *description, struct em_int_delay_info *info,
5019	int offset, int value)
5020{
5021	info->adapter = adapter;
5022	info->offset = offset;
5023	info->value = value;
5024	SYSCTL_ADD_PROC(device_get_sysctl_ctx(adapter->dev),
5025	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
5026	    OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW,
5027	    info, 0, em_sysctl_int_delay, "I", description);
5028}
5029
5030#ifndef DEVICE_POLLING
5031static void
5032em_add_rx_process_limit(struct adapter *adapter, const char *name,
5033	const char *description, int *limit, int value)
5034{
5035	*limit = value;
5036	SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev),
5037	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
5038	    OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW, limit, value, description);
5039}
5040#endif
5041