/**************************************************************************

Copyright (c) 2001-2007, Intel Corporation
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

 1. Redistributions of source code must retain the above copyright notice,
    this list of conditions and the following disclaimer.

 2. Redistributions in binary form must reproduce the above copyright
    notice, this list of conditions and the following disclaimer in the
    documentation and/or other materials provided with the distribution.

 3. Neither the name of the Intel Corporation nor the names of its
    contributors may be used to endorse or promote products derived from
    this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.

***************************************************************************/

/*$FreeBSD: head/sys/dev/em/if_em.c 169589 2007-05-16 00:14:23Z jfv $*/

#ifdef HAVE_KERNEL_OPTION_HEADERS
#include "opt_device_polling.h"
#endif

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/bus.h>
#include <sys/endian.h>
#include <sys/kernel.h>
#include <sys/kthread.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/module.h>
#include <sys/rman.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/sysctl.h>
#include <sys/taskqueue.h>

#include <machine/bus.h>
#include <machine/resource.h>

#include <net/bpf.h>
#include <net/ethernet.h>
#include <net/if.h>
#include <net/if_arp.h>
#include <net/if_dl.h>
#include <net/if_media.h>

#include <net/if_types.h>
#include <net/if_vlan_var.h>

#include <netinet/in_systm.h>
#include <netinet/in.h>
#include <netinet/if_ether.h>
#include <netinet/ip.h>
#include <netinet/ip6.h>
#include <netinet/tcp.h>
#include <netinet/udp.h>

#include <machine/in_cksum.h>
#include <dev/pci/pcivar.h>
#include <dev/pci/pcireg.h>

#include "e1000_api.h"
#include "e1000_82575.h"
#include "if_em.h"

/*********************************************************************
 *  Set this to one to display debug statistics
 *********************************************************************/
int	em_display_debug_stats = 0;

/*********************************************************************
 *  Driver version:
 *********************************************************************/
char em_driver_version[] = "Version - 6.5.2";


/*********************************************************************
 *  PCI Device ID Table
 *
 *  Used by probe to select the devices to load the driver on.
 *  The last field stores an index into em_strings.
 *  The last entry must be all 0s.
 *
 *  { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index }
 *********************************************************************/

static em_vendor_info_t em_vendor_info_array[] =
{
	/* Intel(R) PRO/1000 Network Connection */
	{ 0x8086, E1000_DEV_ID_82540EM,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82540EM_LOM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82540EP,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82540EP_LOM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82540EP_LP,	PCI_ANY_ID, PCI_ANY_ID, 0},

	{ 0x8086, E1000_DEV_ID_82541EI,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82541ER,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82541ER_LOM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82541EI_MOBILE,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82541GI,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82541GI_LF,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82541GI_MOBILE,	PCI_ANY_ID, PCI_ANY_ID, 0},

	{ 0x8086, E1000_DEV_ID_82542,		PCI_ANY_ID, PCI_ANY_ID, 0},

	{ 0x8086, E1000_DEV_ID_82543GC_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82543GC_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},

	{ 0x8086, E1000_DEV_ID_82544EI_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82544EI_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82544GC_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82544GC_LOM,	PCI_ANY_ID, PCI_ANY_ID, 0},

	{ 0x8086, E1000_DEV_ID_82545EM_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82545EM_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82545GM_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82545GM_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82545GM_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},

	{ 0x8086, E1000_DEV_ID_82546EB_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82546EB_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82546EB_QUAD_COPPER, PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82546GB_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82546GB_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82546GB_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82546GB_PCIE,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82546GB_QUAD_COPPER, PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82546GB_QUAD_COPPER_KSP3,
						PCI_ANY_ID, PCI_ANY_ID, 0},

	{ 0x8086, E1000_DEV_ID_82547EI,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82547EI_MOBILE,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82547GI,		PCI_ANY_ID, PCI_ANY_ID, 0},

	{ 0x8086, E1000_DEV_ID_82571EB_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER_LP,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_QUAD_FIBER,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82572EI_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82572EI_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82572EI_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82572EI,		PCI_ANY_ID, PCI_ANY_ID, 0},

	{ 0x8086, E1000_DEV_ID_82573E,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82573E_IAMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82573L,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_SPT,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_SPT,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_DPT,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_DPT,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IGP_M_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IGP_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IGP_C,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IFE,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IFE_GT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IFE_G,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IGP_M,	PCI_ANY_ID, PCI_ANY_ID, 0},

	{ 0x8086, E1000_DEV_ID_ICH9_IGP_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IGP_C,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IFE,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IFE_GT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IFE_G,	PCI_ANY_ID, PCI_ANY_ID, 0},

	{ 0x8086, E1000_DEV_ID_82575EB_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82575EB_FIBER_SERDES,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82575EM_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82575EM_FIBER_SERDES,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82575GB_QUAD_COPPER,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	/* required last entry */
	{ 0, 0, 0, 0, 0}
};

/*********************************************************************
 *  Table of branding strings for all supported NICs.
 *********************************************************************/

static char *em_strings[] = {
	"Intel(R) PRO/1000 Network Connection"
};

/*********************************************************************
 *  Function prototypes
 *********************************************************************/
static int	em_probe(device_t);
static int	em_attach(device_t);
static int	em_detach(device_t);
static int	em_shutdown(device_t);
static int	em_suspend(device_t);
static int	em_resume(device_t);
static void	em_start(struct ifnet *);
static void	em_start_locked(struct ifnet *ifp);
static int	em_ioctl(struct ifnet *, u_long, caddr_t);
static void	em_watchdog(struct adapter *);
static void	em_init(void *);
static void	em_init_locked(struct adapter *);
static void	em_stop(void *);
static void	em_media_status(struct ifnet *, struct ifmediareq *);
static int	em_media_change(struct ifnet *);
static void	em_identify_hardware(struct adapter *);
static int	em_allocate_pci_resources(struct adapter *);
static int	em_allocate_intr(struct adapter *);
static void	em_free_intr(struct adapter *);
static void	em_free_pci_resources(struct adapter *);
static void	em_local_timer(void *);
static int	em_hardware_init(struct adapter *);
static void	em_setup_interface(device_t, struct adapter *);
static int	em_setup_transmit_structures(struct adapter *);
static void	em_initialize_transmit_unit(struct adapter *);
static int	em_setup_receive_structures(struct adapter *);
static void	em_initialize_receive_unit(struct adapter *);
static void	em_enable_intr(struct adapter *);
static void	em_disable_intr(struct adapter *);
static void	em_free_transmit_structures(struct adapter *);
static void	em_free_receive_structures(struct adapter *);
static void	em_update_stats_counters(struct adapter *);
static void	em_txeof(struct adapter *);
static int	em_allocate_receive_structures(struct adapter *);
static int	em_allocate_transmit_structures(struct adapter *);
static int	em_rxeof(struct adapter *, int);
#ifndef __NO_STRICT_ALIGNMENT
static int	em_fixup_rx(struct adapter *);
#endif
static void	em_receive_checksum(struct adapter *, struct e1000_rx_desc *,
		    struct mbuf *);
static void	em_transmit_checksum_setup(struct adapter *, struct mbuf *,
		    uint32_t *, uint32_t *);
static boolean_t em_tx_adv_ctx_setup(struct adapter *, struct mbuf *);
static boolean_t em_tso_setup(struct adapter *, struct mbuf *, uint32_t *,
		    uint32_t *);
static boolean_t em_tso_adv_setup(struct adapter *, struct mbuf *, uint32_t *);
static void	em_set_promisc(struct adapter *);
static void	em_disable_promisc(struct adapter *);
static void	em_set_multi(struct adapter *);
static void	em_print_hw_stats(struct adapter *);
static void	em_update_link_status(struct adapter *);
static int	em_get_buf(struct adapter *, int);
static void	em_enable_vlans(struct adapter *);
static int	em_encap(struct adapter *, struct mbuf **);
static int	em_adv_encap(struct adapter *, struct mbuf **);
static void	em_smartspeed(struct adapter *);
static int	em_82547_fifo_workaround(struct adapter *, int);
static void	em_82547_update_fifo_head(struct adapter *, int);
static int	em_82547_tx_fifo_reset(struct adapter *);
static void	em_82547_move_tail(void *);
static int	em_dma_malloc(struct adapter *, bus_size_t,
		    struct em_dma_alloc *, int);
static void	em_dma_free(struct adapter *, struct em_dma_alloc *);
static void	em_print_debug_info(struct adapter *);
static int	em_is_valid_ether_addr(uint8_t *);
static int	em_sysctl_stats(SYSCTL_HANDLER_ARGS);
static int	em_sysctl_debug_info(SYSCTL_HANDLER_ARGS);
static uint32_t	em_fill_descriptors(bus_addr_t address, uint32_t length,
		    PDESC_ARRAY desc_array);
static int	em_sysctl_int_delay(SYSCTL_HANDLER_ARGS);
static void	em_add_int_delay_sysctl(struct adapter *, const char *,
		    const char *, struct em_int_delay_info *, int, int);
/* Management and WOL Support */
static void	em_init_manageability(struct adapter *);
static void	em_release_manageability(struct adapter *);
static void	em_get_hw_control(struct adapter *);
static void	em_release_hw_control(struct adapter *);
static void	em_enable_wakeup(device_t);

#ifdef DEVICE_POLLING
static poll_handler_t em_poll;
static void	em_intr(void *);
#else
static int	em_intr_fast(void *);
static void	em_add_rx_process_limit(struct adapter *, const char *,
		    const char *, int *, int);
static void	em_handle_rxtx(void *context, int pending);
static void	em_handle_link(void *context, int pending);
#endif

/*********************************************************************
 *  FreeBSD Device Interface Entry Points
 *********************************************************************/

static device_method_t em_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe, em_probe),
	DEVMETHOD(device_attach, em_attach),
	DEVMETHOD(device_detach, em_detach),
	DEVMETHOD(device_shutdown, em_shutdown),
	DEVMETHOD(device_suspend, em_suspend),
	DEVMETHOD(device_resume, em_resume),
	{0, 0}
};

static driver_t em_driver = {
	"em", em_methods, sizeof(struct adapter),
};

static devclass_t em_devclass;
DRIVER_MODULE(em, pci, em_driver, em_devclass, 0, 0);
MODULE_DEPEND(em, pci, 1, 1, 1);
MODULE_DEPEND(em, ether, 1, 1, 1);

/*********************************************************************
 *  Tunable default values.
 *********************************************************************/

#define EM_TICKS_TO_USECS(ticks)	((1024 * (ticks) + 500) / 1000)
#define EM_USECS_TO_TICKS(usecs)	((1000 * (usecs) + 512) / 1024)
#define M_TSO_LEN			66
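
/*
 * Worked example of the conversions above (illustrative only): the
 * adapter's interrupt delay registers count in units of 1.024 usecs,
 * so EM_USECS_TO_TICKS(100) = (1000 * 100 + 512) / 1024 = 98 ticks,
 * and EM_TICKS_TO_USECS(98) = (1024 * 98 + 500) / 1000 = 100 usecs;
 * the + 512 and + 500 terms round to nearest rather than truncating.
 */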

static int em_tx_int_delay_dflt = EM_TICKS_TO_USECS(EM_TIDV);
static int em_rx_int_delay_dflt = EM_TICKS_TO_USECS(EM_RDTR);
static int em_tx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_TADV);
static int em_rx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_RADV);
static int em_rxd = EM_DEFAULT_RXD;
static int em_txd = EM_DEFAULT_TXD;
static int em_smart_pwr_down = FALSE;

TUNABLE_INT("hw.em.tx_int_delay", &em_tx_int_delay_dflt);
TUNABLE_INT("hw.em.rx_int_delay", &em_rx_int_delay_dflt);
TUNABLE_INT("hw.em.tx_abs_int_delay", &em_tx_abs_int_delay_dflt);
TUNABLE_INT("hw.em.rx_abs_int_delay", &em_rx_abs_int_delay_dflt);
TUNABLE_INT("hw.em.rxd", &em_rxd);
TUNABLE_INT("hw.em.txd", &em_txd);
TUNABLE_INT("hw.em.smart_pwr_down", &em_smart_pwr_down);
#ifndef DEVICE_POLLING
/* How many packets rxeof tries to clean at a time */
static int em_rx_process_limit = 100;
TUNABLE_INT("hw.em.rx_process_limit", &em_rx_process_limit);
#endif
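
/*
 * The TUNABLE_INT() hooks above are read once when the module loads;
 * a typical (hypothetical) /boot/loader.conf setup might look like:
 *
 *	hw.em.rxd="1024"
 *	hw.em.txd="1024"
 *	hw.em.rx_int_delay="32"
 *
 * Out-of-range or misaligned descriptor counts fall back to the
 * defaults (with a console message) in em_attach() below.
 */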
/* Global used in WOL setup with multiport cards */
static int global_quad_port_a = 0;

/*********************************************************************
 *  Device identification routine
 *
 *  em_probe determines if the driver should be loaded on the
 *  adapter based on the adapter's PCI vendor/device ID.
 *
 *  return BUS_PROBE_DEFAULT on success, positive on failure
 *********************************************************************/

static int
em_probe(device_t dev)
{
	char		adapter_name[60];
	uint16_t	pci_vendor_id = 0;
	uint16_t	pci_device_id = 0;
	uint16_t	pci_subvendor_id = 0;
	uint16_t	pci_subdevice_id = 0;
	em_vendor_info_t *ent;

	INIT_DEBUGOUT("em_probe: begin");

	pci_vendor_id = pci_get_vendor(dev);
	if (pci_vendor_id != EM_VENDOR_ID)
		return (ENXIO);

	pci_device_id = pci_get_device(dev);
	pci_subvendor_id = pci_get_subvendor(dev);
	pci_subdevice_id = pci_get_subdevice(dev);

	ent = em_vendor_info_array;
	while (ent->vendor_id != 0) {
		if ((pci_vendor_id == ent->vendor_id) &&
		    (pci_device_id == ent->device_id) &&
		    ((pci_subvendor_id == ent->subvendor_id) ||
		    (ent->subvendor_id == PCI_ANY_ID)) &&
		    ((pci_subdevice_id == ent->subdevice_id) ||
		    (ent->subdevice_id == PCI_ANY_ID))) {
			sprintf(adapter_name, "%s %s",
				em_strings[ent->index],
				em_driver_version);
			device_set_desc_copy(dev, adapter_name);
			return (BUS_PROBE_DEFAULT);
		}
		ent++;
	}

	return (ENXIO);
}

/*********************************************************************
 *  Device initialization routine
 *
 *  The attach entry point is called when the driver is being loaded.
 *  This routine identifies the type of hardware, allocates all resources
 *  and initializes the hardware.
 *
 *  return 0 on success, positive on failure
 *********************************************************************/

static int
em_attach(device_t dev)
{
	struct adapter	*adapter;
	int		tsize, rsize;
	int		error = 0;
	u16		eeprom_data = 0, device_id; /* zeroed: not every WOL path below sets it */

	INIT_DEBUGOUT("em_attach: begin");

	adapter = device_get_softc(dev);
	adapter->dev = adapter->osdep.dev = dev;
	EM_LOCK_INIT(adapter, device_get_nameunit(dev));

	/* SYSCTL stuff */
	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
	    OID_AUTO, "debug_info", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
	    em_sysctl_debug_info, "I", "Debug Information");

	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
	    OID_AUTO, "stats", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
	    em_sysctl_stats, "I", "Statistics");

	callout_init_mtx(&adapter->timer, &adapter->mtx, 0);
	callout_init_mtx(&adapter->tx_fifo_timer, &adapter->mtx, 0);

	/* Determine hardware revision */
	em_identify_hardware(adapter);

	/* Setup PCI resources */
	if (em_allocate_pci_resources(adapter)) {
		device_printf(dev, "Allocation of PCI resources failed\n");
		error = ENXIO;
		goto err_pci;
	}

	/*
	** For ICH8 and family we need to
	** map the flash memory, and this
	** must happen after the MAC is
	** identified
	*/
	if ((adapter->hw.mac.type == e1000_ich8lan) ||
	    (adapter->hw.mac.type == e1000_ich9lan)) {
		int rid = EM_BAR_TYPE_FLASH;
		adapter->flash_mem = bus_alloc_resource_any(dev,
		    SYS_RES_MEMORY, &rid, RF_ACTIVE);
		/* This is used in the shared code */
		adapter->hw.flash_address = (u8 *)adapter->flash_mem;
		adapter->osdep.flash_bus_space_tag =
		    rman_get_bustag(adapter->flash_mem);
		adapter->osdep.flash_bus_space_handle =
		    rman_get_bushandle(adapter->flash_mem);
	}

	/* Do Shared Code initialization */
	if (e1000_setup_init_funcs(&adapter->hw, TRUE)) {
		device_printf(dev, "Setup of Shared code failed\n");
		error = ENXIO;
		goto err_pci;
	}

	e1000_get_bus_info(&adapter->hw);

	/* Set up some sysctls for the tunable interrupt delays */
	em_add_int_delay_sysctl(adapter, "rx_int_delay",
	    "receive interrupt delay in usecs", &adapter->rx_int_delay,
	    E1000_REGISTER(&adapter->hw, E1000_RDTR), em_rx_int_delay_dflt);
	em_add_int_delay_sysctl(adapter, "tx_int_delay",
	    "transmit interrupt delay in usecs", &adapter->tx_int_delay,
	    E1000_REGISTER(&adapter->hw, E1000_TIDV), em_tx_int_delay_dflt);
	if (adapter->hw.mac.type >= e1000_82540) {
		em_add_int_delay_sysctl(adapter, "rx_abs_int_delay",
		    "receive interrupt delay limit in usecs",
		    &adapter->rx_abs_int_delay,
		    E1000_REGISTER(&adapter->hw, E1000_RADV),
		    em_rx_abs_int_delay_dflt);
		em_add_int_delay_sysctl(adapter, "tx_abs_int_delay",
		    "transmit interrupt delay limit in usecs",
		    &adapter->tx_abs_int_delay,
		    E1000_REGISTER(&adapter->hw, E1000_TADV),
		    em_tx_abs_int_delay_dflt);
	}

#ifndef DEVICE_POLLING
	/* Sysctls for limiting the amount of work done in the taskqueue */
	em_add_rx_process_limit(adapter, "rx_processing_limit",
	    "max number of rx packets to process", &adapter->rx_process_limit,
	    em_rx_process_limit);
#endif
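
	/*
	 * The sysctls registered above live under the device's sysctl
	 * tree, so they can be inspected and tuned at runtime, e.g.
	 * (assuming unit 0):
	 *
	 *	# sysctl dev.em.0.rx_int_delay=32
	 *	# sysctl dev.em.0.rx_processing_limit=200
	 */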

	/*
	 * Validate the number of transmit and receive descriptors.  The
	 * count must not exceed the hardware maximum, and the ring size
	 * in bytes must be a multiple of EM_DBA_ALIGN.
	 */
	if (((em_txd * sizeof(struct e1000_tx_desc)) % EM_DBA_ALIGN) != 0 ||
	    (adapter->hw.mac.type >= e1000_82544 && em_txd > EM_MAX_TXD) ||
	    (adapter->hw.mac.type < e1000_82544 && em_txd > EM_MAX_TXD_82543) ||
	    (em_txd < EM_MIN_TXD)) {
		device_printf(dev, "Using %d TX descriptors instead of %d!\n",
		    EM_DEFAULT_TXD, em_txd);
		adapter->num_tx_desc = EM_DEFAULT_TXD;
	} else
		adapter->num_tx_desc = em_txd;
	if (((em_rxd * sizeof(struct e1000_rx_desc)) % EM_DBA_ALIGN) != 0 ||
	    (adapter->hw.mac.type >= e1000_82544 && em_rxd > EM_MAX_RXD) ||
	    (adapter->hw.mac.type < e1000_82544 && em_rxd > EM_MAX_RXD_82543) ||
	    (em_rxd < EM_MIN_RXD)) {
		device_printf(dev, "Using %d RX descriptors instead of %d!\n",
		    EM_DEFAULT_RXD, em_rxd);
		adapter->num_rx_desc = EM_DEFAULT_RXD;
	} else
		adapter->num_rx_desc = em_rxd;
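
	/*
	 * Illustrative arithmetic for the checks above (assuming the
	 * usual values of EM_DBA_ALIGN = 128 and a 16-byte legacy
	 * descriptor): a ring of em_txd descriptors occupies
	 * em_txd * 16 bytes, so em_txd must be a multiple of
	 * 128 / 16 = 8 for the modulo test to pass; e.g. 1024
	 * descriptors (16 KB) pass, while 1022 do not.
	 */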

	adapter->hw.mac.autoneg = DO_AUTO_NEG;
	adapter->hw.phy.wait_for_link = FALSE;
	adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
	adapter->rx_buffer_len = 2048;

	e1000_init_script_state_82541(&adapter->hw, TRUE);
	e1000_set_tbi_compatibility_82543(&adapter->hw, TRUE);

	/* Copper options */
	if (adapter->hw.media_type == e1000_media_type_copper) {
		adapter->hw.phy.mdix = AUTO_ALL_MODES;
		adapter->hw.phy.disable_polarity_correction = FALSE;
		adapter->hw.phy.ms_type = EM_MASTER_SLAVE;
	}

	/*
	 * Set the max frame size assuming standard ethernet
	 * sized frames.
	 */
	adapter->hw.mac.max_frame_size =
	    ETHERMTU + ETHER_HDR_LEN + ETHERNET_FCS_SIZE;

	adapter->hw.mac.min_frame_size = ETH_ZLEN + ETHERNET_FCS_SIZE;

	/*
	 * This controls when hardware reports transmit completion
	 * status.
	 */
	adapter->hw.mac.report_tx_early = 1;

	tsize = roundup2(adapter->num_tx_desc * sizeof(struct e1000_tx_desc),
	    EM_DBA_ALIGN);

	/* Allocate Transmit Descriptor ring */
	if (em_dma_malloc(adapter, tsize, &adapter->txdma, BUS_DMA_NOWAIT)) {
		device_printf(dev, "Unable to allocate tx_desc memory\n");
		error = ENOMEM;
		goto err_tx_desc;
	}
	adapter->tx_desc_base =
	    (struct e1000_tx_desc *)adapter->txdma.dma_vaddr;

	rsize = roundup2(adapter->num_rx_desc * sizeof(struct e1000_rx_desc),
	    EM_DBA_ALIGN);

	/* Allocate Receive Descriptor ring */
	if (em_dma_malloc(adapter, rsize, &adapter->rxdma, BUS_DMA_NOWAIT)) {
		device_printf(dev, "Unable to allocate rx_desc memory\n");
		error = ENOMEM;
		goto err_rx_desc;
	}
	adapter->rx_desc_base =
	    (struct e1000_rx_desc *)adapter->rxdma.dma_vaddr;

	/* Make sure we have a good EEPROM before we read from it */
	if (e1000_validate_nvm_checksum(&adapter->hw) < 0) {
		/*
		** Some PCI-E parts fail the first check due to
		** the link being in sleep state; call it again.
		** If it fails a second time, it's a real issue.
		*/
		if (e1000_validate_nvm_checksum(&adapter->hw) < 0) {
			device_printf(dev,
			    "The EEPROM Checksum Is Not Valid\n");
			error = EIO;
			goto err_hw_init;
		}
	}

	if (e1000_read_part_num(&adapter->hw, &(adapter->part_num)) < 0) {
		device_printf(dev, "EEPROM read error "
		    "reading part number\n");
		error = EIO;
		goto err_hw_init;
	}

	/* Initialize the hardware */
	if (em_hardware_init(adapter)) {
		device_printf(dev, "Unable to initialize the hardware\n");
		error = EIO;
		goto err_hw_init;
	}

	/* Copy the permanent MAC address out of the EEPROM */
	if (e1000_read_mac_addr(&adapter->hw) < 0) {
		device_printf(dev, "EEPROM read error while reading MAC"
		    " address\n");
		error = EIO;
		goto err_hw_init;
	}

	if (!em_is_valid_ether_addr(adapter->hw.mac.addr)) {
		device_printf(dev, "Invalid MAC address\n");
		error = EIO;
		goto err_hw_init;
	}

	/* Setup OS specific network interface */
	em_setup_interface(dev, adapter);

	em_allocate_intr(adapter);

	/* Initialize statistics */
	em_update_stats_counters(adapter);

	adapter->hw.mac.get_link_status = 1;
	em_update_link_status(adapter);

	/* Indicate SOL/IDER usage */
	if (e1000_check_reset_block(&adapter->hw))
		device_printf(dev,
		    "PHY reset is blocked due to SOL/IDER session.\n");

	/* Determine if we have to control management hardware */
	adapter->has_manage = e1000_enable_mng_pass_thru(&adapter->hw);

	/*
	 * Setup Wake-on-LAN
	 */
	switch (adapter->hw.mac.type) {
	case e1000_82542:
	case e1000_82543:
		break;
	case e1000_82546:
	case e1000_82546_rev_3:
	case e1000_82571:
	case e1000_80003es2lan:
		if (adapter->hw.bus.func == 1)
			e1000_read_nvm(&adapter->hw,
			    NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data);
		else
			e1000_read_nvm(&adapter->hw,
			    NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
		eeprom_data &= EM_EEPROM_APME;
		break;
	default:
		/* APME bit in EEPROM is mapped to WUC.APME */
		eeprom_data = E1000_READ_REG(&adapter->hw, E1000_WUC) &
		    E1000_WUC_APME;
		break;
	}
	if (eeprom_data)
		adapter->wol = E1000_WUFC_MAG;
	/*
	 * We have the EEPROM settings; now apply the special cases
	 * where the EEPROM may be wrong or the board won't support
	 * wake on LAN on a particular port.
	 */
	device_id = pci_get_device(dev);
	switch (device_id) {
	case E1000_DEV_ID_82546GB_PCIE:
		adapter->wol = 0;
		break;
	case E1000_DEV_ID_82546EB_FIBER:
	case E1000_DEV_ID_82546GB_FIBER:
	case E1000_DEV_ID_82571EB_FIBER:
		/* Wake events only supported on port A for dual fiber
		 * regardless of eeprom setting */
		if (E1000_READ_REG(&adapter->hw, E1000_STATUS) &
		    E1000_STATUS_FUNC_1)
			adapter->wol = 0;
		break;
	case E1000_DEV_ID_82546GB_QUAD_COPPER_KSP3:
	case E1000_DEV_ID_82571EB_QUAD_COPPER:
	case E1000_DEV_ID_82571EB_QUAD_FIBER:
	case E1000_DEV_ID_82571EB_QUAD_COPPER_LP:
		/* if quad port adapter, disable WoL on all but port A */
		if (global_quad_port_a != 0)
			adapter->wol = 0;
		/* Reset for multiple quad port adapters */
		if (++global_quad_port_a == 4)
			global_quad_port_a = 0;
		break;
	}

	/* Do we need workaround for 82544 PCI-X adapter? */
	if (adapter->hw.bus.type == e1000_bus_type_pcix &&
	    adapter->hw.mac.type == e1000_82544)
		adapter->pcix_82544 = TRUE;
	else
		adapter->pcix_82544 = FALSE;

	/* Get control from any management/hw control */
	if (((adapter->hw.mac.type != e1000_82573) &&
	    (adapter->hw.mac.type != e1000_ich8lan) &&
	    (adapter->hw.mac.type != e1000_ich9lan)) ||
	    !e1000_check_mng_mode(&adapter->hw))
		em_get_hw_control(adapter);

	/* Tell the stack that the interface is not active */
	adapter->ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);

	INIT_DEBUGOUT("em_attach: end");

	return (0);

err_hw_init:
	em_release_hw_control(adapter);
	e1000_remove_device(&adapter->hw);
	em_dma_free(adapter, &adapter->rxdma);
err_rx_desc:
	em_dma_free(adapter, &adapter->txdma);
err_tx_desc:
err_pci:
	em_free_intr(adapter);
	em_free_pci_resources(adapter);
	EM_LOCK_DESTROY(adapter);

	return (error);
}

/*********************************************************************
 *  Device removal routine
 *
 *  The detach entry point is called when the driver is being removed.
 *  This routine stops the adapter and deallocates all the resources
 *  that were allocated for driver operation.
 *
 *  return 0 on success, positive on failure
 *********************************************************************/

static int
em_detach(device_t dev)
{
	struct adapter	*adapter = device_get_softc(dev);
	struct ifnet	*ifp = adapter->ifp;

	INIT_DEBUGOUT("em_detach: begin");

#ifdef DEVICE_POLLING
	if (ifp->if_capenable & IFCAP_POLLING)
		ether_poll_deregister(ifp);
#endif

	em_disable_intr(adapter);
	em_free_intr(adapter);
	EM_LOCK(adapter);
	adapter->in_detach = 1;
	em_stop(adapter);
	e1000_phy_hw_reset(&adapter->hw);

	em_release_manageability(adapter);
	if (((adapter->hw.mac.type != e1000_82573) &&
	    (adapter->hw.mac.type != e1000_ich8lan) &&
	    (adapter->hw.mac.type != e1000_ich9lan)) ||
	    !e1000_check_mng_mode(&adapter->hw))
		em_release_hw_control(adapter);
	if (adapter->wol) {
		E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
		E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
		em_enable_wakeup(dev);
	}

	EM_UNLOCK(adapter);
	ether_ifdetach(adapter->ifp);

	callout_drain(&adapter->timer);
	callout_drain(&adapter->tx_fifo_timer);

	em_free_pci_resources(adapter);
	bus_generic_detach(dev);
	if_free(ifp);

	e1000_remove_device(&adapter->hw);
	em_free_transmit_structures(adapter);
	em_free_receive_structures(adapter);

	/* Free Transmit Descriptor ring */
	if (adapter->tx_desc_base) {
		em_dma_free(adapter, &adapter->txdma);
		adapter->tx_desc_base = NULL;
	}

	/* Free Receive Descriptor ring */
	if (adapter->rx_desc_base) {
		em_dma_free(adapter, &adapter->rxdma);
		adapter->rx_desc_base = NULL;
	}

	EM_LOCK_DESTROY(adapter);

	return (0);
}

/*********************************************************************
 *
 *  Shutdown entry point
 *
 **********************************************************************/

static int
em_shutdown(device_t dev)
{
	return em_suspend(dev);
}

/*
 * Suspend/resume device methods.
 */
static int
em_suspend(device_t dev)
{
	struct adapter *adapter = device_get_softc(dev);

	EM_LOCK(adapter);
	em_stop(adapter);

	em_release_manageability(adapter);
	if (((adapter->hw.mac.type != e1000_82573) &&
	    (adapter->hw.mac.type != e1000_ich8lan) &&
	    (adapter->hw.mac.type != e1000_ich9lan)) ||
	    !e1000_check_mng_mode(&adapter->hw))
		em_release_hw_control(adapter);
	if (adapter->wol) {
		E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
		E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
		em_enable_wakeup(dev);
	}

	EM_UNLOCK(adapter);

	return bus_generic_suspend(dev);
}

static int
em_resume(device_t dev)
{
	struct adapter *adapter = device_get_softc(dev);
	struct ifnet *ifp = adapter->ifp;

	EM_LOCK(adapter);
	em_init_locked(adapter);

	/* Get control from any management/hw control */
	if (((adapter->hw.mac.type != e1000_82573) &&
	    (adapter->hw.mac.type != e1000_ich8lan) &&
	    (adapter->hw.mac.type != e1000_ich9lan)) ||
	    !e1000_check_mng_mode(&adapter->hw))
		em_get_hw_control(adapter);
	em_init_manageability(adapter);

	if ((ifp->if_flags & IFF_UP) &&
	    (ifp->if_drv_flags & IFF_DRV_RUNNING))
		em_start_locked(ifp);

	EM_UNLOCK(adapter);

	return bus_generic_resume(dev);
}


/*********************************************************************
 *  Transmit entry point
 *
 *  em_start is called by the stack to initiate a transmit.
 *  The driver will remain in this routine as long as there are
 *  packets to transmit and transmit resources are available.
 *  In case resources are not available, the stack is notified
 *  and the packet is requeued.
 **********************************************************************/

static void
em_start_locked(struct ifnet *ifp)
{
	struct adapter	*adapter = ifp->if_softc;
	struct mbuf	*m_head;

	EM_LOCK_ASSERT(adapter);

	if ((ifp->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) !=
	    IFF_DRV_RUNNING)
		return;
	if (!adapter->link_active)
		return;

	while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {

		IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
		if (m_head == NULL)
			break;
		/*
		 *  Encapsulation can modify our pointer, and/or make it
		 *  NULL on failure.  In that event, we can't requeue.
		 *
		 *  We now use a pointer to accommodate legacy and
		 *  advanced transmit functions.
		 */
		if (adapter->em_xmit(adapter, &m_head)) {
			if (m_head == NULL)
				break;
			ifp->if_drv_flags |= IFF_DRV_OACTIVE;
			IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
			break;
		}

		/* Send a copy of the frame to the BPF listener */
		ETHER_BPF_MTAP(ifp, m_head);

		/* Set timeout in case hardware has problems transmitting. */
		adapter->watchdog_timer = EM_TX_TIMEOUT;
	}
}

static void
em_start(struct ifnet *ifp)
{
	struct adapter *adapter = ifp->if_softc;

	EM_LOCK(adapter);
	if (ifp->if_drv_flags & IFF_DRV_RUNNING)
		em_start_locked(ifp);
	EM_UNLOCK(adapter);
}

/*********************************************************************
 *  Ioctl entry point
 *
 *  em_ioctl is called when the user wants to configure the
 *  interface.
 *
 *  return 0 on success, positive on failure
 **********************************************************************/

static int
em_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
{
	struct adapter	*adapter = ifp->if_softc;
	struct ifreq *ifr = (struct ifreq *)data;
	struct ifaddr *ifa = (struct ifaddr *)data;
	int error = 0;

	if (adapter->in_detach)
		return (error);

	switch (command) {
	case SIOCSIFADDR:
	case SIOCGIFADDR:
		if (ifa->ifa_addr->sa_family == AF_INET) {
			/*
			 * XXX
			 * Since resetting hardware takes a very long time
			 * and results in link renegotiation, we only
			 * initialize the hardware when it is absolutely
			 * required.
			 */
			ifp->if_flags |= IFF_UP;
			if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) {
				EM_LOCK(adapter);
				em_init_locked(adapter);
				EM_UNLOCK(adapter);
			}
			arp_ifinit(ifp, ifa);
		} else
			error = ether_ioctl(ifp, command, data);
		break;
	case SIOCSIFMTU:
	    {
		int max_frame_size;
		uint16_t eeprom_data = 0;

		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFMTU (Set Interface MTU)");

		EM_LOCK(adapter);
		switch (adapter->hw.mac.type) {
		case e1000_82573:
			/*
			 * 82573 only supports jumbo frames
			 * if ASPM is disabled.
			 */
			e1000_read_nvm(&adapter->hw,
			    NVM_INIT_3GIO_3, 1, &eeprom_data);
			if (eeprom_data & NVM_WORD1A_ASPM_MASK) {
				max_frame_size = ETHER_MAX_LEN;
				break;
			}
			/* Allow Jumbo frames - fall thru */
		case e1000_82571:
		case e1000_82572:
		case e1000_ich9lan:
		case e1000_82575:
		case e1000_80003es2lan:	/* Limit Jumbo Frame size */
			max_frame_size = 9234;
			break;
		case e1000_ich8lan:
			/* ICH8 does not support jumbo frames */
			max_frame_size = ETHER_MAX_LEN;
			break;
		default:
			max_frame_size = MAX_JUMBO_FRAME_SIZE;
		}
		if (ifr->ifr_mtu > max_frame_size - ETHER_HDR_LEN -
		    ETHER_CRC_LEN) {
			EM_UNLOCK(adapter);
			error = EINVAL;
			break;
		}

		ifp->if_mtu = ifr->ifr_mtu;
		adapter->hw.mac.max_frame_size =
		    ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
		em_init_locked(adapter);
		EM_UNLOCK(adapter);
		break;
	    }
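
	/*
	 * Example of the MTU bound above: with max_frame_size = 9234,
	 * the largest acceptable MTU is 9234 - ETHER_HDR_LEN (14) -
	 * ETHER_CRC_LEN (4) = 9216, so "ifconfig em0 mtu 9216" succeeds
	 * on, say, an 82571, while anything larger returns EINVAL.
	 */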
	case SIOCSIFFLAGS:
		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFFLAGS (Set Interface Flags)");
		EM_LOCK(adapter);
		if (ifp->if_flags & IFF_UP) {
			if ((ifp->if_drv_flags & IFF_DRV_RUNNING)) {
				if ((ifp->if_flags ^ adapter->if_flags) &
				    IFF_PROMISC) {
					em_disable_promisc(adapter);
					em_set_promisc(adapter);
				}
			} else
				em_init_locked(adapter);
		} else
			if (ifp->if_drv_flags & IFF_DRV_RUNNING)
				em_stop(adapter);
		adapter->if_flags = ifp->if_flags;
		EM_UNLOCK(adapter);
		break;
	case SIOCADDMULTI:
	case SIOCDELMULTI:
		IOCTL_DEBUGOUT("ioctl rcv'd: SIOC(ADD|DEL)MULTI");
		if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
			EM_LOCK(adapter);
			em_disable_intr(adapter);
			em_set_multi(adapter);
			if (adapter->hw.mac.type == e1000_82542 &&
			    adapter->hw.revision_id == E1000_REVISION_2) {
				em_initialize_receive_unit(adapter);
			}
#ifdef DEVICE_POLLING
			if (!(ifp->if_capenable & IFCAP_POLLING))
#endif
				em_enable_intr(adapter);
			EM_UNLOCK(adapter);
		}
		break;
	case SIOCSIFMEDIA:
		/* Check SOL/IDER usage */
		EM_LOCK(adapter);
		if (e1000_check_reset_block(&adapter->hw)) {
			EM_UNLOCK(adapter);
			device_printf(adapter->dev, "Media change is"
			    " blocked due to SOL/IDER session.\n");
			break;
		}
		EM_UNLOCK(adapter);
		/* FALLTHROUGH */
	case SIOCGIFMEDIA:
		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCxIFMEDIA (Get/Set Interface Media)");
		error = ifmedia_ioctl(ifp, ifr, &adapter->media, command);
		break;
	case SIOCSIFCAP:
	    {
		int mask, reinit;

		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFCAP (Set Capabilities)");
		reinit = 0;
		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
#ifdef DEVICE_POLLING
		if (mask & IFCAP_POLLING) {
			if (ifr->ifr_reqcap & IFCAP_POLLING) {
				error = ether_poll_register(em_poll, ifp);
				if (error)
					return (error);
				EM_LOCK(adapter);
				em_disable_intr(adapter);
				ifp->if_capenable |= IFCAP_POLLING;
				EM_UNLOCK(adapter);
			} else {
				error = ether_poll_deregister(ifp);
				/* Enable interrupt even in error case */
				EM_LOCK(adapter);
				em_enable_intr(adapter);
				ifp->if_capenable &= ~IFCAP_POLLING;
				EM_UNLOCK(adapter);
			}
		}
#endif
		if (mask & IFCAP_HWCSUM) {
			ifp->if_capenable ^= IFCAP_HWCSUM;
			reinit = 1;
		}
		if (mask & IFCAP_TSO4) {
			ifp->if_capenable ^= IFCAP_TSO4;
			reinit = 1;
		}
		if (mask & IFCAP_VLAN_HWTAGGING) {
			ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
			reinit = 1;
		}
		if (reinit && (ifp->if_drv_flags & IFF_DRV_RUNNING))
			em_init(adapter);
		VLAN_CAPABILITIES(ifp);
		break;
	    }
	default:
		error = ether_ioctl(ifp, command, data);
		break;
	}

	return (error);
}

/*********************************************************************
 *  Watchdog timer:
 *
 *  This routine is called from the local timer every second.
 *  As long as transmit descriptors are being cleaned the value
 *  is non-zero and we do nothing. Reaching 0 indicates a tx hang
 *  and we then reset the device.
 *
 **********************************************************************/

static void
em_watchdog(struct adapter *adapter)
{

	EM_LOCK_ASSERT(adapter);

	/*
	** The timer is set to 5 every time start queues a packet.
	** Then txeof keeps resetting to 5 as long as it cleans at
	** least one descriptor.
	** Finally, anytime all descriptors are clean the timer is
	** set to 0.
	*/
	if (adapter->watchdog_timer == 0 || --adapter->watchdog_timer)
		return;

	/* If we are in this routine because of pause frames, then
	 * don't reset the hardware.
	 */
	if (E1000_READ_REG(&adapter->hw, E1000_STATUS) &
	    E1000_STATUS_TXOFF) {
		adapter->watchdog_timer = EM_TX_TIMEOUT;
		return;
	}

	if (e1000_check_for_link(&adapter->hw) == 0)
		device_printf(adapter->dev, "watchdog timeout -- resetting\n");
	adapter->ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
	adapter->watchdog_events++;

	em_init_locked(adapter);
}

/*********************************************************************
 *  Init entry point
 *
 *  This routine is used in two ways. It is used by the stack as
 *  the init entry point in the network interface structure. It is
 *  also used by the driver as a hw/sw initialization routine to
 *  get to a consistent state.
 *
 **********************************************************************/

static void
em_init_locked(struct adapter *adapter)
{
	struct ifnet	*ifp = adapter->ifp;
	device_t	dev = adapter->dev;
	uint32_t	pba;

	INIT_DEBUGOUT("em_init: begin");

	EM_LOCK_ASSERT(adapter);

	em_stop(adapter);

	/*
	 * Packet Buffer Allocation (PBA)
	 * Writing PBA sets the receive portion of the buffer;
	 * the remainder is used for the transmit buffer.
	 *
	 * Devices before the 82547 had a Packet Buffer of 64K.
	 *   Default allocation: PBA=48K for Rx, leaving 16K for Tx.
	 * After the 82547 the buffer was reduced to 40K.
	 *   Default allocation: PBA=30K for Rx, leaving 10K for Tx.
	 *   Note: default does not leave enough room for Jumbo Frame >10k.
	 */
	switch (adapter->hw.mac.type) {
	case e1000_82547:
	case e1000_82547_rev_2: /* 82547: Total Packet Buffer is 40K */
		if (adapter->hw.mac.max_frame_size > 8192)
			pba = E1000_PBA_22K; /* 22K for Rx, 18K for Tx */
		else
			pba = E1000_PBA_30K; /* 30K for Rx, 10K for Tx */
		adapter->tx_fifo_head = 0;
		adapter->tx_head_addr = pba << EM_TX_HEAD_ADDR_SHIFT;
		adapter->tx_fifo_size =
		    (E1000_PBA_40K - pba) << EM_PBA_BYTES_SHIFT;
		break;
	/* Total Packet Buffer on these is 48K */
	case e1000_82571:
	case e1000_82572:
	case e1000_82575:
	case e1000_80003es2lan:
		pba = E1000_PBA_32K; /* 32K for Rx, 16K for Tx */
		break;
	case e1000_82573: /* 82573: Total Packet Buffer is 32K */
		pba = E1000_PBA_12K; /* 12K for Rx, 20K for Tx */
		break;
	case e1000_ich9lan:
#define E1000_PBA_10K	0x000A
		pba = E1000_PBA_10K;
		break;
	case e1000_ich8lan:
		pba = E1000_PBA_8K;
		break;
	default:
		/* Devices before 82547 had a Packet Buffer of 64K.   */
		if (adapter->hw.mac.max_frame_size > 8192)
			pba = E1000_PBA_40K; /* 40K for Rx, 24K for Tx */
		else
			pba = E1000_PBA_48K; /* 48K for Rx, 16K for Tx */
	}
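
	/*
	 * Worked example of the 82547 FIFO math above (assuming the
	 * shared-code values E1000_PBA_30K = 0x1E, E1000_PBA_40K = 0x28,
	 * and EM_PBA_BYTES_SHIFT = 10, i.e. KB to bytes): with
	 * pba = E1000_PBA_30K, tx_fifo_size = (0x28 - 0x1E) << 10 =
	 * 10 KB, matching the "30K for Rx, 10K for Tx" split noted in
	 * the comments.
	 */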

	INIT_DEBUGOUT1("em_init: pba=%dK", pba);
	E1000_WRITE_REG(&adapter->hw, E1000_PBA, pba);

	/* Get the latest mac address; user can use a LAA */
	bcopy(IF_LLADDR(adapter->ifp), adapter->hw.mac.addr,
	    ETHER_ADDR_LEN);

	/* Put the address into the Receive Address Array */
	e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);

	/*
	 * With 82571 controllers, LAA may be overwritten
	 * due to controller reset from the other port.
	 */
	if (adapter->hw.mac.type == e1000_82571)
		e1000_set_laa_state_82571(&adapter->hw, TRUE);

	/* Initialize the hardware */
	if (em_hardware_init(adapter)) {
		device_printf(dev, "Unable to initialize the hardware\n");
		return;
	}
	em_update_link_status(adapter);

	if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING)
		em_enable_vlans(adapter);

	/* Set hardware offload abilities */
	ifp->if_hwassist = 0;
	if (adapter->hw.mac.type >= e1000_82543) {
		if (ifp->if_capenable & IFCAP_TXCSUM)
			ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP);
		if (ifp->if_capenable & IFCAP_TSO4)
			ifp->if_hwassist |= CSUM_TSO;
	}

	/* Configure for OS presence */
	em_init_manageability(adapter);

	/* Prepare transmit descriptors and buffers */
	if (em_setup_transmit_structures(adapter)) {
		device_printf(dev, "Could not setup transmit structures\n");
		em_stop(adapter);
		return;
	}
	em_initialize_transmit_unit(adapter);

	/* Setup Multicast table */
	em_set_multi(adapter);

	/* Prepare receive descriptors and buffers */
	if (em_setup_receive_structures(adapter)) {
		device_printf(dev, "Could not setup receive structures\n");
		em_stop(adapter);
		return;
	}
	em_initialize_receive_unit(adapter);

	/* Don't lose promiscuous settings */
	em_set_promisc(adapter);

	ifp->if_drv_flags |= IFF_DRV_RUNNING;
	ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;

	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
	e1000_clear_hw_cntrs_base_generic(&adapter->hw);

#ifdef DEVICE_POLLING
	/*
	 * Only enable interrupts if we are not polling, make sure
	 * they are off otherwise.
	 */
	if (ifp->if_capenable & IFCAP_POLLING)
		em_disable_intr(adapter);
	else
#endif /* DEVICE_POLLING */
		em_enable_intr(adapter);

	/* Don't reset the phy next time init gets called */
	adapter->hw.phy.reset_disable = TRUE;
}

static void
em_init(void *arg)
{
	struct adapter *adapter = arg;

	EM_LOCK(adapter);
	em_init_locked(adapter);
	EM_UNLOCK(adapter);
}

#ifdef DEVICE_POLLING
/*********************************************************************
 *
 *  Legacy polling routine
 *
 *********************************************************************/
static void
em_poll(struct ifnet *ifp, enum poll_cmd cmd, int count)
{
	struct adapter *adapter = ifp->if_softc;
	uint32_t reg_icr;

	EM_LOCK(adapter);
	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
		EM_UNLOCK(adapter);
		return;
	}

	if (cmd == POLL_AND_CHECK_STATUS) {
		reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
		if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
			callout_stop(&adapter->timer);
			adapter->hw.mac.get_link_status = 1;
			e1000_check_for_link(&adapter->hw);
			em_update_link_status(adapter);
			callout_reset(&adapter->timer, hz,
			    em_local_timer, adapter);
		}
	}
	em_rxeof(adapter, count);
	em_txeof(adapter);

	if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
		em_start_locked(ifp);
	EM_UNLOCK(adapter);
}

/*********************************************************************
 *
 *  Legacy Interrupt Service routine
 *
 *********************************************************************/

static void
em_intr(void *arg)
{
	struct adapter	*adapter = arg;
	struct ifnet	*ifp;
	uint32_t	reg_icr;

	EM_LOCK(adapter);
	ifp = adapter->ifp;

	if (ifp->if_capenable & IFCAP_POLLING) {
		EM_UNLOCK(adapter);
		return;
	}

	for (;;) {
		reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);

		if (adapter->hw.mac.type >= e1000_82571 &&
		    (reg_icr & E1000_ICR_INT_ASSERTED) == 0)
			break;
		else if (reg_icr == 0)
			break;

		/*
		 * XXX: some laptops trigger several spurious interrupts
		 * on em(4) when in the resume cycle. The ICR register
		 * reports all-ones value in this case. Processing such
		 * interrupts would lead to a freeze. I don't know why.
		 */
		if (reg_icr == 0xffffffff)
			break;

		if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
			em_rxeof(adapter, -1);
			em_txeof(adapter);
		}

		/* Link status change */
		if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
			callout_stop(&adapter->timer);
			adapter->hw.mac.get_link_status = 1;
			e1000_check_for_link(&adapter->hw);
			em_update_link_status(adapter);
			callout_reset(&adapter->timer, hz,
			    em_local_timer, adapter);
		}

		if (reg_icr & E1000_ICR_RXO)
			adapter->rx_overruns++;
	}

	if (ifp->if_drv_flags & IFF_DRV_RUNNING &&
	    !IFQ_DRV_IS_EMPTY(&ifp->if_snd))
		em_start_locked(ifp);
	EM_UNLOCK(adapter);
}
#else /* if not DEVICE_POLLING, then fast interrupt routines only */

static void
em_handle_link(void *context, int pending)
{
	struct adapter	*adapter = context;
	struct ifnet *ifp;

	ifp = adapter->ifp;

	EM_LOCK(adapter);
	if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) {
		EM_UNLOCK(adapter);
		return;
	}

	callout_stop(&adapter->timer);
	adapter->hw.mac.get_link_status = 1;
	e1000_check_for_link(&adapter->hw);
	em_update_link_status(adapter);
	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
	EM_UNLOCK(adapter);
}

static void
em_handle_rxtx(void *context, int pending)
{
	struct adapter	*adapter = context;
	struct ifnet	*ifp;

	NET_LOCK_GIANT();
	ifp = adapter->ifp;

	/*
	 * TODO:
	 * It should be possible to run the tx clean loop without the lock.
	 */
	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
		if (em_rxeof(adapter, adapter->rx_process_limit) != 0)
			taskqueue_enqueue(adapter->tq, &adapter->rxtx_task);
		EM_LOCK(adapter);
		em_txeof(adapter);

		if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
			em_start_locked(ifp);
		EM_UNLOCK(adapter);
	}

	em_enable_intr(adapter);
	NET_UNLOCK_GIANT();
}
/*********************************************************************
 *
 *  Fast Interrupt Service routine
 *
 *********************************************************************/
static int
em_intr_fast(void *arg)
{
	struct adapter	*adapter = arg;
	struct ifnet	*ifp;
	uint32_t	reg_icr;

	ifp = adapter->ifp;

	reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);

	/* Hot eject?  */
	if (reg_icr == 0xffffffff)
		return (FILTER_STRAY);

	/* Definitely not our interrupt.  */
	if (reg_icr == 0x0)
		return (FILTER_STRAY);

	/*
	 * Starting with the 82571 chip, bit 31 should be used to
	 * determine whether the interrupt belongs to us.
	 */
	if (adapter->hw.mac.type >= e1000_82571 &&
	    (reg_icr & E1000_ICR_INT_ASSERTED) == 0)
		return (FILTER_STRAY);

	/*
	 * Mask interrupts until the taskqueue is finished running.  This is
	 * cheap, just assume that it is needed.  This also works around the
	 * MSI message reordering errata on certain systems.
	 */
	em_disable_intr(adapter);
	taskqueue_enqueue(adapter->tq, &adapter->rxtx_task);

	/* Link status change */
	if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC))
		taskqueue_enqueue(taskqueue_fast, &adapter->link_task);

	if (reg_icr & E1000_ICR_RXO)
		adapter->rx_overruns++;
	return (FILTER_HANDLED);
}
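
/*
 * Design note: in the non-polling configuration em_intr_fast() runs as
 * an interrupt filter, so it must not sleep or take sleepable locks.
 * It only claims the interrupt (FILTER_STRAY lets another handler on a
 * shared line take it), masks further interrupts, and defers the real
 * RX/TX work to the rxtx_task taskqueue (em_handle_rxtx() above),
 * which re-enables interrupts when it is done.
 */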
#endif /* ! DEVICE_POLLING */

/*********************************************************************
 *
 *  Media Ioctl callback
 *
 *  This routine is called whenever the user queries the status of
 *  the interface using ifconfig.
 *
 **********************************************************************/
static void
em_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
{
	struct adapter *adapter = ifp->if_softc;
	u_char fiber_type = IFM_1000_SX;

	INIT_DEBUGOUT("em_media_status: begin");

	EM_LOCK(adapter);
	e1000_check_for_link(&adapter->hw);
	em_update_link_status(adapter);

	ifmr->ifm_status = IFM_AVALID;
	ifmr->ifm_active = IFM_ETHER;

	if (!adapter->link_active) {
		EM_UNLOCK(adapter);
		return;
	}

	ifmr->ifm_status |= IFM_ACTIVE;

	if ((adapter->hw.media_type == e1000_media_type_fiber) ||
	    (adapter->hw.media_type == e1000_media_type_internal_serdes)) {
		if (adapter->hw.mac.type == e1000_82545)
			fiber_type = IFM_1000_LX;
		ifmr->ifm_active |= fiber_type | IFM_FDX;
	} else {
		switch (adapter->link_speed) {
		case 10:
			ifmr->ifm_active |= IFM_10_T;
			break;
		case 100:
			ifmr->ifm_active |= IFM_100_TX;
			break;
		case 1000:
			ifmr->ifm_active |= IFM_1000_T;
			break;
		}
		if (adapter->link_duplex == FULL_DUPLEX)
			ifmr->ifm_active |= IFM_FDX;
		else
			ifmr->ifm_active |= IFM_HDX;
	}
	EM_UNLOCK(adapter);
}

/*********************************************************************
 *
 *  Media Ioctl callback
 *
 *  This routine is called when the user changes speed/duplex using
 *  the media/mediaopt option with ifconfig.
 *
 **********************************************************************/
static int
em_media_change(struct ifnet *ifp)
{
	struct adapter *adapter = ifp->if_softc;
	struct ifmedia  *ifm = &adapter->media;

	INIT_DEBUGOUT("em_media_change: begin");

	if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
		return (EINVAL);

	EM_LOCK(adapter);
	switch (IFM_SUBTYPE(ifm->ifm_media)) {
	case IFM_AUTO:
		adapter->hw.mac.autoneg = DO_AUTO_NEG;
		adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
		break;
	case IFM_1000_LX:
	case IFM_1000_SX:
	case IFM_1000_T:
		adapter->hw.mac.autoneg = DO_AUTO_NEG;
		adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
		break;
	case IFM_100_TX:
		adapter->hw.mac.autoneg = FALSE;
		adapter->hw.phy.autoneg_advertised = 0;
		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
			adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_FULL;
		else
			adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_HALF;
		break;
	case IFM_10_T:
		adapter->hw.mac.autoneg = FALSE;
		adapter->hw.phy.autoneg_advertised = 0;
		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
			adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_FULL;
		else
			adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_HALF;
		break;
	default:
		device_printf(adapter->dev, "Unsupported media type\n");
	}

	/* As the speed/duplex settings may have changed we need to
	 * reset the PHY.
	 */
	adapter->hw.phy.reset_disable = FALSE;

	em_init_locked(adapter);
	EM_UNLOCK(adapter);

	return (0);
}
1675
1676/*********************************************************************
1677 *
1678 *  This routine maps the mbufs to tx descriptors.
1679 *
1680 *  return 0 on success, positive on failure
1681 **********************************************************************/
1682
1683static int
1684em_encap(struct adapter *adapter, struct mbuf **m_headp)
1685{
1686	bus_dma_segment_t	segs[EM_MAX_SCATTER];
1687	bus_dmamap_t		map;
1688	struct em_buffer	*tx_buffer, *tx_buffer_mapped;
1689	struct e1000_tx_desc	*ctxd = NULL;
1690	struct mbuf		*m_head;
1691	uint32_t		txd_upper, txd_lower, txd_used, txd_saved;
1692	int			nsegs, i, j, first, last = 0;
1693	int			error, do_tso, tso_desc = 0;
1694
1695	m_head = *m_headp;
1696	txd_upper = txd_lower = txd_used = txd_saved = 0;
1697
1698	do_tso = ((m_head->m_pkthdr.csum_flags & CSUM_TSO) != 0);
1699
	/*
	 * Force a cleanup if number of TX descriptors
	 * available hits the threshold
	 */
1704	if (adapter->num_tx_desc_avail <= EM_TX_CLEANUP_THRESHOLD) {
1705		em_txeof(adapter);
		/* Do we now have at least the minimum? */
1707		if (adapter->num_tx_desc_avail <= EM_TX_OP_THRESHOLD) {
1708			adapter->no_tx_desc_avail1++;
1709			return (ENOBUFS);
1710		}
1711	}
1712
1713
1714	/*
1715	 * TSO workaround:
	 *  If an mbuf contains only the header, we need
	 *  to pull 4 bytes of data into it.
1718	 */
1719	if (do_tso && (m_head->m_len <= M_TSO_LEN)) {
1720		m_head = m_pullup(m_head, M_TSO_LEN + 4);
1721		*m_headp = m_head;
1722		if (m_head == NULL)
1723			return (ENOBUFS);
1724	}
1725
1726	/*
1727	 * Map the packet for DMA
1728	 *
1729	 * Capture the first descriptor index,
1730	 * this descriptor will have the index
1731	 * of the EOP which is the only one that
1732	 * now gets a DONE bit writeback.
1733	 */
1734	first = adapter->next_avail_tx_desc;
1735	tx_buffer = &adapter->tx_buffer_area[first];
1736	tx_buffer_mapped = tx_buffer;
1737	map = tx_buffer->map;
1738
1739	error = bus_dmamap_load_mbuf_sg(adapter->txtag, map,
1740	    *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
1741
1742	/*
1743	 * There are two types of errors we can (try) to handle:
1744	 * - EFBIG means the mbuf chain was too long and bus_dma ran
1745	 *   out of segments.  Defragment the mbuf chain and try again.
1746	 * - ENOMEM means bus_dma could not obtain enough bounce buffers
1747	 *   at this point in time.  Defer sending and try again later.
1748	 * All other errors, in particular EINVAL, are fatal and prevent the
1749	 * mbuf chain from ever going through.  Drop it and report error.
1750	 */
1751	if (error == EFBIG) {
1752		struct mbuf *m;
1753
1754		m = m_defrag(*m_headp, M_DONTWAIT);
1755		if (m == NULL) {
1756			adapter->mbuf_alloc_failed++;
1757			m_freem(*m_headp);
1758			*m_headp = NULL;
1759			return (ENOBUFS);
1760		}
1761		*m_headp = m;
1762
1763		/* Try it again */
1764		error = bus_dmamap_load_mbuf_sg(adapter->txtag, map,
1765		    *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
1766
1767		if (error == ENOMEM) {
1768			adapter->no_tx_dma_setup++;
1769			return (error);
1770		} else if (error != 0) {
1771			adapter->no_tx_dma_setup++;
1772			m_freem(*m_headp);
1773			*m_headp = NULL;
1774			return (error);
1775		}
1776	} else if (error == ENOMEM) {
1777		adapter->no_tx_dma_setup++;
1778		return (error);
1779	} else if (error != 0) {
1780		adapter->no_tx_dma_setup++;
1781		m_freem(*m_headp);
1782		*m_headp = NULL;
1783		return (error);
1784	}
1785
1786	/*
1787	 * TSO Hardware workaround, if this packet is not
1788	 * TSO, and is only a single descriptor long, and
1789	 * it follows a TSO burst, then we need to add a
1790	 * sentinel descriptor to prevent premature writeback.
1791	 */
1792	if ((do_tso == 0) && (adapter->tx_tso == TRUE)) {
1793		if (nsegs == 1)
1794			tso_desc = TRUE;
1795		adapter->tx_tso = FALSE;
1796	}
1797
	if (nsegs > (adapter->num_tx_desc_avail - 2)) {
		adapter->no_tx_desc_avail2++;
		bus_dmamap_unload(adapter->txtag, map);
		return (ENOBUFS);
	}
1803	m_head = *m_headp;
1804
1805	/* Do hardware assists */
1806	if (em_tso_setup(adapter, m_head, &txd_upper, &txd_lower))
1807		/* we need to make a final sentinel transmit desc */
1808		tso_desc = TRUE;
1809	else if (m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD)
1810		em_transmit_checksum_setup(adapter,  m_head,
1811		    &txd_upper, &txd_lower);
1812
1813	i = adapter->next_avail_tx_desc;
1814	if (adapter->pcix_82544)
1815		txd_saved = i;
1816
1817	/* Set up our transmit descriptors */
1818	for (j = 0; j < nsegs; j++) {
1819		bus_size_t seg_len;
1820		bus_addr_t seg_addr;
1821		/* If adapter is 82544 and on PCIX bus */
		if (adapter->pcix_82544) {
1823			DESC_ARRAY	desc_array;
1824			uint32_t	array_elements, counter;
1825			/*
1826			 * Check the Address and Length combination and
1827			 * split the data accordingly
1828			 */
1829			array_elements = em_fill_descriptors(segs[j].ds_addr,
1830			    segs[j].ds_len, &desc_array);
1831			for (counter = 0; counter < array_elements; counter++) {
1832				if (txd_used == adapter->num_tx_desc_avail) {
1833					adapter->next_avail_tx_desc = txd_saved;
1834					adapter->no_tx_desc_avail2++;
1835					bus_dmamap_unload(adapter->txtag, map);
1836					return (ENOBUFS);
1837				}
1838				tx_buffer = &adapter->tx_buffer_area[i];
1839				ctxd = &adapter->tx_desc_base[i];
1840				ctxd->buffer_addr = htole64(
1841				    desc_array.descriptor[counter].address);
1842				ctxd->lower.data = htole32(
1843				    (adapter->txd_cmd | txd_lower | (uint16_t)
1844				    desc_array.descriptor[counter].length));
1845				ctxd->upper.data =
1846				    htole32((txd_upper));
1847				last = i;
1848				if (++i == adapter->num_tx_desc)
					i = 0;
				tx_buffer->m_head = NULL;
				tx_buffer->next_eop = -1;
				txd_used++;
			}
1854		} else {
1855			tx_buffer = &adapter->tx_buffer_area[i];
1856			ctxd = &adapter->tx_desc_base[i];
1857			seg_addr = segs[j].ds_addr;
1858			seg_len  = segs[j].ds_len;
1859			/*
1860			** TSO Workaround:
1861			** If this is the last descriptor, we want to
1862			** split it so we have a small final sentinel
1863			*/
			if (tso_desc && (j == (nsegs - 1)) && (seg_len > 8)) {
1865				seg_len -= 4;
1866				ctxd->buffer_addr = htole64(seg_addr);
1867				ctxd->lower.data = htole32(
1868				adapter->txd_cmd | txd_lower | seg_len);
1869				ctxd->upper.data =
1870				    htole32(txd_upper);
1871				if (++i == adapter->num_tx_desc)
1872					i = 0;
1873				/* Now make the sentinel */
1874				++txd_used; /* using an extra txd */
1875				ctxd = &adapter->tx_desc_base[i];
1876				tx_buffer = &adapter->tx_buffer_area[i];
1877				ctxd->buffer_addr =
1878				    htole64(seg_addr + seg_len);
1879				ctxd->lower.data = htole32(
1880				adapter->txd_cmd | txd_lower | 4);
1881				ctxd->upper.data =
1882				    htole32(txd_upper);
1883				last = i;
1884				if (++i == adapter->num_tx_desc)
1885					i = 0;
1886			} else {
				ctxd->buffer_addr = htole64(seg_addr);
1888				ctxd->lower.data = htole32(
1889				adapter->txd_cmd | txd_lower | seg_len);
1890				ctxd->upper.data =
1891				    htole32(txd_upper);
1892				last = i;
1893				if (++i == adapter->num_tx_desc)
1894					i = 0;
1895			}
1896			tx_buffer->m_head = NULL;
1897			tx_buffer->next_eop = -1;
1898		}
1899	}
1900
1901	adapter->next_avail_tx_desc = i;
1902	if (adapter->pcix_82544)
1903		adapter->num_tx_desc_avail -= txd_used;
1904	else {
1905		adapter->num_tx_desc_avail -= nsegs;
1906		if (tso_desc) /* TSO used an extra for sentinel */
1907			adapter->num_tx_desc_avail -= txd_used;
1908	}
1909
1910	if (m_head->m_flags & M_VLANTAG) {
1911		/* Set the vlan id. */
1912		ctxd->upper.fields.special =
1913		    htole16(m_head->m_pkthdr.ether_vtag);
		/* Tell hardware to add tag */
		ctxd->lower.data |= htole32(E1000_TXD_CMD_VLE);
	}

	tx_buffer->m_head = m_head;
	tx_buffer_mapped->map = tx_buffer->map;
	tx_buffer->map = map;
	bus_dmamap_sync(adapter->txtag, map, BUS_DMASYNC_PREWRITE);

	/*
	 * Last Descriptor of Packet
	 * needs End Of Packet (EOP)
	 * and Report Status (RS)
	 */
	ctxd->lower.data |=
1929	    htole32(E1000_TXD_CMD_EOP | E1000_TXD_CMD_RS);
1930	/*
1931	 * Keep track in the first buffer which
1932	 * descriptor will be written back
1933	 */
1934	tx_buffer = &adapter->tx_buffer_area[first];
1935	tx_buffer->next_eop = last;
1936
1937	/*
1938	 * Advance the Transmit Descriptor Tail (TDT), this tells the E1000
1939	 * that this frame is available to transmit.
1940	 */
1941	bus_dmamap_sync(adapter->txdma.dma_tag, adapter->txdma.dma_map,
1942	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1943	if (adapter->hw.mac.type == e1000_82547 &&
1944	    adapter->link_duplex == HALF_DUPLEX)
1945		em_82547_move_tail(adapter);
1946	else {
1947		E1000_WRITE_REG(&adapter->hw, E1000_TDT, i);
1948		if (adapter->hw.mac.type == e1000_82547)
1949			em_82547_update_fifo_head(adapter,
1950			    m_head->m_pkthdr.len);
1951	}
1952
1953	return (0);
1954}
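
/*
 * Caller sketch (an assumption for illustration, not the driver's
 * exact em_start() body): the start routine dequeues one mbuf chain,
 * pushes it through the adapter->em_xmit hook (em_encap above or
 * em_adv_encap below), and requeues it on a transient failure.
 */
#if 0
	struct mbuf *m_head;

	IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
	if (m_head == NULL)
		return;
	if (adapter->em_xmit(adapter, &m_head) != 0) {
		if (m_head != NULL)	/* still ours, retry later */
			IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
		ifp->if_drv_flags |= IFF_DRV_OACTIVE;
	} else
		BPF_MTAP(ifp, m_head);
#endif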
1955
1956/*********************************************************************
1957 *
 *  This routine maps the mbufs to Advanced TX descriptors,
 *  as used by the 82575 adapter; it needs no workarounds.
1960 *
1961 **********************************************************************/
1962
1963static int
1964em_adv_encap(struct adapter *adapter, struct mbuf **m_headp)
1965{
1966	bus_dma_segment_t	segs[EM_MAX_SCATTER];
1967	bus_dmamap_t		map;
1968	struct em_buffer	*tx_buffer, *tx_buffer_mapped;
1969	union e1000_adv_tx_desc	*txd = NULL;
1970	struct mbuf		*m_head;
1971	u32			olinfo_status = 0, cmd_type_len = 0;
1972	u32			paylen = 0;
1973	int			nsegs, i, j, error, first, last = 0;
1974
1975	m_head = *m_headp;
1976
1977
1978	/* Set basic descriptor constants */
1979	cmd_type_len |= E1000_ADVTXD_DTYP_DATA;
1980	cmd_type_len |= E1000_ADVTXD_DCMD_IFCS | E1000_ADVTXD_DCMD_DEXT;
1981
	/*
	 * Force a cleanup if number of TX descriptors
	 * available hits the threshold
	 */
	if (adapter->num_tx_desc_avail <= EM_TX_CLEANUP_THRESHOLD) {
		em_txeof(adapter);
		/* Do we now have at least the minimum? */
1989		if (adapter->num_tx_desc_avail <= EM_TX_OP_THRESHOLD) {
1990			adapter->no_tx_desc_avail1++;
1991			return (ENOBUFS);
1992		}
1993	}
1994
1995	/*
	 * Map the packet for DMA.
1997	 *
1998	 * Capture the first descriptor index,
1999	 * this descriptor will have the index
2000	 * of the EOP which is the only one that
2001	 * now gets a DONE bit writeback.
2002	 */
2003	first = adapter->next_avail_tx_desc;
2004	tx_buffer = &adapter->tx_buffer_area[first];
2005	tx_buffer_mapped = tx_buffer;
2006	map = tx_buffer->map;
2007
2008	error = bus_dmamap_load_mbuf_sg(adapter->txtag, map,
2009	    *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
2010
2011	if (error == EFBIG) {
2012		struct mbuf *m;
2013
2014		m = m_defrag(*m_headp, M_DONTWAIT);
2015		if (m == NULL) {
2016			adapter->mbuf_alloc_failed++;
2017			m_freem(*m_headp);
2018			*m_headp = NULL;
2019			return (ENOBUFS);
2020		}
2021		*m_headp = m;
2022
2023		/* Try it again */
2024		error = bus_dmamap_load_mbuf_sg(adapter->txtag, map,
2025		    *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
2026
2027		if (error == ENOMEM) {
2028			adapter->no_tx_dma_setup++;
2029			return (error);
2030		} else if (error != 0) {
2031			adapter->no_tx_dma_setup++;
2032			m_freem(*m_headp);
2033			*m_headp = NULL;
2034			return (error);
2035		}
2036	} else if (error == ENOMEM) {
2037		adapter->no_tx_dma_setup++;
2038		return (error);
2039	} else if (error != 0) {
2040		adapter->no_tx_dma_setup++;
2041		m_freem(*m_headp);
2042		*m_headp = NULL;
2043		return (error);
2044	}
2045
2046	/* Check again to be sure we have enough descriptors */
	if (nsegs > (adapter->num_tx_desc_avail - 2)) {
		adapter->no_tx_desc_avail2++;
		bus_dmamap_unload(adapter->txtag, map);
		return (ENOBUFS);
	}
2052	m_head = *m_headp;
2053
	/*
	 * Set up the context descriptor:
	 * used when any hardware offload is done.
	 * This includes CSUM, VLAN, and TSO. It
	 * will use the first descriptor.
	 */
2060	/* First try TSO */
2061	if (em_tso_adv_setup(adapter, m_head, &paylen)) {
2062		cmd_type_len |= E1000_ADVTXD_DCMD_TSE;
2063		olinfo_status |= E1000_TXD_POPTS_IXSM << 8;
2064		olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
2065		olinfo_status |= paylen << E1000_ADVTXD_PAYLEN_SHIFT;
2066	} else if (m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD) {
2067		if (em_tx_adv_ctx_setup(adapter, m_head))
2068			olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
2069	}
2070
2071	/* Set up our transmit descriptors */
2072	i = adapter->next_avail_tx_desc;
2073	for (j = 0; j < nsegs; j++) {
2074		bus_size_t seg_len;
2075		bus_addr_t seg_addr;
2076
2077		tx_buffer = &adapter->tx_buffer_area[i];
2078		txd = (union e1000_adv_tx_desc *)&adapter->tx_desc_base[i];
2079		seg_addr = segs[j].ds_addr;
2080		seg_len  = segs[j].ds_len;
2081
2082		txd->read.buffer_addr = htole64(seg_addr);
2083		txd->read.cmd_type_len = htole32(
2084		    adapter->txd_cmd | cmd_type_len | seg_len);
2085		txd->read.olinfo_status = htole32(olinfo_status);
2086		last = i;
2087		if (++i == adapter->num_tx_desc)
2088			i = 0;
2089		tx_buffer->m_head = NULL;
2090		tx_buffer->next_eop = -1;
2091	}
2092
2093	adapter->next_avail_tx_desc = i;
2094	adapter->num_tx_desc_avail -= nsegs;
2095
	tx_buffer->m_head = m_head;
	tx_buffer_mapped->map = tx_buffer->map;
	tx_buffer->map = map;
	bus_dmamap_sync(adapter->txtag, map, BUS_DMASYNC_PREWRITE);

	/*
	 * Last Descriptor of Packet
	 * needs End Of Packet (EOP)
	 * and Report Status (RS)
	 */
	txd->read.cmd_type_len |=
2107	    htole32(E1000_TXD_CMD_EOP | E1000_TXD_CMD_RS);
2108	/*
2109	 * Keep track in the first buffer which
2110	 * descriptor will be written back
2111	 */
2112	tx_buffer = &adapter->tx_buffer_area[first];
2113	tx_buffer->next_eop = last;
2114
2115	/*
2116	 * Advance the Transmit Descriptor Tail (TDT), this tells the E1000
2117	 * that this frame is available to transmit.
2118	 */
2119	bus_dmamap_sync(adapter->txdma.dma_tag, adapter->txdma.dma_map,
2120	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
2121	E1000_WRITE_REG(&adapter->hw, E1000_TDT, i);
2122
2123	return (0);
2124
2125}
2126
2127/*********************************************************************
2128 *
2129 * 82547 workaround to avoid controller hang in half-duplex environment.
2130 * The workaround is to avoid queuing a large packet that would span
2131 * the internal Tx FIFO ring boundary. We need to reset the FIFO pointers
2132 * in this case. We do that only when FIFO is quiescent.
2133 *
2134 **********************************************************************/
2135static void
2136em_82547_move_tail(void *arg)
2137{
2138	struct adapter *adapter = arg;
2139	uint16_t hw_tdt;
2140	uint16_t sw_tdt;
2141	struct e1000_tx_desc *tx_desc;
2142	uint16_t length = 0;
2143	boolean_t eop = 0;
2144
2145	EM_LOCK_ASSERT(adapter);
2146
2147	hw_tdt = E1000_READ_REG(&adapter->hw, E1000_TDT);
2148	sw_tdt = adapter->next_avail_tx_desc;
2149
2150	while (hw_tdt != sw_tdt) {
2151		tx_desc = &adapter->tx_desc_base[hw_tdt];
2152		length += tx_desc->lower.flags.length;
2153		eop = tx_desc->lower.data & E1000_TXD_CMD_EOP;
2154		if (++hw_tdt == adapter->num_tx_desc)
2155			hw_tdt = 0;
2156
2157		if (eop) {
2158			if (em_82547_fifo_workaround(adapter, length)) {
2159				adapter->tx_fifo_wrk_cnt++;
2160				callout_reset(&adapter->tx_fifo_timer, 1,
2161					em_82547_move_tail, adapter);
2162				break;
2163			}
2164			E1000_WRITE_REG(&adapter->hw, E1000_TDT, hw_tdt);
2165			em_82547_update_fifo_head(adapter, length);
2166			length = 0;
2167		}
2168	}
2169}
2170
2171static int
2172em_82547_fifo_workaround(struct adapter *adapter, int len)
2173{
2174	int fifo_space, fifo_pkt_len;
2175
2176	fifo_pkt_len = roundup2(len + EM_FIFO_HDR, EM_FIFO_HDR);
2177
2178	if (adapter->link_duplex == HALF_DUPLEX) {
2179		fifo_space = adapter->tx_fifo_size - adapter->tx_fifo_head;
2180
2181		if (fifo_pkt_len >= (EM_82547_PKT_THRESH + fifo_space)) {
2182			if (em_82547_tx_fifo_reset(adapter))
2183				return (0);
2184			else
2185				return (1);
2186		}
2187	}
2188
2189	return (0);
2190}
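
/*
 * Worked example for the padding math above and in
 * em_82547_update_fifo_head() below (assuming EM_FIFO_HDR is 16
 * bytes): a 1514-byte frame occupies fifo_pkt_len =
 * roundup2(1514 + 16, 16) = 1536 bytes of the internal FIFO, so the
 * head always advances in 16-byte steps and wraps modulo tx_fifo_size.
 */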
2191
2192static void
2193em_82547_update_fifo_head(struct adapter *adapter, int len)
2194{
2195	int fifo_pkt_len = roundup2(len + EM_FIFO_HDR, EM_FIFO_HDR);
2196
2197	/* tx_fifo_head is always 16 byte aligned */
2198	adapter->tx_fifo_head += fifo_pkt_len;
2199	if (adapter->tx_fifo_head >= adapter->tx_fifo_size) {
2200		adapter->tx_fifo_head -= adapter->tx_fifo_size;
2201	}
2202}
2203
2204
2205static int
2206em_82547_tx_fifo_reset(struct adapter *adapter)
2207{
2208	uint32_t tctl;
2209
2210	if ((E1000_READ_REG(&adapter->hw, E1000_TDT) ==
2211	    E1000_READ_REG(&adapter->hw, E1000_TDH)) &&
2212	    (E1000_READ_REG(&adapter->hw, E1000_TDFT) ==
2213	    E1000_READ_REG(&adapter->hw, E1000_TDFH)) &&
2214	    (E1000_READ_REG(&adapter->hw, E1000_TDFTS) ==
2215	    E1000_READ_REG(&adapter->hw, E1000_TDFHS)) &&
2216	    (E1000_READ_REG(&adapter->hw, E1000_TDFPC) == 0)) {
2217		/* Disable TX unit */
2218		tctl = E1000_READ_REG(&adapter->hw, E1000_TCTL);
2219		E1000_WRITE_REG(&adapter->hw, E1000_TCTL,
2220		    tctl & ~E1000_TCTL_EN);
2221
2222		/* Reset FIFO pointers */
2223		E1000_WRITE_REG(&adapter->hw, E1000_TDFT,
2224		    adapter->tx_head_addr);
2225		E1000_WRITE_REG(&adapter->hw, E1000_TDFH,
2226		    adapter->tx_head_addr);
2227		E1000_WRITE_REG(&adapter->hw, E1000_TDFTS,
2228		    adapter->tx_head_addr);
2229		E1000_WRITE_REG(&adapter->hw, E1000_TDFHS,
2230		    adapter->tx_head_addr);
2231
2232		/* Re-enable TX unit */
2233		E1000_WRITE_REG(&adapter->hw, E1000_TCTL, tctl);
2234		E1000_WRITE_FLUSH(&adapter->hw);
2235
2236		adapter->tx_fifo_head = 0;
2237		adapter->tx_fifo_reset_cnt++;
2238
2239		return (TRUE);
	}

	return (FALSE);
2244}
2245
2246static void
2247em_set_promisc(struct adapter *adapter)
2248{
2249	struct ifnet	*ifp = adapter->ifp;
2250	uint32_t	reg_rctl;
2251
2252	reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2253
2254	if (ifp->if_flags & IFF_PROMISC) {
2255		reg_rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
2256		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2257	} else if (ifp->if_flags & IFF_ALLMULTI) {
2258		reg_rctl |= E1000_RCTL_MPE;
2259		reg_rctl &= ~E1000_RCTL_UPE;
2260		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2261	}
2262}
2263
2264static void
2265em_disable_promisc(struct adapter *adapter)
2266{
2267	uint32_t	reg_rctl;
2268
2269	reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2270
2271	reg_rctl &=  (~E1000_RCTL_UPE);
2272	reg_rctl &=  (~E1000_RCTL_MPE);
2273	E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2274}
2275
2276
2277/*********************************************************************
2278 *  Multicast Update
2279 *
2280 *  This routine is called whenever multicast address list is updated.
2281 *
2282 **********************************************************************/
2283
2284static void
2285em_set_multi(struct adapter *adapter)
2286{
2287	struct ifnet	*ifp = adapter->ifp;
2288	struct ifmultiaddr *ifma;
2289	uint32_t reg_rctl = 0;
	uint8_t  mta[MAX_NUM_MULTICAST_ADDRESSES * ETH_ADDR_LEN];
2291	int mcnt = 0;
2292
2293	IOCTL_DEBUGOUT("em_set_multi: begin");
2294
2295	if (adapter->hw.mac.type == e1000_82542 &&
2296	    adapter->hw.revision_id == E1000_REVISION_2) {
2297		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2298		if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
2299			e1000_pci_clear_mwi(&adapter->hw);
2300		reg_rctl |= E1000_RCTL_RST;
2301		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2302		msec_delay(5);
2303	}
2304
2305	IF_ADDR_LOCK(ifp);
2306	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
2307		if (ifma->ifma_addr->sa_family != AF_LINK)
2308			continue;
2309
2310		if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
2311			break;
2312
2313		bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
2314		    &mta[mcnt * ETH_ADDR_LEN], ETH_ADDR_LEN);
2315		mcnt++;
2316	}
2317	IF_ADDR_UNLOCK(ifp);
2318
2319	if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES) {
2320		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2321		reg_rctl |= E1000_RCTL_MPE;
2322		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2323	} else
2324		e1000_mc_addr_list_update(&adapter->hw, mta,
2325		    mcnt, 1, adapter->hw.mac.rar_entry_count);
2326
2327	if (adapter->hw.mac.type == e1000_82542 &&
2328	    adapter->hw.revision_id == E1000_REVISION_2) {
2329		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2330		reg_rctl &= ~E1000_RCTL_RST;
2331		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2332		msec_delay(5);
2333		if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
2334			e1000_pci_set_mwi(&adapter->hw);
2335	}
2336}
2337
2338
2339/*********************************************************************
2340 *  Timer routine
2341 *
2342 *  This routine checks for link status and updates statistics.
2343 *
2344 **********************************************************************/
2345
2346static void
2347em_local_timer(void *arg)
2348{
2349	struct adapter	*adapter = arg;
2350	struct ifnet	*ifp = adapter->ifp;
2351
2352	EM_LOCK_ASSERT(adapter);
2353
2354	e1000_check_for_link(&adapter->hw);
2355	em_update_link_status(adapter);
2356	em_update_stats_counters(adapter);
	if (em_display_debug_stats && (ifp->if_drv_flags & IFF_DRV_RUNNING))
2358		em_print_hw_stats(adapter);
2359	em_smartspeed(adapter);
2360	/*
2361	 * Each second we check the watchdog to
2362	 * protect against hardware hangs.
2363	 */
2364	em_watchdog(adapter);
2365
2366	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
2367
2368}
2369
2370static void
2371em_update_link_status(struct adapter *adapter)
2372{
2373	struct ifnet *ifp = adapter->ifp;
2374	device_t dev = adapter->dev;
2375
2376	if (E1000_READ_REG(&adapter->hw, E1000_STATUS) &
2377	    E1000_STATUS_LU) {
2378		if (adapter->link_active == 0) {
2379			e1000_get_speed_and_duplex(&adapter->hw,
2380			    &adapter->link_speed, &adapter->link_duplex);
2381			/* Check if we must disable SPEED_MODE bit on PCI-E */
2382			if ((adapter->link_speed != SPEED_1000) &&
2383			    ((adapter->hw.mac.type == e1000_82571) ||
2384			    (adapter->hw.mac.type == e1000_82572))) {
2385				int tarc0;
2386
2387				tarc0 = E1000_READ_REG(&adapter->hw,
2388				    E1000_TARC0);
2389				tarc0 &= ~SPEED_MODE_BIT;
2390				E1000_WRITE_REG(&adapter->hw,
2391				    E1000_TARC0, tarc0);
2392			}
2393			if (bootverbose)
2394				device_printf(dev, "Link is up %d Mbps %s\n",
2395				    adapter->link_speed,
2396				    ((adapter->link_duplex == FULL_DUPLEX) ?
2397				    "Full Duplex" : "Half Duplex"));
2398			adapter->link_active = 1;
2399			adapter->smartspeed = 0;
2400			ifp->if_baudrate = adapter->link_speed * 1000000;
2401			if_link_state_change(ifp, LINK_STATE_UP);
2402		}
2403	} else {
2404		if (adapter->link_active == 1) {
2405			ifp->if_baudrate = adapter->link_speed = 0;
2406			adapter->link_duplex = 0;
2407			if (bootverbose)
2408				device_printf(dev, "Link is Down\n");
2409			adapter->link_active = 0;
2410			if_link_state_change(ifp, LINK_STATE_DOWN);
2411		}
2412	}
2413}
2414
2415/*********************************************************************
2416 *
2417 *  This routine disables all traffic on the adapter by issuing a
2418 *  global reset on the MAC and deallocates TX/RX buffers.
2419 *
2420 **********************************************************************/
2421
2422static void
2423em_stop(void *arg)
2424{
2425	struct adapter	*adapter = arg;
2426	struct ifnet	*ifp = adapter->ifp;
2427
2428	EM_LOCK_ASSERT(adapter);
2429
2430	INIT_DEBUGOUT("em_stop: begin");
2431
2432	em_disable_intr(adapter);
2433	callout_stop(&adapter->timer);
2434	callout_stop(&adapter->tx_fifo_timer);
2435	em_free_transmit_structures(adapter);
2436	em_free_receive_structures(adapter);
2437
2438	/* Tell the stack that the interface is no longer active */
2439	ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
2440
2441	e1000_reset_hw(&adapter->hw);
2442	if (adapter->hw.mac.type >= e1000_82544)
2443		E1000_WRITE_REG(&adapter->hw, E1000_WUC, 0);
2444}
2445
2446
2447/*********************************************************************
2448 *
2449 *  Determine hardware revision.
2450 *
2451 **********************************************************************/
2452static void
2453em_identify_hardware(struct adapter *adapter)
2454{
2455	device_t dev = adapter->dev;
2456
2457	/* Make sure our PCI config space has the necessary stuff set */
2458	adapter->hw.bus.pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2);
	if ((adapter->hw.bus.pci_cmd_word & PCIM_CMD_BUSMASTEREN) == 0 ||
	    (adapter->hw.bus.pci_cmd_word & PCIM_CMD_MEMEN) == 0) {
		device_printf(dev, "Memory Access and/or Bus Master bits "
		    "were not set!\n");
		adapter->hw.bus.pci_cmd_word |=
		    (PCIM_CMD_BUSMASTEREN | PCIM_CMD_MEMEN);
2465		pci_write_config(dev, PCIR_COMMAND,
2466		    adapter->hw.bus.pci_cmd_word, 2);
2467	}
2468
2469	/* Save off the information about this board */
2470	adapter->hw.vendor_id = pci_get_vendor(dev);
2471	adapter->hw.device_id = pci_get_device(dev);
2472	adapter->hw.revision_id = pci_read_config(dev, PCIR_REVID, 1);
2473	adapter->hw.subsystem_vendor_id =
2474	    pci_read_config(dev, PCIR_SUBVEND_0, 2);
2475	adapter->hw.subsystem_device_id = pci_read_config(dev, PCIR_SUBDEV_0, 2);
2476
2477	/* Do Shared Code Init and Setup */
2478	if (e1000_set_mac_type(&adapter->hw)) {
2479		device_printf(dev, "Setup init failure\n");
2480		return;
2481	}
2482}
2483
2484static int
2485em_allocate_pci_resources(struct adapter *adapter)
2486{
2487	device_t	dev = adapter->dev;
2488	int		val, rid;
2489
2490	rid = PCIR_BAR(0);
2491	adapter->res_memory = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
2492	    &rid, RF_ACTIVE);
2493	if (adapter->res_memory == NULL) {
2494		device_printf(dev, "Unable to allocate bus resource: memory\n");
2495		return (ENXIO);
2496	}
2497	adapter->osdep.mem_bus_space_tag =
2498	    rman_get_bustag(adapter->res_memory);
2499	adapter->osdep.mem_bus_space_handle =
2500	    rman_get_bushandle(adapter->res_memory);
2501	adapter->hw.hw_addr = (uint8_t *)&adapter->osdep.mem_bus_space_handle;
2502
2503	/* Only older adapters use IO mapping */
2504	if ((adapter->hw.mac.type > e1000_82542) &&
2505	    (adapter->hw.mac.type < e1000_82571)) {
		/* Figure out where our IO BAR is */
2507		for (rid = PCIR_BAR(0); rid < PCIR_CIS;) {
2508			val = pci_read_config(dev, rid, 4);
2509			if (EM_BAR_TYPE(val) == EM_BAR_TYPE_IO) {
2510				adapter->io_rid = rid;
2511				break;
2512			}
2513			rid += 4;
2514			/* check for 64bit BAR */
2515			if (EM_BAR_MEM_TYPE(val) == EM_BAR_MEM_TYPE_64BIT)
2516				rid += 4;
2517		}
2518		if (rid >= PCIR_CIS) {
2519			device_printf(dev, "Unable to locate IO BAR\n");
2520			return (ENXIO);
2521		}
2522		adapter->res_ioport = bus_alloc_resource_any(dev,
2523		    SYS_RES_IOPORT, &adapter->io_rid, RF_ACTIVE);
2524		if (adapter->res_ioport == NULL) {
2525			device_printf(dev, "Unable to allocate bus resource: "
2526			    "ioport\n");
2527			return (ENXIO);
2528		}
2529		adapter->hw.io_base = 0;
2530		adapter->osdep.io_bus_space_tag =
2531		    rman_get_bustag(adapter->res_ioport);
2532		adapter->osdep.io_bus_space_handle =
2533		    rman_get_bushandle(adapter->res_ioport);
2534	}
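
	/*
	 * Worked example of the BAR scan above (hypothetical layout):
	 * if BAR0 at config offset 0x10 (PCIR_BAR(0)) were a 64-bit
	 * memory BAR it would consume two dwords, so the loop advances
	 * rid by 8 to 0x18; finding EM_BAR_TYPE_IO there ends the scan
	 * with io_rid = 0x18.
	 */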
2535
2536	/*
	 * Set up MSI/X or MSI if PCI Express;
	 * only the newest adapters can use MSI/X,
	 * and real support for it is forthcoming.
2540	 */
2541	adapter->msi = 0; /* Set defaults */
2542	rid = 0x0;
2543	if (adapter->hw.mac.type >= e1000_82575) {
2544		/*
2545		 * Eventually this will be used
2546		 * for Multiqueue, for now we will
2547		 * just use one vector.
2548		 */
		val = pci_msix_count(dev);
		if (val && pci_alloc_msix(dev, &val) == 0) {
			rid = 1;
			adapter->msi = 1;
2553		}
2554	} else if (adapter->hw.bus.type == e1000_bus_type_pci_express) {
		val = pci_msi_count(dev);
		if (val == 1 && pci_alloc_msi(dev, &val) == 0) {
			rid = 1;
			adapter->msi = 1;
		}
2560	}
2561	adapter->res_interrupt = bus_alloc_resource_any(dev,
2562	    SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2563	if (adapter->res_interrupt == NULL) {
2564		device_printf(dev, "Unable to allocate bus resource: "
2565		    "interrupt\n");
2566		return (ENXIO);
2567	}
2568
2569	adapter->hw.back = &adapter->osdep;
2570
2571	return (0);
2572}
2573
2574/*********************************************************************
2575 *
2576 *  Setup the appropriate Interrupt handlers.
2577 *
2578 **********************************************************************/
2579int
2580em_allocate_intr(struct adapter *adapter)
2581{
2582	device_t dev = adapter->dev;
2583	int error;
2584
2585	/* Manually turn off all interrupts */
2586	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2587
2588#ifdef DEVICE_POLLING
2589	/* We do Legacy setup */
2590	if (adapter->int_handler_tag == NULL &&
2591	    (error = bus_setup_intr(dev, adapter->res_interrupt,
2592	    INTR_TYPE_NET | INTR_MPSAFE, NULL, em_intr, adapter,
2593	    &adapter->int_handler_tag)) != 0) {
		device_printf(dev, "Failed to register interrupt handler\n");
2595		return (error);
2596	}
2597
2598#else
2599	/*
2600	 * Try allocating a fast interrupt and the associated deferred
2601	 * processing contexts.
2602	 */
2603	TASK_INIT(&adapter->rxtx_task, 0, em_handle_rxtx, adapter);
2604	TASK_INIT(&adapter->link_task, 0, em_handle_link, adapter);
2605	adapter->tq = taskqueue_create_fast("em_taskq", M_NOWAIT,
2606	    taskqueue_thread_enqueue, &adapter->tq);
2607	taskqueue_start_threads(&adapter->tq, 1, PI_NET, "%s taskq",
2608	    device_get_nameunit(adapter->dev));
2609	if ((error = bus_setup_intr(dev, adapter->res_interrupt,
2610	    INTR_TYPE_NET, em_intr_fast, NULL, adapter,
2611	    &adapter->int_handler_tag)) != 0) {
2612		device_printf(dev, "Failed to register fast interrupt "
2613			    "handler: %d\n", error);
2614		taskqueue_free(adapter->tq);
2615		adapter->tq = NULL;
2616		return (error);
2617	}
2618#endif
2619
2620	em_enable_intr(adapter);
2621	return (0);
2622}
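
/*
 * Sketch of the fast-interrupt pattern set up above (an assumption for
 * illustration, not the driver's exact em_intr_fast() body): the filter
 * runs in primary interrupt context, so it only masks the device and
 * schedules the taskqueue; em_handle_rxtx() then does the real work in
 * thread context.
 */
#if 0
static int
example_intr_fast(void *arg)
{
	struct adapter *adapter = arg;

	em_disable_intr(adapter);
	taskqueue_enqueue(adapter->tq, &adapter->rxtx_task);
	return (FILTER_HANDLED);
}
#endif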
2623
2624static void
2625em_free_intr(struct adapter *adapter)
2626{
2627	device_t dev = adapter->dev;
2628
2629	if (adapter->res_interrupt != NULL) {
2630		bus_teardown_intr(dev, adapter->res_interrupt,
2631			adapter->int_handler_tag);
2632		adapter->int_handler_tag = NULL;
2633	}
2634	if (adapter->tq != NULL) {
2635		taskqueue_drain(adapter->tq, &adapter->rxtx_task);
2636		taskqueue_drain(taskqueue_fast, &adapter->link_task);
2637		taskqueue_free(adapter->tq);
2638		adapter->tq = NULL;
2639	}
2640}
2641
2642static void
2643em_free_pci_resources(struct adapter *adapter)
2644{
2645	device_t dev = adapter->dev;
2646
2647	if (adapter->res_interrupt != NULL)
2648		bus_release_resource(dev, SYS_RES_IRQ,
2649		    0, adapter->res_interrupt);
2650
2651	if (adapter->msi)
2652		pci_release_msi(dev);
2653
2654	if (adapter->res_memory != NULL)
2655		bus_release_resource(dev, SYS_RES_MEMORY,
2656		    PCIR_BAR(0), adapter->res_memory);
2657
2658	if (adapter->flash_mem != NULL)
2659		bus_release_resource(dev, SYS_RES_MEMORY,
2660		    EM_FLASH, adapter->flash_mem);
2661
2662	if (adapter->res_ioport != NULL)
2663		bus_release_resource(dev, SYS_RES_IOPORT,
2664		    adapter->io_rid, adapter->res_ioport);
2665}
2666
2667/*********************************************************************
2668 *
2669 *  Initialize the hardware to a configuration
2670 *  as specified by the adapter structure.
2671 *
2672 **********************************************************************/
2673static int
2674em_hardware_init(struct adapter *adapter)
2675{
2676	device_t dev = adapter->dev;
2677	uint16_t rx_buffer_size;
2678
2679	INIT_DEBUGOUT("em_hardware_init: begin");
2680
2681	/* Issue a global reset */
2682	e1000_reset_hw(&adapter->hw);
2683
2684	/* When hardware is reset, fifo_head is also reset */
2685	adapter->tx_fifo_head = 0;
2686
2687	/* Set up smart power down as default off on newer adapters. */
2688	if (!em_smart_pwr_down && (adapter->hw.mac.type == e1000_82571 ||
2689	    adapter->hw.mac.type == e1000_82572)) {
2690		uint16_t phy_tmp = 0;
2691
2692		/* Speed up time to link by disabling smart power down. */
2693		e1000_read_phy_reg(&adapter->hw,
2694		    IGP02E1000_PHY_POWER_MGMT, &phy_tmp);
2695		phy_tmp &= ~IGP02E1000_PM_SPD;
2696		e1000_write_phy_reg(&adapter->hw,
2697		    IGP02E1000_PHY_POWER_MGMT, phy_tmp);
2698	}
2699
2700	/*
2701	 * These parameters control the automatic generation (Tx) and
2702	 * response (Rx) to Ethernet PAUSE frames.
2703	 * - High water mark should allow for at least two frames to be
2704	 *   received after sending an XOFF.
2705	 * - Low water mark works best when it is very near the high water mark.
2706	 *   This allows the receiver to restart by sending XON when it has
	 *   drained a bit. Here we use an arbitrary value of 1500 which will
2708	 *   restart after one full frame is pulled from the buffer. There
2709	 *   could be several smaller frames in the buffer and if so they will
2710	 *   not trigger the XON until their total number reduces the buffer
2711	 *   by 1500.
2712	 * - The pause time is fairly large at 1000 x 512ns = 512 usec.
2713	 */
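	/*
	 * Worked example (illustrative numbers, not read from any
	 * particular part): a 48KB packet buffer makes the PBA read
	 * below yield rx_buffer_size = 48 << 10 = 49152.  With a
	 * 1518-byte max frame, roundup2(1518, 1024) = 2048, giving
	 * fc_high_water = 49152 - 2048 = 47104 and fc_low_water =
	 * 47104 - 1500 = 45604.
	 */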
	rx_buffer_size = ((E1000_READ_REG(&adapter->hw, E1000_PBA) &
	    0xffff) << 10);
2716
2717	adapter->hw.mac.fc_high_water = rx_buffer_size -
2718	    roundup2(adapter->hw.mac.max_frame_size, 1024);
2719	adapter->hw.mac.fc_low_water = adapter->hw.mac.fc_high_water - 1500;
2720	if (adapter->hw.mac.type == e1000_80003es2lan)
2721		adapter->hw.mac.fc_pause_time = 0xFFFF;
2722	else
2723		adapter->hw.mac.fc_pause_time = EM_FC_PAUSE_TIME;
2724	adapter->hw.mac.fc_send_xon = TRUE;
2725	adapter->hw.mac.fc = e1000_fc_full;
2726
2727	if (e1000_init_hw(&adapter->hw) < 0) {
2728		device_printf(dev, "Hardware Initialization Failed\n");
2729		return (EIO);
2730	}
2731
2732	e1000_check_for_link(&adapter->hw);
2733
2734	return (0);
2735}
2736
2737/*********************************************************************
2738 *
2739 *  Setup networking device structure and register an interface.
2740 *
2741 **********************************************************************/
2742static void
2743em_setup_interface(device_t dev, struct adapter *adapter)
2744{
2745	struct ifnet   *ifp;
2746
2747	INIT_DEBUGOUT("em_setup_interface: begin");
2748
2749	ifp = adapter->ifp = if_alloc(IFT_ETHER);
2750	if (ifp == NULL)
2751		panic("%s: can not if_alloc()", device_get_nameunit(dev));
2752	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
2753	ifp->if_mtu = ETHERMTU;
2754	ifp->if_init =  em_init;
2755	ifp->if_softc = adapter;
2756	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
2757	ifp->if_ioctl = em_ioctl;
2758	ifp->if_start = em_start;
2759	IFQ_SET_MAXLEN(&ifp->if_snd, adapter->num_tx_desc - 1);
2760	ifp->if_snd.ifq_drv_maxlen = adapter->num_tx_desc - 1;
2761	IFQ_SET_READY(&ifp->if_snd);
2762
2763	ether_ifattach(ifp, adapter->hw.mac.addr);
2764
2765	ifp->if_capabilities = ifp->if_capenable = 0;
2766
2767	if (adapter->hw.mac.type >= e1000_82543) {
2768		ifp->if_capabilities |= IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM;
2769		ifp->if_capenable |= IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM;
2770	}
2771
2772	/* Enable TSO for PCI Express adapters */
2773	if (adapter->hw.bus.type == e1000_bus_type_pci_express) {
2774		ifp->if_capabilities |= IFCAP_TSO4;
2775		ifp->if_capenable |= IFCAP_TSO4;
2776	}
2777
2778	/*
2779	 * Tell the upper layer(s) we support long frames.
2780	 */
2781	ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header);
2782	ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU;
2783	ifp->if_capenable |= IFCAP_VLAN_MTU;
2784
2785#ifdef DEVICE_POLLING
2786	ifp->if_capabilities |= IFCAP_POLLING;
2787#endif
2788
2789	/*
2790	 * Specify the media types supported by this adapter and register
2791	 * callbacks to update media and link information
2792	 */
2793	ifmedia_init(&adapter->media, IFM_IMASK,
2794	    em_media_change, em_media_status);
2795	if ((adapter->hw.media_type == e1000_media_type_fiber) ||
2796	    (adapter->hw.media_type == e1000_media_type_internal_serdes)) {
2797		u_char fiber_type = IFM_1000_SX;	/* default type */
2798
2799		if (adapter->hw.mac.type == e1000_82545)
2800			fiber_type = IFM_1000_LX;
2801		ifmedia_add(&adapter->media, IFM_ETHER | fiber_type | IFM_FDX,
2802			    0, NULL);
2803		ifmedia_add(&adapter->media, IFM_ETHER | fiber_type, 0, NULL);
2804	} else {
2805		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T, 0, NULL);
2806		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T | IFM_FDX,
2807			    0, NULL);
2808		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX,
2809			    0, NULL);
2810		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX | IFM_FDX,
2811			    0, NULL);
2812		if (adapter->hw.phy.type != e1000_phy_ife) {
2813			ifmedia_add(&adapter->media,
2814				IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
2815			ifmedia_add(&adapter->media,
2816				IFM_ETHER | IFM_1000_T, 0, NULL);
2817		}
2818	}
2819	ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL);
2820	ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO);
2821}
2822
2823
2824/*********************************************************************
2825 *
2826 *  Workaround for SmartSpeed on 82541 and 82547 controllers
2827 *
2828 **********************************************************************/
2829static void
2830em_smartspeed(struct adapter *adapter)
2831{
2832	uint16_t phy_tmp;
2833
2834	if (adapter->link_active || (adapter->hw.phy.type != e1000_phy_igp) ||
2835	    adapter->hw.mac.autoneg == 0 ||
2836	    (adapter->hw.phy.autoneg_advertised & ADVERTISE_1000_FULL) == 0)
2837		return;
2838
2839	if (adapter->smartspeed == 0) {
		/* If the Master/Slave config fault is asserted twice,
		 * we assume back-to-back faults */
2842		e1000_read_phy_reg(&adapter->hw, PHY_1000T_STATUS, &phy_tmp);
2843		if (!(phy_tmp & SR_1000T_MS_CONFIG_FAULT))
2844			return;
2845		e1000_read_phy_reg(&adapter->hw, PHY_1000T_STATUS, &phy_tmp);
2846		if (phy_tmp & SR_1000T_MS_CONFIG_FAULT) {
2847			e1000_read_phy_reg(&adapter->hw, PHY_1000T_CTRL, &phy_tmp);
			if (phy_tmp & CR_1000T_MS_ENABLE) {
				phy_tmp &= ~CR_1000T_MS_ENABLE;
				e1000_write_phy_reg(&adapter->hw, PHY_1000T_CTRL,
				    phy_tmp);
				adapter->smartspeed++;
				if (adapter->hw.mac.autoneg &&
2854				   !e1000_phy_setup_autoneg(&adapter->hw) &&
2855				   !e1000_read_phy_reg(&adapter->hw, PHY_CONTROL,
2856				    &phy_tmp)) {
2857					phy_tmp |= (MII_CR_AUTO_NEG_EN |
2858						    MII_CR_RESTART_AUTO_NEG);
2859					e1000_write_phy_reg(&adapter->hw, PHY_CONTROL,
2860					    phy_tmp);
2861				}
2862			}
2863		}
2864		return;
	} else if (adapter->smartspeed == EM_SMARTSPEED_DOWNSHIFT) {
2866		/* If still no link, perhaps using 2/3 pair cable */
2867		e1000_read_phy_reg(&adapter->hw, PHY_1000T_CTRL, &phy_tmp);
2868		phy_tmp |= CR_1000T_MS_ENABLE;
2869		e1000_write_phy_reg(&adapter->hw, PHY_1000T_CTRL, phy_tmp);
		if (adapter->hw.mac.autoneg &&
2871		   !e1000_phy_setup_autoneg(&adapter->hw) &&
2872		   !e1000_read_phy_reg(&adapter->hw, PHY_CONTROL, &phy_tmp)) {
2873			phy_tmp |= (MII_CR_AUTO_NEG_EN |
2874				    MII_CR_RESTART_AUTO_NEG);
2875			e1000_write_phy_reg(&adapter->hw, PHY_CONTROL, phy_tmp);
2876		}
2877	}
2878	/* Restart process after EM_SMARTSPEED_MAX iterations */
	if (adapter->smartspeed++ == EM_SMARTSPEED_MAX)
2880		adapter->smartspeed = 0;
2881}
2882
2883
2884/*
2885 * Manage DMA'able memory.
2886 */
2887static void
2888em_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
2889{
2890	if (error)
2891		return;
2892	*(bus_addr_t *) arg = segs[0].ds_addr;
2893}
2894
2895static int
2896em_dma_malloc(struct adapter *adapter, bus_size_t size,
2897        struct em_dma_alloc *dma, int mapflags)
2898{
2899	int error;
2900
2901	error = bus_dma_tag_create(bus_get_dma_tag(adapter->dev), /* parent */
2902				EM_DBA_ALIGN, 0,	/* alignment, bounds */
2903				BUS_SPACE_MAXADDR,	/* lowaddr */
2904				BUS_SPACE_MAXADDR,	/* highaddr */
2905				NULL, NULL,		/* filter, filterarg */
2906				size,			/* maxsize */
2907				1,			/* nsegments */
2908				size,			/* maxsegsize */
2909				0,			/* flags */
2910				NULL,			/* lockfunc */
2911				NULL,			/* lockarg */
2912				&dma->dma_tag);
2913	if (error) {
2914		device_printf(adapter->dev,
2915		    "%s: bus_dma_tag_create failed: %d\n",
2916		    __func__, error);
2917		goto fail_0;
2918	}
2919
	error = bus_dmamem_alloc(dma->dma_tag, (void **)&dma->dma_vaddr,
	    BUS_DMA_NOWAIT, &dma->dma_map);
	if (error) {
		device_printf(adapter->dev,
		    "%s: bus_dmamem_alloc(%ju) failed: %d\n",
		    __func__, (uintmax_t)size, error);
		goto fail_1;
	}
2928
2929	dma->dma_paddr = 0;
2930	error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
2931	    size, em_dmamap_cb, &dma->dma_paddr, mapflags | BUS_DMA_NOWAIT);
2932	if (error || dma->dma_paddr == 0) {
2933		device_printf(adapter->dev,
2934		    "%s: bus_dmamap_load failed: %d\n",
2935		    __func__, error);
2936		goto fail_3;
2937	}
2938
2939	return (0);
2940
fail_3:
	bus_dmamap_unload(dma->dma_tag, dma->dma_map);
fail_2:
	bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
fail_1:
	bus_dma_tag_destroy(dma->dma_tag);
2946fail_0:
2947	dma->dma_map = NULL;
2948	dma->dma_tag = NULL;
2949
2950	return (error);
2951}
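
/*
 * Usage sketch (an assumption mirroring how the descriptor rings are
 * allocated elsewhere in this driver, not a verbatim copy): carve out
 * a DMA-visible TX ring and recover its kernel virtual address.
 */
#if 0
	if (em_dma_malloc(adapter, adapter->num_tx_desc *
	    sizeof(struct e1000_tx_desc), &adapter->txdma, BUS_DMA_NOWAIT)) {
		device_printf(adapter->dev, "Unable to allocate TX ring\n");
		return (ENOMEM);
	}
	adapter->tx_desc_base =
	    (struct e1000_tx_desc *)adapter->txdma.dma_vaddr;
#endif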
2952
2953static void
2954em_dma_free(struct adapter *adapter, struct em_dma_alloc *dma)
2955{
2956	if (dma->dma_tag == NULL)
2957		return;
2958	if (dma->dma_map != NULL) {
2959		bus_dmamap_sync(dma->dma_tag, dma->dma_map,
2960		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
2961		bus_dmamap_unload(dma->dma_tag, dma->dma_map);
2962		bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
2963		dma->dma_map = NULL;
2964	}
2965	bus_dma_tag_destroy(dma->dma_tag);
2966	dma->dma_tag = NULL;
2967}
2968
2969
2970/*********************************************************************
2971 *
2972 *  Allocate memory for tx_buffer structures. The tx_buffer stores all
2973 *  the information needed to transmit a packet on the wire.
2974 *
2975 **********************************************************************/
2976static int
2977em_allocate_transmit_structures(struct adapter *adapter)
2978{
2979	device_t dev = adapter->dev;
2980
2981	adapter->tx_buffer_area = malloc(sizeof(struct em_buffer) *
2982	    adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO);
2983	if (adapter->tx_buffer_area == NULL) {
2984		device_printf(dev, "Unable to allocate tx_buffer memory\n");
2985		return (ENOMEM);
2986	}
2990
2991	return (0);
2992}
2993
2994/*********************************************************************
2995 *
2996 *  Initialize transmit structures.
2997 *
2998 **********************************************************************/
2999static int
3000em_setup_transmit_structures(struct adapter *adapter)
3001{
3002	device_t dev = adapter->dev;
3003	struct em_buffer *tx_buffer;
3004	int error, i;
3005
3006	/*
3007	 * Create DMA tags for tx descriptors
3008	 */
3009	if ((error = bus_dma_tag_create(bus_get_dma_tag(dev), /* parent */
3010				1, 0,			/* alignment, bounds */
3011				BUS_SPACE_MAXADDR,	/* lowaddr */
3012				BUS_SPACE_MAXADDR,	/* highaddr */
3013				NULL, NULL,		/* filter, filterarg */
3014				EM_TSO_SIZE,		/* maxsize */
3015				EM_MAX_SCATTER,		/* nsegments */
3016				EM_TSO_SEG_SIZE,	/* maxsegsize */
3017				0,			/* flags */
3018				NULL,		/* lockfunc */
3019				NULL,		/* lockarg */
3020				&adapter->txtag)) != 0) {
3021		device_printf(dev, "Unable to allocate TX DMA tag\n");
3022		goto fail;
3023	}
3024
3025	if ((error = em_allocate_transmit_structures(adapter)) != 0)
3026		goto fail;
3027
3028	/* Clear the old ring contents */
3029	bzero(adapter->tx_desc_base,
3030	    (sizeof(struct e1000_tx_desc)) * adapter->num_tx_desc);
3031
3032	/* Create the descriptor buffer dma maps */
3033	tx_buffer = adapter->tx_buffer_area;
3034	for (i = 0; i < adapter->num_tx_desc; i++) {
3035		error = bus_dmamap_create(adapter->txtag, 0, &tx_buffer->map);
3036		if (error != 0) {
3037			device_printf(dev, "Unable to create TX DMA map\n");
3038			goto fail;
3039		}
3040		tx_buffer->next_eop = -1;
3041		tx_buffer++;
3042	}
3043
3044	adapter->next_avail_tx_desc = 0;
3045	adapter->next_tx_to_clean = 0;
3046
3047	/* Set number of descriptors available */
3048	adapter->num_tx_desc_avail = adapter->num_tx_desc;
3049
3050	bus_dmamap_sync(adapter->txdma.dma_tag, adapter->txdma.dma_map,
3051	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3052
3053	return (0);
3054
3055fail:
3056	em_free_transmit_structures(adapter);
3057	return (error);
3058}
3059
3060/*********************************************************************
3061 *
3062 *  Enable transmit unit.
3063 *
3064 **********************************************************************/
3065static void
3066em_initialize_transmit_unit(struct adapter *adapter)
3067{
3068	uint32_t	tctl, tarc, tipg = 0;
3069	uint64_t	bus_addr;
3070
	INIT_DEBUGOUT("em_initialize_transmit_unit: begin");
3072	/* Setup the Base and Length of the Tx Descriptor Ring */
3073	bus_addr = adapter->txdma.dma_paddr;
3074	E1000_WRITE_REG(&adapter->hw, E1000_TDLEN,
3075	    adapter->num_tx_desc * sizeof(struct e1000_tx_desc));
3076	E1000_WRITE_REG(&adapter->hw, E1000_TDBAH, (uint32_t)(bus_addr >> 32));
3077	E1000_WRITE_REG(&adapter->hw, E1000_TDBAL, (uint32_t)bus_addr);
3078
3079	/* Setup the HW Tx Head and Tail descriptor pointers */
3080	E1000_WRITE_REG(&adapter->hw, E1000_TDT, 0);
3081	E1000_WRITE_REG(&adapter->hw, E1000_TDH, 0);
3082
3083	HW_DEBUGOUT2("Base = %x, Length = %x\n",
3084	    E1000_READ_REG(&adapter->hw, E1000_TDBAL),
3085	    E1000_READ_REG(&adapter->hw, E1000_TDLEN));
3086
3087	/* Set the default values for the Tx Inter Packet Gap timer */
3088	switch (adapter->hw.mac.type) {
3089	case e1000_82542:
3090		tipg = DEFAULT_82542_TIPG_IPGT;
3091		tipg |= DEFAULT_82542_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT;
3092		tipg |= DEFAULT_82542_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT;
3093		break;
3094	case e1000_80003es2lan:
3095		tipg = DEFAULT_82543_TIPG_IPGR1;
3096		tipg |= DEFAULT_80003ES2LAN_TIPG_IPGR2 <<
3097		    E1000_TIPG_IPGR2_SHIFT;
3098		break;
3099	default:
3100		if ((adapter->hw.media_type == e1000_media_type_fiber) ||
3101		    (adapter->hw.media_type ==
3102		    e1000_media_type_internal_serdes))
3103			tipg = DEFAULT_82543_TIPG_IPGT_FIBER;
3104		else
3105			tipg = DEFAULT_82543_TIPG_IPGT_COPPER;
3106		tipg |= DEFAULT_82543_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT;
3107		tipg |= DEFAULT_82543_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT;
3108	}
3109
3110	E1000_WRITE_REG(&adapter->hw, E1000_TIPG, tipg);
3111	E1000_WRITE_REG(&adapter->hw, E1000_TIDV, adapter->tx_int_delay.value);
	if (adapter->hw.mac.type >= e1000_82540)
3113		E1000_WRITE_REG(&adapter->hw, E1000_TADV,
3114		    adapter->tx_abs_int_delay.value);
3115
3116	if ((adapter->hw.mac.type == e1000_82571) ||
3117	    (adapter->hw.mac.type == e1000_82572)) {
3118		tarc = E1000_READ_REG(&adapter->hw, E1000_TARC0);
3119		tarc |= SPEED_MODE_BIT;
3120		E1000_WRITE_REG(&adapter->hw, E1000_TARC0, tarc);
3121	} else if (adapter->hw.mac.type == e1000_80003es2lan) {
3122		tarc = E1000_READ_REG(&adapter->hw, E1000_TARC0);
3123		tarc |= 1;
3124		E1000_WRITE_REG(&adapter->hw, E1000_TARC0, tarc);
3125		tarc = E1000_READ_REG(&adapter->hw, E1000_TARC1);
3126		tarc |= 1;
3127		E1000_WRITE_REG(&adapter->hw, E1000_TARC1, tarc);
3128	}
3129
3130	/* Program the Transmit Control Register */
3131	tctl = E1000_READ_REG(&adapter->hw, E1000_TCTL);
3132	tctl &= ~E1000_TCTL_CT;
3133	tctl = E1000_TCTL_PSP | E1000_TCTL_RTLC | E1000_TCTL_EN |
3134		   (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT);
3135
3136	if (adapter->hw.mac.type >= e1000_82571)
3137		tctl |= E1000_TCTL_MULR;
3138
3139	/* This write will effectively turn on the transmit unit. */
3140	E1000_WRITE_REG(&adapter->hw, E1000_TCTL, tctl);
3141
3142	/* Setup Transmit Descriptor Base Settings */
3143	adapter->txd_cmd = E1000_TXD_CMD_IFCS;
3144
3145	if ((adapter->tx_int_delay.value > 0) &&
3146	    (adapter->hw.mac.type != e1000_82575))
3147		adapter->txd_cmd |= E1000_TXD_CMD_IDE;
3148
	/* Set the function pointer for the transmit routine */
	if (adapter->hw.mac.type >= e1000_82575)
		adapter->em_xmit = em_adv_encap;
	else
		adapter->em_xmit = em_encap;
3154}
3155
3156/*********************************************************************
3157 *
3158 *  Free all transmit related data structures.
3159 *
3160 **********************************************************************/
3161static void
3162em_free_transmit_structures(struct adapter *adapter)
3163{
3164	struct em_buffer *tx_buffer;
3165	int i;
3166
3167	INIT_DEBUGOUT("free_transmit_structures: begin");
3168
3169	if (adapter->tx_buffer_area != NULL) {
3170		tx_buffer = adapter->tx_buffer_area;
3171		for (i = 0; i < adapter->num_tx_desc; i++, tx_buffer++) {
3172			if (tx_buffer->m_head != NULL) {
3173				bus_dmamap_sync(adapter->txtag, tx_buffer->map,
3174				    BUS_DMASYNC_POSTWRITE);
3175				bus_dmamap_unload(adapter->txtag,
3176				    tx_buffer->map);
3177				m_freem(tx_buffer->m_head);
3178				tx_buffer->m_head = NULL;
3179			} else if (tx_buffer->map != NULL)
3180				bus_dmamap_unload(adapter->txtag,
3181				    tx_buffer->map);
3182			if (tx_buffer->map != NULL) {
3183				bus_dmamap_destroy(adapter->txtag,
3184				    tx_buffer->map);
3185				tx_buffer->map = NULL;
3186			}
3187		}
3188	}
3189	if (adapter->tx_buffer_area != NULL) {
3190		free(adapter->tx_buffer_area, M_DEVBUF);
3191		adapter->tx_buffer_area = NULL;
3192	}
3193	if (adapter->txtag != NULL) {
3194		bus_dma_tag_destroy(adapter->txtag);
3195		adapter->txtag = NULL;
3196	}
3197}
3198
3199/*********************************************************************
3200 *
3201 *  The offload context needs to be set when we transfer the first
3202 *  packet of a particular protocol (TCP/UDP). This routine has been
 *  enhanced to deal with inserted VLAN headers, and IPv6 (not complete).
3204 *
3205 **********************************************************************/
3206static void
3207em_transmit_checksum_setup(struct adapter *adapter, struct mbuf *mp,
3208    uint32_t *txd_upper, uint32_t *txd_lower)
3209{
3210	struct e1000_context_desc *TXD;
3211	struct em_buffer *tx_buffer;
3212	struct ether_vlan_header *eh;
3213	struct ip *ip;
3214	struct ip6_hdr *ip6;
	struct tcphdr *th;
3216	int curr_txd, ehdrlen, hdr_len, ip_hlen;
3217	uint32_t cmd = 0;
3218	uint16_t etype;
3219	uint8_t ipproto;
3220
3221	/* Setup checksum offload context. */
3222	curr_txd = adapter->next_avail_tx_desc;
3223	tx_buffer = &adapter->tx_buffer_area[curr_txd];
3224	TXD = (struct e1000_context_desc *) &adapter->tx_desc_base[curr_txd];
3225
3226	*txd_lower = E1000_TXD_CMD_DEXT |	/* Extended descr type */
3227		     E1000_TXD_DTYP_D;		/* Data descr */
3228
3229	/*
3230	 * Determine where frame payload starts.
3231	 * Jump over vlan headers if already present,
3232	 * helpful for QinQ too.
3233	 */
3234	eh = mtod(mp, struct ether_vlan_header *);
3235	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
3236		etype = ntohs(eh->evl_proto);
3237		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3238	} else {
3239		etype = ntohs(eh->evl_encap_proto);
3240		ehdrlen = ETHER_HDR_LEN;
3241	}
3242
3243	/*
3244	 * We only support TCP/UDP for IPv4 and IPv6 for the moment.
3245	 * TODO: Support SCTP too when it hits the tree.
3246	 */
3247	switch (etype) {
3248	case ETHERTYPE_IP:
3249		ip = (struct ip *)(mp->m_data + ehdrlen);
3250		ip_hlen = ip->ip_hl << 2;
3251
3252		/* Setup of IP header checksum. */
3253		if (mp->m_pkthdr.csum_flags & CSUM_IP) {
3254			/*
3255			 * Start offset for header checksum calculation.
3256			 * End offset for header checksum calculation.
3257			 * Offset of place to put the checksum.
3258			 */
3259			TXD->lower_setup.ip_fields.ipcss = ehdrlen;
3260			TXD->lower_setup.ip_fields.ipcse =
3261			    htole16(ehdrlen + ip_hlen);
3262			TXD->lower_setup.ip_fields.ipcso =
3263			    ehdrlen + offsetof(struct ip, ip_sum);
3264			cmd |= E1000_TXD_CMD_IP;
3265			*txd_upper |= E1000_TXD_POPTS_IXSM << 8;
3266		}
3267
3268		if (mp->m_len < ehdrlen + ip_hlen)
3269			return;	/* failure */
3270
3271		hdr_len = ehdrlen + ip_hlen;
3272		ipproto = ip->ip_p;
3273
3274		break;
3275	case ETHERTYPE_IPV6:
3276		ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
3277		ip_hlen = sizeof(struct ip6_hdr); /* XXX: No header stacking. */
3278
3279		if (mp->m_len < ehdrlen + ip_hlen)
3280			return;	/* failure */
3281
3282		/* IPv6 doesn't have a header checksum. */
3283
3284		hdr_len = ehdrlen + ip_hlen;
3285		ipproto = ip6->ip6_nxt;
3286
3287		break;
3288	default:
3289		*txd_upper = 0;
3290		*txd_lower = 0;
3291		return;
3292	}
3293
3294	switch (ipproto) {
3295	case IPPROTO_TCP:
3296		if (mp->m_pkthdr.csum_flags & CSUM_TCP) {
3297			/*
3298			 * Start offset for payload checksum calculation.
3299			 * End offset for payload checksum calculation.
3300			 * Offset of place to put the checksum.
3301			 */
			th = (struct tcphdr *)(mp->m_data + hdr_len);
3303			TXD->upper_setup.tcp_fields.tucss = hdr_len;
3304			TXD->upper_setup.tcp_fields.tucse = htole16(0);
3305			TXD->upper_setup.tcp_fields.tucso =
3306			    hdr_len + offsetof(struct tcphdr, th_sum);
3307			cmd |= E1000_TXD_CMD_TCP;
3308			*txd_upper |= E1000_TXD_POPTS_TXSM << 8;
3309		}
3310		break;
3311	case IPPROTO_UDP:
3312		if (mp->m_pkthdr.csum_flags & CSUM_UDP) {
3313			/*
3314			 * Start offset for header checksum calculation.
3315			 * End offset for header checksum calculation.
3316			 * Offset of place to put the checksum.
3317			 */
3318			TXD->upper_setup.tcp_fields.tucss = hdr_len;
3319			TXD->upper_setup.tcp_fields.tucse = htole16(0);
3320			TXD->upper_setup.tcp_fields.tucso =
3321			    hdr_len + offsetof(struct udphdr, uh_sum);
3322			*txd_upper |= E1000_TXD_POPTS_TXSM << 8;
3323		}
3324		break;
3325	default:
3326		break;
3327	}
3328
3329	TXD->tcp_seg_setup.data = htole32(0);
3330	TXD->cmd_and_length =
3331	    htole32(adapter->txd_cmd | E1000_TXD_CMD_DEXT | cmd);
3332	tx_buffer->m_head = NULL;
3333	tx_buffer->next_eop = -1;
3334
3335	if (++curr_txd == adapter->num_tx_desc)
3336		curr_txd = 0;
3337
3338	adapter->num_tx_desc_avail--;
3339	adapter->next_avail_tx_desc = curr_txd;
3340}
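
/*
 * Example (an assumption about the caller, for illustration only): the
 * stack requests the offloads handled above by marking the packet
 * header before it reaches em_encap(), e.g. for a TCP/IPv4 segment:
 */
#if 0
	mp->m_pkthdr.csum_flags |= CSUM_IP | CSUM_TCP;
#endif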
3341
3342/**********************************************************************
3343 *
3344 *  Setup work for hardware segmentation offload (TSO)
3345 *
3346 **********************************************************************/
3347static boolean_t
3348em_tso_setup(struct adapter *adapter, struct mbuf *mp, uint32_t *txd_upper,
3349   uint32_t *txd_lower)
3350{
3351	struct e1000_context_desc *TXD;
3352	struct em_buffer *tx_buffer;
3353	struct ether_vlan_header *eh;
3354	struct ip *ip;
3355	struct ip6_hdr *ip6;
3356	struct tcphdr *th;
3357	int curr_txd, ehdrlen, hdr_len, ip_hlen, isip6;
3358	uint16_t etype;
3359
3360	/*
3361	 * XXX: This is not really correct as the stack would not have
3362	 * set up all checksums.
	 * XXX: Returning FALSE is not sufficient as we may have to return
3364	 * in true failure cases as well.  Should do -1 (failure), 0 (no)
3365	 * and 1 (success).
3366	 */
3367	if (((mp->m_pkthdr.csum_flags & CSUM_TSO) == 0) ||
3368	     (mp->m_pkthdr.len <= EM_TX_BUFFER_SIZE))
3369		return FALSE;
3370
3371	/*
3372	 * This function could/should be extended to support IP/IPv6
3373	 * fragmentation as well.  But as they say, one step at a time.
3374	 */
3375
3376	/*
3377	 * Determine where frame payload starts.
3378	 * Jump over vlan headers if already present,
3379	 * helpful for QinQ too.
3380	 */
3381	eh = mtod(mp, struct ether_vlan_header *);
3382	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
3383		etype = ntohs(eh->evl_proto);
3384		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3385	} else {
3386		etype = ntohs(eh->evl_encap_proto);
3387		ehdrlen = ETHER_HDR_LEN;
3388	}
3389
3390	/* Ensure we have at least the IP+TCP header in the first mbuf. */
3391	if (mp->m_len < ehdrlen + sizeof(struct ip) + sizeof(struct tcphdr))
3392		return FALSE;	/* -1 */
3393
3394	/*
	 * We only support TCP for IPv4, and IPv6 (not yet), for the moment.
3396	 * TODO: Support SCTP too when it hits the tree.
3397	 */
3398	switch (etype) {
3399	case ETHERTYPE_IP:
3400		isip6 = 0;
3401		ip = (struct ip *)(mp->m_data + ehdrlen);
3402		if (ip->ip_p != IPPROTO_TCP)
3403			return FALSE;	/* 0 */
3404		ip->ip_len = 0;
3405		ip->ip_sum = 0;
3406		ip_hlen = ip->ip_hl << 2;
3407		if (mp->m_len < ehdrlen + ip_hlen + sizeof(struct tcphdr))
3408			return FALSE;	/* -1 */
3409		th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
3410#if 1
3411		th->th_sum = in_pseudo(ip->ip_src.s_addr,
3412		    ip->ip_dst.s_addr, htons(IPPROTO_TCP));
3413#else
3414		th->th_sum = mp->m_pkthdr.csum_data;
3415#endif
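		/*
		 * The pseudo-header sum seeded into th_sum above
		 * deliberately omits the TCP length: during
		 * segmentation the hardware folds each segment's
		 * own length into the checksum it inserts.
		 */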
3416		break;
3417	case ETHERTYPE_IPV6:
3418		isip6 = 1;
3419		return FALSE;			/* Not supported yet. */
3420		ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
3421		if (ip6->ip6_nxt != IPPROTO_TCP)
3422			return FALSE;	/* 0 */
3423		ip6->ip6_plen = 0;
3424		ip_hlen = sizeof(struct ip6_hdr); /* XXX: no header stacking. */
3425		if (mp->m_len < ehdrlen + ip_hlen + sizeof(struct tcphdr))
3426			return FALSE;	/* -1 */
3427		th = (struct tcphdr *)((caddr_t)ip6 + ip_hlen);
3428#if 0
		th->th_sum = in6_pseudo(ip6->ip6_src, ip6->ip6_dst,
3430		    htons(IPPROTO_TCP));	/* XXX: function notyet. */
3431#else
3432		th->th_sum = mp->m_pkthdr.csum_data;
3433#endif
3434		break;
3435	default:
3436		return FALSE;
3437	}
3438	hdr_len = ehdrlen + ip_hlen + (th->th_off << 2);
3439
3440	*txd_lower = (E1000_TXD_CMD_DEXT |	/* Extended descr type */
3441		      E1000_TXD_DTYP_D |	/* Data descr type */
3442		      E1000_TXD_CMD_TSE);	/* Do TSE on this packet */
3443
3444	/* IP and/or TCP header checksum calculation and insertion. */
3445	*txd_upper = ((isip6 ? 0 : E1000_TXD_POPTS_IXSM) |
3446		      E1000_TXD_POPTS_TXSM) << 8;
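	/*
	 * The POPTS field sits in byte 1 of the data descriptor's
	 * upper dword, hence the << 8: IXSM requests IP header
	 * checksum insertion (IPv4 only), TXSM requests TCP/UDP
	 * checksum insertion.
	 */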
3447
3448	curr_txd = adapter->next_avail_tx_desc;
3449	tx_buffer = &adapter->tx_buffer_area[curr_txd];
3450	TXD = (struct e1000_context_desc *) &adapter->tx_desc_base[curr_txd];
3451
3452	/* IPv6 doesn't have a header checksum. */
3453	if (!isip6) {
3454		/*
3455		 * Start offset for header checksum calculation.
3456		 * End offset for header checksum calculation.
		 * Offset of place to put the checksum.
3458		 */
3459		TXD->lower_setup.ip_fields.ipcss = ehdrlen;
3460		TXD->lower_setup.ip_fields.ipcse =
3461		    htole16(ehdrlen + ip_hlen - 1);
3462		TXD->lower_setup.ip_fields.ipcso =
3463		    ehdrlen + offsetof(struct ip, ip_sum);
3464	}
3465	/*
3466	 * Start offset for payload checksum calculation.
3467	 * End offset for payload checksum calculation.
3468	 * Offset of place to put the checksum.
3469	 */
3470	TXD->upper_setup.tcp_fields.tucss =
3471	    ehdrlen + ip_hlen;
3472	TXD->upper_setup.tcp_fields.tucse = 0;
3473	TXD->upper_setup.tcp_fields.tucso =
3474	    ehdrlen + ip_hlen + offsetof(struct tcphdr, th_sum);
3475	/*
3476	 * Payload size per packet w/o any headers.
3477	 * Length of all headers up to payload.
3478	 */
3479	TXD->tcp_seg_setup.fields.mss = htole16(mp->m_pkthdr.tso_segsz);
3480	TXD->tcp_seg_setup.fields.hdr_len = hdr_len;
3481
3482	TXD->cmd_and_length = htole32(adapter->txd_cmd |
3483				E1000_TXD_CMD_DEXT |	/* Extended descr */
3484				E1000_TXD_CMD_TSE |	/* TSE context */
3485				(isip6 ? 0 : E1000_TXD_CMD_IP) | /* Do IP csum */
3486				E1000_TXD_CMD_TCP |	/* Do TCP checksum */
3487				(mp->m_pkthdr.len - (hdr_len))); /* Total len */
3488
3489	tx_buffer->m_head = NULL;
3490	tx_buffer->next_eop = -1;
3491
3492	if (++curr_txd == adapter->num_tx_desc)
3493		curr_txd = 0;
3494
3495	adapter->num_tx_desc_avail--;
3496	adapter->next_avail_tx_desc = curr_txd;
3497	adapter->tx_tso = TRUE;
3498
3499	return TRUE;
3500}
3501
3502
3503/**********************************************************************
3504 *
3505 *  Setup work for hardware segmentation offload (TSO) on
3506 *  adapters using advanced tx descriptors
3507 *
3508 **********************************************************************/
3509static boolean_t
3510em_tso_adv_setup(struct adapter *adapter, struct mbuf *mp, u32 *paylen)
3511{
3512	struct e1000_adv_tx_context_desc *TXD;
3513	struct em_buffer        *tx_buffer;
3514	u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0;
3515	u32 mss_l4len_idx = 0;
3516	u16 vtag = 0;
3517	int ctxd, ehdrlen, hdrlen, ip_hlen, tcp_hlen;
3518	struct ether_vlan_header *eh;
3519	struct ip *ip;
3520	struct tcphdr *th;
3521
3522	if (((mp->m_pkthdr.csum_flags & CSUM_TSO) == 0) ||
3523	     (mp->m_pkthdr.len <= EM_TX_BUFFER_SIZE))
3524		return FALSE;
3525
3526	/*
3527	 * Determine where frame payload starts.
3528	 * Jump over vlan headers if already present
3529	 */
3530	eh = mtod(mp, struct ether_vlan_header *);
3531	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN))
3532		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3533	else
3534		ehdrlen = ETHER_HDR_LEN;
3535
3536	/* Ensure we have at least the IP+TCP header in the first mbuf. */
3537	if (mp->m_len < ehdrlen + sizeof(struct ip) + sizeof(struct tcphdr))
3538		return FALSE;
3539
	/* Only IPv4 is supported for now. */
3541	ctxd = adapter->next_avail_tx_desc;
3542	tx_buffer = &adapter->tx_buffer_area[ctxd];
3543	TXD = (struct e1000_adv_tx_context_desc *) &adapter->tx_desc_base[ctxd];
3544
3545	ip = (struct ip *)(mp->m_data + ehdrlen);
3546	if (ip->ip_p != IPPROTO_TCP)
		return FALSE;	/* 0 */
3548	ip->ip_len = 0;
3549	ip->ip_sum = 0;
3550	ip_hlen = ip->ip_hl << 2;
3551	th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
3552	th->th_sum = in_pseudo(ip->ip_src.s_addr,
3553	    ip->ip_dst.s_addr, htons(IPPROTO_TCP));
3554	tcp_hlen = th->th_off << 2;
3555	hdrlen = ehdrlen + ip_hlen + tcp_hlen;
	/* Calculate the payload size; encap uses it in the transmit descriptor. */
3557	*paylen = mp->m_pkthdr.len - hdrlen;
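	/*
	 * Advanced data descriptors carry the total L4 payload length
	 * (frame length minus all headers) in their PAYLEN field, which
	 * is why encap needs this value handed back.
	 */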
3558
3559	/* VLAN MACLEN IPLEN */
3560	if (mp->m_flags & M_VLANTAG) {
3561		vtag = htole16(mp->m_pkthdr.ether_vtag);
3562		vlan_macip_lens |= (vtag << E1000_ADVTXD_VLAN_SHIFT);
3563	}
3564	vlan_macip_lens |= (ehdrlen << E1000_ADVTXD_MACLEN_SHIFT);
3565	vlan_macip_lens |= ip_hlen;
3566	TXD->vlan_macip_lens |= htole32(vlan_macip_lens);
3567
3568	/* ADV DTYPE TUCMD */
3569	type_tucmd_mlhl |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_CTXT;
3570	type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP;
3571	type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV4;
3572	TXD->type_tucmd_mlhl |= htole32(type_tucmd_mlhl);
3573
3574	/* MSS L4LEN IDX */
3575	mss_l4len_idx |= (mp->m_pkthdr.tso_segsz << E1000_ADVTXD_MSS_SHIFT);
3576	mss_l4len_idx |= (tcp_hlen << E1000_ADVTXD_L4LEN_SHIFT);
3577	TXD->mss_l4len_idx = htole32(mss_l4len_idx);
3578
3579	TXD->seqnum_seed = htole32(0);
3580	tx_buffer->m_head = NULL;
3581	tx_buffer->next_eop = -1;
3582
3583	if (++ctxd == adapter->num_tx_desc)
3584		ctxd = 0;
3585
3586	adapter->num_tx_desc_avail--;
3587	adapter->next_avail_tx_desc = ctxd;
3588	return TRUE;
3589}
3590
3591
3592/*********************************************************************
3593 *
3594 *  Advanced Context Descriptor setup for VLAN or CSUM
3595 *
3596 **********************************************************************/
3597
3598static boolean_t
3599em_tx_adv_ctx_setup(struct adapter *adapter, struct mbuf *mp)
3600{
3601	struct e1000_adv_tx_context_desc *TXD;
3602	struct em_buffer        *tx_buffer;
3603	uint32_t vlan_macip_lens = 0, type_tucmd_mlhl = 0;
3604	struct ether_vlan_header *eh;
3605	struct ip *ip;
3606	struct ip6_hdr *ip6;
3607	int  ehdrlen, ip_hlen;
3608	u16	etype;
3609	u8	ipproto;
3610
3611	int ctxd = adapter->next_avail_tx_desc;
3612	u16 vtag = 0;
3613
3614	tx_buffer = &adapter->tx_buffer_area[ctxd];
3615	TXD = (struct e1000_adv_tx_context_desc *) &adapter->tx_desc_base[ctxd];
3616
3617	/*
3618	** In advanced descriptors the vlan tag must
3619	** be placed into the descriptor itself.
3620	*/
3621	if (mp->m_flags & M_VLANTAG) {
3622		vtag = htole16(mp->m_pkthdr.ether_vtag);
3623		vlan_macip_lens |= (vtag << E1000_ADVTXD_VLAN_SHIFT);
3624	}
3625
3626	/*
3627	 * Determine where frame payload starts.
3628	 * Jump over vlan headers if already present,
3629	 * helpful for QinQ too.
3630	 */
3631	eh = mtod(mp, struct ether_vlan_header *);
3632	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
3633		etype = ntohs(eh->evl_proto);
3634		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3635	} else {
3636		etype = ntohs(eh->evl_encap_proto);
3637		ehdrlen = ETHER_HDR_LEN;
3638	}
3639
3640	/* Set the ether header length */
3641	vlan_macip_lens |= ehdrlen << E1000_ADVTXD_MACLEN_SHIFT;
3642
3643	switch (etype) {
3644		case ETHERTYPE_IP:
3645			ip = (struct ip *)(mp->m_data + ehdrlen);
3646			ip_hlen = ip->ip_hl << 2;
3647			if (mp->m_len < ehdrlen + ip_hlen)
3648				return FALSE; /* failure */
3649			ipproto = ip->ip_p;
3650			type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV4;
3651			break;
3652		case ETHERTYPE_IPV6:
3653			ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
3654			ip_hlen = sizeof(struct ip6_hdr);
3655			if (mp->m_len < ehdrlen + ip_hlen)
3656				return FALSE; /* failure */
3657			ipproto = ip6->ip6_nxt;
3658			type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV6;
3659			break;
3660		default:
3661			return FALSE;
3662	}
3663
3664	vlan_macip_lens |= ip_hlen;
3665	type_tucmd_mlhl |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_CTXT;
3666
3667	switch (ipproto) {
3668		case IPPROTO_TCP:
3669			if (mp->m_pkthdr.csum_flags & CSUM_TCP)
3670				type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP;
3671			break;
		case IPPROTO_UDP:
			/*
			 * UDP uses the 00b L4T encoding in the advanced
			 * context descriptor, i.e. the register default,
			 * so unlike TCP there is no type bit to OR in
			 * for CSUM_UDP.
			 */
			break;
3676	}
3677
3678	/* Now copy bits into descriptor */
3679	TXD->vlan_macip_lens |= htole32(vlan_macip_lens);
3680	TXD->type_tucmd_mlhl |= htole32(type_tucmd_mlhl);
3681	TXD->seqnum_seed = htole32(0);
3682	TXD->mss_l4len_idx = htole32(0);
3683
3684	tx_buffer->m_head = NULL;
3685	tx_buffer->next_eop = -1;
3686
3687	/* We've consumed the first desc, adjust counters */
3688	if (++ctxd == adapter->num_tx_desc)
3689		ctxd = 0;
3690	adapter->next_avail_tx_desc = ctxd;
3691	--adapter->num_tx_desc_avail;
3692
	return TRUE;
3694}
3695
3696
3697/**********************************************************************
3698 *
3699 *  Examine each tx_buffer in the used queue. If the hardware is done
3700 *  processing the packet then free associated resources. The
3701 *  tx_buffer is put back on the free queue.
3702 *
3703 **********************************************************************/
static void
em_txeof(struct adapter *adapter)
{
	int first, last, done, num_avail;
	struct em_buffer *tx_buffer;
	struct e1000_tx_desc *tx_desc, *eop_desc;
	struct ifnet *ifp = adapter->ifp;

	EM_LOCK_ASSERT(adapter);

	if (adapter->num_tx_desc_avail == adapter->num_tx_desc)
		return;

	num_avail = adapter->num_tx_desc_avail;
	first = adapter->next_tx_to_clean;
	tx_desc = &adapter->tx_desc_base[first];
	tx_buffer = &adapter->tx_buffer_area[first];
	last = tx_buffer->next_eop;
	if (last == -1)
		return;	/* No EOP recorded yet; nothing to clean. */
	eop_desc = &adapter->tx_desc_base[last];

	/*
	 * Get the index of the first descriptor AFTER the EOP of the
	 * first packet, so that we can do the simple comparison in the
	 * inner while loop.
	 */
	if (++last == adapter->num_tx_desc)
		last = 0;
	done = last;

	bus_dmamap_sync(adapter->txdma.dma_tag, adapter->txdma.dma_map,
	    BUS_DMASYNC_POSTREAD);

	while (eop_desc->upper.fields.status & E1000_TXD_STAT_DD) {
		/* We clean the range of the packet */
		while (first != done) {
			tx_desc->upper.data = 0;
			tx_desc->lower.data = 0;
			tx_desc->buffer_addr = 0;
			num_avail++;

			if (tx_buffer->m_head) {
				ifp->if_opackets++;
				bus_dmamap_sync(adapter->txtag,
				    tx_buffer->map,
				    BUS_DMASYNC_POSTWRITE);
				bus_dmamap_unload(adapter->txtag,
				    tx_buffer->map);

				m_freem(tx_buffer->m_head);
				tx_buffer->m_head = NULL;
			}
			tx_buffer->next_eop = -1;

			if (++first == adapter->num_tx_desc)
				first = 0;

			tx_buffer = &adapter->tx_buffer_area[first];
			tx_desc = &adapter->tx_desc_base[first];
		}
		/* See if we can continue to the next packet */
		last = tx_buffer->next_eop;
		if (last != -1) {
			eop_desc = &adapter->tx_desc_base[last];
			/* Get new done point */
			if (++last == adapter->num_tx_desc)
				last = 0;
			done = last;
		} else
			break;
	}
	bus_dmamap_sync(adapter->txdma.dma_tag, adapter->txdma.dma_map,
	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);

	adapter->next_tx_to_clean = first;

	/*
	 * If we have enough room, clear IFF_DRV_OACTIVE to tell the stack
	 * that it is OK to send packets.
	 * If there are no pending descriptors, clear the timeout.  Otherwise,
	 * if some descriptors have been freed, restart the timeout.
	 */
	if (num_avail > EM_TX_CLEANUP_THRESHOLD) {
		ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
		/* All clean, turn off the timer */
		if (num_avail == adapter->num_tx_desc)
			adapter->watchdog_timer = 0;
		/* Some cleaned, reset the timer */
		else if (num_avail != adapter->num_tx_desc_avail)
			adapter->watchdog_timer = EM_TX_TIMEOUT;
	}
	adapter->num_tx_desc_avail = num_avail;
}
3797
3798/*********************************************************************
3799 *
3800 *  Get a buffer from system mbuf buffer pool.
3801 *
3802 **********************************************************************/
3803static int
3804em_get_buf(struct adapter *adapter, int i)
3805{
3806	struct mbuf		*m;
3807	bus_dma_segment_t	segs[1];
3808	bus_dmamap_t		map;
3809	struct em_buffer	*rx_buffer;
3810	int			error, nsegs;
3811
3812	m = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR);
3813	if (m == NULL) {
3814		adapter->mbuf_cluster_failed++;
3815		return (ENOBUFS);
3816	}
3817	m->m_len = m->m_pkthdr.len = MCLBYTES;
3818
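	/*
	 * The 2-byte ETHER_ALIGN adjustment below shifts the 14-byte
	 * Ethernet header so the IP header that follows lands on a
	 * 32-bit boundary.  It can only be afforded when the largest
	 * possible frame still fits in the cluster minus those two
	 * bytes.
	 */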
3819	if (adapter->hw.mac.max_frame_size <= (MCLBYTES - ETHER_ALIGN))
3820		m_adj(m, ETHER_ALIGN);
3821
3822	/*
3823	 * Using memory from the mbuf cluster pool, invoke the
3824	 * bus_dma machinery to arrange the memory mapping.
3825	 */
3826	error = bus_dmamap_load_mbuf_sg(adapter->rxtag,
3827	    adapter->rx_sparemap, m, segs, &nsegs, BUS_DMA_NOWAIT);
3828	if (error != 0) {
3829		m_free(m);
3830		return (error);
3831	}
3832
3833	/* If nsegs is wrong then the stack is corrupt. */
3834	KASSERT(nsegs == 1, ("Too many segments returned!"));
3835
3836	rx_buffer = &adapter->rx_buffer_area[i];
3837	if (rx_buffer->m_head != NULL)
3838		bus_dmamap_unload(adapter->rxtag, rx_buffer->map);
3839
3840	map = rx_buffer->map;
3841	rx_buffer->map = adapter->rx_sparemap;
3842	adapter->rx_sparemap = map;
3843	bus_dmamap_sync(adapter->rxtag, rx_buffer->map, BUS_DMASYNC_PREREAD);
3844	rx_buffer->m_head = m;
3845
3846	adapter->rx_desc_base[i].buffer_addr = htole64(segs[0].ds_addr);
3847	return (0);
3848}
3849
3850/*********************************************************************
3851 *
3852 *  Allocate memory for rx_buffer structures. Since we use one
3853 *  rx_buffer per received packet, the maximum number of rx_buffer's
3854 *  that we'll need is equal to the number of receive descriptors
3855 *  that we've allocated.
3856 *
3857 **********************************************************************/
3858static int
3859em_allocate_receive_structures(struct adapter *adapter)
3860{
3861	device_t dev = adapter->dev;
3862	struct em_buffer *rx_buffer;
3863	int i, error;
3864
3865	adapter->rx_buffer_area = malloc(sizeof(struct em_buffer) *
3866	    adapter->num_rx_desc, M_DEVBUF, M_NOWAIT);
3867	if (adapter->rx_buffer_area == NULL) {
3868		device_printf(dev, "Unable to allocate rx_buffer memory\n");
3869		return (ENOMEM);
3870	}
3871
3872	bzero(adapter->rx_buffer_area,
3873	    sizeof(struct em_buffer) * adapter->num_rx_desc);
3874
3875	error = bus_dma_tag_create(bus_get_dma_tag(dev),        /* parent */
3876				1, 0,			/* alignment, bounds */
3877				BUS_SPACE_MAXADDR,	/* lowaddr */
3878				BUS_SPACE_MAXADDR,	/* highaddr */
3879				NULL, NULL,		/* filter, filterarg */
3880				MCLBYTES,		/* maxsize */
3881				1,			/* nsegments */
3882				MCLBYTES,		/* maxsegsize */
3883				0,			/* flags */
3884				NULL,			/* lockfunc */
3885				NULL,			/* lockarg */
3886				&adapter->rxtag);
3887	if (error) {
3888		device_printf(dev, "%s: bus_dma_tag_create failed %d\n",
3889		    __func__, error);
3890		goto fail;
3891	}
3892
3893	/* Create the spare map (used by getbuf) */
3894	error = bus_dmamap_create(adapter->rxtag, BUS_DMA_NOWAIT,
3895	     &adapter->rx_sparemap);
3896	if (error) {
3897		device_printf(dev, "%s: bus_dmamap_create failed: %d\n",
3898		    __func__, error);
3899		goto fail;
3900	}
3901
3902	rx_buffer = adapter->rx_buffer_area;
3903	for (i = 0; i < adapter->num_rx_desc; i++, rx_buffer++) {
3904		error = bus_dmamap_create(adapter->rxtag, BUS_DMA_NOWAIT,
3905		    &rx_buffer->map);
3906		if (error) {
3907			device_printf(dev, "%s: bus_dmamap_create failed: %d\n",
3908			    __func__, error);
3909			goto fail;
3910		}
3911	}
3912
3913	/* Setup the initial buffers */
3914	for (i = 0; i < adapter->num_rx_desc; i++) {
3915		error = em_get_buf(adapter, i);
3916		if (error)
3917			goto fail;
3918	}
3919	bus_dmamap_sync(adapter->rxdma.dma_tag, adapter->rxdma.dma_map,
3920	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3921
3922	return (0);
3923
3924fail:
3925	em_free_receive_structures(adapter);
3926	return (error);
3927}
3928
3929/*********************************************************************
3930 *
3931 *  Allocate and initialize receive structures.
3932 *
3933 **********************************************************************/
3934static int
3935em_setup_receive_structures(struct adapter *adapter)
3936{
3937	int error;
3938
3939	bzero(adapter->rx_desc_base,
3940	    (sizeof(struct e1000_rx_desc)) * adapter->num_rx_desc);
3941
3942	if ((error = em_allocate_receive_structures(adapter)) !=0)
3943		return (error);
3944
3945	/* Setup our descriptor pointers */
3946	adapter->next_rx_desc_to_check = 0;
3947
3948	return (0);
3949}
3950
3951/*********************************************************************
3952 *
3953 *  Enable receive unit.
3954 *
3955 **********************************************************************/
3956static void
3957em_initialize_receive_unit(struct adapter *adapter)
3958{
3959	struct ifnet	*ifp = adapter->ifp;
3960	uint64_t	bus_addr;
3961	uint32_t	reg_rctl;
3962	uint32_t	reg_rxcsum;
3963
3964	INIT_DEBUGOUT("em_initialize_receive_unit: begin");
3965
3966	/*
3967	 * Make sure receives are disabled while setting
3968	 * up the descriptor ring
3969	 */
3970	reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
3971	E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl & ~E1000_RCTL_EN);
3972
	if (adapter->hw.mac.type >= e1000_82540) {
3974		E1000_WRITE_REG(&adapter->hw, E1000_RADV,
3975		    adapter->rx_abs_int_delay.value);
3976		/*
3977		 * Set the interrupt throttling rate. Value is calculated
3978		 * as DEFAULT_ITR = 1/(MAX_INTS_PER_SEC * 256ns)
3979		 */
3980#define MAX_INTS_PER_SEC	8000
3981#define DEFAULT_ITR	     1000000000/(MAX_INTS_PER_SEC * 256)
3982		E1000_WRITE_REG(&adapter->hw, E1000_ITR, DEFAULT_ITR);
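		/*
		 * With MAX_INTS_PER_SEC = 8000 this evaluates to
		 * 10^9 / (8000 * 256) ~= 488 units of 256 ns each,
		 * i.e. a floor of roughly 125 us between interrupts.
		 */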
3983	}
3984
3985	/* Setup the Base and Length of the Rx Descriptor Ring */
3986	bus_addr = adapter->rxdma.dma_paddr;
3987	E1000_WRITE_REG(&adapter->hw, E1000_RDLEN, adapter->num_rx_desc *
3988			sizeof(struct e1000_rx_desc));
3989	E1000_WRITE_REG(&adapter->hw, E1000_RDBAH, (uint32_t)(bus_addr >> 32));
3990	E1000_WRITE_REG(&adapter->hw, E1000_RDBAL, (uint32_t)bus_addr);
3991
3992	/* Setup the Receive Control Register */
3993	reg_rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
3994	reg_rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_LBM_NO |
3995		   E1000_RCTL_RDMTS_HALF |
3996		   (adapter->hw.mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
3997
3998	if (e1000_tbi_sbp_enabled_82543(&adapter->hw))
3999		reg_rctl |= E1000_RCTL_SBP;
4000	else
4001		reg_rctl &= ~E1000_RCTL_SBP;
4002
4003	switch (adapter->rx_buffer_len) {
4004	default:
4005	case 2048:
4006		reg_rctl |= E1000_RCTL_SZ_2048;
4007		break;
4008	case 4096:
4009		reg_rctl |= E1000_RCTL_SZ_4096 |
4010		    E1000_RCTL_BSEX | E1000_RCTL_LPE;
4011		break;
4012	case 8192:
4013		reg_rctl |= E1000_RCTL_SZ_8192 |
4014		    E1000_RCTL_BSEX | E1000_RCTL_LPE;
4015		break;
4016	case 16384:
4017		reg_rctl |= E1000_RCTL_SZ_16384 |
4018		    E1000_RCTL_BSEX | E1000_RCTL_LPE;
4019		break;
4020	}
4021
4022	if (ifp->if_mtu > ETHERMTU)
4023		reg_rctl |= E1000_RCTL_LPE;
4024	else
4025		reg_rctl &= ~E1000_RCTL_LPE;
4026
4027	/* Enable 82543 Receive Checksum Offload for TCP and UDP */
4028	if ((adapter->hw.mac.type >= e1000_82543) &&
4029	    (ifp->if_capenable & IFCAP_RXCSUM)) {
4030		reg_rxcsum = E1000_READ_REG(&adapter->hw, E1000_RXCSUM);
4031		reg_rxcsum |= (E1000_RXCSUM_IPOFL | E1000_RXCSUM_TUOFL);
4032		E1000_WRITE_REG(&adapter->hw, E1000_RXCSUM, reg_rxcsum);
4033	}
4034
4035	/*
4036	** XXX TEMPORARY WORKAROUND: on some systems with 82573
4037	** long latencies are observed, like Lenovo X60. This
4038	** change eliminates the problem, but since having positive
4039	** values in RDTR is a known source of problems on other
4040	** platforms another solution is being sought.
4041	*/
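	/* (RDTR counts in units of 1.024 us, so 0x20 is a ~33 us delay.) */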
4042	if (adapter->hw.mac.type == e1000_82573)
4043		E1000_WRITE_REG(&adapter->hw, E1000_RDTR, 0x20);
4044
4045	/* Enable Receives */
4046	E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
4047
4048	/*
4049	 * Setup the HW Rx Head and
4050	 * Tail Descriptor Pointers
4051	 */
4052	E1000_WRITE_REG(&adapter->hw, E1000_RDH, 0);
4053	E1000_WRITE_REG(&adapter->hw, E1000_RDT, adapter->num_rx_desc - 1);
4054
4055	return;
4056}
4057
4058/*********************************************************************
4059 *
4060 *  Free receive related data structures.
4061 *
4062 **********************************************************************/
4063static void
4064em_free_receive_structures(struct adapter *adapter)
4065{
4066	struct em_buffer *rx_buffer;
4067	int i;
4068
4069	INIT_DEBUGOUT("free_receive_structures: begin");
4070
4071	if (adapter->rx_sparemap) {
4072		bus_dmamap_destroy(adapter->rxtag, adapter->rx_sparemap);
4073		adapter->rx_sparemap = NULL;
4074	}
4075
4076	/* Cleanup any existing buffers */
4077	if (adapter->rx_buffer_area != NULL) {
4078		rx_buffer = adapter->rx_buffer_area;
4079		for (i = 0; i < adapter->num_rx_desc; i++, rx_buffer++) {
4080			if (rx_buffer->m_head != NULL) {
4081				bus_dmamap_sync(adapter->rxtag, rx_buffer->map,
4082				    BUS_DMASYNC_POSTREAD);
4083				bus_dmamap_unload(adapter->rxtag,
4084				    rx_buffer->map);
4085				m_freem(rx_buffer->m_head);
4086				rx_buffer->m_head = NULL;
4087			} else if (rx_buffer->map != NULL)
4088				bus_dmamap_unload(adapter->rxtag,
4089				    rx_buffer->map);
4090			if (rx_buffer->map != NULL) {
4091				bus_dmamap_destroy(adapter->rxtag,
4092				    rx_buffer->map);
4093				rx_buffer->map = NULL;
4094			}
4095		}
4096	}
4097
4098	if (adapter->rx_buffer_area != NULL) {
4099		free(adapter->rx_buffer_area, M_DEVBUF);
4100		adapter->rx_buffer_area = NULL;
4101	}
4102
4103	if (adapter->rxtag != NULL) {
4104		bus_dma_tag_destroy(adapter->rxtag);
4105		adapter->rxtag = NULL;
4106	}
4107}
4108
4109/*********************************************************************
4110 *
4111 *  This routine executes in interrupt context. It replenishes
4112 *  the mbufs in the descriptor and sends data which has been
4113 *  dma'ed into host memory to upper layer.
4114 *
4115 *  We loop at most count times if count is > 0, or until done if
4116 *  count < 0.
4117 *
4118 *********************************************************************/
4119static int
4120em_rxeof(struct adapter *adapter, int count)
4121{
4122	struct ifnet	*ifp;
4123	struct mbuf	*mp;
4124	uint8_t		accept_frame = 0;
4125	uint8_t		eop = 0;
4126	uint16_t 	len, desc_len, prev_len_adj;
4127	int		i;
4128
4129	/* Pointer to the receive descriptor being examined. */
4130	struct e1000_rx_desc   *current_desc;
4131	uint8_t		status;
4132
4133	ifp = adapter->ifp;
4134	i = adapter->next_rx_desc_to_check;
4135	current_desc = &adapter->rx_desc_base[i];
4136	bus_dmamap_sync(adapter->rxdma.dma_tag, adapter->rxdma.dma_map,
4137	    BUS_DMASYNC_POSTREAD);
4138
4139	if (!((current_desc->status) & E1000_RXD_STAT_DD))
4140		return (0);
4141
4142	while ((current_desc->status & E1000_RXD_STAT_DD) &&
4143	    (count != 0) &&
4144	    (ifp->if_drv_flags & IFF_DRV_RUNNING)) {
4145		struct mbuf *m = NULL;
4146
4147		mp = adapter->rx_buffer_area[i].m_head;
4148		/*
4149		 * Can't defer bus_dmamap_sync(9) because TBI_ACCEPT
4150		 * needs to access the last received byte in the mbuf.
4151		 */
4152		bus_dmamap_sync(adapter->rxtag, adapter->rx_buffer_area[i].map,
4153		    BUS_DMASYNC_POSTREAD);
4154
4155		accept_frame = 1;
4156		prev_len_adj = 0;
4157		desc_len = le16toh(current_desc->length);
4158		status = current_desc->status;
4159		if (status & E1000_RXD_STAT_EOP) {
4160			count--;
4161			eop = 1;
4162			if (desc_len < ETHER_CRC_LEN) {
4163				len = 0;
4164				prev_len_adj = ETHER_CRC_LEN - desc_len;
4165			} else
4166				len = desc_len - ETHER_CRC_LEN;
4167		} else {
4168			eop = 0;
4169			len = desc_len;
4170		}
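		/*
		 * The 4-byte CRC is stripped from the reported length.
		 * If the EOP descriptor held fewer than ETHER_CRC_LEN
		 * bytes, the tail of the CRC sits in the previous
		 * descriptor; prev_len_adj records how much to trim
		 * from that earlier mbuf when chaining below.
		 */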
4171
4172		if (current_desc->errors & E1000_RXD_ERR_FRAME_ERR_MASK) {
4173			uint8_t		last_byte;
4174			uint32_t	pkt_len = desc_len;
4175
4176			if (adapter->fmp != NULL)
4177				pkt_len += adapter->fmp->m_pkthdr.len;
4178
4179			last_byte = *(mtod(mp, caddr_t) + desc_len - 1);
4180			if (TBI_ACCEPT(&adapter->hw, status,
4181			    current_desc->errors, pkt_len, last_byte)) {
4182				e1000_tbi_adjust_stats_82543(&adapter->hw,
4183				    &adapter->stats, pkt_len,
4184				    adapter->hw.mac.addr);
4185				if (len > 0)
4186					len--;
4187			} else
4188				accept_frame = 0;
4189		}
4190
4191		if (accept_frame) {
4192			if (em_get_buf(adapter, i) != 0) {
4193				ifp->if_iqdrops++;
4194				goto discard;
4195			}
4196
4197			/* Assign correct length to the current fragment */
4198			mp->m_len = len;
4199
4200			if (adapter->fmp == NULL) {
4201				mp->m_pkthdr.len = len;
4202				adapter->fmp = mp; /* Store the first mbuf */
4203				adapter->lmp = mp;
4204			} else {
4205				/* Chain mbuf's together */
4206				mp->m_flags &= ~M_PKTHDR;
4207				/*
4208				 * Adjust length of previous mbuf in chain if
4209				 * we received less than 4 bytes in the last
4210				 * descriptor.
4211				 */
4212				if (prev_len_adj > 0) {
4213					adapter->lmp->m_len -= prev_len_adj;
4214					adapter->fmp->m_pkthdr.len -=
4215					    prev_len_adj;
4216				}
4217				adapter->lmp->m_next = mp;
4218				adapter->lmp = adapter->lmp->m_next;
4219				adapter->fmp->m_pkthdr.len += len;
4220			}
4221
4222			if (eop) {
4223				adapter->fmp->m_pkthdr.rcvif = ifp;
4224				ifp->if_ipackets++;
4225				em_receive_checksum(adapter, current_desc,
4226				    adapter->fmp);
4227#ifndef __NO_STRICT_ALIGNMENT
4228				if (adapter->hw.mac.max_frame_size >
4229				    (MCLBYTES - ETHER_ALIGN) &&
4230				    em_fixup_rx(adapter) != 0)
4231					goto skip;
4232#endif
4233				if (status & E1000_RXD_STAT_VP) {
4234					adapter->fmp->m_pkthdr.ether_vtag =
4235					    (le16toh(current_desc->special) &
4236					    E1000_RXD_SPC_VLAN_MASK);
4237					adapter->fmp->m_flags |= M_VLANTAG;
4238				}
4239#ifndef __NO_STRICT_ALIGNMENT
4240skip:
4241#endif
4242				m = adapter->fmp;
4243				adapter->fmp = NULL;
4244				adapter->lmp = NULL;
4245			}
4246		} else {
4247			ifp->if_ierrors++;
4248discard:
4249			/* Reuse loaded DMA map and just update mbuf chain */
4250			mp = adapter->rx_buffer_area[i].m_head;
4251			mp->m_len = mp->m_pkthdr.len = MCLBYTES;
4252			mp->m_data = mp->m_ext.ext_buf;
4253			mp->m_next = NULL;
4254			if (adapter->hw.mac.max_frame_size <=
4255			    (MCLBYTES - ETHER_ALIGN))
4256				m_adj(mp, ETHER_ALIGN);
4257			if (adapter->fmp != NULL) {
4258				m_freem(adapter->fmp);
4259				adapter->fmp = NULL;
4260				adapter->lmp = NULL;
4261			}
4262			m = NULL;
4263		}
4264
4265		/* Zero out the receive descriptors status. */
4266		current_desc->status = 0;
4267		bus_dmamap_sync(adapter->rxdma.dma_tag, adapter->rxdma.dma_map,
4268		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4269
4270		/* Advance our pointers to the next descriptor. */
4271		if (++i == adapter->num_rx_desc)
4272			i = 0;
4273		if (m != NULL) {
4274			adapter->next_rx_desc_to_check = i;
4275#ifdef DEVICE_POLLING
4276			EM_UNLOCK(adapter);
4277			(*ifp->if_input)(ifp, m);
4278			EM_LOCK(adapter);
4279#else
4280			/* Already running unlocked */
4281			(*ifp->if_input)(ifp, m);
4282#endif
4283			i = adapter->next_rx_desc_to_check;
4284		}
4285		current_desc = &adapter->rx_desc_base[i];
4286	}
4287	adapter->next_rx_desc_to_check = i;
4288
4289	/* Advance the E1000's Receive Queue #0  "Tail Pointer". */
4290	if (--i < 0)
4291		i = adapter->num_rx_desc - 1;
4292	E1000_WRITE_REG(&adapter->hw, E1000_RDT, i);
4293	if (!((current_desc->status) & E1000_RXD_STAT_DD))
4294		return (0);
4295
4296	return (1);
4297}
4298
4299#ifndef __NO_STRICT_ALIGNMENT
4300/*
 * When jumbo frames are enabled we should realign the entire payload on
 * architectures with strict alignment. This is a serious design mistake
 * of the 8254x, as it nullifies the gain of DMA operations. The 8254x
 * only allows an RX buffer size of 2048/4096/8192/16384; what we really
 * want is 2048 - ETHER_ALIGN so the payload ends up aligned. On
 * architectures without strict alignment restrictions the 8254x still
 * performs unaligned memory accesses, which reduces performance as well.
 * To avoid copying an entire frame to realign it, we allocate a new mbuf
 * and copy only the ethernet header into it. The new mbuf is then
 * prepended to the existing mbuf chain.
 *
 * Be aware that the best performance of the 8254x is achieved only when
 * jumbo frames are not used at all on architectures with strict alignment.
4313 */
4314static int
4315em_fixup_rx(struct adapter *adapter)
4316{
4317	struct mbuf *m, *n;
4318	int error;
4319
4320	error = 0;
4321	m = adapter->fmp;
4322	if (m->m_len <= (MCLBYTES - ETHER_HDR_LEN)) {
4323		bcopy(m->m_data, m->m_data + ETHER_HDR_LEN, m->m_len);
4324		m->m_data += ETHER_HDR_LEN;
4325	} else {
4326		MGETHDR(n, M_DONTWAIT, MT_DATA);
4327		if (n != NULL) {
4328			bcopy(m->m_data, n->m_data, ETHER_HDR_LEN);
4329			m->m_data += ETHER_HDR_LEN;
4330			m->m_len -= ETHER_HDR_LEN;
4331			n->m_len = ETHER_HDR_LEN;
4332			M_MOVE_PKTHDR(n, m);
4333			n->m_next = m;
4334			adapter->fmp = n;
4335		} else {
4336			adapter->dropped_pkts++;
4337			m_freem(adapter->fmp);
4338			adapter->fmp = NULL;
4339			error = ENOMEM;
4340		}
4341	}
4342
4343	return (error);
4344}
4345#endif
4346
4347/*********************************************************************
4348 *
4349 *  Verify that the hardware indicated that the checksum is valid.
4350 *  Inform the stack about the status of checksum so that stack
4351 *  doesn't spend time verifying the checksum.
4352 *
4353 *********************************************************************/
4354static void
4355em_receive_checksum(struct adapter *adapter,
4356	    struct e1000_rx_desc *rx_desc, struct mbuf *mp)
4357{
4358	/* 82543 or newer only */
4359	if ((adapter->hw.mac.type < e1000_82543) ||
4360	    /* Ignore Checksum bit is set */
4361	    (rx_desc->status & E1000_RXD_STAT_IXSM)) {
4362		mp->m_pkthdr.csum_flags = 0;
4363		return;
4364	}
4365
4366	if (rx_desc->status & E1000_RXD_STAT_IPCS) {
4367		/* Did it pass? */
4368		if (!(rx_desc->errors & E1000_RXD_ERR_IPE)) {
4369			/* IP Checksum Good */
4370			mp->m_pkthdr.csum_flags = CSUM_IP_CHECKED;
4371			mp->m_pkthdr.csum_flags |= CSUM_IP_VALID;
4372
4373		} else {
4374			mp->m_pkthdr.csum_flags = 0;
4375		}
4376	}
4377
4378	if (rx_desc->status & E1000_RXD_STAT_TCPCS) {
4379		/* Did it pass? */
4380		if (!(rx_desc->errors & E1000_RXD_ERR_TCPE)) {
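			/*
			 * Mark the checksum as fully verified: with
			 * CSUM_DATA_VALID | CSUM_PSEUDO_HDR set and a
			 * csum_data of 0xffff the stack skips its own
			 * software verification of the TCP/UDP sum.
			 */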
			mp->m_pkthdr.csum_flags |=
			    (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
			mp->m_pkthdr.csum_data = htons(0xffff);
4384		}
4385	}
4386}
4387
4388
4389static void
4390em_enable_vlans(struct adapter *adapter)
4391{
4392	uint32_t ctrl;
4393
4394	E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);
4395
4396	ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL);
4397	ctrl |= E1000_CTRL_VME;
4398	E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl);
4399}
4400
4401static void
4402em_enable_intr(struct adapter *adapter)
4403{
4404	E1000_WRITE_REG(&adapter->hw, E1000_IMS,
4405	    (IMS_ENABLE_MASK));
4406}
4407
4408static void
4409em_disable_intr(struct adapter *adapter)
4410{
4411	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
4412}
4413
4414/*
4415 * Bit of a misnomer, what this really means is
4416 * to enable OS management of the system... aka
4417 * to disable special hardware management features
4418 */
4419static void
4420em_init_manageability(struct adapter *adapter)
4421{
4422	/* A shared code workaround */
4423#define E1000_82542_MANC2H E1000_MANC2H
4424	if (adapter->has_manage) {
4425		int manc2h = E1000_READ_REG(&adapter->hw, E1000_MANC2H);
4426		int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
4427
4428		/* disable hardware interception of ARP */
4429		manc &= ~(E1000_MANC_ARP_EN);
4430
		/* enable receiving management packets to the host */
		if (adapter->hw.mac.type >= e1000_82571) {
4433			manc |= E1000_MANC_EN_MNG2HOST;
4434#define E1000_MNG2HOST_PORT_623 (1 << 5)
4435#define E1000_MNG2HOST_PORT_664 (1 << 6)
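			/*
			 * 623 and 664 are the RMCP and secure-RMCP ports
			 * used by ASF/BMC management traffic; route them
			 * to the host OS.
			 */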
4436			manc2h |= E1000_MNG2HOST_PORT_623;
4437			manc2h |= E1000_MNG2HOST_PORT_664;
4438			E1000_WRITE_REG(&adapter->hw, E1000_MANC2H, manc2h);
4439		}
4440
4441		E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
4442	}
4443}
4444
4445/*
4446 * Give control back to hardware management
4447 * controller if there is one.
4448 */
4449static void
4450em_release_manageability(struct adapter *adapter)
4451{
4452	if (adapter->has_manage) {
4453		int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
4454
4455		/* re-enable hardware interception of ARP */
4456		manc |= E1000_MANC_ARP_EN;
4457
4458		if (adapter->hw.mac.type >= e1000_82571)
4459			manc &= ~E1000_MANC_EN_MNG2HOST;
4460
4461		E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
4462	}
4463}
4464
4465/*
4466 * em_get_hw_control sets {CTRL_EXT|FWSM}:DRV_LOAD bit.
4467 * For ASF and Pass Through versions of f/w this means that
 * the driver is loaded. For the AMT version (only with 82573)
4469 * of the f/w this means that the network i/f is open.
4470 *
4471 */
4472static void
4473em_get_hw_control(struct adapter *adapter)
4474{
4475	u32 ctrl_ext, swsm;
4476
4477	/* Let firmware know the driver has taken over */
4478	switch (adapter->hw.mac.type) {
4479	case e1000_82573:
4480		swsm = E1000_READ_REG(&adapter->hw, E1000_SWSM);
4481		E1000_WRITE_REG(&adapter->hw, E1000_SWSM,
4482		    swsm | E1000_SWSM_DRV_LOAD);
4483		break;
4484	case e1000_82571:
4485	case e1000_82572:
4486	case e1000_80003es2lan:
4487	case e1000_ich8lan:
4488	case e1000_ich9lan:
4489		ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4490		E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
4491		    ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
4492		break;
4493	default:
4494		break;
4495	}
4496}
4497
4498/*
4499 * em_release_hw_control resets {CTRL_EXT|FWSM}:DRV_LOAD bit.
4500 * For ASF and Pass Through versions of f/w this means that the
 * driver is no longer loaded. For the AMT version (only with 82573)
4502 * of the f/w this means that the network i/f is closed.
4503 *
4504 */
4505static void
4506em_release_hw_control(struct adapter *adapter)
4507{
4508	u32 ctrl_ext, swsm;
4509
	/* Let firmware take over control of h/w */
4511	switch (adapter->hw.mac.type) {
4512	case e1000_82573:
4513		swsm = E1000_READ_REG(&adapter->hw, E1000_SWSM);
4514		E1000_WRITE_REG(&adapter->hw, E1000_SWSM,
4515		    swsm & ~E1000_SWSM_DRV_LOAD);
4516		break;
4517	case e1000_82571:
4518	case e1000_82572:
4519	case e1000_80003es2lan:
4520	case e1000_ich8lan:
4521	case e1000_ich9lan:
4522		ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4523		E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
4524		    ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
4525		break;
	default:
		break;
	}
4530}
4531
4532static int
4533em_is_valid_ether_addr(uint8_t *addr)
4534{
4535	char zero_addr[6] = { 0, 0, 0, 0, 0, 0 };
4536
4537	if ((addr[0] & 1) || (!bcmp(addr, zero_addr, ETHER_ADDR_LEN))) {
4538		return (FALSE);
4539	}
4540
4541	return (TRUE);
4542}
4543
4544/*
 * NOTE: the following routines use the e1000 naming style,
 * since they are provided to the shared code, which expects
 * that rather than 'em'.
4548 */
4549
4550void
4551e1000_write_pci_cfg(struct e1000_hw *hw, uint32_t reg, uint16_t *value)
4552{
4553	pci_write_config(((struct e1000_osdep *)hw->back)->dev, reg, *value, 2);
4554}
4555
4556void
4557e1000_read_pci_cfg(struct e1000_hw *hw, uint32_t reg, uint16_t *value)
4558{
4559	*value = pci_read_config(((struct e1000_osdep *)hw->back)->dev, reg, 2);
4560}
4561
4562void
4563e1000_pci_set_mwi(struct e1000_hw *hw)
4564{
4565	pci_write_config(((struct e1000_osdep *)hw->back)->dev, PCIR_COMMAND,
4566	    (hw->bus.pci_cmd_word | CMD_MEM_WRT_INVALIDATE), 2);
4567}
4568
4569void
4570e1000_pci_clear_mwi(struct e1000_hw *hw)
4571{
4572	pci_write_config(((struct e1000_osdep *)hw->back)->dev, PCIR_COMMAND,
4573	    (hw->bus.pci_cmd_word & ~CMD_MEM_WRT_INVALIDATE), 2);
4574}
4575
4576/*
4577 * Read the PCI Express capabilities
4578 */
4579int32_t
4580e1000_read_pcie_cap_reg(struct e1000_hw *hw, uint32_t reg, uint16_t *value)
4581{
4582	int32_t		error = E1000_SUCCESS;
4583	uint16_t	cap_off;
4584
4585	switch (hw->mac.type) {
4586
4587		case e1000_82571:
4588		case e1000_82572:
4589		case e1000_82573:
4590		case e1000_80003es2lan:
4591			cap_off = 0xE0;
4592			e1000_read_pci_cfg(hw, cap_off + reg, value);
4593			break;
4594		default:
			error = E1000_NOT_IMPLEMENTED;
4596			break;
4597	}
4598
4599	return (error);
4600}
4601
4602int32_t
4603e1000_alloc_zeroed_dev_spec_struct(struct e1000_hw *hw, uint32_t size)
4604{
	/* M_ZERO already clears the memory, so no bzero() is needed. */
	hw->dev_spec = malloc(size, M_DEVBUF, M_NOWAIT | M_ZERO);
	if (hw->dev_spec == NULL)
		return (ENOMEM);

	return (0);
4613}
4614
4615void
4616e1000_free_dev_spec_struct(struct e1000_hw *hw)
4617{
	if (hw->dev_spec != NULL) {
		free(hw->dev_spec, M_DEVBUF);
		hw->dev_spec = NULL;	/* Guard against double free. */
	}
4621}
4622
4623/*
4624 * Enable PCI Wake On Lan capability
4625 */
4626void
4627em_enable_wakeup(device_t dev)
4628{
4629	u16     cap, status;
4630	u8      id;
4631
	/* First find the capabilities pointer. */
	cap = pci_read_config(dev, PCIR_CAP_PTR, 2);
	/* Read the PM capability ID. */
	id = pci_read_config(dev, cap, 1);
	if (id != PCIY_PMG)	/* Something wrong */
		return;
	/*
	 * OK, we have the power capability, so
	 * now get the status register.
	 */
	cap += PCIR_POWER_STATUS;
4641	status = pci_read_config(dev, cap, 2);
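	/*
	 * PME_Status is write-1-to-clear, so the write below both
	 * acknowledges any pending wake event and sets PME_En to arm
	 * future wake-ups.
	 */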
4642	status |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE;
4643	pci_write_config(dev, cap, status, 2);
4644	return;
4645}
4646
4647
/*********************************************************************
 * 82544 Coexistence issue workaround.
 *    There are two issues:
 *       1. Transmit Hang issue.
 *    To detect this issue, the following equation can be used:
 *	  SIZE[3:0] + ADDR[2:0] = SUM[3:0].
 *	  If SUM[3:0] is between 1 and 4, we will have this issue.
 *
 *       2. DAC issue.
 *    To detect this issue, the following equation can be used:
 *	  SIZE[3:0] + ADDR[2:0] = SUM[3:0].
 *	  If SUM[3:0] is between 9 and 0xC, we will have this issue.
 *
 *    WORKAROUND:
 *	  Make sure the ending address of a buffer never works out
 *	  to 1, 2, 3, 4 (Hang) or 9, 0xA, 0xB, 0xC (DAC).
 *
 *********************************************************************/
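/*
 * Worked example: a segment at bus address 0x1001 (ADDR[2:0] = 1) with
 * length 0x12 (SIZE[3:0] = 2) gives SUM[3:0] = 3, inside the 1-4 hang
 * window, so em_fill_descriptors() splits it into a (length - 4)-byte
 * descriptor and a 4-byte tail; buffers of 4 bytes or less are treated
 * as inherently safe, as in the early return below.
 */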
static uint32_t
em_fill_descriptors(bus_addr_t address, uint32_t length,
    PDESC_ARRAY desc_array)
{
	uint32_t safe_terminator;

	/*
	 * The issue is sensitive to both length and address, so
	 * check the address first.  Buffers of 4 bytes or less
	 * are always safe.
	 */
	if (length <= 4) {
		desc_array->descriptor[0].address = address;
		desc_array->descriptor[0].length = length;
		desc_array->elements = 1;
		return (desc_array->elements);
	}
	safe_terminator = (uint32_t)((((uint32_t)address & 0x7) +
	    (length & 0xF)) & 0xF);
	/* If it does not fall in 0x1-0x4 or 0x9-0xC, one descriptor is fine. */
	if (safe_terminator == 0 ||
	    (safe_terminator > 4 && safe_terminator < 9) ||
	    (safe_terminator > 0xC && safe_terminator <= 0xF)) {
		desc_array->descriptor[0].address = address;
		desc_array->descriptor[0].length = length;
		desc_array->elements = 1;
		return (desc_array->elements);
	}
4693
4694	desc_array->descriptor[0].address = address;
4695	desc_array->descriptor[0].length = length - 4;
4696	desc_array->descriptor[1].address = address + (length - 4);
4697	desc_array->descriptor[1].length = 4;
4698	desc_array->elements = 2;
4699	return (desc_array->elements);
4700}
4701
4702/**********************************************************************
4703 *
4704 *  Update the board statistics counters.
4705 *
4706 **********************************************************************/
4707static void
4708em_update_stats_counters(struct adapter *adapter)
4709{
4710	struct ifnet   *ifp;
4711
	if (adapter->hw.media_type == e1000_media_type_copper ||
4713	   (E1000_READ_REG(&adapter->hw, E1000_STATUS) & E1000_STATUS_LU)) {
4714		adapter->stats.symerrs += E1000_READ_REG(&adapter->hw, E1000_SYMERRS);
4715		adapter->stats.sec += E1000_READ_REG(&adapter->hw, E1000_SEC);
4716	}
4717	adapter->stats.crcerrs += E1000_READ_REG(&adapter->hw, E1000_CRCERRS);
4718	adapter->stats.mpc += E1000_READ_REG(&adapter->hw, E1000_MPC);
4719	adapter->stats.scc += E1000_READ_REG(&adapter->hw, E1000_SCC);
4720	adapter->stats.ecol += E1000_READ_REG(&adapter->hw, E1000_ECOL);
4721
4722	adapter->stats.mcc += E1000_READ_REG(&adapter->hw, E1000_MCC);
4723	adapter->stats.latecol += E1000_READ_REG(&adapter->hw, E1000_LATECOL);
4724	adapter->stats.colc += E1000_READ_REG(&adapter->hw, E1000_COLC);
4725	adapter->stats.dc += E1000_READ_REG(&adapter->hw, E1000_DC);
4726	adapter->stats.rlec += E1000_READ_REG(&adapter->hw, E1000_RLEC);
4727	adapter->stats.xonrxc += E1000_READ_REG(&adapter->hw, E1000_XONRXC);
4728	adapter->stats.xontxc += E1000_READ_REG(&adapter->hw, E1000_XONTXC);
4729	adapter->stats.xoffrxc += E1000_READ_REG(&adapter->hw, E1000_XOFFRXC);
4730	adapter->stats.xofftxc += E1000_READ_REG(&adapter->hw, E1000_XOFFTXC);
4731	adapter->stats.fcruc += E1000_READ_REG(&adapter->hw, E1000_FCRUC);
4732	adapter->stats.prc64 += E1000_READ_REG(&adapter->hw, E1000_PRC64);
4733	adapter->stats.prc127 += E1000_READ_REG(&adapter->hw, E1000_PRC127);
4734	adapter->stats.prc255 += E1000_READ_REG(&adapter->hw, E1000_PRC255);
4735	adapter->stats.prc511 += E1000_READ_REG(&adapter->hw, E1000_PRC511);
4736	adapter->stats.prc1023 += E1000_READ_REG(&adapter->hw, E1000_PRC1023);
4737	adapter->stats.prc1522 += E1000_READ_REG(&adapter->hw, E1000_PRC1522);
4738	adapter->stats.gprc += E1000_READ_REG(&adapter->hw, E1000_GPRC);
4739	adapter->stats.bprc += E1000_READ_REG(&adapter->hw, E1000_BPRC);
4740	adapter->stats.mprc += E1000_READ_REG(&adapter->hw, E1000_MPRC);
4741	adapter->stats.gptc += E1000_READ_REG(&adapter->hw, E1000_GPTC);
4742
4743	/* For the 64-bit byte counters the low dword must be read first. */
4744	/* Both registers clear on the read of the high dword */
4745
4746	adapter->stats.gorcl += E1000_READ_REG(&adapter->hw, E1000_GORCL);
4747	adapter->stats.gorch += E1000_READ_REG(&adapter->hw, E1000_GORCH);
4748	adapter->stats.gotcl += E1000_READ_REG(&adapter->hw, E1000_GOTCL);
4749	adapter->stats.gotch += E1000_READ_REG(&adapter->hw, E1000_GOTCH);
4750
4751	adapter->stats.rnbc += E1000_READ_REG(&adapter->hw, E1000_RNBC);
4752	adapter->stats.ruc += E1000_READ_REG(&adapter->hw, E1000_RUC);
4753	adapter->stats.rfc += E1000_READ_REG(&adapter->hw, E1000_RFC);
4754	adapter->stats.roc += E1000_READ_REG(&adapter->hw, E1000_ROC);
4755	adapter->stats.rjc += E1000_READ_REG(&adapter->hw, E1000_RJC);
4756
4757	adapter->stats.torl += E1000_READ_REG(&adapter->hw, E1000_TORL);
4758	adapter->stats.torh += E1000_READ_REG(&adapter->hw, E1000_TORH);
4759	adapter->stats.totl += E1000_READ_REG(&adapter->hw, E1000_TOTL);
4760	adapter->stats.toth += E1000_READ_REG(&adapter->hw, E1000_TOTH);
4761
4762	adapter->stats.tpr += E1000_READ_REG(&adapter->hw, E1000_TPR);
4763	adapter->stats.tpt += E1000_READ_REG(&adapter->hw, E1000_TPT);
4764	adapter->stats.ptc64 += E1000_READ_REG(&adapter->hw, E1000_PTC64);
4765	adapter->stats.ptc127 += E1000_READ_REG(&adapter->hw, E1000_PTC127);
4766	adapter->stats.ptc255 += E1000_READ_REG(&adapter->hw, E1000_PTC255);
4767	adapter->stats.ptc511 += E1000_READ_REG(&adapter->hw, E1000_PTC511);
4768	adapter->stats.ptc1023 += E1000_READ_REG(&adapter->hw, E1000_PTC1023);
4769	adapter->stats.ptc1522 += E1000_READ_REG(&adapter->hw, E1000_PTC1522);
4770	adapter->stats.mptc += E1000_READ_REG(&adapter->hw, E1000_MPTC);
4771	adapter->stats.bptc += E1000_READ_REG(&adapter->hw, E1000_BPTC);
4772
4773	if (adapter->hw.mac.type >= e1000_82543) {
		adapter->stats.algnerrc +=
		    E1000_READ_REG(&adapter->hw, E1000_ALGNERRC);
		adapter->stats.rxerrc +=
		    E1000_READ_REG(&adapter->hw, E1000_RXERRC);
		adapter->stats.tncrs +=
		    E1000_READ_REG(&adapter->hw, E1000_TNCRS);
		adapter->stats.cexterr +=
		    E1000_READ_REG(&adapter->hw, E1000_CEXTERR);
		adapter->stats.tsctc +=
		    E1000_READ_REG(&adapter->hw, E1000_TSCTC);
		adapter->stats.tsctfc +=
		    E1000_READ_REG(&adapter->hw, E1000_TSCTFC);
4786	}
4787	ifp = adapter->ifp;
4788
4789	ifp->if_collisions = adapter->stats.colc;
4790
4791	/* Rx Errors */
4792	ifp->if_ierrors = adapter->dropped_pkts + adapter->stats.rxerrc +
4793	    adapter->stats.crcerrs + adapter->stats.algnerrc +
4794	    adapter->stats.ruc + adapter->stats.roc +
4795	    adapter->stats.mpc + adapter->stats.cexterr;
4796
4797	/* Tx Errors */
4798	ifp->if_oerrors = adapter->stats.ecol +
4799	    adapter->stats.latecol + adapter->watchdog_events;
4800}
4801
4802
4803/**********************************************************************
4804 *
4805 *  This routine is called only when em_display_debug_stats is enabled.
4806 *  This routine provides a way to take a look at important statistics
4807 *  maintained by the driver and hardware.
4808 *
4809 **********************************************************************/
4810static void
4811em_print_debug_info(struct adapter *adapter)
4812{
4813	device_t dev = adapter->dev;
4814	uint8_t *hw_addr = adapter->hw.hw_addr;
4815
4816	device_printf(dev, "Adapter hardware address = %p \n", hw_addr);
4817	device_printf(dev, "CTRL = 0x%x RCTL = 0x%x \n",
4818	    E1000_READ_REG(&adapter->hw, E1000_CTRL),
4819	    E1000_READ_REG(&adapter->hw, E1000_RCTL));
	device_printf(dev, "Packet buffer = Tx=%dk Rx=%dk \n",
	    ((E1000_READ_REG(&adapter->hw, E1000_PBA) & 0xffff0000) >> 16),
	    (E1000_READ_REG(&adapter->hw, E1000_PBA) & 0xffff));
4823	device_printf(dev, "Flow control watermarks high = %d low = %d\n",
4824	    adapter->hw.mac.fc_high_water,
4825	    adapter->hw.mac.fc_low_water);
4826	device_printf(dev, "tx_int_delay = %d, tx_abs_int_delay = %d\n",
4827	    E1000_READ_REG(&adapter->hw, E1000_TIDV),
4828	    E1000_READ_REG(&adapter->hw, E1000_TADV));
4829	device_printf(dev, "rx_int_delay = %d, rx_abs_int_delay = %d\n",
4830	    E1000_READ_REG(&adapter->hw, E1000_RDTR),
4831	    E1000_READ_REG(&adapter->hw, E1000_RADV));
4832	device_printf(dev, "fifo workaround = %lld, fifo_reset_count = %lld\n",
4833	    (long long)adapter->tx_fifo_wrk_cnt,
4834	    (long long)adapter->tx_fifo_reset_cnt);
4835	device_printf(dev, "hw tdh = %d, hw tdt = %d\n",
4836	    E1000_READ_REG(&adapter->hw, E1000_TDH),
4837	    E1000_READ_REG(&adapter->hw, E1000_TDT));
4838	device_printf(dev, "hw rdh = %d, hw rdt = %d\n",
4839	    E1000_READ_REG(&adapter->hw, E1000_RDH),
4840	    E1000_READ_REG(&adapter->hw, E1000_RDT));
4841	device_printf(dev, "Num Tx descriptors avail = %d\n",
4842	    adapter->num_tx_desc_avail);
4843	device_printf(dev, "Tx Descriptors not avail1 = %ld\n",
4844	    adapter->no_tx_desc_avail1);
4845	device_printf(dev, "Tx Descriptors not avail2 = %ld\n",
4846	    adapter->no_tx_desc_avail2);
4847	device_printf(dev, "Std mbuf failed = %ld\n",
4848	    adapter->mbuf_alloc_failed);
4849	device_printf(dev, "Std mbuf cluster failed = %ld\n",
4850	    adapter->mbuf_cluster_failed);
4851	device_printf(dev, "Driver dropped packets = %ld\n",
4852	    adapter->dropped_pkts);
4853	device_printf(dev, "Driver tx dma failure in encap = %ld\n",
4854		adapter->no_tx_dma_setup);
4855}
4856
4857static void
4858em_print_hw_stats(struct adapter *adapter)
4859{
4860	device_t dev = adapter->dev;
4861
4862	device_printf(dev, "Excessive collisions = %lld\n",
4863	    (long long)adapter->stats.ecol);
#if	(DEBUG_HW > 0)	/* Don't output these errors normally */
4865	device_printf(dev, "Symbol errors = %lld\n",
4866	    (long long)adapter->stats.symerrs);
4867#endif
4868	device_printf(dev, "Sequence errors = %lld\n",
4869	    (long long)adapter->stats.sec);
4870	device_printf(dev, "Defer count = %lld\n",
4871	    (long long)adapter->stats.dc);
4872	device_printf(dev, "Missed Packets = %lld\n",
4873	    (long long)adapter->stats.mpc);
4874	device_printf(dev, "Receive No Buffers = %lld\n",
4875	    (long long)adapter->stats.rnbc);
4876	/* RLEC is inaccurate on some hardware, calculate our own. */
4877	device_printf(dev, "Receive Length Errors = %lld\n",
4878	    ((long long)adapter->stats.roc + (long long)adapter->stats.ruc));
4879	device_printf(dev, "Receive errors = %lld\n",
4880	    (long long)adapter->stats.rxerrc);
4881	device_printf(dev, "Crc errors = %lld\n",
4882	    (long long)adapter->stats.crcerrs);
4883	device_printf(dev, "Alignment errors = %lld\n",
4884	    (long long)adapter->stats.algnerrc);
4885	device_printf(dev, "Carrier extension errors = %lld\n",
4886	    (long long)adapter->stats.cexterr);
4887	device_printf(dev, "RX overruns = %ld\n", adapter->rx_overruns);
4888	device_printf(dev, "watchdog timeouts = %ld\n",
4889	    adapter->watchdog_events);
4890	device_printf(dev, "XON Rcvd = %lld\n",
4891	    (long long)adapter->stats.xonrxc);
4892	device_printf(dev, "XON Xmtd = %lld\n",
4893	    (long long)adapter->stats.xontxc);
4894	device_printf(dev, "XOFF Rcvd = %lld\n",
4895	    (long long)adapter->stats.xoffrxc);
4896	device_printf(dev, "XOFF Xmtd = %lld\n",
4897	    (long long)adapter->stats.xofftxc);
4898	device_printf(dev, "Good Packets Rcvd = %lld\n",
4899	    (long long)adapter->stats.gprc);
4900	device_printf(dev, "Good Packets Xmtd = %lld\n",
4901	    (long long)adapter->stats.gptc);
4902	device_printf(dev, "TSO Contexts Xmtd = %lld\n",
4903	    (long long)adapter->stats.tsctc);
4904	device_printf(dev, "TSO Contexts Failed = %lld\n",
4905	    (long long)adapter->stats.tsctfc);
4906}
4907
4908static int
4909em_sysctl_debug_info(SYSCTL_HANDLER_ARGS)
4910{
4911	struct adapter *adapter;
4912	int error;
4913	int result;
4914
4915	result = -1;
4916	error = sysctl_handle_int(oidp, &result, 0, req);
4917
4918	if (error || !req->newptr)
4919		return (error);
4920
4921	if (result == 1) {
4922		adapter = (struct adapter *)arg1;
4923		em_print_debug_info(adapter);
4924	}
4925
4926	return (error);
4927}
4928
4929
4930static int
4931em_sysctl_stats(SYSCTL_HANDLER_ARGS)
4932{
4933	struct adapter *adapter;
4934	int error;
4935	int result;
4936
4937	result = -1;
4938	error = sysctl_handle_int(oidp, &result, 0, req);
4939
4940	if (error || !req->newptr)
4941		return (error);
4942
4943	if (result == 1) {
4944		adapter = (struct adapter *)arg1;
4945		em_print_hw_stats(adapter);
4946	}
4947
4948	return (error);
4949}
4950
4951static int
4952em_sysctl_int_delay(SYSCTL_HANDLER_ARGS)
4953{
4954	struct em_int_delay_info *info;
4955	struct adapter *adapter;
4956	uint32_t regval;
4957	int error;
4958	int usecs;
4959	int ticks;
4960
4961	info = (struct em_int_delay_info *)arg1;
4962	usecs = info->value;
4963	error = sysctl_handle_int(oidp, &usecs, 0, req);
4964	if (error != 0 || req->newptr == NULL)
4965		return (error);
4966	if (usecs < 0 || usecs > EM_TICKS_TO_USECS(65535))
4967		return (EINVAL);
4968	info->value = usecs;
4969	ticks = EM_USECS_TO_TICKS(usecs);
4970
4971	adapter = info->adapter;
4972
4973	EM_LOCK(adapter);
4974	regval = E1000_READ_OFFSET(&adapter->hw, info->offset);
4975	regval = (regval & ~0xffff) | (ticks & 0xffff);
4976	/* Handle a few special cases. */
4977	switch (info->offset) {
4978	case E1000_RDTR:
4979		break;
4980	case E1000_TIDV:
4981		if (ticks == 0) {
4982			adapter->txd_cmd &= ~E1000_TXD_CMD_IDE;
4983			/* Don't write 0 into the TIDV register. */
4984			regval++;
4985		} else
4986			if (adapter->hw.mac.type != e1000_82575)
4987				adapter->txd_cmd |= E1000_TXD_CMD_IDE;
4988		break;
4989	}
4990	E1000_WRITE_OFFSET(&adapter->hw, info->offset, regval);
4991	EM_UNLOCK(adapter);
4992	return (0);
4993}
4994
4995static void
4996em_add_int_delay_sysctl(struct adapter *adapter, const char *name,
4997	const char *description, struct em_int_delay_info *info,
4998	int offset, int value)
4999{
5000	info->adapter = adapter;
5001	info->offset = offset;
5002	info->value = value;
5003	SYSCTL_ADD_PROC(device_get_sysctl_ctx(adapter->dev),
5004	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
5005	    OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW,
5006	    info, 0, em_sysctl_int_delay, "I", description);
5007}
5008
5009#ifndef DEVICE_POLLING
5010static void
5011em_add_rx_process_limit(struct adapter *adapter, const char *name,
5012	const char *description, int *limit, int value)
5013{
5014	*limit = value;
5015	SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev),
5016	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
5017	    OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW, limit, value, description);
5018}
5019#endif
5020