/******************************************************************************

  Copyright (c) 2001-2010, Intel Corporation
  All rights reserved.

  Redistribution and use in source and binary forms, with or without
  modification, are permitted provided that the following conditions are met:

   1. Redistributions of source code must retain the above copyright notice,
      this list of conditions and the following disclaimer.

   2. Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.

   3. Neither the name of the Intel Corporation nor the names of its
      contributors may be used to endorse or promote products derived from
      this software without specific prior written permission.

  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  POSSIBILITY OF SUCH DAMAGE.

******************************************************************************/
/*$FreeBSD: head/sys/dev/e1000/if_em.c 208103 2010-05-14 22:18:34Z jfv $*/

#ifdef HAVE_KERNEL_OPTION_HEADERS
#include "opt_device_polling.h"
#include "opt_inet.h"
#endif

#include <sys/param.h>
#include <sys/systm.h>
#if __FreeBSD_version >= 800000
#include <sys/buf_ring.h>
#endif
#include <sys/bus.h>
#include <sys/endian.h>
#include <sys/kernel.h>
#include <sys/kthread.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/module.h>
#include <sys/rman.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/sysctl.h>
#include <sys/taskqueue.h>
#include <sys/eventhandler.h>
#include <machine/bus.h>
#include <machine/resource.h>

#include <net/bpf.h>
#include <net/ethernet.h>
#include <net/if.h>
#include <net/if_arp.h>
#include <net/if_dl.h>
#include <net/if_media.h>

#include <net/if_types.h>
#include <net/if_vlan_var.h>

#include <netinet/in_systm.h>
#include <netinet/in.h>
#include <netinet/if_ether.h>
#include <netinet/ip.h>
#include <netinet/ip6.h>
#include <netinet/tcp.h>
#include <netinet/udp.h>

#include <machine/in_cksum.h>
#include <dev/led/led.h>
#include <dev/pci/pcivar.h>
#include <dev/pci/pcireg.h>

#include "e1000_api.h"
#include "e1000_82571.h"
#include "if_em.h"

/*********************************************************************
 *  Set this to one to display debug statistics
 *********************************************************************/
int	em_display_debug_stats = 0;

/*********************************************************************
 *  Driver version:
 *********************************************************************/
char em_driver_version[] = "7.0.5";


/*********************************************************************
 *  PCI Device ID Table
 *
 *  Used by em_probe to select which devices to attach to
 *  Last field stores an index into em_strings
 *  Last entry must be all 0s
 *
 *  { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index }
 *********************************************************************/

static em_vendor_info_t em_vendor_info_array[] =
{
	/* Intel(R) PRO/1000 Network Connection */
	{ 0x8086, E1000_DEV_ID_82571EB_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_SERDES_DUAL,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_SERDES_QUAD,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER_LP,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_QUAD_FIBER,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571PT_QUAD_COPPER,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82572EI_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82572EI_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82572EI_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82572EI,		PCI_ANY_ID, PCI_ANY_ID, 0},

	{ 0x8086, E1000_DEV_ID_82573E,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82573E_IAMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82573L,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82583V,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_SPT,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_SPT,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_DPT,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_DPT,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IGP_M_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IGP_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IGP_C,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IFE,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IFE_GT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IFE_G,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IGP_M,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_82567V_3,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IGP_M_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IGP_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IGP_C,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IGP_M,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IGP_M_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IFE,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IFE_GT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IFE_G,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_BM,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82574L,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82574LA,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH10_R_BM_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH10_R_BM_LF,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH10_R_BM_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH10_D_BM_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH10_D_BM_LF,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_M_HV_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_M_HV_LC,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_D_HV_DM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_D_HV_DC,	PCI_ANY_ID, PCI_ANY_ID, 0},
	/* required last entry */
	{ 0, 0, 0, 0, 0}
};

/*********************************************************************
 *  Table of branding strings for all supported NICs.
 *********************************************************************/

static char *em_strings[] = {
	"Intel(R) PRO/1000 Network Connection"
};

/*********************************************************************
 *  Function prototypes
 *********************************************************************/
static int	em_probe(device_t);
static int	em_attach(device_t);
static int	em_detach(device_t);
static int	em_shutdown(device_t);
static int	em_suspend(device_t);
static int	em_resume(device_t);
static void	em_start(struct ifnet *);
static void	em_start_locked(struct ifnet *, struct tx_ring *);
#ifdef EM_MULTIQUEUE
static int	em_mq_start(struct ifnet *, struct mbuf *);
static int	em_mq_start_locked(struct ifnet *,
		    struct tx_ring *, struct mbuf *);
static void	em_qflush(struct ifnet *);
#endif
static int	em_ioctl(struct ifnet *, u_long, caddr_t);
static void	em_init(void *);
static void	em_init_locked(struct adapter *);
static void	em_stop(void *);
static void	em_media_status(struct ifnet *, struct ifmediareq *);
static int	em_media_change(struct ifnet *);
static void	em_identify_hardware(struct adapter *);
static int	em_allocate_pci_resources(struct adapter *);
static int	em_allocate_legacy(struct adapter *);
static int	em_allocate_msix(struct adapter *);
static int	em_allocate_queues(struct adapter *);
static int	em_setup_msix(struct adapter *);
static void	em_free_pci_resources(struct adapter *);
static void	em_local_timer(void *);
static void	em_reset(struct adapter *);
static void	em_setup_interface(device_t, struct adapter *);

static void	em_setup_transmit_structures(struct adapter *);
static void	em_initialize_transmit_unit(struct adapter *);
static int	em_allocate_transmit_buffers(struct tx_ring *);
static void	em_free_transmit_structures(struct adapter *);
static void	em_free_transmit_buffers(struct tx_ring *);

static int	em_setup_receive_structures(struct adapter *);
static int	em_allocate_receive_buffers(struct rx_ring *);
static void	em_initialize_receive_unit(struct adapter *);
static void	em_free_receive_structures(struct adapter *);
static void	em_free_receive_buffers(struct rx_ring *);

static void	em_enable_intr(struct adapter *);
static void	em_disable_intr(struct adapter *);
static void	em_update_stats_counters(struct adapter *);
static bool	em_txeof(struct tx_ring *);
static int	em_rxeof(struct rx_ring *, int);
#ifndef __NO_STRICT_ALIGNMENT
static int	em_fixup_rx(struct rx_ring *);
#endif
static void	em_receive_checksum(struct e1000_rx_desc *, struct mbuf *);
static void	em_transmit_checksum_setup(struct tx_ring *, struct mbuf *,
		    u32 *, u32 *);
static bool	em_tso_setup(struct tx_ring *, struct mbuf *, u32 *, u32 *);
static void	em_set_promisc(struct adapter *);
static void	em_disable_promisc(struct adapter *);
static void	em_set_multi(struct adapter *);
static void	em_print_hw_stats(struct adapter *);
static void	em_update_link_status(struct adapter *);
static void	em_refresh_mbufs(struct rx_ring *, int);
static void	em_register_vlan(void *, struct ifnet *, u16);
static void	em_unregister_vlan(void *, struct ifnet *, u16);
static void	em_setup_vlan_hw_support(struct adapter *);
static int	em_xmit(struct tx_ring *, struct mbuf **);
static int	em_dma_malloc(struct adapter *, bus_size_t,
		    struct em_dma_alloc *, int);
static void	em_dma_free(struct adapter *, struct em_dma_alloc *);
static void	em_print_debug_info(struct adapter *);
static void	em_print_nvm_info(struct adapter *);
static int	em_is_valid_ether_addr(u8 *);
static int	em_sysctl_stats(SYSCTL_HANDLER_ARGS);
static int	em_sysctl_debug_info(SYSCTL_HANDLER_ARGS);
static int	em_sysctl_int_delay(SYSCTL_HANDLER_ARGS);
static void	em_add_int_delay_sysctl(struct adapter *, const char *,
		    const char *, struct em_int_delay_info *, int, int);
/* Management and WOL Support */
static void	em_init_manageability(struct adapter *);
static void	em_release_manageability(struct adapter *);
static void	em_get_hw_control(struct adapter *);
static void	em_release_hw_control(struct adapter *);
static void	em_get_wakeup(device_t);
static void	em_enable_wakeup(device_t);
static int	em_enable_phy_wakeup(struct adapter *);
static void	em_led_func(void *, int);

static int	em_irq_fast(void *);

/* MSIX handlers */
static void	em_msix_tx(void *);
static void	em_msix_rx(void *);
static void	em_msix_link(void *);
static void	em_handle_tx(void *context, int pending);
static void	em_handle_rx(void *context, int pending);
static void	em_handle_link(void *context, int pending);

static void	em_add_rx_process_limit(struct adapter *, const char *,
		    const char *, int *, int);

#ifdef DEVICE_POLLING
static poll_handler_t em_poll;
#endif /* DEVICE_POLLING */

/*********************************************************************
 *  FreeBSD Device Interface Entry Points
 *********************************************************************/

static device_method_t em_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe, em_probe),
	DEVMETHOD(device_attach, em_attach),
	DEVMETHOD(device_detach, em_detach),
	DEVMETHOD(device_shutdown, em_shutdown),
	DEVMETHOD(device_suspend, em_suspend),
	DEVMETHOD(device_resume, em_resume),
	{0, 0}
};

static driver_t em_driver = {
	"em", em_methods, sizeof(struct adapter),
};

devclass_t em_devclass;
DRIVER_MODULE(em, pci, em_driver, em_devclass, 0, 0);
MODULE_DEPEND(em, pci, 1, 1, 1);
MODULE_DEPEND(em, ether, 1, 1, 1);

/*********************************************************************
 *  Tunable default values.
 *********************************************************************/

#define EM_TICKS_TO_USECS(ticks)	((1024 * (ticks) + 500) / 1000)
#define EM_USECS_TO_TICKS(usecs)	((1000 * (usecs) + 512) / 1024)
#define M_TSO_LEN			66
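
/*
 * Worked example (illustrative only, assuming EM_TIDV carries its
 * historical default of 64 ticks): EM_TICKS_TO_USECS(64) yields
 * (1024 * 64 + 500) / 1000 = 66 usecs, and EM_USECS_TO_TICKS(66)
 * yields (1000 * 66 + 512) / 1024 = 64 ticks, so the rounding
 * constants let typical values round-trip between the two units.
 */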

/* Allow common code without TSO */
#ifndef CSUM_TSO
#define CSUM_TSO	0
#endif

static int em_tx_int_delay_dflt = EM_TICKS_TO_USECS(EM_TIDV);
static int em_rx_int_delay_dflt = EM_TICKS_TO_USECS(EM_RDTR);
TUNABLE_INT("hw.em.tx_int_delay", &em_tx_int_delay_dflt);
TUNABLE_INT("hw.em.rx_int_delay", &em_rx_int_delay_dflt);

static int em_tx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_TADV);
static int em_rx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_RADV);
TUNABLE_INT("hw.em.tx_abs_int_delay", &em_tx_abs_int_delay_dflt);
TUNABLE_INT("hw.em.rx_abs_int_delay", &em_rx_abs_int_delay_dflt);

static int em_rxd = EM_DEFAULT_RXD;
static int em_txd = EM_DEFAULT_TXD;
TUNABLE_INT("hw.em.rxd", &em_rxd);
TUNABLE_INT("hw.em.txd", &em_txd);

static int em_smart_pwr_down = FALSE;
TUNABLE_INT("hw.em.smart_pwr_down", &em_smart_pwr_down);

/* Controls whether promiscuous mode also shows bad packets */
static int em_debug_sbp = FALSE;
TUNABLE_INT("hw.em.sbp", &em_debug_sbp);

/* Local controls for MSI/MSIX */
#ifdef EM_MULTIQUEUE
static int em_enable_msix = TRUE;
static int em_msix_queues = 2; /* for 82574, can be 1 or 2 */
#else
static int em_enable_msix = FALSE;
static int em_msix_queues = 0; /* disable */
#endif
TUNABLE_INT("hw.em.enable_msix", &em_enable_msix);
TUNABLE_INT("hw.em.msix_queues", &em_msix_queues);

/* How many packets rxeof tries to clean at a time */
static int em_rx_process_limit = 100;
TUNABLE_INT("hw.em.rx_process_limit", &em_rx_process_limit);

/* Flow control setting - default to FULL */
static int em_fc_setting = e1000_fc_full;
TUNABLE_INT("hw.em.fc_setting", &em_fc_setting);
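
/*
 * Illustrative sketch: the TUNABLE_INT() knobs above are read from the
 * kernel environment at boot, so they would normally be set from
 * /boot/loader.conf.  The names come straight from the declarations
 * above; the values here are examples only, not recommendations:
 *
 *	hw.em.rxd="1024"
 *	hw.em.txd="1024"
 *	hw.em.rx_process_limit="200"
 *	hw.em.fc_setting="0"
 */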

/*
** Shadow VFTA table; this is needed because the
** real VLAN filter table gets cleared during a
** soft reset and the driver needs to be able to
** repopulate it.
*/
static u32 em_shadow_vfta[EM_VFTA_SIZE];

/* Global used in WOL setup with multiport cards */
static int global_quad_port_a = 0;

/*********************************************************************
 *  Device identification routine
 *
 *  em_probe determines whether the driver should be loaded for an
 *  adapter, based on the PCI vendor/device ID of that adapter.
 *
 *  return BUS_PROBE_DEFAULT on success, positive on failure
 *********************************************************************/

static int
em_probe(device_t dev)
{
	char		adapter_name[60];
	u16		pci_vendor_id = 0;
	u16		pci_device_id = 0;
	u16		pci_subvendor_id = 0;
	u16		pci_subdevice_id = 0;
	em_vendor_info_t *ent;

	INIT_DEBUGOUT("em_probe: begin");

	pci_vendor_id = pci_get_vendor(dev);
	if (pci_vendor_id != EM_VENDOR_ID)
		return (ENXIO);

	pci_device_id = pci_get_device(dev);
	pci_subvendor_id = pci_get_subvendor(dev);
	pci_subdevice_id = pci_get_subdevice(dev);

	ent = em_vendor_info_array;
	while (ent->vendor_id != 0) {
		if ((pci_vendor_id == ent->vendor_id) &&
		    (pci_device_id == ent->device_id) &&

		    ((pci_subvendor_id == ent->subvendor_id) ||
		    (ent->subvendor_id == PCI_ANY_ID)) &&

		    ((pci_subdevice_id == ent->subdevice_id) ||
		    (ent->subdevice_id == PCI_ANY_ID))) {
			sprintf(adapter_name, "%s %s",
				em_strings[ent->index],
				em_driver_version);
			device_set_desc_copy(dev, adapter_name);
			return (BUS_PROBE_DEFAULT);
		}
		ent++;
	}

	return (ENXIO);
}

/*********************************************************************
 *  Device initialization routine
 *
 *  The attach entry point is called when the driver is being loaded.
 *  This routine identifies the type of hardware, allocates all resources
 *  and initializes the hardware.
 *
 *  return 0 on success, positive on failure
 *********************************************************************/

static int
em_attach(device_t dev)
{
	struct adapter	*adapter;
	int		error = 0;

	INIT_DEBUGOUT("em_attach: begin");

	adapter = device_get_softc(dev);
	adapter->dev = adapter->osdep.dev = dev;
	EM_CORE_LOCK_INIT(adapter, device_get_nameunit(dev));

	/* SYSCTL stuff */
	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
	    OID_AUTO, "debug", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
	    em_sysctl_debug_info, "I", "Debug Information");

	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
	    OID_AUTO, "stats", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
	    em_sysctl_stats, "I", "Statistics");

	callout_init_mtx(&adapter->timer, &adapter->core_mtx, 0);

	/* Determine hardware and mac info */
	em_identify_hardware(adapter);

	/* Setup PCI resources */
	if (em_allocate_pci_resources(adapter)) {
		device_printf(dev, "Allocation of PCI resources failed\n");
		error = ENXIO;
		goto err_pci;
	}

	/*
	** For ICH8 and family we need to
	** map the flash memory, and this
	** must happen after the MAC is
	** identified
	*/
	if ((adapter->hw.mac.type == e1000_ich8lan) ||
	    (adapter->hw.mac.type == e1000_pchlan) ||
	    (adapter->hw.mac.type == e1000_ich9lan) ||
	    (adapter->hw.mac.type == e1000_ich10lan)) {
		int rid = EM_BAR_TYPE_FLASH;
		adapter->flash = bus_alloc_resource_any(dev,
		    SYS_RES_MEMORY, &rid, RF_ACTIVE);
		if (adapter->flash == NULL) {
			device_printf(dev, "Mapping of Flash failed\n");
			error = ENXIO;
			goto err_pci;
		}
		/* This is used in the shared code */
		adapter->hw.flash_address = (u8 *)adapter->flash;
		adapter->osdep.flash_bus_space_tag =
		    rman_get_bustag(adapter->flash);
		adapter->osdep.flash_bus_space_handle =
		    rman_get_bushandle(adapter->flash);
	}

	/* Do Shared Code initialization */
	if (e1000_setup_init_funcs(&adapter->hw, TRUE)) {
		device_printf(dev, "Setup of Shared code failed\n");
		error = ENXIO;
		goto err_pci;
	}

	e1000_get_bus_info(&adapter->hw);

	/* Set up some sysctls for the tunable interrupt delays */
	em_add_int_delay_sysctl(adapter, "rx_int_delay",
	    "receive interrupt delay in usecs", &adapter->rx_int_delay,
	    E1000_REGISTER(&adapter->hw, E1000_RDTR), em_rx_int_delay_dflt);
	em_add_int_delay_sysctl(adapter, "tx_int_delay",
	    "transmit interrupt delay in usecs", &adapter->tx_int_delay,
	    E1000_REGISTER(&adapter->hw, E1000_TIDV), em_tx_int_delay_dflt);
	em_add_int_delay_sysctl(adapter, "rx_abs_int_delay",
	    "receive interrupt delay limit in usecs",
	    &adapter->rx_abs_int_delay,
	    E1000_REGISTER(&adapter->hw, E1000_RADV),
	    em_rx_abs_int_delay_dflt);
	em_add_int_delay_sysctl(adapter, "tx_abs_int_delay",
	    "transmit interrupt delay limit in usecs",
	    &adapter->tx_abs_int_delay,
	    E1000_REGISTER(&adapter->hw, E1000_TADV),
	    em_tx_abs_int_delay_dflt);
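
	/*
	 * Illustrative note: the knobs registered above land under the
	 * device's sysctl tree, so on unit 0 they could be read or set
	 * at runtime roughly as follows (example values only):
	 *
	 *	sysctl dev.em.0.rx_int_delay=0
	 *	sysctl dev.em.0.tx_int_delay=66
	 */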

	/* Sysctls for limiting the amount of work done in the taskqueue */
	em_add_rx_process_limit(adapter, "rx_processing_limit",
	    "max number of rx packets to process", &adapter->rx_process_limit,
	    em_rx_process_limit);

	/*
	 * Validate the number of transmit and receive descriptors. It
	 * must not exceed the hardware maximum and must be a multiple
	 * of EM_DBA_ALIGN.
	 */
	if (((em_txd * sizeof(struct e1000_tx_desc)) % EM_DBA_ALIGN) != 0 ||
	    (em_txd > EM_MAX_TXD) || (em_txd < EM_MIN_TXD)) {
		device_printf(dev, "Using %d TX descriptors instead of %d!\n",
		    EM_DEFAULT_TXD, em_txd);
		adapter->num_tx_desc = EM_DEFAULT_TXD;
	} else
		adapter->num_tx_desc = em_txd;

	if (((em_rxd * sizeof(struct e1000_rx_desc)) % EM_DBA_ALIGN) != 0 ||
	    (em_rxd > EM_MAX_RXD) || (em_rxd < EM_MIN_RXD)) {
		device_printf(dev, "Using %d RX descriptors instead of %d!\n",
		    EM_DEFAULT_RXD, em_rxd);
		adapter->num_rx_desc = EM_DEFAULT_RXD;
	} else
		adapter->num_rx_desc = em_rxd;

	adapter->hw.mac.autoneg = DO_AUTO_NEG;
	adapter->hw.phy.autoneg_wait_to_complete = FALSE;
	adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;

	/* Copper options */
	if (adapter->hw.phy.media_type == e1000_media_type_copper) {
		adapter->hw.phy.mdix = AUTO_ALL_MODES;
		adapter->hw.phy.disable_polarity_correction = FALSE;
		adapter->hw.phy.ms_type = EM_MASTER_SLAVE;
	}

	/*
	 * Set the frame limits assuming
	 * standard ethernet sized frames.
	 */
	adapter->max_frame_size = ETHERMTU + ETHER_HDR_LEN + ETHERNET_FCS_SIZE;
	adapter->min_frame_size = ETH_ZLEN + ETHERNET_FCS_SIZE;

	/*
	 * This controls when hardware reports transmit completion
	 * status.
	 */
	adapter->hw.mac.report_tx_early = 1;

	/*
	** Get queue/ring memory
	*/
	if (em_allocate_queues(adapter)) {
		error = ENOMEM;
		goto err_pci;
	}

	/*
	** Start from a known state: this is
	** important for reading the NVM and
	** MAC address from it.
	*/
	e1000_reset_hw(&adapter->hw);

	/* Make sure we have a good EEPROM before we read from it */
	if (e1000_validate_nvm_checksum(&adapter->hw) < 0) {
		/*
		** Some PCI-E parts fail the first check due to
		** the link being in a sleep state; call it again,
		** and if it fails a second time it is a real issue.
		*/
		if (e1000_validate_nvm_checksum(&adapter->hw) < 0) {
			device_printf(dev,
			    "The EEPROM Checksum Is Not Valid\n");
			error = EIO;
			goto err_late;
		}
	}

	/* Copy the permanent MAC address out of the EEPROM */
	if (e1000_read_mac_addr(&adapter->hw) < 0) {
		device_printf(dev, "EEPROM read error while reading MAC"
		    " address\n");
		error = EIO;
		goto err_late;
	}

	if (!em_is_valid_ether_addr(adapter->hw.mac.addr)) {
		device_printf(dev, "Invalid MAC address\n");
		error = EIO;
		goto err_late;
	}

	/*
	**  Do interrupt configuration
	*/
	if (adapter->msix > 1) /* Do MSIX */
		error = em_allocate_msix(adapter);
	else  /* MSI or Legacy */
		error = em_allocate_legacy(adapter);
	if (error)
		goto err_late;

	/*
	 * Get Wake-on-Lan and Management info for later use
	 */
	em_get_wakeup(dev);

	/* Setup OS specific network interface */
	em_setup_interface(dev, adapter);

	em_reset(adapter);

	/* Initialize statistics */
	em_update_stats_counters(adapter);

	adapter->hw.mac.get_link_status = 1;
	em_update_link_status(adapter);

	/* Indicate SOL/IDER usage */
	if (e1000_check_reset_block(&adapter->hw))
		device_printf(dev,
		    "PHY reset is blocked due to SOL/IDER session.\n");

	/* Register for VLAN events */
	adapter->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
	    em_register_vlan, adapter, EVENTHANDLER_PRI_FIRST);
	adapter->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
	    em_unregister_vlan, adapter, EVENTHANDLER_PRI_FIRST);

	/* Non-AMT based hardware can now take control from firmware */
	if (adapter->has_manage && !adapter->has_amt)
		em_get_hw_control(adapter);

	/* Tell the stack that the interface is not active */
	adapter->ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);

	adapter->led_dev = led_create(em_led_func, adapter,
	    device_get_nameunit(dev));

	INIT_DEBUGOUT("em_attach: end");

	return (0);

err_late:
	em_free_transmit_structures(adapter);
	em_free_receive_structures(adapter);
	em_release_hw_control(adapter);
err_pci:
	em_free_pci_resources(adapter);
	EM_CORE_LOCK_DESTROY(adapter);

	return (error);
}

/*********************************************************************
 *  Device removal routine
 *
 *  The detach entry point is called when the driver is being removed.
 *  This routine stops the adapter and deallocates all the resources
 *  that were allocated for driver operation.
 *
 *  return 0 on success, positive on failure
 *********************************************************************/

static int
em_detach(device_t dev)
{
	struct adapter	*adapter = device_get_softc(dev);
	struct ifnet	*ifp = adapter->ifp;

	INIT_DEBUGOUT("em_detach: begin");

	/* Make sure VLANS are not using driver */
	if (adapter->ifp->if_vlantrunk != NULL) {
		device_printf(dev, "Vlan in use, detach first\n");
		return (EBUSY);
	}

#ifdef DEVICE_POLLING
	if (ifp->if_capenable & IFCAP_POLLING)
		ether_poll_deregister(ifp);
#endif

	if (adapter->led_dev != NULL)
		led_destroy(adapter->led_dev);

	EM_CORE_LOCK(adapter);
	adapter->in_detach = 1;
	em_stop(adapter);
	EM_CORE_UNLOCK(adapter);
	EM_CORE_LOCK_DESTROY(adapter);

	e1000_phy_hw_reset(&adapter->hw);

	em_release_manageability(adapter);
	em_release_hw_control(adapter);

	/* Unregister VLAN events */
	if (adapter->vlan_attach != NULL)
		EVENTHANDLER_DEREGISTER(vlan_config, adapter->vlan_attach);
	if (adapter->vlan_detach != NULL)
		EVENTHANDLER_DEREGISTER(vlan_unconfig, adapter->vlan_detach);

	ether_ifdetach(adapter->ifp);
	callout_drain(&adapter->timer);

	em_free_pci_resources(adapter);
	bus_generic_detach(dev);
	if_free(ifp);

	em_free_transmit_structures(adapter);
	em_free_receive_structures(adapter);

	em_release_hw_control(adapter);

	return (0);
}

/*********************************************************************
 *
 *  Shutdown entry point
 *
 **********************************************************************/

static int
em_shutdown(device_t dev)
{
	return em_suspend(dev);
}

/*
 * Suspend/resume device methods.
 */
static int
em_suspend(device_t dev)
{
	struct adapter *adapter = device_get_softc(dev);

	EM_CORE_LOCK(adapter);

	em_release_manageability(adapter);
	em_release_hw_control(adapter);
	em_enable_wakeup(dev);

	EM_CORE_UNLOCK(adapter);

	return bus_generic_suspend(dev);
}

static int
em_resume(device_t dev)
{
	struct adapter *adapter = device_get_softc(dev);
	struct ifnet *ifp = adapter->ifp;

	EM_CORE_LOCK(adapter);
	em_init_locked(adapter);
	em_init_manageability(adapter);
	EM_CORE_UNLOCK(adapter);
	em_start(ifp);

	return bus_generic_resume(dev);
}


/*********************************************************************
 *  Transmit entry point
 *
 *  em_start is called by the stack to initiate a transmit.
 *  The driver will remain in this routine as long as there are
 *  packets to transmit and transmit resources are available.
 *  In case resources are not available, the stack is notified
 *  and the packet is requeued.
 **********************************************************************/

#ifdef EM_MULTIQUEUE
static int
em_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr, struct mbuf *m)
{
	struct adapter	*adapter = txr->adapter;
	struct mbuf	*next;
	int		err = 0, enq = 0;

	if ((ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) !=
	    IFF_DRV_RUNNING || adapter->link_active == 0) {
		if (m != NULL)
			err = drbr_enqueue(ifp, txr->br, m);
		return (err);
	}

	/* Call cleanup if number of TX descriptors low */
	if (txr->tx_avail <= EM_TX_CLEANUP_THRESHOLD)
		em_txeof(txr);

	enq = 0;
	if (m == NULL) {
		next = drbr_dequeue(ifp, txr->br);
	} else if (drbr_needs_enqueue(ifp, txr->br)) {
		if ((err = drbr_enqueue(ifp, txr->br, m)) != 0)
			return (err);
		next = drbr_dequeue(ifp, txr->br);
	} else
		next = m;

	/* Process the queue */
	while (next != NULL) {
		if ((err = em_xmit(txr, &next)) != 0) {
			if (next != NULL)
				err = drbr_enqueue(ifp, txr->br, next);
			break;
		}
		enq++;
		drbr_stats_update(ifp, next->m_pkthdr.len, next->m_flags);
		ETHER_BPF_MTAP(ifp, next);
		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
			break;
		if (txr->tx_avail < EM_MAX_SCATTER) {
			ifp->if_drv_flags |= IFF_DRV_OACTIVE;
			break;
		}
		next = drbr_dequeue(ifp, txr->br);
	}

	if (enq > 0) {
		/* Set the watchdog */
		txr->watchdog_check = TRUE;
		txr->watchdog_time = ticks;
	}
	return (err);
}

/*
** Multiqueue-capable stack interface; this is
** not yet truly multiqueue, but that is coming...
*/
static int
em_mq_start(struct ifnet *ifp, struct mbuf *m)
{
	struct adapter	*adapter = ifp->if_softc;
	struct tx_ring	*txr;
	int		i, error = 0;

	/* Which queue to use */
	if ((m->m_flags & M_FLOWID) != 0)
		i = m->m_pkthdr.flowid % adapter->num_queues;
	else
		i = curcpu % adapter->num_queues;

	txr = &adapter->tx_rings[i];

	if (EM_TX_TRYLOCK(txr)) {
		error = em_mq_start_locked(ifp, txr, m);
		EM_TX_UNLOCK(txr);
	} else
		error = drbr_enqueue(ifp, txr->br, m);

	return (error);
}

/*
** Flush all ring buffers
*/
static void
em_qflush(struct ifnet *ifp)
{
	struct adapter	*adapter = ifp->if_softc;
	struct tx_ring	*txr = adapter->tx_rings;
	struct mbuf	*m;

	for (int i = 0; i < adapter->num_queues; i++, txr++) {
		EM_TX_LOCK(txr);
		while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
			m_freem(m);
		EM_TX_UNLOCK(txr);
	}
	if_qflush(ifp);
}

#endif /* EM_MULTIQUEUE */

static void
em_start_locked(struct ifnet *ifp, struct tx_ring *txr)
{
	struct adapter	*adapter = ifp->if_softc;
	struct mbuf	*m_head;

	EM_TX_LOCK_ASSERT(txr);

	if ((ifp->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) !=
	    IFF_DRV_RUNNING)
		return;

	if (!adapter->link_active)
		return;

	/* Call cleanup if number of TX descriptors low */
	if (txr->tx_avail <= EM_TX_CLEANUP_THRESHOLD)
		em_txeof(txr);

	while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
		if (txr->tx_avail < EM_MAX_SCATTER) {
			ifp->if_drv_flags |= IFF_DRV_OACTIVE;
			break;
		}
		IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
		if (m_head == NULL)
			break;
		/*
		 *  Encapsulation can modify our pointer, and/or make it
		 *  NULL on failure.  In that event, we can't requeue.
		 */
		if (em_xmit(txr, &m_head)) {
			if (m_head == NULL)
				break;
			ifp->if_drv_flags |= IFF_DRV_OACTIVE;
			IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
			break;
		}

		/* Send a copy of the frame to the BPF listener */
		ETHER_BPF_MTAP(ifp, m_head);

		/* Set timeout in case hardware has problems transmitting. */
		txr->watchdog_time = ticks;
		txr->watchdog_check = TRUE;
	}

	return;
}

static void
em_start(struct ifnet *ifp)
{
	struct adapter	*adapter = ifp->if_softc;
	struct tx_ring	*txr = adapter->tx_rings;

	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
		EM_TX_LOCK(txr);
		em_start_locked(ifp, txr);
		EM_TX_UNLOCK(txr);
	}
	return;
}

/*********************************************************************
 *  Ioctl entry point
 *
 *  em_ioctl is called when the user wants to configure the
 *  interface.
 *
 *  return 0 on success, positive on failure
 **********************************************************************/

static int
em_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
{
	struct adapter	*adapter = ifp->if_softc;
	struct ifreq *ifr = (struct ifreq *)data;
#ifdef INET
	struct ifaddr *ifa = (struct ifaddr *)data;
#endif
	int error = 0;

	if (adapter->in_detach)
		return (error);

	switch (command) {
	case SIOCSIFADDR:
#ifdef INET
		if (ifa->ifa_addr->sa_family == AF_INET) {
			/*
			 * XXX
			 * Since resetting hardware takes a very long time
			 * and results in link renegotiation, we only
			 * initialize the hardware when it is absolutely
			 * required.
			 */
			ifp->if_flags |= IFF_UP;
			if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) {
				EM_CORE_LOCK(adapter);
				em_init_locked(adapter);
				EM_CORE_UNLOCK(adapter);
			}
			arp_ifinit(ifp, ifa);
		} else
#endif
			error = ether_ioctl(ifp, command, data);
		break;
	case SIOCSIFMTU:
	    {
		int max_frame_size;

		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFMTU (Set Interface MTU)");

		EM_CORE_LOCK(adapter);
		switch (adapter->hw.mac.type) {
		case e1000_82571:
		case e1000_82572:
		case e1000_ich9lan:
		case e1000_ich10lan:
		case e1000_82574:
		case e1000_80003es2lan:	/* 9K Jumbo Frame size */
			max_frame_size = 9234;
			break;
		case e1000_pchlan:
			max_frame_size = 4096;
			break;
			/* Adapters that do not support jumbo frames */
		case e1000_82583:
		case e1000_ich8lan:
			max_frame_size = ETHER_MAX_LEN;
			break;
		default:
			max_frame_size = MAX_JUMBO_FRAME_SIZE;
		}
		if (ifr->ifr_mtu > max_frame_size - ETHER_HDR_LEN -
		    ETHER_CRC_LEN) {
			EM_CORE_UNLOCK(adapter);
			error = EINVAL;
			break;
		}

		ifp->if_mtu = ifr->ifr_mtu;
		adapter->max_frame_size =
		    ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
		em_init_locked(adapter);
		EM_CORE_UNLOCK(adapter);
		break;
	    }
	case SIOCSIFFLAGS:
		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFFLAGS (Set Interface Flags)");
		EM_CORE_LOCK(adapter);
		if (ifp->if_flags & IFF_UP) {
			if ((ifp->if_drv_flags & IFF_DRV_RUNNING)) {
				if ((ifp->if_flags ^ adapter->if_flags) &
				    (IFF_PROMISC | IFF_ALLMULTI)) {
					em_disable_promisc(adapter);
					em_set_promisc(adapter);
				}
			} else
				em_init_locked(adapter);
		} else
			if (ifp->if_drv_flags & IFF_DRV_RUNNING)
				em_stop(adapter);
		adapter->if_flags = ifp->if_flags;
		EM_CORE_UNLOCK(adapter);
		break;
	case SIOCADDMULTI:
	case SIOCDELMULTI:
		IOCTL_DEBUGOUT("ioctl rcv'd: SIOC(ADD|DEL)MULTI");
		if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
			EM_CORE_LOCK(adapter);
			em_disable_intr(adapter);
			em_set_multi(adapter);
#ifdef DEVICE_POLLING
			if (!(ifp->if_capenable & IFCAP_POLLING))
#endif
				em_enable_intr(adapter);
			EM_CORE_UNLOCK(adapter);
		}
		break;
	case SIOCSIFMEDIA:
		/* Check SOL/IDER usage */
		EM_CORE_LOCK(adapter);
		if (e1000_check_reset_block(&adapter->hw)) {
			EM_CORE_UNLOCK(adapter);
			device_printf(adapter->dev, "Media change is"
			    " blocked due to SOL/IDER session.\n");
			break;
		}
		EM_CORE_UNLOCK(adapter);
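		/* FALLTHROUGH: SIOCSIFMEDIA shares the handling below */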
	case SIOCGIFMEDIA:
		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCxIFMEDIA (Get/Set Interface Media)");
		error = ifmedia_ioctl(ifp, ifr, &adapter->media, command);
		break;
	case SIOCSIFCAP:
	    {
		int mask, reinit;

		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFCAP (Set Capabilities)");
		reinit = 0;
		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
#ifdef DEVICE_POLLING
		if (mask & IFCAP_POLLING) {
			if (ifr->ifr_reqcap & IFCAP_POLLING) {
				error = ether_poll_register(em_poll, ifp);
				if (error)
					return (error);
				EM_CORE_LOCK(adapter);
				em_disable_intr(adapter);
				ifp->if_capenable |= IFCAP_POLLING;
				EM_CORE_UNLOCK(adapter);
			} else {
				error = ether_poll_deregister(ifp);
				/* Enable interrupt even in error case */
				EM_CORE_LOCK(adapter);
				em_enable_intr(adapter);
				ifp->if_capenable &= ~IFCAP_POLLING;
				EM_CORE_UNLOCK(adapter);
			}
		}
#endif
		if (mask & IFCAP_HWCSUM) {
			ifp->if_capenable ^= IFCAP_HWCSUM;
			reinit = 1;
		}
		if (mask & IFCAP_TSO4) {
			ifp->if_capenable ^= IFCAP_TSO4;
			reinit = 1;
		}
		if (mask & IFCAP_VLAN_HWTAGGING) {
			ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
			reinit = 1;
		}
		if (mask & IFCAP_VLAN_HWFILTER) {
			ifp->if_capenable ^= IFCAP_VLAN_HWFILTER;
			reinit = 1;
		}
		if ((mask & IFCAP_WOL) &&
		    (ifp->if_capabilities & IFCAP_WOL) != 0) {
			if (mask & IFCAP_WOL_MCAST)
				ifp->if_capenable ^= IFCAP_WOL_MCAST;
			if (mask & IFCAP_WOL_MAGIC)
				ifp->if_capenable ^= IFCAP_WOL_MAGIC;
		}
		if (reinit && (ifp->if_drv_flags & IFF_DRV_RUNNING))
			em_init(adapter);
		VLAN_CAPABILITIES(ifp);
		break;
	    }

	default:
		error = ether_ioctl(ifp, command, data);
		break;
	}

	return (error);
}


/*********************************************************************
 *  Init entry point
 *
 *  This routine is used in two ways. It is used by the stack as
 *  the init entry point in the network interface structure. It is
 *  also used by the driver as a hw/sw initialization routine to
 *  get to a consistent state.
 **********************************************************************/

static void
em_init_locked(struct adapter *adapter)
{
	struct ifnet	*ifp = adapter->ifp;
	device_t	dev = adapter->dev;
	u32		pba;

	INIT_DEBUGOUT("em_init: begin");

	EM_CORE_LOCK_ASSERT(adapter);

	em_disable_intr(adapter);
	callout_stop(&adapter->timer);

	/*
	 * Packet Buffer Allocation (PBA)
	 * Writing PBA sets the receive portion of the buffer;
	 * the remainder is used for the transmit buffer.
	 */
	switch (adapter->hw.mac.type) {
	/* Total Packet Buffer on these is 48K */
	case e1000_82571:
	case e1000_82572:
	case e1000_80003es2lan:
		pba = E1000_PBA_32K; /* 32K for Rx, 16K for Tx */
		break;
	case e1000_82573: /* 82573: Total Packet Buffer is 32K */
		pba = E1000_PBA_12K; /* 12K for Rx, 20K for Tx */
		break;
	case e1000_82574:
	case e1000_82583:
		pba = E1000_PBA_20K; /* 20K for Rx, 20K for Tx */
		break;
	case e1000_ich9lan:
	case e1000_ich10lan:
	case e1000_pchlan:
		pba = E1000_PBA_10K;
		break;
	case e1000_ich8lan:
		pba = E1000_PBA_8K;
		break;
	default:
		if (adapter->max_frame_size > 8192)
			pba = E1000_PBA_40K; /* 40K for Rx, 24K for Tx */
		else
			pba = E1000_PBA_48K; /* 48K for Rx, 16K for Tx */
	}

	INIT_DEBUGOUT1("em_init: pba=%dK", pba);
	E1000_WRITE_REG(&adapter->hw, E1000_PBA, pba);

	/* Get the latest mac address, User can use a LAA */
	bcopy(IF_LLADDR(adapter->ifp), adapter->hw.mac.addr,
	    ETHER_ADDR_LEN);

	/* Put the address into the Receive Address Array */
	e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);

	/*
	 * With the 82571 adapter, RAR[0] may be overwritten
	 * when the other port is reset; we make a duplicate
	 * in RAR[14] for that eventuality, which assures
	 * the interface continues to function.
	 */
	if (adapter->hw.mac.type == e1000_82571) {
		e1000_set_laa_state_82571(&adapter->hw, TRUE);
		e1000_rar_set(&adapter->hw, adapter->hw.mac.addr,
		    E1000_RAR_ENTRIES - 1);
	}

	/* Initialize the hardware */
	em_reset(adapter);
	em_update_link_status(adapter);

	/* Setup VLAN support, basic and offload if available */
	E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);

	/* Use real VLAN Filter support? */
	if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) {
		if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
			/* Use real VLAN Filter support */
			em_setup_vlan_hw_support(adapter);
		else {
			u32 ctrl;
			ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL);
			ctrl |= E1000_CTRL_VME;
			E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl);
		}
	}

	/* Set hardware offload abilities */
	ifp->if_hwassist = 0;
	if (ifp->if_capenable & IFCAP_TXCSUM)
		ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP);
	if (ifp->if_capenable & IFCAP_TSO4)
		ifp->if_hwassist |= CSUM_TSO;

	/* Configure for OS presence */
	em_init_manageability(adapter);

	/* Prepare transmit descriptors and buffers */
	em_setup_transmit_structures(adapter);
	em_initialize_transmit_unit(adapter);

	/* Setup Multicast table */
	em_set_multi(adapter);

	/* Prepare receive descriptors and buffers */
	if (em_setup_receive_structures(adapter)) {
		device_printf(dev, "Could not setup receive structures\n");
		em_stop(adapter);
		return;
	}
	em_initialize_receive_unit(adapter);

	/* Don't lose promiscuous settings */
	em_set_promisc(adapter);

	ifp->if_drv_flags |= IFF_DRV_RUNNING;
	ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;

	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
	e1000_clear_hw_cntrs_base_generic(&adapter->hw);

	/* MSI/X configuration for 82574 */
	if (adapter->hw.mac.type == e1000_82574) {
		int tmp;
		tmp = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
		tmp |= E1000_CTRL_EXT_PBA_CLR;
		E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, tmp);
		/* Set the IVAR - interrupt vector routing. */
		E1000_WRITE_REG(&adapter->hw, E1000_IVAR, adapter->ivars);
	}

#ifdef DEVICE_POLLING
	/*
	 * Only enable interrupts if we are not polling;
	 * make sure they are off otherwise.
	 */
	if (ifp->if_capenable & IFCAP_POLLING)
		em_disable_intr(adapter);
	else
#endif /* DEVICE_POLLING */
		em_enable_intr(adapter);

	/* AMT based hardware can now take control from firmware */
	if (adapter->has_manage && adapter->has_amt)
		em_get_hw_control(adapter);

	/* Don't reset the phy next time init gets called */
	adapter->hw.phy.reset_disable = TRUE;
}

static void
em_init(void *arg)
{
	struct adapter *adapter = arg;

	EM_CORE_LOCK(adapter);
	em_init_locked(adapter);
	EM_CORE_UNLOCK(adapter);
}


#ifdef DEVICE_POLLING
/*********************************************************************
 *
 *  Legacy polling routine: note this only works with single queue
 *
 *********************************************************************/
static int
em_poll(struct ifnet *ifp, enum poll_cmd cmd, int count)
{
	struct adapter *adapter = ifp->if_softc;
	struct tx_ring	*txr = adapter->tx_rings;
	struct rx_ring	*rxr = adapter->rx_rings;
	u32		reg_icr, rx_done = 0;

	EM_CORE_LOCK(adapter);
	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
		EM_CORE_UNLOCK(adapter);
		return (rx_done);
	}

	if (cmd == POLL_AND_CHECK_STATUS) {
		reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
		if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
			callout_stop(&adapter->timer);
			adapter->hw.mac.get_link_status = 1;
			em_update_link_status(adapter);
			callout_reset(&adapter->timer, hz,
			    em_local_timer, adapter);
		}
	}
	EM_CORE_UNLOCK(adapter);

	rx_done = em_rxeof(rxr, count);

	EM_TX_LOCK(txr);
	em_txeof(txr);
#ifdef EM_MULTIQUEUE
	if (!drbr_empty(ifp, txr->br))
		em_mq_start_locked(ifp, txr, NULL);
#else
	if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
		em_start_locked(ifp, txr);
#endif
	EM_TX_UNLOCK(txr);

	return (rx_done);
}
#endif /* DEVICE_POLLING */


/*********************************************************************
 *
 *  Fast Legacy/MSI Combined Interrupt Service routine
 *
 *********************************************************************/
static int
em_irq_fast(void *arg)
{
	struct adapter	*adapter = arg;
	struct ifnet	*ifp;
	u32		reg_icr;

	ifp = adapter->ifp;

	reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);

	/* Hot eject?  */
	if (reg_icr == 0xffffffff)
		return FILTER_STRAY;

	/* Definitely not our interrupt.  */
	if (reg_icr == 0x0)
		return FILTER_STRAY;

	/*
	 * Starting with the 82571 chip, bit 31 should be used to
	 * determine whether the interrupt belongs to us.
	 */
	if (adapter->hw.mac.type >= e1000_82571 &&
	    (reg_icr & E1000_ICR_INT_ASSERTED) == 0)
		return FILTER_STRAY;

	em_disable_intr(adapter);
	taskqueue_enqueue(adapter->tq, &adapter->que_task);

	/* Link status change */
	if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
		adapter->hw.mac.get_link_status = 1;
		taskqueue_enqueue(taskqueue_fast, &adapter->link_task);
	}

	if (reg_icr & E1000_ICR_RXO)
		adapter->rx_overruns++;
	return FILTER_HANDLED;
}

/* Combined RX/TX handler, used by Legacy and MSI */
static void
em_handle_que(void *context, int pending)
{
	struct adapter	*adapter = context;
	struct ifnet	*ifp = adapter->ifp;
	struct tx_ring	*txr = adapter->tx_rings;
	struct rx_ring	*rxr = adapter->rx_rings;
	bool		more_rx;

	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
		more_rx = em_rxeof(rxr, adapter->rx_process_limit);

		EM_TX_LOCK(txr);
		em_txeof(txr);
#ifdef EM_MULTIQUEUE
		if (!drbr_empty(ifp, txr->br))
			em_mq_start_locked(ifp, txr, NULL);
#else
		if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
			em_start_locked(ifp, txr);
#endif
		EM_TX_UNLOCK(txr);
		if (more_rx) {
			taskqueue_enqueue(adapter->tq, &adapter->que_task);
			return;
		}
	}

	em_enable_intr(adapter);
	return;
}


/*********************************************************************
 *
 *  MSIX Interrupt Service Routines
 *
 **********************************************************************/
static void
em_msix_tx(void *arg)
{
	struct tx_ring *txr = arg;
	struct adapter *adapter = txr->adapter;
	bool		more;

	++txr->tx_irq;
	EM_TX_LOCK(txr);
	more = em_txeof(txr);
	EM_TX_UNLOCK(txr);
	if (more)
		taskqueue_enqueue(txr->tq, &txr->tx_task);
	else
		/* Reenable this interrupt */
		E1000_WRITE_REG(&adapter->hw, E1000_IMS, txr->ims);
	return;
}

/*********************************************************************
 *
 *  MSIX RX Interrupt Service routine
 *
 **********************************************************************/

static void
em_msix_rx(void *arg)
{
	struct rx_ring	*rxr = arg;
	struct adapter	*adapter = rxr->adapter;
	bool		more;

	++rxr->rx_irq;
	more = em_rxeof(rxr, adapter->rx_process_limit);
	if (more)
		taskqueue_enqueue(rxr->tq, &rxr->rx_task);
	else
		/* Reenable this interrupt */
		E1000_WRITE_REG(&adapter->hw, E1000_IMS, rxr->ims);
	return;
}

/*********************************************************************
 *
 *  MSIX Link Fast Interrupt Service routine
 *
 **********************************************************************/
static void
em_msix_link(void *arg)
{
	struct adapter	*adapter = arg;
	u32		reg_icr;

	++adapter->link_irq;
	reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);

	if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
		adapter->hw.mac.get_link_status = 1;
		taskqueue_enqueue(taskqueue_fast, &adapter->link_task);
	} else
		E1000_WRITE_REG(&adapter->hw, E1000_IMS,
		    EM_MSIX_LINK | E1000_IMS_LSC);
	return;
}

static void
em_handle_rx(void *context, int pending)
{
	struct rx_ring	*rxr = context;
	struct adapter	*adapter = rxr->adapter;
	bool		more;

	more = em_rxeof(rxr, adapter->rx_process_limit);
	if (more)
		taskqueue_enqueue(rxr->tq, &rxr->rx_task);
	else
		/* Reenable this interrupt */
		E1000_WRITE_REG(&adapter->hw, E1000_IMS, rxr->ims);
}

static void
em_handle_tx(void *context, int pending)
{
	struct tx_ring	*txr = context;
	struct adapter	*adapter = txr->adapter;
	struct ifnet	*ifp = adapter->ifp;

	if (!EM_TX_TRYLOCK(txr))
		return;

	em_txeof(txr);

#ifdef EM_MULTIQUEUE
	if (!drbr_empty(ifp, txr->br))
		em_mq_start_locked(ifp, txr, NULL);
#else
	if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
		em_start_locked(ifp, txr);
#endif
	E1000_WRITE_REG(&adapter->hw, E1000_IMS, txr->ims);
	EM_TX_UNLOCK(txr);
}

static void
em_handle_link(void *context, int pending)
{
	struct adapter	*adapter = context;
	struct ifnet *ifp = adapter->ifp;

	if (!(ifp->if_drv_flags & IFF_DRV_RUNNING))
		return;

	EM_CORE_LOCK(adapter);
	callout_stop(&adapter->timer);
	em_update_link_status(adapter);
	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
	E1000_WRITE_REG(&adapter->hw, E1000_IMS,
	    EM_MSIX_LINK | E1000_IMS_LSC);
	EM_CORE_UNLOCK(adapter);
}


/*********************************************************************
 *
 *  Media Ioctl callback
 *
 *  This routine is called whenever the user queries the status of
 *  the interface using ifconfig.
 *
 **********************************************************************/
static void
em_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
{
	struct adapter *adapter = ifp->if_softc;
	u_char fiber_type = IFM_1000_SX;

	INIT_DEBUGOUT("em_media_status: begin");

	EM_CORE_LOCK(adapter);
	em_update_link_status(adapter);

	ifmr->ifm_status = IFM_AVALID;
	ifmr->ifm_active = IFM_ETHER;

	if (!adapter->link_active) {
		EM_CORE_UNLOCK(adapter);
		return;
	}

	ifmr->ifm_status |= IFM_ACTIVE;

	if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
	    (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
		ifmr->ifm_active |= fiber_type | IFM_FDX;
	} else {
		switch (adapter->link_speed) {
		case 10:
			ifmr->ifm_active |= IFM_10_T;
			break;
		case 100:
			ifmr->ifm_active |= IFM_100_TX;
			break;
		case 1000:
			ifmr->ifm_active |= IFM_1000_T;
			break;
		}
		if (adapter->link_duplex == FULL_DUPLEX)
			ifmr->ifm_active |= IFM_FDX;
		else
			ifmr->ifm_active |= IFM_HDX;
	}
	EM_CORE_UNLOCK(adapter);
}

/*********************************************************************
 *
 *  Media Ioctl callback
 *
 *  This routine is called when the user changes speed/duplex using
 *  the media/mediaopt options with ifconfig.
 *
 **********************************************************************/
static int
em_media_change(struct ifnet *ifp)
{
	struct adapter *adapter = ifp->if_softc;
	struct ifmedia  *ifm = &adapter->media;

	INIT_DEBUGOUT("em_media_change: begin");

	if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
		return (EINVAL);

	EM_CORE_LOCK(adapter);
	switch (IFM_SUBTYPE(ifm->ifm_media)) {
	case IFM_AUTO:
		adapter->hw.mac.autoneg = DO_AUTO_NEG;
		adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
		break;
	case IFM_1000_LX:
	case IFM_1000_SX:
	case IFM_1000_T:
		adapter->hw.mac.autoneg = DO_AUTO_NEG;
		adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
		break;
	case IFM_100_TX:
		adapter->hw.mac.autoneg = FALSE;
		adapter->hw.phy.autoneg_advertised = 0;
		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
			adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_FULL;
		else
			adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_HALF;
		break;
	case IFM_10_T:
		adapter->hw.mac.autoneg = FALSE;
		adapter->hw.phy.autoneg_advertised = 0;
		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
			adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_FULL;
		else
			adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_HALF;
		break;
	default:
		device_printf(adapter->dev, "Unsupported media type\n");
	}

	/*
	 * As the speed/duplex settings may have changed, we need
	 * to reset the PHY.
	 */
	adapter->hw.phy.reset_disable = FALSE;

	em_init_locked(adapter);
	EM_CORE_UNLOCK(adapter);

	return (0);
}

/*********************************************************************
 *
 *  This routine maps the mbufs to tx descriptors.
 *
 *  return 0 on success, positive on failure
 **********************************************************************/

static int
em_xmit(struct tx_ring *txr, struct mbuf **m_headp)
{
	struct adapter		*adapter = txr->adapter;
	bus_dma_segment_t	segs[EM_MAX_SCATTER];
	bus_dmamap_t		map;
	struct em_buffer	*tx_buffer, *tx_buffer_mapped;
	struct e1000_tx_desc	*ctxd = NULL;
	struct mbuf		*m_head;
	u32			txd_upper, txd_lower, txd_used, txd_saved;
	int			nsegs, i, j, first, last = 0;
	int			error, do_tso, tso_desc = 0;

	m_head = *m_headp;
	txd_upper = txd_lower = txd_used = txd_saved = 0;
	do_tso = ((m_head->m_pkthdr.csum_flags & CSUM_TSO) != 0);

	/*
	 * TSO workaround:
	 *  If an mbuf contains only the header, we need
	 *     to pull 4 bytes of payload into it.
	 */
	if (do_tso && (m_head->m_len <= M_TSO_LEN)) {
		m_head = m_pullup(m_head, M_TSO_LEN + 4);
		*m_headp = m_head;
		if (m_head == NULL)
			return (ENOBUFS);
	}

	/*
	 * Map the packet for DMA
	 *
	 * Capture the first descriptor index; this descriptor
	 * will have the index of the EOP, which is the only
	 * one that now gets a DONE bit writeback.
	 */
1763	first = txr->next_avail_desc;
1764	tx_buffer = &txr->tx_buffers[first];
1765	tx_buffer_mapped = tx_buffer;
1766	map = tx_buffer->map;
1767
1768	error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
1769	    *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
1770
1771	/*
1772	 * There are two types of errors we can (try) to handle:
1773	 * - EFBIG means the mbuf chain was too long and bus_dma ran
1774	 *   out of segments.  Defragment the mbuf chain and try again.
1775	 * - ENOMEM means bus_dma could not obtain enough bounce buffers
1776	 *   at this point in time.  Defer sending and try again later.
1777	 * All other errors, in particular EINVAL, are fatal and prevent the
1778	 * mbuf chain from ever going through.  Drop it and report error.
1779	 */
1780	if (error == EFBIG) {
1781		struct mbuf *m;
1782
1783		m = m_defrag(*m_headp, M_DONTWAIT);
1784		if (m == NULL) {
1785			adapter->mbuf_alloc_failed++;
1786			m_freem(*m_headp);
1787			*m_headp = NULL;
1788			return (ENOBUFS);
1789		}
1790		*m_headp = m;
1791
1792		/* Try it again */
1793		error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
1794		    *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
1795
1796		if (error) {
1797			adapter->no_tx_dma_setup++;
1798			m_freem(*m_headp);
1799			*m_headp = NULL;
1800			return (error);
1801		}
	} else if (error == ENOMEM) {
		/* Defer sending; the caller may retry later */
		adapter->no_tx_dma_setup++;
		return (error);
	} else if (error != 0) {
		/* Fatal (e.g. EINVAL): drop the mbuf chain */
		adapter->no_tx_dma_setup++;
		m_freem(*m_headp);
		*m_headp = NULL;
		return (error);
	}
1806
1807	/*
1808	 * TSO Hardware workaround, if this packet is not
1809	 * TSO, and is only a single descriptor long, and
1810	 * it follows a TSO burst, then we need to add a
1811	 * sentinel descriptor to prevent premature writeback.
1812	 */
1813	if ((do_tso == 0) && (txr->tx_tso == TRUE)) {
1814		if (nsegs == 1)
1815			tso_desc = TRUE;
1816		txr->tx_tso = FALSE;
1817	}
1818
	if (nsegs > (txr->tx_avail - 2)) {
		txr->no_desc_avail++;
		bus_dmamap_unload(txr->txtag, map);
		return (ENOBUFS);
	}
1824	m_head = *m_headp;
1825
1826	/* Do hardware assists */
1827#if __FreeBSD_version >= 700000
1828	if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
1829		error = em_tso_setup(txr, m_head, &txd_upper, &txd_lower);
1830		if (error != TRUE)
1831			return (ENXIO); /* something foobar */
1832		/* we need to make a final sentinel transmit desc */
1833		tso_desc = TRUE;
1834	} else
1835#endif
1836	if (m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD)
1837		em_transmit_checksum_setup(txr,  m_head,
1838		    &txd_upper, &txd_lower);
1839
1840	i = txr->next_avail_desc;
1841
1842	/* Set up our transmit descriptors */
1843	for (j = 0; j < nsegs; j++) {
1844		bus_size_t seg_len;
1845		bus_addr_t seg_addr;
1846
1847		tx_buffer = &txr->tx_buffers[i];
1848		ctxd = &txr->tx_base[i];
1849		seg_addr = segs[j].ds_addr;
1850		seg_len  = segs[j].ds_len;
1851		/*
1852		** TSO Workaround:
1853		** If this is the last descriptor, we want to
1854		** split it so we have a small final sentinel
1855		*/
1856		if (tso_desc && (j == (nsegs -1)) && (seg_len > 8)) {
1857			seg_len -= 4;
1858			ctxd->buffer_addr = htole64(seg_addr);
1859			ctxd->lower.data = htole32(
1860			adapter->txd_cmd | txd_lower | seg_len);
1861			ctxd->upper.data =
1862			    htole32(txd_upper);
1863			if (++i == adapter->num_tx_desc)
1864				i = 0;
1865			/* Now make the sentinel */
1866			++txd_used; /* using an extra txd */
1867			ctxd = &txr->tx_base[i];
1868			tx_buffer = &txr->tx_buffers[i];
1869			ctxd->buffer_addr =
1870			    htole64(seg_addr + seg_len);
1871			ctxd->lower.data = htole32(
1872			adapter->txd_cmd | txd_lower | 4);
1873			ctxd->upper.data =
1874			    htole32(txd_upper);
1875			last = i;
1876			if (++i == adapter->num_tx_desc)
1877				i = 0;
1878		} else {
1879			ctxd->buffer_addr = htole64(seg_addr);
1880			ctxd->lower.data = htole32(
1881			adapter->txd_cmd | txd_lower | seg_len);
1882			ctxd->upper.data =
1883			    htole32(txd_upper);
1884			last = i;
1885			if (++i == adapter->num_tx_desc)
1886				i = 0;
1887		}
1888		tx_buffer->m_head = NULL;
1889		tx_buffer->next_eop = -1;
1890	}
1891
1892	txr->next_avail_desc = i;
1893	txr->tx_avail -= nsegs;
1894	if (tso_desc) /* TSO used an extra for sentinel */
1895		txr->tx_avail -= txd_used;
1896
	if (m_head->m_flags & M_VLANTAG) {
		/* Set the vlan id. */
		ctxd->upper.fields.special =
		    htole16(m_head->m_pkthdr.ether_vtag);
		/* Tell hardware to add tag */
		ctxd->lower.data |= htole32(E1000_TXD_CMD_VLE);
	}

	tx_buffer->m_head = m_head;
	tx_buffer_mapped->map = tx_buffer->map;
	tx_buffer->map = map;
	bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE);

	/*
	 * Last Descriptor of Packet
	 * needs End Of Packet (EOP)
	 * and Report Status (RS)
	 */
	ctxd->lower.data |=
	    htole32(E1000_TXD_CMD_EOP | E1000_TXD_CMD_RS);
1917	/*
1918	 * Keep track in the first buffer which
1919	 * descriptor will be written back
1920	 */
1921	tx_buffer = &txr->tx_buffers[first];
1922	tx_buffer->next_eop = last;
1923
1924	/*
1925	 * Advance the Transmit Descriptor Tail (TDT), this tells the E1000
1926	 * that this frame is available to transmit.
1927	 */
1928	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
1929	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1930	E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me), i);
1931
1932	return (0);
1933}
1934
1935static void
1936em_set_promisc(struct adapter *adapter)
1937{
1938	struct ifnet	*ifp = adapter->ifp;
1939	u32		reg_rctl;
1940
1941	reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
1942
1943	if (ifp->if_flags & IFF_PROMISC) {
1944		reg_rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
1945		/* Turn this on if you want to see bad packets */
1946		if (em_debug_sbp)
1947			reg_rctl |= E1000_RCTL_SBP;
1948		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
1949	} else if (ifp->if_flags & IFF_ALLMULTI) {
1950		reg_rctl |= E1000_RCTL_MPE;
1951		reg_rctl &= ~E1000_RCTL_UPE;
1952		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
1953	}
1954}
1955
1956static void
1957em_disable_promisc(struct adapter *adapter)
1958{
1959	u32	reg_rctl;
1960
1961	reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
1962
1963	reg_rctl &=  (~E1000_RCTL_UPE);
1964	reg_rctl &=  (~E1000_RCTL_MPE);
1965	reg_rctl &=  (~E1000_RCTL_SBP);
1966	E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
1967}
1968
1969
1970/*********************************************************************
1971 *  Multicast Update
1972 *
1973 *  This routine is called whenever multicast address list is updated.
1974 *
1975 **********************************************************************/
1976
1977static void
1978em_set_multi(struct adapter *adapter)
1979{
1980	struct ifnet	*ifp = adapter->ifp;
1981	struct ifmultiaddr *ifma;
1982	u32 reg_rctl = 0;
1983	u8  *mta; /* Multicast array memory */
1984	int mcnt = 0;
1985
1986	IOCTL_DEBUGOUT("em_set_multi: begin");
1987
1988	if (adapter->hw.mac.type == e1000_82542 &&
1989	    adapter->hw.revision_id == E1000_REVISION_2) {
1990		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
1991		if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
1992			e1000_pci_clear_mwi(&adapter->hw);
1993		reg_rctl |= E1000_RCTL_RST;
1994		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
1995		msec_delay(5);
1996	}
1997
1998	/* Allocate temporary memory to setup array */
1999	mta = malloc(sizeof(u8) *
2000	    (ETH_ADDR_LEN * MAX_NUM_MULTICAST_ADDRESSES),
2001	    M_DEVBUF, M_NOWAIT | M_ZERO);
2002	if (mta == NULL)
2003		panic("em_set_multi memory failure\n");
2004
2005#if __FreeBSD_version < 800000
2006	IF_ADDR_LOCK(ifp);
2007#else
2008	if_maddr_rlock(ifp);
2009#endif
2010	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
2011		if (ifma->ifma_addr->sa_family != AF_LINK)
2012			continue;
2013
2014		if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
2015			break;
2016
2017		bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
2018		    &mta[mcnt * ETH_ADDR_LEN], ETH_ADDR_LEN);
2019		mcnt++;
2020	}
2021#if __FreeBSD_version < 800000
2022	IF_ADDR_UNLOCK(ifp);
2023#else
2024	if_maddr_runlock(ifp);
2025#endif
2026	if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES) {
2027		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2028		reg_rctl |= E1000_RCTL_MPE;
2029		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2030	} else
2031		e1000_update_mc_addr_list(&adapter->hw, mta, mcnt);
2032
2033	if (adapter->hw.mac.type == e1000_82542 &&
2034	    adapter->hw.revision_id == E1000_REVISION_2) {
2035		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2036		reg_rctl &= ~E1000_RCTL_RST;
2037		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2038		msec_delay(5);
2039		if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
2040			e1000_pci_set_mwi(&adapter->hw);
2041	}
2042	free(mta, M_DEVBUF);
2043}
2044
2045
2046/*********************************************************************
2047 *  Timer routine
2048 *
2049 *  This routine checks for link status and updates statistics.
2050 *
2051 **********************************************************************/
2052
2053static void
2054em_local_timer(void *arg)
2055{
2056	struct adapter	*adapter = arg;
2057	struct ifnet	*ifp = adapter->ifp;
2058	struct tx_ring	*txr = adapter->tx_rings;
2059
2060	EM_CORE_LOCK_ASSERT(adapter);
2061
2062	em_update_link_status(adapter);
2063	em_update_stats_counters(adapter);
2064
2065	/* Reset LAA into RAR[0] on 82571 */
2066	if (e1000_get_laa_state_82571(&adapter->hw) == TRUE)
2067		e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);
2068
	if (em_display_debug_stats && (ifp->if_drv_flags & IFF_DRV_RUNNING))
2070		em_print_hw_stats(adapter);
2071
2072	/*
2073	** Check for time since any descriptor was cleaned
2074	*/
2075	for (int i = 0; i < adapter->num_queues; i++, txr++) {
2076		EM_TX_LOCK(txr);
2077		if (txr->watchdog_check == FALSE) {
2078			EM_TX_UNLOCK(txr);
2079			continue;
2080		}
2081		if ((ticks - txr->watchdog_time) > EM_WATCHDOG)
2082			goto hung;
2083		EM_TX_UNLOCK(txr);
2084	}
2085
2086	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
2087	return;
2088hung:
2089	device_printf(adapter->dev, "Watchdog timeout -- resetting\n");
2090	ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2091	adapter->watchdog_events++;
2092	EM_TX_UNLOCK(txr);
2093	em_init_locked(adapter);
2094}
2095
2096
2097static void
2098em_update_link_status(struct adapter *adapter)
2099{
2100	struct e1000_hw *hw = &adapter->hw;
2101	struct ifnet *ifp = adapter->ifp;
2102	device_t dev = adapter->dev;
2103	u32 link_check = 0;
2104
2105	/* Get the cached link value or read phy for real */
2106	switch (hw->phy.media_type) {
2107	case e1000_media_type_copper:
2108		if (hw->mac.get_link_status) {
2109			/* Do the work to read phy */
2110			e1000_check_for_link(hw);
2111			link_check = !hw->mac.get_link_status;
2112			if (link_check) /* ESB2 fix */
2113				e1000_cfg_on_link_up(hw);
2114		} else
2115			link_check = TRUE;
2116		break;
2117	case e1000_media_type_fiber:
2118		e1000_check_for_link(hw);
2119		link_check = (E1000_READ_REG(hw, E1000_STATUS) &
2120                                 E1000_STATUS_LU);
2121		break;
2122	case e1000_media_type_internal_serdes:
2123		e1000_check_for_link(hw);
2124		link_check = adapter->hw.mac.serdes_has_link;
2125		break;
2126	default:
2127	case e1000_media_type_unknown:
2128		break;
2129	}
2130
2131	/* Now check for a transition */
2132	if (link_check && (adapter->link_active == 0)) {
2133		e1000_get_speed_and_duplex(hw, &adapter->link_speed,
2134		    &adapter->link_duplex);
2135		/* Check if we must disable SPEED_MODE bit on PCI-E */
2136		if ((adapter->link_speed != SPEED_1000) &&
2137		    ((hw->mac.type == e1000_82571) ||
2138		    (hw->mac.type == e1000_82572))) {
2139			int tarc0;
2140			tarc0 = E1000_READ_REG(hw, E1000_TARC(0));
2141			tarc0 &= ~SPEED_MODE_BIT;
2142			E1000_WRITE_REG(hw, E1000_TARC(0), tarc0);
2143		}
2144		if (bootverbose)
2145			device_printf(dev, "Link is up %d Mbps %s\n",
2146			    adapter->link_speed,
2147			    ((adapter->link_duplex == FULL_DUPLEX) ?
2148			    "Full Duplex" : "Half Duplex"));
2149		adapter->link_active = 1;
2150		adapter->smartspeed = 0;
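		/* link_speed is in Mbps; if_baudrate wants bits/sec */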
2151		ifp->if_baudrate = adapter->link_speed * 1000000;
2152		if_link_state_change(ifp, LINK_STATE_UP);
2153	} else if (!link_check && (adapter->link_active == 1)) {
2154		ifp->if_baudrate = adapter->link_speed = 0;
2155		adapter->link_duplex = 0;
2156		if (bootverbose)
2157			device_printf(dev, "Link is Down\n");
2158		adapter->link_active = 0;
2159		/* Link down, disable watchdog */
2160		// JFV change later
2161		//adapter->watchdog_check = FALSE;
2162		if_link_state_change(ifp, LINK_STATE_DOWN);
2163	}
2164}
2165
2166/*********************************************************************
2167 *
2168 *  This routine disables all traffic on the adapter by issuing a
2169 *  global reset on the MAC and deallocates TX/RX buffers.
2170 *
2171 *  This routine should always be called with BOTH the CORE
2172 *  and TX locks.
2173 **********************************************************************/
2174
2175static void
2176em_stop(void *arg)
2177{
2178	struct adapter	*adapter = arg;
2179	struct ifnet	*ifp = adapter->ifp;
2180	struct tx_ring	*txr = adapter->tx_rings;
2181
2182	EM_CORE_LOCK_ASSERT(adapter);
2183
2184	INIT_DEBUGOUT("em_stop: begin");
2185
2186	em_disable_intr(adapter);
2187	callout_stop(&adapter->timer);
2188
2189	/* Tell the stack that the interface is no longer active */
2190	ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
2191
	/* Unarm watchdog timer. */
2193	for (int i = 0; i < adapter->num_queues; i++, txr++) {
2194		EM_TX_LOCK(txr);
2195		txr->watchdog_check = FALSE;
2196		EM_TX_UNLOCK(txr);
2197	}
2198
2199	e1000_reset_hw(&adapter->hw);
2200	E1000_WRITE_REG(&adapter->hw, E1000_WUC, 0);
2201
2202	e1000_led_off(&adapter->hw);
2203	e1000_cleanup_led(&adapter->hw);
2204}
2205
2206
2207/*********************************************************************
2208 *
2209 *  Determine hardware revision.
2210 *
2211 **********************************************************************/
2212static void
2213em_identify_hardware(struct adapter *adapter)
2214{
2215	device_t dev = adapter->dev;
2216
2217	/* Make sure our PCI config space has the necessary stuff set */
2218	adapter->hw.bus.pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2);
2219	if (!((adapter->hw.bus.pci_cmd_word & PCIM_CMD_BUSMASTEREN) &&
2220	    (adapter->hw.bus.pci_cmd_word & PCIM_CMD_MEMEN))) {
2221		device_printf(dev, "Memory Access and/or Bus Master bits "
2222		    "were not set!\n");
2223		adapter->hw.bus.pci_cmd_word |=
2224		(PCIM_CMD_BUSMASTEREN | PCIM_CMD_MEMEN);
2225		pci_write_config(dev, PCIR_COMMAND,
2226		    adapter->hw.bus.pci_cmd_word, 2);
2227	}
2228
2229	/* Save off the information about this board */
2230	adapter->hw.vendor_id = pci_get_vendor(dev);
2231	adapter->hw.device_id = pci_get_device(dev);
2232	adapter->hw.revision_id = pci_read_config(dev, PCIR_REVID, 1);
2233	adapter->hw.subsystem_vendor_id =
2234	    pci_read_config(dev, PCIR_SUBVEND_0, 2);
2235	adapter->hw.subsystem_device_id =
2236	    pci_read_config(dev, PCIR_SUBDEV_0, 2);
2237
2238	/* Do Shared Code Init and Setup */
2239	if (e1000_set_mac_type(&adapter->hw)) {
2240		device_printf(dev, "Setup init failure\n");
2241		return;
2242	}
2243}
2244
2245static int
2246em_allocate_pci_resources(struct adapter *adapter)
2247{
2248	device_t	dev = adapter->dev;
2249	int		rid;
2250
2251	rid = PCIR_BAR(0);
2252	adapter->memory = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
2253	    &rid, RF_ACTIVE);
2254	if (adapter->memory == NULL) {
2255		device_printf(dev, "Unable to allocate bus resource: memory\n");
2256		return (ENXIO);
2257	}
2258	adapter->osdep.mem_bus_space_tag =
2259	    rman_get_bustag(adapter->memory);
2260	adapter->osdep.mem_bus_space_handle =
2261	    rman_get_bushandle(adapter->memory);
2262	adapter->hw.hw_addr = (u8 *)&adapter->osdep.mem_bus_space_handle;
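	/*
	 * The shared code's register access macros go through the
	 * osdep bus tag/handle saved above; hw_addr is simply pointed
	 * at the handle for their benefit.
	 */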
2263
2264	/* Default to a single queue */
2265	adapter->num_queues = 1;
2266
2267	/*
2268	 * Setup MSI/X or MSI if PCI Express
2269	 */
2270	adapter->msix = em_setup_msix(adapter);
2271
2272	adapter->hw.back = &adapter->osdep;
2273
2274	return (0);
2275}
2276
2277/*********************************************************************
2278 *
2279 *  Setup the Legacy or MSI Interrupt handler
2280 *
2281 **********************************************************************/
2282int
2283em_allocate_legacy(struct adapter *adapter)
2284{
2285	device_t dev = adapter->dev;
2286	int error, rid = 0;
2287
2288	/* Manually turn off all interrupts */
2289	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2290
2291	if (adapter->msix == 1) /* using MSI */
2292		rid = 1;
2293	/* We allocate a single interrupt resource */
2294	adapter->res = bus_alloc_resource_any(dev,
2295	    SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2296	if (adapter->res == NULL) {
2297		device_printf(dev, "Unable to allocate bus resource: "
2298		    "interrupt\n");
2299		return (ENXIO);
2300	}
2301
2302	/*
2303	 * Allocate a fast interrupt and the associated
2304	 * deferred processing contexts.
2305	 */
2306	TASK_INIT(&adapter->que_task, 0, em_handle_que, adapter);
2307	TASK_INIT(&adapter->link_task, 0, em_handle_link, adapter);
2308	adapter->tq = taskqueue_create_fast("em_taskq", M_NOWAIT,
2309	    taskqueue_thread_enqueue, &adapter->tq);
2310	taskqueue_start_threads(&adapter->tq, 1, PI_NET, "%s taskq",
2311	    device_get_nameunit(adapter->dev));
2312	if ((error = bus_setup_intr(dev, adapter->res, INTR_TYPE_NET,
2313	    em_irq_fast, NULL, adapter, &adapter->tag)) != 0) {
2314		device_printf(dev, "Failed to register fast interrupt "
2315			    "handler: %d\n", error);
2316		taskqueue_free(adapter->tq);
2317		adapter->tq = NULL;
2318		return (error);
2319	}
2320
2321	return (0);
2322}
2323
2324/*********************************************************************
2325 *
2326 *  Setup the MSIX Interrupt handlers
 *   This is not really Multiqueue, rather
 *   it's just multiple interrupt vectors.
2329 *
2330 **********************************************************************/
2331int
2332em_allocate_msix(struct adapter *adapter)
2333{
2334	device_t	dev = adapter->dev;
2335	struct		tx_ring *txr = adapter->tx_rings;
2336	struct		rx_ring *rxr = adapter->rx_rings;
2337	int		error, rid, vector = 0;
2338
2339
2340	/* Make sure all interrupts are disabled */
2341	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2342
2343	/* First set up ring resources */
2344	for (int i = 0; i < adapter->num_queues; i++, txr++, rxr++) {
2345
2346		/* RX ring */
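		/* MSIX SYS_RES_IRQ rids are 1-based, hence vector + 1 */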
2347		rid = vector + 1;
2348
2349		rxr->res = bus_alloc_resource_any(dev,
2350		    SYS_RES_IRQ, &rid, RF_ACTIVE);
2351		if (rxr->res == NULL) {
2352			device_printf(dev,
2353			    "Unable to allocate bus resource: "
2354			    "RX MSIX Interrupt %d\n", i);
2355			return (ENXIO);
2356		}
2357		if ((error = bus_setup_intr(dev, rxr->res,
2358		    INTR_TYPE_NET | INTR_MPSAFE, NULL, em_msix_rx,
2359		    rxr, &rxr->tag)) != 0) {
2360			device_printf(dev, "Failed to register RX handler");
2361			return (error);
2362		}
2363		rxr->msix = vector++; /* NOTE increment vector for TX */
2364		TASK_INIT(&rxr->rx_task, 0, em_handle_rx, rxr);
2365		rxr->tq = taskqueue_create_fast("em_rxq", M_NOWAIT,
2366		    taskqueue_thread_enqueue, &rxr->tq);
2367		taskqueue_start_threads(&rxr->tq, 1, PI_NET, "%s rxq",
2368		    device_get_nameunit(adapter->dev));
2369		/*
2370		** Set the bit to enable interrupt
2371		** in E1000_IMS -- bits 20 and 21
2372		** are for RX0 and RX1, note this has
2373		** NOTHING to do with the MSIX vector
2374		*/
2375		rxr->ims = 1 << (20 + i);
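		/*
		** Program this queue's IVAR entry: each 4-bit
		** field carries the MSIX vector in its low 3 bits
		** plus a valid bit (the 8) in bit 3.
		*/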
2376		adapter->ivars |= (8 | rxr->msix) << (i * 4);
2377
2378		/* TX ring */
2379		rid = vector + 1;
2380		txr->res = bus_alloc_resource_any(dev,
2381		    SYS_RES_IRQ, &rid, RF_ACTIVE);
2382		if (txr->res == NULL) {
2383			device_printf(dev,
2384			    "Unable to allocate bus resource: "
2385			    "TX MSIX Interrupt %d\n", i);
2386			return (ENXIO);
2387		}
2388		if ((error = bus_setup_intr(dev, txr->res,
2389		    INTR_TYPE_NET | INTR_MPSAFE, NULL, em_msix_tx,
2390		    txr, &txr->tag)) != 0) {
2391			device_printf(dev, "Failed to register TX handler");
2392			return (error);
2393		}
2394		txr->msix = vector++; /* Increment vector for next pass */
2395		TASK_INIT(&txr->tx_task, 0, em_handle_tx, txr);
2396		txr->tq = taskqueue_create_fast("em_txq", M_NOWAIT,
2397		    taskqueue_thread_enqueue, &txr->tq);
2398		taskqueue_start_threads(&txr->tq, 1, PI_NET, "%s txq",
2399		    device_get_nameunit(adapter->dev));
2400		/*
2401		** Set the bit to enable interrupt
2402		** in E1000_IMS -- bits 22 and 23
2403		** are for TX0 and TX1, note this has
2404		** NOTHING to do with the MSIX vector
2405		*/
2406		txr->ims = 1 << (22 + i);
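		/* TX IVAR entries begin at bit 8, again 4 bits each */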
2407		adapter->ivars |= (8 | txr->msix) << (8 + (i * 4));
2408	}
2409
2410	/* Link interrupt */
2411	++rid;
2412	adapter->res = bus_alloc_resource_any(dev,
2413	    SYS_RES_IRQ, &rid, RF_ACTIVE);
	if (!adapter->res) {
		device_printf(dev, "Unable to allocate "
		    "bus resource: Link interrupt [%d]\n", rid);
		return (ENXIO);
	}
2419	/* Set the link handler function */
2420	error = bus_setup_intr(dev, adapter->res,
2421	    INTR_TYPE_NET | INTR_MPSAFE, NULL,
2422	    em_msix_link, adapter, &adapter->tag);
2423	if (error) {
2424		adapter->res = NULL;
2425		device_printf(dev, "Failed to register LINK handler");
2426		return (error);
2427	}
2428	adapter->linkvec = vector;
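	/*
	** The link (other-cause) entry occupies IVAR bits 16..19;
	** the accumulated ivars value is written out to the hardware
	** IVAR register later, during initialization.
	*/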
2429	adapter->ivars |=  (8 | vector) << 16;
2430	adapter->ivars |= 0x80000000;
2431	TASK_INIT(&adapter->link_task, 0, em_handle_link, adapter);
2432	adapter->tq = taskqueue_create_fast("em_link", M_NOWAIT,
2433	    taskqueue_thread_enqueue, &adapter->tq);
2434	taskqueue_start_threads(&adapter->tq, 1, PI_NET, "%s linkq",
2435	    device_get_nameunit(adapter->dev));
2436
2437	return (0);
2438}
2439
2440
2441static void
2442em_free_pci_resources(struct adapter *adapter)
2443{
2444	device_t	dev = adapter->dev;
2445	struct tx_ring	*txr;
2446	struct rx_ring	*rxr;
2447	int		rid;
2448
2449
2450	/*
2451	** Release all the queue interrupt resources:
2452	*/
2453	for (int i = 0; i < adapter->num_queues; i++) {
2454		txr = &adapter->tx_rings[i];
2455		rxr = &adapter->rx_rings[i];
2456		rid = txr->msix +1;
2457		if (txr->tag != NULL) {
2458			bus_teardown_intr(dev, txr->res, txr->tag);
2459			txr->tag = NULL;
2460		}
2461		if (txr->res != NULL)
2462			bus_release_resource(dev, SYS_RES_IRQ,
2463			    rid, txr->res);
2464		rid = rxr->msix +1;
2465		if (rxr->tag != NULL) {
2466			bus_teardown_intr(dev, rxr->res, rxr->tag);
2467			rxr->tag = NULL;
2468		}
2469		if (rxr->res != NULL)
2470			bus_release_resource(dev, SYS_RES_IRQ,
2471			    rid, rxr->res);
2472	}
2473
	if (adapter->linkvec) /* we are doing MSIX */
		rid = adapter->linkvec + 1;
	else
		rid = (adapter->msix != 0) ? 1 : 0;
2478
2479	if (adapter->tag != NULL) {
2480		bus_teardown_intr(dev, adapter->res, adapter->tag);
2481		adapter->tag = NULL;
2482	}
2483
2484	if (adapter->res != NULL)
2485		bus_release_resource(dev, SYS_RES_IRQ, rid, adapter->res);
2486
2487
2488	if (adapter->msix)
2489		pci_release_msi(dev);
2490
2491	if (adapter->msix_mem != NULL)
2492		bus_release_resource(dev, SYS_RES_MEMORY,
2493		    PCIR_BAR(EM_MSIX_BAR), adapter->msix_mem);
2494
2495	if (adapter->memory != NULL)
2496		bus_release_resource(dev, SYS_RES_MEMORY,
2497		    PCIR_BAR(0), adapter->memory);
2498
2499	if (adapter->flash != NULL)
2500		bus_release_resource(dev, SYS_RES_MEMORY,
2501		    EM_FLASH, adapter->flash);
2502}
2503
2504/*
2505 * Setup MSI or MSI/X
2506 */
2507static int
2508em_setup_msix(struct adapter *adapter)
2509{
2510	device_t dev = adapter->dev;
2511	int val = 0;
2512
2513
2514	/* Setup MSI/X for Hartwell */
2515	if ((adapter->hw.mac.type == e1000_82574) &&
2516	    (em_enable_msix == TRUE)) {
2517		/* Map the MSIX BAR */
2518		int rid = PCIR_BAR(EM_MSIX_BAR);
2519		adapter->msix_mem = bus_alloc_resource_any(dev,
2520		    SYS_RES_MEMORY, &rid, RF_ACTIVE);
		if (!adapter->msix_mem) {
			/* May not be enabled */
			device_printf(adapter->dev,
			    "Unable to map MSIX table\n");
			goto msi;
		}
2527		val = pci_msix_count(dev);
		if (val != 5) {
			bus_release_resource(dev, SYS_RES_MEMORY,
			    PCIR_BAR(EM_MSIX_BAR), adapter->msix_mem);
			adapter->msix_mem = NULL;
			device_printf(adapter->dev,
			    "MSIX vectors wrong, using MSI\n");
			goto msi;
		}
2535		}
2536		if (em_msix_queues == 2) {
2537			val = 5;
2538			adapter->num_queues = 2;
2539		} else {
2540			val = 3;
2541			adapter->num_queues = 1;
2542		}
2543		if (pci_alloc_msix(dev, &val) == 0) {
2544			device_printf(adapter->dev,
2545			    "Using MSIX interrupts "
2546			    "with %d vectors\n", val);
2547		}
2548
2549		return (val);
2550	}
2551msi:
	val = pci_msi_count(dev);
	if (val == 1 && pci_alloc_msi(dev, &val) == 0) {
		adapter->msix = 1;
		device_printf(adapter->dev, "Using MSI interrupt\n");
2556		return (val);
2557	}
	/* Should only happen due to manual intervention */
	device_printf(adapter->dev, "Setup MSIX failure\n");
2560	return (0);
2561}
2562
2563
2564/*********************************************************************
2565 *
2566 *  Initialize the hardware to a configuration
2567 *  as specified by the adapter structure.
2568 *
2569 **********************************************************************/
2570static void
2571em_reset(struct adapter *adapter)
2572{
2573	device_t	dev = adapter->dev;
2574	struct e1000_hw	*hw = &adapter->hw;
2575	u16		rx_buffer_size;
2576
2577	INIT_DEBUGOUT("em_reset: begin");
2578
2579	/* Set up smart power down as default off on newer adapters. */
2580	if (!em_smart_pwr_down && (hw->mac.type == e1000_82571 ||
2581	    hw->mac.type == e1000_82572)) {
2582		u16 phy_tmp = 0;
2583
2584		/* Speed up time to link by disabling smart power down. */
2585		e1000_read_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, &phy_tmp);
2586		phy_tmp &= ~IGP02E1000_PM_SPD;
2587		e1000_write_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, phy_tmp);
2588	}
2589
2590	/*
2591	 * These parameters control the automatic generation (Tx) and
2592	 * response (Rx) to Ethernet PAUSE frames.
2593	 * - High water mark should allow for at least two frames to be
2594	 *   received after sending an XOFF.
2595	 * - Low water mark works best when it is very near the high water mark.
2596	 *   This allows the receiver to restart by sending XON when it has
 *   drained a bit. Here we use an arbitrary value of 1500 which will
2598	 *   restart after one full frame is pulled from the buffer. There
2599	 *   could be several smaller frames in the buffer and if so they will
2600	 *   not trigger the XON until their total number reduces the buffer
2601	 *   by 1500.
2602	 * - The pause time is fairly large at 1000 x 512ns = 512 usec.
2603	 */
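	/* PBA bits 15:0 give the RX packet buffer size in KB;
	 * shifting left by 10 converts that to bytes. */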
	rx_buffer_size = ((E1000_READ_REG(hw, E1000_PBA) & 0xffff) << 10);
2605
2606	hw->fc.high_water = rx_buffer_size -
2607	    roundup2(adapter->max_frame_size, 1024);
2608	hw->fc.low_water = hw->fc.high_water - 1500;
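	/*
	 * Worked example (hypothetical numbers): with a 40 KB RX
	 * allocation and a 1522-byte max frame, high_water =
	 * 40960 - roundup2(1522, 1024) = 40960 - 2048 = 38912,
	 * and low_water = 38912 - 1500 = 37412.
	 */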
2609
2610	if (hw->mac.type == e1000_80003es2lan)
2611		hw->fc.pause_time = 0xFFFF;
2612	else
2613		hw->fc.pause_time = EM_FC_PAUSE_TIME;
2614
2615	hw->fc.send_xon = TRUE;
2616
	/* Set flow control, use the tunable value only if it is sane */
	if ((em_fc_setting >= 0) && (em_fc_setting < 4))
		hw->fc.requested_mode = em_fc_setting;
	else
		hw->fc.requested_mode = e1000_fc_none;

	/* Override - workaround for PCHLAN issue */
	if (hw->mac.type == e1000_pchlan)
		hw->fc.requested_mode = e1000_fc_rx_pause;
2626
2627	/* Issue a global reset */
2628	e1000_reset_hw(hw);
2629	E1000_WRITE_REG(hw, E1000_WUC, 0);
2630
2631	if (e1000_init_hw(hw) < 0) {
2632		device_printf(dev, "Hardware Initialization Failed\n");
2633		return;
2634	}
2635
2636	E1000_WRITE_REG(hw, E1000_VET, ETHERTYPE_VLAN);
2637	e1000_get_phy_info(hw);
2638	e1000_check_for_link(hw);
2639	return;
2640}
2641
2642/*********************************************************************
2643 *
2644 *  Setup networking device structure and register an interface.
2645 *
2646 **********************************************************************/
2647static void
2648em_setup_interface(device_t dev, struct adapter *adapter)
2649{
2650	struct ifnet   *ifp;
2651
2652	INIT_DEBUGOUT("em_setup_interface: begin");
2653
2654	ifp = adapter->ifp = if_alloc(IFT_ETHER);
2655	if (ifp == NULL)
2656		panic("%s: can not if_alloc()", device_get_nameunit(dev));
2657	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
2658	ifp->if_mtu = ETHERMTU;
2659	ifp->if_init =  em_init;
2660	ifp->if_softc = adapter;
2661	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
2662	ifp->if_ioctl = em_ioctl;
2663	ifp->if_start = em_start;
2664	IFQ_SET_MAXLEN(&ifp->if_snd, adapter->num_tx_desc - 1);
2665	ifp->if_snd.ifq_drv_maxlen = adapter->num_tx_desc - 1;
2666	IFQ_SET_READY(&ifp->if_snd);
2667
2668	ether_ifattach(ifp, adapter->hw.mac.addr);
2669
2670	ifp->if_capabilities = ifp->if_capenable = 0;
2671
2672#ifdef EM_MULTIQUEUE
2673	/* Multiqueue tx functions */
2674	ifp->if_transmit = em_mq_start;
2675	ifp->if_qflush = em_qflush;
2676#endif
2677
2678	ifp->if_capabilities |= IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM;
2679	ifp->if_capenable |= IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM;
2680
2681	/* Enable TSO by default, can disable with ifconfig */
2682	ifp->if_capabilities |= IFCAP_TSO4;
2683	ifp->if_capenable |= IFCAP_TSO4;
2684
2685	/*
2686	 * Tell the upper layer(s) we
2687	 * support full VLAN capability
2688	 */
2689	ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header);
2690	ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU;
2691	ifp->if_capenable |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU;
2692
	/*
	** Don't turn this on by default: if vlans are
	** created on another pseudo device (eg. lagg)
	** then vlan events are not passed through, breaking
	** operation, but with HW FILTER off it works. If
	** using vlans directly on the em driver you can
	** enable this and get full hardware tag filtering.
	*/
2701	ifp->if_capabilities |= IFCAP_VLAN_HWFILTER;
2702
2703#ifdef DEVICE_POLLING
2704	ifp->if_capabilities |= IFCAP_POLLING;
2705#endif
2706
2707	/* Enable only WOL MAGIC by default */
2708	if (adapter->wol) {
2709		ifp->if_capabilities |= IFCAP_WOL;
2710		ifp->if_capenable |= IFCAP_WOL_MAGIC;
2711	}
2712
2713	/*
2714	 * Specify the media types supported by this adapter and register
2715	 * callbacks to update media and link information
2716	 */
2717	ifmedia_init(&adapter->media, IFM_IMASK,
2718	    em_media_change, em_media_status);
2719	if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
2720	    (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
2721		u_char fiber_type = IFM_1000_SX;	/* default type */
2722
2723		ifmedia_add(&adapter->media, IFM_ETHER | fiber_type | IFM_FDX,
2724			    0, NULL);
2725		ifmedia_add(&adapter->media, IFM_ETHER | fiber_type, 0, NULL);
2726	} else {
2727		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T, 0, NULL);
2728		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T | IFM_FDX,
2729			    0, NULL);
2730		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX,
2731			    0, NULL);
2732		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX | IFM_FDX,
2733			    0, NULL);
2734		if (adapter->hw.phy.type != e1000_phy_ife) {
2735			ifmedia_add(&adapter->media,
2736				IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
2737			ifmedia_add(&adapter->media,
2738				IFM_ETHER | IFM_1000_T, 0, NULL);
2739		}
2740	}
2741	ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL);
2742	ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO);
2743}
2744
2745
2746/*
2747 * Manage DMA'able memory.
2748 */
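/*
 * Load callback: the tag below is created with nsegments == 1,
 * so we simply record the lone segment's bus address.
 */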
2749static void
2750em_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
2751{
2752	if (error)
2753		return;
2754	*(bus_addr_t *) arg = segs[0].ds_addr;
2755}
2756
2757static int
2758em_dma_malloc(struct adapter *adapter, bus_size_t size,
2759        struct em_dma_alloc *dma, int mapflags)
2760{
2761	int error;
2762
2763	error = bus_dma_tag_create(bus_get_dma_tag(adapter->dev), /* parent */
2764				EM_DBA_ALIGN, 0,	/* alignment, bounds */
2765				BUS_SPACE_MAXADDR,	/* lowaddr */
2766				BUS_SPACE_MAXADDR,	/* highaddr */
2767				NULL, NULL,		/* filter, filterarg */
2768				size,			/* maxsize */
2769				1,			/* nsegments */
2770				size,			/* maxsegsize */
2771				0,			/* flags */
2772				NULL,			/* lockfunc */
2773				NULL,			/* lockarg */
2774				&dma->dma_tag);
2775	if (error) {
2776		device_printf(adapter->dev,
2777		    "%s: bus_dma_tag_create failed: %d\n",
2778		    __func__, error);
2779		goto fail_0;
2780	}
2781
2782	error = bus_dmamem_alloc(dma->dma_tag, (void**) &dma->dma_vaddr,
2783	    BUS_DMA_NOWAIT | BUS_DMA_COHERENT, &dma->dma_map);
2784	if (error) {
2785		device_printf(adapter->dev,
2786		    "%s: bus_dmamem_alloc(%ju) failed: %d\n",
2787		    __func__, (uintmax_t)size, error);
2788		goto fail_2;
2789	}
2790
2791	dma->dma_paddr = 0;
2792	error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
2793	    size, em_dmamap_cb, &dma->dma_paddr, mapflags | BUS_DMA_NOWAIT);
2794	if (error || dma->dma_paddr == 0) {
2795		device_printf(adapter->dev,
2796		    "%s: bus_dmamap_load failed: %d\n",
2797		    __func__, error);
2798		goto fail_3;
2799	}
2800
2801	return (0);
2802
2803fail_3:
2804	bus_dmamap_unload(dma->dma_tag, dma->dma_map);
2805fail_2:
2806	bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
2807	bus_dma_tag_destroy(dma->dma_tag);
2808fail_0:
2809	dma->dma_map = NULL;
2810	dma->dma_tag = NULL;
2811
2812	return (error);
2813}
2814
2815static void
2816em_dma_free(struct adapter *adapter, struct em_dma_alloc *dma)
2817{
2818	if (dma->dma_tag == NULL)
2819		return;
2820	if (dma->dma_map != NULL) {
2821		bus_dmamap_sync(dma->dma_tag, dma->dma_map,
2822		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
2823		bus_dmamap_unload(dma->dma_tag, dma->dma_map);
2824		bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
2825		dma->dma_map = NULL;
2826	}
2827	bus_dma_tag_destroy(dma->dma_tag);
2828	dma->dma_tag = NULL;
2829}
2830
2831
2832/*********************************************************************
2833 *
2834 *  Allocate memory for the transmit and receive rings, and then
2835 *  the descriptors associated with each, called only once at attach.
2836 *
2837 **********************************************************************/
2838static int
2839em_allocate_queues(struct adapter *adapter)
2840{
2841	device_t		dev = adapter->dev;
2842	struct tx_ring		*txr = NULL;
2843	struct rx_ring		*rxr = NULL;
2844	int rsize, tsize, error = E1000_SUCCESS;
2845	int txconf = 0, rxconf = 0;
2846
2847
2848	/* Allocate the TX ring struct memory */
2849	if (!(adapter->tx_rings =
2850	    (struct tx_ring *) malloc(sizeof(struct tx_ring) *
2851	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2852		device_printf(dev, "Unable to allocate TX ring memory\n");
2853		error = ENOMEM;
2854		goto fail;
2855	}
2856
2857	/* Now allocate the RX */
2858	if (!(adapter->rx_rings =
2859	    (struct rx_ring *) malloc(sizeof(struct rx_ring) *
2860	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2861		device_printf(dev, "Unable to allocate RX ring memory\n");
2862		error = ENOMEM;
2863		goto rx_fail;
2864	}
2865
2866	tsize = roundup2(adapter->num_tx_desc *
2867	    sizeof(struct e1000_tx_desc), EM_DBA_ALIGN);
2868	/*
2869	 * Now set up the TX queues, txconf is needed to handle the
2870	 * possibility that things fail midcourse and we need to
2871	 * undo memory gracefully
2872	 */
2873	for (int i = 0; i < adapter->num_queues; i++, txconf++) {
2874		/* Set up some basics */
2875		txr = &adapter->tx_rings[i];
2876		txr->adapter = adapter;
2877		txr->me = i;
2878
2879		/* Initialize the TX lock */
2880		snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
2881		    device_get_nameunit(dev), txr->me);
2882		mtx_init(&txr->tx_mtx, txr->mtx_name, NULL, MTX_DEF);
2883
2884		if (em_dma_malloc(adapter, tsize,
2885			&txr->txdma, BUS_DMA_NOWAIT)) {
2886			device_printf(dev,
2887			    "Unable to allocate TX Descriptor memory\n");
2888			error = ENOMEM;
2889			goto err_tx_desc;
2890		}
2891		txr->tx_base = (struct e1000_tx_desc *)txr->txdma.dma_vaddr;
2892		bzero((void *)txr->tx_base, tsize);
2893
		if (em_allocate_transmit_buffers(txr)) {
			device_printf(dev,
			    "Critical Failure setting up transmit buffers\n");
			error = ENOMEM;
			goto err_tx_desc;
		}
2900#if __FreeBSD_version >= 800000
2901		/* Allocate a buf ring */
2902		txr->br = buf_ring_alloc(4096, M_DEVBUF,
2903		    M_WAITOK, &txr->tx_mtx);
2904#endif
2905	}
2906
2907	/*
2908	 * Next the RX queues...
2909	 */
2910	rsize = roundup2(adapter->num_rx_desc *
2911	    sizeof(struct e1000_rx_desc), EM_DBA_ALIGN);
2912	for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
2913		rxr = &adapter->rx_rings[i];
2914		rxr->adapter = adapter;
2915		rxr->me = i;
2916
2917		/* Initialize the RX lock */
2918		snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
		    device_get_nameunit(dev), rxr->me);
2920		mtx_init(&rxr->rx_mtx, rxr->mtx_name, NULL, MTX_DEF);
2921
2922		if (em_dma_malloc(adapter, rsize,
2923			&rxr->rxdma, BUS_DMA_NOWAIT)) {
2924			device_printf(dev,
2925			    "Unable to allocate RxDescriptor memory\n");
2926			error = ENOMEM;
2927			goto err_rx_desc;
2928		}
2929		rxr->rx_base = (struct e1000_rx_desc *)rxr->rxdma.dma_vaddr;
2930		bzero((void *)rxr->rx_base, rsize);
2931
		/* Allocate receive buffers for the ring */
2933		if (em_allocate_receive_buffers(rxr)) {
2934			device_printf(dev,
2935			    "Critical Failure setting up receive buffers\n");
2936			error = ENOMEM;
2937			goto err_rx_desc;
2938		}
2939	}
2940
2941	return (0);
2942
2943err_rx_desc:
2944	for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
2945		em_dma_free(adapter, &rxr->rxdma);
2946err_tx_desc:
2947	for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
2948		em_dma_free(adapter, &txr->txdma);
2949	free(adapter->rx_rings, M_DEVBUF);
2950rx_fail:
2951#if __FreeBSD_version >= 800000
2952	buf_ring_free(txr->br, M_DEVBUF);
2953#endif
2954	free(adapter->tx_rings, M_DEVBUF);
2955fail:
2956	return (error);
2957}
2958
2959
2960/*********************************************************************
2961 *
2962 *  Allocate memory for tx_buffer structures. The tx_buffer stores all
2963 *  the information needed to transmit a packet on the wire. This is
2964 *  called only once at attach, setup is done every reset.
2965 *
2966 **********************************************************************/
2967static int
2968em_allocate_transmit_buffers(struct tx_ring *txr)
2969{
2970	struct adapter *adapter = txr->adapter;
2971	device_t dev = adapter->dev;
2972	struct em_buffer *txbuf;
2973	int error, i;
2974
2975	/*
2976	 * Setup DMA descriptor areas.
2977	 */
2978	if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
2979			       1, 0,			/* alignment, bounds */
2980			       BUS_SPACE_MAXADDR,	/* lowaddr */
2981			       BUS_SPACE_MAXADDR,	/* highaddr */
2982			       NULL, NULL,		/* filter, filterarg */
2983			       EM_TSO_SIZE,		/* maxsize */
2984			       EM_MAX_SCATTER,		/* nsegments */
2985			       PAGE_SIZE,		/* maxsegsize */
2986			       0,			/* flags */
2987			       NULL,			/* lockfunc */
2988			       NULL,			/* lockfuncarg */
2989			       &txr->txtag))) {
2990		device_printf(dev,"Unable to allocate TX DMA tag\n");
2991		goto fail;
2992	}
2993
2994	if (!(txr->tx_buffers =
2995	    (struct em_buffer *) malloc(sizeof(struct em_buffer) *
2996	    adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2997		device_printf(dev, "Unable to allocate tx_buffer memory\n");
2998		error = ENOMEM;
2999		goto fail;
3000	}
3001
	/* Create the descriptor buffer dma maps */
3003	txbuf = txr->tx_buffers;
3004	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3005		error = bus_dmamap_create(txr->txtag, 0, &txbuf->map);
3006		if (error != 0) {
3007			device_printf(dev, "Unable to create TX DMA map\n");
3008			goto fail;
3009		}
3010	}
3011
	return (0);
3013fail:
3014	/* We free all, it handles case where we are in the middle */
3015	em_free_transmit_structures(adapter);
3016	return (error);
3017}
3018
3019/*********************************************************************
3020 *
3021 *  Initialize a transmit ring.
3022 *
3023 **********************************************************************/
3024static void
3025em_setup_transmit_ring(struct tx_ring *txr)
3026{
3027	struct adapter *adapter = txr->adapter;
3028	struct em_buffer *txbuf;
3029	int i;
3030
3031	/* Clear the old descriptor contents */
3032	EM_TX_LOCK(txr);
3033	bzero((void *)txr->tx_base,
3034	      (sizeof(struct e1000_tx_desc)) * adapter->num_tx_desc);
3035	/* Reset indices */
3036	txr->next_avail_desc = 0;
3037	txr->next_to_clean = 0;
3038
3039	/* Free any existing tx buffers. */
	txbuf = txr->tx_buffers;
3041	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3042		if (txbuf->m_head != NULL) {
3043			bus_dmamap_sync(txr->txtag, txbuf->map,
3044			    BUS_DMASYNC_POSTWRITE);
3045			bus_dmamap_unload(txr->txtag, txbuf->map);
3046			m_freem(txbuf->m_head);
3047			txbuf->m_head = NULL;
3048		}
3049		/* clear the watch index */
3050		txbuf->next_eop = -1;
	}
3052
3053	/* Set number of descriptors available */
3054	txr->tx_avail = adapter->num_tx_desc;
3055
3056	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3057	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3058	EM_TX_UNLOCK(txr);
3059}
3060
3061/*********************************************************************
3062 *
3063 *  Initialize all transmit rings.
3064 *
3065 **********************************************************************/
3066static void
3067em_setup_transmit_structures(struct adapter *adapter)
3068{
3069	struct tx_ring *txr = adapter->tx_rings;
3070
3071	for (int i = 0; i < adapter->num_queues; i++, txr++)
3072		em_setup_transmit_ring(txr);
3073
3074	return;
3075}
3076
3077/*********************************************************************
3078 *
3079 *  Enable transmit unit.
3080 *
3081 **********************************************************************/
3082static void
3083em_initialize_transmit_unit(struct adapter *adapter)
3084{
3085	struct tx_ring	*txr = adapter->tx_rings;
3086	struct e1000_hw	*hw = &adapter->hw;
3087	u32	tctl, tarc, tipg = 0;
3088
	INIT_DEBUGOUT("em_initialize_transmit_unit: begin");
3090
3091	for (int i = 0; i < adapter->num_queues; i++, txr++) {
3092		u64 bus_addr = txr->txdma.dma_paddr;
3093		/* Base and Len of TX Ring */
3094		E1000_WRITE_REG(hw, E1000_TDLEN(i),
3095	    	    adapter->num_tx_desc * sizeof(struct e1000_tx_desc));
3096		E1000_WRITE_REG(hw, E1000_TDBAH(i),
3097	    	    (u32)(bus_addr >> 32));
3098		E1000_WRITE_REG(hw, E1000_TDBAL(i),
3099	    	    (u32)bus_addr);
3100		/* Init the HEAD/TAIL indices */
3101		E1000_WRITE_REG(hw, E1000_TDT(i), 0);
3102		E1000_WRITE_REG(hw, E1000_TDH(i), 0);
3103
3104		HW_DEBUGOUT2("Base = %x, Length = %x\n",
3105		    E1000_READ_REG(&adapter->hw, E1000_TDBAL(i)),
3106		    E1000_READ_REG(&adapter->hw, E1000_TDLEN(i)));
3107
3108		txr->watchdog_check = FALSE;
3109	}
3110
3111	/* Set the default values for the Tx Inter Packet Gap timer */
3112	switch (adapter->hw.mac.type) {
3113	case e1000_82542:
3114		tipg = DEFAULT_82542_TIPG_IPGT;
3115		tipg |= DEFAULT_82542_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT;
3116		tipg |= DEFAULT_82542_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT;
3117		break;
3118	case e1000_80003es2lan:
3119		tipg = DEFAULT_82543_TIPG_IPGR1;
3120		tipg |= DEFAULT_80003ES2LAN_TIPG_IPGR2 <<
3121		    E1000_TIPG_IPGR2_SHIFT;
3122		break;
3123	default:
3124		if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
3125		    (adapter->hw.phy.media_type ==
3126		    e1000_media_type_internal_serdes))
3127			tipg = DEFAULT_82543_TIPG_IPGT_FIBER;
3128		else
3129			tipg = DEFAULT_82543_TIPG_IPGT_COPPER;
3130		tipg |= DEFAULT_82543_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT;
3131		tipg |= DEFAULT_82543_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT;
3132	}
3133
3134	E1000_WRITE_REG(&adapter->hw, E1000_TIPG, tipg);
3135	E1000_WRITE_REG(&adapter->hw, E1000_TIDV, adapter->tx_int_delay.value);
3136
	if (adapter->hw.mac.type >= e1000_82540)
3138		E1000_WRITE_REG(&adapter->hw, E1000_TADV,
3139		    adapter->tx_abs_int_delay.value);
3140
3141	if ((adapter->hw.mac.type == e1000_82571) ||
3142	    (adapter->hw.mac.type == e1000_82572)) {
3143		tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
3144		tarc |= SPEED_MODE_BIT;
3145		E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3146	} else if (adapter->hw.mac.type == e1000_80003es2lan) {
3147		tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
3148		tarc |= 1;
3149		E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3150		tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(1));
3151		tarc |= 1;
3152		E1000_WRITE_REG(&adapter->hw, E1000_TARC(1), tarc);
3153	}
3154
3155	adapter->txd_cmd = E1000_TXD_CMD_IFCS;
3156	if (adapter->tx_int_delay.value > 0)
3157		adapter->txd_cmd |= E1000_TXD_CMD_IDE;
3158
3159	/* Program the Transmit Control Register */
3160	tctl = E1000_READ_REG(&adapter->hw, E1000_TCTL);
3161	tctl &= ~E1000_TCTL_CT;
3162	tctl |= (E1000_TCTL_PSP | E1000_TCTL_RTLC | E1000_TCTL_EN |
3163		   (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT));
3164
3165	if (adapter->hw.mac.type >= e1000_82571)
3166		tctl |= E1000_TCTL_MULR;
3167
3168	/* This write will effectively turn on the transmit unit. */
3169	E1000_WRITE_REG(&adapter->hw, E1000_TCTL, tctl);
3170
3171}
3172
3173
3174/*********************************************************************
3175 *
3176 *  Free all transmit rings.
3177 *
3178 **********************************************************************/
3179static void
3180em_free_transmit_structures(struct adapter *adapter)
3181{
3182	struct tx_ring *txr = adapter->tx_rings;
3183
3184	for (int i = 0; i < adapter->num_queues; i++, txr++) {
3185		EM_TX_LOCK(txr);
3186		em_free_transmit_buffers(txr);
3187		em_dma_free(adapter, &txr->txdma);
3188		EM_TX_UNLOCK(txr);
3189		EM_TX_LOCK_DESTROY(txr);
3190	}
3191
3192	free(adapter->tx_rings, M_DEVBUF);
3193}
3194
3195/*********************************************************************
3196 *
3197 *  Free transmit ring related data structures.
3198 *
3199 **********************************************************************/
3200static void
3201em_free_transmit_buffers(struct tx_ring *txr)
3202{
3203	struct adapter		*adapter = txr->adapter;
3204	struct em_buffer	*txbuf;
3205
3206	INIT_DEBUGOUT("free_transmit_ring: begin");
3207
3208	if (txr->tx_buffers == NULL)
3209		return;
3210
3211	for (int i = 0; i < adapter->num_tx_desc; i++) {
3212		txbuf = &txr->tx_buffers[i];
3213		if (txbuf->m_head != NULL) {
3214			bus_dmamap_sync(txr->txtag, txbuf->map,
3215			    BUS_DMASYNC_POSTWRITE);
3216			bus_dmamap_unload(txr->txtag,
3217			    txbuf->map);
3218			m_freem(txbuf->m_head);
3219			txbuf->m_head = NULL;
3220			if (txbuf->map != NULL) {
3221				bus_dmamap_destroy(txr->txtag,
3222				    txbuf->map);
3223				txbuf->map = NULL;
3224			}
3225		} else if (txbuf->map != NULL) {
3226			bus_dmamap_unload(txr->txtag,
3227			    txbuf->map);
3228			bus_dmamap_destroy(txr->txtag,
3229			    txbuf->map);
3230			txbuf->map = NULL;
3231		}
3232	}
3233#if __FreeBSD_version >= 800000
3234	if (txr->br != NULL)
3235		buf_ring_free(txr->br, M_DEVBUF);
3236#endif
3237	if (txr->tx_buffers != NULL) {
3238		free(txr->tx_buffers, M_DEVBUF);
3239		txr->tx_buffers = NULL;
3240	}
3241	if (txr->txtag != NULL) {
3242		bus_dma_tag_destroy(txr->txtag);
3243		txr->txtag = NULL;
3244	}
3245	return;
3246}
3247
3248
3249/*********************************************************************
3250 *
3251 *  The offload context needs to be set when we transfer the first
3252 *  packet of a particular protocol (TCP/UDP). This routine has been
3253 *  enhanced to deal with inserted VLAN headers, and IPV6 (not complete)
3254 *
3255 *  Added back the old method of keeping the current context type
3256 *  and not setting if unnecessary, as this is reported to be a
3257 *  big performance win.  -jfv
3258 **********************************************************************/
3259static void
3260em_transmit_checksum_setup(struct tx_ring *txr, struct mbuf *mp,
3261    u32 *txd_upper, u32 *txd_lower)
3262{
3263	struct adapter			*adapter = txr->adapter;
3264	struct e1000_context_desc	*TXD = NULL;
3265	struct em_buffer *tx_buffer;
3266	struct ether_vlan_header *eh;
3267	struct ip *ip = NULL;
3268	struct ip6_hdr *ip6;
3269	int cur, ehdrlen;
3270	u32 cmd, hdr_len, ip_hlen;
3271	u16 etype;
3272	u8 ipproto;
3273
3274
3275	cmd = hdr_len = ipproto = 0;
3276	cur = txr->next_avail_desc;
3277
3278	/*
3279	 * Determine where frame payload starts.
3280	 * Jump over vlan headers if already present,
3281	 * helpful for QinQ too.
3282	 */
3283	eh = mtod(mp, struct ether_vlan_header *);
3284	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
3285		etype = ntohs(eh->evl_proto);
3286		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3287	} else {
3288		etype = ntohs(eh->evl_encap_proto);
3289		ehdrlen = ETHER_HDR_LEN;
3290	}
3291
3292	/*
3293	 * We only support TCP/UDP for IPv4 and IPv6 for the moment.
3294	 * TODO: Support SCTP too when it hits the tree.
3295	 */
3296	switch (etype) {
3297	case ETHERTYPE_IP:
3298		ip = (struct ip *)(mp->m_data + ehdrlen);
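		/* ip_hl counts 32-bit words; shift by 2 for bytes */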
3299		ip_hlen = ip->ip_hl << 2;
3300
3301		/* Setup of IP header checksum. */
3302		if (mp->m_pkthdr.csum_flags & CSUM_IP) {
3303			/*
3304			 * Start offset for header checksum calculation.
3305			 * End offset for header checksum calculation.
3306			 * Offset of place to put the checksum.
3307			 */
3308			TXD = (struct e1000_context_desc *)
3309			    &txr->tx_base[cur];
3310			TXD->lower_setup.ip_fields.ipcss = ehdrlen;
3311			TXD->lower_setup.ip_fields.ipcse =
3312			    htole16(ehdrlen + ip_hlen);
3313			TXD->lower_setup.ip_fields.ipcso =
3314			    ehdrlen + offsetof(struct ip, ip_sum);
3315			cmd |= E1000_TXD_CMD_IP;
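			/* POPTS lives in bits 8..15 of the upper dword */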
3316			*txd_upper |= E1000_TXD_POPTS_IXSM << 8;
3317		}
3318
3319		if (mp->m_len < ehdrlen + ip_hlen)
3320			return;	/* failure */
3321
3322		hdr_len = ehdrlen + ip_hlen;
3323		ipproto = ip->ip_p;
3324
3325		break;
3326	case ETHERTYPE_IPV6:
3327		ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
3328		ip_hlen = sizeof(struct ip6_hdr); /* XXX: No header stacking. */
3329
3330		if (mp->m_len < ehdrlen + ip_hlen)
3331			return;	/* failure */
3332
3333		/* IPv6 doesn't have a header checksum. */
3334
3335		hdr_len = ehdrlen + ip_hlen;
3336		ipproto = ip6->ip6_nxt;
3337
3338		break;
3339	default:
3340		*txd_upper = 0;
3341		*txd_lower = 0;
3342		return;
3343	}
3344
3345	switch (ipproto) {
3346	case IPPROTO_TCP:
3347		if (mp->m_pkthdr.csum_flags & CSUM_TCP) {
3348			*txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
3349			*txd_upper |= E1000_TXD_POPTS_TXSM << 8;
3350			/* no need for context if already set */
3351			if (txr->last_hw_offload == CSUM_TCP)
3352				return;
3353			txr->last_hw_offload = CSUM_TCP;
3354			/*
3355			 * Start offset for payload checksum calculation.
3356			 * End offset for payload checksum calculation.
3357			 * Offset of place to put the checksum.
3358			 */
3359			TXD = (struct e1000_context_desc *)
3360			    &txr->tx_base[cur];
3361			TXD->upper_setup.tcp_fields.tucss = hdr_len;
3362			TXD->upper_setup.tcp_fields.tucse = htole16(0);
3363			TXD->upper_setup.tcp_fields.tucso =
3364			    hdr_len + offsetof(struct tcphdr, th_sum);
3365			cmd |= E1000_TXD_CMD_TCP;
3366		}
3367		break;
3368	case IPPROTO_UDP:
3369	{
3370		if (mp->m_pkthdr.csum_flags & CSUM_UDP) {
3371			*txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
3372			*txd_upper |= E1000_TXD_POPTS_TXSM << 8;
3373			/* no need for context if already set */
3374			if (txr->last_hw_offload == CSUM_UDP)
3375				return;
3376			txr->last_hw_offload = CSUM_UDP;
3377			/*
3378			 * Start offset for header checksum calculation.
3379			 * End offset for header checksum calculation.
3380			 * Offset of place to put the checksum.
3381			 */
3382			TXD = (struct e1000_context_desc *)
3383			    &txr->tx_base[cur];
3384			TXD->upper_setup.tcp_fields.tucss = hdr_len;
3385			TXD->upper_setup.tcp_fields.tucse = htole16(0);
3386			TXD->upper_setup.tcp_fields.tucso =
3387			    hdr_len + offsetof(struct udphdr, uh_sum);
3388		}
		/* FALLTHROUGH */
3390	}
3391	default:
3392		break;
3393	}
3394
	if (TXD == NULL)	/* no new offload context was needed */
		return;

	TXD->tcp_seg_setup.data = htole32(0);
3396	TXD->cmd_and_length =
3397	    htole32(adapter->txd_cmd | E1000_TXD_CMD_DEXT | cmd);
3398	tx_buffer = &txr->tx_buffers[cur];
3399	tx_buffer->m_head = NULL;
3400	tx_buffer->next_eop = -1;
3401
3402	if (++cur == adapter->num_tx_desc)
3403		cur = 0;
3404
3405	txr->tx_avail--;
3406	txr->next_avail_desc = cur;
3407}
3408
3409
3410/**********************************************************************
3411 *
3412 *  Setup work for hardware segmentation offload (TSO)
3413 *
3414 **********************************************************************/
3415static bool
3416em_tso_setup(struct tx_ring *txr, struct mbuf *mp, u32 *txd_upper,
3417   u32 *txd_lower)
3418{
3419	struct adapter			*adapter = txr->adapter;
3420	struct e1000_context_desc	*TXD;
3421	struct em_buffer		*tx_buffer;
3422	struct ether_vlan_header	*eh;
3423	struct ip			*ip;
3424	struct ip6_hdr			*ip6;
3425	struct tcphdr			*th;
3426	int cur, ehdrlen, hdr_len, ip_hlen, isip6;
3427	u16 etype;
3428
3429	/*
3430	 * This function could/should be extended to support IP/IPv6
3431	 * fragmentation as well.  But as they say, one step at a time.
3432	 */
3433
3434	/*
3435	 * Determine where frame payload starts.
3436	 * Jump over vlan headers if already present,
3437	 * helpful for QinQ too.
3438	 */
3439	eh = mtod(mp, struct ether_vlan_header *);
3440	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
3441		etype = ntohs(eh->evl_proto);
3442		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3443	} else {
3444		etype = ntohs(eh->evl_encap_proto);
3445		ehdrlen = ETHER_HDR_LEN;
3446	}
3447
3448	/* Ensure we have at least the IP+TCP header in the first mbuf. */
3449	if (mp->m_len < ehdrlen + sizeof(struct ip) + sizeof(struct tcphdr))
3450		return FALSE;	/* -1 */
3451
3452	/*
 * We only support TCP over IPv4 for the moment; IPv6 is not yet supported.
3454	 * TODO: Support SCTP too when it hits the tree.
3455	 */
3456	switch (etype) {
3457	case ETHERTYPE_IP:
3458		isip6 = 0;
3459		ip = (struct ip *)(mp->m_data + ehdrlen);
3460		if (ip->ip_p != IPPROTO_TCP)
3461			return FALSE;	/* 0 */
3462		ip->ip_len = 0;
3463		ip->ip_sum = 0;
3464		ip_hlen = ip->ip_hl << 2;
3465		if (mp->m_len < ehdrlen + ip_hlen + sizeof(struct tcphdr))
3466			return FALSE;	/* -1 */
3467		th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
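		/*
		 * Pre-seed th_sum with a pseudo-header checksum that
		 * omits the length (it varies per segment) so the
		 * hardware can finalize each packet's checksum.
		 */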
3468#if 1
3469		th->th_sum = in_pseudo(ip->ip_src.s_addr,
3470		    ip->ip_dst.s_addr, htons(IPPROTO_TCP));
3471#else
3472		th->th_sum = mp->m_pkthdr.csum_data;
3473#endif
3474		break;
3475	case ETHERTYPE_IPV6:
3476		isip6 = 1;
3477		return FALSE;			/* Not supported yet. */
3478		ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
3479		if (ip6->ip6_nxt != IPPROTO_TCP)
3480			return FALSE;	/* 0 */
3481		ip6->ip6_plen = 0;
3482		ip_hlen = sizeof(struct ip6_hdr); /* XXX: no header stacking. */
3483		if (mp->m_len < ehdrlen + ip_hlen + sizeof(struct tcphdr))
3484			return FALSE;	/* -1 */
3485		th = (struct tcphdr *)((caddr_t)ip6 + ip_hlen);
3486#if 0
		th->th_sum = in6_pseudo(ip6->ip6_src, ip6->ip6_dst,
3488		    htons(IPPROTO_TCP));	/* XXX: function notyet. */
3489#else
3490		th->th_sum = mp->m_pkthdr.csum_data;
3491#endif
3492		break;
3493	default:
3494		return FALSE;
3495	}
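	/* th_off is in 32-bit words, so << 2 gives the TCP header length */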
3496	hdr_len = ehdrlen + ip_hlen + (th->th_off << 2);
3497
3498	*txd_lower = (E1000_TXD_CMD_DEXT |	/* Extended descr type */
3499		      E1000_TXD_DTYP_D |	/* Data descr type */
3500		      E1000_TXD_CMD_TSE);	/* Do TSE on this packet */
3501
3502	/* IP and/or TCP header checksum calculation and insertion. */
3503	*txd_upper = ((isip6 ? 0 : E1000_TXD_POPTS_IXSM) |
3504		      E1000_TXD_POPTS_TXSM) << 8;
3505
3506	cur = txr->next_avail_desc;
3507	tx_buffer = &txr->tx_buffers[cur];
3508	TXD = (struct e1000_context_desc *) &txr->tx_base[cur];
3509
3510	/* IPv6 doesn't have a header checksum. */
3511	if (!isip6) {
3512		/*
3513		 * Start offset for header checksum calculation.
3514		 * End offset for header checksum calculation.
		 * Offset of place to put the checksum.
3516		 */
3517		TXD->lower_setup.ip_fields.ipcss = ehdrlen;
3518		TXD->lower_setup.ip_fields.ipcse =
3519		    htole16(ehdrlen + ip_hlen - 1);
3520		TXD->lower_setup.ip_fields.ipcso =
3521		    ehdrlen + offsetof(struct ip, ip_sum);
3522	}
3523	/*
3524	 * Start offset for payload checksum calculation.
3525	 * End offset for payload checksum calculation.
3526	 * Offset of place to put the checksum.
3527	 */
3528	TXD->upper_setup.tcp_fields.tucss =
3529	    ehdrlen + ip_hlen;
3530	TXD->upper_setup.tcp_fields.tucse = 0;
3531	TXD->upper_setup.tcp_fields.tucso =
3532	    ehdrlen + ip_hlen + offsetof(struct tcphdr, th_sum);
3533	/*
3534	 * Payload size per packet w/o any headers.
3535	 * Length of all headers up to payload.
3536	 */
3537	TXD->tcp_seg_setup.fields.mss = htole16(mp->m_pkthdr.tso_segsz);
3538	TXD->tcp_seg_setup.fields.hdr_len = hdr_len;
3539
3540	TXD->cmd_and_length = htole32(adapter->txd_cmd |
3541				E1000_TXD_CMD_DEXT |	/* Extended descr */
3542				E1000_TXD_CMD_TSE |	/* TSE context */
3543				(isip6 ? 0 : E1000_TXD_CMD_IP) |
3544				E1000_TXD_CMD_TCP |	/* Do TCP checksum */
3545				(mp->m_pkthdr.len - (hdr_len))); /* Total len */
3546
3547	tx_buffer->m_head = NULL;
3548	tx_buffer->next_eop = -1;
3549
3550	if (++cur == adapter->num_tx_desc)
3551		cur = 0;
3552
3553	txr->tx_avail--;
3554	txr->next_avail_desc = cur;
3555	txr->tx_tso = TRUE;
3556
3557	return TRUE;
3558}
3559
3560
3561/**********************************************************************
3562 *
3563 *  Examine each tx_buffer in the used queue. If the hardware is done
3564 *  processing the packet then free associated resources. The
3565 *  tx_buffer is put back on the free queue.
3566 *
3567 **********************************************************************/
3568static bool
3569em_txeof(struct tx_ring *txr)
3570{
3571	struct adapter	*adapter = txr->adapter;
3572        int first, last, done, num_avail;
3573        struct em_buffer *tx_buffer;
3574        struct e1000_tx_desc   *tx_desc, *eop_desc;
3575	struct ifnet   *ifp = adapter->ifp;
3576
3577	EM_TX_LOCK_ASSERT(txr);
3578
3579        if (txr->tx_avail == adapter->num_tx_desc)
3580                return (FALSE);
3581
3582        num_avail = txr->tx_avail;
3583        first = txr->next_to_clean;
3584        tx_desc = &txr->tx_base[first];
3585        tx_buffer = &txr->tx_buffers[first];
3586	last = tx_buffer->next_eop;
3587        eop_desc = &txr->tx_base[last];
3588
3589	/*
3590	 * What this does is get the index of the
3591	 * first descriptor AFTER the EOP of the
3592	 * first packet, that way we can do the
3593	 * simple comparison on the inner while loop.
3594	 */
3595	if (++last == adapter->num_tx_desc)
3596 		last = 0;
3597	done = last;
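	/*
	 * Example (illustrative): with num_tx_desc = 256, first = 250
	 * and the packet's EOP at descriptor 253, done becomes 254 and
	 * the loop below cleans descriptors 250 through 253 inclusive.
	 */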
3598
3599        bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3600            BUS_DMASYNC_POSTREAD);
3601
3602        while (eop_desc->upper.fields.status & E1000_TXD_STAT_DD) {
3603		/* We clean the range of the packet */
3604		while (first != done) {
3605                	tx_desc->upper.data = 0;
3606                	tx_desc->lower.data = 0;
3607                	tx_desc->buffer_addr = 0;
3608                	++num_avail;
3609
3610			if (tx_buffer->m_head) {
3611				ifp->if_opackets++;
3612				bus_dmamap_sync(txr->txtag,
3613				    tx_buffer->map,
3614				    BUS_DMASYNC_POSTWRITE);
3615				bus_dmamap_unload(txr->txtag,
3616				    tx_buffer->map);
3617
3618                        	m_freem(tx_buffer->m_head);
3619                        	tx_buffer->m_head = NULL;
3620                	}
3621			tx_buffer->next_eop = -1;
3622			txr->watchdog_time = ticks;
3623
3624	                if (++first == adapter->num_tx_desc)
3625				first = 0;
3626
3627	                tx_buffer = &txr->tx_buffers[first];
3628			tx_desc = &txr->tx_base[first];
3629		}
3630		/* See if we can continue to the next packet */
3631		last = tx_buffer->next_eop;
3632		if (last != -1) {
3633        		eop_desc = &txr->tx_base[last];
3634			/* Get new done point */
3635			if (++last == adapter->num_tx_desc) last = 0;
3636			done = last;
3637		} else
3638			break;
3639        }
3640        bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3641            BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3642
3643        txr->next_to_clean = first;
3644
3645        /*
3646         * If we have enough room, clear IFF_DRV_OACTIVE to
3647         * tell the stack that it is OK to send packets.
3648         * If there are no pending descriptors, clear the watchdog.
3649         */
3650        if (num_avail > EM_TX_CLEANUP_THRESHOLD) {
3651                ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
3652                if (num_avail == adapter->num_tx_desc) {
3653			txr->watchdog_check = FALSE;
3654        		txr->tx_avail = num_avail;
3655			return (FALSE);
3656		}
3657        }
3658
3659        txr->tx_avail = num_avail;
3660	return (TRUE);
3661}
3662
3663
3664/*********************************************************************
3665 *
3666 *  Refresh RX descriptor mbufs from system mbuf buffer pool.
3667 *
3668 **********************************************************************/
3669static void
3670em_refresh_mbufs(struct rx_ring *rxr, int limit)
3671{
3672	struct adapter		*adapter = rxr->adapter;
3673	struct mbuf		*m;
3674	bus_dma_segment_t	segs[1];
3675	bus_dmamap_t		map;
3676	struct em_buffer	*rxbuf;
3677	int			i, error, nsegs, cleaned;
3678
3679	i = rxr->next_to_refresh;
3680	cleaned = -1;
3681	while (i != limit) {
3682		m = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR);
3683		if (m == NULL)
3684			goto update;
3685		m->m_len = m->m_pkthdr.len = MCLBYTES;
3686
3687		if (adapter->max_frame_size <= (MCLBYTES - ETHER_ALIGN))
3688			m_adj(m, ETHER_ALIGN);
3689
3690		/*
3691		 * Using memory from the mbuf cluster pool, invoke the
3692		 * bus_dma machinery to arrange the memory mapping.
3693		 */
3694		error = bus_dmamap_load_mbuf_sg(rxr->rxtag, rxr->rx_sparemap,
3695		    m, segs, &nsegs, BUS_DMA_NOWAIT);
3696		if (error != 0) {
3697			m_free(m);
3698			goto update;
3699		}
3700
3701		/* If nsegs is wrong then the stack is corrupt. */
3702		KASSERT(nsegs == 1, ("Too many segments returned!"));
3703
3704		rxbuf = &rxr->rx_buffers[i];
3705		if (rxbuf->m_head != NULL)
3706			bus_dmamap_unload(rxr->rxtag, rxbuf->map);
3707
3708		map = rxbuf->map;
3709		rxbuf->map = rxr->rx_sparemap;
3710		rxr->rx_sparemap = map;
3711		bus_dmamap_sync(rxr->rxtag,
3712		    rxbuf->map, BUS_DMASYNC_PREREAD);
3713		rxbuf->m_head = m;
3714		rxr->rx_base[i].buffer_addr = htole64(segs[0].ds_addr);
3715
3716		cleaned = i;
3717		/* Calculate next index */
3718		if (++i == adapter->num_rx_desc)
3719			i = 0;
3720		/* This is the work marker for refresh */
3721		rxr->next_to_refresh = i;
3722	}
3723update:
3724	bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
3725	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3726	if (cleaned != -1) /* Update tail index */
3727		E1000_WRITE_REG(&adapter->hw,
3728		    E1000_RDT(rxr->me), cleaned);
3729
3730	return;
3731}
3732
3733
3734/*********************************************************************
3735 *
3736 *  Allocate memory for rx_buffer structures. Since we use one
3737 *  rx_buffer per received packet, the maximum number of rx_buffer's
3738 *  that we'll need is equal to the number of receive descriptors
3739 *  that we've allocated.
3740 *
3741 **********************************************************************/
3742static int
3743em_allocate_receive_buffers(struct rx_ring *rxr)
3744{
3745	struct adapter		*adapter = rxr->adapter;
3746	device_t		dev = adapter->dev;
3747	struct em_buffer	*rxbuf;
3748	int			error;
3749
3750	rxr->rx_buffers = malloc(sizeof(struct em_buffer) *
3751	    adapter->num_rx_desc, M_DEVBUF, M_NOWAIT | M_ZERO);
3752	if (rxr->rx_buffers == NULL) {
3753		device_printf(dev, "Unable to allocate rx_buffer memory\n");
3754		return (ENOMEM);
3755	}
3756
3757	error = bus_dma_tag_create(bus_get_dma_tag(dev), /* parent */
3758				1, 0,			/* alignment, bounds */
3759				BUS_SPACE_MAXADDR,	/* lowaddr */
3760				BUS_SPACE_MAXADDR,	/* highaddr */
3761				NULL, NULL,		/* filter, filterarg */
3762				MCLBYTES,		/* maxsize */
3763				1,			/* nsegments */
3764				MCLBYTES,		/* maxsegsize */
3765				0,			/* flags */
3766				NULL,			/* lockfunc */
3767				NULL,			/* lockarg */
3768				&rxr->rxtag);
3769	if (error) {
3770		device_printf(dev, "%s: bus_dma_tag_create failed %d\n",
3771		    __func__, error);
3772		goto fail;
3773	}
3774
3775	/* Create the spare map (used by getbuf) */
3776	error = bus_dmamap_create(rxr->rxtag, BUS_DMA_NOWAIT,
3777	     &rxr->rx_sparemap);
3778	if (error) {
3779		device_printf(dev, "%s: bus_dmamap_create failed: %d\n",
3780		    __func__, error);
3781		goto fail;
3782	}
3783
3784	/* Create a dmamap for each receive buffer. */
3785	for (int i = 0; i < adapter->num_rx_desc; i++) {
3786		rxbuf = &rxr->rx_buffers[i];
3787		error = bus_dmamap_create(rxr->rxtag, BUS_DMA_NOWAIT,
3788		    &rxbuf->map);
3789		if (error) {
3790			device_printf(dev, "%s: bus_dmamap_create failed: %d\n",
3791			    __func__, error);
3792			goto fail;
3793		}
3794	}
3795
3796	return (0);
3797
3798fail:
3799	em_free_receive_structures(adapter);
3800	return (error);
3801}
3802
3803
3804/*********************************************************************
3805 *
3806 *  Initialize a receive ring and its buffers.
3807 *
3808 **********************************************************************/
3809static int
3810em_setup_receive_ring(struct rx_ring *rxr)
3811{
3812	struct	adapter 	*adapter = rxr->adapter;
3813	struct em_buffer	*rxbuf;
3814	bus_dma_segment_t	seg[1];
3815	int			rsize, nsegs, error;
3816
3817
3818	/* Clear the ring contents */
3819	EM_RX_LOCK(rxr);
3820	rsize = roundup2(adapter->num_rx_desc *
3821	    sizeof(struct e1000_rx_desc), EM_DBA_ALIGN);
3822	bzero((void *)rxr->rx_base, rsize);
3823
3824	/*
3825	** Free current RX buffer structs and their mbufs
3826	*/
3827	for (int i = 0; i < adapter->num_rx_desc; i++) {
3828		rxbuf = &rxr->rx_buffers[i];
3829		if (rxbuf->m_head != NULL) {
3830			bus_dmamap_sync(rxr->rxtag, rxbuf->map,
3831			    BUS_DMASYNC_POSTREAD);
3832			bus_dmamap_unload(rxr->rxtag, rxbuf->map);
3833			m_freem(rxbuf->m_head);
3834		}
3835	}
3836
3837	/* Now replenish the mbufs */
3838	for (int j = 0; j != adapter->num_rx_desc; ++j) {
3839
3840		rxbuf = &rxr->rx_buffers[j];
3841		rxbuf->m_head = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR);
3842		if (rxbuf->m_head == NULL)
3843			panic("RX ring hdr initialization failed!\n");
3844		rxbuf->m_head->m_len = MCLBYTES;
3845		rxbuf->m_head->m_flags &= ~M_HASFCS; /* we strip it */
3846		rxbuf->m_head->m_pkthdr.len = MCLBYTES;
3847
3848		/* Get the memory mapping */
3849		error = bus_dmamap_load_mbuf_sg(rxr->rxtag,
3850		    rxbuf->map, rxbuf->m_head, seg,
3851		    &nsegs, BUS_DMA_NOWAIT);
3852		if (error != 0)
3853			panic("RX ring dma initialization failed!\n");
3854		bus_dmamap_sync(rxr->rxtag,
3855		    rxbuf->map, BUS_DMASYNC_PREREAD);
3856
3857		/* Update descriptor */
3858		rxr->rx_base[j].buffer_addr = htole64(seg[0].ds_addr);
3859	}
3860
3861
3862	/* Setup our descriptor indices */
3863	rxr->next_to_check = 0;
3864	rxr->next_to_refresh = 0;
3865
3866	bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
3867	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3868
3869	EM_RX_UNLOCK(rxr);
3870	return (0);
3871}
3872
3873/*********************************************************************
3874 *
3875 *  Initialize all receive rings.
3876 *
3877 **********************************************************************/
3878static int
3879em_setup_receive_structures(struct adapter *adapter)
3880{
3881	struct rx_ring *rxr = adapter->rx_rings;
3882	int j;
3883
3884	for (j = 0; j < adapter->num_queues; j++, rxr++)
3885		if (em_setup_receive_ring(rxr))
3886			goto fail;
3887
3888	return (0);
3889fail:
3890	/*
3891	 * Free the RX buffers allocated so far; we only handle
3892	 * the rings that completed, since the failing case will have
3893	 * cleaned up after itself. 'j' failed, so it's the terminus.
3894	 */
3895	for (int i = 0; i < j; ++i) {
3896		rxr = &adapter->rx_rings[i];
3897		for (int n = 0; n < adapter->num_rx_desc; n++) {
3898			struct em_buffer *rxbuf;
3899			rxbuf = &rxr->rx_buffers[n];
3900			if (rxbuf->m_head != NULL) {
3901				bus_dmamap_sync(rxr->rxtag, rxbuf->map,
3902			  	  BUS_DMASYNC_POSTREAD);
3903				bus_dmamap_unload(rxr->rxtag, rxbuf->map);
3904				m_freem(rxbuf->m_head);
3905				rxbuf->m_head = NULL;
3906			}
3907		}
3908	}
3909
3910	return (ENOBUFS);
3911}
3912
3913/*********************************************************************
3914 *
3915 *  Free all receive rings.
3916 *
3917 **********************************************************************/
3918static void
3919em_free_receive_structures(struct adapter *adapter)
3920{
3921	struct rx_ring *rxr = adapter->rx_rings;
3922
3923	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
3924		em_free_receive_buffers(rxr);
3925		/* Free the ring memory as well */
3926		em_dma_free(adapter, &rxr->rxdma);
3927		EM_RX_LOCK_DESTROY(rxr);
3928	}
3929
3930	free(adapter->rx_rings, M_DEVBUF);
3931}
3932
3933
3934/*********************************************************************
3935 *
3936 *  Free receive ring data structures
3937 *
3938 **********************************************************************/
3939static void
3940em_free_receive_buffers(struct rx_ring *rxr)
3941{
3942	struct adapter		*adapter = rxr->adapter;
3943	struct em_buffer	*rxbuf = NULL;
3944
3945	INIT_DEBUGOUT("free_receive_buffers: begin");
3946
3947	if (rxr->rx_sparemap) {
3948		bus_dmamap_destroy(rxr->rxtag, rxr->rx_sparemap);
3949		rxr->rx_sparemap = NULL;
3950	}
3951
3952	if (rxr->rx_buffers != NULL) {
3953		for (int i = 0; i < adapter->num_rx_desc; i++) {
3954			rxbuf = &rxr->rx_buffers[i];
3955			if (rxbuf->map != NULL) {
3956				bus_dmamap_sync(rxr->rxtag, rxbuf->map,
3957				    BUS_DMASYNC_POSTREAD);
3958				bus_dmamap_unload(rxr->rxtag, rxbuf->map);
3959				bus_dmamap_destroy(rxr->rxtag, rxbuf->map);
3960			}
3961			if (rxbuf->m_head != NULL) {
3962				m_freem(rxbuf->m_head);
3963				rxbuf->m_head = NULL;
3964			}
3965		}
3966		free(rxr->rx_buffers, M_DEVBUF);
3967		rxr->rx_buffers = NULL;
3968	}
3969
3970	if (rxr->rxtag != NULL) {
3971		bus_dma_tag_destroy(rxr->rxtag);
3972		rxr->rxtag = NULL;
3973	}
3974
3975	return;
3976}
3977
3978
3979/*********************************************************************
3980 *
3981 *  Enable receive unit.
3982 *
3983 **********************************************************************/
3984#define MAX_INTS_PER_SEC	8000
3985#define DEFAULT_ITR	     1000000000/(MAX_INTS_PER_SEC * 256)
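/*
 * Worked out: 1000000000 / (8000 * 256) = 488, and the ITR register
 * counts in 256 ns units, so 488 * 256 ns ~= 125 us between interrupts,
 * i.e. roughly MAX_INTS_PER_SEC (8000) interrupts per second.
 */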
3986
3987static void
3988em_initialize_receive_unit(struct adapter *adapter)
3989{
3990	struct rx_ring	*rxr = adapter->rx_rings;
3991	struct ifnet	*ifp = adapter->ifp;
3992	struct e1000_hw	*hw = &adapter->hw;
3993	u64	bus_addr;
3994	u32	rctl, rxcsum;
3995
3996	INIT_DEBUGOUT("em_initialize_receive_units: begin");
3997
3998	/*
3999	 * Make sure receives are disabled while setting
4000	 * up the descriptor ring
4001	 */
4002	rctl = E1000_READ_REG(hw, E1000_RCTL);
4003	E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
4004
4005	E1000_WRITE_REG(&adapter->hw, E1000_RADV,
4006	    adapter->rx_abs_int_delay.value);
4007	/*
4008	 * Set the interrupt throttling rate. Value is calculated
4009	 * as DEFAULT_ITR = 1 second / (MAX_INTS_PER_SEC * 256 ns).
4010	 */
4011	E1000_WRITE_REG(hw, E1000_ITR, DEFAULT_ITR);
4012
4013	/*
4014	** When using MSIX interrupts we need to throttle
4015	** using the EITR register (82574 only)
4016	*/
4017	if (hw->mac.type == e1000_82574)
4018		for (int i = 0; i < 4; i++)
4019			E1000_WRITE_REG(hw, E1000_EITR_82574(i),
4020			    DEFAULT_ITR);
4021
4022	/* Disable accelerated acknowledgment */
4023	if (adapter->hw.mac.type == e1000_82574)
4024		E1000_WRITE_REG(hw, E1000_RFCTL, E1000_RFCTL_ACK_DIS);
4025
4026	if (ifp->if_capenable & IFCAP_RXCSUM) {
4027		rxcsum = E1000_READ_REG(hw, E1000_RXCSUM);
4028		rxcsum |= (E1000_RXCSUM_IPOFL | E1000_RXCSUM_TUOFL);
4029		E1000_WRITE_REG(hw, E1000_RXCSUM, rxcsum);
4030	}
4031
4032	/*
4033	** XXX TEMPORARY WORKAROUND: on some systems with 82573
4034	** long latencies are observed, like Lenovo X60. This
4035	** change eliminates the problem, but since having positive
4036	** values in RDTR is a known source of problems on other
4037	** platforms another solution is being sought.
4038	** platforms, another solution is being sought.
4039	if (hw->mac.type == e1000_82573)
4040		E1000_WRITE_REG(hw, E1000_RDTR, 0x20);
4041
4042	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4043		/* Setup the Base and Length of the Rx Descriptor Ring */
4044		bus_addr = rxr->rxdma.dma_paddr;
4045		E1000_WRITE_REG(hw, E1000_RDLEN(i),
4046		    adapter->num_rx_desc * sizeof(struct e1000_rx_desc));
4047		E1000_WRITE_REG(hw, E1000_RDBAH(i), (u32)(bus_addr >> 32));
4048		E1000_WRITE_REG(hw, E1000_RDBAL(i), (u32)bus_addr);
4049		/* Setup the Head and Tail Descriptor Pointers */
4050		E1000_WRITE_REG(hw, E1000_RDH(i), 0);
4051		E1000_WRITE_REG(hw, E1000_RDT(i), adapter->num_rx_desc - 1);
4052	}
4053
4054	/* Setup the Receive Control Register */
4055	rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
4056	rctl |= E1000_RCTL_EN | E1000_RCTL_BAM |
4057	    E1000_RCTL_LBM_NO | E1000_RCTL_RDMTS_HALF |
4058	    (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
4059
4060        /* Strip the CRC */
4061        rctl |= E1000_RCTL_SECRC;
4062
4063        /* Make sure VLAN Filters are off */
4064        rctl &= ~E1000_RCTL_VFE;
4065	rctl &= ~E1000_RCTL_SBP;
4066	rctl |= E1000_RCTL_SZ_2048;
4067	if (ifp->if_mtu > ETHERMTU)
4068		rctl |= E1000_RCTL_LPE;
4069	else
4070		rctl &= ~E1000_RCTL_LPE;
4071
4072	/* Write out the settings */
4073	E1000_WRITE_REG(hw, E1000_RCTL, rctl);
4074
4075	return;
4076}
4077
4078
4079/*********************************************************************
4080 *
4081 *  This routine executes in interrupt context. It replenishes
4082 *  the mbufs in the descriptor ring and sends data which has been
4083 *  dma'ed into host memory to the upper layer.
4084 *
4085 *  We loop at most count times if count is > 0, or until done if
4086 *  count < 0.
4087 *
4088 *  For polling we also now return the number of cleaned packets
4089 *********************************************************************/
4090static int
4091em_rxeof(struct rx_ring *rxr, int count)
4092{
4093	struct adapter		*adapter = rxr->adapter;
4094	struct ifnet		*ifp = adapter->ifp;
4095	struct mbuf		*mp, *sendmp;
4096	u8			status = 0;
4097	u16 			len;
4098	int			i, processed, rxdone = 0;
4099	bool			eop;
4100	struct e1000_rx_desc	*cur;
4101
4102	EM_RX_LOCK(rxr);
4103
4104	for (i = rxr->next_to_check, processed = 0; count != 0;) {
4105
4106		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
4107			break;
4108
4109		bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4110		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
4111
4112		cur = &rxr->rx_base[i];
4113		status = cur->status;
4114		mp = sendmp = NULL;
4115
4116		if ((status & E1000_RXD_STAT_DD) == 0)
4117			break;
4118
4119		len = le16toh(cur->length);
4120		eop = (status & E1000_RXD_STAT_EOP) != 0;
4121		count--;
4122
4123		if ((cur->errors & E1000_RXD_ERR_FRAME_ERR_MASK) == 0) {
4124
4125			/* Assign correct length to the current fragment */
4126			mp = rxr->rx_buffers[i].m_head;
4127			mp->m_len = len;
4128
4129			if (rxr->fmp == NULL) {
4130				mp->m_pkthdr.len = len;
4131				rxr->fmp = mp; /* Store the first mbuf */
4132				rxr->lmp = mp;
4133			} else {
4134				/* Chain mbuf's together */
4135				mp->m_flags &= ~M_PKTHDR;
4136				rxr->lmp->m_next = mp;
4137				rxr->lmp = rxr->lmp->m_next;
4138				rxr->fmp->m_pkthdr.len += len;
4139			}
4140
4141			if (eop) {
4142				rxr->fmp->m_pkthdr.rcvif = ifp;
4143				ifp->if_ipackets++;
4144				em_receive_checksum(cur, rxr->fmp);
4145#ifndef __NO_STRICT_ALIGNMENT
4146				if (adapter->max_frame_size >
4147				    (MCLBYTES - ETHER_ALIGN) &&
4148				    em_fixup_rx(rxr) != 0)
4149					goto skip;
4150#endif
4151				if (status & E1000_RXD_STAT_VP) {
4152					rxr->fmp->m_pkthdr.ether_vtag =
4153					    (le16toh(cur->special) &
4154					    E1000_RXD_SPC_VLAN_MASK);
4155					rxr->fmp->m_flags |= M_VLANTAG;
4156				}
4157#ifdef EM_MULTIQUEUE
4158				rxr->fmp->m_pkthdr.flowid = curcpu;
4159				rxr->fmp->m_flags |= M_FLOWID;
4160#endif
4161#ifndef __NO_STRICT_ALIGNMENT
4162skip:
4163#endif
4164				sendmp = rxr->fmp;
4165				rxr->fmp = NULL;
4166				rxr->lmp = NULL;
4167			}
4168		} else {
4169			ifp->if_ierrors++;
4170			/* Reuse loaded DMA map and just update mbuf chain */
4171			mp = rxr->rx_buffers[i].m_head;
4172			mp->m_len = mp->m_pkthdr.len = MCLBYTES;
4173			mp->m_data = mp->m_ext.ext_buf;
4174			mp->m_next = NULL;
4175			if (adapter->max_frame_size <=
4176			    (MCLBYTES - ETHER_ALIGN))
4177				m_adj(mp, ETHER_ALIGN);
4178			if (rxr->fmp != NULL) {
4179				m_freem(rxr->fmp);
4180				rxr->fmp = NULL;
4181				rxr->lmp = NULL;
4182			}
4183			sendmp = NULL;
4184		}
4185
4186		/* Zero out the receive descriptors status. */
4187		cur->status = 0;
4188		++rxdone;	/* cumulative for POLL */
4189		++processed;
4190
4191		/* Advance our pointers to the next descriptor. */
4192		if (++i == adapter->num_rx_desc)
4193			i = 0;
4194
4195		/* Send to the stack */
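		/*
		 * The RX lock is dropped around if_input() so the stack
		 * may re-enter the driver without recursing on the lock;
		 * next_to_check is saved and reloaded in case the ring
		 * state moved in the meantime.
		 */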
4196		if (sendmp != NULL) {
4197			rxr->next_to_check = i;
4198			EM_RX_UNLOCK(rxr);
4199			(*ifp->if_input)(ifp, sendmp);
4200			EM_RX_LOCK(rxr);
4201			i = rxr->next_to_check;
4202		}
4203
4204		/* Only refresh mbufs every 8 descriptors */
4205		if (processed == 8) {
4206			em_refresh_mbufs(rxr, i);
4207			processed = 0;
4208		}
4209	}
4210
4211	/* Catch any remaining refresh work */
4212	if (processed != 0) {
4213		em_refresh_mbufs(rxr, i);
4214		processed = 0;
4215	}
4216
4217	rxr->next_to_check = i;
4218	EM_RX_UNLOCK(rxr);
4219
4220#ifdef DEVICE_POLLING
4221	return (rxdone);
4222#else
4223	return ((status & E1000_RXD_STAT_DD) ? TRUE : FALSE);
4224#endif
4225}
4226
4227#ifndef __NO_STRICT_ALIGNMENT
4228/*
4229 * When jumbo frames are enabled we should realign the entire payload on
4230 * architectures with strict alignment. This is a serious design mistake of the
4231 * 8254x, as it nullifies DMA operations: the 8254x only allows the RX buffer
4232 * size to be 2048/4096/8192/16384. What we really want is 2048 - ETHER_ALIGN,
4233 * to align its payload. On architectures without strict alignment restrictions
4234 * the 8254x still performs unaligned memory accesses, which reduce performance
4235 * too. To avoid copying over an entire frame to align it, we allocate a new
4236 * mbuf and copy the ethernet header to the new mbuf. The new mbuf is prepended
4237 * to the existing mbuf chain.
4238 *
4239 * Be aware, the best performance of the 8254x is achieved only when jumbo
4240 * frames are not used at all on architectures with strict alignment.
4241 */
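/*
 * Example (illustrative): with an untagged frame the 14-byte ethernet
 * header leaves the IP header only 2-byte aligned in the cluster. The
 * first branch below shifts the whole first mbuf forward by
 * ETHER_HDR_LEN so the IP header lands 4-byte aligned; the second
 * branch splits the ethernet header into its own mbuf so the stack's
 * normal pullup logic re-copies the headers into aligned storage.
 */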
4242static int
4243em_fixup_rx(struct rx_ring *rxr)
4244{
4245	struct adapter *adapter = rxr->adapter;
4246	struct mbuf *m, *n;
4247	int error;
4248
4249	error = 0;
4250	m = rxr->fmp;
4251	if (m->m_len <= (MCLBYTES - ETHER_HDR_LEN)) {
4252		bcopy(m->m_data, m->m_data + ETHER_HDR_LEN, m->m_len);
4253		m->m_data += ETHER_HDR_LEN;
4254	} else {
4255		MGETHDR(n, M_DONTWAIT, MT_DATA);
4256		if (n != NULL) {
4257			bcopy(m->m_data, n->m_data, ETHER_HDR_LEN);
4258			m->m_data += ETHER_HDR_LEN;
4259			m->m_len -= ETHER_HDR_LEN;
4260			n->m_len = ETHER_HDR_LEN;
4261			M_MOVE_PKTHDR(n, m);
4262			n->m_next = m;
4263			rxr->fmp = n;
4264		} else {
4265			adapter->dropped_pkts++;
4266			m_freem(rxr->fmp);
4267			rxr->fmp = NULL;
4268			error = ENOMEM;
4269		}
4270	}
4271
4272	return (error);
4273}
4274#endif
4275
4276/*********************************************************************
4277 *
4278 *  Verify that the hardware indicated that the checksum is valid.
4279 *  Inform the stack about the status of checksum so that stack
4280 *  doesn't spend time verifying the checksum.
4281 *
4282 *********************************************************************/
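/*
 * Example (illustrative): a TCP/IPv4 frame that passes both hardware
 * checks leaves here with csum_flags = CSUM_IP_CHECKED | CSUM_IP_VALID
 * | CSUM_DATA_VALID | CSUM_PSEUDO_HDR and csum_data = 0xffff, so the
 * stack skips its own checksum verification entirely.
 */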
4283static void
4284em_receive_checksum(struct e1000_rx_desc *rx_desc, struct mbuf *mp)
4285{
4286	/* Ignore Checksum bit is set */
4287	if (rx_desc->status & E1000_RXD_STAT_IXSM) {
4288		mp->m_pkthdr.csum_flags = 0;
4289		return;
4290	}
4291
4292	if (rx_desc->status & E1000_RXD_STAT_IPCS) {
4293		/* Did it pass? */
4294		if (!(rx_desc->errors & E1000_RXD_ERR_IPE)) {
4295			/* IP Checksum Good */
4296			mp->m_pkthdr.csum_flags = CSUM_IP_CHECKED;
4297			mp->m_pkthdr.csum_flags |= CSUM_IP_VALID;
4298
4299		} else {
4300			mp->m_pkthdr.csum_flags = 0;
4301		}
4302	}
4303
4304	if (rx_desc->status & E1000_RXD_STAT_TCPCS) {
4305		/* Did it pass? */
4306		if (!(rx_desc->errors & E1000_RXD_ERR_TCPE)) {
4307			mp->m_pkthdr.csum_flags |=
4308			(CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
4309			mp->m_pkthdr.csum_data = htons(0xffff);
4310		}
4311	}
4312}
4313
4314/*
4315 * This routine is run via a vlan
4316 * config EVENT
4317 */
4318static void
4319em_register_vlan(void *arg, struct ifnet *ifp, u16 vtag)
4320{
4321	struct adapter	*adapter = ifp->if_softc;
4322	u32		index, bit;
4323
4324	if (ifp->if_softc !=  arg)   /* Not our event */
4325		return;
4326
4327	if ((vtag == 0) || (vtag > 4095))       /* Invalid ID */
4328                return;
4329
4330	index = (vtag >> 5) & 0x7F;
4331	bit = vtag & 0x1F;
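	/*
	 * Example (illustrative): vtag 100 maps to index (100 >> 5) = 3
	 * and bit (100 & 0x1F) = 4, so bit 4 of shadow word 3 is set;
	 * the 128 32-bit words cover all 4096 possible VLAN IDs.
	 */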
4332	em_shadow_vfta[index] |= (1 << bit);
4333	++adapter->num_vlans;
4334	/* Re-init to load the changes */
4335	em_init(adapter);
4336}
4337
4338/*
4339 * This routine is run via a vlan
4340 * unconfig EVENT
4341 */
4342static void
4343em_unregister_vlan(void *arg, struct ifnet *ifp, u16 vtag)
4344{
4345	struct adapter	*adapter = ifp->if_softc;
4346	u32		index, bit;
4347
4348	if (ifp->if_softc !=  arg)
4349		return;
4350
4351	if ((vtag == 0) || (vtag > 4095))       /* Invalid */
4352                return;
4353
4354	index = (vtag >> 5) & 0x7F;
4355	bit = vtag & 0x1F;
4356	em_shadow_vfta[index] &= ~(1 << bit);
4357	--adapter->num_vlans;
4358	/* Re-init to load the changes */
4359	em_init(adapter);
4360}
4361
4362static void
4363em_setup_vlan_hw_support(struct adapter *adapter)
4364{
4365	struct e1000_hw *hw = &adapter->hw;
4366	u32             reg;
4367
4368	/*
4369	** We get here through init_locked, meaning
4370	** a soft reset; this has already cleared
4371	** the VFTA and other state, so if no
4372	** vlans have been registered, do nothing.
4373	*/
4374	if (adapter->num_vlans == 0)
4375                return;
4376
4377	/*
4378	** A soft reset zeroes out the VFTA, so
4379	** we need to repopulate it now.
4380	*/
4381	for (int i = 0; i < EM_VFTA_SIZE; i++)
4382                if (em_shadow_vfta[i] != 0)
4383			E1000_WRITE_REG_ARRAY(hw, E1000_VFTA,
4384                            i, em_shadow_vfta[i]);
4385
4386	reg = E1000_READ_REG(hw, E1000_CTRL);
4387	reg |= E1000_CTRL_VME;
4388	E1000_WRITE_REG(hw, E1000_CTRL, reg);
4389
4390	/* Enable the Filter Table */
4391	reg = E1000_READ_REG(hw, E1000_RCTL);
4392	reg &= ~E1000_RCTL_CFIEN;
4393	reg |= E1000_RCTL_VFE;
4394	E1000_WRITE_REG(hw, E1000_RCTL, reg);
4395
4396	/* Update the frame size */
4397	E1000_WRITE_REG(&adapter->hw, E1000_RLPML,
4398	    adapter->max_frame_size + VLAN_TAG_SIZE);
4399}
4400
4401static void
4402em_enable_intr(struct adapter *adapter)
4403{
4404	struct e1000_hw *hw = &adapter->hw;
4405	u32 ims_mask = IMS_ENABLE_MASK;
4406
4407	if (hw->mac.type == e1000_82574) {
4408		E1000_WRITE_REG(hw, EM_EIAC, EM_MSIX_MASK);
4409		ims_mask |= EM_MSIX_MASK;
4410	}
4411	E1000_WRITE_REG(hw, E1000_IMS, ims_mask);
4412}
4413
4414static void
4415em_disable_intr(struct adapter *adapter)
4416{
4417	struct e1000_hw *hw = &adapter->hw;
4418
4419	if (hw->mac.type == e1000_82574)
4420		E1000_WRITE_REG(hw, EM_EIAC, 0);
4421	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
4422}
4423
4424/*
4425 * Bit of a misnomer: what this really means is
4426 * to enable OS management of the system... aka
4427 * to disable special hardware management features
4428 */
4429static void
4430em_init_manageability(struct adapter *adapter)
4431{
4432	/* A shared code workaround */
4433#define E1000_82542_MANC2H E1000_MANC2H
4434	if (adapter->has_manage) {
4435		int manc2h = E1000_READ_REG(&adapter->hw, E1000_MANC2H);
4436		int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
4437
4438		/* disable hardware interception of ARP */
4439		manc &= ~(E1000_MANC_ARP_EN);
4440
4441                /* enable receiving management packets to the host */
4442		manc |= E1000_MANC_EN_MNG2HOST;
4443#define E1000_MNG2HOST_PORT_623 (1 << 5)
4444#define E1000_MNG2HOST_PORT_664 (1 << 6)
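		/* 623 and 664 are the ASF RMCP and secure RMCP ports. */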
4445		manc2h |= E1000_MNG2HOST_PORT_623;
4446		manc2h |= E1000_MNG2HOST_PORT_664;
4447		E1000_WRITE_REG(&adapter->hw, E1000_MANC2H, manc2h);
4448		E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
4449	}
4450}
4451
4452/*
4453 * Give control back to hardware management
4454 * controller if there is one.
4455 */
4456static void
4457em_release_manageability(struct adapter *adapter)
4458{
4459	if (adapter->has_manage) {
4460		int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
4461
4462		/* re-enable hardware interception of ARP */
4463		manc |= E1000_MANC_ARP_EN;
4464		manc &= ~E1000_MANC_EN_MNG2HOST;
4465
4466		E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
4467	}
4468}
4469
4470/*
4471 * em_get_hw_control sets the {CTRL_EXT|FWSM}:DRV_LOAD bit.
4472 * For ASF and Pass Through versions of f/w this means
4473 * that the driver is loaded. For AMT version type f/w
4474 * this means that the network i/f is open.
4475 */
4476static void
4477em_get_hw_control(struct adapter *adapter)
4478{
4479	u32 ctrl_ext, swsm;
4480
4481	if (adapter->hw.mac.type == e1000_82573) {
4482		swsm = E1000_READ_REG(&adapter->hw, E1000_SWSM);
4483		E1000_WRITE_REG(&adapter->hw, E1000_SWSM,
4484		    swsm | E1000_SWSM_DRV_LOAD);
4485		return;
4486	}
4487	/* else */
4488	ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4489	E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
4490	    ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
4491	return;
4492}
4493
4494/*
4495 * em_release_hw_control resets {CTRL_EXT|FWSM}:DRV_LOAD bit.
4496 * For ASF and Pass Through versions of f/w this means that
4497 * the driver is no longer loaded. For AMT versions of the
4498 * f/w this means that the network i/f is closed.
4499 */
4500static void
4501em_release_hw_control(struct adapter *adapter)
4502{
4503	u32 ctrl_ext, swsm;
4504
4505	if (!adapter->has_manage)
4506		return;
4507
4508	if (adapter->hw.mac.type == e1000_82573) {
4509		swsm = E1000_READ_REG(&adapter->hw, E1000_SWSM);
4510		E1000_WRITE_REG(&adapter->hw, E1000_SWSM,
4511		    swsm & ~E1000_SWSM_DRV_LOAD);
4512		return;
4513	}
4514	/* else */
4515	ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4516	E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
4517	    ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
4518	return;
4519}
4520
4521static int
4522em_is_valid_ether_addr(u8 *addr)
4523{
4524	char zero_addr[6] = { 0, 0, 0, 0, 0, 0 };
4525
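	/*
	 * Reject multicast/broadcast sources (low bit of the first
	 * octet set, e.g. ff:ff:ff:ff:ff:ff) and the all-zero address.
	 */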
4526	if ((addr[0] & 1) || (!bcmp(addr, zero_addr, ETHER_ADDR_LEN))) {
4527		return (FALSE);
4528	}
4529
4530	return (TRUE);
4531}
4532
4533/*
4534** Parse the interface capabilities with regard
4535** to both system management and wake-on-lan for
4536** later use.
4537*/
4538static void
4539em_get_wakeup(device_t dev)
4540{
4541	struct adapter	*adapter = device_get_softc(dev);
4542	u16		eeprom_data = 0, device_id, apme_mask;
4543
4544	adapter->has_manage = e1000_enable_mng_pass_thru(&adapter->hw);
4545	apme_mask = EM_EEPROM_APME;
4546
4547	switch (adapter->hw.mac.type) {
4548	case e1000_82573:
4549	case e1000_82583:
4550		adapter->has_amt = TRUE;
4551		/* Falls thru */
4552	case e1000_82571:
4553	case e1000_82572:
4554	case e1000_80003es2lan:
4555		if (adapter->hw.bus.func == 1) {
4556			e1000_read_nvm(&adapter->hw,
4557			    NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data);
4558			break;
4559		} else
4560			e1000_read_nvm(&adapter->hw,
4561			    NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
4562		break;
4563	case e1000_ich8lan:
4564	case e1000_ich9lan:
4565	case e1000_ich10lan:
4566	case e1000_pchlan:
4567		apme_mask = E1000_WUC_APME;
4568		adapter->has_amt = TRUE;
4569		eeprom_data = E1000_READ_REG(&adapter->hw, E1000_WUC);
4570		break;
4571	default:
4572		e1000_read_nvm(&adapter->hw,
4573		    NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
4574		break;
4575	}
4576	if (eeprom_data & apme_mask)
4577		adapter->wol = (E1000_WUFC_MAG | E1000_WUFC_MC);
4578	/*
4579	 * We have the eeprom settings; now apply the special cases
4580	 * where the eeprom may be wrong or the board won't support
4581	 * wake on lan on a particular port.
4582	 */
4583	device_id = pci_get_device(dev);
4584        switch (device_id) {
4585	case E1000_DEV_ID_82571EB_FIBER:
4586		/* Wake events only supported on port A for dual fiber
4587		 * regardless of eeprom setting */
4588		if (E1000_READ_REG(&adapter->hw, E1000_STATUS) &
4589		    E1000_STATUS_FUNC_1)
4590			adapter->wol = 0;
4591		break;
4592	case E1000_DEV_ID_82571EB_QUAD_COPPER:
4593	case E1000_DEV_ID_82571EB_QUAD_FIBER:
4594	case E1000_DEV_ID_82571EB_QUAD_COPPER_LP:
4595                /* if quad port adapter, disable WoL on all but port A */
4596		if (global_quad_port_a != 0)
4597			adapter->wol = 0;
4598		/* Reset for multiple quad port adapters */
4599		if (++global_quad_port_a == 4)
4600			global_quad_port_a = 0;
4601                break;
4602	}
4603	return;
4604}
4605
4606
4607/*
4608 * Enable PCI Wake On Lan capability
4609 */
4610static void
4611em_enable_wakeup(device_t dev)
4612{
4613	struct adapter	*adapter = device_get_softc(dev);
4614	struct ifnet	*ifp = adapter->ifp;
4615	u32		pmc, ctrl, ctrl_ext, rctl;
4616	u16     	status;
4617
4618	if ((pci_find_extcap(dev, PCIY_PMG, &pmc) != 0))
4619		return;
4620
4621	/* Advertise the wakeup capability */
4622	ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL);
4623	ctrl |= (E1000_CTRL_SWDPIN2 | E1000_CTRL_SWDPIN3);
4624	E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl);
4625	E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
4626
4627	if ((adapter->hw.mac.type == e1000_ich8lan) ||
4628	    (adapter->hw.mac.type == e1000_pchlan) ||
4629	    (adapter->hw.mac.type == e1000_ich9lan) ||
4630	    (adapter->hw.mac.type == e1000_ich10lan)) {
4631		e1000_disable_gig_wol_ich8lan(&adapter->hw);
4632		e1000_hv_phy_powerdown_workaround_ich8lan(&adapter->hw);
4633	}
4634
4635	/* Keep the laser running on Fiber adapters */
4636	if (adapter->hw.phy.media_type == e1000_media_type_fiber ||
4637	    adapter->hw.phy.media_type == e1000_media_type_internal_serdes) {
4638		ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4639		ctrl_ext |= E1000_CTRL_EXT_SDP3_DATA;
4640		E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, ctrl_ext);
4641	}
4642
4643	/*
4644	** Determine type of Wakeup: note that wol
4645	** is set with all bits on by default.
4646	*/
4647	if ((ifp->if_capenable & IFCAP_WOL_MAGIC) == 0)
4648		adapter->wol &= ~E1000_WUFC_MAG;
4649
4650	if ((ifp->if_capenable & IFCAP_WOL_MCAST) == 0)
4651		adapter->wol &= ~E1000_WUFC_MC;
4652	else {
4653		rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
4654		rctl |= E1000_RCTL_MPE;
4655		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, rctl);
4656	}
4657
4658	if (adapter->hw.mac.type == e1000_pchlan) {
4659		if (em_enable_phy_wakeup(adapter))
4660			return;
4661	} else {
4662		E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
4663		E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
4664	}
4665
4666	if (adapter->hw.phy.type == e1000_phy_igp_3)
4667		e1000_igp3_phy_powerdown_workaround_ich8lan(&adapter->hw);
4668
4669        /* Request PME */
4670        status = pci_read_config(dev, pmc + PCIR_POWER_STATUS, 2);
4671	status &= ~(PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE);
4672	if (ifp->if_capenable & IFCAP_WOL)
4673		status |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE;
4674        pci_write_config(dev, pmc + PCIR_POWER_STATUS, status, 2);
4675
4676	return;
4677}
4678
4679/*
4680** WOL in the newer chipset interfaces (pchlan)
4681** requires things to be copied into the PHY
4682*/
4683static int
4684em_enable_phy_wakeup(struct adapter *adapter)
4685{
4686	struct e1000_hw *hw = &adapter->hw;
4687	u32 mreg, ret = 0;
4688	u16 preg;
4689
4690	/* copy MAC RARs to PHY RARs */
4691	for (int i = 0; i < adapter->hw.mac.rar_entry_count; i++) {
4692		mreg = E1000_READ_REG(hw, E1000_RAL(i));
4693		e1000_write_phy_reg(hw, BM_RAR_L(i), (u16)(mreg & 0xFFFF));
4694		e1000_write_phy_reg(hw, BM_RAR_M(i),
4695		    (u16)((mreg >> 16) & 0xFFFF));
4696		mreg = E1000_READ_REG(hw, E1000_RAH(i));
4697		e1000_write_phy_reg(hw, BM_RAR_H(i), (u16)(mreg & 0xFFFF));
4698		e1000_write_phy_reg(hw, BM_RAR_CTRL(i),
4699		    (u16)((mreg >> 16) & 0xFFFF));
4700	}
4701
4702	/* copy MAC MTA to PHY MTA */
4703	for (int i = 0; i < adapter->hw.mac.mta_reg_count; i++) {
4704		mreg = E1000_READ_REG_ARRAY(hw, E1000_MTA, i);
4705		e1000_write_phy_reg(hw, BM_MTA(i), (u16)(mreg & 0xFFFF));
4706		e1000_write_phy_reg(hw, BM_MTA(i) + 1,
4707		    (u16)((mreg >> 16) & 0xFFFF));
4708	}
4709
4710	/* configure PHY Rx Control register */
4711	e1000_read_phy_reg(&adapter->hw, BM_RCTL, &preg);
4712	mreg = E1000_READ_REG(hw, E1000_RCTL);
4713	if (mreg & E1000_RCTL_UPE)
4714		preg |= BM_RCTL_UPE;
4715	if (mreg & E1000_RCTL_MPE)
4716		preg |= BM_RCTL_MPE;
4717	preg &= ~(BM_RCTL_MO_MASK);
4718	if (mreg & E1000_RCTL_MO_3)
4719		preg |= (((mreg & E1000_RCTL_MO_3) >> E1000_RCTL_MO_SHIFT)
4720				<< BM_RCTL_MO_SHIFT);
4721	if (mreg & E1000_RCTL_BAM)
4722		preg |= BM_RCTL_BAM;
4723	if (mreg & E1000_RCTL_PMCF)
4724		preg |= BM_RCTL_PMCF;
4725	mreg = E1000_READ_REG(hw, E1000_CTRL);
4726	if (mreg & E1000_CTRL_RFCE)
4727		preg |= BM_RCTL_RFCE;
4728	e1000_write_phy_reg(&adapter->hw, BM_RCTL, preg);
4729
4730	/* enable PHY wakeup in MAC register */
4731	E1000_WRITE_REG(hw, E1000_WUC,
4732	    E1000_WUC_PHY_WAKE | E1000_WUC_PME_EN);
4733	E1000_WRITE_REG(hw, E1000_WUFC, adapter->wol);
4734
4735	/* configure and enable PHY wakeup in PHY registers */
4736	e1000_write_phy_reg(&adapter->hw, BM_WUFC, adapter->wol);
4737	e1000_write_phy_reg(&adapter->hw, BM_WUC, E1000_WUC_PME_EN);
4738
4739	/* activate PHY wakeup */
4740	ret = hw->phy.ops.acquire(hw);
4741	if (ret) {
4742		printf("Could not acquire PHY\n");
4743		return ret;
4744	}
4745	e1000_write_phy_reg_mdic(hw, IGP01E1000_PHY_PAGE_SELECT,
4746	                         (BM_WUC_ENABLE_PAGE << IGP_PAGE_SHIFT));
4747	ret = e1000_read_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, &preg);
4748	if (ret) {
4749		printf("Could not read PHY page 769\n");
4750		goto out;
4751	}
4752	preg |= BM_WUC_ENABLE_BIT | BM_WUC_HOST_WU_BIT;
4753	ret = e1000_write_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, preg);
4754	if (ret)
4755		printf("Could not set PHY Host Wakeup bit\n");
4756out:
4757	hw->phy.ops.release(hw);
4758
4759	return ret;
4760}
4761
4762static void
4763em_led_func(void *arg, int onoff)
4764{
4765	struct adapter	*adapter = arg;
4766
4767	EM_CORE_LOCK(adapter);
4768	if (onoff) {
4769		e1000_setup_led(&adapter->hw);
4770		e1000_led_on(&adapter->hw);
4771	} else {
4772		e1000_led_off(&adapter->hw);
4773		e1000_cleanup_led(&adapter->hw);
4774	}
4775	EM_CORE_UNLOCK(adapter);
4776}
4777
4778/**********************************************************************
4779 *
4780 *  Update the board statistics counters.
4781 *
4782 **********************************************************************/
4783static void
4784em_update_stats_counters(struct adapter *adapter)
4785{
4786	struct ifnet   *ifp;
4787
4788	if (adapter->hw.phy.media_type == e1000_media_type_copper ||
4789	   (E1000_READ_REG(&adapter->hw, E1000_STATUS) & E1000_STATUS_LU)) {
4790		adapter->stats.symerrs += E1000_READ_REG(&adapter->hw, E1000_SYMERRS);
4791		adapter->stats.sec += E1000_READ_REG(&adapter->hw, E1000_SEC);
4792	}
4793	adapter->stats.crcerrs += E1000_READ_REG(&adapter->hw, E1000_CRCERRS);
4794	adapter->stats.mpc += E1000_READ_REG(&adapter->hw, E1000_MPC);
4795	adapter->stats.scc += E1000_READ_REG(&adapter->hw, E1000_SCC);
4796	adapter->stats.ecol += E1000_READ_REG(&adapter->hw, E1000_ECOL);
4797
4798	adapter->stats.mcc += E1000_READ_REG(&adapter->hw, E1000_MCC);
4799	adapter->stats.latecol += E1000_READ_REG(&adapter->hw, E1000_LATECOL);
4800	adapter->stats.colc += E1000_READ_REG(&adapter->hw, E1000_COLC);
4801	adapter->stats.dc += E1000_READ_REG(&adapter->hw, E1000_DC);
4802	adapter->stats.rlec += E1000_READ_REG(&adapter->hw, E1000_RLEC);
4803	adapter->stats.xonrxc += E1000_READ_REG(&adapter->hw, E1000_XONRXC);
4804	adapter->stats.xontxc += E1000_READ_REG(&adapter->hw, E1000_XONTXC);
4805	adapter->stats.xoffrxc += E1000_READ_REG(&adapter->hw, E1000_XOFFRXC);
4806	adapter->stats.xofftxc += E1000_READ_REG(&adapter->hw, E1000_XOFFTXC);
4807	adapter->stats.fcruc += E1000_READ_REG(&adapter->hw, E1000_FCRUC);
4808	adapter->stats.prc64 += E1000_READ_REG(&adapter->hw, E1000_PRC64);
4809	adapter->stats.prc127 += E1000_READ_REG(&adapter->hw, E1000_PRC127);
4810	adapter->stats.prc255 += E1000_READ_REG(&adapter->hw, E1000_PRC255);
4811	adapter->stats.prc511 += E1000_READ_REG(&adapter->hw, E1000_PRC511);
4812	adapter->stats.prc1023 += E1000_READ_REG(&adapter->hw, E1000_PRC1023);
4813	adapter->stats.prc1522 += E1000_READ_REG(&adapter->hw, E1000_PRC1522);
4814	adapter->stats.gprc += E1000_READ_REG(&adapter->hw, E1000_GPRC);
4815	adapter->stats.bprc += E1000_READ_REG(&adapter->hw, E1000_BPRC);
4816	adapter->stats.mprc += E1000_READ_REG(&adapter->hw, E1000_MPRC);
4817	adapter->stats.gptc += E1000_READ_REG(&adapter->hw, E1000_GPTC);
4818
4819	/* For the 64-bit byte counters the low dword must be read first. */
4820	/* Both registers clear on the read of the high dword */
4821
4822	adapter->stats.gorc += E1000_READ_REG(&adapter->hw, E1000_GORCH);
4823	adapter->stats.gotc += E1000_READ_REG(&adapter->hw, E1000_GOTCH);
4824
4825	adapter->stats.rnbc += E1000_READ_REG(&adapter->hw, E1000_RNBC);
4826	adapter->stats.ruc += E1000_READ_REG(&adapter->hw, E1000_RUC);
4827	adapter->stats.rfc += E1000_READ_REG(&adapter->hw, E1000_RFC);
4828	adapter->stats.roc += E1000_READ_REG(&adapter->hw, E1000_ROC);
4829	adapter->stats.rjc += E1000_READ_REG(&adapter->hw, E1000_RJC);
4830
4831	adapter->stats.tor += E1000_READ_REG(&adapter->hw, E1000_TORH);
4832	adapter->stats.tot += E1000_READ_REG(&adapter->hw, E1000_TOTH);
4833
4834	adapter->stats.tpr += E1000_READ_REG(&adapter->hw, E1000_TPR);
4835	adapter->stats.tpt += E1000_READ_REG(&adapter->hw, E1000_TPT);
4836	adapter->stats.ptc64 += E1000_READ_REG(&adapter->hw, E1000_PTC64);
4837	adapter->stats.ptc127 += E1000_READ_REG(&adapter->hw, E1000_PTC127);
4838	adapter->stats.ptc255 += E1000_READ_REG(&adapter->hw, E1000_PTC255);
4839	adapter->stats.ptc511 += E1000_READ_REG(&adapter->hw, E1000_PTC511);
4840	adapter->stats.ptc1023 += E1000_READ_REG(&adapter->hw, E1000_PTC1023);
4841	adapter->stats.ptc1522 += E1000_READ_REG(&adapter->hw, E1000_PTC1522);
4842	adapter->stats.mptc += E1000_READ_REG(&adapter->hw, E1000_MPTC);
4843	adapter->stats.bptc += E1000_READ_REG(&adapter->hw, E1000_BPTC);
4844
4845	if (adapter->hw.mac.type >= e1000_82543) {
4846		adapter->stats.algnerrc +=
4847		E1000_READ_REG(&adapter->hw, E1000_ALGNERRC);
4848		adapter->stats.rxerrc +=
4849		E1000_READ_REG(&adapter->hw, E1000_RXERRC);
4850		adapter->stats.tncrs +=
4851		E1000_READ_REG(&adapter->hw, E1000_TNCRS);
4852		adapter->stats.cexterr +=
4853		E1000_READ_REG(&adapter->hw, E1000_CEXTERR);
4854		adapter->stats.tsctc +=
4855		E1000_READ_REG(&adapter->hw, E1000_TSCTC);
4856		adapter->stats.tsctfc +=
4857		E1000_READ_REG(&adapter->hw, E1000_TSCTFC);
4858	}
4859	ifp = adapter->ifp;
4860
4861	ifp->if_collisions = adapter->stats.colc;
4862
4863	/* Rx Errors */
4864	ifp->if_ierrors = adapter->dropped_pkts + adapter->stats.rxerrc +
4865	    adapter->stats.crcerrs + adapter->stats.algnerrc +
4866	    adapter->stats.ruc + adapter->stats.roc +
4867	    adapter->stats.mpc + adapter->stats.cexterr;
4868
4869	/* Tx Errors */
4870	ifp->if_oerrors = adapter->stats.ecol +
4871	    adapter->stats.latecol + adapter->watchdog_events;
4872}
4873
4874
4875/**********************************************************************
4876 *
4877 *  This routine is called only when em_display_debug_stats is enabled.
4878 *  This routine provides a way to take a look at important statistics
4879 *  maintained by the driver and hardware.
4880 *
4881 **********************************************************************/
4882static void
4883em_print_debug_info(struct adapter *adapter)
4884{
4885	device_t dev = adapter->dev;
4886	u8 *hw_addr = adapter->hw.hw_addr;
4887	struct rx_ring *rxr = adapter->rx_rings;
4888	struct tx_ring *txr = adapter->tx_rings;
4889
4890	device_printf(dev, "Adapter hardware address = %p \n", hw_addr);
4891	device_printf(dev, "CTRL = 0x%x RCTL = 0x%x \n",
4892	    E1000_READ_REG(&adapter->hw, E1000_CTRL),
4893	    E1000_READ_REG(&adapter->hw, E1000_RCTL));
4894	device_printf(dev, "Packet buffer = Tx=%dk Rx=%dk \n",
4895	    ((E1000_READ_REG(&adapter->hw, E1000_PBA) & 0xffff0000) >> 16),
4896	    (E1000_READ_REG(&adapter->hw, E1000_PBA) & 0xffff) );
4897	device_printf(dev, "Flow control watermarks high = %d low = %d\n",
4898	    adapter->hw.fc.high_water,
4899	    adapter->hw.fc.low_water);
4900	device_printf(dev, "tx_int_delay = %d, tx_abs_int_delay = %d\n",
4901	    E1000_READ_REG(&adapter->hw, E1000_TIDV),
4902	    E1000_READ_REG(&adapter->hw, E1000_TADV));
4903	device_printf(dev, "rx_int_delay = %d, rx_abs_int_delay = %d\n",
4904	    E1000_READ_REG(&adapter->hw, E1000_RDTR),
4905	    E1000_READ_REG(&adapter->hw, E1000_RADV));
4906
4907	for (int i = 0; i < adapter->num_queues; i++, txr++) {
4908		device_printf(dev, "Queue(%d) tdh = %d, tdt = %d\n", i,
4909		    E1000_READ_REG(&adapter->hw, E1000_TDH(i)),
4910		    E1000_READ_REG(&adapter->hw, E1000_TDT(i)));
4911		device_printf(dev, "TX(%d) no descriptors avail event = %ld\n",
4912		    txr->me, txr->no_desc_avail);
4913		device_printf(dev, "TX(%d) MSIX IRQ Handled = %ld\n",
4914		    txr->me, txr->tx_irq);
4915		device_printf(dev, "Num Tx descriptors avail = %d\n",
4916		    txr->tx_avail);
4917		device_printf(dev, "Tx Descriptors not avail1 = %ld\n",
4918		    txr->no_desc_avail);
4919	}
4920	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4921		device_printf(dev, "RX(%d) MSIX IRQ Handled = %ld\n",
4922		    rxr->me, rxr->rx_irq);
4923		device_printf(dev, "hw rdh = %d, hw rdt = %d\n",
4924		    E1000_READ_REG(&adapter->hw, E1000_RDH(i)),
4925		    E1000_READ_REG(&adapter->hw, E1000_RDT(i)));
4926	}
4927	device_printf(dev, "Std mbuf failed = %ld\n",
4928	    adapter->mbuf_alloc_failed);
4929	device_printf(dev, "Std mbuf cluster failed = %ld\n",
4930	    adapter->mbuf_cluster_failed);
4931	device_printf(dev, "Driver dropped packets = %ld\n",
4932	    adapter->dropped_pkts);
4933}
4934
4935static void
4936em_print_hw_stats(struct adapter *adapter)
4937{
4938	device_t dev = adapter->dev;
4939
4940	device_printf(dev, "Excessive collisions = %lld\n",
4941	    (long long)adapter->stats.ecol);
4942#if	(DEBUG_HW > 0)  /* Don't output these errors normally */
4943	device_printf(dev, "Symbol errors = %lld\n",
4944	    (long long)adapter->stats.symerrs);
4945#endif
4946	device_printf(dev, "Sequence errors = %lld\n",
4947	    (long long)adapter->stats.sec);
4948	device_printf(dev, "Defer count = %lld\n",
4949	    (long long)adapter->stats.dc);
4950	device_printf(dev, "Missed Packets = %lld\n",
4951	    (long long)adapter->stats.mpc);
4952	device_printf(dev, "Receive No Buffers = %lld\n",
4953	    (long long)adapter->stats.rnbc);
4954	/* RLEC is inaccurate on some hardware, calculate our own. */
4955	device_printf(dev, "Receive Length Errors = %lld\n",
4956	    ((long long)adapter->stats.roc + (long long)adapter->stats.ruc));
4957	device_printf(dev, "Receive errors = %lld\n",
4958	    (long long)adapter->stats.rxerrc);
4959	device_printf(dev, "Crc errors = %lld\n",
4960	    (long long)adapter->stats.crcerrs);
4961	device_printf(dev, "Alignment errors = %lld\n",
4962	    (long long)adapter->stats.algnerrc);
4963	device_printf(dev, "Collision/Carrier extension errors = %lld\n",
4964	    (long long)adapter->stats.cexterr);
4965	device_printf(dev, "watchdog timeouts = %ld\n",
4966	    adapter->watchdog_events);
4967	device_printf(dev, "XON Rcvd = %lld\n",
4968	    (long long)adapter->stats.xonrxc);
4969	device_printf(dev, "XON Xmtd = %lld\n",
4970	    (long long)adapter->stats.xontxc);
4971	device_printf(dev, "XOFF Rcvd = %lld\n",
4972	    (long long)adapter->stats.xoffrxc);
4973	device_printf(dev, "XOFF Xmtd = %lld\n",
4974	    (long long)adapter->stats.xofftxc);
4975	device_printf(dev, "Good Packets Rcvd = %lld\n",
4976	    (long long)adapter->stats.gprc);
4977	device_printf(dev, "Good Packets Xmtd = %lld\n",
4978	    (long long)adapter->stats.gptc);
4979	device_printf(dev, "TSO Contexts Xmtd = %lld\n",
4980	    (long long)adapter->stats.tsctc);
4981	device_printf(dev, "TSO Contexts Failed = %lld\n",
4982	    (long long)adapter->stats.tsctfc);
4983}
4984
4985/**********************************************************************
4986 *
4987 *  This routine provides a way to dump out the adapter eeprom,
4988 *  often a useful debug/service tool. This only dumps the first
4989 *  32 words, stuff that matters is in that extent.
4990 *
4991 **********************************************************************/
4992static void
4993em_print_nvm_info(struct adapter *adapter)
4994{
4995	u16	eeprom_data;
4996	int	i, j, row = 0;
4997
4998	/* It's a bit crude, but it gets the job done */
4999	printf("\nInterface EEPROM Dump:\n");
5000	printf("Offset\n0x0000  ");
5001	for (i = 0, j = 0; i < 32; i++, j++) {
5002		if (j == 8) { /* Make the offset block */
5003			j = 0; ++row;
5004			printf("\n0x00%x0  ",row);
5005		}
5006		e1000_read_nvm(&adapter->hw, i, 1, &eeprom_data);
5007		printf("%04x ", eeprom_data);
5008	}
5009	printf("\n");
5010}
5011
5012static int
5013em_sysctl_debug_info(SYSCTL_HANDLER_ARGS)
5014{
5015	struct adapter *adapter;
5016	int error;
5017	int result;
5018
5019	result = -1;
5020	error = sysctl_handle_int(oidp, &result, 0, req);
5021
5022	if (error || !req->newptr)
5023		return (error);
5024
5025	if (result == 1) {
5026		adapter = (struct adapter *)arg1;
5027		em_print_debug_info(adapter);
5028	}
5029	/*
5030	 * This value will cause a hex dump of the
5031	 * first 32 16-bit words of the EEPROM to
5032	 * the screen.
5033	 */
5034	if (result == 2) {
5035		adapter = (struct adapter *)arg1;
5036		em_print_nvm_info(adapter);
5037        }
5038
5039	return (error);
5040}
5041
5042
5043static int
5044em_sysctl_stats(SYSCTL_HANDLER_ARGS)
5045{
5046	struct adapter *adapter;
5047	int error;
5048	int result;
5049
5050	result = -1;
5051	error = sysctl_handle_int(oidp, &result, 0, req);
5052
5053	if (error || !req->newptr)
5054		return (error);
5055
5056	if (result == 1) {
5057		adapter = (struct adapter *)arg1;
5058		em_print_hw_stats(adapter);
5059	}
5060
5061	return (error);
5062}
5063
5064static int
5065em_sysctl_int_delay(SYSCTL_HANDLER_ARGS)
5066{
5067	struct em_int_delay_info *info;
5068	struct adapter *adapter;
5069	u32 regval;
5070	int error, usecs, ticks;
5071
5072	info = (struct em_int_delay_info *)arg1;
5073	usecs = info->value;
5074	error = sysctl_handle_int(oidp, &usecs, 0, req);
5075	if (error != 0 || req->newptr == NULL)
5076		return (error);
5077	if (usecs < 0 || usecs > EM_TICKS_TO_USECS(65535))
5078		return (EINVAL);
5079	info->value = usecs;
5080	ticks = EM_USECS_TO_TICKS(usecs);
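	/*
	 * The delay registers count in 1.024 usec units, so e.g. a
	 * request of 100 usecs becomes roughly 98 register ticks
	 * (illustrative; see the EM_USECS_TO_TICKS macro in if_em.h).
	 */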
5081
5082	adapter = info->adapter;
5083
5084	EM_CORE_LOCK(adapter);
5085	regval = E1000_READ_OFFSET(&adapter->hw, info->offset);
5086	regval = (regval & ~0xffff) | (ticks & 0xffff);
5087	/* Handle a few special cases. */
5088	switch (info->offset) {
5089	case E1000_RDTR:
5090		break;
5091	case E1000_TIDV:
5092		if (ticks == 0) {
5093			adapter->txd_cmd &= ~E1000_TXD_CMD_IDE;
5094			/* Don't write 0 into the TIDV register. */
5095			regval++;
5096		} else
5097			adapter->txd_cmd |= E1000_TXD_CMD_IDE;
5098		break;
5099	}
5100	E1000_WRITE_OFFSET(&adapter->hw, info->offset, regval);
5101	EM_CORE_UNLOCK(adapter);
5102	return (0);
5103}
5104
5105static void
5106em_add_int_delay_sysctl(struct adapter *adapter, const char *name,
5107	const char *description, struct em_int_delay_info *info,
5108	int offset, int value)
5109{
5110	info->adapter = adapter;
5111	info->offset = offset;
5112	info->value = value;
5113	SYSCTL_ADD_PROC(device_get_sysctl_ctx(adapter->dev),
5114	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
5115	    OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW,
5116	    info, 0, em_sysctl_int_delay, "I", description);
5117}
5118
5119static void
5120em_add_rx_process_limit(struct adapter *adapter, const char *name,
5121	const char *description, int *limit, int value)
5122{
5123	*limit = value;
5124	SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev),
5125	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
5126	    OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW, limit, value, description);
5127}
5128
5129
5130