/******************************************************************************

  Copyright (c) 2001-2010, Intel Corporation
  All rights reserved.

  Redistribution and use in source and binary forms, with or without
  modification, are permitted provided that the following conditions are met:

   1. Redistributions of source code must retain the above copyright notice,
      this list of conditions and the following disclaimer.

   2. Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.

   3. Neither the name of the Intel Corporation nor the names of its
      contributors may be used to endorse or promote products derived from
      this software without specific prior written permission.

  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  POSSIBILITY OF SUCH DAMAGE.

******************************************************************************/
/*$FreeBSD: head/sys/dev/e1000/if_em.c 211913 2010-08-28 00:34:22Z yongari $*/

#ifdef HAVE_KERNEL_OPTION_HEADERS
#include "opt_device_polling.h"
#include "opt_inet.h"
#endif

#include <sys/param.h>
#include <sys/systm.h>
#if __FreeBSD_version >= 800000
#include <sys/buf_ring.h>
#endif
#include <sys/bus.h>
#include <sys/endian.h>
#include <sys/kernel.h>
#include <sys/kthread.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/module.h>
#include <sys/rman.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/sysctl.h>
#include <sys/taskqueue.h>
#include <sys/eventhandler.h>
#include <machine/bus.h>
#include <machine/resource.h>

#include <net/bpf.h>
#include <net/ethernet.h>
#include <net/if.h>
#include <net/if_arp.h>
#include <net/if_dl.h>
#include <net/if_media.h>

#include <net/if_types.h>
#include <net/if_vlan_var.h>

#include <netinet/in_systm.h>
#include <netinet/in.h>
#include <netinet/if_ether.h>
#include <netinet/ip.h>
#include <netinet/ip6.h>
#include <netinet/tcp.h>
#include <netinet/udp.h>

#include <machine/in_cksum.h>
#include <dev/led/led.h>
#include <dev/pci/pcivar.h>
#include <dev/pci/pcireg.h>

#include "e1000_api.h"
#include "e1000_82571.h"
#include "if_em.h"

/*********************************************************************
 *  Set this to one to display debug statistics
 *********************************************************************/
int	em_display_debug_stats = 0;

/*********************************************************************
 *  Driver version:
 *********************************************************************/
char em_driver_version[] = "7.0.5";

/*********************************************************************
 *  PCI Device ID Table
 *
 *  Used by probe to select the devices the driver will load on.
 *  The last field stores an index into em_strings.
 *  The last entry must be all 0s.
 *
 *  { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index }
 *********************************************************************/

static em_vendor_info_t em_vendor_info_array[] =
{
	/* Intel(R) PRO/1000 Network Connection */
	{ 0x8086, E1000_DEV_ID_82571EB_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_SERDES_DUAL,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_SERDES_QUAD,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER_LP,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_QUAD_FIBER,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571PT_QUAD_COPPER,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82572EI_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82572EI_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82572EI_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82572EI,		PCI_ANY_ID, PCI_ANY_ID, 0},

	{ 0x8086, E1000_DEV_ID_82573E,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82573E_IAMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82573L,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82583V,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_SPT,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_SPT,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_DPT,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_DPT,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IGP_M_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IGP_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IGP_C,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IFE,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IFE_GT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IFE_G,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IGP_M,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_82567V_3,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IGP_M_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IGP_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IGP_C,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IGP_M,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IGP_M_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IFE,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IFE_GT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IFE_G,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_BM,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82574L,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82574LA,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH10_R_BM_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH10_R_BM_LF,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH10_R_BM_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH10_D_BM_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH10_D_BM_LF,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_M_HV_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_M_HV_LC,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_D_HV_DM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_D_HV_DC,	PCI_ANY_ID, PCI_ANY_ID, 0},
	/* required last entry */
	{ 0, 0, 0, 0, 0}
};

/*********************************************************************
 *  Table of branding strings for all supported NICs.
 *********************************************************************/

static char *em_strings[] = {
	"Intel(R) PRO/1000 Network Connection"
};

/*********************************************************************
 *  Function prototypes
 *********************************************************************/
static int	em_probe(device_t);
static int	em_attach(device_t);
static int	em_detach(device_t);
static int	em_shutdown(device_t);
static int	em_suspend(device_t);
static int	em_resume(device_t);
static void	em_start(struct ifnet *);
static void	em_start_locked(struct ifnet *, struct tx_ring *);
#ifdef EM_MULTIQUEUE
static int	em_mq_start(struct ifnet *, struct mbuf *);
static int	em_mq_start_locked(struct ifnet *,
		    struct tx_ring *, struct mbuf *);
static void	em_qflush(struct ifnet *);
#endif
static int	em_ioctl(struct ifnet *, u_long, caddr_t);
static void	em_init(void *);
static void	em_init_locked(struct adapter *);
static void	em_stop(void *);
static void	em_media_status(struct ifnet *, struct ifmediareq *);
static int	em_media_change(struct ifnet *);
static void	em_identify_hardware(struct adapter *);
static int	em_allocate_pci_resources(struct adapter *);
static int	em_allocate_legacy(struct adapter *);
static int	em_allocate_msix(struct adapter *);
static int	em_allocate_queues(struct adapter *);
static int	em_setup_msix(struct adapter *);
static void	em_free_pci_resources(struct adapter *);
static void	em_local_timer(void *);
static void	em_reset(struct adapter *);
static int	em_setup_interface(device_t, struct adapter *);

static void	em_setup_transmit_structures(struct adapter *);
static void	em_initialize_transmit_unit(struct adapter *);
static int	em_allocate_transmit_buffers(struct tx_ring *);
static void	em_free_transmit_structures(struct adapter *);
static void	em_free_transmit_buffers(struct tx_ring *);

static int	em_setup_receive_structures(struct adapter *);
static int	em_allocate_receive_buffers(struct rx_ring *);
static void	em_initialize_receive_unit(struct adapter *);
static void	em_free_receive_structures(struct adapter *);
static void	em_free_receive_buffers(struct rx_ring *);

static void	em_enable_intr(struct adapter *);
static void	em_disable_intr(struct adapter *);
static void	em_update_stats_counters(struct adapter *);
static void	em_add_hw_stats(struct adapter *adapter);
static bool	em_txeof(struct tx_ring *);
static bool	em_rxeof(struct rx_ring *, int, int *);
#ifndef __NO_STRICT_ALIGNMENT
static int	em_fixup_rx(struct rx_ring *);
#endif
static void	em_receive_checksum(struct e1000_rx_desc *, struct mbuf *);
static void	em_transmit_checksum_setup(struct tx_ring *, struct mbuf *,
		    u32 *, u32 *);
static bool	em_tso_setup(struct tx_ring *, struct mbuf *, u32 *, u32 *);
static void	em_set_promisc(struct adapter *);
static void	em_disable_promisc(struct adapter *);
static void	em_set_multi(struct adapter *);
static void	em_update_link_status(struct adapter *);
static void	em_refresh_mbufs(struct rx_ring *, int);
static void	em_register_vlan(void *, struct ifnet *, u16);
static void	em_unregister_vlan(void *, struct ifnet *, u16);
static void	em_setup_vlan_hw_support(struct adapter *);
static int	em_xmit(struct tx_ring *, struct mbuf **);
static int	em_dma_malloc(struct adapter *, bus_size_t,
		    struct em_dma_alloc *, int);
static void	em_dma_free(struct adapter *, struct em_dma_alloc *);
static int	em_sysctl_nvm_info(SYSCTL_HANDLER_ARGS);
static void	em_print_nvm_info(struct adapter *);
static int	em_is_valid_ether_addr(u8 *);
static int	em_sysctl_int_delay(SYSCTL_HANDLER_ARGS);
static void	em_add_int_delay_sysctl(struct adapter *, const char *,
		    const char *, struct em_int_delay_info *, int, int);
/* Management and WOL Support */
static void	em_init_manageability(struct adapter *);
static void	em_release_manageability(struct adapter *);
static void	em_get_hw_control(struct adapter *);
static void	em_release_hw_control(struct adapter *);
static void	em_get_wakeup(device_t);
static void	em_enable_wakeup(device_t);
static int	em_enable_phy_wakeup(struct adapter *);
static void	em_led_func(void *, int);

static int	em_irq_fast(void *);
static void	em_handle_que(void *context, int pending);

/* MSIX handlers */
static void	em_msix_tx(void *);
static void	em_msix_rx(void *);
static void	em_msix_link(void *);
static void	em_handle_tx(void *context, int pending);
static void	em_handle_rx(void *context, int pending);
static void	em_handle_link(void *context, int pending);

static void	em_add_rx_process_limit(struct adapter *, const char *,
		    const char *, int *, int);

#ifdef DEVICE_POLLING
static poll_handler_t em_poll;
#endif /* DEVICE_POLLING */

/*********************************************************************
 *  FreeBSD Device Interface Entry Points
 *********************************************************************/

static device_method_t em_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe, em_probe),
	DEVMETHOD(device_attach, em_attach),
	DEVMETHOD(device_detach, em_detach),
	DEVMETHOD(device_shutdown, em_shutdown),
	DEVMETHOD(device_suspend, em_suspend),
	DEVMETHOD(device_resume, em_resume),
	{0, 0}
};

static driver_t em_driver = {
	"em", em_methods, sizeof(struct adapter),
};

devclass_t em_devclass;
DRIVER_MODULE(em, pci, em_driver, em_devclass, 0, 0);
MODULE_DEPEND(em, pci, 1, 1, 1);
MODULE_DEPEND(em, ether, 1, 1, 1);

/*********************************************************************
 *  Tunable default values.
 *********************************************************************/

#define EM_TICKS_TO_USECS(ticks)	((1024 * (ticks) + 500) / 1000)
#define EM_USECS_TO_TICKS(usecs)	((1000 * (usecs) + 512) / 1024)
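
/*
 * The hardware interrupt delay timers tick in 1.024 usec units, hence
 * the scaling above; e.g. EM_USECS_TO_TICKS(100) = (1000 * 100 + 512) / 1024
 * = 98 ticks, which the hardware stretches back out to roughly 100 usecs.
 */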
#define M_TSO_LEN			66

/* Allow common code without TSO */
#ifndef CSUM_TSO
#define CSUM_TSO	0
#endif

static int em_tx_int_delay_dflt = EM_TICKS_TO_USECS(EM_TIDV);
static int em_rx_int_delay_dflt = EM_TICKS_TO_USECS(EM_RDTR);
TUNABLE_INT("hw.em.tx_int_delay", &em_tx_int_delay_dflt);
TUNABLE_INT("hw.em.rx_int_delay", &em_rx_int_delay_dflt);

static int em_tx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_TADV);
static int em_rx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_RADV);
TUNABLE_INT("hw.em.tx_abs_int_delay", &em_tx_abs_int_delay_dflt);
TUNABLE_INT("hw.em.rx_abs_int_delay", &em_rx_abs_int_delay_dflt);

static int em_rxd = EM_DEFAULT_RXD;
static int em_txd = EM_DEFAULT_TXD;
TUNABLE_INT("hw.em.rxd", &em_rxd);
TUNABLE_INT("hw.em.txd", &em_txd);

static int em_smart_pwr_down = FALSE;
TUNABLE_INT("hw.em.smart_pwr_down", &em_smart_pwr_down);

/* Controls whether promiscuous also shows bad packets */
static int em_debug_sbp = FALSE;
TUNABLE_INT("hw.em.sbp", &em_debug_sbp);

/* Local controls for MSI/MSIX */
#ifdef EM_MULTIQUEUE
static int em_enable_msix = TRUE;
static int em_msix_queues = 2; /* for 82574, can be 1 or 2 */
#else
static int em_enable_msix = FALSE;
static int em_msix_queues = 0; /* disable */
#endif
TUNABLE_INT("hw.em.enable_msix", &em_enable_msix);
TUNABLE_INT("hw.em.msix_queues", &em_msix_queues);

/* How many packets rxeof tries to clean at a time */
static int em_rx_process_limit = 100;
TUNABLE_INT("hw.em.rx_process_limit", &em_rx_process_limit);

/* Flow control setting - default to FULL */
static int em_fc_setting = e1000_fc_full;
TUNABLE_INT("hw.em.fc_setting", &em_fc_setting);
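
/*
 * All of the hw.em.* tunables above are read from the loader
 * environment at module initialization, e.g. in /boot/loader.conf:
 *
 *	hw.em.rx_int_delay="32"
 *	hw.em.fc_setting="0"
 *
 * The flow control values follow enum e1000_fc_type
 * (0 = none, 1 = rx pause, 2 = tx pause, 3 = full).
 */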

/*
** Shadow VFTA table: this is needed because
** the real vlan filter table gets cleared during
** a soft reset and the driver needs to be able
** to repopulate it.
*/
static u32 em_shadow_vfta[EM_VFTA_SIZE];

/* Global used in WOL setup with multiport cards */
static int global_quad_port_a = 0;

/*********************************************************************
 *  Device identification routine
 *
 *  em_probe determines whether the driver should be loaded for an
 *  adapter, based on the adapter's PCI vendor/device ID.
 *
 *  return BUS_PROBE_DEFAULT on success, positive on failure
 *********************************************************************/

static int
em_probe(device_t dev)
{
	char		adapter_name[60];
	u16		pci_vendor_id = 0;
	u16		pci_device_id = 0;
	u16		pci_subvendor_id = 0;
	u16		pci_subdevice_id = 0;
	em_vendor_info_t *ent;

	INIT_DEBUGOUT("em_probe: begin");

	pci_vendor_id = pci_get_vendor(dev);
	if (pci_vendor_id != EM_VENDOR_ID)
		return (ENXIO);

	pci_device_id = pci_get_device(dev);
	pci_subvendor_id = pci_get_subvendor(dev);
	pci_subdevice_id = pci_get_subdevice(dev);

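	/*
	 * Scan the table: an entry matches when the vendor and device
	 * IDs are equal and the subvendor/subdevice IDs either match
	 * or are wildcarded with PCI_ANY_ID.
	 */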
	ent = em_vendor_info_array;
	while (ent->vendor_id != 0) {
		if ((pci_vendor_id == ent->vendor_id) &&
		    (pci_device_id == ent->device_id) &&

		    ((pci_subvendor_id == ent->subvendor_id) ||
		    (ent->subvendor_id == PCI_ANY_ID)) &&

		    ((pci_subdevice_id == ent->subdevice_id) ||
		    (ent->subdevice_id == PCI_ANY_ID))) {
			sprintf(adapter_name, "%s %s",
				em_strings[ent->index],
				em_driver_version);
			device_set_desc_copy(dev, adapter_name);
			return (BUS_PROBE_DEFAULT);
		}
		ent++;
	}

	return (ENXIO);
}

/*********************************************************************
 *  Device initialization routine
 *
 *  The attach entry point is called when the driver is being loaded.
 *  This routine identifies the type of hardware, allocates all resources
 *  and initializes the hardware.
 *
 *  return 0 on success, positive on failure
 *********************************************************************/

static int
em_attach(device_t dev)
{
	struct adapter	*adapter;
	int		error = 0;

	INIT_DEBUGOUT("em_attach: begin");

	adapter = device_get_softc(dev);
	adapter->dev = adapter->osdep.dev = dev;
	EM_CORE_LOCK_INIT(adapter, device_get_nameunit(dev));

	/* SYSCTL stuff */
	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
	    OID_AUTO, "nvm", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
	    em_sysctl_nvm_info, "I", "NVM Information");

	callout_init_mtx(&adapter->timer, &adapter->core_mtx, 0);

	/* Determine hardware and mac info */
	em_identify_hardware(adapter);

	/* Setup PCI resources */
	if (em_allocate_pci_resources(adapter)) {
		device_printf(dev, "Allocation of PCI resources failed\n");
		error = ENXIO;
		goto err_pci;
	}

	/*
	** For ICH8 and family we need to
	** map the flash memory, and this
	** must happen after the MAC is
	** identified
	*/
	if ((adapter->hw.mac.type == e1000_ich8lan) ||
	    (adapter->hw.mac.type == e1000_pchlan) ||
	    (adapter->hw.mac.type == e1000_ich9lan) ||
	    (adapter->hw.mac.type == e1000_ich10lan)) {
		int rid = EM_BAR_TYPE_FLASH;
		adapter->flash = bus_alloc_resource_any(dev,
		    SYS_RES_MEMORY, &rid, RF_ACTIVE);
		if (adapter->flash == NULL) {
			device_printf(dev, "Mapping of Flash failed\n");
			error = ENXIO;
			goto err_pci;
		}
		/* This is used in the shared code */
		adapter->hw.flash_address = (u8 *)adapter->flash;
		adapter->osdep.flash_bus_space_tag =
		    rman_get_bustag(adapter->flash);
		adapter->osdep.flash_bus_space_handle =
		    rman_get_bushandle(adapter->flash);
	}

	/* Do Shared Code initialization */
	if (e1000_setup_init_funcs(&adapter->hw, TRUE)) {
		device_printf(dev, "Setup of Shared code failed\n");
		error = ENXIO;
		goto err_pci;
	}

	e1000_get_bus_info(&adapter->hw);

	/* Set up some sysctls for the tunable interrupt delays */
	em_add_int_delay_sysctl(adapter, "rx_int_delay",
	    "receive interrupt delay in usecs", &adapter->rx_int_delay,
	    E1000_REGISTER(&adapter->hw, E1000_RDTR), em_rx_int_delay_dflt);
	em_add_int_delay_sysctl(adapter, "tx_int_delay",
	    "transmit interrupt delay in usecs", &adapter->tx_int_delay,
	    E1000_REGISTER(&adapter->hw, E1000_TIDV), em_tx_int_delay_dflt);
	em_add_int_delay_sysctl(adapter, "rx_abs_int_delay",
	    "receive interrupt delay limit in usecs",
	    &adapter->rx_abs_int_delay,
	    E1000_REGISTER(&adapter->hw, E1000_RADV),
	    em_rx_abs_int_delay_dflt);
	em_add_int_delay_sysctl(adapter, "tx_abs_int_delay",
	    "transmit interrupt delay limit in usecs",
	    &adapter->tx_abs_int_delay,
	    E1000_REGISTER(&adapter->hw, E1000_TADV),
	    em_tx_abs_int_delay_dflt);
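	/*
	 * These appear as dev.em.<unit>.rx_int_delay and friends and may
	 * be changed at runtime with sysctl(8); writes are handled by
	 * em_sysctl_int_delay(), which reprograms the register.
	 */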

	/* Sysctls for limiting the amount of work done in the taskqueue */
	em_add_rx_process_limit(adapter, "rx_processing_limit",
	    "max number of rx packets to process", &adapter->rx_process_limit,
	    em_rx_process_limit);

	/*
	 * Validate the number of transmit and receive descriptors. It
	 * must not exceed the hardware maximum, and must be a multiple
	 * of EM_DBA_ALIGN.
	 */
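	/*
	 * Worked example, assuming the values in if_em.h: legacy
	 * descriptors are 16 bytes, so with EM_DBA_ALIGN at 128 the
	 * ring sizes must be multiples of 8 descriptors.
	 */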
	if (((em_txd * sizeof(struct e1000_tx_desc)) % EM_DBA_ALIGN) != 0 ||
	    (em_txd > EM_MAX_TXD) || (em_txd < EM_MIN_TXD)) {
		device_printf(dev, "Using %d TX descriptors instead of %d!\n",
		    EM_DEFAULT_TXD, em_txd);
		adapter->num_tx_desc = EM_DEFAULT_TXD;
	} else
		adapter->num_tx_desc = em_txd;

	if (((em_rxd * sizeof(struct e1000_rx_desc)) % EM_DBA_ALIGN) != 0 ||
	    (em_rxd > EM_MAX_RXD) || (em_rxd < EM_MIN_RXD)) {
		device_printf(dev, "Using %d RX descriptors instead of %d!\n",
		    EM_DEFAULT_RXD, em_rxd);
		adapter->num_rx_desc = EM_DEFAULT_RXD;
	} else
		adapter->num_rx_desc = em_rxd;

	adapter->hw.mac.autoneg = DO_AUTO_NEG;
	adapter->hw.phy.autoneg_wait_to_complete = FALSE;
	adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;

	/* Copper options */
	if (adapter->hw.phy.media_type == e1000_media_type_copper) {
		adapter->hw.phy.mdix = AUTO_ALL_MODES;
		adapter->hw.phy.disable_polarity_correction = FALSE;
		adapter->hw.phy.ms_type = EM_MASTER_SLAVE;
	}

	/*
	 * Set the frame limits assuming
	 * standard ethernet sized frames.
	 */
	adapter->max_frame_size = ETHERMTU + ETHER_HDR_LEN + ETHERNET_FCS_SIZE;
	adapter->min_frame_size = ETH_ZLEN + ETHERNET_FCS_SIZE;
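	/* For the standard 1500-byte MTU that is 1500 + 14 + 4 = 1518. */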

	/*
	 * This controls when hardware reports transmit completion
	 * status.
	 */
	adapter->hw.mac.report_tx_early = 1;

	/*
	** Get queue/ring memory
	*/
	if (em_allocate_queues(adapter)) {
		error = ENOMEM;
		goto err_pci;
	}

	/* Allocate multicast array memory. */
	adapter->mta = malloc(sizeof(u8) * ETH_ADDR_LEN *
	    MAX_NUM_MULTICAST_ADDRESSES, M_DEVBUF, M_NOWAIT);
	if (adapter->mta == NULL) {
		device_printf(dev, "Can not allocate multicast setup array\n");
		error = ENOMEM;
		goto err_late;
	}

	/*
	** Start from a known state: this is
	** important for reading the NVM and
	** MAC address from the hardware.
	*/
	e1000_reset_hw(&adapter->hw);

	/* Make sure we have a good EEPROM before we read from it */
	if (e1000_validate_nvm_checksum(&adapter->hw) < 0) {
		/*
		** Some PCI-E parts fail the first check due to
		** the link being in sleep state; call it again,
		** and if it fails a second time it's a real issue.
		*/
		if (e1000_validate_nvm_checksum(&adapter->hw) < 0) {
			device_printf(dev,
			    "The EEPROM Checksum Is Not Valid\n");
			error = EIO;
			goto err_late;
		}
	}

	/* Copy the permanent MAC address out of the EEPROM */
	if (e1000_read_mac_addr(&adapter->hw) < 0) {
		device_printf(dev, "EEPROM read error while reading MAC"
		    " address\n");
		error = EIO;
		goto err_late;
	}

	if (!em_is_valid_ether_addr(adapter->hw.mac.addr)) {
		device_printf(dev, "Invalid MAC address\n");
		error = EIO;
		goto err_late;
	}

	/*
	**  Do interrupt configuration
	*/
	if (adapter->msix > 1) /* Do MSIX */
		error = em_allocate_msix(adapter);
	else  /* MSI or Legacy */
		error = em_allocate_legacy(adapter);
	if (error)
		goto err_late;

	/*
	 * Get Wake-on-Lan and Management info for later use
	 */
	em_get_wakeup(dev);

	/* Setup OS specific network interface */
	if (em_setup_interface(dev, adapter) != 0)
		goto err_late;

	em_reset(adapter);

	/* Initialize statistics */
	em_update_stats_counters(adapter);

	adapter->hw.mac.get_link_status = 1;
	em_update_link_status(adapter);

	/* Indicate SOL/IDER usage */
	if (e1000_check_reset_block(&adapter->hw))
		device_printf(dev,
		    "PHY reset is blocked due to SOL/IDER session.\n");

	/* Register for VLAN events */
	adapter->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
	    em_register_vlan, adapter, EVENTHANDLER_PRI_FIRST);
	adapter->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
	    em_unregister_vlan, adapter, EVENTHANDLER_PRI_FIRST);

	em_add_hw_stats(adapter);

	/* Non-AMT based hardware can now take control from firmware */
	if (adapter->has_manage && !adapter->has_amt)
		em_get_hw_control(adapter);

	/* Tell the stack that the interface is not active */
	adapter->ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);

	adapter->led_dev = led_create(em_led_func, adapter,
	    device_get_nameunit(dev));

	INIT_DEBUGOUT("em_attach: end");

	return (0);

err_late:
	em_free_transmit_structures(adapter);
	em_free_receive_structures(adapter);
	em_release_hw_control(adapter);
	if (adapter->ifp != NULL)
		if_free(adapter->ifp);
err_pci:
	em_free_pci_resources(adapter);
	free(adapter->mta, M_DEVBUF);
	EM_CORE_LOCK_DESTROY(adapter);

	return (error);
}

/*********************************************************************
 *  Device removal routine
 *
 *  The detach entry point is called when the driver is being removed.
 *  This routine stops the adapter and deallocates all the resources
 *  that were allocated for driver operation.
 *
 *  return 0 on success, positive on failure
 *********************************************************************/

static int
em_detach(device_t dev)
{
	struct adapter	*adapter = device_get_softc(dev);
	struct ifnet	*ifp = adapter->ifp;

	INIT_DEBUGOUT("em_detach: begin");

	/* Make sure VLANS are not using driver */
	if (adapter->ifp->if_vlantrunk != NULL) {
		device_printf(dev, "Vlan in use, detach first\n");
		return (EBUSY);
	}

#ifdef DEVICE_POLLING
	if (ifp->if_capenable & IFCAP_POLLING)
		ether_poll_deregister(ifp);
#endif

	if (adapter->led_dev != NULL)
		led_destroy(adapter->led_dev);

	EM_CORE_LOCK(adapter);
	adapter->in_detach = 1;
	em_stop(adapter);
	EM_CORE_UNLOCK(adapter);
	EM_CORE_LOCK_DESTROY(adapter);

	e1000_phy_hw_reset(&adapter->hw);

	em_release_manageability(adapter);
	em_release_hw_control(adapter);

	/* Unregister VLAN events */
	if (adapter->vlan_attach != NULL)
		EVENTHANDLER_DEREGISTER(vlan_config, adapter->vlan_attach);
	if (adapter->vlan_detach != NULL)
		EVENTHANDLER_DEREGISTER(vlan_unconfig, adapter->vlan_detach);

	ether_ifdetach(adapter->ifp);
	callout_drain(&adapter->timer);

	em_free_pci_resources(adapter);
	bus_generic_detach(dev);
	if_free(ifp);

	em_free_transmit_structures(adapter);
	em_free_receive_structures(adapter);

	em_release_hw_control(adapter);
	free(adapter->mta, M_DEVBUF);

	return (0);
}

/*********************************************************************
 *
 *  Shutdown entry point
 *
 **********************************************************************/

static int
em_shutdown(device_t dev)
{
	return (em_suspend(dev));
}

/*
 * Suspend/resume device methods.
 */
static int
em_suspend(device_t dev)
{
	struct adapter *adapter = device_get_softc(dev);

	EM_CORE_LOCK(adapter);

	em_release_manageability(adapter);
	em_release_hw_control(adapter);
	em_enable_wakeup(dev);

	EM_CORE_UNLOCK(adapter);

	return (bus_generic_suspend(dev));
}

static int
em_resume(device_t dev)
{
	struct adapter *adapter = device_get_softc(dev);
	struct ifnet *ifp = adapter->ifp;

	EM_CORE_LOCK(adapter);
	em_init_locked(adapter);
	em_init_manageability(adapter);
	EM_CORE_UNLOCK(adapter);
	em_start(ifp);

	return (bus_generic_resume(dev));
}


/*********************************************************************
 *  Transmit entry point
 *
 *  em_start is called by the stack to initiate a transmit.
 *  The driver will remain in this routine as long as there are
 *  packets to transmit and transmit resources are available.
 *  If resources are not available, the stack is notified and
 *  the packet is requeued.
 **********************************************************************/

#ifdef EM_MULTIQUEUE
static int
em_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr, struct mbuf *m)
{
	struct adapter	*adapter = txr->adapter;
	struct mbuf	*next;
	int		err = 0, enq = 0;

	if ((ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) !=
	    IFF_DRV_RUNNING || adapter->link_active == 0) {
		if (m != NULL)
			err = drbr_enqueue(ifp, txr->br, m);
		return (err);
	}

	/* Call cleanup if number of TX descriptors low */
	if (txr->tx_avail <= EM_TX_CLEANUP_THRESHOLD)
		em_txeof(txr);

	enq = 0;
	if (m == NULL) {
		next = drbr_dequeue(ifp, txr->br);
	} else if (drbr_needs_enqueue(ifp, txr->br)) {
		if ((err = drbr_enqueue(ifp, txr->br, m)) != 0)
			return (err);
		next = drbr_dequeue(ifp, txr->br);
	} else
		next = m;

	/* Process the queue */
	while (next != NULL) {
		if ((err = em_xmit(txr, &next)) != 0) {
			if (next != NULL)
				err = drbr_enqueue(ifp, txr->br, next);
			break;
		}
		enq++;
		drbr_stats_update(ifp, next->m_pkthdr.len, next->m_flags);
		ETHER_BPF_MTAP(ifp, next);
		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
			break;
		if (txr->tx_avail < EM_MAX_SCATTER) {
			ifp->if_drv_flags |= IFF_DRV_OACTIVE;
			break;
		}
		next = drbr_dequeue(ifp, txr->br);
	}

	if (enq > 0) {
		/* Set the watchdog */
		txr->watchdog_check = TRUE;
		txr->watchdog_time = ticks;
	}
	return (err);
}

/*
** Multiqueue capable stack interface;
** this is not yet truly multiqueue, but
** that is coming...
*/
static int
em_mq_start(struct ifnet *ifp, struct mbuf *m)
{
	struct adapter	*adapter = ifp->if_softc;
	struct tx_ring	*txr;
	int		i, error = 0;

	/* Which queue to use */
	if ((m->m_flags & M_FLOWID) != 0)
		i = m->m_pkthdr.flowid % adapter->num_queues;
	else
		i = curcpu % adapter->num_queues;
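	/*
	 * NB: M_FLOWID means the stack has already computed a flow hash
	 * for this packet, which keeps all packets of one flow on the
	 * same ring; otherwise we fall back to the current CPU.
	 */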

	txr = &adapter->tx_rings[i];

	if (EM_TX_TRYLOCK(txr)) {
		error = em_mq_start_locked(ifp, txr, m);
		EM_TX_UNLOCK(txr);
	} else
		error = drbr_enqueue(ifp, txr->br, m);

	return (error);
}

/*
** Flush all ring buffers
*/
static void
em_qflush(struct ifnet *ifp)
{
	struct adapter	*adapter = ifp->if_softc;
	struct tx_ring	*txr = adapter->tx_rings;
	struct mbuf	*m;

	for (int i = 0; i < adapter->num_queues; i++, txr++) {
		EM_TX_LOCK(txr);
		while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
			m_freem(m);
		EM_TX_UNLOCK(txr);
	}
	if_qflush(ifp);
}

#endif /* EM_MULTIQUEUE */

static void
em_start_locked(struct ifnet *ifp, struct tx_ring *txr)
{
	struct adapter	*adapter = ifp->if_softc;
	struct mbuf	*m_head;

	EM_TX_LOCK_ASSERT(txr);

	if ((ifp->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) !=
	    IFF_DRV_RUNNING)
		return;

	if (!adapter->link_active)
		return;

	/* Call cleanup if number of TX descriptors low */
	if (txr->tx_avail <= EM_TX_CLEANUP_THRESHOLD)
		em_txeof(txr);

	while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
		if (txr->tx_avail < EM_MAX_SCATTER) {
			ifp->if_drv_flags |= IFF_DRV_OACTIVE;
			break;
		}
		IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
		if (m_head == NULL)
			break;
		/*
		 *  Encapsulation can modify our pointer, and/or make it
		 *  NULL on failure.  In that event, we can't requeue.
		 */
		if (em_xmit(txr, &m_head)) {
			if (m_head == NULL)
				break;
			ifp->if_drv_flags |= IFF_DRV_OACTIVE;
			IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
			break;
		}

		/* Send a copy of the frame to the BPF listener */
		ETHER_BPF_MTAP(ifp, m_head);

		/* Set timeout in case hardware has problems transmitting. */
		txr->watchdog_time = ticks;
		txr->watchdog_check = TRUE;
	}

	return;
}

static void
em_start(struct ifnet *ifp)
{
	struct adapter	*adapter = ifp->if_softc;
	struct tx_ring	*txr = adapter->tx_rings;

	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
		EM_TX_LOCK(txr);
		em_start_locked(ifp, txr);
		EM_TX_UNLOCK(txr);
	}
	return;
}

/*********************************************************************
 *  Ioctl entry point
 *
 *  em_ioctl is called when the user wants to configure the
 *  interface.
 *
 *  return 0 on success, positive on failure
 **********************************************************************/

static int
em_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
{
	struct adapter	*adapter = ifp->if_softc;
	struct ifreq *ifr = (struct ifreq *)data;
#ifdef INET
	struct ifaddr *ifa = (struct ifaddr *)data;
#endif
	int error = 0;

	if (adapter->in_detach)
		return (error);

	switch (command) {
	case SIOCSIFADDR:
#ifdef INET
		if (ifa->ifa_addr->sa_family == AF_INET) {
			/*
			 * XXX
			 * Since resetting hardware takes a very long time
			 * and results in link renegotiation, we only
			 * initialize the hardware when it is absolutely
			 * required.
			 */
			ifp->if_flags |= IFF_UP;
			if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) {
				EM_CORE_LOCK(adapter);
				em_init_locked(adapter);
				EM_CORE_UNLOCK(adapter);
			}
			arp_ifinit(ifp, ifa);
		} else
#endif
			error = ether_ioctl(ifp, command, data);
		break;
	case SIOCSIFMTU:
	    {
		int max_frame_size;

		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFMTU (Set Interface MTU)");

		EM_CORE_LOCK(adapter);
		switch (adapter->hw.mac.type) {
		case e1000_82571:
		case e1000_82572:
		case e1000_ich9lan:
		case e1000_ich10lan:
		case e1000_82574:
		case e1000_80003es2lan:	/* 9K Jumbo Frame size */
			max_frame_size = 9234;
			break;
		case e1000_pchlan:
			max_frame_size = 4096;
			break;
			/* Adapters that do not support jumbo frames */
		case e1000_82583:
		case e1000_ich8lan:
			max_frame_size = ETHER_MAX_LEN;
			break;
		default:
			max_frame_size = MAX_JUMBO_FRAME_SIZE;
		}
		if (ifr->ifr_mtu > max_frame_size - ETHER_HDR_LEN -
		    ETHER_CRC_LEN) {
			EM_CORE_UNLOCK(adapter);
			error = EINVAL;
			break;
		}
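		/*
		 * E.g. with the 9234-byte frame limit the largest MTU
		 * accepted here is 9234 - 14 - 4 = 9216 bytes.
		 */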

		ifp->if_mtu = ifr->ifr_mtu;
		adapter->max_frame_size =
		    ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
		em_init_locked(adapter);
		EM_CORE_UNLOCK(adapter);
		break;
	    }
	case SIOCSIFFLAGS:
		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFFLAGS (Set Interface Flags)");
		EM_CORE_LOCK(adapter);
		if (ifp->if_flags & IFF_UP) {
			if ((ifp->if_drv_flags & IFF_DRV_RUNNING)) {
				if ((ifp->if_flags ^ adapter->if_flags) &
				    (IFF_PROMISC | IFF_ALLMULTI)) {
					em_disable_promisc(adapter);
					em_set_promisc(adapter);
				}
			} else
				em_init_locked(adapter);
		} else
			if (ifp->if_drv_flags & IFF_DRV_RUNNING)
				em_stop(adapter);
		adapter->if_flags = ifp->if_flags;
		EM_CORE_UNLOCK(adapter);
		break;
	case SIOCADDMULTI:
	case SIOCDELMULTI:
		IOCTL_DEBUGOUT("ioctl rcv'd: SIOC(ADD|DEL)MULTI");
		if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
			EM_CORE_LOCK(adapter);
			em_disable_intr(adapter);
			em_set_multi(adapter);
#ifdef DEVICE_POLLING
			if (!(ifp->if_capenable & IFCAP_POLLING))
#endif
				em_enable_intr(adapter);
			EM_CORE_UNLOCK(adapter);
		}
		break;
	case SIOCSIFMEDIA:
		/* Check SOL/IDER usage */
		EM_CORE_LOCK(adapter);
		if (e1000_check_reset_block(&adapter->hw)) {
			EM_CORE_UNLOCK(adapter);
			device_printf(adapter->dev, "Media change is"
			    " blocked due to SOL/IDER session.\n");
			break;
		}
		EM_CORE_UNLOCK(adapter);
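		/* FALLTHROUGH */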
	case SIOCGIFMEDIA:
		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCxIFMEDIA (Get/Set Interface Media)");
		error = ifmedia_ioctl(ifp, ifr, &adapter->media, command);
		break;
	case SIOCSIFCAP:
	    {
		int mask, reinit;

		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFCAP (Set Capabilities)");
		reinit = 0;
		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
#ifdef DEVICE_POLLING
		if (mask & IFCAP_POLLING) {
			if (ifr->ifr_reqcap & IFCAP_POLLING) {
				error = ether_poll_register(em_poll, ifp);
				if (error)
					return (error);
				EM_CORE_LOCK(adapter);
				em_disable_intr(adapter);
				ifp->if_capenable |= IFCAP_POLLING;
				EM_CORE_UNLOCK(adapter);
			} else {
				error = ether_poll_deregister(ifp);
				/* Enable interrupt even in error case */
				EM_CORE_LOCK(adapter);
				em_enable_intr(adapter);
				ifp->if_capenable &= ~IFCAP_POLLING;
				EM_CORE_UNLOCK(adapter);
			}
		}
#endif
		if (mask & IFCAP_HWCSUM) {
			ifp->if_capenable ^= IFCAP_HWCSUM;
			reinit = 1;
		}
		if (mask & IFCAP_TSO4) {
			ifp->if_capenable ^= IFCAP_TSO4;
			reinit = 1;
		}
		if (mask & IFCAP_VLAN_HWTAGGING) {
			ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
			reinit = 1;
		}
		if (mask & IFCAP_VLAN_HWFILTER) {
			ifp->if_capenable ^= IFCAP_VLAN_HWFILTER;
			reinit = 1;
		}
		if ((mask & IFCAP_WOL) &&
		    (ifp->if_capabilities & IFCAP_WOL) != 0) {
			if (mask & IFCAP_WOL_MCAST)
				ifp->if_capenable ^= IFCAP_WOL_MCAST;
			if (mask & IFCAP_WOL_MAGIC)
				ifp->if_capenable ^= IFCAP_WOL_MAGIC;
		}
		if (reinit && (ifp->if_drv_flags & IFF_DRV_RUNNING))
			em_init(adapter);
		VLAN_CAPABILITIES(ifp);
		break;
	    }

	default:
		error = ether_ioctl(ifp, command, data);
		break;
	}

	return (error);
}


/*********************************************************************
 *  Init entry point
 *
 *  This routine is used in two ways. It is used by the stack as
 *  an init entry point in the network interface structure. It is
 *  also used by the driver as a hw/sw initialization routine to
 *  get to a consistent state.
 *
 **********************************************************************/

static void
em_init_locked(struct adapter *adapter)
{
	struct ifnet	*ifp = adapter->ifp;
	device_t	dev = adapter->dev;
	u32		pba;

	INIT_DEBUGOUT("em_init: begin");

	EM_CORE_LOCK_ASSERT(adapter);

	em_disable_intr(adapter);
	callout_stop(&adapter->timer);

	/*
	 * Packet Buffer Allocation (PBA)
	 * Writing PBA sets the receive portion of the buffer;
	 * the remainder is used for the transmit buffer.
	 */
	switch (adapter->hw.mac.type) {
	/* Total Packet Buffer on these is 48K */
	case e1000_82571:
	case e1000_82572:
	case e1000_80003es2lan:
		pba = E1000_PBA_32K; /* 32K for Rx, 16K for Tx */
		break;
	case e1000_82573: /* 82573: Total Packet Buffer is 32K */
		pba = E1000_PBA_12K; /* 12K for Rx, 20K for Tx */
		break;
	case e1000_82574:
	case e1000_82583:
		pba = E1000_PBA_20K; /* 20K for Rx, 20K for Tx */
		break;
	case e1000_ich9lan:
	case e1000_ich10lan:
	case e1000_pchlan:
		pba = E1000_PBA_10K;
		break;
	case e1000_ich8lan:
		pba = E1000_PBA_8K;
		break;
	default:
		if (adapter->max_frame_size > 8192)
			pba = E1000_PBA_40K; /* 40K for Rx, 24K for Tx */
		else
			pba = E1000_PBA_48K; /* 48K for Rx, 16K for Tx */
	}
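	/*
	 * The PBA constants are KB counts (e.g. E1000_PBA_32K is 32),
	 * so on a 48K part that setting leaves 16K for transmit.
	 */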

	INIT_DEBUGOUT1("em_init: pba=%dK", pba);
	E1000_WRITE_REG(&adapter->hw, E1000_PBA, pba);

	/* Get the latest mac address; the user can use a LAA */
	bcopy(IF_LLADDR(adapter->ifp), adapter->hw.mac.addr,
	    ETHER_ADDR_LEN);

	/* Put the address into the Receive Address Array */
	e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);

	/*
	 * With the 82571 adapter, RAR[0] may be overwritten
	 * when the other port is reset, so we make a duplicate
	 * in RAR[14] for that eventuality; this assures
	 * the interface continues to function.
	 */
	if (adapter->hw.mac.type == e1000_82571) {
		e1000_set_laa_state_82571(&adapter->hw, TRUE);
		e1000_rar_set(&adapter->hw, adapter->hw.mac.addr,
		    E1000_RAR_ENTRIES - 1);
	}

	/* Initialize the hardware */
	em_reset(adapter);
	em_update_link_status(adapter);

	/* Setup VLAN support, basic and offload if available */
	E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);

	/* Use real VLAN Filter support? */
	if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) {
		if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
			/* Use real VLAN Filter support */
			em_setup_vlan_hw_support(adapter);
		else {
			u32 ctrl;
			ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL);
			ctrl |= E1000_CTRL_VME;
			E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl);
		}
	}

	/* Set hardware offload abilities */
	ifp->if_hwassist = 0;
	if (ifp->if_capenable & IFCAP_TXCSUM)
		ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP);
	if (ifp->if_capenable & IFCAP_TSO4)
		ifp->if_hwassist |= CSUM_TSO;

	/* Configure for OS presence */
	em_init_manageability(adapter);

	/* Prepare transmit descriptors and buffers */
	em_setup_transmit_structures(adapter);
	em_initialize_transmit_unit(adapter);

	/* Setup Multicast table */
	em_set_multi(adapter);

	/* Prepare receive descriptors and buffers */
	if (em_setup_receive_structures(adapter)) {
		device_printf(dev, "Could not setup receive structures\n");
		em_stop(adapter);
		return;
	}
	em_initialize_receive_unit(adapter);

	/* Don't lose promiscuous settings */
	em_set_promisc(adapter);

	ifp->if_drv_flags |= IFF_DRV_RUNNING;
	ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;

	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
	e1000_clear_hw_cntrs_base_generic(&adapter->hw);

	/* MSI/X configuration for 82574 */
	if (adapter->hw.mac.type == e1000_82574) {
		int tmp;
		tmp = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
		tmp |= E1000_CTRL_EXT_PBA_CLR;
		E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, tmp);
		/* Set the IVAR - interrupt vector routing. */
		E1000_WRITE_REG(&adapter->hw, E1000_IVAR, adapter->ivars);
	}

#ifdef DEVICE_POLLING
	/*
	 * Only enable interrupts if we are not polling, make sure
	 * they are off otherwise.
	 */
	if (ifp->if_capenable & IFCAP_POLLING)
		em_disable_intr(adapter);
	else
#endif /* DEVICE_POLLING */
		em_enable_intr(adapter);

	/* AMT based hardware can now take control from firmware */
	if (adapter->has_manage && adapter->has_amt)
		em_get_hw_control(adapter);

	/* Don't reset the phy next time init gets called */
	adapter->hw.phy.reset_disable = TRUE;
}

static void
em_init(void *arg)
{
	struct adapter *adapter = arg;

	EM_CORE_LOCK(adapter);
	em_init_locked(adapter);
	EM_CORE_UNLOCK(adapter);
}


#ifdef DEVICE_POLLING
/*********************************************************************
 *
 *  Legacy polling routine: note this only works with single queue
 *
 *********************************************************************/
static int
em_poll(struct ifnet *ifp, enum poll_cmd cmd, int count)
{
	struct adapter *adapter = ifp->if_softc;
	struct tx_ring	*txr = adapter->tx_rings;
	struct rx_ring	*rxr = adapter->rx_rings;
	u32		reg_icr;
	int		rx_done;

	EM_CORE_LOCK(adapter);
	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
		EM_CORE_UNLOCK(adapter);
		return (0);
	}

	if (cmd == POLL_AND_CHECK_STATUS) {
		reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
		if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
			callout_stop(&adapter->timer);
			adapter->hw.mac.get_link_status = 1;
			em_update_link_status(adapter);
			callout_reset(&adapter->timer, hz,
			    em_local_timer, adapter);
		}
	}
	EM_CORE_UNLOCK(adapter);

	em_rxeof(rxr, count, &rx_done);

	EM_TX_LOCK(txr);
	em_txeof(txr);
#ifdef EM_MULTIQUEUE
	if (!drbr_empty(ifp, txr->br))
		em_mq_start_locked(ifp, txr, NULL);
#else
	if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
		em_start_locked(ifp, txr);
#endif
	EM_TX_UNLOCK(txr);

	return (rx_done);
}
#endif /* DEVICE_POLLING */


/*********************************************************************
 *
 *  Fast Legacy/MSI Combined Interrupt Service routine
 *
 *********************************************************************/
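/*
 * This runs as an interrupt filter: it executes in primary interrupt
 * context, so it only acknowledges the cause register, masks further
 * device interrupts, and defers the real RX/TX work to the que
 * taskqueue handled below.
 */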
static int
em_irq_fast(void *arg)
{
	struct adapter	*adapter = arg;
	struct ifnet	*ifp;
	u32		reg_icr;

	ifp = adapter->ifp;

	reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);

	/* Hot eject? */
	if (reg_icr == 0xffffffff)
		return (FILTER_STRAY);

	/* Definitely not our interrupt. */
	if (reg_icr == 0x0)
		return (FILTER_STRAY);

	/*
	 * Starting with the 82571 chip, bit 31 should be used to
	 * determine whether the interrupt belongs to us.
	 */
	if (adapter->hw.mac.type >= e1000_82571 &&
	    (reg_icr & E1000_ICR_INT_ASSERTED) == 0)
		return (FILTER_STRAY);

	em_disable_intr(adapter);
	taskqueue_enqueue(adapter->tq, &adapter->que_task);

	/* Link status change */
	if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
		adapter->hw.mac.get_link_status = 1;
		taskqueue_enqueue(taskqueue_fast, &adapter->link_task);
	}

	if (reg_icr & E1000_ICR_RXO)
		adapter->rx_overruns++;
	return (FILTER_HANDLED);
}

/* Combined RX/TX handler, used by Legacy and MSI */
static void
em_handle_que(void *context, int pending)
{
	struct adapter	*adapter = context;
	struct ifnet	*ifp = adapter->ifp;
	struct tx_ring	*txr = adapter->tx_rings;
	struct rx_ring	*rxr = adapter->rx_rings;
	bool		more;

	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
		more = em_rxeof(rxr, adapter->rx_process_limit, NULL);

		EM_TX_LOCK(txr);
		if (em_txeof(txr))
			more = TRUE;
#ifdef EM_MULTIQUEUE
		if (!drbr_empty(ifp, txr->br))
			em_mq_start_locked(ifp, txr, NULL);
#else
		if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
			em_start_locked(ifp, txr);
#endif
		EM_TX_UNLOCK(txr);
		if (more) {
			taskqueue_enqueue(adapter->tq, &adapter->que_task);
			return;
		}
	}

	em_enable_intr(adapter);
	return;
}


/*********************************************************************
 *
 *  MSIX Interrupt Service Routines
 *
 **********************************************************************/
static void
em_msix_tx(void *arg)
{
	struct tx_ring *txr = arg;
	struct adapter *adapter = txr->adapter;
	bool		more;

	++txr->tx_irq;
	EM_TX_LOCK(txr);
	more = em_txeof(txr);
	EM_TX_UNLOCK(txr);
	if (more)
		taskqueue_enqueue(txr->tq, &txr->tx_task);
	else
		/* Reenable this interrupt */
		E1000_WRITE_REG(&adapter->hw, E1000_IMS, txr->ims);
	return;
}

/*********************************************************************
 *
 *  MSIX RX Interrupt Service routine
 *
 **********************************************************************/

static void
em_msix_rx(void *arg)
{
	struct rx_ring	*rxr = arg;
	struct adapter	*adapter = rxr->adapter;
	bool		more;

	++rxr->rx_irq;
	more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
	if (more)
		taskqueue_enqueue(rxr->tq, &rxr->rx_task);
	else
		/* Reenable this interrupt */
		E1000_WRITE_REG(&adapter->hw, E1000_IMS, rxr->ims);
	return;
}

/*********************************************************************
 *
 *  MSIX Link Fast Interrupt Service routine
 *
 **********************************************************************/
static void
em_msix_link(void *arg)
{
	struct adapter	*adapter = arg;
	u32		reg_icr;

	++adapter->link_irq;
	reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);

	if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
		adapter->hw.mac.get_link_status = 1;
		em_handle_link(adapter, 0);
	} else
		E1000_WRITE_REG(&adapter->hw, E1000_IMS,
		    EM_MSIX_LINK | E1000_IMS_LSC);
	return;
}

static void
em_handle_rx(void *context, int pending)
{
	struct rx_ring	*rxr = context;
	struct adapter	*adapter = rxr->adapter;
	bool		more;

	more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
	if (more)
		taskqueue_enqueue(rxr->tq, &rxr->rx_task);
	else
		/* Reenable this interrupt */
		E1000_WRITE_REG(&adapter->hw, E1000_IMS, rxr->ims);
}

static void
em_handle_tx(void *context, int pending)
{
	struct tx_ring	*txr = context;
	struct adapter	*adapter = txr->adapter;
	struct ifnet	*ifp = adapter->ifp;

	if (!EM_TX_TRYLOCK(txr))
		return;

	em_txeof(txr);

#ifdef EM_MULTIQUEUE
	if (!drbr_empty(ifp, txr->br))
		em_mq_start_locked(ifp, txr, NULL);
#else
	if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
		em_start_locked(ifp, txr);
#endif
	E1000_WRITE_REG(&adapter->hw, E1000_IMS, txr->ims);
	EM_TX_UNLOCK(txr);
}

static void
em_handle_link(void *context, int pending)
{
	struct adapter	*adapter = context;
	struct ifnet *ifp = adapter->ifp;

	if (!(ifp->if_drv_flags & IFF_DRV_RUNNING))
		return;

	EM_CORE_LOCK(adapter);
	callout_stop(&adapter->timer);
	em_update_link_status(adapter);
	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
	E1000_WRITE_REG(&adapter->hw, E1000_IMS,
	    EM_MSIX_LINK | E1000_IMS_LSC);
	EM_CORE_UNLOCK(adapter);
}


/*********************************************************************
 *
 *  Media Ioctl callback
 *
 *  This routine is called whenever the user queries the status of
 *  the interface using ifconfig.
 *
 **********************************************************************/
static void
em_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
{
	struct adapter *adapter = ifp->if_softc;
	u_char fiber_type = IFM_1000_SX;

	INIT_DEBUGOUT("em_media_status: begin");

	EM_CORE_LOCK(adapter);
	em_update_link_status(adapter);

	ifmr->ifm_status = IFM_AVALID;
	ifmr->ifm_active = IFM_ETHER;

	if (!adapter->link_active) {
		EM_CORE_UNLOCK(adapter);
		return;
	}

	ifmr->ifm_status |= IFM_ACTIVE;

	if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
	    (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
		ifmr->ifm_active |= fiber_type | IFM_FDX;
	} else {
		switch (adapter->link_speed) {
		case 10:
			ifmr->ifm_active |= IFM_10_T;
			break;
		case 100:
			ifmr->ifm_active |= IFM_100_TX;
			break;
		case 1000:
			ifmr->ifm_active |= IFM_1000_T;
			break;
		}
		if (adapter->link_duplex == FULL_DUPLEX)
			ifmr->ifm_active |= IFM_FDX;
		else
			ifmr->ifm_active |= IFM_HDX;
	}
	EM_CORE_UNLOCK(adapter);
}

/*********************************************************************
 *
 *  Media Ioctl callback
 *
 *  This routine is called when the user changes speed/duplex using
 *  the media/mediaopt option with ifconfig.
 *
 **********************************************************************/
static int
em_media_change(struct ifnet *ifp)
{
	struct adapter *adapter = ifp->if_softc;
	struct ifmedia  *ifm = &adapter->media;

	INIT_DEBUGOUT("em_media_change: begin");

	if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
		return (EINVAL);

	EM_CORE_LOCK(adapter);
	switch (IFM_SUBTYPE(ifm->ifm_media)) {
	case IFM_AUTO:
		adapter->hw.mac.autoneg = DO_AUTO_NEG;
		adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
		break;
	case IFM_1000_LX:
	case IFM_1000_SX:
	case IFM_1000_T:
		adapter->hw.mac.autoneg = DO_AUTO_NEG;
		adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
		break;
	case IFM_100_TX:
		adapter->hw.mac.autoneg = FALSE;
		adapter->hw.phy.autoneg_advertised = 0;
		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
			adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_FULL;
		else
			adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_HALF;
		break;
	case IFM_10_T:
		adapter->hw.mac.autoneg = FALSE;
		adapter->hw.phy.autoneg_advertised = 0;
		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
			adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_FULL;
		else
			adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_HALF;
		break;
	default:
		device_printf(adapter->dev, "Unsupported media type\n");
	}

	/*
	 * As the speed/duplex settings may have changed,
	 * we need to reset the PHY.
	 */
	adapter->hw.phy.reset_disable = FALSE;

	em_init_locked(adapter);
	EM_CORE_UNLOCK(adapter);

	return (0);
}

/*********************************************************************
 *
 *  This routine maps the mbufs to tx descriptors.
 *
 *  return 0 on success, positive on failure
 **********************************************************************/

static int
em_xmit(struct tx_ring *txr, struct mbuf **m_headp)
{
	struct adapter		*adapter = txr->adapter;
	bus_dma_segment_t	segs[EM_MAX_SCATTER];
	bus_dmamap_t		map;
	struct em_buffer	*tx_buffer, *tx_buffer_mapped;
	struct e1000_tx_desc	*ctxd = NULL;
	struct mbuf		*m_head;
	u32			txd_upper, txd_lower, txd_used, txd_saved;
	int			nsegs, i, j, first, last = 0;
	int			error, do_tso, tso_desc = 0;

	m_head = *m_headp;
	txd_upper = txd_lower = txd_used = txd_saved = 0;
	do_tso = ((m_head->m_pkthdr.csum_flags & CSUM_TSO) != 0);

	/*
	** When doing checksum offload, it is critical to
	** make sure the first mbuf has more than the header,
	** because the checksum setup code expects data to be present.
	*/
	if ((m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD) &&
	    (m_head->m_len < ETHER_HDR_LEN + sizeof(struct ip))) {
		m_head = m_pullup(m_head, ETHER_HDR_LEN + sizeof(struct ip));
		*m_headp = m_head;
		if (m_head == NULL)
			return (ENOBUFS);
	}
1766
1767	/*
1768	 * TSO workaround:
1769	 *  If an mbuf contains only the header we need
1770	 *     to pull 4 bytes of payload into it.
1771	 */
1772	if (do_tso && (m_head->m_len <= M_TSO_LEN)) {
1773		m_head = m_pullup(m_head, M_TSO_LEN + 4);
1774		*m_headp = m_head;
1775		if (m_head == NULL)
1776			return (ENOBUFS);
1777	}
1778
1779	/*
1780	 * Map the packet for DMA
1781	 *
1782	 * Capture the first descriptor index;
1783	 * this descriptor will later hold the index
1784	 * of the EOP descriptor, which is the only one
1785	 * that now gets a DONE bit writeback.
1786	 */
1787	first = txr->next_avail_desc;
1788	tx_buffer = &txr->tx_buffers[first];
1789	tx_buffer_mapped = tx_buffer;
1790	map = tx_buffer->map;
1791
1792	error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
1793	    *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
1794
1795	/*
1796	 * There are two types of errors we can (try) to handle:
1797	 * - EFBIG means the mbuf chain was too long and bus_dma ran
1798	 *   out of segments.  Defragment the mbuf chain and try again.
1799	 * - ENOMEM means bus_dma could not obtain enough bounce buffers
1800	 *   at this point in time.  Defer sending and try again later.
1801	 * All other errors, in particular EINVAL, are fatal and prevent the
1802	 * mbuf chain from ever going through.  Drop it and report error.
1803	 */
1804	if (error == EFBIG) {
1805		struct mbuf *m;
1806
1807		m = m_defrag(*m_headp, M_DONTWAIT);
1808		if (m == NULL) {
1809			adapter->mbuf_alloc_failed++;
1810			m_freem(*m_headp);
1811			*m_headp = NULL;
1812			return (ENOBUFS);
1813		}
1814		*m_headp = m;
1815
1816		/* Try it again */
1817		error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
1818		    *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
1819
1820		if (error) {
1821			adapter->no_tx_dma_setup++;
1822			m_freem(*m_headp);
1823			*m_headp = NULL;
1824			return (error);
1825		}
1826	} else if (error != 0) {
1827		adapter->no_tx_dma_setup++;
1828		return (error);
1829	}
1830
1831	/*
1832	 * TSO Hardware workaround, if this packet is not
1833	 * TSO, and is only a single descriptor long, and
1834	 * it follows a TSO burst, then we need to add a
1835	 * sentinel descriptor to prevent premature writeback.
1836	 */
1837	if ((do_tso == 0) && (txr->tx_tso == TRUE)) {
1838		if (nsegs == 1)
1839			tso_desc = TRUE;
1840		txr->tx_tso = FALSE;
1841	}
1842
1843	if (nsegs > (txr->tx_avail - 2)) {
1844		txr->no_desc_avail++;
1845		bus_dmamap_unload(txr->txtag, map);
1846		return (ENOBUFS);
1847	}
1848	m_head = *m_headp;
1849
1850	/* Do hardware assists */
1851#if __FreeBSD_version >= 700000
1852	if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
1853		error = em_tso_setup(txr, m_head, &txd_upper, &txd_lower);
1854		if (error != TRUE)
1855			return (ENXIO); /* something foobar */
1856			return (ENXIO); /* TSO context setup failed */
1857		tso_desc = TRUE;
1858	} else
1859#endif
1860	if (m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD)
1861		em_transmit_checksum_setup(txr,  m_head,
1862		    &txd_upper, &txd_lower);
1863
1864	i = txr->next_avail_desc;
1865
1866	/* Set up our transmit descriptors */
1867	for (j = 0; j < nsegs; j++) {
1868		bus_size_t seg_len;
1869		bus_addr_t seg_addr;
1870
1871		tx_buffer = &txr->tx_buffers[i];
1872		ctxd = &txr->tx_base[i];
1873		seg_addr = segs[j].ds_addr;
1874		seg_len  = segs[j].ds_len;
1875		/*
1876		** TSO Workaround:
1877		** If this is the last descriptor, we want to
1878		** split it so we have a small final sentinel
1879		*/
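		/*
		** e.g. a 64-byte final segment becomes a 60-byte
		** descriptor plus a 4-byte sentinel at seg_addr + 60.
		*/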
1880		if (tso_desc && (j == (nsegs - 1)) && (seg_len > 8)) {
1881			seg_len -= 4;
1882			ctxd->buffer_addr = htole64(seg_addr);
1883			ctxd->lower.data = htole32(
1884			    adapter->txd_cmd | txd_lower | seg_len);
1885			ctxd->upper.data =
1886			    htole32(txd_upper);
1887			if (++i == adapter->num_tx_desc)
1888				i = 0;
1889			/* Now make the sentinel */
1890			++txd_used; /* using an extra txd */
1891			ctxd = &txr->tx_base[i];
1892			tx_buffer = &txr->tx_buffers[i];
1893			ctxd->buffer_addr =
1894			    htole64(seg_addr + seg_len);
1895			ctxd->lower.data = htole32(
1896			    adapter->txd_cmd | txd_lower | 4);
1897			ctxd->upper.data =
1898			    htole32(txd_upper);
1899			last = i;
1900			if (++i == adapter->num_tx_desc)
1901				i = 0;
1902		} else {
1903			ctxd->buffer_addr = htole64(seg_addr);
1904			ctxd->lower.data = htole32(
1905			    adapter->txd_cmd | txd_lower | seg_len);
1906			ctxd->upper.data =
1907			    htole32(txd_upper);
1908			last = i;
1909			if (++i == adapter->num_tx_desc)
1910				i = 0;
1911		}
1912		tx_buffer->m_head = NULL;
1913		tx_buffer->next_eop = -1;
1914	}
1915
1916	txr->next_avail_desc = i;
1917	txr->tx_avail -= nsegs;
1918	if (tso_desc) /* TSO used an extra for sentinel */
1919		txr->tx_avail -= txd_used;
1920
1921	if (m_head->m_flags & M_VLANTAG) {
1922		/* Set the vlan id. */
1923		ctxd->upper.fields.special =
1924		    htole16(m_head->m_pkthdr.ether_vtag);
1925		/* Tell hardware to add tag */
1926		ctxd->lower.data |= htole32(E1000_TXD_CMD_VLE);
1927	}
1928
1929	tx_buffer->m_head = m_head;
1930	tx_buffer_mapped->map = tx_buffer->map;
1931	tx_buffer->map = map;
1932	bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE);
1933
1934	/*
1935	 * Last Descriptor of Packet
1936	 * needs End Of Packet (EOP)
1937	 * and Report Status (RS)
1938	 */
1939	ctxd->lower.data |=
1940	    htole32(E1000_TXD_CMD_EOP | E1000_TXD_CMD_RS);
1941	/*
1942	 * Keep track in the first buffer which
1943	 * descriptor will be written back
1944	 */
1945	tx_buffer = &txr->tx_buffers[first];
1946	tx_buffer->next_eop = last;
1947
1948	/*
1949	 * Advance the Transmit Descriptor Tail (TDT), this tells the E1000
1950	 * that this frame is available to transmit.
1951	 */
1952	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
1953	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1954	E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me), i);
1955
1956	return (0);
1957}
1958
1959static void
1960em_set_promisc(struct adapter *adapter)
1961{
1962	struct ifnet	*ifp = adapter->ifp;
1963	u32		reg_rctl;
1964
1965	reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
1966
1967	if (ifp->if_flags & IFF_PROMISC) {
1968		reg_rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
1969		/* Turn this on if you want to see bad packets */
1970		if (em_debug_sbp)
1971			reg_rctl |= E1000_RCTL_SBP;
1972		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
1973	} else if (ifp->if_flags & IFF_ALLMULTI) {
1974		reg_rctl |= E1000_RCTL_MPE;
1975		reg_rctl &= ~E1000_RCTL_UPE;
1976		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
1977	}
1978}
1979
1980static void
1981em_disable_promisc(struct adapter *adapter)
1982{
1983	u32	reg_rctl;
1984
1985	reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
1986
1987	reg_rctl &=  (~E1000_RCTL_UPE);
1988	reg_rctl &=  (~E1000_RCTL_MPE);
1989	reg_rctl &=  (~E1000_RCTL_SBP);
1990	E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
1991}
1992
1993
1994/*********************************************************************
1995 *  Multicast Update
1996 *
1997 *  This routine is called whenever multicast address list is updated.
1998 *
1999 **********************************************************************/
2000
2001static void
2002em_set_multi(struct adapter *adapter)
2003{
2004	struct ifnet	*ifp = adapter->ifp;
2005	struct ifmultiaddr *ifma;
2006	u32 reg_rctl = 0;
2007	u8  *mta; /* Multicast array memory */
2008	int mcnt = 0;
2009
2010	IOCTL_DEBUGOUT("em_set_multi: begin");
2011
2012	mta = adapter->mta;
2013	bzero(mta, sizeof(u8) * ETH_ADDR_LEN * MAX_NUM_MULTICAST_ADDRESSES);
2014
2015	if (adapter->hw.mac.type == e1000_82542 &&
2016	    adapter->hw.revision_id == E1000_REVISION_2) {
2017		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2018		if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
2019			e1000_pci_clear_mwi(&adapter->hw);
2020		reg_rctl |= E1000_RCTL_RST;
2021		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2022		msec_delay(5);
2023	}
2024
2025#if __FreeBSD_version < 800000
2026	IF_ADDR_LOCK(ifp);
2027#else
2028	if_maddr_rlock(ifp);
2029#endif
2030	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
2031		if (ifma->ifma_addr->sa_family != AF_LINK)
2032			continue;
2033
2034		if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
2035			break;
2036
2037		bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
2038		    &mta[mcnt * ETH_ADDR_LEN], ETH_ADDR_LEN);
2039		mcnt++;
2040	}
2041#if __FreeBSD_version < 800000
2042	IF_ADDR_UNLOCK(ifp);
2043#else
2044	if_maddr_runlock(ifp);
2045#endif
2046	if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES) {
2047		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2048		reg_rctl |= E1000_RCTL_MPE;
2049		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2050	} else
2051		e1000_update_mc_addr_list(&adapter->hw, mta, mcnt);
2052
2053	if (adapter->hw.mac.type == e1000_82542 &&
2054	    adapter->hw.revision_id == E1000_REVISION_2) {
2055		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2056		reg_rctl &= ~E1000_RCTL_RST;
2057		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2058		msec_delay(5);
2059		if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
2060			e1000_pci_set_mwi(&adapter->hw);
2061	}
2062}
2063
2064
2065/*********************************************************************
2066 *  Timer routine
2067 *
2068 *  This routine checks for link status and updates statistics.
2069 *
2070 **********************************************************************/
2071
2072static void
2073em_local_timer(void *arg)
2074{
2075	struct adapter	*adapter = arg;
2076	struct ifnet	*ifp = adapter->ifp;
2077	struct tx_ring	*txr = adapter->tx_rings;
2078
2079	EM_CORE_LOCK_ASSERT(adapter);
2080
2081	em_update_link_status(adapter);
2082	em_update_stats_counters(adapter);
2083
2084	/* Reset LAA into RAR[0] on 82571 */
2085	if (e1000_get_laa_state_82571(&adapter->hw) == TRUE)
2086		e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);
2087
2088	/*
2089	** Check for time since any descriptor was cleaned
2090	*/
2091	for (int i = 0; i < adapter->num_queues; i++, txr++) {
2092		EM_TX_LOCK(txr);
2093		if (txr->watchdog_check == FALSE) {
2094			EM_TX_UNLOCK(txr);
2095			continue;
2096		}
2097		if ((ticks - txr->watchdog_time) > EM_WATCHDOG)
2098			goto hung;
2099		EM_TX_UNLOCK(txr);
2100	}
2101
2102	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
2103	return;
2104hung:
2105	device_printf(adapter->dev, "Watchdog timeout -- resetting\n");
2106	ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2107	adapter->watchdog_events++;
2108	EM_TX_UNLOCK(txr);
2109	em_init_locked(adapter);
2110}
2111
2112
2113static void
2114em_update_link_status(struct adapter *adapter)
2115{
2116	struct e1000_hw *hw = &adapter->hw;
2117	struct ifnet *ifp = adapter->ifp;
2118	device_t dev = adapter->dev;
2119	u32 link_check = 0;
2120
2121	/* Get the cached link value or read phy for real */
2122	switch (hw->phy.media_type) {
2123	case e1000_media_type_copper:
2124		if (hw->mac.get_link_status) {
2125			/* Do the work to read phy */
2126			e1000_check_for_link(hw);
2127			link_check = !hw->mac.get_link_status;
2128			if (link_check) /* ESB2 fix */
2129				e1000_cfg_on_link_up(hw);
2130		} else
2131			link_check = TRUE;
2132		break;
2133	case e1000_media_type_fiber:
2134		e1000_check_for_link(hw);
2135		link_check = (E1000_READ_REG(hw, E1000_STATUS) &
2136		    E1000_STATUS_LU);
2137		break;
2138	case e1000_media_type_internal_serdes:
2139		e1000_check_for_link(hw);
2140		link_check = adapter->hw.mac.serdes_has_link;
2141		break;
2142	default:
2143	case e1000_media_type_unknown:
2144		break;
2145	}
2146
2147	/* Now check for a transition */
2148	if (link_check && (adapter->link_active == 0)) {
2149		e1000_get_speed_and_duplex(hw, &adapter->link_speed,
2150		    &adapter->link_duplex);
2151		/* Check if we must disable SPEED_MODE bit on PCI-E */
2152		if ((adapter->link_speed != SPEED_1000) &&
2153		    ((hw->mac.type == e1000_82571) ||
2154		    (hw->mac.type == e1000_82572))) {
2155			int tarc0;
2156			tarc0 = E1000_READ_REG(hw, E1000_TARC(0));
2157			tarc0 &= ~SPEED_MODE_BIT;
2158			E1000_WRITE_REG(hw, E1000_TARC(0), tarc0);
2159		}
2160		if (bootverbose)
2161			device_printf(dev, "Link is up %d Mbps %s\n",
2162			    adapter->link_speed,
2163			    ((adapter->link_duplex == FULL_DUPLEX) ?
2164			    "Full Duplex" : "Half Duplex"));
2165		adapter->link_active = 1;
2166		adapter->smartspeed = 0;
2167		ifp->if_baudrate = adapter->link_speed * 1000000;
2168		if_link_state_change(ifp, LINK_STATE_UP);
2169	} else if (!link_check && (adapter->link_active == 1)) {
2170		ifp->if_baudrate = adapter->link_speed = 0;
2171		adapter->link_duplex = 0;
2172		if (bootverbose)
2173			device_printf(dev, "Link is Down\n");
2174		adapter->link_active = 0;
2175		/* Link down, disable watchdog */
2176		// JFV change later
2177		//adapter->watchdog_check = FALSE;
2178		if_link_state_change(ifp, LINK_STATE_DOWN);
2179	}
2180}
2181
2182/*********************************************************************
2183 *
2184 *  This routine disables all traffic on the adapter by issuing a
2185 *  global reset on the MAC and deallocates TX/RX buffers.
2186 *
2187 *  This routine should always be called with BOTH the CORE
2188 *  and TX locks.
2189 **********************************************************************/
2190
2191static void
2192em_stop(void *arg)
2193{
2194	struct adapter	*adapter = arg;
2195	struct ifnet	*ifp = adapter->ifp;
2196	struct tx_ring	*txr = adapter->tx_rings;
2197
2198	EM_CORE_LOCK_ASSERT(adapter);
2199
2200	INIT_DEBUGOUT("em_stop: begin");
2201
2202	em_disable_intr(adapter);
2203	callout_stop(&adapter->timer);
2204
2205	/* Tell the stack that the interface is no longer active */
2206	ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
2207
2208	/* Unarm watchdog timer. */
2209	for (int i = 0; i < adapter->num_queues; i++, txr++) {
2210		EM_TX_LOCK(txr);
2211		txr->watchdog_check = FALSE;
2212		EM_TX_UNLOCK(txr);
2213	}
2214
2215	e1000_reset_hw(&adapter->hw);
2216	E1000_WRITE_REG(&adapter->hw, E1000_WUC, 0);
2217
2218	e1000_led_off(&adapter->hw);
2219	e1000_cleanup_led(&adapter->hw);
2220}
2221
2222
2223/*********************************************************************
2224 *
2225 *  Determine hardware revision.
2226 *
2227 **********************************************************************/
2228static void
2229em_identify_hardware(struct adapter *adapter)
2230{
2231	device_t dev = adapter->dev;
2232
2233	/* Make sure our PCI config space has the necessary stuff set */
2234	adapter->hw.bus.pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2);
2235	if (!((adapter->hw.bus.pci_cmd_word & PCIM_CMD_BUSMASTEREN) &&
2236	    (adapter->hw.bus.pci_cmd_word & PCIM_CMD_MEMEN))) {
2237		device_printf(dev, "Memory Access and/or Bus Master bits "
2238		    "were not set!\n");
2239		adapter->hw.bus.pci_cmd_word |=
2240		(PCIM_CMD_BUSMASTEREN | PCIM_CMD_MEMEN);
2241		pci_write_config(dev, PCIR_COMMAND,
2242		    adapter->hw.bus.pci_cmd_word, 2);
2243	}
2244
2245	/* Save off the information about this board */
2246	adapter->hw.vendor_id = pci_get_vendor(dev);
2247	adapter->hw.device_id = pci_get_device(dev);
2248	adapter->hw.revision_id = pci_read_config(dev, PCIR_REVID, 1);
2249	adapter->hw.subsystem_vendor_id =
2250	    pci_read_config(dev, PCIR_SUBVEND_0, 2);
2251	adapter->hw.subsystem_device_id =
2252	    pci_read_config(dev, PCIR_SUBDEV_0, 2);
2253
2254	/* Do Shared Code Init and Setup */
2255	if (e1000_set_mac_type(&adapter->hw)) {
2256		device_printf(dev, "Setup init failure\n");
2257		return;
2258	}
2259}
2260
2261static int
2262em_allocate_pci_resources(struct adapter *adapter)
2263{
2264	device_t	dev = adapter->dev;
2265	int		rid;
2266
2267	rid = PCIR_BAR(0);
2268	adapter->memory = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
2269	    &rid, RF_ACTIVE);
2270	if (adapter->memory == NULL) {
2271		device_printf(dev, "Unable to allocate bus resource: memory\n");
2272		return (ENXIO);
2273	}
2274	adapter->osdep.mem_bus_space_tag =
2275	    rman_get_bustag(adapter->memory);
2276	adapter->osdep.mem_bus_space_handle =
2277	    rman_get_bushandle(adapter->memory);
2278	adapter->hw.hw_addr = (u8 *)&adapter->osdep.mem_bus_space_handle;
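	/*
	 * Note: register access actually goes through the osdep
	 * bus-space tag/handle (hung off hw->back below), so hw_addr
	 * mainly serves as a non-NULL cookie for the shared code.
	 */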
2279
2280	/* Default to a single queue */
2281	adapter->num_queues = 1;
2282
2283	/*
2284	 * Setup MSI/X or MSI if PCI Express
2285	 */
2286	adapter->msix = em_setup_msix(adapter);
2287
2288	adapter->hw.back = &adapter->osdep;
2289
2290	return (0);
2291}
2292
2293/*********************************************************************
2294 *
2295 *  Setup the Legacy or MSI Interrupt handler
2296 *
2297 **********************************************************************/
2298int
2299em_allocate_legacy(struct adapter *adapter)
2300{
2301	device_t dev = adapter->dev;
2302	int error, rid = 0;
2303
2304	/* Manually turn off all interrupts */
2305	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2306
2307	if (adapter->msix == 1) /* using MSI */
2308		rid = 1;
2309	/* We allocate a single interrupt resource */
2310	adapter->res = bus_alloc_resource_any(dev,
2311	    SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2312	if (adapter->res == NULL) {
2313		device_printf(dev, "Unable to allocate bus resource: "
2314		    "interrupt\n");
2315		return (ENXIO);
2316	}
2317
2318	/*
2319	 * Allocate a fast interrupt and the associated
2320	 * deferred processing contexts.
2321	 */
2322	TASK_INIT(&adapter->que_task, 0, em_handle_que, adapter);
2323	TASK_INIT(&adapter->link_task, 0, em_handle_link, adapter);
2324	adapter->tq = taskqueue_create_fast("em_taskq", M_NOWAIT,
2325	    taskqueue_thread_enqueue, &adapter->tq);
2326	taskqueue_start_threads(&adapter->tq, 1, PI_NET, "%s taskq",
2327	    device_get_nameunit(adapter->dev));
2328	if ((error = bus_setup_intr(dev, adapter->res, INTR_TYPE_NET,
2329	    em_irq_fast, NULL, adapter, &adapter->tag)) != 0) {
2330		device_printf(dev, "Failed to register fast interrupt "
2331			    "handler: %d\n", error);
2332		taskqueue_free(adapter->tq);
2333		adapter->tq = NULL;
2334		return (error);
2335	}
2336
2337	return (0);
2338}
2339
2340/*********************************************************************
2341 *
2342 *  Setup the MSIX Interrupt handlers
2343 *   This is not really multiqueue; rather,
2344 *   it's just multiple interrupt vectors.
2345 *
2346 **********************************************************************/
2347int
2348em_allocate_msix(struct adapter *adapter)
2349{
2350	device_t	dev = adapter->dev;
2351	struct		tx_ring *txr = adapter->tx_rings;
2352	struct		rx_ring *rxr = adapter->rx_rings;
2353	int		error, rid, vector = 0;
2354
2355
2356	/* Make sure all interrupts are disabled */
2357	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2358
2359	/* First set up ring resources */
2360	for (int i = 0; i < adapter->num_queues; i++, txr++, rxr++) {
2361
2362		/* RX ring */
2363		rid = vector + 1;
2364
2365		rxr->res = bus_alloc_resource_any(dev,
2366		    SYS_RES_IRQ, &rid, RF_ACTIVE);
2367		if (rxr->res == NULL) {
2368			device_printf(dev,
2369			    "Unable to allocate bus resource: "
2370			    "RX MSIX Interrupt %d\n", i);
2371			return (ENXIO);
2372		}
2373		if ((error = bus_setup_intr(dev, rxr->res,
2374		    INTR_TYPE_NET | INTR_MPSAFE, NULL, em_msix_rx,
2375		    rxr, &rxr->tag)) != 0) {
2376			device_printf(dev, "Failed to register RX handler\n");
2377			return (error);
2378		}
2379		rxr->msix = vector++; /* NOTE increment vector for TX */
2380		TASK_INIT(&rxr->rx_task, 0, em_handle_rx, rxr);
2381		rxr->tq = taskqueue_create_fast("em_rxq", M_NOWAIT,
2382		    taskqueue_thread_enqueue, &rxr->tq);
2383		taskqueue_start_threads(&rxr->tq, 1, PI_NET, "%s rxq",
2384		    device_get_nameunit(adapter->dev));
2385		/*
2386		** Set the bit to enable interrupt
2387		** in E1000_IMS -- bits 20 and 21
2388		** are for RX0 and RX1, note this has
2389		** NOTHING to do with the MSIX vector
2390		*/
2391		rxr->ims = 1 << (20 + i);
2392		adapter->ivars |= (8 | rxr->msix) << (i * 4);
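		/*
		** IVAR layout sketch (as encoded by these shifts):
		** 4-bit fields, RX queues in nibbles 0-1, TX queues
		** in nibbles 2-3, link cause in nibble 4; in each
		** field bit 3 (the 8) marks it valid and bits 0-2
		** hold the MSIX vector number.
		*/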
2393
2394		/* TX ring */
2395		rid = vector + 1;
2396		txr->res = bus_alloc_resource_any(dev,
2397		    SYS_RES_IRQ, &rid, RF_ACTIVE);
2398		if (txr->res == NULL) {
2399			device_printf(dev,
2400			    "Unable to allocate bus resource: "
2401			    "TX MSIX Interrupt %d\n", i);
2402			return (ENXIO);
2403		}
2404		if ((error = bus_setup_intr(dev, txr->res,
2405		    INTR_TYPE_NET | INTR_MPSAFE, NULL, em_msix_tx,
2406		    txr, &txr->tag)) != 0) {
2407			device_printf(dev, "Failed to register TX handler\n");
2408			return (error);
2409		}
2410		txr->msix = vector++; /* Increment vector for next pass */
2411		TASK_INIT(&txr->tx_task, 0, em_handle_tx, txr);
2412		txr->tq = taskqueue_create_fast("em_txq", M_NOWAIT,
2413		    taskqueue_thread_enqueue, &txr->tq);
2414		taskqueue_start_threads(&txr->tq, 1, PI_NET, "%s txq",
2415		    device_get_nameunit(adapter->dev));
2416		/*
2417		** Set the bit to enable interrupt
2418		** in E1000_IMS -- bits 22 and 23
2419		** are for TX0 and TX1, note this has
2420		** NOTHING to do with the MSIX vector
2421		*/
2422		txr->ims = 1 << (22 + i);
2423		adapter->ivars |= (8 | txr->msix) << (8 + (i * 4));
2424	}
2425
2426	/* Link interrupt */
2427	++rid;
2428	adapter->res = bus_alloc_resource_any(dev,
2429	    SYS_RES_IRQ, &rid, RF_ACTIVE);
2430	if (!adapter->res) {
2431		device_printf(dev, "Unable to allocate "
2432		    "bus resource: Link interrupt [%d]\n", rid);
2433		return (ENXIO);
2434	}
2435	/* Set the link handler function */
2436	error = bus_setup_intr(dev, adapter->res,
2437	    INTR_TYPE_NET | INTR_MPSAFE, NULL,
2438	    em_msix_link, adapter, &adapter->tag);
2439	if (error) {
2440		adapter->res = NULL;
2441		device_printf(dev, "Failed to register LINK handler\n");
2442		return (error);
2443	}
2444	adapter->linkvec = vector;
2445	adapter->ivars |=  (8 | vector) << 16;
2446	adapter->ivars |= 0x80000000;
2447
2448	return (0);
2449}
2450
2451
2452static void
2453em_free_pci_resources(struct adapter *adapter)
2454{
2455	device_t	dev = adapter->dev;
2456	struct tx_ring	*txr;
2457	struct rx_ring	*rxr;
2458	int		rid;
2459
2460
2461	/*
2462	** Release all the queue interrupt resources:
2463	*/
2464	for (int i = 0; i < adapter->num_queues; i++) {
2465		txr = &adapter->tx_rings[i];
2466		rxr = &adapter->rx_rings[i];
2467		rid = txr->msix + 1;
2468		if (txr->tag != NULL) {
2469			bus_teardown_intr(dev, txr->res, txr->tag);
2470			txr->tag = NULL;
2471		}
2472		if (txr->res != NULL)
2473			bus_release_resource(dev, SYS_RES_IRQ,
2474			    rid, txr->res);
2475		rid = rxr->msix + 1;
2476		if (rxr->tag != NULL) {
2477			bus_teardown_intr(dev, rxr->res, rxr->tag);
2478			rxr->tag = NULL;
2479		}
2480		if (rxr->res != NULL)
2481			bus_release_resource(dev, SYS_RES_IRQ,
2482			    rid, rxr->res);
2483	}
2484
2485	if (adapter->linkvec) /* we are doing MSIX */
2486		rid = adapter->linkvec + 1;
2487	else
2488		rid = (adapter->msix != 0) ? 1 : 0;
2489
2490	if (adapter->tag != NULL) {
2491		bus_teardown_intr(dev, adapter->res, adapter->tag);
2492		adapter->tag = NULL;
2493	}
2494
2495	if (adapter->res != NULL)
2496		bus_release_resource(dev, SYS_RES_IRQ, rid, adapter->res);
2497
2498
2499	if (adapter->msix)
2500		pci_release_msi(dev);
2501
2502	if (adapter->msix_mem != NULL)
2503		bus_release_resource(dev, SYS_RES_MEMORY,
2504		    PCIR_BAR(EM_MSIX_BAR), adapter->msix_mem);
2505
2506	if (adapter->memory != NULL)
2507		bus_release_resource(dev, SYS_RES_MEMORY,
2508		    PCIR_BAR(0), adapter->memory);
2509
2510	if (adapter->flash != NULL)
2511		bus_release_resource(dev, SYS_RES_MEMORY,
2512		    EM_FLASH, adapter->flash);
2513}
2514
2515/*
2516 * Setup MSI or MSI/X
2517 */
2518static int
2519em_setup_msix(struct adapter *adapter)
2520{
2521	device_t dev = adapter->dev;
2522	int val = 0;
2523
2524
2525	/* Setup MSI/X for Hartwell */
2526	if ((adapter->hw.mac.type == e1000_82574) &&
2527	    (em_enable_msix == TRUE)) {
2528		/* Map the MSIX BAR */
2529		int rid = PCIR_BAR(EM_MSIX_BAR);
2530		adapter->msix_mem = bus_alloc_resource_any(dev,
2531		    SYS_RES_MEMORY, &rid, RF_ACTIVE);
2532		if (!adapter->msix_mem) {
2533			/* May not be enabled */
2534			device_printf(adapter->dev,
2535			    "Unable to map MSIX table\n");
2536			goto msi;
2537		}
2538		val = pci_msix_count(dev);
2539		if (val != 5) {
2540			bus_release_resource(dev, SYS_RES_MEMORY,
2541			    PCIR_BAR(EM_MSIX_BAR), adapter->msix_mem);
2542			adapter->msix_mem = NULL;
2543			device_printf(adapter->dev,
2544			    "MSIX vectors wrong, using MSI\n");
2545			goto msi;
2546		}
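		/*
		** Vector budget: one RX and one TX vector per queue
		** plus one for the link, hence 5 vectors for two
		** queues and 3 for one, matching em_allocate_msix().
		*/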
2547		if (em_msix_queues == 2) {
2548			val = 5;
2549			adapter->num_queues = 2;
2550		} else {
2551			val = 3;
2552			adapter->num_queues = 1;
2553		}
2554		if (pci_alloc_msix(dev, &val) == 0) {
2555			device_printf(adapter->dev,
2556			    "Using MSIX interrupts "
2557			    "with %d vectors\n", val);
2558		}
2559
2560		return (val);
2561	}
2562msi:
2563	val = pci_msi_count(dev);
2564	if (val == 1 && pci_alloc_msi(dev, &val) == 0) {
2565		adapter->msix = 1;
2566		device_printf(adapter->dev, "Using MSI interrupt\n");
2567		return (val);
2568	}
2569	/* Should only happen due to manual intervention */
2570	device_printf(adapter->dev, "Setup MSIX failure\n");
2571	return (0);
2572}
2573
2574
2575/*********************************************************************
2576 *
2577 *  Initialize the hardware to a configuration
2578 *  as specified by the adapter structure.
2579 *
2580 **********************************************************************/
2581static void
2582em_reset(struct adapter *adapter)
2583{
2584	device_t	dev = adapter->dev;
2585	struct e1000_hw	*hw = &adapter->hw;
2586	u16		rx_buffer_size;
2587
2588	INIT_DEBUGOUT("em_reset: begin");
2589
2590	/* Set up smart power down as default off on newer adapters. */
2591	if (!em_smart_pwr_down && (hw->mac.type == e1000_82571 ||
2592	    hw->mac.type == e1000_82572)) {
2593		u16 phy_tmp = 0;
2594
2595		/* Speed up time to link by disabling smart power down. */
2596		e1000_read_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, &phy_tmp);
2597		phy_tmp &= ~IGP02E1000_PM_SPD;
2598		e1000_write_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, phy_tmp);
2599	}
2600
2601	/*
2602	 * These parameters control the automatic generation (Tx) and
2603	 * response (Rx) to Ethernet PAUSE frames.
2604	 * - High water mark should allow for at least two frames to be
2605	 *   received after sending an XOFF.
2606	 * - Low water mark works best when it is very near the high water mark.
2607	 *   This allows the receiver to restart by sending XON when it has
2608	 *   drained a bit. Here we use an arbitrary value of 1500 which will
2609	 *   restart after one full frame is pulled from the buffer. There
2610	 *   could be several smaller frames in the buffer and if so they will
2611	 *   not trigger the XON until their total number reduces the buffer
2612	 *   by 1500.
2613	 * - The pause time is fairly large at 1000 x 512ns = 512 usec.
2614	 */
2615	rx_buffer_size = ((E1000_READ_REG(hw, E1000_PBA) & 0xffff) << 10);
2616
2617	hw->fc.high_water = rx_buffer_size -
2618	    roundup2(adapter->max_frame_size, 1024);
2619	hw->fc.low_water = hw->fc.high_water - 1500;
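	/*
	 * Worked example (illustrative numbers only): a PBA read of
	 * 48 gives rx_buffer_size = 48 * 1024 = 49152; with a
	 * 1518-byte max frame, roundup2(1518, 1024) = 2048, so
	 * high_water = 47104 and low_water = 45604.
	 */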
2620
2621	if (hw->mac.type == e1000_80003es2lan)
2622		hw->fc.pause_time = 0xFFFF;
2623	else
2624		hw->fc.pause_time = EM_FC_PAUSE_TIME;
2625
2626	hw->fc.send_xon = TRUE;
2627
2628	/* Set flow control, use the tunable value if sane */
2629	if ((em_fc_setting >= 0) && (em_fc_setting < 4))
2630		hw->fc.requested_mode = em_fc_setting;
2631	else
2632		hw->fc.requested_mode = e1000_fc_none;
2633
2634	/* Override - workaround for PCHLAN issue */
2635	if (hw->mac.type == e1000_pchlan)
2636		hw->fc.requested_mode = e1000_fc_rx_pause;
2637
2638	/* Issue a global reset */
2639	e1000_reset_hw(hw);
2640	E1000_WRITE_REG(hw, E1000_WUC, 0);
2641
2642	if (e1000_init_hw(hw) < 0) {
2643		device_printf(dev, "Hardware Initialization Failed\n");
2644		return;
2645	}
2646
2647	E1000_WRITE_REG(hw, E1000_VET, ETHERTYPE_VLAN);
2648	e1000_get_phy_info(hw);
2649	e1000_check_for_link(hw);
2650	return;
2651}
2652
2653/*********************************************************************
2654 *
2655 *  Setup networking device structure and register an interface.
2656 *
2657 **********************************************************************/
2658static int
2659em_setup_interface(device_t dev, struct adapter *adapter)
2660{
2661	struct ifnet   *ifp;
2662
2663	INIT_DEBUGOUT("em_setup_interface: begin");
2664
2665	ifp = adapter->ifp = if_alloc(IFT_ETHER);
2666	if (ifp == NULL) {
2667		device_printf(dev, "can not allocate ifnet structure\n");
2668		return (-1);
2669	}
2670	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
2671	ifp->if_mtu = ETHERMTU;
2672	ifp->if_init = em_init;
2673	ifp->if_softc = adapter;
2674	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
2675	ifp->if_ioctl = em_ioctl;
2676	ifp->if_start = em_start;
2677	IFQ_SET_MAXLEN(&ifp->if_snd, adapter->num_tx_desc - 1);
2678	ifp->if_snd.ifq_drv_maxlen = adapter->num_tx_desc - 1;
2679	IFQ_SET_READY(&ifp->if_snd);
2680
2681	ether_ifattach(ifp, adapter->hw.mac.addr);
2682
2683	ifp->if_capabilities = ifp->if_capenable = 0;
2684
2685#ifdef EM_MULTIQUEUE
2686	/* Multiqueue tx functions */
2687	ifp->if_transmit = em_mq_start;
2688	ifp->if_qflush = em_qflush;
2689#endif
2690
2691	ifp->if_capabilities |= IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM;
2692	ifp->if_capenable |= IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM;
2693
2694	/* Enable TSO by default, can disable with ifconfig */
2695	ifp->if_capabilities |= IFCAP_TSO4;
2696	ifp->if_capenable |= IFCAP_TSO4;
2697
2698	/*
2699	 * Tell the upper layer(s) we
2700	 * support full VLAN capability
2701	 */
2702	ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header);
2703	ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU;
2704	ifp->if_capenable |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU;
2705
2706	/*
2707	** Don't turn this on by default: if vlans are
2708	** created on another pseudo device (e.g. lagg)
2709	** then vlan events are not passed through, breaking
2710	** operation, but with HW FILTER off it works. If
2711	** using vlans directly on the em driver you can
2712	** enable this and get full hardware tag filtering.
2713	*/
2714	ifp->if_capabilities |= IFCAP_VLAN_HWFILTER;
2715
2716#ifdef DEVICE_POLLING
2717	ifp->if_capabilities |= IFCAP_POLLING;
2718#endif
2719
2720	/* Enable only WOL MAGIC by default */
2721	if (adapter->wol) {
2722		ifp->if_capabilities |= IFCAP_WOL;
2723		ifp->if_capenable |= IFCAP_WOL_MAGIC;
2724	}
2725
2726	/*
2727	 * Specify the media types supported by this adapter and register
2728	 * callbacks to update media and link information
2729	 */
2730	ifmedia_init(&adapter->media, IFM_IMASK,
2731	    em_media_change, em_media_status);
2732	if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
2733	    (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
2734		u_char fiber_type = IFM_1000_SX;	/* default type */
2735
2736		ifmedia_add(&adapter->media, IFM_ETHER | fiber_type | IFM_FDX,
2737			    0, NULL);
2738		ifmedia_add(&adapter->media, IFM_ETHER | fiber_type, 0, NULL);
2739	} else {
2740		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T, 0, NULL);
2741		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T | IFM_FDX,
2742			    0, NULL);
2743		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX,
2744			    0, NULL);
2745		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX | IFM_FDX,
2746			    0, NULL);
2747		if (adapter->hw.phy.type != e1000_phy_ife) {
2748			ifmedia_add(&adapter->media,
2749				IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
2750			ifmedia_add(&adapter->media,
2751				IFM_ETHER | IFM_1000_T, 0, NULL);
2752		}
2753	}
2754	ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL);
2755	ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO);
2756	return (0);
2757}
2758
2759
2760/*
2761 * Manage DMA'able memory.
2762 */
2763static void
2764em_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
2765{
2766	if (error)
2767		return;
2768	*(bus_addr_t *) arg = segs[0].ds_addr;
2769}
2770
2771static int
2772em_dma_malloc(struct adapter *adapter, bus_size_t size,
2773        struct em_dma_alloc *dma, int mapflags)
2774{
2775	int error;
2776
2777	error = bus_dma_tag_create(bus_get_dma_tag(adapter->dev), /* parent */
2778				EM_DBA_ALIGN, 0,	/* alignment, bounds */
2779				BUS_SPACE_MAXADDR,	/* lowaddr */
2780				BUS_SPACE_MAXADDR,	/* highaddr */
2781				NULL, NULL,		/* filter, filterarg */
2782				size,			/* maxsize */
2783				1,			/* nsegments */
2784				size,			/* maxsegsize */
2785				0,			/* flags */
2786				NULL,			/* lockfunc */
2787				NULL,			/* lockarg */
2788				&dma->dma_tag);
2789	if (error) {
2790		device_printf(adapter->dev,
2791		    "%s: bus_dma_tag_create failed: %d\n",
2792		    __func__, error);
2793		goto fail_0;
2794	}
2795
2796	error = bus_dmamem_alloc(dma->dma_tag, (void**) &dma->dma_vaddr,
2797	    BUS_DMA_NOWAIT | BUS_DMA_COHERENT, &dma->dma_map);
2798	if (error) {
2799		device_printf(adapter->dev,
2800		    "%s: bus_dmamem_alloc(%ju) failed: %d\n",
2801		    __func__, (uintmax_t)size, error);
2802		goto fail_2;
2803	}
2804
2805	dma->dma_paddr = 0;
2806	error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
2807	    size, em_dmamap_cb, &dma->dma_paddr, mapflags | BUS_DMA_NOWAIT);
2808	if (error || dma->dma_paddr == 0) {
2809		device_printf(adapter->dev,
2810		    "%s: bus_dmamap_load failed: %d\n",
2811		    __func__, error);
2812		goto fail_3;
2813	}
2814
2815	return (0);
2816
2817fail_3:
2818	bus_dmamap_unload(dma->dma_tag, dma->dma_map);
2819fail_2:
2820	bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
2821	bus_dma_tag_destroy(dma->dma_tag);
2822fail_0:
2823	dma->dma_map = NULL;
2824	dma->dma_tag = NULL;
2825
2826	return (error);
2827}
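/*
 * Usage sketch: callers size the allocation to a descriptor ring,
 * as em_allocate_queues() below does:
 *
 *	tsize = roundup2(adapter->num_tx_desc *
 *	    sizeof(struct e1000_tx_desc), EM_DBA_ALIGN);
 *	if (em_dma_malloc(adapter, tsize, &txr->txdma, BUS_DMA_NOWAIT))
 *		...error...
 *
 * em_dma_free() releases the map, the memory, and the tag.
 */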
2828
2829static void
2830em_dma_free(struct adapter *adapter, struct em_dma_alloc *dma)
2831{
2832	if (dma->dma_tag == NULL)
2833		return;
2834	if (dma->dma_map != NULL) {
2835		bus_dmamap_sync(dma->dma_tag, dma->dma_map,
2836		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
2837		bus_dmamap_unload(dma->dma_tag, dma->dma_map);
2838		bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
2839		dma->dma_map = NULL;
2840	}
2841	bus_dma_tag_destroy(dma->dma_tag);
2842	dma->dma_tag = NULL;
2843}
2844
2845
2846/*********************************************************************
2847 *
2848 *  Allocate memory for the transmit and receive rings, and then
2849 *  the descriptors associated with each, called only once at attach.
2850 *
2851 **********************************************************************/
2852static int
2853em_allocate_queues(struct adapter *adapter)
2854{
2855	device_t		dev = adapter->dev;
2856	struct tx_ring		*txr = NULL;
2857	struct rx_ring		*rxr = NULL;
2858	int rsize, tsize, error = E1000_SUCCESS;
2859	int txconf = 0, rxconf = 0;
2860
2861
2862	/* Allocate the TX ring struct memory */
2863	if (!(adapter->tx_rings =
2864	    (struct tx_ring *) malloc(sizeof(struct tx_ring) *
2865	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2866		device_printf(dev, "Unable to allocate TX ring memory\n");
2867		error = ENOMEM;
2868		goto fail;
2869	}
2870
2871	/* Now allocate the RX */
2872	if (!(adapter->rx_rings =
2873	    (struct rx_ring *) malloc(sizeof(struct rx_ring) *
2874	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2875		device_printf(dev, "Unable to allocate RX ring memory\n");
2876		error = ENOMEM;
2877		goto rx_fail;
2878	}
2879
2880	tsize = roundup2(adapter->num_tx_desc *
2881	    sizeof(struct e1000_tx_desc), EM_DBA_ALIGN);
2882	/*
2883	 * Now set up the TX queues, txconf is needed to handle the
2884	 * possibility that things fail midcourse and we need to
2885	 * undo memory gracefully
2886	 */
2887	for (int i = 0; i < adapter->num_queues; i++, txconf++) {
2888		/* Set up some basics */
2889		txr = &adapter->tx_rings[i];
2890		txr->adapter = adapter;
2891		txr->me = i;
2892
2893		/* Initialize the TX lock */
2894		snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
2895		    device_get_nameunit(dev), txr->me);
2896		mtx_init(&txr->tx_mtx, txr->mtx_name, NULL, MTX_DEF);
2897
2898		if (em_dma_malloc(adapter, tsize,
2899			&txr->txdma, BUS_DMA_NOWAIT)) {
2900			device_printf(dev,
2901			    "Unable to allocate TX Descriptor memory\n");
2902			error = ENOMEM;
2903			goto err_tx_desc;
2904		}
2905		txr->tx_base = (struct e1000_tx_desc *)txr->txdma.dma_vaddr;
2906		bzero((void *)txr->tx_base, tsize);
2907
2908		if (em_allocate_transmit_buffers(txr)) {
2909			device_printf(dev,
2910			    "Critical Failure setting up transmit buffers\n");
2911			error = ENOMEM;
2912			goto err_tx_desc;
2913		}
2914#if __FreeBSD_version >= 800000
2915		/* Allocate a buf ring */
2916		txr->br = buf_ring_alloc(4096, M_DEVBUF,
2917		    M_WAITOK, &txr->tx_mtx);
2918#endif
2919	}
2920
2921	/*
2922	 * Next the RX queues...
2923	 */
2924	rsize = roundup2(adapter->num_rx_desc *
2925	    sizeof(struct e1000_rx_desc), EM_DBA_ALIGN);
2926	for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
2927		rxr = &adapter->rx_rings[i];
2928		rxr->adapter = adapter;
2929		rxr->me = i;
2930
2931		/* Initialize the RX lock */
2932		snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
2933		    device_get_nameunit(dev), rxr->me);
2934		mtx_init(&rxr->rx_mtx, rxr->mtx_name, NULL, MTX_DEF);
2935
2936		if (em_dma_malloc(adapter, rsize,
2937			&rxr->rxdma, BUS_DMA_NOWAIT)) {
2938			device_printf(dev,
2939			    "Unable to allocate RxDescriptor memory\n");
2940			error = ENOMEM;
2941			goto err_rx_desc;
2942		}
2943		rxr->rx_base = (struct e1000_rx_desc *)rxr->rxdma.dma_vaddr;
2944		bzero((void *)rxr->rx_base, rsize);
2945
2946		/* Allocate receive buffers for the ring */
2947		if (em_allocate_receive_buffers(rxr)) {
2948			device_printf(dev,
2949			    "Critical Failure setting up receive buffers\n");
2950			error = ENOMEM;
2951			goto err_rx_desc;
2952		}
2953	}
2954
2955	return (0);
2956
2957err_rx_desc:
2958	for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
2959		em_dma_free(adapter, &rxr->rxdma);
2960err_tx_desc:
2961	for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
2962		em_dma_free(adapter, &txr->txdma);
2963	free(adapter->rx_rings, M_DEVBUF);
2964rx_fail:
2965#if __FreeBSD_version >= 800000
2966	buf_ring_free(txr->br, M_DEVBUF);
2967#endif
2968	free(adapter->tx_rings, M_DEVBUF);
2969fail:
2970	return (error);
2971}
2972
2973
2974/*********************************************************************
2975 *
2976 *  Allocate memory for tx_buffer structures. The tx_buffer stores all
2977 *  the information needed to transmit a packet on the wire. This is
2978 *  called only once at attach, setup is done every reset.
2979 *
2980 **********************************************************************/
2981static int
2982em_allocate_transmit_buffers(struct tx_ring *txr)
2983{
2984	struct adapter *adapter = txr->adapter;
2985	device_t dev = adapter->dev;
2986	struct em_buffer *txbuf;
2987	int error, i;
2988
2989	/*
2990	 * Setup DMA descriptor areas.
2991	 */
2992	if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
2993			       1, 0,			/* alignment, bounds */
2994			       BUS_SPACE_MAXADDR,	/* lowaddr */
2995			       BUS_SPACE_MAXADDR,	/* highaddr */
2996			       NULL, NULL,		/* filter, filterarg */
2997			       EM_TSO_SIZE,		/* maxsize */
2998			       EM_MAX_SCATTER,		/* nsegments */
2999			       PAGE_SIZE,		/* maxsegsize */
3000			       0,			/* flags */
3001			       NULL,			/* lockfunc */
3002			       NULL,			/* lockfuncarg */
3003			       &txr->txtag))) {
3004		device_printf(dev, "Unable to allocate TX DMA tag\n");
3005		goto fail;
3006	}
3007
3008	if (!(txr->tx_buffers =
3009	    (struct em_buffer *) malloc(sizeof(struct em_buffer) *
3010	    adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3011		device_printf(dev, "Unable to allocate tx_buffer memory\n");
3012		error = ENOMEM;
3013		goto fail;
3014	}
3015
3016	/* Create the descriptor buffer dma maps */
3017	txbuf = txr->tx_buffers;
3018	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3019		error = bus_dmamap_create(txr->txtag, 0, &txbuf->map);
3020		if (error != 0) {
3021			device_printf(dev, "Unable to create TX DMA map\n");
3022			goto fail;
3023		}
3024	}
3025
3026	return (0);
3027fail:
3028	/* Free everything; this handles the case where we failed partway through */
3029	em_free_transmit_structures(adapter);
3030	return (error);
3031}
3032
3033/*********************************************************************
3034 *
3035 *  Initialize a transmit ring.
3036 *
3037 **********************************************************************/
3038static void
3039em_setup_transmit_ring(struct tx_ring *txr)
3040{
3041	struct adapter *adapter = txr->adapter;
3042	struct em_buffer *txbuf;
3043	int i;
3044
3045	/* Clear the old descriptor contents */
3046	EM_TX_LOCK(txr);
3047	bzero((void *)txr->tx_base,
3048	      (sizeof(struct e1000_tx_desc)) * adapter->num_tx_desc);
3049	/* Reset indices */
3050	txr->next_avail_desc = 0;
3051	txr->next_to_clean = 0;
3052
3053	/* Free any existing tx buffers. */
3054	txbuf = txr->tx_buffers;
3055	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3056		if (txbuf->m_head != NULL) {
3057			bus_dmamap_sync(txr->txtag, txbuf->map,
3058			    BUS_DMASYNC_POSTWRITE);
3059			bus_dmamap_unload(txr->txtag, txbuf->map);
3060			m_freem(txbuf->m_head);
3061			txbuf->m_head = NULL;
3062		}
3063		/* clear the watch index */
3064		txbuf->next_eop = -1;
3065	}
3066
3067	/* Set number of descriptors available */
3068	txr->tx_avail = adapter->num_tx_desc;
3069
3070	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3071	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3072	EM_TX_UNLOCK(txr);
3073}
3074
3075/*********************************************************************
3076 *
3077 *  Initialize all transmit rings.
3078 *
3079 **********************************************************************/
3080static void
3081em_setup_transmit_structures(struct adapter *adapter)
3082{
3083	struct tx_ring *txr = adapter->tx_rings;
3084
3085	for (int i = 0; i < adapter->num_queues; i++, txr++)
3086		em_setup_transmit_ring(txr);
3087
3088	return;
3089}
3090
3091/*********************************************************************
3092 *
3093 *  Enable transmit unit.
3094 *
3095 **********************************************************************/
3096static void
3097em_initialize_transmit_unit(struct adapter *adapter)
3098{
3099	struct tx_ring	*txr = adapter->tx_rings;
3100	struct e1000_hw	*hw = &adapter->hw;
3101	u32	tctl, tarc, tipg = 0;
3102
3103	INIT_DEBUGOUT("em_initialize_transmit_unit: begin");
3104
3105	for (int i = 0; i < adapter->num_queues; i++, txr++) {
3106		u64 bus_addr = txr->txdma.dma_paddr;
3107		/* Base and Len of TX Ring */
3108		E1000_WRITE_REG(hw, E1000_TDLEN(i),
3109		    adapter->num_tx_desc * sizeof(struct e1000_tx_desc));
3110		E1000_WRITE_REG(hw, E1000_TDBAH(i),
3111		    (u32)(bus_addr >> 32));
3112		E1000_WRITE_REG(hw, E1000_TDBAL(i),
3113		    (u32)bus_addr);
3114		/* Init the HEAD/TAIL indices */
3115		E1000_WRITE_REG(hw, E1000_TDT(i), 0);
3116		E1000_WRITE_REG(hw, E1000_TDH(i), 0);
3117
3118		HW_DEBUGOUT2("Base = %x, Length = %x\n",
3119		    E1000_READ_REG(&adapter->hw, E1000_TDBAL(i)),
3120		    E1000_READ_REG(&adapter->hw, E1000_TDLEN(i)));
3121
3122		txr->watchdog_check = FALSE;
3123	}
3124
3125	/* Set the default values for the Tx Inter Packet Gap timer */
3126	switch (adapter->hw.mac.type) {
3127	case e1000_82542:
3128		tipg = DEFAULT_82542_TIPG_IPGT;
3129		tipg |= DEFAULT_82542_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT;
3130		tipg |= DEFAULT_82542_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT;
3131		break;
3132	case e1000_80003es2lan:
3133		tipg = DEFAULT_82543_TIPG_IPGR1;
3134		tipg |= DEFAULT_80003ES2LAN_TIPG_IPGR2 <<
3135		    E1000_TIPG_IPGR2_SHIFT;
3136		break;
3137	default:
3138		if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
3139		    (adapter->hw.phy.media_type ==
3140		    e1000_media_type_internal_serdes))
3141			tipg = DEFAULT_82543_TIPG_IPGT_FIBER;
3142		else
3143			tipg = DEFAULT_82543_TIPG_IPGT_COPPER;
3144		tipg |= DEFAULT_82543_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT;
3145		tipg |= DEFAULT_82543_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT;
3146	}
3147
3148	E1000_WRITE_REG(&adapter->hw, E1000_TIPG, tipg);
3149	E1000_WRITE_REG(&adapter->hw, E1000_TIDV, adapter->tx_int_delay.value);
3150
3151	if (adapter->hw.mac.type >= e1000_82540)
3152		E1000_WRITE_REG(&adapter->hw, E1000_TADV,
3153		    adapter->tx_abs_int_delay.value);
3154
3155	if ((adapter->hw.mac.type == e1000_82571) ||
3156	    (adapter->hw.mac.type == e1000_82572)) {
3157		tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
3158		tarc |= SPEED_MODE_BIT;
3159		E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3160	} else if (adapter->hw.mac.type == e1000_80003es2lan) {
3161		tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
3162		tarc |= 1;
3163		E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3164		tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(1));
3165		tarc |= 1;
3166		E1000_WRITE_REG(&adapter->hw, E1000_TARC(1), tarc);
3167	}
3168
3169	adapter->txd_cmd = E1000_TXD_CMD_IFCS;
3170	if (adapter->tx_int_delay.value > 0)
3171		adapter->txd_cmd |= E1000_TXD_CMD_IDE;
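	/*
	 * IDE delays the per-descriptor write-back interrupt by the
	 * TIDV value programmed above.
	 */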
3172
3173	/* Program the Transmit Control Register */
3174	tctl = E1000_READ_REG(&adapter->hw, E1000_TCTL);
3175	tctl &= ~E1000_TCTL_CT;
3176	tctl |= (E1000_TCTL_PSP | E1000_TCTL_RTLC | E1000_TCTL_EN |
3177		   (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT));
3178
3179	if (adapter->hw.mac.type >= e1000_82571)
3180		tctl |= E1000_TCTL_MULR;
3181
3182	/* This write will effectively turn on the transmit unit. */
3183	E1000_WRITE_REG(&adapter->hw, E1000_TCTL, tctl);
3184
3185}
3186
3187
3188/*********************************************************************
3189 *
3190 *  Free all transmit rings.
3191 *
3192 **********************************************************************/
3193static void
3194em_free_transmit_structures(struct adapter *adapter)
3195{
3196	struct tx_ring *txr = adapter->tx_rings;
3197
3198	for (int i = 0; i < adapter->num_queues; i++, txr++) {
3199		EM_TX_LOCK(txr);
3200		em_free_transmit_buffers(txr);
3201		em_dma_free(adapter, &txr->txdma);
3202		EM_TX_UNLOCK(txr);
3203		EM_TX_LOCK_DESTROY(txr);
3204	}
3205
3206	free(adapter->tx_rings, M_DEVBUF);
3207}
3208
3209/*********************************************************************
3210 *
3211 *  Free transmit ring related data structures.
3212 *
3213 **********************************************************************/
3214static void
3215em_free_transmit_buffers(struct tx_ring *txr)
3216{
3217	struct adapter		*adapter = txr->adapter;
3218	struct em_buffer	*txbuf;
3219
3220	INIT_DEBUGOUT("free_transmit_ring: begin");
3221
3222	if (txr->tx_buffers == NULL)
3223		return;
3224
3225	for (int i = 0; i < adapter->num_tx_desc; i++) {
3226		txbuf = &txr->tx_buffers[i];
3227		if (txbuf->m_head != NULL) {
3228			bus_dmamap_sync(txr->txtag, txbuf->map,
3229			    BUS_DMASYNC_POSTWRITE);
3230			bus_dmamap_unload(txr->txtag,
3231			    txbuf->map);
3232			m_freem(txbuf->m_head);
3233			txbuf->m_head = NULL;
3234			if (txbuf->map != NULL) {
3235				bus_dmamap_destroy(txr->txtag,
3236				    txbuf->map);
3237				txbuf->map = NULL;
3238			}
3239		} else if (txbuf->map != NULL) {
3240			bus_dmamap_unload(txr->txtag,
3241			    txbuf->map);
3242			bus_dmamap_destroy(txr->txtag,
3243			    txbuf->map);
3244			txbuf->map = NULL;
3245		}
3246	}
3247#if __FreeBSD_version >= 800000
3248	if (txr->br != NULL)
3249		buf_ring_free(txr->br, M_DEVBUF);
3250#endif
3251	if (txr->tx_buffers != NULL) {
3252		free(txr->tx_buffers, M_DEVBUF);
3253		txr->tx_buffers = NULL;
3254	}
3255	if (txr->txtag != NULL) {
3256		bus_dma_tag_destroy(txr->txtag);
3257		txr->txtag = NULL;
3258	}
3259	return;
3260}
3261
3262
3263/*********************************************************************
3264 *
3265 *  The offload context needs to be set when we transfer the first
3266 *  packet of a particular protocol (TCP/UDP). This routine has been
3267 *  enhanced to deal with inserted VLAN headers, and IPV6 (not complete)
3268 *
3269 *  Added back the old method of keeping the current context type
3270 *  and not setting if unnecessary, as this is reported to be a
3271 *  big performance win.  -jfv
3272 **********************************************************************/
3273static void
3274em_transmit_checksum_setup(struct tx_ring *txr, struct mbuf *mp,
3275    u32 *txd_upper, u32 *txd_lower)
3276{
3277	struct adapter			*adapter = txr->adapter;
3278	struct e1000_context_desc	*TXD = NULL;
3279	struct em_buffer *tx_buffer;
3280	struct ether_vlan_header *eh;
3281	struct ip *ip = NULL;
3282	struct ip6_hdr *ip6;
3283	int cur, ehdrlen;
3284	u32 cmd, hdr_len, ip_hlen;
3285	u16 etype;
3286	u8 ipproto;
3287
3288
3289	cmd = hdr_len = ipproto = 0;
3290	*txd_upper = *txd_lower = 0;
3291	cur = txr->next_avail_desc;
3292
3293	/*
3294	 * Determine where frame payload starts.
3295	 * Jump over vlan headers if already present,
3296	 * helpful for QinQ too.
3297	 */
3298	eh = mtod(mp, struct ether_vlan_header *);
3299	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
3300		etype = ntohs(eh->evl_proto);
3301		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3302	} else {
3303		etype = ntohs(eh->evl_encap_proto);
3304		ehdrlen = ETHER_HDR_LEN;
3305	}
3306
3307	/*
3308	 * We only support TCP/UDP for IPv4 and IPv6 for the moment.
3309	 * TODO: Support SCTP too when it hits the tree.
3310	 */
3311	switch (etype) {
3312	case ETHERTYPE_IP:
3313		ip = (struct ip *)(mp->m_data + ehdrlen);
3314		ip_hlen = ip->ip_hl << 2;
3315
3316		/* Setup of IP header checksum. */
3317		if (mp->m_pkthdr.csum_flags & CSUM_IP) {
3318			/*
3319			 * Start offset for header checksum calculation.
3320			 * End offset for header checksum calculation.
3321			 * Offset of place to put the checksum.
3322			 */
3323			TXD = (struct e1000_context_desc *)
3324			    &txr->tx_base[cur];
3325			TXD->lower_setup.ip_fields.ipcss = ehdrlen;
3326			TXD->lower_setup.ip_fields.ipcse =
3327			    htole16(ehdrlen + ip_hlen);
3328			TXD->lower_setup.ip_fields.ipcso =
3329			    ehdrlen + offsetof(struct ip, ip_sum);
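			/*
			 * Worked example per the expressions above
			 * (untagged IPv4, 20-byte header): ipcss = 14,
			 * ipcse = 34, and since ip_sum sits 10 bytes
			 * into struct ip, ipcso = 14 + 10 = 24.
			 */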
3330			cmd |= E1000_TXD_CMD_IP;
3331			*txd_upper |= E1000_TXD_POPTS_IXSM << 8;
3332		}
3333
3334		hdr_len = ehdrlen + ip_hlen;
3335		ipproto = ip->ip_p;
3336		break;
3337
3338	case ETHERTYPE_IPV6:
3339		ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
3340		ip_hlen = sizeof(struct ip6_hdr); /* XXX: No header stacking. */
3341
3342		/* IPv6 doesn't have a header checksum. */
3343
3344		hdr_len = ehdrlen + ip_hlen;
3345		ipproto = ip6->ip6_nxt;
3346		break;
3347
3348	default:
3349		return;
3350	}
3351
3352	switch (ipproto) {
3353	case IPPROTO_TCP:
3354		if (mp->m_pkthdr.csum_flags & CSUM_TCP) {
3355			*txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
3356			*txd_upper |= E1000_TXD_POPTS_TXSM << 8;
3357			/* no need for context if already set */
3358			if (txr->last_hw_offload == CSUM_TCP)
3359				return;
3360			txr->last_hw_offload = CSUM_TCP;
3361			/*
3362			 * Start offset for payload checksum calculation.
3363			 * End offset for payload checksum calculation.
3364			 * Offset of place to put the checksum.
3365			 */
3366			TXD = (struct e1000_context_desc *)
3367			    &txr->tx_base[cur];
3368			TXD->upper_setup.tcp_fields.tucss = hdr_len;
3369			TXD->upper_setup.tcp_fields.tucse = htole16(0);
3370			TXD->upper_setup.tcp_fields.tucso =
3371			    hdr_len + offsetof(struct tcphdr, th_sum);
3372			cmd |= E1000_TXD_CMD_TCP;
3373		}
3374		break;
3375	case IPPROTO_UDP:
3376	{
3377		if (mp->m_pkthdr.csum_flags & CSUM_UDP) {
3378			*txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
3379			*txd_upper |= E1000_TXD_POPTS_TXSM << 8;
3380			/* no need for context if already set */
3381			if (txr->last_hw_offload == CSUM_UDP)
3382				return;
3383			txr->last_hw_offload = CSUM_UDP;
3384			/*
3385			 * Start offset for header checksum calculation.
3386			 * End offset for header checksum calculation.
3387			 * Offset of place to put the checksum.
3388			 */
3389			TXD = (struct e1000_context_desc *)
3390			    &txr->tx_base[cur];
3391			TXD->upper_setup.tcp_fields.tucss = hdr_len;
3392			TXD->upper_setup.tcp_fields.tucse = htole16(0);
3393			TXD->upper_setup.tcp_fields.tucso =
3394			    hdr_len + offsetof(struct udphdr, uh_sum);
3395		}
3396		/* FALLTHROUGH */
3397	}
3398	default:
3399		break;
3400	}
3401
3402	if (TXD == NULL)
3403		return;
3404	TXD->tcp_seg_setup.data = htole32(0);
3405	TXD->cmd_and_length =
3406	    htole32(adapter->txd_cmd | E1000_TXD_CMD_DEXT | cmd);
3407	tx_buffer = &txr->tx_buffers[cur];
3408	tx_buffer->m_head = NULL;
3409	tx_buffer->next_eop = -1;
3410
3411	if (++cur == adapter->num_tx_desc)
3412		cur = 0;
3413
3414	txr->tx_avail--;
3415	txr->next_avail_desc = cur;
3416}
3417
3418
3419/**********************************************************************
3420 *
3421 *  Setup work for hardware segmentation offload (TSO)
3422 *
3423 **********************************************************************/
3424static bool
3425em_tso_setup(struct tx_ring *txr, struct mbuf *mp, u32 *txd_upper,
3426   u32 *txd_lower)
3427{
3428	struct adapter			*adapter = txr->adapter;
3429	struct e1000_context_desc	*TXD;
3430	struct em_buffer		*tx_buffer;
3431	struct ether_vlan_header	*eh;
3432	struct ip			*ip;
3433	struct ip6_hdr			*ip6;
3434	struct tcphdr			*th;
3435	int cur, ehdrlen, hdr_len, ip_hlen, isip6;
3436	u16 etype;
3437
3438	/*
3439	 * This function could/should be extended to support IP/IPv6
3440	 * fragmentation as well.  But as they say, one step at a time.
3441	 */
3442
3443	/*
3444	 * Determine where frame payload starts.
3445	 * Jump over vlan headers if already present,
3446	 * helpful for QinQ too.
3447	 */
3448	eh = mtod(mp, struct ether_vlan_header *);
3449	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
3450		etype = ntohs(eh->evl_proto);
3451		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3452	} else {
3453		etype = ntohs(eh->evl_encap_proto);
3454		ehdrlen = ETHER_HDR_LEN;
3455	}
3456
3457	/* Ensure we have at least the IP+TCP header in the first mbuf. */
3458	if (mp->m_len < ehdrlen + sizeof(struct ip) + sizeof(struct tcphdr))
3459		return FALSE;	/* -1 */
3460
3461	/*
3462	 * We only support TCP for IPv4 for the moment; IPv6 is not yet done.
3463	 * TODO: Support SCTP too when it hits the tree.
3464	 */
3465	switch (etype) {
3466	case ETHERTYPE_IP:
3467		isip6 = 0;
3468		ip = (struct ip *)(mp->m_data + ehdrlen);
3469		if (ip->ip_p != IPPROTO_TCP)
3470			return FALSE;	/* 0 */
3471		ip->ip_len = 0;
3472		ip->ip_sum = 0;
3473		ip_hlen = ip->ip_hl << 2;
3474		if (mp->m_len < ehdrlen + ip_hlen + sizeof(struct tcphdr))
3475			return FALSE;	/* -1 */
3476		th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
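		/*
		 * Seed th_sum with the pseudo-header checksum, less the
		 * length (which differs per segment), so the hardware
		 * can complete each segment's TCP checksum.
		 */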
3477#if 1
3478		th->th_sum = in_pseudo(ip->ip_src.s_addr,
3479		    ip->ip_dst.s_addr, htons(IPPROTO_TCP));
3480#else
3481		th->th_sum = mp->m_pkthdr.csum_data;
3482#endif
3483		break;
3484	case ETHERTYPE_IPV6:
3485		isip6 = 1;
3486		return FALSE;			/* Not supported yet. */
3487		ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
3488		if (ip6->ip6_nxt != IPPROTO_TCP)
3489			return FALSE;	/* 0 */
3490		ip6->ip6_plen = 0;
3491		ip_hlen = sizeof(struct ip6_hdr); /* XXX: no header stacking. */
3492		if (mp->m_len < ehdrlen + ip_hlen + sizeof(struct tcphdr))
3493			return FALSE;	/* -1 */
3494		th = (struct tcphdr *)((caddr_t)ip6 + ip_hlen);
3495#if 0
3496		th->th_sum = in6_pseudo(ip6->ip6_src, ip6->ip6_dst,
3497		    htons(IPPROTO_TCP));	/* XXX: function notyet. */
3498#else
3499		th->th_sum = mp->m_pkthdr.csum_data;
3500#endif
3501		break;
3502	default:
3503		return FALSE;
3504	}
3505	hdr_len = ehdrlen + ip_hlen + (th->th_off << 2);
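	/* e.g. minimal headers: 14 + 20 + 20 = 54 bytes */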
3506
3507	*txd_lower = (E1000_TXD_CMD_DEXT |	/* Extended descr type */
3508		      E1000_TXD_DTYP_D |	/* Data descr type */
3509		      E1000_TXD_CMD_TSE);	/* Do TSE on this packet */
3510
3511	/* IP and/or TCP header checksum calculation and insertion. */
3512	*txd_upper = ((isip6 ? 0 : E1000_TXD_POPTS_IXSM) |
3513		      E1000_TXD_POPTS_TXSM) << 8;
3514
3515	cur = txr->next_avail_desc;
3516	tx_buffer = &txr->tx_buffers[cur];
3517	TXD = (struct e1000_context_desc *) &txr->tx_base[cur];
3518
3519	/* IPv6 doesn't have a header checksum. */
3520	if (!isip6) {
3521		/*
3522		 * Start offset for header checksum calculation.
3523		 * End offset for header checksum calculation.
3524		 * Offset of place to put the checksum.
3525		 */
3526		TXD->lower_setup.ip_fields.ipcss = ehdrlen;
3527		TXD->lower_setup.ip_fields.ipcse =
3528		    htole16(ehdrlen + ip_hlen - 1);
3529		TXD->lower_setup.ip_fields.ipcso =
3530		    ehdrlen + offsetof(struct ip, ip_sum);
3531	}
3532	/*
3533	 * Start offset for payload checksum calculation.
3534	 * End offset for payload checksum calculation.
3535	 * Offset of place to put the checksum.
3536	 */
3537	TXD->upper_setup.tcp_fields.tucss =
3538	    ehdrlen + ip_hlen;
3539	TXD->upper_setup.tcp_fields.tucse = 0;
3540	TXD->upper_setup.tcp_fields.tucso =
3541	    ehdrlen + ip_hlen + offsetof(struct tcphdr, th_sum);
3542	/*
3543	 * Payload size per packet w/o any headers.
3544	 * Length of all headers up to payload.
3545	 */
3546	TXD->tcp_seg_setup.fields.mss = htole16(mp->m_pkthdr.tso_segsz);
3547	TXD->tcp_seg_setup.fields.hdr_len = hdr_len;
3548
3549	TXD->cmd_and_length = htole32(adapter->txd_cmd |
3550				E1000_TXD_CMD_DEXT |	/* Extended descr */
3551				E1000_TXD_CMD_TSE |	/* TSE context */
3552				(isip6 ? 0 : E1000_TXD_CMD_IP) |
3553				E1000_TXD_CMD_TCP |	/* Do TCP checksum */
3554				(mp->m_pkthdr.len - (hdr_len))); /* Total len */
3555
3556	tx_buffer->m_head = NULL;
3557	tx_buffer->next_eop = -1;
3558
3559	if (++cur == adapter->num_tx_desc)
3560		cur = 0;
3561
3562	txr->tx_avail--;
3563	txr->next_avail_desc = cur;
3564	txr->tx_tso = TRUE;
3565
3566	return TRUE;
3567}
3568
3569
3570/**********************************************************************
3571 *
3572 *  Examine each tx_buffer in the used queue. If the hardware is done
3573 *  processing the packet then free associated resources. The
3574 *  tx_buffer is put back on the free queue.
3575 *
3576 **********************************************************************/
3577static bool
3578em_txeof(struct tx_ring *txr)
3579{
3580	struct adapter	*adapter = txr->adapter;
	int first, last, done, num_avail;
	struct em_buffer *tx_buffer;
	struct e1000_tx_desc   *tx_desc, *eop_desc;
	struct ifnet   *ifp = adapter->ifp;

	EM_TX_LOCK_ASSERT(txr);

	if (txr->tx_avail == adapter->num_tx_desc)
		return (FALSE);

	num_avail = txr->tx_avail;
	first = txr->next_to_clean;
	tx_desc = &txr->tx_base[first];
	tx_buffer = &txr->tx_buffers[first];
	last = tx_buffer->next_eop;
	eop_desc = &txr->tx_base[last];
3597
3598	/*
3599	 * What this does is get the index of the
3600	 * first descriptor AFTER the EOP of the
3601	 * first packet, that way we can do the
3602	 * simple comparison on the inner while loop.
3603	 */
3604	if (++last == adapter->num_tx_desc)
		last = 0;
3606	done = last;
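	/*
	 * Example: with first == 10 and the packet's EOP at descriptor
	 * 12, done becomes 13, so the inner loop below cleans slots 10,
	 * 11 and 12 and then stops.
	 */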
3607
	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
	    BUS_DMASYNC_POSTREAD);

	while (eop_desc->upper.fields.status & E1000_TXD_STAT_DD) {
		/* We clean the range of the packet */
		while (first != done) {
			tx_desc->upper.data = 0;
			tx_desc->lower.data = 0;
			tx_desc->buffer_addr = 0;
			++num_avail;

			if (tx_buffer->m_head) {
				ifp->if_opackets++;
				bus_dmamap_sync(txr->txtag,
				    tx_buffer->map,
				    BUS_DMASYNC_POSTWRITE);
				bus_dmamap_unload(txr->txtag,
				    tx_buffer->map);

				m_freem(tx_buffer->m_head);
				tx_buffer->m_head = NULL;
			}
			tx_buffer->next_eop = -1;
			txr->watchdog_time = ticks;

			if (++first == adapter->num_tx_desc)
				first = 0;

			tx_buffer = &txr->tx_buffers[first];
			tx_desc = &txr->tx_base[first];
		}
		/* See if we can continue to the next packet */
		last = tx_buffer->next_eop;
		if (last != -1) {
			eop_desc = &txr->tx_base[last];
			/* Get new done point */
			if (++last == adapter->num_tx_desc)
				last = 0;
			done = last;
		} else
			break;
	}
	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);

	txr->next_to_clean = first;

	/*
	 * If we have enough room, clear IFF_DRV_OACTIVE to
	 * tell the stack that it is OK to send packets.
	 * If there are no pending descriptors, clear the watchdog.
	 */
	if (num_avail > EM_TX_CLEANUP_THRESHOLD) {
		ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
		if (num_avail == adapter->num_tx_desc) {
			txr->watchdog_check = FALSE;
			txr->tx_avail = num_avail;
			return (FALSE);
		}
	}

	txr->tx_avail = num_avail;
3669	return (TRUE);
3670}
3671
3672
3673/*********************************************************************
3674 *
3675 *  Refresh RX descriptor mbufs from system mbuf buffer pool.
3676 *
3677 **********************************************************************/
3678static void
3679em_refresh_mbufs(struct rx_ring *rxr, int limit)
3680{
3681	struct adapter		*adapter = rxr->adapter;
3682	struct mbuf		*m;
3683	bus_dma_segment_t	segs[1];
3684	bus_dmamap_t		map;
3685	struct em_buffer	*rxbuf;
3686	int			i, error, nsegs, cleaned;
3687
3688	i = rxr->next_to_refresh;
3689	cleaned = -1;
3690	while (i != limit) {
3691		m = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR);
3692		if (m == NULL)
3693			goto update;
3694		m->m_len = m->m_pkthdr.len = MCLBYTES;
3695
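		/*
		 * ETHER_ALIGN (2 bytes) shifts the frame so that the IP
		 * header following the 14-byte ethernet header lands on
		 * a 32-bit boundary.
		 */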
3696		if (adapter->max_frame_size <= (MCLBYTES - ETHER_ALIGN))
3697			m_adj(m, ETHER_ALIGN);
3698
3699		/*
3700		 * Using memory from the mbuf cluster pool, invoke the
3701		 * bus_dma machinery to arrange the memory mapping.
3702		 */
3703		error = bus_dmamap_load_mbuf_sg(rxr->rxtag, rxr->rx_sparemap,
3704		    m, segs, &nsegs, BUS_DMA_NOWAIT);
3705		if (error != 0) {
3706			m_free(m);
3707			goto update;
3708		}
3709
3710		/* If nsegs is wrong then the stack is corrupt. */
3711		KASSERT(nsegs == 1, ("Too many segments returned!"));
3712
3713		rxbuf = &rxr->rx_buffers[i];
3714		if (rxbuf->m_head != NULL)
3715			bus_dmamap_unload(rxr->rxtag, rxbuf->map);
3716
3717		map = rxbuf->map;
3718		rxbuf->map = rxr->rx_sparemap;
3719		rxr->rx_sparemap = map;
3720		bus_dmamap_sync(rxr->rxtag,
3721		    rxbuf->map, BUS_DMASYNC_PREREAD);
3722		rxbuf->m_head = m;
3723		rxr->rx_base[i].buffer_addr = htole64(segs[0].ds_addr);
3724
3725		cleaned = i;
3726		/* Calculate next index */
3727		if (++i == adapter->num_rx_desc)
3728			i = 0;
3729		/* This is the work marker for refresh */
3730		rxr->next_to_refresh = i;
3731	}
3732update:
3733	bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
3734	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
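	/*
	 * Advancing RDT (the ring tail) hands the refreshed
	 * descriptors, up to 'cleaned', back to the hardware for
	 * incoming frames.
	 */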
3735	if (cleaned != -1) /* Update tail index */
3736		E1000_WRITE_REG(&adapter->hw,
3737		    E1000_RDT(rxr->me), cleaned);
3738
3739	return;
3740}
3741
3742
3743/*********************************************************************
3744 *
3745 *  Allocate memory for rx_buffer structures. Since we use one
3746 *  rx_buffer per received packet, the maximum number of rx_buffer's
3747 *  that we'll need is equal to the number of receive descriptors
3748 *  that we've allocated.
3749 *
3750 **********************************************************************/
3751static int
3752em_allocate_receive_buffers(struct rx_ring *rxr)
3753{
3754	struct adapter		*adapter = rxr->adapter;
3755	device_t		dev = adapter->dev;
3756	struct em_buffer	*rxbuf;
3757	int			error;
3758
3759	rxr->rx_buffers = malloc(sizeof(struct em_buffer) *
3760	    adapter->num_rx_desc, M_DEVBUF, M_NOWAIT | M_ZERO);
3761	if (rxr->rx_buffers == NULL) {
3762		device_printf(dev, "Unable to allocate rx_buffer memory\n");
3763		return (ENOMEM);
3764	}
3765
3766	error = bus_dma_tag_create(bus_get_dma_tag(dev), /* parent */
3767				1, 0,			/* alignment, bounds */
3768				BUS_SPACE_MAXADDR,	/* lowaddr */
3769				BUS_SPACE_MAXADDR,	/* highaddr */
3770				NULL, NULL,		/* filter, filterarg */
3771				MCLBYTES,		/* maxsize */
3772				1,			/* nsegments */
3773				MCLBYTES,		/* maxsegsize */
3774				0,			/* flags */
3775				NULL,			/* lockfunc */
3776				NULL,			/* lockarg */
3777				&rxr->rxtag);
3778	if (error) {
3779		device_printf(dev, "%s: bus_dma_tag_create failed %d\n",
3780		    __func__, error);
3781		goto fail;
3782	}
3783
3784	/* Create the spare map (used by getbuf) */
3785	error = bus_dmamap_create(rxr->rxtag, BUS_DMA_NOWAIT,
3786	     &rxr->rx_sparemap);
3787	if (error) {
3788		device_printf(dev, "%s: bus_dmamap_create failed: %d\n",
3789		    __func__, error);
3790		goto fail;
3791	}
3792
	for (int i = 0; i < adapter->num_rx_desc; i++) {
		rxbuf = &rxr->rx_buffers[i];
3796		error = bus_dmamap_create(rxr->rxtag, BUS_DMA_NOWAIT,
3797		    &rxbuf->map);
3798		if (error) {
3799			device_printf(dev, "%s: bus_dmamap_create failed: %d\n",
3800			    __func__, error);
3801			goto fail;
3802		}
3803	}
3804
3805	return (0);
3806
3807fail:
3808	em_free_receive_structures(adapter);
3809	return (error);
3810}
3811
3812
3813/*********************************************************************
3814 *
3815 *  Initialize a receive ring and its buffers.
3816 *
3817 **********************************************************************/
3818static int
3819em_setup_receive_ring(struct rx_ring *rxr)
3820{
3821	struct	adapter 	*adapter = rxr->adapter;
3822	struct em_buffer	*rxbuf;
3823	bus_dma_segment_t	seg[1];
3824	int			rsize, nsegs, error;
3825
3828	EM_RX_LOCK(rxr);
3829	rsize = roundup2(adapter->num_rx_desc *
3830	    sizeof(struct e1000_rx_desc), EM_DBA_ALIGN);
3831	bzero((void *)rxr->rx_base, rsize);
3832
3833	/*
3834	** Free current RX buffer structs and their mbufs
3835	*/
3836	for (int i = 0; i < adapter->num_rx_desc; i++) {
3837		rxbuf = &rxr->rx_buffers[i];
3838		if (rxbuf->m_head != NULL) {
3839			bus_dmamap_sync(rxr->rxtag, rxbuf->map,
3840			    BUS_DMASYNC_POSTREAD);
3841			bus_dmamap_unload(rxr->rxtag, rxbuf->map);
3842			m_freem(rxbuf->m_head);
3843		}
3844	}
3845
3846	/* Now replenish the mbufs */
3847	for (int j = 0; j != adapter->num_rx_desc; ++j) {
3848
3849		rxbuf = &rxr->rx_buffers[j];
3850		rxbuf->m_head = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR);
		if (rxbuf->m_head == NULL) {
			EM_RX_UNLOCK(rxr);
			return (ENOBUFS);
		}
3853		rxbuf->m_head->m_len = MCLBYTES;
3854		rxbuf->m_head->m_flags &= ~M_HASFCS; /* we strip it */
3855		rxbuf->m_head->m_pkthdr.len = MCLBYTES;
3856
3857		/* Get the memory mapping */
3858		error = bus_dmamap_load_mbuf_sg(rxr->rxtag,
3859		    rxbuf->map, rxbuf->m_head, seg,
3860		    &nsegs, BUS_DMA_NOWAIT);
3861		if (error != 0) {
3862			m_freem(rxbuf->m_head);
3863			rxbuf->m_head = NULL;
			EM_RX_UNLOCK(rxr);
			return (error);
3865		}
3866		bus_dmamap_sync(rxr->rxtag,
3867		    rxbuf->map, BUS_DMASYNC_PREREAD);
3868
3869		/* Update descriptor */
3870		rxr->rx_base[j].buffer_addr = htole64(seg[0].ds_addr);
3871	}
3872
3875	rxr->next_to_check = 0;
3876	rxr->next_to_refresh = 0;
3877
3878	bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
3879	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3880
3881	EM_RX_UNLOCK(rxr);
3882	return (0);
3883}
3884
3885/*********************************************************************
3886 *
3887 *  Initialize all receive rings.
3888 *
3889 **********************************************************************/
3890static int
3891em_setup_receive_structures(struct adapter *adapter)
3892{
3893	struct rx_ring *rxr = adapter->rx_rings;
3894	int j;
3895
3896	for (j = 0; j < adapter->num_queues; j++, rxr++)
3897		if (em_setup_receive_ring(rxr))
3898			goto fail;
3899
3900	return (0);
3901fail:
3902	/*
	 * Free RX buffers allocated so far; we only need to handle
	 * the rings that completed, since the failing ring will have
	 * cleaned up after itself. Ring 'j' failed, so it is the
	 * terminus.
3906	 */
3907	for (int i = 0; i < j; ++i) {
3908		rxr = &adapter->rx_rings[i];
3909		for (int n = 0; n < adapter->num_rx_desc; n++) {
3910			struct em_buffer *rxbuf;
3911			rxbuf = &rxr->rx_buffers[n];
3912			if (rxbuf->m_head != NULL) {
3913				bus_dmamap_sync(rxr->rxtag, rxbuf->map,
3914			  	  BUS_DMASYNC_POSTREAD);
3915				bus_dmamap_unload(rxr->rxtag, rxbuf->map);
3916				m_freem(rxbuf->m_head);
3917				rxbuf->m_head = NULL;
3918			}
3919		}
3920	}
3921
3922	return (ENOBUFS);
3923}
3924
3925/*********************************************************************
3926 *
3927 *  Free all receive rings.
3928 *
3929 **********************************************************************/
3930static void
3931em_free_receive_structures(struct adapter *adapter)
3932{
3933	struct rx_ring *rxr = adapter->rx_rings;
3934
3935	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
3936		em_free_receive_buffers(rxr);
3937		/* Free the ring memory as well */
3938		em_dma_free(adapter, &rxr->rxdma);
3939		EM_RX_LOCK_DESTROY(rxr);
3940	}
3941
3942	free(adapter->rx_rings, M_DEVBUF);
3943}
3944
3945
3946/*********************************************************************
3947 *
3948 *  Free receive ring data structures
3949 *
3950 **********************************************************************/
3951static void
3952em_free_receive_buffers(struct rx_ring *rxr)
3953{
3954	struct adapter		*adapter = rxr->adapter;
3955	struct em_buffer	*rxbuf = NULL;
3956
3957	INIT_DEBUGOUT("free_receive_buffers: begin");
3958
3959	if (rxr->rx_sparemap) {
3960		bus_dmamap_destroy(rxr->rxtag, rxr->rx_sparemap);
3961		rxr->rx_sparemap = NULL;
3962	}
3963
3964	if (rxr->rx_buffers != NULL) {
3965		for (int i = 0; i < adapter->num_rx_desc; i++) {
3966			rxbuf = &rxr->rx_buffers[i];
3967			if (rxbuf->map != NULL) {
3968				bus_dmamap_sync(rxr->rxtag, rxbuf->map,
3969				    BUS_DMASYNC_POSTREAD);
3970				bus_dmamap_unload(rxr->rxtag, rxbuf->map);
3971				bus_dmamap_destroy(rxr->rxtag, rxbuf->map);
3972			}
3973			if (rxbuf->m_head != NULL) {
3974				m_freem(rxbuf->m_head);
3975				rxbuf->m_head = NULL;
3976			}
3977		}
3978		free(rxr->rx_buffers, M_DEVBUF);
3979		rxr->rx_buffers = NULL;
3980	}
3981
3982	if (rxr->rxtag != NULL) {
3983		bus_dma_tag_destroy(rxr->rxtag);
3984		rxr->rxtag = NULL;
3985	}
3986
3987	return;
3988}
3989
3990
3991/*********************************************************************
3992 *
3993 *  Enable receive unit.
3994 *
3995 **********************************************************************/
3996#define MAX_INTS_PER_SEC	8000
3997#define DEFAULT_ITR	     1000000000/(MAX_INTS_PER_SEC * 256)
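/*
 * The ITR register counts in 256ns units, so MAX_INTS_PER_SEC of 8000
 * yields 1000000000 / (8000 * 256) = 488, i.e. roughly 125us
 * (488 * 256ns) enforced between interrupts.
 */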
3998
3999static void
4000em_initialize_receive_unit(struct adapter *adapter)
4001{
4002	struct rx_ring	*rxr = adapter->rx_rings;
4003	struct ifnet	*ifp = adapter->ifp;
4004	struct e1000_hw	*hw = &adapter->hw;
4005	u64	bus_addr;
4006	u32	rctl, rxcsum;
4007
	INIT_DEBUGOUT("em_initialize_receive_unit: begin");
4009
4010	/*
4011	 * Make sure receives are disabled while setting
4012	 * up the descriptor ring
4013	 */
4014	rctl = E1000_READ_REG(hw, E1000_RCTL);
4015	E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
4016
4017	E1000_WRITE_REG(&adapter->hw, E1000_RADV,
4018	    adapter->rx_abs_int_delay.value);
4019	/*
4020	 * Set the interrupt throttling rate. Value is calculated
	 * as DEFAULT_ITR = 1 second / (MAX_INTS_PER_SEC * 256ns).
4022	 */
4023	E1000_WRITE_REG(hw, E1000_ITR, DEFAULT_ITR);
4024
4025	/*
4026	** When using MSIX interrupts we need to throttle
4027	** using the EITR register (82574 only)
4028	*/
4029	if (hw->mac.type == e1000_82574)
4030		for (int i = 0; i < 4; i++)
4031			E1000_WRITE_REG(hw, E1000_EITR_82574(i),
4032			    DEFAULT_ITR);
4033
	/* Disable accelerated acknowledgment */
4035	if (adapter->hw.mac.type == e1000_82574)
4036		E1000_WRITE_REG(hw, E1000_RFCTL, E1000_RFCTL_ACK_DIS);
4037
4038	if (ifp->if_capenable & IFCAP_RXCSUM) {
4039		rxcsum = E1000_READ_REG(hw, E1000_RXCSUM);
4040		rxcsum |= (E1000_RXCSUM_IPOFL | E1000_RXCSUM_TUOFL);
4041		E1000_WRITE_REG(hw, E1000_RXCSUM, rxcsum);
4042	}
4043
4044	/*
	** XXX TEMPORARY WORKAROUND: on some systems with the 82573
	** (such as the Lenovo X60), long latencies are observed.
	** This change eliminates the problem, but since positive
	** values in RDTR are a known source of problems on other
	** platforms, another solution is being sought.
4050	*/
4051	if (hw->mac.type == e1000_82573)
4052		E1000_WRITE_REG(hw, E1000_RDTR, 0x20);
4053
4054	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4055		/* Setup the Base and Length of the Rx Descriptor Ring */
4056		bus_addr = rxr->rxdma.dma_paddr;
4057		E1000_WRITE_REG(hw, E1000_RDLEN(i),
4058		    adapter->num_rx_desc * sizeof(struct e1000_rx_desc));
4059		E1000_WRITE_REG(hw, E1000_RDBAH(i), (u32)(bus_addr >> 32));
4060		E1000_WRITE_REG(hw, E1000_RDBAL(i), (u32)bus_addr);
4061		/* Setup the Head and Tail Descriptor Pointers */
4062		E1000_WRITE_REG(hw, E1000_RDH(i), 0);
4063		E1000_WRITE_REG(hw, E1000_RDT(i), adapter->num_rx_desc - 1);
4064	}
4065
4066	/* Setup the Receive Control Register */
4067	rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
4068	rctl |= E1000_RCTL_EN | E1000_RCTL_BAM |
4069	    E1000_RCTL_LBM_NO | E1000_RCTL_RDMTS_HALF |
4070	    (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
4071
	/* Strip the CRC */
	rctl |= E1000_RCTL_SECRC;

	/* Make sure VLAN Filters are off */
	rctl &= ~E1000_RCTL_VFE;
4077	rctl &= ~E1000_RCTL_SBP;
4078	rctl |= E1000_RCTL_SZ_2048;
4079	if (ifp->if_mtu > ETHERMTU)
4080		rctl |= E1000_RCTL_LPE;
4081	else
4082		rctl &= ~E1000_RCTL_LPE;
4083
4084	/* Write out the settings */
4085	E1000_WRITE_REG(hw, E1000_RCTL, rctl);
4086
4087	return;
4088}
4089
4090
4091/*********************************************************************
4092 *
 *  This routine executes in interrupt context. It replenishes
 *  the mbufs in the descriptor ring and passes data which has
 *  been DMA'ed into host memory up to the network stack.
 *
 *  We loop at most count times if count is > 0, or until done if
 *  count < 0.
 *
 *  For polling we also return the number of cleaned packets via
 *  the 'done' argument.
4101 *********************************************************************/
4102static bool
4103em_rxeof(struct rx_ring *rxr, int count, int *done)
4104{
4105	struct adapter		*adapter = rxr->adapter;
4106	struct ifnet		*ifp = adapter->ifp;
4107	struct mbuf		*mp, *sendmp;
4108	u8			status = 0;
4109	u16 			len;
4110	int			i, processed, rxdone = 0;
4111	bool			eop;
4112	struct e1000_rx_desc	*cur;
4113
4114	EM_RX_LOCK(rxr);
4115
4116	for (i = rxr->next_to_check, processed = 0; count != 0;) {
4117
4118		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
4119			break;
4120
4121		bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4122		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
4123
4124		cur = &rxr->rx_base[i];
4125		status = cur->status;
4126		mp = sendmp = NULL;
4127
4128		if ((status & E1000_RXD_STAT_DD) == 0)
4129			break;
4130
4131		len = le16toh(cur->length);
4132		eop = (status & E1000_RXD_STAT_EOP) != 0;
4133		count--;
4134
4135		if ((cur->errors & E1000_RXD_ERR_FRAME_ERR_MASK) == 0) {
4136
4137			/* Assign correct length to the current fragment */
4138			mp = rxr->rx_buffers[i].m_head;
4139			mp->m_len = len;
4140
4141			if (rxr->fmp == NULL) {
4142				mp->m_pkthdr.len = len;
4143				rxr->fmp = mp; /* Store the first mbuf */
4144				rxr->lmp = mp;
4145			} else {
4146				/* Chain mbuf's together */
4147				mp->m_flags &= ~M_PKTHDR;
4148				rxr->lmp->m_next = mp;
4149				rxr->lmp = rxr->lmp->m_next;
4150				rxr->fmp->m_pkthdr.len += len;
4151			}
4152
4153			if (eop) {
4154				rxr->fmp->m_pkthdr.rcvif = ifp;
4155				ifp->if_ipackets++;
4156				em_receive_checksum(cur, rxr->fmp);
4157#ifndef __NO_STRICT_ALIGNMENT
4158				if (adapter->max_frame_size >
4159				    (MCLBYTES - ETHER_ALIGN) &&
4160				    em_fixup_rx(rxr) != 0)
4161					goto skip;
4162#endif
4163				if (status & E1000_RXD_STAT_VP) {
4164					rxr->fmp->m_pkthdr.ether_vtag =
4165					    (le16toh(cur->special) &
4166					    E1000_RXD_SPC_VLAN_MASK);
4167					rxr->fmp->m_flags |= M_VLANTAG;
4168				}
4169#ifdef EM_MULTIQUEUE
4170				rxr->fmp->m_pkthdr.flowid = curcpu;
4171				rxr->fmp->m_flags |= M_FLOWID;
4172#endif
4173#ifndef __NO_STRICT_ALIGNMENT
4174skip:
4175#endif
4176				sendmp = rxr->fmp;
4177				rxr->fmp = NULL;
4178				rxr->lmp = NULL;
4179			}
4180		} else {
4181			ifp->if_ierrors++;
4182			/* Reuse loaded DMA map and just update mbuf chain */
4183			mp = rxr->rx_buffers[i].m_head;
4184			mp->m_len = mp->m_pkthdr.len = MCLBYTES;
4185			mp->m_data = mp->m_ext.ext_buf;
4186			mp->m_next = NULL;
4187			if (adapter->max_frame_size <=
4188			    (MCLBYTES - ETHER_ALIGN))
4189				m_adj(mp, ETHER_ALIGN);
4190			if (rxr->fmp != NULL) {
4191				m_freem(rxr->fmp);
4192				rxr->fmp = NULL;
4193				rxr->lmp = NULL;
4194			}
4195			sendmp = NULL;
4196		}
4197
		/* Zero out the receive descriptor's status. */
4199		cur->status = 0;
4200		++rxdone;	/* cumulative for POLL */
4201		++processed;
4202
4203		/* Advance our pointers to the next descriptor. */
4204		if (++i == adapter->num_rx_desc)
4205			i = 0;
4206
4207		/* Send to the stack */
4208		if (sendmp != NULL) {
4209			rxr->next_to_check = i;
4210			EM_RX_UNLOCK(rxr);
4211			(*ifp->if_input)(ifp, sendmp);
4212			EM_RX_LOCK(rxr);
4213			i = rxr->next_to_check;
4214		}
4215
4216		/* Only refresh mbufs every 8 descriptors */
4217		if (processed == 8) {
4218			em_refresh_mbufs(rxr, i);
4219			processed = 0;
4220		}
4221	}
4222
4223	/* Catch any remaining refresh work */
4224	if (processed != 0) {
4225		em_refresh_mbufs(rxr, i);
4226		processed = 0;
4227	}
4228
4229	rxr->next_to_check = i;
4230	if (done != NULL)
4231		*done = rxdone;
4232	EM_RX_UNLOCK(rxr);
4233
4234	return ((status & E1000_RXD_STAT_DD) ? TRUE : FALSE);
4235}
4236
4237#ifndef __NO_STRICT_ALIGNMENT
4238/*
 * When jumbo frames are enabled we should realign the entire payload on
 * architectures with strict alignment. This is a serious design mistake
 * of the 8254x as it nullifies the gain of DMA operations. The 8254x only
 * allows the RX buffer size to be 2048/4096/8192/16384; what we really
 * want is 2048 - ETHER_ALIGN so the payload comes out aligned. On
 * architectures without strict alignment restrictions the 8254x still
 * performs unaligned memory accesses, which reduce performance too.
 * To avoid copying an entire frame to realign it, we allocate a new mbuf,
 * copy only the ethernet header into it, and prepend the new mbuf to the
 * existing mbuf chain.
 *
 * Be aware that the best performance of the 8254x is achieved only when
 * jumbo frames are not used at all on architectures with strict alignment.
4251 */
4252static int
4253em_fixup_rx(struct rx_ring *rxr)
4254{
4255	struct adapter *adapter = rxr->adapter;
4256	struct mbuf *m, *n;
4257	int error;
4258
4259	error = 0;
4260	m = rxr->fmp;
4261	if (m->m_len <= (MCLBYTES - ETHER_HDR_LEN)) {
4262		bcopy(m->m_data, m->m_data + ETHER_HDR_LEN, m->m_len);
4263		m->m_data += ETHER_HDR_LEN;
4264	} else {
4265		MGETHDR(n, M_DONTWAIT, MT_DATA);
4266		if (n != NULL) {
4267			bcopy(m->m_data, n->m_data, ETHER_HDR_LEN);
4268			m->m_data += ETHER_HDR_LEN;
4269			m->m_len -= ETHER_HDR_LEN;
4270			n->m_len = ETHER_HDR_LEN;
4271			M_MOVE_PKTHDR(n, m);
4272			n->m_next = m;
4273			rxr->fmp = n;
4274		} else {
4275			adapter->dropped_pkts++;
4276			m_freem(rxr->fmp);
4277			rxr->fmp = NULL;
4278			error = ENOMEM;
4279		}
4280	}
4281
4282	return (error);
4283}
4284#endif
4285
4286/*********************************************************************
4287 *
4288 *  Verify that the hardware indicated that the checksum is valid.
 *  Inform the stack about the status of the checksum so that it
 *  does not spend time verifying it again.
4291 *
4292 *********************************************************************/
4293static void
4294em_receive_checksum(struct e1000_rx_desc *rx_desc, struct mbuf *mp)
4295{
	/* The Ignore-Checksum bit is set: report nothing to the stack */
4297	if (rx_desc->status & E1000_RXD_STAT_IXSM) {
4298		mp->m_pkthdr.csum_flags = 0;
4299		return;
4300	}
4301
4302	if (rx_desc->status & E1000_RXD_STAT_IPCS) {
4303		/* Did it pass? */
4304		if (!(rx_desc->errors & E1000_RXD_ERR_IPE)) {
4305			/* IP Checksum Good */
4306			mp->m_pkthdr.csum_flags = CSUM_IP_CHECKED;
4307			mp->m_pkthdr.csum_flags |= CSUM_IP_VALID;
4308
4309		} else {
4310			mp->m_pkthdr.csum_flags = 0;
4311		}
4312	}
4313
4314	if (rx_desc->status & E1000_RXD_STAT_TCPCS) {
4315		/* Did it pass? */
4316		if (!(rx_desc->errors & E1000_RXD_ERR_TCPE)) {
4317			mp->m_pkthdr.csum_flags |=
4318			(CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
4319			mp->m_pkthdr.csum_data = htons(0xffff);
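			/*
			 * A csum_data of 0xffff with CSUM_PSEUDO_HDR set
			 * tells the stack the full TCP/UDP checksum has
			 * already been verified.
			 */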
4320		}
4321	}
4322}
4323
4324/*
 * This routine is run via a vlan
 * config EVENT
4327 */
4328static void
4329em_register_vlan(void *arg, struct ifnet *ifp, u16 vtag)
4330{
4331	struct adapter	*adapter = ifp->if_softc;
4332	u32		index, bit;
4333
	if (ifp->if_softc != arg)   /* Not our event */
4335		return;
4336
4337	if ((vtag == 0) || (vtag > 4095))       /* Invalid ID */
		return;
4339
4340	index = (vtag >> 5) & 0x7F;
4341	bit = vtag & 0x1F;
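	/*
	 * The VFTA is 128 32-bit words covering all 4096 VLAN IDs:
	 * e.g. vtag 1234 maps to word 38 (1234 >> 5) and bit 18
	 * (1234 & 0x1F), since 1234 == 38 * 32 + 18.
	 */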
4342	em_shadow_vfta[index] |= (1 << bit);
4343	++adapter->num_vlans;
4344	/* Re-init to load the changes */
4345	em_init(adapter);
4346}
4347
4348/*
 * This routine is run via a vlan
 * unconfig EVENT
4351 */
4352static void
4353em_unregister_vlan(void *arg, struct ifnet *ifp, u16 vtag)
4354{
4355	struct adapter	*adapter = ifp->if_softc;
4356	u32		index, bit;
4357
	if (ifp->if_softc != arg)
4359		return;
4360
4361	if ((vtag == 0) || (vtag > 4095))       /* Invalid */
		return;
4363
4364	index = (vtag >> 5) & 0x7F;
4365	bit = vtag & 0x1F;
4366	em_shadow_vfta[index] &= ~(1 << bit);
4367	--adapter->num_vlans;
4368	/* Re-init to load the changes */
4369	em_init(adapter);
4370}
4371
4372static void
4373em_setup_vlan_hw_support(struct adapter *adapter)
4374{
4375	struct e1000_hw *hw = &adapter->hw;
4376	u32             reg;
4377
4378	/*
	** We get here thru init_locked, meaning a soft
	** reset, which has already cleared the VFTA and
	** other state, so if no vlans have been registered
	** do nothing.
4383	*/
4384	if (adapter->num_vlans == 0)
		return;
4386
4387	/*
	** A soft reset zeroes out the VFTA, so
4389	** we need to repopulate it now.
4390	*/
4391	for (int i = 0; i < EM_VFTA_SIZE; i++)
		if (em_shadow_vfta[i] != 0)
			E1000_WRITE_REG_ARRAY(hw, E1000_VFTA,
			    i, em_shadow_vfta[i]);
4395
4396	reg = E1000_READ_REG(hw, E1000_CTRL);
4397	reg |= E1000_CTRL_VME;
4398	E1000_WRITE_REG(hw, E1000_CTRL, reg);
4399
4400	/* Enable the Filter Table */
4401	reg = E1000_READ_REG(hw, E1000_RCTL);
4402	reg &= ~E1000_RCTL_CFIEN;
4403	reg |= E1000_RCTL_VFE;
4404	E1000_WRITE_REG(hw, E1000_RCTL, reg);
4405
4406	/* Update the frame size */
4407	E1000_WRITE_REG(&adapter->hw, E1000_RLPML,
4408	    adapter->max_frame_size + VLAN_TAG_SIZE);
4409}
4410
4411static void
4412em_enable_intr(struct adapter *adapter)
4413{
4414	struct e1000_hw *hw = &adapter->hw;
4415	u32 ims_mask = IMS_ENABLE_MASK;
4416
4417	if (hw->mac.type == e1000_82574) {
4418		E1000_WRITE_REG(hw, EM_EIAC, EM_MSIX_MASK);
4419		ims_mask |= EM_MSIX_MASK;
4420	}
4421	E1000_WRITE_REG(hw, E1000_IMS, ims_mask);
4422}
4423
4424static void
4425em_disable_intr(struct adapter *adapter)
4426{
4427	struct e1000_hw *hw = &adapter->hw;
4428
4429	if (hw->mac.type == e1000_82574)
4430		E1000_WRITE_REG(hw, EM_EIAC, 0);
	E1000_WRITE_REG(hw, E1000_IMC, 0xffffffff);
4432}
4433
4434/*
 * Bit of a misnomer: what this really means is
 * to enable OS management of the system, i.e.
 * to disable special hardware management features.
4438 */
4439static void
4440em_init_manageability(struct adapter *adapter)
4441{
4442	/* A shared code workaround */
4443#define E1000_82542_MANC2H E1000_MANC2H
4444	if (adapter->has_manage) {
4445		int manc2h = E1000_READ_REG(&adapter->hw, E1000_MANC2H);
4446		int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
4447
4448		/* disable hardware interception of ARP */
4449		manc &= ~(E1000_MANC_ARP_EN);
4450
		/* enable receiving management packets to the host */
4452		manc |= E1000_MANC_EN_MNG2HOST;
4453#define E1000_MNG2HOST_PORT_623 (1 << 5)
4454#define E1000_MNG2HOST_PORT_664 (1 << 6)
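		/*
		 * UDP ports 623 and 664 are the standard ASF/RMCP
		 * remote-management ports; these MANC2H bits let packets
		 * arriving on them be forwarded to the host as well.
		 */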
4455		manc2h |= E1000_MNG2HOST_PORT_623;
4456		manc2h |= E1000_MNG2HOST_PORT_664;
4457		E1000_WRITE_REG(&adapter->hw, E1000_MANC2H, manc2h);
4458		E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
4459	}
4460}
4461
4462/*
4463 * Give control back to hardware management
4464 * controller if there is one.
4465 */
4466static void
4467em_release_manageability(struct adapter *adapter)
4468{
4469	if (adapter->has_manage) {
4470		int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
4471
4472		/* re-enable hardware interception of ARP */
4473		manc |= E1000_MANC_ARP_EN;
4474		manc &= ~E1000_MANC_EN_MNG2HOST;
4475
4476		E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
4477	}
4478}
4479
4480/*
4481 * em_get_hw_control sets the {CTRL_EXT|FWSM}:DRV_LOAD bit.
4482 * For ASF and Pass Through versions of f/w this means
4483 * that the driver is loaded. For AMT version type f/w
4484 * this means that the network i/f is open.
4485 */
4486static void
4487em_get_hw_control(struct adapter *adapter)
4488{
4489	u32 ctrl_ext, swsm;
4490
4491	if (adapter->hw.mac.type == e1000_82573) {
4492		swsm = E1000_READ_REG(&adapter->hw, E1000_SWSM);
4493		E1000_WRITE_REG(&adapter->hw, E1000_SWSM,
4494		    swsm | E1000_SWSM_DRV_LOAD);
4495		return;
4496	}
4497	/* else */
4498	ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4499	E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
4500	    ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
4501	return;
4502}
4503
4504/*
4505 * em_release_hw_control resets {CTRL_EXT|FWSM}:DRV_LOAD bit.
4506 * For ASF and Pass Through versions of f/w this means that
4507 * the driver is no longer loaded. For AMT versions of the
4508 * f/w this means that the network i/f is closed.
4509 */
4510static void
4511em_release_hw_control(struct adapter *adapter)
4512{
4513	u32 ctrl_ext, swsm;
4514
4515	if (!adapter->has_manage)
4516		return;
4517
4518	if (adapter->hw.mac.type == e1000_82573) {
4519		swsm = E1000_READ_REG(&adapter->hw, E1000_SWSM);
4520		E1000_WRITE_REG(&adapter->hw, E1000_SWSM,
4521		    swsm & ~E1000_SWSM_DRV_LOAD);
4522		return;
4523	}
4524	/* else */
4525	ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4526	E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
4527	    ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
4528	return;
4529}
4530
4531static int
4532em_is_valid_ether_addr(u8 *addr)
4533{
4534	char zero_addr[6] = { 0, 0, 0, 0, 0, 0 };
4535
4536	if ((addr[0] & 1) || (!bcmp(addr, zero_addr, ETHER_ADDR_LEN))) {
4537		return (FALSE);
4538	}
4539
4540	return (TRUE);
4541}
4542
4543/*
4544** Parse the interface capabilities with regard
4545** to both system management and wake-on-lan for
4546** later use.
4547*/
4548static void
4549em_get_wakeup(device_t dev)
4550{
4551	struct adapter	*adapter = device_get_softc(dev);
4552	u16		eeprom_data = 0, device_id, apme_mask;
4553
4554	adapter->has_manage = e1000_enable_mng_pass_thru(&adapter->hw);
4555	apme_mask = EM_EEPROM_APME;
4556
4557	switch (adapter->hw.mac.type) {
4558	case e1000_82573:
4559	case e1000_82583:
4560		adapter->has_amt = TRUE;
4561		/* Falls thru */
4562	case e1000_82571:
4563	case e1000_82572:
4564	case e1000_80003es2lan:
4565		if (adapter->hw.bus.func == 1) {
4566			e1000_read_nvm(&adapter->hw,
4567			    NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data);
4568			break;
4569		} else
4570			e1000_read_nvm(&adapter->hw,
4571			    NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
4572		break;
4573	case e1000_ich8lan:
4574	case e1000_ich9lan:
4575	case e1000_ich10lan:
4576	case e1000_pchlan:
4577		apme_mask = E1000_WUC_APME;
4578		adapter->has_amt = TRUE;
4579		eeprom_data = E1000_READ_REG(&adapter->hw, E1000_WUC);
4580		break;
4581	default:
4582		e1000_read_nvm(&adapter->hw,
4583		    NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
4584		break;
4585	}
4586	if (eeprom_data & apme_mask)
4587		adapter->wol = (E1000_WUFC_MAG | E1000_WUFC_MC);
4588	/*
	 * We have the eeprom settings, now apply the special cases
	 * where the eeprom may be wrong or the board won't support
	 * wake on lan on a particular port
4592	 */
4593	device_id = pci_get_device(dev);
	switch (device_id) {
	case E1000_DEV_ID_82571EB_FIBER:
		/* Wake events only supported on port A for dual fiber
		 * regardless of eeprom setting */
		if (E1000_READ_REG(&adapter->hw, E1000_STATUS) &
		    E1000_STATUS_FUNC_1)
			adapter->wol = 0;
		break;
	case E1000_DEV_ID_82571EB_QUAD_COPPER:
	case E1000_DEV_ID_82571EB_QUAD_FIBER:
	case E1000_DEV_ID_82571EB_QUAD_COPPER_LP:
		/* if quad port adapter, disable WoL on all but port A */
		if (global_quad_port_a != 0)
			adapter->wol = 0;
		/* Reset for multiple quad port adapters */
		if (++global_quad_port_a == 4)
			global_quad_port_a = 0;
		break;
	}
4613	return;
4614}
4615
4616
4617/*
4618 * Enable PCI Wake On Lan capability
4619 */
4620static void
4621em_enable_wakeup(device_t dev)
4622{
4623	struct adapter	*adapter = device_get_softc(dev);
4624	struct ifnet	*ifp = adapter->ifp;
4625	u32		pmc, ctrl, ctrl_ext, rctl;
4626	u16     	status;
4627
4628	if ((pci_find_extcap(dev, PCIY_PMG, &pmc) != 0))
4629		return;
4630
4631	/* Advertise the wakeup capability */
4632	ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL);
4633	ctrl |= (E1000_CTRL_SWDPIN2 | E1000_CTRL_SWDPIN3);
4634	E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl);
4635	E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
4636
4637	if ((adapter->hw.mac.type == e1000_ich8lan) ||
4638	    (adapter->hw.mac.type == e1000_pchlan) ||
4639	    (adapter->hw.mac.type == e1000_ich9lan) ||
4640	    (adapter->hw.mac.type == e1000_ich10lan)) {
4641		e1000_disable_gig_wol_ich8lan(&adapter->hw);
4642		e1000_hv_phy_powerdown_workaround_ich8lan(&adapter->hw);
4643	}
4644
4645	/* Keep the laser running on Fiber adapters */
4646	if (adapter->hw.phy.media_type == e1000_media_type_fiber ||
4647	    adapter->hw.phy.media_type == e1000_media_type_internal_serdes) {
4648		ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4649		ctrl_ext |= E1000_CTRL_EXT_SDP3_DATA;
4650		E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, ctrl_ext);
4651	}
4652
4653	/*
4654	** Determine type of Wakeup: note that wol
4655	** is set with all bits on by default.
4656	*/
4657	if ((ifp->if_capenable & IFCAP_WOL_MAGIC) == 0)
4658		adapter->wol &= ~E1000_WUFC_MAG;
4659
4660	if ((ifp->if_capenable & IFCAP_WOL_MCAST) == 0)
4661		adapter->wol &= ~E1000_WUFC_MC;
4662	else {
4663		rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
4664		rctl |= E1000_RCTL_MPE;
4665		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, rctl);
4666	}
4667
4668	if (adapter->hw.mac.type == e1000_pchlan) {
4669		if (em_enable_phy_wakeup(adapter))
4670			return;
4671	} else {
4672		E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
4673		E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
4674	}
4675
4676	if (adapter->hw.phy.type == e1000_phy_igp_3)
4677		e1000_igp3_phy_powerdown_workaround_ich8lan(&adapter->hw);
4678
	/* Request PME */
	status = pci_read_config(dev, pmc + PCIR_POWER_STATUS, 2);
	status &= ~(PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE);
	if (ifp->if_capenable & IFCAP_WOL)
		status |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE;
	pci_write_config(dev, pmc + PCIR_POWER_STATUS, status, 2);
4685
4686	return;
4687}
4688
4689/*
** WOL in the newer chipset interfaces (pchlan)
** requires settings to be copied into the PHY
4692*/
4693static int
4694em_enable_phy_wakeup(struct adapter *adapter)
4695{
4696	struct e1000_hw *hw = &adapter->hw;
4697	u32 mreg, ret = 0;
4698	u16 preg;
4699
4700	/* copy MAC RARs to PHY RARs */
4701	for (int i = 0; i < adapter->hw.mac.rar_entry_count; i++) {
4702		mreg = E1000_READ_REG(hw, E1000_RAL(i));
4703		e1000_write_phy_reg(hw, BM_RAR_L(i), (u16)(mreg & 0xFFFF));
4704		e1000_write_phy_reg(hw, BM_RAR_M(i),
4705		    (u16)((mreg >> 16) & 0xFFFF));
4706		mreg = E1000_READ_REG(hw, E1000_RAH(i));
4707		e1000_write_phy_reg(hw, BM_RAR_H(i), (u16)(mreg & 0xFFFF));
4708		e1000_write_phy_reg(hw, BM_RAR_CTRL(i),
4709		    (u16)((mreg >> 16) & 0xFFFF));
4710	}
4711
4712	/* copy MAC MTA to PHY MTA */
4713	for (int i = 0; i < adapter->hw.mac.mta_reg_count; i++) {
4714		mreg = E1000_READ_REG_ARRAY(hw, E1000_MTA, i);
4715		e1000_write_phy_reg(hw, BM_MTA(i), (u16)(mreg & 0xFFFF));
4716		e1000_write_phy_reg(hw, BM_MTA(i) + 1,
4717		    (u16)((mreg >> 16) & 0xFFFF));
4718	}
4719
4720	/* configure PHY Rx Control register */
4721	e1000_read_phy_reg(&adapter->hw, BM_RCTL, &preg);
4722	mreg = E1000_READ_REG(hw, E1000_RCTL);
4723	if (mreg & E1000_RCTL_UPE)
4724		preg |= BM_RCTL_UPE;
4725	if (mreg & E1000_RCTL_MPE)
4726		preg |= BM_RCTL_MPE;
4727	preg &= ~(BM_RCTL_MO_MASK);
4728	if (mreg & E1000_RCTL_MO_3)
4729		preg |= (((mreg & E1000_RCTL_MO_3) >> E1000_RCTL_MO_SHIFT)
4730				<< BM_RCTL_MO_SHIFT);
4731	if (mreg & E1000_RCTL_BAM)
4732		preg |= BM_RCTL_BAM;
4733	if (mreg & E1000_RCTL_PMCF)
4734		preg |= BM_RCTL_PMCF;
4735	mreg = E1000_READ_REG(hw, E1000_CTRL);
4736	if (mreg & E1000_CTRL_RFCE)
4737		preg |= BM_RCTL_RFCE;
4738	e1000_write_phy_reg(&adapter->hw, BM_RCTL, preg);
4739
4740	/* enable PHY wakeup in MAC register */
4741	E1000_WRITE_REG(hw, E1000_WUC,
4742	    E1000_WUC_PHY_WAKE | E1000_WUC_PME_EN);
4743	E1000_WRITE_REG(hw, E1000_WUFC, adapter->wol);
4744
4745	/* configure and enable PHY wakeup in PHY registers */
4746	e1000_write_phy_reg(&adapter->hw, BM_WUFC, adapter->wol);
4747	e1000_write_phy_reg(&adapter->hw, BM_WUC, E1000_WUC_PME_EN);
4748
4749	/* activate PHY wakeup */
4750	ret = hw->phy.ops.acquire(hw);
4751	if (ret) {
4752		printf("Could not acquire PHY\n");
4753		return ret;
4754	}
4755	e1000_write_phy_reg_mdic(hw, IGP01E1000_PHY_PAGE_SELECT,
4756	                         (BM_WUC_ENABLE_PAGE << IGP_PAGE_SHIFT));
4757	ret = e1000_read_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, &preg);
4758	if (ret) {
4759		printf("Could not read PHY page 769\n");
4760		goto out;
4761	}
4762	preg |= BM_WUC_ENABLE_BIT | BM_WUC_HOST_WU_BIT;
4763	ret = e1000_write_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, preg);
4764	if (ret)
4765		printf("Could not set PHY Host Wakeup bit\n");
4766out:
4767	hw->phy.ops.release(hw);
4768
4769	return ret;
4770}
4771
4772static void
4773em_led_func(void *arg, int onoff)
4774{
4775	struct adapter	*adapter = arg;
4776
4777	EM_CORE_LOCK(adapter);
4778	if (onoff) {
4779		e1000_setup_led(&adapter->hw);
4780		e1000_led_on(&adapter->hw);
4781	} else {
4782		e1000_led_off(&adapter->hw);
4783		e1000_cleanup_led(&adapter->hw);
4784	}
4785	EM_CORE_UNLOCK(adapter);
4786}
4787
4788/**********************************************************************
4789 *
4790 *  Update the board statistics counters.
4791 *
4792 **********************************************************************/
4793static void
4794em_update_stats_counters(struct adapter *adapter)
4795{
4796	struct ifnet   *ifp;
4797
	if (adapter->hw.phy.media_type == e1000_media_type_copper ||
4799	   (E1000_READ_REG(&adapter->hw, E1000_STATUS) & E1000_STATUS_LU)) {
4800		adapter->stats.symerrs += E1000_READ_REG(&adapter->hw, E1000_SYMERRS);
4801		adapter->stats.sec += E1000_READ_REG(&adapter->hw, E1000_SEC);
4802	}
4803	adapter->stats.crcerrs += E1000_READ_REG(&adapter->hw, E1000_CRCERRS);
4804	adapter->stats.mpc += E1000_READ_REG(&adapter->hw, E1000_MPC);
4805	adapter->stats.scc += E1000_READ_REG(&adapter->hw, E1000_SCC);
4806	adapter->stats.ecol += E1000_READ_REG(&adapter->hw, E1000_ECOL);
4807
4808	adapter->stats.mcc += E1000_READ_REG(&adapter->hw, E1000_MCC);
4809	adapter->stats.latecol += E1000_READ_REG(&adapter->hw, E1000_LATECOL);
4810	adapter->stats.colc += E1000_READ_REG(&adapter->hw, E1000_COLC);
4811	adapter->stats.dc += E1000_READ_REG(&adapter->hw, E1000_DC);
4812	adapter->stats.rlec += E1000_READ_REG(&adapter->hw, E1000_RLEC);
4813	adapter->stats.xonrxc += E1000_READ_REG(&adapter->hw, E1000_XONRXC);
4814	adapter->stats.xontxc += E1000_READ_REG(&adapter->hw, E1000_XONTXC);
4815	adapter->stats.xoffrxc += E1000_READ_REG(&adapter->hw, E1000_XOFFRXC);
4816	adapter->stats.xofftxc += E1000_READ_REG(&adapter->hw, E1000_XOFFTXC);
4817	adapter->stats.fcruc += E1000_READ_REG(&adapter->hw, E1000_FCRUC);
4818	adapter->stats.prc64 += E1000_READ_REG(&adapter->hw, E1000_PRC64);
4819	adapter->stats.prc127 += E1000_READ_REG(&adapter->hw, E1000_PRC127);
4820	adapter->stats.prc255 += E1000_READ_REG(&adapter->hw, E1000_PRC255);
4821	adapter->stats.prc511 += E1000_READ_REG(&adapter->hw, E1000_PRC511);
4822	adapter->stats.prc1023 += E1000_READ_REG(&adapter->hw, E1000_PRC1023);
4823	adapter->stats.prc1522 += E1000_READ_REG(&adapter->hw, E1000_PRC1522);
4824	adapter->stats.gprc += E1000_READ_REG(&adapter->hw, E1000_GPRC);
4825	adapter->stats.bprc += E1000_READ_REG(&adapter->hw, E1000_BPRC);
4826	adapter->stats.mprc += E1000_READ_REG(&adapter->hw, E1000_MPRC);
4827	adapter->stats.gptc += E1000_READ_REG(&adapter->hw, E1000_GPTC);
4828
4829	/* For the 64-bit byte counters the low dword must be read first. */
4830	/* Both registers clear on the read of the high dword */
4831
	adapter->stats.gorc += E1000_READ_REG(&adapter->hw, E1000_GORCL) +
	    ((u64)E1000_READ_REG(&adapter->hw, E1000_GORCH) << 32);
	adapter->stats.gotc += E1000_READ_REG(&adapter->hw, E1000_GOTCL) +
	    ((u64)E1000_READ_REG(&adapter->hw, E1000_GOTCH) << 32);
4834
4835	adapter->stats.rnbc += E1000_READ_REG(&adapter->hw, E1000_RNBC);
4836	adapter->stats.ruc += E1000_READ_REG(&adapter->hw, E1000_RUC);
4837	adapter->stats.rfc += E1000_READ_REG(&adapter->hw, E1000_RFC);
4838	adapter->stats.roc += E1000_READ_REG(&adapter->hw, E1000_ROC);
4839	adapter->stats.rjc += E1000_READ_REG(&adapter->hw, E1000_RJC);
4840
	adapter->stats.tor += E1000_READ_REG(&adapter->hw, E1000_TORL) +
	    ((u64)E1000_READ_REG(&adapter->hw, E1000_TORH) << 32);
	adapter->stats.tot += E1000_READ_REG(&adapter->hw, E1000_TOTL) +
	    ((u64)E1000_READ_REG(&adapter->hw, E1000_TOTH) << 32);
4843
4844	adapter->stats.tpr += E1000_READ_REG(&adapter->hw, E1000_TPR);
4845	adapter->stats.tpt += E1000_READ_REG(&adapter->hw, E1000_TPT);
4846	adapter->stats.ptc64 += E1000_READ_REG(&adapter->hw, E1000_PTC64);
4847	adapter->stats.ptc127 += E1000_READ_REG(&adapter->hw, E1000_PTC127);
4848	adapter->stats.ptc255 += E1000_READ_REG(&adapter->hw, E1000_PTC255);
4849	adapter->stats.ptc511 += E1000_READ_REG(&adapter->hw, E1000_PTC511);
4850	adapter->stats.ptc1023 += E1000_READ_REG(&adapter->hw, E1000_PTC1023);
4851	adapter->stats.ptc1522 += E1000_READ_REG(&adapter->hw, E1000_PTC1522);
4852	adapter->stats.mptc += E1000_READ_REG(&adapter->hw, E1000_MPTC);
4853	adapter->stats.bptc += E1000_READ_REG(&adapter->hw, E1000_BPTC);
4854
4855	if (adapter->hw.mac.type >= e1000_82543) {
4856		adapter->stats.algnerrc +=
4857		E1000_READ_REG(&adapter->hw, E1000_ALGNERRC);
4858		adapter->stats.rxerrc +=
4859		E1000_READ_REG(&adapter->hw, E1000_RXERRC);
4860		adapter->stats.tncrs +=
4861		E1000_READ_REG(&adapter->hw, E1000_TNCRS);
4862		adapter->stats.cexterr +=
4863		E1000_READ_REG(&adapter->hw, E1000_CEXTERR);
4864		adapter->stats.tsctc +=
4865		E1000_READ_REG(&adapter->hw, E1000_TSCTC);
4866		adapter->stats.tsctfc +=
4867		E1000_READ_REG(&adapter->hw, E1000_TSCTFC);
4868	}
4869	ifp = adapter->ifp;
4870
4871	ifp->if_collisions = adapter->stats.colc;
4872
4873	/* Rx Errors */
4874	ifp->if_ierrors = adapter->dropped_pkts + adapter->stats.rxerrc +
4875	    adapter->stats.crcerrs + adapter->stats.algnerrc +
4876	    adapter->stats.ruc + adapter->stats.roc +
4877	    adapter->stats.mpc + adapter->stats.cexterr;
4878
4879	/* Tx Errors */
4880	ifp->if_oerrors = adapter->stats.ecol +
4881	    adapter->stats.latecol + adapter->watchdog_events;
4882}
4883
4884
4885/*
4886 * Add sysctl variables, one per statistic, to the system.
4887 */
4888static void
4889em_add_hw_stats(struct adapter *adapter)
4890{
4891
4892	device_t dev = adapter->dev;
4893
4894	struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev);
4895	struct sysctl_oid *tree = device_get_sysctl_tree(dev);
4896	struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree);
4897	struct e1000_hw_stats *stats = &adapter->stats;
4898
4899	struct sysctl_oid *stat_node, *int_node, *host_node;
4900	struct sysctl_oid_list *stat_list, *int_list, *host_list;
4901
4902	/* Driver Statistics */
4903	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "link_irq",
4904			CTLFLAG_RD, &adapter->link_irq, 0,
4905			"Link MSIX IRQ Handled");
4906	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "mbuf_alloc_fail",
4907			 CTLFLAG_RD, &adapter->mbuf_alloc_failed,
4908			 "Std mbuf failed");
4909	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "cluster_alloc_fail",
4910			 CTLFLAG_RD, &adapter->mbuf_cluster_failed,
4911			 "Std mbuf cluster failed");
4912	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "dropped",
4913			CTLFLAG_RD, &adapter->dropped_pkts,
4914			"Driver dropped packets");
4915	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_dma_fail",
4916			CTLFLAG_RD, &adapter->no_tx_dma_setup,
4917			"Driver tx dma failure in xmit");
4918
4919	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_high_water",
4920			CTLFLAG_RD, &adapter->hw.fc.high_water, 0,
4921			"Flow Control High Watermark");
4922	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_low_water",
4923			CTLFLAG_RD, &adapter->hw.fc.low_water, 0,
4924			"Flow Control Low Watermark");
4925
	/* MAC stats get their own sub node */
4927
4928	stat_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "mac_stats",
4929				    CTLFLAG_RD, NULL, "Statistics");
4930	stat_list = SYSCTL_CHILDREN(stat_node);
4931
4932	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "excess_coll",
4933			CTLFLAG_RD, &stats->ecol,
4934			"Excessive collisions");
4935	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "symbol_errors",
4936			CTLFLAG_RD, &adapter->stats.symerrs,
4937			"Symbol Errors");
4938	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "sequence_errors",
4939			CTLFLAG_RD, &adapter->stats.sec,
4940			"Sequence Errors");
4941	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "defer_count",
4942			CTLFLAG_RD, &adapter->stats.dc,
4943			"Defer Count");
4944	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "missed_packets",
4945			CTLFLAG_RD, &adapter->stats.mpc,
4946			"Missed Packets");
4947	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_no_buff",
4948			CTLFLAG_RD, &adapter->stats.rnbc,
4949			"Receive No Buffers");
4950	/* RLEC is inaccurate on some hardware, calculate our own. */
4951/* 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_len_errs", */
4952/* 			CTLFLAG_RD, adapter->stats.roc + adapter->stats.ruc, */
4953/* 			"Receive Length Errors"); */
4954
4955	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_errs",
4956			CTLFLAG_RD, &adapter->stats.rxerrc,
4957			"Receive Errors");
4958	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "crc_errs",
4959			CTLFLAG_RD, &adapter->stats.crcerrs,
4960			"CRC errors");
4961	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "alignment_errs",
4962			CTLFLAG_RD, &adapter->stats.algnerrc,
4963			"Alignment Errors");
4964	/* On 82575 these are collision counts */
4965	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "coll_ext_errs",
4966			CTLFLAG_RD, &adapter->stats.cexterr,
4967			"Collision/Carrier extension errors");
4968	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_overruns",
4969			CTLFLAG_RD, &adapter->rx_overruns,
4970			"RX overruns");
4971	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "watchdog_timeouts",
4972			CTLFLAG_RD, &adapter->watchdog_events,
4973			"Watchdog timeouts");
4974	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xon_recvd",
4975			CTLFLAG_RD, &adapter->stats.xonrxc,
4976			"XON Received");
4977	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xon_txd",
4978			CTLFLAG_RD, &adapter->stats.xontxc,
4979			"XON Transmitted");
4980	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xoff_recvd",
4981			CTLFLAG_RD, &adapter->stats.xoffrxc,
4982			"XOFF Received");
4983	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xoff_txd",
4984			CTLFLAG_RD, &adapter->stats.xofftxc,
4985			"XOFF Transmitted");
4986
4987	/* Packet Reception Stats */
4988	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "total_pkts_recvd",
4989			CTLFLAG_RD, &adapter->stats.tpr,
4990			"Total Packets Received ");
4991	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_recvd",
4992			CTLFLAG_RD, &adapter->stats.gprc,
4993			"Good Packets Received");
4994	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_recvd",
4995			CTLFLAG_RD, &adapter->stats.bprc,
4996			"Broadcast Packets Received");
4997	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_recvd",
4998			CTLFLAG_RD, &adapter->stats.mprc,
4999			"Multicast Packets Received");
5000	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_64",
5001			CTLFLAG_RD, &adapter->stats.prc64,
5002			"64 byte frames received ");
5003	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_65_127",
5004			CTLFLAG_RD, &adapter->stats.prc127,
5005			"65-127 byte frames received");
5006	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_128_255",
5007			CTLFLAG_RD, &adapter->stats.prc255,
5008			"128-255 byte frames received");
5009	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_256_511",
5010			CTLFLAG_RD, &adapter->stats.prc511,
5011			"256-511 byte frames received");
5012	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_512_1023",
5013			CTLFLAG_RD, &adapter->stats.prc1023,
5014			"512-1023 byte frames received");
5015	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_1024_1522",
5016			CTLFLAG_RD, &adapter->stats.prc1522,
			"1024-1522 byte frames received");
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_recvd",
			CTLFLAG_RD, &adapter->stats.gorc,
			"Good Octets Received");

	/* Packet Transmission Stats */
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_txd",
			CTLFLAG_RD, &adapter->stats.gotc,
			"Good Octets Transmitted");
5026	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "total_pkts_txd",
5027			CTLFLAG_RD, &adapter->stats.tpt,
5028			"Total Packets Transmitted");
5029	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd",
5030			CTLFLAG_RD, &adapter->stats.gptc,
5031			"Good Packets Transmitted");
5032	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_txd",
5033			CTLFLAG_RD, &adapter->stats.bptc,
5034			"Broadcast Packets Transmitted");
5035	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_txd",
5036			CTLFLAG_RD, &adapter->stats.mptc,
5037			"Multicast Packets Transmitted");
5038	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_64",
5039			CTLFLAG_RD, &adapter->stats.ptc64,
5040			"64 byte frames transmitted ");
5041	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_65_127",
5042			CTLFLAG_RD, &adapter->stats.ptc127,
5043			"65-127 byte frames transmitted");
5044	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_128_255",
5045			CTLFLAG_RD, &adapter->stats.ptc255,
5046			"128-255 byte frames transmitted");
5047	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_256_511",
5048			CTLFLAG_RD, &adapter->stats.ptc511,
5049			"256-511 byte frames transmitted");
5050	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_512_1023",
5051			CTLFLAG_RD, &adapter->stats.ptc1023,
5052			"512-1023 byte frames transmitted");
5053	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_1024_1522",
5054			CTLFLAG_RD, &adapter->stats.ptc1522,
5055			"1024-1522 byte frames transmitted");
5056	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tso_txd",
5057			CTLFLAG_RD, &adapter->stats.tsctc,
5058			"TSO Contexts Transmitted");
5059	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tso_ctx_fail",
5060			CTLFLAG_RD, &adapter->stats.tsctfc,
5061			"TSO Contexts Failed");
5064	/* Interrupt Stats */
5065
5066	int_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "interrupts",
5067				    CTLFLAG_RD, NULL, "Interrupt Statistics");
5068	int_list = SYSCTL_CHILDREN(int_node);
5069
5070	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "asserts",
5071			CTLFLAG_RD, &adapter->stats.iac,
5072			"Interrupt Assertion Count");
5073
5074	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_pkt_timer",
5075			CTLFLAG_RD, &adapter->stats.icrxptc,
5076			"Interrupt Cause Rx Pkt Timer Expire Count");
5077
5078	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_abs_timer",
5079			CTLFLAG_RD, &adapter->stats.icrxatc,
5080			"Interrupt Cause Rx Abs Timer Expire Count");
5081
5082	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_pkt_timer",
5083			CTLFLAG_RD, &adapter->stats.ictxptc,
5084			"Interrupt Cause Tx Pkt Timer Expire Count");
5085
5086	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_abs_timer",
5087			CTLFLAG_RD, &adapter->stats.ictxatc,
5088			"Interrupt Cause Tx Abs Timer Expire Count");
5089
5090	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_queue_empty",
5091			CTLFLAG_RD, &adapter->stats.ictxqec,
5092			"Interrupt Cause Tx Queue Empty Count");
5093
5094	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_queue_min_thresh",
5095			CTLFLAG_RD, &adapter->stats.ictxqmtc,
5096			"Interrupt Cause Tx Queue Min Thresh Count");
5097
5098	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_desc_min_thresh",
5099			CTLFLAG_RD, &adapter->stats.icrxdmtc,
5100			"Interrupt Cause Rx Desc Min Thresh Count");
5101
5102	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_overrun",
5103			CTLFLAG_RD, &adapter->stats.icrxoc,
5104			"Interrupt Cause Receiver Overrun Count");
5105
5106	/* Host to Card Stats */
5107
5108	host_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "host",
5109				    CTLFLAG_RD, NULL,
5110				    "Host to Card Statistics");
5111
5112	host_list = SYSCTL_CHILDREN(host_node);
5113
5114	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_tx_pkt",
5115			CTLFLAG_RD, &adapter->stats.cbtmpc,
5116			"Circuit Breaker Tx Packet Count");
5117
5118	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "host_tx_pkt_discard",
5119			CTLFLAG_RD, &adapter->stats.htdpmc,
5120			"Host Transmit Discarded Packets");
5121
5122	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "rx_pkt",
5123			CTLFLAG_RD, &adapter->stats.rpthc,
5124			"Rx Packets To Host");
5125
5126	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_rx_pkts",
5127			CTLFLAG_RD, &adapter->stats.cbrmpc,
5128			"Circuit Breaker Rx Packet Count");
5129
5130	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_rx_pkt_drop",
5131			CTLFLAG_RD, &adapter->stats.cbrdpc,
5132			"Circuit Breaker Rx Dropped Count");
5133
5134	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "tx_good_pkt",
5135			CTLFLAG_RD, &adapter->stats.hgptc,
5136			"Host Good Packets Tx Count");
5137
5138	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_tx_pkt_drop",
5139			CTLFLAG_RD, &adapter->stats.htcbdpc,
5140			"Host Tx Circuit Breaker Dropped Count");
5141
5142	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "rx_good_bytes",
5143			CTLFLAG_RD, &adapter->stats.hgorc,
5144			"Host Good Octets Received Count");
5145
5146	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "tx_good_bytes",
5147			CTLFLAG_RD, &adapter->stats.hgotc,
5148			"Host Good Octets Transmit Count");
5149
5150	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "length_errors",
5151			CTLFLAG_RD, &adapter->stats.lenerrs,
5152			"Length Errors");
5153
5154	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "serdes_violation_pkt",
5155			CTLFLAG_RD, &adapter->stats.scvpc,
5156			"SerDes/SGMII Code Violation Pkt Count");
5157
5158	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "header_redir_missed",
5159			CTLFLAG_RD, &adapter->stats.hrmpc,
5160			"Header Redirection Missed Packet Count");
5164}
5165
5166/**********************************************************************
5167 *
 *  This routine provides a way to dump out the adapter eeprom,
 *  often a useful debug/service tool. Only the first 32 words
 *  are dumped; everything that matters lives in that range.
5171 *
5172 **********************************************************************/
5173
5174static int
5175em_sysctl_nvm_info(SYSCTL_HANDLER_ARGS)
5176{
5177	struct adapter *adapter;
5178	int error;
5179	int result;
5180
5181	result = -1;
5182	error = sysctl_handle_int(oidp, &result, 0, req);
5183
5184	if (error || !req->newptr)
5185		return (error);
5186
5187	/*
5188	 * This value will cause a hex dump of the
5189	 * first 32 16-bit words of the EEPROM to
5190	 * the screen.
5191	 */
5192	if (result == 1) {
5193		adapter = (struct adapter *)arg1;
5194		em_print_nvm_info(adapter);
	}
5196
5197	return (error);
5198}
5199
5200static void
5201em_print_nvm_info(struct adapter *adapter)
5202{
5203	u16	eeprom_data;
5204	int	i, j, row = 0;
5205
	/* It's a bit crude, but it gets the job done */
5207	printf("\nInterface EEPROM Dump:\n");
5208	printf("Offset\n0x0000  ");
5209	for (i = 0, j = 0; i < 32; i++, j++) {
5210		if (j == 8) { /* Make the offset block */
5211			j = 0; ++row;
			printf("\n0x00%x0  ", row);
5213		}
5214		e1000_read_nvm(&adapter->hw, i, 1, &eeprom_data);
5215		printf("%04x ", eeprom_data);
5216	}
5217	printf("\n");
5218}
5219
5220static int
5221em_sysctl_int_delay(SYSCTL_HANDLER_ARGS)
5222{
5223	struct em_int_delay_info *info;
5224	struct adapter *adapter;
5225	u32 regval;
5226	int error, usecs, ticks;
5227
5228	info = (struct em_int_delay_info *)arg1;
5229	usecs = info->value;
5230	error = sysctl_handle_int(oidp, &usecs, 0, req);
5231	if (error != 0 || req->newptr == NULL)
5232		return (error);
5233	if (usecs < 0 || usecs > EM_TICKS_TO_USECS(65535))
5234		return (EINVAL);
5235	info->value = usecs;
5236	ticks = EM_USECS_TO_TICKS(usecs);
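	/*
	 * The hardware delay timers count in units of 1.024us, which
	 * is what EM_USECS_TO_TICKS converts to; the value must fit
	 * in the register's 16-bit delay field (hence the 65535 cap
	 * above).
	 */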
5237
5238	adapter = info->adapter;
5239
5240	EM_CORE_LOCK(adapter);
5241	regval = E1000_READ_OFFSET(&adapter->hw, info->offset);
5242	regval = (regval & ~0xffff) | (ticks & 0xffff);
5243	/* Handle a few special cases. */
5244	switch (info->offset) {
5245	case E1000_RDTR:
5246		break;
5247	case E1000_TIDV:
5248		if (ticks == 0) {
5249			adapter->txd_cmd &= ~E1000_TXD_CMD_IDE;
5250			/* Don't write 0 into the TIDV register. */
5251			regval++;
5252		} else
5253			adapter->txd_cmd |= E1000_TXD_CMD_IDE;
5254		break;
5255	}
5256	E1000_WRITE_OFFSET(&adapter->hw, info->offset, regval);
5257	EM_CORE_UNLOCK(adapter);
5258	return (0);
5259}
5260
5261static void
5262em_add_int_delay_sysctl(struct adapter *adapter, const char *name,
5263	const char *description, struct em_int_delay_info *info,
5264	int offset, int value)
5265{
5266	info->adapter = adapter;
5267	info->offset = offset;
5268	info->value = value;
5269	SYSCTL_ADD_PROC(device_get_sysctl_ctx(adapter->dev),
5270	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
5271	    OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW,
5272	    info, 0, em_sysctl_int_delay, "I", description);
5273}
5274
5275static void
5276em_add_rx_process_limit(struct adapter *adapter, const char *name,
5277	const char *description, int *limit, int value)
5278{
5279	*limit = value;
5280	SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev),
5281	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
5282	    OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW, limit, value, description);
5283}
5284
5285
5286