/******************************************************************************

  Copyright (c) 2001-2010, Intel Corporation
  All rights reserved.

  Redistribution and use in source and binary forms, with or without
  modification, are permitted provided that the following conditions are met:

   1. Redistributions of source code must retain the above copyright notice,
      this list of conditions and the following disclaimer.

   2. Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.

   3. Neither the name of the Intel Corporation nor the names of its
      contributors may be used to endorse or promote products derived from
      this software without specific prior written permission.

  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  POSSIBILITY OF SUCH DAMAGE.

******************************************************************************/
/*$FreeBSD: head/sys/dev/e1000/if_em.c 211909 2010-08-28 00:16:49Z yongari $*/

#ifdef HAVE_KERNEL_OPTION_HEADERS
#include "opt_device_polling.h"
#include "opt_inet.h"
#endif

#include <sys/param.h>
#include <sys/systm.h>
#if __FreeBSD_version >= 800000
#include <sys/buf_ring.h>
#endif
#include <sys/bus.h>
#include <sys/endian.h>
#include <sys/kernel.h>
#include <sys/kthread.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/module.h>
#include <sys/rman.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/sysctl.h>
#include <sys/taskqueue.h>
#include <sys/eventhandler.h>
#include <machine/bus.h>
#include <machine/resource.h>

#include <net/bpf.h>
#include <net/ethernet.h>
#include <net/if.h>
#include <net/if_arp.h>
#include <net/if_dl.h>
#include <net/if_media.h>

#include <net/if_types.h>
#include <net/if_vlan_var.h>

#include <netinet/in_systm.h>
#include <netinet/in.h>
#include <netinet/if_ether.h>
#include <netinet/ip.h>
#include <netinet/ip6.h>
#include <netinet/tcp.h>
#include <netinet/udp.h>

#include <machine/in_cksum.h>
#include <dev/led/led.h>
#include <dev/pci/pcivar.h>
#include <dev/pci/pcireg.h>

#include "e1000_api.h"
#include "e1000_82571.h"
#include "if_em.h"

/*********************************************************************
 *  Set this to one to display debug statistics
 *********************************************************************/
int	em_display_debug_stats = 0;

/*********************************************************************
 *  Driver version:
 *********************************************************************/
char em_driver_version[] = "7.0.5";

/*********************************************************************
 *  PCI Device ID Table
 *
 *  Used by probe to select which devices to attach to.
 *  The last field stores an index into em_strings.
 *  The last entry must be all 0s.
 *
 *  { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index }
 *********************************************************************/

static em_vendor_info_t em_vendor_info_array[] =
{
	/* Intel(R) PRO/1000 Network Connection */
	{ 0x8086, E1000_DEV_ID_82571EB_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_SERDES_DUAL,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_SERDES_QUAD,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER_LP,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_QUAD_FIBER,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571PT_QUAD_COPPER,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82572EI_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82572EI_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82572EI_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82572EI,		PCI_ANY_ID, PCI_ANY_ID, 0},

	{ 0x8086, E1000_DEV_ID_82573E,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82573E_IAMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82573L,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82583V,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_SPT,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_SPT,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_DPT,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_DPT,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IGP_M_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IGP_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IGP_C,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IFE,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IFE_GT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IFE_G,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IGP_M,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_82567V_3,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IGP_M_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IGP_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IGP_C,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IGP_M,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IGP_M_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IFE,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IFE_GT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IFE_G,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_BM,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82574L,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82574LA,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH10_R_BM_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH10_R_BM_LF,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH10_R_BM_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH10_D_BM_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH10_D_BM_LF,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_M_HV_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_M_HV_LC,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_D_HV_DM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_D_HV_DC,	PCI_ANY_ID, PCI_ANY_ID, 0},
	/* required last entry */
	{ 0, 0, 0, 0, 0}
};

/*********************************************************************
 *  Table of branding strings for all supported NICs.
 *********************************************************************/

static char *em_strings[] = {
	"Intel(R) PRO/1000 Network Connection"
};

/*********************************************************************
 *  Function prototypes
 *********************************************************************/
static int	em_probe(device_t);
static int	em_attach(device_t);
static int	em_detach(device_t);
static int	em_shutdown(device_t);
static int	em_suspend(device_t);
static int	em_resume(device_t);
static void	em_start(struct ifnet *);
static void	em_start_locked(struct ifnet *, struct tx_ring *);
#ifdef EM_MULTIQUEUE
static int	em_mq_start(struct ifnet *, struct mbuf *);
static int	em_mq_start_locked(struct ifnet *,
		    struct tx_ring *, struct mbuf *);
static void	em_qflush(struct ifnet *);
#endif
static int	em_ioctl(struct ifnet *, u_long, caddr_t);
static void	em_init(void *);
static void	em_init_locked(struct adapter *);
static void	em_stop(void *);
static void	em_media_status(struct ifnet *, struct ifmediareq *);
static int	em_media_change(struct ifnet *);
static void	em_identify_hardware(struct adapter *);
static int	em_allocate_pci_resources(struct adapter *);
static int	em_allocate_legacy(struct adapter *);
static int	em_allocate_msix(struct adapter *);
static int	em_allocate_queues(struct adapter *);
static int	em_setup_msix(struct adapter *);
static void	em_free_pci_resources(struct adapter *);
static void	em_local_timer(void *);
static void	em_reset(struct adapter *);
static int	em_setup_interface(device_t, struct adapter *);

static void	em_setup_transmit_structures(struct adapter *);
static void	em_initialize_transmit_unit(struct adapter *);
static int	em_allocate_transmit_buffers(struct tx_ring *);
static void	em_free_transmit_structures(struct adapter *);
static void	em_free_transmit_buffers(struct tx_ring *);

static int	em_setup_receive_structures(struct adapter *);
static int	em_allocate_receive_buffers(struct rx_ring *);
static void	em_initialize_receive_unit(struct adapter *);
static void	em_free_receive_structures(struct adapter *);
static void	em_free_receive_buffers(struct rx_ring *);

static void	em_enable_intr(struct adapter *);
static void	em_disable_intr(struct adapter *);
static void	em_update_stats_counters(struct adapter *);
static void	em_add_hw_stats(struct adapter *adapter);
static bool	em_txeof(struct tx_ring *);
static bool	em_rxeof(struct rx_ring *, int, int *);
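
/*
 * On strict-alignment architectures the 14-byte Ethernet header leaves
 * the IP header misaligned, so received frames are shifted into
 * alignment by em_fixup_rx(); platforms without that restriction
 * compile the fixup out via __NO_STRICT_ALIGNMENT.
 */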
#ifndef __NO_STRICT_ALIGNMENT
static int	em_fixup_rx(struct rx_ring *);
#endif
static void	em_receive_checksum(struct e1000_rx_desc *, struct mbuf *);
static void	em_transmit_checksum_setup(struct tx_ring *, struct mbuf *,
		    u32 *, u32 *);
static bool	em_tso_setup(struct tx_ring *, struct mbuf *, u32 *, u32 *);
static void	em_set_promisc(struct adapter *);
static void	em_disable_promisc(struct adapter *);
static void	em_set_multi(struct adapter *);
static void	em_update_link_status(struct adapter *);
static void	em_refresh_mbufs(struct rx_ring *, int);
static void	em_register_vlan(void *, struct ifnet *, u16);
static void	em_unregister_vlan(void *, struct ifnet *, u16);
static void	em_setup_vlan_hw_support(struct adapter *);
static int	em_xmit(struct tx_ring *, struct mbuf **);
static int	em_dma_malloc(struct adapter *, bus_size_t,
		    struct em_dma_alloc *, int);
static void	em_dma_free(struct adapter *, struct em_dma_alloc *);
static int	em_sysctl_nvm_info(SYSCTL_HANDLER_ARGS);
static void	em_print_nvm_info(struct adapter *);
static int	em_is_valid_ether_addr(u8 *);
static int	em_sysctl_int_delay(SYSCTL_HANDLER_ARGS);
static void	em_add_int_delay_sysctl(struct adapter *, const char *,
		    const char *, struct em_int_delay_info *, int, int);
/* Management and WOL Support */
static void	em_init_manageability(struct adapter *);
static void	em_release_manageability(struct adapter *);
static void	em_get_hw_control(struct adapter *);
static void	em_release_hw_control(struct adapter *);
static void	em_get_wakeup(device_t);
static void	em_enable_wakeup(device_t);
static int	em_enable_phy_wakeup(struct adapter *);
static void	em_led_func(void *, int);

static int	em_irq_fast(void *);

/* MSIX handlers */
static void	em_msix_tx(void *);
static void	em_msix_rx(void *);
static void	em_msix_link(void *);
static void	em_handle_tx(void *context, int pending);
static void	em_handle_rx(void *context, int pending);
static void	em_handle_link(void *context, int pending);

static void	em_add_rx_process_limit(struct adapter *, const char *,
		    const char *, int *, int);

#ifdef DEVICE_POLLING
static poll_handler_t em_poll;
#endif /* DEVICE_POLLING */

/*********************************************************************
 *  FreeBSD Device Interface Entry Points
 *********************************************************************/

static device_method_t em_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe, em_probe),
	DEVMETHOD(device_attach, em_attach),
	DEVMETHOD(device_detach, em_detach),
	DEVMETHOD(device_shutdown, em_shutdown),
	DEVMETHOD(device_suspend, em_suspend),
	DEVMETHOD(device_resume, em_resume),
	{0, 0}
};

static driver_t em_driver = {
	"em", em_methods, sizeof(struct adapter),
};

devclass_t em_devclass;
DRIVER_MODULE(em, pci, em_driver, em_devclass, 0, 0);
MODULE_DEPEND(em, pci, 1, 1, 1);
MODULE_DEPEND(em, ether, 1, 1, 1);

/*********************************************************************
 *  Tunable default values.
 *********************************************************************/

#define EM_TICKS_TO_USECS(ticks)	((1024 * (ticks) + 500) / 1000)
#define EM_USECS_TO_TICKS(usecs)	((1000 * (usecs) + 512) / 1024)
#define M_TSO_LEN			66
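
/*
 * The interrupt delay registers (TIDV/TADV/RDTR/RADV) count in units
 * of 1.024 usec; the macros above convert to and from microseconds
 * with rounding, e.g. EM_USECS_TO_TICKS(100) yields 98 register ticks.
 */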

/* Allow common code without TSO */
#ifndef CSUM_TSO
#define CSUM_TSO	0
#endif

static int em_tx_int_delay_dflt = EM_TICKS_TO_USECS(EM_TIDV);
static int em_rx_int_delay_dflt = EM_TICKS_TO_USECS(EM_RDTR);
TUNABLE_INT("hw.em.tx_int_delay", &em_tx_int_delay_dflt);
TUNABLE_INT("hw.em.rx_int_delay", &em_rx_int_delay_dflt);

static int em_tx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_TADV);
static int em_rx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_RADV);
TUNABLE_INT("hw.em.tx_abs_int_delay", &em_tx_abs_int_delay_dflt);
TUNABLE_INT("hw.em.rx_abs_int_delay", &em_rx_abs_int_delay_dflt);

static int em_rxd = EM_DEFAULT_RXD;
static int em_txd = EM_DEFAULT_TXD;
TUNABLE_INT("hw.em.rxd", &em_rxd);
TUNABLE_INT("hw.em.txd", &em_txd);

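/*
 * Smart power down: a PHY power-saving mode, assumed here to let the
 * PHY drop to a lower-power state when the link is down on parts that
 * support it.  Disabled by default.
 */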
static int em_smart_pwr_down = FALSE;
TUNABLE_INT("hw.em.smart_pwr_down", &em_smart_pwr_down);

/* Controls whether promiscuous mode also shows bad packets */
static int em_debug_sbp = FALSE;
TUNABLE_INT("hw.em.sbp", &em_debug_sbp);

/* Local controls for MSI/MSIX */
#ifdef EM_MULTIQUEUE
static int em_enable_msix = TRUE;
static int em_msix_queues = 2; /* for 82574, can be 1 or 2 */
#else
static int em_enable_msix = FALSE;
static int em_msix_queues = 0; /* disable */
#endif
TUNABLE_INT("hw.em.enable_msix", &em_enable_msix);
TUNABLE_INT("hw.em.msix_queues", &em_msix_queues);

/* How many packets rxeof tries to clean at a time */
static int em_rx_process_limit = 100;
TUNABLE_INT("hw.em.rx_process_limit", &em_rx_process_limit);

/* Flow control setting - default to FULL */
static int em_fc_setting = e1000_fc_full;
TUNABLE_INT("hw.em.fc_setting", &em_fc_setting);
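
/*
 * All of the tunables above can be set from the loader, e.g. in
 * /boot/loader.conf (the values below are examples only):
 *	hw.em.rx_process_limit="200"
 *	hw.em.txd="1024"
 */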

/*
** Shadow VFTA table: needed because the real VLAN filter
** table gets cleared during a soft reset, and the driver
** must be able to repopulate it.
*/
static u32 em_shadow_vfta[EM_VFTA_SIZE];

/* Global used in WOL setup with multiport cards */
static int global_quad_port_a = 0;

/*********************************************************************
 *  Device identification routine
 *
 *  em_probe determines if the driver should be loaded on the
 *  adapter, based on the adapter's PCI vendor/device ID.
 *
 *  return BUS_PROBE_DEFAULT on success, positive on failure
 *********************************************************************/

static int
em_probe(device_t dev)
{
	char		adapter_name[60];
	u16		pci_vendor_id = 0;
	u16		pci_device_id = 0;
	u16		pci_subvendor_id = 0;
	u16		pci_subdevice_id = 0;
	em_vendor_info_t *ent;

	INIT_DEBUGOUT("em_probe: begin");

	pci_vendor_id = pci_get_vendor(dev);
	if (pci_vendor_id != EM_VENDOR_ID)
		return (ENXIO);

	pci_device_id = pci_get_device(dev);
	pci_subvendor_id = pci_get_subvendor(dev);
	pci_subdevice_id = pci_get_subdevice(dev);

	ent = em_vendor_info_array;
	while (ent->vendor_id != 0) {
		if ((pci_vendor_id == ent->vendor_id) &&
		    (pci_device_id == ent->device_id) &&
		    ((pci_subvendor_id == ent->subvendor_id) ||
		    (ent->subvendor_id == PCI_ANY_ID)) &&
		    ((pci_subdevice_id == ent->subdevice_id) ||
		    (ent->subdevice_id == PCI_ANY_ID))) {
			sprintf(adapter_name, "%s %s",
				em_strings[ent->index],
				em_driver_version);
			device_set_desc_copy(dev, adapter_name);
			return (BUS_PROBE_DEFAULT);
		}
		ent++;
	}

	return (ENXIO);
}

/*********************************************************************
 *  Device initialization routine
 *
 *  The attach entry point is called when the driver is being loaded.
 *  This routine identifies the type of hardware, allocates all resources
 *  and initializes the hardware.
 *
 *  return 0 on success, positive on failure
 *********************************************************************/

static int
em_attach(device_t dev)
{
	struct adapter	*adapter;
	int		error = 0;

	INIT_DEBUGOUT("em_attach: begin");

	adapter = device_get_softc(dev);
	adapter->dev = adapter->osdep.dev = dev;
	EM_CORE_LOCK_INIT(adapter, device_get_nameunit(dev));

	/* SYSCTL stuff */
	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
	    OID_AUTO, "nvm", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
	    em_sysctl_nvm_info, "I", "NVM Information");

	callout_init_mtx(&adapter->timer, &adapter->core_mtx, 0);

	/* Determine hardware and mac info */
	em_identify_hardware(adapter);

	/* Setup PCI resources */
	if (em_allocate_pci_resources(adapter)) {
		device_printf(dev, "Allocation of PCI resources failed\n");
		error = ENXIO;
		goto err_pci;
	}

	/*
	** For ICH8 and family we need to
	** map the flash memory, and this
	** must happen after the MAC is
	** identified
	*/
	if ((adapter->hw.mac.type == e1000_ich8lan) ||
	    (adapter->hw.mac.type == e1000_pchlan) ||
	    (adapter->hw.mac.type == e1000_ich9lan) ||
	    (adapter->hw.mac.type == e1000_ich10lan)) {
		int rid = EM_BAR_TYPE_FLASH;
		adapter->flash = bus_alloc_resource_any(dev,
		    SYS_RES_MEMORY, &rid, RF_ACTIVE);
		if (adapter->flash == NULL) {
			device_printf(dev, "Mapping of Flash failed\n");
			error = ENXIO;
			goto err_pci;
		}
		/* This is used in the shared code */
		adapter->hw.flash_address = (u8 *)adapter->flash;
		adapter->osdep.flash_bus_space_tag =
		    rman_get_bustag(adapter->flash);
		adapter->osdep.flash_bus_space_handle =
		    rman_get_bushandle(adapter->flash);
	}

	/* Do Shared Code initialization */
	if (e1000_setup_init_funcs(&adapter->hw, TRUE)) {
		device_printf(dev, "Setup of Shared code failed\n");
		error = ENXIO;
		goto err_pci;
	}

	e1000_get_bus_info(&adapter->hw);

	/* Set up some sysctls for the tunable interrupt delays */
	em_add_int_delay_sysctl(adapter, "rx_int_delay",
	    "receive interrupt delay in usecs", &adapter->rx_int_delay,
	    E1000_REGISTER(&adapter->hw, E1000_RDTR), em_rx_int_delay_dflt);
	em_add_int_delay_sysctl(adapter, "tx_int_delay",
	    "transmit interrupt delay in usecs", &adapter->tx_int_delay,
	    E1000_REGISTER(&adapter->hw, E1000_TIDV), em_tx_int_delay_dflt);
	em_add_int_delay_sysctl(adapter, "rx_abs_int_delay",
	    "receive interrupt delay limit in usecs",
	    &adapter->rx_abs_int_delay,
	    E1000_REGISTER(&adapter->hw, E1000_RADV),
	    em_rx_abs_int_delay_dflt);
	em_add_int_delay_sysctl(adapter, "tx_abs_int_delay",
	    "transmit interrupt delay limit in usecs",
	    &adapter->tx_abs_int_delay,
	    E1000_REGISTER(&adapter->hw, E1000_TADV),
	    em_tx_abs_int_delay_dflt);

	/* Sysctls for limiting the amount of work done in the taskqueue */
	em_add_rx_process_limit(adapter, "rx_processing_limit",
	    "max number of rx packets to process", &adapter->rx_process_limit,
	    em_rx_process_limit);

	/*
	 * Validate the number of transmit and receive descriptors.  The
	 * count must not exceed the hardware maximum, and the ring byte
	 * size must be a multiple of EM_DBA_ALIGN.
	 */
	if (((em_txd * sizeof(struct e1000_tx_desc)) % EM_DBA_ALIGN) != 0 ||
	    (em_txd > EM_MAX_TXD) || (em_txd < EM_MIN_TXD)) {
		device_printf(dev, "Using %d TX descriptors instead of %d!\n",
		    EM_DEFAULT_TXD, em_txd);
		adapter->num_tx_desc = EM_DEFAULT_TXD;
	} else
		adapter->num_tx_desc = em_txd;

	if (((em_rxd * sizeof(struct e1000_rx_desc)) % EM_DBA_ALIGN) != 0 ||
	    (em_rxd > EM_MAX_RXD) || (em_rxd < EM_MIN_RXD)) {
		device_printf(dev, "Using %d RX descriptors instead of %d!\n",
		    EM_DEFAULT_RXD, em_rxd);
		adapter->num_rx_desc = EM_DEFAULT_RXD;
	} else
		adapter->num_rx_desc = em_rxd;

	adapter->hw.mac.autoneg = DO_AUTO_NEG;
	adapter->hw.phy.autoneg_wait_to_complete = FALSE;
	adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;

	/* Copper options */
	if (adapter->hw.phy.media_type == e1000_media_type_copper) {
		adapter->hw.phy.mdix = AUTO_ALL_MODES;
		adapter->hw.phy.disable_polarity_correction = FALSE;
		adapter->hw.phy.ms_type = EM_MASTER_SLAVE;
	}

	/*
	 * Set the frame limits assuming
	 * standard ethernet sized frames.
	 */
	adapter->max_frame_size = ETHERMTU + ETHER_HDR_LEN + ETHERNET_FCS_SIZE;
	adapter->min_frame_size = ETH_ZLEN + ETHERNET_FCS_SIZE;

	/*
	 * This controls when hardware reports transmit completion
	 * status.
	 */
	adapter->hw.mac.report_tx_early = 1;

	/*
	** Get queue/ring memory
	*/
	if (em_allocate_queues(adapter)) {
		error = ENOMEM;
		goto err_pci;
	}

	/*
	** Start from a known state: this is important for
	** reading the NVM and MAC address afterwards.
	*/
	e1000_reset_hw(&adapter->hw);

	/* Make sure we have a good EEPROM before we read from it */
	if (e1000_validate_nvm_checksum(&adapter->hw) < 0) {
		/*
		** Some PCI-E parts fail the first check due to
		** the link being in sleep state; call it again,
		** and if it fails a second time it's a real issue.
		*/
		if (e1000_validate_nvm_checksum(&adapter->hw) < 0) {
			device_printf(dev,
			    "The EEPROM Checksum Is Not Valid\n");
			error = EIO;
			goto err_late;
		}
	}

	/* Copy the permanent MAC address out of the EEPROM */
	if (e1000_read_mac_addr(&adapter->hw) < 0) {
		device_printf(dev, "EEPROM read error while reading MAC"
		    " address\n");
		error = EIO;
		goto err_late;
	}

	if (!em_is_valid_ether_addr(adapter->hw.mac.addr)) {
		device_printf(dev, "Invalid MAC address\n");
		error = EIO;
		goto err_late;
	}

	/*
	**  Do interrupt configuration
	*/
	if (adapter->msix > 1) /* Do MSIX */
		error = em_allocate_msix(adapter);
	else  /* MSI or Legacy */
		error = em_allocate_legacy(adapter);
	if (error)
		goto err_late;

	/*
	 * Get Wake-on-Lan and Management info for later use
	 */
	em_get_wakeup(dev);

	/* Setup OS specific network interface */
	if (em_setup_interface(dev, adapter) != 0)
		goto err_late;

	em_reset(adapter);

	/* Initialize statistics */
	em_update_stats_counters(adapter);

	adapter->hw.mac.get_link_status = 1;
	em_update_link_status(adapter);

	/* Indicate SOL/IDER usage */
	if (e1000_check_reset_block(&adapter->hw))
		device_printf(dev,
		    "PHY reset is blocked due to SOL/IDER session.\n");

	/* Register for VLAN events */
	adapter->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
	    em_register_vlan, adapter, EVENTHANDLER_PRI_FIRST);
	adapter->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
	    em_unregister_vlan, adapter, EVENTHANDLER_PRI_FIRST);

	em_add_hw_stats(adapter);

	/* Non-AMT based hardware can now take control from firmware */
	if (adapter->has_manage && !adapter->has_amt)
		em_get_hw_control(adapter);

	/* Tell the stack that the interface is not active */
	adapter->ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);

	adapter->led_dev = led_create(em_led_func, adapter,
	    device_get_nameunit(dev));

	INIT_DEBUGOUT("em_attach: end");

	return (0);

err_late:
	em_free_transmit_structures(adapter);
	em_free_receive_structures(adapter);
	em_release_hw_control(adapter);
	if (adapter->ifp != NULL)
		if_free(adapter->ifp);
err_pci:
	em_free_pci_resources(adapter);
	EM_CORE_LOCK_DESTROY(adapter);

	return (error);
}

/*********************************************************************
 *  Device removal routine
 *
 *  The detach entry point is called when the driver is being removed.
 *  This routine stops the adapter and deallocates all the resources
 *  that were allocated for driver operation.
 *
 *  return 0 on success, positive on failure
 *********************************************************************/

static int
em_detach(device_t dev)
{
	struct adapter	*adapter = device_get_softc(dev);
	struct ifnet	*ifp = adapter->ifp;

	INIT_DEBUGOUT("em_detach: begin");

	/* Make sure VLANs are not using the driver */
	if (adapter->ifp->if_vlantrunk != NULL) {
		device_printf(dev, "Vlan in use, detach first\n");
		return (EBUSY);
	}

#ifdef DEVICE_POLLING
	if (ifp->if_capenable & IFCAP_POLLING)
		ether_poll_deregister(ifp);
#endif

	if (adapter->led_dev != NULL)
		led_destroy(adapter->led_dev);

	EM_CORE_LOCK(adapter);
	adapter->in_detach = 1;
	em_stop(adapter);
	EM_CORE_UNLOCK(adapter);
	EM_CORE_LOCK_DESTROY(adapter);

	e1000_phy_hw_reset(&adapter->hw);

	em_release_manageability(adapter);
	em_release_hw_control(adapter);

	/* Unregister VLAN events */
	if (adapter->vlan_attach != NULL)
		EVENTHANDLER_DEREGISTER(vlan_config, adapter->vlan_attach);
	if (adapter->vlan_detach != NULL)
		EVENTHANDLER_DEREGISTER(vlan_unconfig, adapter->vlan_detach);

	ether_ifdetach(adapter->ifp);
	callout_drain(&adapter->timer);

	em_free_pci_resources(adapter);
	bus_generic_detach(dev);
	if_free(ifp);

	em_free_transmit_structures(adapter);
	em_free_receive_structures(adapter);

	em_release_hw_control(adapter);

	return (0);
}

/*********************************************************************
 *
 *  Shutdown entry point
 *
 **********************************************************************/

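/* Shutdown is treated like suspend: release control and arm wake-up. */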
static int
em_shutdown(device_t dev)
{
	return em_suspend(dev);
}

/*
 * Suspend/resume device methods.
 */
static int
em_suspend(device_t dev)
{
	struct adapter *adapter = device_get_softc(dev);

	EM_CORE_LOCK(adapter);

	em_release_manageability(adapter);
	em_release_hw_control(adapter);
	em_enable_wakeup(dev);

	EM_CORE_UNLOCK(adapter);

	return bus_generic_suspend(dev);
}

static int
em_resume(device_t dev)
{
	struct adapter *adapter = device_get_softc(dev);
	struct ifnet *ifp = adapter->ifp;

	EM_CORE_LOCK(adapter);
	em_init_locked(adapter);
	em_init_manageability(adapter);
	EM_CORE_UNLOCK(adapter);
	em_start(ifp);

	return bus_generic_resume(dev);
}

/*********************************************************************
 *  Transmit entry point
 *
 *  em_start is called by the stack to initiate a transmit.
 *  The driver will remain in this routine as long as there are
 *  packets to transmit and transmit resources are available.
 *  If resources are not available, the stack is notified and
 *  the packet is requeued.
 **********************************************************************/

#ifdef EM_MULTIQUEUE
static int
em_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr, struct mbuf *m)
{
	struct adapter	*adapter = txr->adapter;
	struct mbuf	*next;
	int		err = 0, enq = 0;

	if ((ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) !=
	    IFF_DRV_RUNNING || adapter->link_active == 0) {
		if (m != NULL)
			err = drbr_enqueue(ifp, txr->br, m);
		return (err);
	}

	/* Call cleanup if the number of TX descriptors is low */
	if (txr->tx_avail <= EM_TX_CLEANUP_THRESHOLD)
		em_txeof(txr);

	if (m == NULL) {
		next = drbr_dequeue(ifp, txr->br);
	} else if (drbr_needs_enqueue(ifp, txr->br)) {
		if ((err = drbr_enqueue(ifp, txr->br, m)) != 0)
			return (err);
		next = drbr_dequeue(ifp, txr->br);
	} else
		next = m;

	/* Process the queue */
	while (next != NULL) {
		if ((err = em_xmit(txr, &next)) != 0) {
			if (next != NULL)
				err = drbr_enqueue(ifp, txr->br, next);
			break;
		}
		enq++;
		drbr_stats_update(ifp, next->m_pkthdr.len, next->m_flags);
		ETHER_BPF_MTAP(ifp, next);
		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
			break;
		if (txr->tx_avail < EM_MAX_SCATTER) {
			ifp->if_drv_flags |= IFF_DRV_OACTIVE;
			break;
		}
		next = drbr_dequeue(ifp, txr->br);
	}

	if (enq > 0) {
		/* Set the watchdog */
		txr->watchdog_check = TRUE;
		txr->watchdog_time = ticks;
	}
	return (err);
}

/*
** Multiqueue capable stack interface; this is not
** yet truly multiqueue, but that is coming...
*/
static int
em_mq_start(struct ifnet *ifp, struct mbuf *m)
{
	struct adapter	*adapter = ifp->if_softc;
	struct tx_ring	*txr;
	int		i, error = 0;

	/* Which queue to use */
	if ((m->m_flags & M_FLOWID) != 0)
		i = m->m_pkthdr.flowid % adapter->num_queues;
	else
		i = curcpu % adapter->num_queues;

	txr = &adapter->tx_rings[i];

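	/*
	** If the TX lock is busy we do not block; the frame is
	** enqueued on this ring's buf_ring and will be drained
	** by whichever thread currently holds the lock.
	*/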
	if (EM_TX_TRYLOCK(txr)) {
		error = em_mq_start_locked(ifp, txr, m);
		EM_TX_UNLOCK(txr);
	} else
		error = drbr_enqueue(ifp, txr->br, m);

	return (error);
}

/*
** Flush all ring buffers
*/
static void
em_qflush(struct ifnet *ifp)
{
	struct adapter	*adapter = ifp->if_softc;
	struct tx_ring	*txr = adapter->tx_rings;
	struct mbuf	*m;

	for (int i = 0; i < adapter->num_queues; i++, txr++) {
		EM_TX_LOCK(txr);
		while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
			m_freem(m);
		EM_TX_UNLOCK(txr);
	}
	if_qflush(ifp);
}

#endif /* EM_MULTIQUEUE */

static void
em_start_locked(struct ifnet *ifp, struct tx_ring *txr)
{
	struct adapter	*adapter = ifp->if_softc;
	struct mbuf	*m_head;

	EM_TX_LOCK_ASSERT(txr);

	if ((ifp->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) !=
	    IFF_DRV_RUNNING)
		return;

	if (!adapter->link_active)
		return;

	/* Call cleanup if the number of TX descriptors is low */
	if (txr->tx_avail <= EM_TX_CLEANUP_THRESHOLD)
		em_txeof(txr);

	while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
		if (txr->tx_avail < EM_MAX_SCATTER) {
			ifp->if_drv_flags |= IFF_DRV_OACTIVE;
			break;
		}
		IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
		if (m_head == NULL)
			break;
		/*
		 *  Encapsulation can modify our pointer, and/or make it
		 *  NULL on failure.  In that event, we can't requeue.
		 */
		if (em_xmit(txr, &m_head)) {
			if (m_head == NULL)
				break;
			ifp->if_drv_flags |= IFF_DRV_OACTIVE;
			IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
			break;
		}

		/* Send a copy of the frame to the BPF listener */
		ETHER_BPF_MTAP(ifp, m_head);

		/* Set timeout in case hardware has problems transmitting. */
		txr->watchdog_time = ticks;
		txr->watchdog_check = TRUE;
	}

	return;
}

static void
em_start(struct ifnet *ifp)
{
	struct adapter	*adapter = ifp->if_softc;
	struct tx_ring	*txr = adapter->tx_rings;

	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
		EM_TX_LOCK(txr);
		em_start_locked(ifp, txr);
		EM_TX_UNLOCK(txr);
	}
	return;
}

/*********************************************************************
 *  Ioctl entry point
 *
 *  em_ioctl is called when the user wants to configure the
 *  interface.
 *
 *  return 0 on success, positive on failure
 **********************************************************************/

static int
em_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
{
	struct adapter	*adapter = ifp->if_softc;
	struct ifreq *ifr = (struct ifreq *)data;
#ifdef INET
	struct ifaddr *ifa = (struct ifaddr *)data;
#endif
	int error = 0;

	if (adapter->in_detach)
		return (error);

	switch (command) {
	case SIOCSIFADDR:
#ifdef INET
		if (ifa->ifa_addr->sa_family == AF_INET) {
			/*
			 * XXX
			 * Since resetting hardware takes a very long time
			 * and results in link renegotiation, we only
			 * initialize the hardware when it is absolutely
			 * required.
			 */
			ifp->if_flags |= IFF_UP;
			if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) {
				EM_CORE_LOCK(adapter);
				em_init_locked(adapter);
				EM_CORE_UNLOCK(adapter);
			}
			arp_ifinit(ifp, ifa);
		} else
#endif
			error = ether_ioctl(ifp, command, data);
		break;
	case SIOCSIFMTU:
	    {
		int max_frame_size;

		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFMTU (Set Interface MTU)");

		EM_CORE_LOCK(adapter);
		switch (adapter->hw.mac.type) {
		case e1000_82571:
		case e1000_82572:
		case e1000_ich9lan:
		case e1000_ich10lan:
		case e1000_82574:
		case e1000_80003es2lan:	/* 9K Jumbo Frame size */
			max_frame_size = 9234;
			break;
		case e1000_pchlan:
			max_frame_size = 4096;
			break;
			/* Adapters that do not support jumbo frames */
		case e1000_82583:
		case e1000_ich8lan:
			max_frame_size = ETHER_MAX_LEN;
			break;
		default:
			max_frame_size = MAX_JUMBO_FRAME_SIZE;
		}
		if (ifr->ifr_mtu > max_frame_size - ETHER_HDR_LEN -
		    ETHER_CRC_LEN) {
			EM_CORE_UNLOCK(adapter);
			error = EINVAL;
			break;
		}

		ifp->if_mtu = ifr->ifr_mtu;
		adapter->max_frame_size =
		    ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
		em_init_locked(adapter);
		EM_CORE_UNLOCK(adapter);
		break;
	    }
	case SIOCSIFFLAGS:
		IOCTL_DEBUGOUT("ioctl rcv'd: "
		    "SIOCSIFFLAGS (Set Interface Flags)");
		EM_CORE_LOCK(adapter);
		if (ifp->if_flags & IFF_UP) {
			if ((ifp->if_drv_flags & IFF_DRV_RUNNING)) {
				if ((ifp->if_flags ^ adapter->if_flags) &
				    (IFF_PROMISC | IFF_ALLMULTI)) {
					em_disable_promisc(adapter);
					em_set_promisc(adapter);
				}
			} else
				em_init_locked(adapter);
		} else
			if (ifp->if_drv_flags & IFF_DRV_RUNNING)
				em_stop(adapter);
		adapter->if_flags = ifp->if_flags;
		EM_CORE_UNLOCK(adapter);
		break;
	case SIOCADDMULTI:
	case SIOCDELMULTI:
		IOCTL_DEBUGOUT("ioctl rcv'd: SIOC(ADD|DEL)MULTI");
		if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
			EM_CORE_LOCK(adapter);
			em_disable_intr(adapter);
			em_set_multi(adapter);
#ifdef DEVICE_POLLING
			if (!(ifp->if_capenable & IFCAP_POLLING))
#endif
				em_enable_intr(adapter);
			EM_CORE_UNLOCK(adapter);
		}
		break;
	case SIOCSIFMEDIA:
		/* Check SOL/IDER usage */
		EM_CORE_LOCK(adapter);
		if (e1000_check_reset_block(&adapter->hw)) {
			EM_CORE_UNLOCK(adapter);
			device_printf(adapter->dev, "Media change is"
			    " blocked due to SOL/IDER session.\n");
			break;
		}
		EM_CORE_UNLOCK(adapter);
		/* FALLTHROUGH */
	case SIOCGIFMEDIA:
		IOCTL_DEBUGOUT("ioctl rcv'd: "
		    "SIOCxIFMEDIA (Get/Set Interface Media)");
		error = ifmedia_ioctl(ifp, ifr, &adapter->media, command);
		break;
	case SIOCSIFCAP:
	    {
		int mask, reinit;

		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFCAP (Set Capabilities)");
		reinit = 0;
		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
#ifdef DEVICE_POLLING
		if (mask & IFCAP_POLLING) {
			if (ifr->ifr_reqcap & IFCAP_POLLING) {
				error = ether_poll_register(em_poll, ifp);
				if (error)
					return (error);
				EM_CORE_LOCK(adapter);
				em_disable_intr(adapter);
				ifp->if_capenable |= IFCAP_POLLING;
				EM_CORE_UNLOCK(adapter);
			} else {
				error = ether_poll_deregister(ifp);
				/* Enable interrupt even in error case */
				EM_CORE_LOCK(adapter);
				em_enable_intr(adapter);
				ifp->if_capenable &= ~IFCAP_POLLING;
				EM_CORE_UNLOCK(adapter);
			}
		}
#endif
		if (mask & IFCAP_HWCSUM) {
			ifp->if_capenable ^= IFCAP_HWCSUM;
			reinit = 1;
		}
		if (mask & IFCAP_TSO4) {
			ifp->if_capenable ^= IFCAP_TSO4;
			reinit = 1;
		}
		if (mask & IFCAP_VLAN_HWTAGGING) {
			ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
			reinit = 1;
		}
		if (mask & IFCAP_VLAN_HWFILTER) {
			ifp->if_capenable ^= IFCAP_VLAN_HWFILTER;
			reinit = 1;
		}
		if ((mask & IFCAP_WOL) &&
		    (ifp->if_capabilities & IFCAP_WOL) != 0) {
			if (mask & IFCAP_WOL_MCAST)
				ifp->if_capenable ^= IFCAP_WOL_MCAST;
			if (mask & IFCAP_WOL_MAGIC)
				ifp->if_capenable ^= IFCAP_WOL_MAGIC;
		}
		if (reinit && (ifp->if_drv_flags & IFF_DRV_RUNNING))
			em_init(adapter);
		VLAN_CAPABILITIES(ifp);
		break;
	    }

	default:
		error = ether_ioctl(ifp, command, data);
		break;
	}

	return (error);
}

/*********************************************************************
 *  Init entry point
 *
 *  This routine is used in two ways: it is used by the stack as the
 *  init entry point in the network interface structure, and it is
 *  also used by the driver as a hw/sw initialization routine to get
 *  the hardware and software into a consistent state.
 **********************************************************************/

static void
em_init_locked(struct adapter *adapter)
{
	struct ifnet	*ifp = adapter->ifp;
	device_t	dev = adapter->dev;
	u32		pba;

	INIT_DEBUGOUT("em_init: begin");

	EM_CORE_LOCK_ASSERT(adapter);

	em_disable_intr(adapter);
	callout_stop(&adapter->timer);

	/*
	 * Packet Buffer Allocation (PBA)
	 * Writing PBA sets the receive portion of the buffer;
	 * the remainder is used for the transmit buffer.
	 */
	switch (adapter->hw.mac.type) {
	/* Total Packet Buffer on these is 48K */
	case e1000_82571:
	case e1000_82572:
	case e1000_80003es2lan:
		pba = E1000_PBA_32K; /* 32K for Rx, 16K for Tx */
		break;
	case e1000_82573: /* 82573: Total Packet Buffer is 32K */
		pba = E1000_PBA_12K; /* 12K for Rx, 20K for Tx */
		break;
	case e1000_82574:
	case e1000_82583:
		pba = E1000_PBA_20K; /* 20K for Rx, 20K for Tx */
		break;
	case e1000_ich9lan:
	case e1000_ich10lan:
	case e1000_pchlan:
		pba = E1000_PBA_10K;
		break;
	case e1000_ich8lan:
		pba = E1000_PBA_8K;
		break;
	default:
		if (adapter->max_frame_size > 8192)
			pba = E1000_PBA_40K; /* 40K for Rx, 24K for Tx */
		else
			pba = E1000_PBA_48K; /* 48K for Rx, 16K for Tx */
	}

	INIT_DEBUGOUT1("em_init: pba=%dK", pba);
	E1000_WRITE_REG(&adapter->hw, E1000_PBA, pba);

	/* Get the latest mac address; user can use a LAA */
	bcopy(IF_LLADDR(adapter->ifp), adapter->hw.mac.addr,
	    ETHER_ADDR_LEN);

	/* Put the address into the Receive Address Array */
	e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);

	/*
	 * With the 82571 adapter, RAR[0] may be overwritten
	 * when the other port is reset, so we make a duplicate
	 * in RAR[14] for that eventuality; this assures
	 * the interface continues to function.
	 */
	if (adapter->hw.mac.type == e1000_82571) {
		e1000_set_laa_state_82571(&adapter->hw, TRUE);
		e1000_rar_set(&adapter->hw, adapter->hw.mac.addr,
		    E1000_RAR_ENTRIES - 1);
	}

	/* Initialize the hardware */
	em_reset(adapter);
	em_update_link_status(adapter);

	/* Setup VLAN support, basic and offload if available */
	E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);

	/* Use real VLAN Filter support? */
	if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) {
		if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
			/* Use real VLAN Filter support */
			em_setup_vlan_hw_support(adapter);
		else {
			u32 ctrl;
			ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL);
			ctrl |= E1000_CTRL_VME;
			E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl);
		}
	}

	/* Set hardware offload abilities */
	ifp->if_hwassist = 0;
	if (ifp->if_capenable & IFCAP_TXCSUM)
		ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP);
	if (ifp->if_capenable & IFCAP_TSO4)
		ifp->if_hwassist |= CSUM_TSO;

	/* Configure for OS presence */
	em_init_manageability(adapter);

	/* Prepare transmit descriptors and buffers */
	em_setup_transmit_structures(adapter);
	em_initialize_transmit_unit(adapter);

	/* Setup Multicast table */
	em_set_multi(adapter);

	/* Prepare receive descriptors and buffers */
	if (em_setup_receive_structures(adapter)) {
		device_printf(dev, "Could not setup receive structures\n");
		em_stop(adapter);
		return;
	}
	em_initialize_receive_unit(adapter);

	/* Don't lose promiscuous settings */
	em_set_promisc(adapter);

	ifp->if_drv_flags |= IFF_DRV_RUNNING;
	ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;

	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
	e1000_clear_hw_cntrs_base_generic(&adapter->hw);

	/* MSI/X configuration for 82574 */
	if (adapter->hw.mac.type == e1000_82574) {
		int tmp;
		tmp = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
		tmp |= E1000_CTRL_EXT_PBA_CLR;
		E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, tmp);
		/* Set the IVAR - interrupt vector routing. */
		E1000_WRITE_REG(&adapter->hw, E1000_IVAR, adapter->ivars);
	}

#ifdef DEVICE_POLLING
	/*
	 * Only enable interrupts if we are not polling; make sure
	 * they are off otherwise.
	 */
	if (ifp->if_capenable & IFCAP_POLLING)
		em_disable_intr(adapter);
	else
#endif /* DEVICE_POLLING */
		em_enable_intr(adapter);

	/* AMT based hardware can now take control from firmware */
	if (adapter->has_manage && adapter->has_amt)
		em_get_hw_control(adapter);

	/* Don't reset the PHY next time init gets called */
	adapter->hw.phy.reset_disable = TRUE;
}

static void
em_init(void *arg)
{
	struct adapter *adapter = arg;

	EM_CORE_LOCK(adapter);
	em_init_locked(adapter);
	EM_CORE_UNLOCK(adapter);
}

#ifdef DEVICE_POLLING
/*********************************************************************
 *
 *  Legacy polling routine: note this only works with a single queue
 *
 *********************************************************************/
static int
em_poll(struct ifnet *ifp, enum poll_cmd cmd, int count)
{
	struct adapter *adapter = ifp->if_softc;
	struct tx_ring	*txr = adapter->tx_rings;
	struct rx_ring	*rxr = adapter->rx_rings;
	u32		reg_icr;
	int		rx_done;

	EM_CORE_LOCK(adapter);
	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
		EM_CORE_UNLOCK(adapter);
		return (0);
	}

	if (cmd == POLL_AND_CHECK_STATUS) {
		reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
		if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
			callout_stop(&adapter->timer);
			adapter->hw.mac.get_link_status = 1;
			em_update_link_status(adapter);
			callout_reset(&adapter->timer, hz,
			    em_local_timer, adapter);
		}
	}
	EM_CORE_UNLOCK(adapter);

	em_rxeof(rxr, count, &rx_done);

	EM_TX_LOCK(txr);
	em_txeof(txr);
#ifdef EM_MULTIQUEUE
	if (!drbr_empty(ifp, txr->br))
		em_mq_start_locked(ifp, txr, NULL);
#else
	if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
		em_start_locked(ifp, txr);
#endif
	EM_TX_UNLOCK(txr);

	return (rx_done);
}
#endif /* DEVICE_POLLING */

/*********************************************************************
 *
 *  Fast Legacy/MSI Combined Interrupt Service routine
 *
 *********************************************************************/
static int
em_irq_fast(void *arg)
{
	struct adapter	*adapter = arg;
	struct ifnet	*ifp;
	u32		reg_icr;

	ifp = adapter->ifp;

	reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);

	/* Hot eject?  */
	if (reg_icr == 0xffffffff)
		return FILTER_STRAY;

	/* Definitely not our interrupt.  */
	if (reg_icr == 0x0)
		return FILTER_STRAY;

	/*
	 * Starting with the 82571 chip, bit 31 should be used to
	 * determine whether the interrupt belongs to us.
	 */
	if (adapter->hw.mac.type >= e1000_82571 &&
	    (reg_icr & E1000_ICR_INT_ASSERTED) == 0)
		return FILTER_STRAY;

	em_disable_intr(adapter);
	taskqueue_enqueue(adapter->tq, &adapter->que_task);

	/* Link status change */
	if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
		adapter->hw.mac.get_link_status = 1;
		taskqueue_enqueue(taskqueue_fast, &adapter->link_task);
	}

	if (reg_icr & E1000_ICR_RXO)
		adapter->rx_overruns++;
	return FILTER_HANDLED;
}

/* Combined RX/TX handler, used by Legacy and MSI */
static void
em_handle_que(void *context, int pending)
{
	struct adapter	*adapter = context;
	struct ifnet	*ifp = adapter->ifp;
	struct tx_ring	*txr = adapter->tx_rings;
	struct rx_ring	*rxr = adapter->rx_rings;
	bool		more;

	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
		more = em_rxeof(rxr, adapter->rx_process_limit, NULL);

		EM_TX_LOCK(txr);
		if (em_txeof(txr))
			more = TRUE;
#ifdef EM_MULTIQUEUE
		if (!drbr_empty(ifp, txr->br))
			em_mq_start_locked(ifp, txr, NULL);
#else
		if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
			em_start_locked(ifp, txr);
#endif
		EM_TX_UNLOCK(txr);
		if (more) {
			taskqueue_enqueue(adapter->tq, &adapter->que_task);
			return;
		}
	}

	em_enable_intr(adapter);
	return;
}

/*********************************************************************
 *
 *  MSIX Interrupt Service Routines
 *
 **********************************************************************/
static void
em_msix_tx(void *arg)
{
	struct tx_ring *txr = arg;
	struct adapter *adapter = txr->adapter;
	bool		more;

	++txr->tx_irq;
	EM_TX_LOCK(txr);
	more = em_txeof(txr);
	EM_TX_UNLOCK(txr);
	if (more)
		taskqueue_enqueue(txr->tq, &txr->tx_task);
	else
		/* Reenable this interrupt */
		E1000_WRITE_REG(&adapter->hw, E1000_IMS, txr->ims);
	return;
}

/*********************************************************************
 *
 *  MSIX RX Interrupt Service routine
 *
 **********************************************************************/

static void
em_msix_rx(void *arg)
{
	struct rx_ring	*rxr = arg;
	struct adapter	*adapter = rxr->adapter;
	bool		more;

	++rxr->rx_irq;
	more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
	if (more)
		taskqueue_enqueue(rxr->tq, &rxr->rx_task);
	else
		/* Reenable this interrupt */
		E1000_WRITE_REG(&adapter->hw, E1000_IMS, rxr->ims);
	return;
}

/*********************************************************************
 *
 *  MSIX Link Fast Interrupt Service routine
 *
 **********************************************************************/
static void
em_msix_link(void *arg)
{
	struct adapter	*adapter = arg;
	u32		reg_icr;

	++adapter->link_irq;
	reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);

	if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
		adapter->hw.mac.get_link_status = 1;
		em_handle_link(adapter, 0);
	} else
		E1000_WRITE_REG(&adapter->hw, E1000_IMS,
		    EM_MSIX_LINK | E1000_IMS_LSC);
	return;
}

static void
em_handle_rx(void *context, int pending)
{
	struct rx_ring	*rxr = context;
	struct adapter	*adapter = rxr->adapter;
	bool		more;

	more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
	if (more)
		taskqueue_enqueue(rxr->tq, &rxr->rx_task);
	else
		/* Reenable this interrupt */
		E1000_WRITE_REG(&adapter->hw, E1000_IMS, rxr->ims);
}

static void
em_handle_tx(void *context, int pending)
{
	struct tx_ring	*txr = context;
	struct adapter	*adapter = txr->adapter;
	struct ifnet	*ifp = adapter->ifp;

	if (!EM_TX_TRYLOCK(txr))
		return;

	em_txeof(txr);

#ifdef EM_MULTIQUEUE
	if (!drbr_empty(ifp, txr->br))
		em_mq_start_locked(ifp, txr, NULL);
#else
	if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
		em_start_locked(ifp, txr);
#endif
	E1000_WRITE_REG(&adapter->hw, E1000_IMS, txr->ims);
	EM_TX_UNLOCK(txr);
}

static void
em_handle_link(void *context, int pending)
{
	struct adapter	*adapter = context;
	struct ifnet *ifp = adapter->ifp;

	if (!(ifp->if_drv_flags & IFF_DRV_RUNNING))
		return;

	EM_CORE_LOCK(adapter);
	callout_stop(&adapter->timer);
	em_update_link_status(adapter);
	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
	E1000_WRITE_REG(&adapter->hw, E1000_IMS,
	    EM_MSIX_LINK | E1000_IMS_LSC);
	EM_CORE_UNLOCK(adapter);
}

/*********************************************************************
 *
 *  Media Ioctl callback
 *
 *  This routine is called whenever the user queries the status of
 *  the interface using ifconfig.
 *
 **********************************************************************/
static void
em_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
{
	struct adapter *adapter = ifp->if_softc;
	u_char fiber_type = IFM_1000_SX;

	INIT_DEBUGOUT("em_media_status: begin");

	EM_CORE_LOCK(adapter);
	em_update_link_status(adapter);

	ifmr->ifm_status = IFM_AVALID;
	ifmr->ifm_active = IFM_ETHER;

	if (!adapter->link_active) {
		EM_CORE_UNLOCK(adapter);
		return;
	}

	ifmr->ifm_status |= IFM_ACTIVE;

	if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
	    (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
		ifmr->ifm_active |= fiber_type | IFM_FDX;
	} else {
		switch (adapter->link_speed) {
		case 10:
			ifmr->ifm_active |= IFM_10_T;
			break;
		case 100:
			ifmr->ifm_active |= IFM_100_TX;
			break;
		case 1000:
			ifmr->ifm_active |= IFM_1000_T;
			break;
		}
		if (adapter->link_duplex == FULL_DUPLEX)
			ifmr->ifm_active |= IFM_FDX;
		else
			ifmr->ifm_active |= IFM_HDX;
	}
	EM_CORE_UNLOCK(adapter);
}

/*********************************************************************
 *
 *  Media Ioctl callback
 *
 *  This routine is called when the user changes speed/duplex using
 *  the media/mediaopt options with ifconfig.
 *
 **********************************************************************/
static int
em_media_change(struct ifnet *ifp)
{
	struct adapter *adapter = ifp->if_softc;
	struct ifmedia  *ifm = &adapter->media;

	INIT_DEBUGOUT("em_media_change: begin");

	if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
		return (EINVAL);

	EM_CORE_LOCK(adapter);
	switch (IFM_SUBTYPE(ifm->ifm_media)) {
	case IFM_AUTO:
		adapter->hw.mac.autoneg = DO_AUTO_NEG;
		adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
		break;
	case IFM_1000_LX:
	case IFM_1000_SX:
	case IFM_1000_T:
		adapter->hw.mac.autoneg = DO_AUTO_NEG;
		adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
		break;
	case IFM_100_TX:
		adapter->hw.mac.autoneg = FALSE;
		adapter->hw.phy.autoneg_advertised = 0;
		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
			adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_FULL;
		else
			adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_HALF;
		break;
	case IFM_10_T:
		adapter->hw.mac.autoneg = FALSE;
		adapter->hw.phy.autoneg_advertised = 0;
		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
			adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_FULL;
		else
			adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_HALF;
		break;
	default:
		device_printf(adapter->dev, "Unsupported media type\n");
	}

	/*
	 * As the speed/duplex settings may have changed we need to
	 * reset the PHY.
	 */
	adapter->hw.phy.reset_disable = FALSE;

	em_init_locked(adapter);
	EM_CORE_UNLOCK(adapter);

	return (0);
}

/*********************************************************************
 *
 *  This routine maps the mbufs to tx descriptors.
 *
 *  return 0 on success, positive on failure
 **********************************************************************/

static int
em_xmit(struct tx_ring *txr, struct mbuf **m_headp)
{
	struct adapter		*adapter = txr->adapter;
	bus_dma_segment_t	segs[EM_MAX_SCATTER];
	bus_dmamap_t		map;
	struct em_buffer	*tx_buffer, *tx_buffer_mapped;
	struct e1000_tx_desc	*ctxd = NULL;
	struct mbuf		*m_head;
	u32			txd_upper, txd_lower, txd_used, txd_saved;
	int			nsegs, i, j, first, last = 0;
	int			error, do_tso, tso_desc = 0;

	m_head = *m_headp;
	txd_upper = txd_lower = txd_used = txd_saved = 0;
	do_tso = ((m_head->m_pkthdr.csum_flags & CSUM_TSO) != 0);

	/*
	** When doing checksum offload, it is critical to
	** make sure the first mbuf has more than the header,
	** because the checksum setup routine expects data
	** to be present.
	*/
	if ((m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD) &&
	    (m_head->m_len < ETHER_HDR_LEN + sizeof(struct ip))) {
		m_head = m_pullup(m_head, ETHER_HDR_LEN + sizeof(struct ip));
		*m_headp = m_head;
		if (m_head == NULL)
			return (ENOBUFS);
	}

	/*
	 * TSO workaround:
	 *  If an mbuf is only a header, we need
	 *  to pull 4 bytes of data into it.
	 */
	if (do_tso && (m_head->m_len <= M_TSO_LEN)) {
		m_head = m_pullup(m_head, M_TSO_LEN + 4);
		*m_headp = m_head;
		if (m_head == NULL)
			return (ENOBUFS);
	}

	/*
	 * Map the packet for DMA
	 *
	 * Capture the first descriptor index,
	 * this descriptor will have the index
	 * of the EOP which is the only one that
	 * now gets a DONE bit writeback.
	 */
	first = txr->next_avail_desc;
	tx_buffer = &txr->tx_buffers[first];
	tx_buffer_mapped = tx_buffer;
	map = tx_buffer->map;

	error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
	    *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);

	/*
	 * There are two types of errors we can (try) to handle:
	 * - EFBIG means the mbuf chain was too long and bus_dma ran
	 *   out of segments.  Defragment the mbuf chain and try again.
	 * - ENOMEM means bus_dma could not obtain enough bounce buffers
	 *   at this point in time.  Defer sending and try again later.
	 * All other errors, in particular EINVAL, are fatal and prevent the
	 * mbuf chain from ever going through.  Drop it and report error.
	 */
	if (error == EFBIG) {
		struct mbuf *m;

		m = m_defrag(*m_headp, M_DONTWAIT);
		if (m == NULL) {
			adapter->mbuf_alloc_failed++;
			m_freem(*m_headp);
			*m_headp = NULL;
			return (ENOBUFS);
		}
		*m_headp = m;

		/* Try it again */
		error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
		    *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);

		if (error) {
			adapter->no_tx_dma_setup++;
			m_freem(*m_headp);
			*m_headp = NULL;
			return (error);
		}
	} else if (error != 0) {
		adapter->no_tx_dma_setup++;
		return (error);
	}
1819
1820	/*
1821	 * TSO Hardware workaround, if this packet is not
1822	 * TSO, and is only a single descriptor long, and
1823	 * it follows a TSO burst, then we need to add a
1824	 * sentinel descriptor to prevent premature writeback.
1825	 */
1826	if ((do_tso == 0) && (txr->tx_tso == TRUE)) {
1827		if (nsegs == 1)
1828			tso_desc = TRUE;
1829		txr->tx_tso = FALSE;
1830	}
1831
1832	if (nsegs > (txr->tx_avail - 2)) {
1833		txr->no_desc_avail++;
1834		bus_dmamap_unload(txr->txtag, map);
1835		return (ENOBUFS);
1836	}
1837	m_head = *m_headp;
1838
1839	/* Do hardware assists */
1840#if __FreeBSD_version >= 700000
1841	if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
1842		error = em_tso_setup(txr, m_head, &txd_upper, &txd_lower);
1843		if (error != TRUE) {
			/* Unload the map so the mbuf's DMA mapping is not leaked */
			bus_dmamap_unload(txr->txtag, map);
1844			return (ENXIO); /* TSO context setup failed */
		}
1845		/* we need to make a final sentinel transmit desc */
1846		tso_desc = TRUE;
1847	} else
1848#endif
1849	if (m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD)
1850		em_transmit_checksum_setup(txr,  m_head,
1851		    &txd_upper, &txd_lower);
1852
1853	i = txr->next_avail_desc;
1854
1855	/* Set up our transmit descriptors */
1856	for (j = 0; j < nsegs; j++) {
1857		bus_size_t seg_len;
1858		bus_addr_t seg_addr;
1859
1860		tx_buffer = &txr->tx_buffers[i];
1861		ctxd = &txr->tx_base[i];
1862		seg_addr = segs[j].ds_addr;
1863		seg_len  = segs[j].ds_len;
1864		/*
1865		** TSO Workaround:
1866		** If this is the last descriptor, we want to
1867		** split it so we have a small final sentinel
1868		*/
1869		if (tso_desc && (j == (nsegs - 1)) && (seg_len > 8)) {
1870			seg_len -= 4;
1871			ctxd->buffer_addr = htole64(seg_addr);
1872			ctxd->lower.data = htole32(
1873			    adapter->txd_cmd | txd_lower | seg_len);
1874			ctxd->upper.data =
1875			    htole32(txd_upper);
1876			if (++i == adapter->num_tx_desc)
1877				i = 0;
1878			/* Now make the sentinel */
1879			++txd_used; /* using an extra txd */
1880			ctxd = &txr->tx_base[i];
1881			tx_buffer = &txr->tx_buffers[i];
1882			ctxd->buffer_addr =
1883			    htole64(seg_addr + seg_len);
1884			ctxd->lower.data = htole32(
1885			    adapter->txd_cmd | txd_lower | 4);
1886			ctxd->upper.data =
1887			    htole32(txd_upper);
1888			last = i;
1889			if (++i == adapter->num_tx_desc)
1890				i = 0;
1891		} else {
1892			ctxd->buffer_addr = htole64(seg_addr);
1893			ctxd->lower.data = htole32(
1894			    adapter->txd_cmd | txd_lower | seg_len);
1895			ctxd->upper.data =
1896			    htole32(txd_upper);
1897			last = i;
1898			if (++i == adapter->num_tx_desc)
1899				i = 0;
1900		}
1901		tx_buffer->m_head = NULL;
1902		tx_buffer->next_eop = -1;
1903	}
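	/*
	 * Sentinel example: a single 1024-byte segment following a TSO
	 * burst is emitted as a 1020-byte descriptor plus a separate
	 * 4-byte sentinel descriptor, so only the small trailing
	 * descriptor is subject to the DONE writeback.
	 */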
1904
1905	txr->next_avail_desc = i;
1906	txr->tx_avail -= nsegs;
1907	if (tso_desc) /* TSO used an extra for sentinel */
1908		txr->tx_avail -= txd_used;
1909
1910	if (m_head->m_flags & M_VLANTAG) {
1911		/* Set the vlan id. */
1912		ctxd->upper.fields.special =
1913		    htole16(m_head->m_pkthdr.ether_vtag);
1914		/* Tell hardware to add tag */
1915		ctxd->lower.data |= htole32(E1000_TXD_CMD_VLE);
1916	}
1917
1918	tx_buffer->m_head = m_head;
1919	tx_buffer_mapped->map = tx_buffer->map;
1920	tx_buffer->map = map;
1921	bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE);
1922
1923	/*
1924	 * The last descriptor of the packet
1925	 * needs End Of Packet (EOP)
1926	 * and Report Status (RS).
1927	 */
1928	ctxd->lower.data |=
1929	    htole32(E1000_TXD_CMD_EOP | E1000_TXD_CMD_RS);
1930	/*
1931	 * Keep track in the first buffer which
1932	 * descriptor will be written back
1933	 */
1934	tx_buffer = &txr->tx_buffers[first];
1935	tx_buffer->next_eop = last;
1936
1937	/*
1938	 * Advance the Transmit Descriptor Tail (TDT), this tells the E1000
1939	 * that this frame is available to transmit.
1940	 */
1941	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
1942	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1943	E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me), i);
1944
1945	return (0);
1946}
1947
1948static void
1949em_set_promisc(struct adapter *adapter)
1950{
1951	struct ifnet	*ifp = adapter->ifp;
1952	u32		reg_rctl;
1953
1954	reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
1955
1956	if (ifp->if_flags & IFF_PROMISC) {
1957		reg_rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
1958		/* Turn this on if you want to see bad packets */
1959		if (em_debug_sbp)
1960			reg_rctl |= E1000_RCTL_SBP;
1961		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
1962	} else if (ifp->if_flags & IFF_ALLMULTI) {
1963		reg_rctl |= E1000_RCTL_MPE;
1964		reg_rctl &= ~E1000_RCTL_UPE;
1965		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
1966	}
1967}
1968
1969static void
1970em_disable_promisc(struct adapter *adapter)
1971{
1972	u32	reg_rctl;
1973
1974	reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
1975
1976	reg_rctl &=  (~E1000_RCTL_UPE);
1977	reg_rctl &=  (~E1000_RCTL_MPE);
1978	reg_rctl &=  (~E1000_RCTL_SBP);
1979	E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
1980}
1981
1982
1983/*********************************************************************
1984 *  Multicast Update
1985 *
1986 *  This routine is called whenever multicast address list is updated.
1987 *
1988 **********************************************************************/
1989
1990static void
1991em_set_multi(struct adapter *adapter)
1992{
1993	struct ifnet	*ifp = adapter->ifp;
1994	struct ifmultiaddr *ifma;
1995	u32 reg_rctl = 0;
1996	u8  *mta; /* Multicast array memory */
1997	int mcnt = 0;
1998
1999	IOCTL_DEBUGOUT("em_set_multi: begin");
2000
2001	if (adapter->hw.mac.type == e1000_82542 &&
2002	    adapter->hw.revision_id == E1000_REVISION_2) {
2003		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2004		if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
2005			e1000_pci_clear_mwi(&adapter->hw);
2006		reg_rctl |= E1000_RCTL_RST;
2007		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2008		msec_delay(5);
2009	}
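	/*
	 * The bracketing here and at the end of this routine looks like
	 * the documented 82542 rev 2 workaround: hold the receiver in
	 * reset (with MWI off) while the filter is rewritten.
	 */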
2010
2011	/* Allocate temporary memory to setup array */
2012	mta = malloc(sizeof(u8) *
2013	    (ETH_ADDR_LEN * MAX_NUM_MULTICAST_ADDRESSES),
2014	    M_DEVBUF, M_NOWAIT | M_ZERO);
2015	if (mta == NULL)
2016		panic("em_set_multi memory failure\n");
2017
2018#if __FreeBSD_version < 800000
2019	IF_ADDR_LOCK(ifp);
2020#else
2021	if_maddr_rlock(ifp);
2022#endif
2023	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
2024		if (ifma->ifma_addr->sa_family != AF_LINK)
2025			continue;
2026
2027		if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
2028			break;
2029
2030		bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
2031		    &mta[mcnt * ETH_ADDR_LEN], ETH_ADDR_LEN);
2032		mcnt++;
2033	}
2034#if __FreeBSD_version < 800000
2035	IF_ADDR_UNLOCK(ifp);
2036#else
2037	if_maddr_runlock(ifp);
2038#endif
2039	if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES) {
2040		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2041		reg_rctl |= E1000_RCTL_MPE;
2042		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2043	} else
2044		e1000_update_mc_addr_list(&adapter->hw, mta, mcnt);
2045
2046	if (adapter->hw.mac.type == e1000_82542 &&
2047	    adapter->hw.revision_id == E1000_REVISION_2) {
2048		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2049		reg_rctl &= ~E1000_RCTL_RST;
2050		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2051		msec_delay(5);
2052		if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
2053			e1000_pci_set_mwi(&adapter->hw);
2054	}
2055	free(mta, M_DEVBUF);
2056}
2057
2058
2059/*********************************************************************
2060 *  Timer routine
2061 *
2062 *  This routine checks for link status and updates statistics.
2063 *
2064 **********************************************************************/
2065
2066static void
2067em_local_timer(void *arg)
2068{
2069	struct adapter	*adapter = arg;
2070	struct ifnet	*ifp = adapter->ifp;
2071	struct tx_ring	*txr = adapter->tx_rings;
2072
2073	EM_CORE_LOCK_ASSERT(adapter);
2074
2075	em_update_link_status(adapter);
2076	em_update_stats_counters(adapter);
2077
2078	/* Reset LAA into RAR[0] on 82571 */
2079	if (e1000_get_laa_state_82571(&adapter->hw) == TRUE)
2080		e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);
2081
2082	/*
2083	** Check for time since any descriptor was cleaned
2084	*/
2085	for (int i = 0; i < adapter->num_queues; i++, txr++) {
2086		EM_TX_LOCK(txr);
2087		if (txr->watchdog_check == FALSE) {
2088			EM_TX_UNLOCK(txr);
2089			continue;
2090		}
2091		if ((ticks - txr->watchdog_time) > EM_WATCHDOG)
2092			goto hung;
2093		EM_TX_UNLOCK(txr);
2094	}
2095
2096	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
2097	return;
2098hung:
2099	device_printf(adapter->dev, "Watchdog timeout -- resetting\n");
2100	ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2101	adapter->watchdog_events++;
2102	EM_TX_UNLOCK(txr);
2103	em_init_locked(adapter);
2104}
2105
2106
2107static void
2108em_update_link_status(struct adapter *adapter)
2109{
2110	struct e1000_hw *hw = &adapter->hw;
2111	struct ifnet *ifp = adapter->ifp;
2112	device_t dev = adapter->dev;
2113	u32 link_check = 0;
2114
2115	/* Get the cached link value or read phy for real */
2116	switch (hw->phy.media_type) {
2117	case e1000_media_type_copper:
2118		if (hw->mac.get_link_status) {
2119			/* Do the work to read phy */
2120			e1000_check_for_link(hw);
2121			link_check = !hw->mac.get_link_status;
2122			if (link_check) /* ESB2 fix */
2123				e1000_cfg_on_link_up(hw);
2124		} else
2125			link_check = TRUE;
2126		break;
2127	case e1000_media_type_fiber:
2128		e1000_check_for_link(hw);
2129		link_check = (E1000_READ_REG(hw, E1000_STATUS) &
2130                                 E1000_STATUS_LU);
2131		break;
2132	case e1000_media_type_internal_serdes:
2133		e1000_check_for_link(hw);
2134		link_check = adapter->hw.mac.serdes_has_link;
2135		break;
2136	default:
2137	case e1000_media_type_unknown:
2138		break;
2139	}
2140
2141	/* Now check for a transition */
2142	if (link_check && (adapter->link_active == 0)) {
2143		e1000_get_speed_and_duplex(hw, &adapter->link_speed,
2144		    &adapter->link_duplex);
2145		/* Check if we must disable SPEED_MODE bit on PCI-E */
2146		if ((adapter->link_speed != SPEED_1000) &&
2147		    ((hw->mac.type == e1000_82571) ||
2148		    (hw->mac.type == e1000_82572))) {
2149			int tarc0;
2150			tarc0 = E1000_READ_REG(hw, E1000_TARC(0));
2151			tarc0 &= ~SPEED_MODE_BIT;
2152			E1000_WRITE_REG(hw, E1000_TARC(0), tarc0);
2153		}
2154		if (bootverbose)
2155			device_printf(dev, "Link is up %d Mbps %s\n",
2156			    adapter->link_speed,
2157			    ((adapter->link_duplex == FULL_DUPLEX) ?
2158			    "Full Duplex" : "Half Duplex"));
2159		adapter->link_active = 1;
2160		adapter->smartspeed = 0;
2161		ifp->if_baudrate = adapter->link_speed * 1000000;
2162		if_link_state_change(ifp, LINK_STATE_UP);
2163	} else if (!link_check && (adapter->link_active == 1)) {
2164		ifp->if_baudrate = adapter->link_speed = 0;
2165		adapter->link_duplex = 0;
2166		if (bootverbose)
2167			device_printf(dev, "Link is Down\n");
2168		adapter->link_active = 0;
2169		/* Link down, disable watchdog */
2170		// JFV change later
2171		//adapter->watchdog_check = FALSE;
2172		if_link_state_change(ifp, LINK_STATE_DOWN);
2173	}
2174}
2175
2176/*********************************************************************
2177 *
2178 *  This routine disables all traffic on the adapter by issuing a
2179 *  global reset on the MAC and deallocates TX/RX buffers.
2180 *
2181 *  This routine should always be called with BOTH the CORE
2182 *  and TX locks.
2183 **********************************************************************/
2184
2185static void
2186em_stop(void *arg)
2187{
2188	struct adapter	*adapter = arg;
2189	struct ifnet	*ifp = adapter->ifp;
2190	struct tx_ring	*txr = adapter->tx_rings;
2191
2192	EM_CORE_LOCK_ASSERT(adapter);
2193
2194	INIT_DEBUGOUT("em_stop: begin");
2195
2196	em_disable_intr(adapter);
2197	callout_stop(&adapter->timer);
2198
2199	/* Tell the stack that the interface is no longer active */
2200	ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
2201
2202	/* Unarm watchdog timer. */
2203	for (int i = 0; i < adapter->num_queues; i++, txr++) {
2204		EM_TX_LOCK(txr);
2205		txr->watchdog_check = FALSE;
2206		EM_TX_UNLOCK(txr);
2207	}
2208
2209	e1000_reset_hw(&adapter->hw);
2210	E1000_WRITE_REG(&adapter->hw, E1000_WUC, 0);
2211
2212	e1000_led_off(&adapter->hw);
2213	e1000_cleanup_led(&adapter->hw);
2214}
2215
2216
2217/*********************************************************************
2218 *
2219 *  Determine hardware revision.
2220 *
2221 **********************************************************************/
2222static void
2223em_identify_hardware(struct adapter *adapter)
2224{
2225	device_t dev = adapter->dev;
2226
2227	/* Make sure our PCI config space has the necessary stuff set */
2228	adapter->hw.bus.pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2);
2229	if (!((adapter->hw.bus.pci_cmd_word & PCIM_CMD_BUSMASTEREN) &&
2230	    (adapter->hw.bus.pci_cmd_word & PCIM_CMD_MEMEN))) {
2231		device_printf(dev, "Memory Access and/or Bus Master bits "
2232		    "were not set!\n");
2233		adapter->hw.bus.pci_cmd_word |=
2234		(PCIM_CMD_BUSMASTEREN | PCIM_CMD_MEMEN);
2235		pci_write_config(dev, PCIR_COMMAND,
2236		    adapter->hw.bus.pci_cmd_word, 2);
2237	}
2238
2239	/* Save off the information about this board */
2240	adapter->hw.vendor_id = pci_get_vendor(dev);
2241	adapter->hw.device_id = pci_get_device(dev);
2242	adapter->hw.revision_id = pci_read_config(dev, PCIR_REVID, 1);
2243	adapter->hw.subsystem_vendor_id =
2244	    pci_read_config(dev, PCIR_SUBVEND_0, 2);
2245	adapter->hw.subsystem_device_id =
2246	    pci_read_config(dev, PCIR_SUBDEV_0, 2);
2247
2248	/* Do Shared Code Init and Setup */
2249	if (e1000_set_mac_type(&adapter->hw)) {
2250		device_printf(dev, "Setup init failure\n");
2251		return;
2252	}
2253}
2254
2255static int
2256em_allocate_pci_resources(struct adapter *adapter)
2257{
2258	device_t	dev = adapter->dev;
2259	int		rid;
2260
2261	rid = PCIR_BAR(0);
2262	adapter->memory = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
2263	    &rid, RF_ACTIVE);
2264	if (adapter->memory == NULL) {
2265		device_printf(dev, "Unable to allocate bus resource: memory\n");
2266		return (ENXIO);
2267	}
2268	adapter->osdep.mem_bus_space_tag =
2269	    rman_get_bustag(adapter->memory);
2270	adapter->osdep.mem_bus_space_handle =
2271	    rman_get_bushandle(adapter->memory);
2272	adapter->hw.hw_addr = (u8 *)&adapter->osdep.mem_bus_space_handle;
2273
2274	/* Default to a single queue */
2275	adapter->num_queues = 1;
2276
2277	/*
2278	 * Setup MSI/X or MSI if PCI Express
2279	 */
2280	adapter->msix = em_setup_msix(adapter);
2281
2282	adapter->hw.back = &adapter->osdep;
2283
2284	return (0);
2285}
2286
2287/*********************************************************************
2288 *
2289 *  Setup the Legacy or MSI Interrupt handler
2290 *
2291 **********************************************************************/
2292int
2293em_allocate_legacy(struct adapter *adapter)
2294{
2295	device_t dev = adapter->dev;
2296	int error, rid = 0;
2297
2298	/* Manually turn off all interrupts */
2299	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2300
2301	if (adapter->msix == 1) /* using MSI */
2302		rid = 1;
2303	/* We allocate a single interrupt resource */
2304	adapter->res = bus_alloc_resource_any(dev,
2305	    SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2306	if (adapter->res == NULL) {
2307		device_printf(dev, "Unable to allocate bus resource: "
2308		    "interrupt\n");
2309		return (ENXIO);
2310	}
2311
2312	/*
2313	 * Allocate a fast interrupt and the associated
2314	 * deferred processing contexts.
2315	 */
2316	TASK_INIT(&adapter->que_task, 0, em_handle_que, adapter);
2317	TASK_INIT(&adapter->link_task, 0, em_handle_link, adapter);
2318	adapter->tq = taskqueue_create_fast("em_taskq", M_NOWAIT,
2319	    taskqueue_thread_enqueue, &adapter->tq);
2320	taskqueue_start_threads(&adapter->tq, 1, PI_NET, "%s taskq",
2321	    device_get_nameunit(adapter->dev));
2322	if ((error = bus_setup_intr(dev, adapter->res, INTR_TYPE_NET,
2323	    em_irq_fast, NULL, adapter, &adapter->tag)) != 0) {
2324		device_printf(dev, "Failed to register fast interrupt "
2325			    "handler: %d\n", error);
2326		taskqueue_free(adapter->tq);
2327		adapter->tq = NULL;
2328		return (error);
2329	}
2330
2331	return (0);
2332}
2333
2334/*********************************************************************
2335 *
2336 *  Setup the MSIX Interrupt handlers
2337 *   This is not really multiqueue; rather,
2338 *   it is just multiple interrupt vectors.
2339 *
2340 **********************************************************************/
2341int
2342em_allocate_msix(struct adapter *adapter)
2343{
2344	device_t	dev = adapter->dev;
2345	struct		tx_ring *txr = adapter->tx_rings;
2346	struct		rx_ring *rxr = adapter->rx_rings;
2347	int		error, rid, vector = 0;
2348
2349
2350	/* Make sure all interrupts are disabled */
2351	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2352
2353	/* First set up ring resources */
2354	for (int i = 0; i < adapter->num_queues; i++, txr++, rxr++) {
2355
2356		/* RX ring */
2357		rid = vector + 1;
2358
2359		rxr->res = bus_alloc_resource_any(dev,
2360		    SYS_RES_IRQ, &rid, RF_ACTIVE);
2361		if (rxr->res == NULL) {
2362			device_printf(dev,
2363			    "Unable to allocate bus resource: "
2364			    "RX MSIX Interrupt %d\n", i);
2365			return (ENXIO);
2366		}
2367		if ((error = bus_setup_intr(dev, rxr->res,
2368		    INTR_TYPE_NET | INTR_MPSAFE, NULL, em_msix_rx,
2369		    rxr, &rxr->tag)) != 0) {
2370			device_printf(dev, "Failed to register RX handler");
2371			return (error);
2372		}
2373		rxr->msix = vector++; /* NOTE increment vector for TX */
2374		TASK_INIT(&rxr->rx_task, 0, em_handle_rx, rxr);
2375		rxr->tq = taskqueue_create_fast("em_rxq", M_NOWAIT,
2376		    taskqueue_thread_enqueue, &rxr->tq);
2377		taskqueue_start_threads(&rxr->tq, 1, PI_NET, "%s rxq",
2378		    device_get_nameunit(adapter->dev));
2379		/*
2380		** Set the bit to enable interrupt
2381		** in E1000_IMS -- bits 20 and 21
2382		** are for RX0 and RX1, note this has
2383		** NOTHING to do with the MSIX vector
2384		*/
2385		rxr->ims = 1 << (20 + i);
2386		adapter->ivars |= (8 | rxr->msix) << (i * 4);
2387
2388		/* TX ring */
2389		rid = vector + 1;
2390		txr->res = bus_alloc_resource_any(dev,
2391		    SYS_RES_IRQ, &rid, RF_ACTIVE);
2392		if (txr->res == NULL) {
2393			device_printf(dev,
2394			    "Unable to allocate bus resource: "
2395			    "TX MSIX Interrupt %d\n", i);
2396			return (ENXIO);
2397		}
2398		if ((error = bus_setup_intr(dev, txr->res,
2399		    INTR_TYPE_NET | INTR_MPSAFE, NULL, em_msix_tx,
2400		    txr, &txr->tag)) != 0) {
2401			device_printf(dev, "Failed to register TX handler");
2402			return (error);
2403		}
2404		txr->msix = vector++; /* Increment vector for next pass */
2405		TASK_INIT(&txr->tx_task, 0, em_handle_tx, txr);
2406		txr->tq = taskqueue_create_fast("em_txq", M_NOWAIT,
2407		    taskqueue_thread_enqueue, &txr->tq);
2408		taskqueue_start_threads(&txr->tq, 1, PI_NET, "%s txq",
2409		    device_get_nameunit(adapter->dev));
2410		/*
2411		** Set the bit to enable interrupt
2412		** in E1000_IMS -- bits 22 and 23
2413		** are for TX0 and TX1, note this has
2414		** NOTHING to do with the MSIX vector
2415		*/
2416		txr->ims = 1 << (22 + i);
2417		adapter->ivars |= (8 | txr->msix) << (8 + (i * 4));
2418	}
2419
2420	/* Link interrupt */
2421	++rid;
2422	adapter->res = bus_alloc_resource_any(dev,
2423	    SYS_RES_IRQ, &rid, RF_ACTIVE);
2424	if (!adapter->res) {
2425		device_printf(dev, "Unable to allocate "
2426		    "bus resource: Link interrupt [%d]\n", rid);
2427		return (ENXIO);
2428	}
2429	/* Set the link handler function */
2430	error = bus_setup_intr(dev, adapter->res,
2431	    INTR_TYPE_NET | INTR_MPSAFE, NULL,
2432	    em_msix_link, adapter, &adapter->tag);
2433	if (error) {
2434		adapter->res = NULL;
2435		device_printf(dev, "Failed to register LINK handler");
2436		return (error);
2437	}
2438	adapter->linkvec = vector;
2439	adapter->ivars |=  (8 | vector) << 16;
2440	adapter->ivars |= 0x80000000;
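	/*
	 * Worked example of the IVAR accumulation for a single queue:
	 * RX0 on vector 0:  (8 | 0) << 0  = 0x00000008
	 * TX0 on vector 1:  (8 | 1) << 8  = 0x00000900
	 * link on vector 2: (8 | 2) << 16 = 0x000a0000
	 * together with 0x80000000 this yields ivars = 0x800a0908,
	 * the 0x8 in each nibble presumably being the valid bit.
	 */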
2441
2442	return (0);
2443}
2444
2445
2446static void
2447em_free_pci_resources(struct adapter *adapter)
2448{
2449	device_t	dev = adapter->dev;
2450	struct tx_ring	*txr;
2451	struct rx_ring	*rxr;
2452	int		rid;
2453
2454
2455	/*
2456	** Release all the queue interrupt resources:
2457	*/
2458	for (int i = 0; i < adapter->num_queues; i++) {
2459		txr = &adapter->tx_rings[i];
2460		rxr = &adapter->rx_rings[i];
2461		rid = txr->msix + 1;
2462		if (txr->tag != NULL) {
2463			bus_teardown_intr(dev, txr->res, txr->tag);
2464			txr->tag = NULL;
2465		}
2466		if (txr->res != NULL)
2467			bus_release_resource(dev, SYS_RES_IRQ,
2468			    rid, txr->res);
2469		rid = rxr->msix + 1;
2470		if (rxr->tag != NULL) {
2471			bus_teardown_intr(dev, rxr->res, rxr->tag);
2472			rxr->tag = NULL;
2473		}
2474		if (rxr->res != NULL)
2475			bus_release_resource(dev, SYS_RES_IRQ,
2476			    rid, rxr->res);
2477	}
2478
2479	if (adapter->linkvec) /* we are doing MSIX */
2480		rid = adapter->linkvec + 1;
2481	else
2482		rid = (adapter->msix != 0) ? 1 : 0;
2483
2484	if (adapter->tag != NULL) {
2485		bus_teardown_intr(dev, adapter->res, adapter->tag);
2486		adapter->tag = NULL;
2487	}
2488
2489	if (adapter->res != NULL)
2490		bus_release_resource(dev, SYS_RES_IRQ, rid, adapter->res);
2491
2492
2493	if (adapter->msix)
2494		pci_release_msi(dev);
2495
2496	if (adapter->msix_mem != NULL)
2497		bus_release_resource(dev, SYS_RES_MEMORY,
2498		    PCIR_BAR(EM_MSIX_BAR), adapter->msix_mem);
2499
2500	if (adapter->memory != NULL)
2501		bus_release_resource(dev, SYS_RES_MEMORY,
2502		    PCIR_BAR(0), adapter->memory);
2503
2504	if (adapter->flash != NULL)
2505		bus_release_resource(dev, SYS_RES_MEMORY,
2506		    EM_FLASH, adapter->flash);
2507}
2508
2509/*
2510 * Setup MSI or MSI/X
2511 */
2512static int
2513em_setup_msix(struct adapter *adapter)
2514{
2515	device_t dev = adapter->dev;
2516	int val = 0;
2517
2518
2519	/* Setup MSI/X for Hartwell */
2520	if ((adapter->hw.mac.type == e1000_82574) &&
2521	    (em_enable_msix == TRUE)) {
2522		/* Map the MSIX BAR */
2523		int rid = PCIR_BAR(EM_MSIX_BAR);
2524		adapter->msix_mem = bus_alloc_resource_any(dev,
2525		    SYS_RES_MEMORY, &rid, RF_ACTIVE);
2526		if (!adapter->msix_mem) {
2527			/* May not be enabled */
2528			device_printf(adapter->dev,
2529			    "Unable to map MSIX table\n");
2530			goto msi;
2531		}
2532		val = pci_msix_count(dev);
2533		if (val != 5) {
2534			bus_release_resource(dev, SYS_RES_MEMORY,
2535			    PCIR_BAR(EM_MSIX_BAR), adapter->msix_mem);
2536			adapter->msix_mem = NULL;
2537			device_printf(adapter->dev,
2538			    "Wrong number of MSIX vectors, using MSI\n");
2539			goto msi;
2540		}
2541		if (em_msix_queues == 2) {
2542			val = 5;
2543			adapter->num_queues = 2;
2544		} else {
2545			val = 3;
2546			adapter->num_queues = 1;
2547		}
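		/*
		 * Vector budget: one RX and one TX vector per queue plus
		 * one vector for link events, hence 5 vectors for two
		 * queues and 3 for a single queue.
		 */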
2548		if (pci_alloc_msix(dev, &val) == 0) {
2549			device_printf(adapter->dev,
2550			    "Using MSIX interrupts "
2551			    "with %d vectors\n", val);
2552		}
2553
2554		return (val);
2555	}
2556msi:
2557	val = pci_msi_count(dev);
2558	if (val == 1 && pci_alloc_msi(dev, &val) == 0) {
2559		adapter->msix = 1;
2560		device_printf(adapter->dev, "Using MSI interrupt\n");
2561		return (val);
2562	}
2563	/* Should only happen due to manual intervention */
2564	device_printf(adapter->dev,"Setup MSIX failure\n");
2565	return (0);
2566}
2567
2568
2569/*********************************************************************
2570 *
2571 *  Initialize the hardware to a configuration
2572 *  as specified by the adapter structure.
2573 *
2574 **********************************************************************/
2575static void
2576em_reset(struct adapter *adapter)
2577{
2578	device_t	dev = adapter->dev;
2579	struct e1000_hw	*hw = &adapter->hw;
2580	u16		rx_buffer_size;
2581
2582	INIT_DEBUGOUT("em_reset: begin");
2583
2584	/* Set up smart power down as default off on newer adapters. */
2585	if (!em_smart_pwr_down && (hw->mac.type == e1000_82571 ||
2586	    hw->mac.type == e1000_82572)) {
2587		u16 phy_tmp = 0;
2588
2589		/* Speed up time to link by disabling smart power down. */
2590		e1000_read_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, &phy_tmp);
2591		phy_tmp &= ~IGP02E1000_PM_SPD;
2592		e1000_write_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, phy_tmp);
2593	}
2594
2595	/*
2596	 * These parameters control the automatic generation (Tx) and
2597	 * response (Rx) to Ethernet PAUSE frames.
2598	 * - High water mark should allow for at least two frames to be
2599	 *   received after sending an XOFF.
2600	 * - Low water mark works best when it is very near the high water mark.
2601	 *   This allows the receiver to restart by sending XON when it has
2602	 *   drained a bit. Here we use an arbitrary value of 1500 which will
2603	 *   restart after one full frame is pulled from the buffer. There
2604	 *   could be several smaller frames in the buffer and if so they will
2605	 *   not trigger the XON until their total number reduces the buffer
2606	 *   by 1500.
2607	 * - The pause time is fairly large at 1000 x 512ns = 512 usec.
2608	 */
2609	rx_buffer_size = ((E1000_READ_REG(hw, E1000_PBA) & 0xffff) << 10 );
2610
2611	hw->fc.high_water = rx_buffer_size -
2612	    roundup2(adapter->max_frame_size, 1024);
2613	hw->fc.low_water = hw->fc.high_water - 1500;
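	/*
	 * Worked example (a sketch, assuming a 48 KB RX packet buffer
	 * and a 1518-byte max frame): rx_buffer_size = 48 << 10 = 49152,
	 * high_water = 49152 - roundup2(1518, 1024) = 47104 and
	 * low_water = 47104 - 1500 = 45604.
	 */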
2614
2615	if (hw->mac.type == e1000_80003es2lan)
2616		hw->fc.pause_time = 0xFFFF;
2617	else
2618		hw->fc.pause_time = EM_FC_PAUSE_TIME;
2619
2620	hw->fc.send_xon = TRUE;
2621
2622	/* Set flow control, using the tunable only if it is sane */
2623	if ((em_fc_setting >= 0) && (em_fc_setting < 4))
2624		hw->fc.requested_mode = em_fc_setting;
2625	else
2626		hw->fc.requested_mode = e1000_fc_none;
2627
2628	/* Override - workaround for PCHLAN issue */
2629	if (hw->mac.type == e1000_pchlan)
2630		hw->fc.requested_mode = e1000_fc_rx_pause;
2631
2632	/* Issue a global reset */
2633	e1000_reset_hw(hw);
2634	E1000_WRITE_REG(hw, E1000_WUC, 0);
2635
2636	if (e1000_init_hw(hw) < 0) {
2637		device_printf(dev, "Hardware Initialization Failed\n");
2638		return;
2639	}
2640
2641	E1000_WRITE_REG(hw, E1000_VET, ETHERTYPE_VLAN);
2642	e1000_get_phy_info(hw);
2643	e1000_check_for_link(hw);
2644	return;
2645}
2646
2647/*********************************************************************
2648 *
2649 *  Setup networking device structure and register an interface.
2650 *
2651 **********************************************************************/
2652static int
2653em_setup_interface(device_t dev, struct adapter *adapter)
2654{
2655	struct ifnet   *ifp;
2656
2657	INIT_DEBUGOUT("em_setup_interface: begin");
2658
2659	ifp = adapter->ifp = if_alloc(IFT_ETHER);
2660	if (ifp == NULL) {
2661		device_printf(dev, "can not allocate ifnet structure\n");
2662		return (-1);
2663	}
2664	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
2665	ifp->if_mtu = ETHERMTU;
2666	ifp->if_init =  em_init;
2667	ifp->if_softc = adapter;
2668	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
2669	ifp->if_ioctl = em_ioctl;
2670	ifp->if_start = em_start;
2671	IFQ_SET_MAXLEN(&ifp->if_snd, adapter->num_tx_desc - 1);
2672	ifp->if_snd.ifq_drv_maxlen = adapter->num_tx_desc - 1;
2673	IFQ_SET_READY(&ifp->if_snd);
2674
2675	ether_ifattach(ifp, adapter->hw.mac.addr);
2676
2677	ifp->if_capabilities = ifp->if_capenable = 0;
2678
2679#ifdef EM_MULTIQUEUE
2680	/* Multiqueue tx functions */
2681	ifp->if_transmit = em_mq_start;
2682	ifp->if_qflush = em_qflush;
2683#endif
2684
2685	ifp->if_capabilities |= IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM;
2686	ifp->if_capenable |= IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM;
2687
2688	/* Enable TSO by default, can disable with ifconfig */
2689	ifp->if_capabilities |= IFCAP_TSO4;
2690	ifp->if_capenable |= IFCAP_TSO4;
2691
2692	/*
2693	 * Tell the upper layer(s) we
2694	 * support full VLAN capability
2695	 */
2696	ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header);
2697	ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU;
2698	ifp->if_capenable |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU;
2699
2700	/*
2701	** Don't turn this on by default: if vlans are
2702	** created on another pseudo device (e.g. lagg)
2703	** then vlan events are not passed through, breaking
2704	** operation, but with HW FILTER off it works. If
2705	** you use vlans directly on the em driver you can
2706	** enable this and get full hardware tag filtering.
2707	*/
2708	ifp->if_capabilities |= IFCAP_VLAN_HWFILTER;
2709
2710#ifdef DEVICE_POLLING
2711	ifp->if_capabilities |= IFCAP_POLLING;
2712#endif
2713
2714	/* Enable only WOL MAGIC by default */
2715	if (adapter->wol) {
2716		ifp->if_capabilities |= IFCAP_WOL;
2717		ifp->if_capenable |= IFCAP_WOL_MAGIC;
2718	}
2719
2720	/*
2721	 * Specify the media types supported by this adapter and register
2722	 * callbacks to update media and link information
2723	 */
2724	ifmedia_init(&adapter->media, IFM_IMASK,
2725	    em_media_change, em_media_status);
2726	if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
2727	    (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
2728		u_char fiber_type = IFM_1000_SX;	/* default type */
2729
2730		ifmedia_add(&adapter->media, IFM_ETHER | fiber_type | IFM_FDX,
2731			    0, NULL);
2732		ifmedia_add(&adapter->media, IFM_ETHER | fiber_type, 0, NULL);
2733	} else {
2734		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T, 0, NULL);
2735		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T | IFM_FDX,
2736			    0, NULL);
2737		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX,
2738			    0, NULL);
2739		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX | IFM_FDX,
2740			    0, NULL);
2741		if (adapter->hw.phy.type != e1000_phy_ife) {
2742			ifmedia_add(&adapter->media,
2743				IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
2744			ifmedia_add(&adapter->media,
2745				IFM_ETHER | IFM_1000_T, 0, NULL);
2746		}
2747	}
2748	ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL);
2749	ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO);
2750	return (0);
2751}
2752
2753
2754/*
2755 * Manage DMA'able memory.
2756 */
2757static void
2758em_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
2759{
2760	if (error)
2761		return;
2762	*(bus_addr_t *) arg = segs[0].ds_addr;
2763}
2764
2765static int
2766em_dma_malloc(struct adapter *adapter, bus_size_t size,
2767        struct em_dma_alloc *dma, int mapflags)
2768{
2769	int error;
2770
2771	error = bus_dma_tag_create(bus_get_dma_tag(adapter->dev), /* parent */
2772				EM_DBA_ALIGN, 0,	/* alignment, bounds */
2773				BUS_SPACE_MAXADDR,	/* lowaddr */
2774				BUS_SPACE_MAXADDR,	/* highaddr */
2775				NULL, NULL,		/* filter, filterarg */
2776				size,			/* maxsize */
2777				1,			/* nsegments */
2778				size,			/* maxsegsize */
2779				0,			/* flags */
2780				NULL,			/* lockfunc */
2781				NULL,			/* lockarg */
2782				&dma->dma_tag);
2783	if (error) {
2784		device_printf(adapter->dev,
2785		    "%s: bus_dma_tag_create failed: %d\n",
2786		    __func__, error);
2787		goto fail_0;
2788	}
2789
2790	error = bus_dmamem_alloc(dma->dma_tag, (void**) &dma->dma_vaddr,
2791	    BUS_DMA_NOWAIT | BUS_DMA_COHERENT, &dma->dma_map);
2792	if (error) {
2793		device_printf(adapter->dev,
2794		    "%s: bus_dmamem_alloc(%ju) failed: %d\n",
2795		    __func__, (uintmax_t)size, error);
2796		goto fail_1;
2797	}
2798
2799	dma->dma_paddr = 0;
2800	error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
2801	    size, em_dmamap_cb, &dma->dma_paddr, mapflags | BUS_DMA_NOWAIT);
2802	if (error || dma->dma_paddr == 0) {
2803		device_printf(adapter->dev,
2804		    "%s: bus_dmamap_load failed: %d\n",
2805		    __func__, error);
2806		goto fail_3;
2807	}
2808
2809	return (0);
2810
2811fail_3:
2812	bus_dmamap_unload(dma->dma_tag, dma->dma_map);
2813fail_2:
2814	bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
fail_1:
2815	bus_dma_tag_destroy(dma->dma_tag);
2816fail_0:
2817	dma->dma_map = NULL;
2818	dma->dma_tag = NULL;
2819
2820	return (error);
2821}
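/*
 * Typical usage (see em_allocate_queues below):
 *	em_dma_malloc(adapter, tsize, &txr->txdma, BUS_DMA_NOWAIT);
 * which leaves the ring mapped for the CPU at txdma.dma_vaddr and
 * its bus address for the hardware in txdma.dma_paddr.
 */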
2822
2823static void
2824em_dma_free(struct adapter *adapter, struct em_dma_alloc *dma)
2825{
2826	if (dma->dma_tag == NULL)
2827		return;
2828	if (dma->dma_map != NULL) {
2829		bus_dmamap_sync(dma->dma_tag, dma->dma_map,
2830		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
2831		bus_dmamap_unload(dma->dma_tag, dma->dma_map);
2832		bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
2833		dma->dma_map = NULL;
2834	}
2835	bus_dma_tag_destroy(dma->dma_tag);
2836	dma->dma_tag = NULL;
2837}
2838
2839
2840/*********************************************************************
2841 *
2842 *  Allocate memory for the transmit and receive rings, and then
2843 *  the descriptors associated with each, called only once at attach.
2844 *
2845 **********************************************************************/
2846static int
2847em_allocate_queues(struct adapter *adapter)
2848{
2849	device_t		dev = adapter->dev;
2850	struct tx_ring		*txr = NULL;
2851	struct rx_ring		*rxr = NULL;
2852	int rsize, tsize, error = E1000_SUCCESS;
2853	int txconf = 0, rxconf = 0;
2854
2855
2856	/* Allocate the TX ring struct memory */
2857	if (!(adapter->tx_rings =
2858	    (struct tx_ring *) malloc(sizeof(struct tx_ring) *
2859	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2860		device_printf(dev, "Unable to allocate TX ring memory\n");
2861		error = ENOMEM;
2862		goto fail;
2863	}
2864
2865	/* Now allocate the RX */
2866	if (!(adapter->rx_rings =
2867	    (struct rx_ring *) malloc(sizeof(struct rx_ring) *
2868	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2869		device_printf(dev, "Unable to allocate RX ring memory\n");
2870		error = ENOMEM;
2871		goto rx_fail;
2872	}
2873
2874	tsize = roundup2(adapter->num_tx_desc *
2875	    sizeof(struct e1000_tx_desc), EM_DBA_ALIGN);
2876	/*
2877	 * Now set up the TX queues, txconf is needed to handle the
2878	 * possibility that things fail midcourse and we need to
2879	 * undo memory gracefully
2880	 */
2881	for (int i = 0; i < adapter->num_queues; i++, txconf++) {
2882		/* Set up some basics */
2883		txr = &adapter->tx_rings[i];
2884		txr->adapter = adapter;
2885		txr->me = i;
2886
2887		/* Initialize the TX lock */
2888		snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
2889		    device_get_nameunit(dev), txr->me);
2890		mtx_init(&txr->tx_mtx, txr->mtx_name, NULL, MTX_DEF);
2891
2892		if (em_dma_malloc(adapter, tsize,
2893			&txr->txdma, BUS_DMA_NOWAIT)) {
2894			device_printf(dev,
2895			    "Unable to allocate TX Descriptor memory\n");
2896			error = ENOMEM;
2897			goto err_tx_desc;
2898		}
2899		txr->tx_base = (struct e1000_tx_desc *)txr->txdma.dma_vaddr;
2900		bzero((void *)txr->tx_base, tsize);
2901
2902		if (em_allocate_transmit_buffers(txr)) {
2903			device_printf(dev,
2904			    "Critical Failure setting up transmit buffers\n");
2905			error = ENOMEM;
2906			goto err_tx_desc;
2907		}
2908#if __FreeBSD_version >= 800000
2909		/* Allocate a buf ring */
2910		txr->br = buf_ring_alloc(4096, M_DEVBUF,
2911		    M_WAITOK, &txr->tx_mtx);
2912#endif
2913	}
2914
2915	/*
2916	 * Next the RX queues...
2917	 */
2918	rsize = roundup2(adapter->num_rx_desc *
2919	    sizeof(struct e1000_rx_desc), EM_DBA_ALIGN);
2920	for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
2921		rxr = &adapter->rx_rings[i];
2922		rxr->adapter = adapter;
2923		rxr->me = i;
2924
2925		/* Initialize the RX lock */
2926		snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
2927		    device_get_nameunit(dev), rxr->me);
2928		mtx_init(&rxr->rx_mtx, rxr->mtx_name, NULL, MTX_DEF);
2929
2930		if (em_dma_malloc(adapter, rsize,
2931			&rxr->rxdma, BUS_DMA_NOWAIT)) {
2932			device_printf(dev,
2933			    "Unable to allocate RxDescriptor memory\n");
2934			error = ENOMEM;
2935			goto err_rx_desc;
2936		}
2937		rxr->rx_base = (struct e1000_rx_desc *)rxr->rxdma.dma_vaddr;
2938		bzero((void *)rxr->rx_base, rsize);
2939
2940		/* Allocate receive buffers for the ring */
2941		if (em_allocate_receive_buffers(rxr)) {
2942			device_printf(dev,
2943			    "Critical Failure setting up receive buffers\n");
2944			error = ENOMEM;
2945			goto err_rx_desc;
2946		}
2947	}
2948
2949	return (0);
2950
2951err_rx_desc:
2952	for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
2953		em_dma_free(adapter, &rxr->rxdma);
2954err_tx_desc:
2955	for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
2956		em_dma_free(adapter, &txr->txdma);
2957	free(adapter->rx_rings, M_DEVBUF);
2958rx_fail:
2959#if __FreeBSD_version >= 800000
2960	if (txr != NULL && txr->br != NULL)
		buf_ring_free(txr->br, M_DEVBUF);
2961#endif
2962	free(adapter->tx_rings, M_DEVBUF);
2963fail:
2964	return (error);
2965}
2966
2967
2968/*********************************************************************
2969 *
2970 *  Allocate memory for tx_buffer structures. The tx_buffer stores all
2971 *  the information needed to transmit a packet on the wire. This is
2972 *  called only once at attach, setup is done every reset.
2973 *
2974 **********************************************************************/
2975static int
2976em_allocate_transmit_buffers(struct tx_ring *txr)
2977{
2978	struct adapter *adapter = txr->adapter;
2979	device_t dev = adapter->dev;
2980	struct em_buffer *txbuf;
2981	int error, i;
2982
2983	/*
2984	 * Setup DMA descriptor areas.
2985	 */
2986	if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
2987			       1, 0,			/* alignment, bounds */
2988			       BUS_SPACE_MAXADDR,	/* lowaddr */
2989			       BUS_SPACE_MAXADDR,	/* highaddr */
2990			       NULL, NULL,		/* filter, filterarg */
2991			       EM_TSO_SIZE,		/* maxsize */
2992			       EM_MAX_SCATTER,		/* nsegments */
2993			       PAGE_SIZE,		/* maxsegsize */
2994			       0,			/* flags */
2995			       NULL,			/* lockfunc */
2996			       NULL,			/* lockfuncarg */
2997			       &txr->txtag))) {
2998		device_printf(dev,"Unable to allocate TX DMA tag\n");
2999		goto fail;
3000	}
3001
3002	if (!(txr->tx_buffers =
3003	    (struct em_buffer *) malloc(sizeof(struct em_buffer) *
3004	    adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3005		device_printf(dev, "Unable to allocate tx_buffer memory\n");
3006		error = ENOMEM;
3007		goto fail;
3008	}
3009
3010	/* Create the descriptor buffer dma maps */
3011	txbuf = txr->tx_buffers;
3012	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3013		error = bus_dmamap_create(txr->txtag, 0, &txbuf->map);
3014		if (error != 0) {
3015			device_printf(dev, "Unable to create TX DMA map\n");
3016			goto fail;
3017		}
3018	}
3019
3020	return 0;
3021fail:
3022	/* We free all, it handles case where we are in the middle */
3023	em_free_transmit_structures(adapter);
3024	return (error);
3025}
3026
3027/*********************************************************************
3028 *
3029 *  Initialize a transmit ring.
3030 *
3031 **********************************************************************/
3032static void
3033em_setup_transmit_ring(struct tx_ring *txr)
3034{
3035	struct adapter *adapter = txr->adapter;
3036	struct em_buffer *txbuf;
3037	int i;
3038
3039	/* Clear the old descriptor contents */
3040	EM_TX_LOCK(txr);
3041	bzero((void *)txr->tx_base,
3042	      (sizeof(struct e1000_tx_desc)) * adapter->num_tx_desc);
3043	/* Reset indices */
3044	txr->next_avail_desc = 0;
3045	txr->next_to_clean = 0;
3046
3047	/* Free any existing tx buffers. */
3048	txbuf = txr->tx_buffers;
3049	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3050		if (txbuf->m_head != NULL) {
3051			bus_dmamap_sync(txr->txtag, txbuf->map,
3052			    BUS_DMASYNC_POSTWRITE);
3053			bus_dmamap_unload(txr->txtag, txbuf->map);
3054			m_freem(txbuf->m_head);
3055			txbuf->m_head = NULL;
3056		}
3057		/* clear the watch index */
3058		txbuf->next_eop = -1;
3059	}
3060
3061	/* Set number of descriptors available */
3062	txr->tx_avail = adapter->num_tx_desc;
3063
3064	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3065	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3066	EM_TX_UNLOCK(txr);
3067}
3068
3069/*********************************************************************
3070 *
3071 *  Initialize all transmit rings.
3072 *
3073 **********************************************************************/
3074static void
3075em_setup_transmit_structures(struct adapter *adapter)
3076{
3077	struct tx_ring *txr = adapter->tx_rings;
3078
3079	for (int i = 0; i < adapter->num_queues; i++, txr++)
3080		em_setup_transmit_ring(txr);
3081
3082	return;
3083}
3084
3085/*********************************************************************
3086 *
3087 *  Enable transmit unit.
3088 *
3089 **********************************************************************/
3090static void
3091em_initialize_transmit_unit(struct adapter *adapter)
3092{
3093	struct tx_ring	*txr = adapter->tx_rings;
3094	struct e1000_hw	*hw = &adapter->hw;
3095	u32	tctl, tarc, tipg = 0;
3096
3097	INIT_DEBUGOUT("em_initialize_transmit_unit: begin");
3098
3099	for (int i = 0; i < adapter->num_queues; i++, txr++) {
3100		u64 bus_addr = txr->txdma.dma_paddr;
3101		/* Base and Len of TX Ring */
3102		E1000_WRITE_REG(hw, E1000_TDLEN(i),
3103	    	    adapter->num_tx_desc * sizeof(struct e1000_tx_desc));
3104		E1000_WRITE_REG(hw, E1000_TDBAH(i),
3105	    	    (u32)(bus_addr >> 32));
3106		E1000_WRITE_REG(hw, E1000_TDBAL(i),
3107	    	    (u32)bus_addr);
3108		/* Init the HEAD/TAIL indices */
3109		E1000_WRITE_REG(hw, E1000_TDT(i), 0);
3110		E1000_WRITE_REG(hw, E1000_TDH(i), 0);
3111
3112		HW_DEBUGOUT2("Base = %x, Length = %x\n",
3113		    E1000_READ_REG(&adapter->hw, E1000_TDBAL(i)),
3114		    E1000_READ_REG(&adapter->hw, E1000_TDLEN(i)));
3115
3116		txr->watchdog_check = FALSE;
3117	}
3118
3119	/* Set the default values for the Tx Inter Packet Gap timer */
3120	switch (adapter->hw.mac.type) {
3121	case e1000_82542:
3122		tipg = DEFAULT_82542_TIPG_IPGT;
3123		tipg |= DEFAULT_82542_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT;
3124		tipg |= DEFAULT_82542_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT;
3125		break;
3126	case e1000_80003es2lan:
3127		tipg = DEFAULT_82543_TIPG_IPGR1;
3128		tipg |= DEFAULT_80003ES2LAN_TIPG_IPGR2 <<
3129		    E1000_TIPG_IPGR2_SHIFT;
3130		break;
3131	default:
3132		if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
3133		    (adapter->hw.phy.media_type ==
3134		    e1000_media_type_internal_serdes))
3135			tipg = DEFAULT_82543_TIPG_IPGT_FIBER;
3136		else
3137			tipg = DEFAULT_82543_TIPG_IPGT_COPPER;
3138		tipg |= DEFAULT_82543_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT;
3139		tipg |= DEFAULT_82543_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT;
3140	}
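	/*
	 * The three fields are packed into the one TIPG register: IPGT in
	 * the low bits, IPGR1 at E1000_TIPG_IPGR1_SHIFT and IPGR2 at
	 * E1000_TIPG_IPGR2_SHIFT; the exact shift values live in the
	 * shared e1000 headers.
	 */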
3141
3142	E1000_WRITE_REG(&adapter->hw, E1000_TIPG, tipg);
3143	E1000_WRITE_REG(&adapter->hw, E1000_TIDV, adapter->tx_int_delay.value);
3144
3145	if (adapter->hw.mac.type >= e1000_82540)
3146		E1000_WRITE_REG(&adapter->hw, E1000_TADV,
3147		    adapter->tx_abs_int_delay.value);
3148
3149	if ((adapter->hw.mac.type == e1000_82571) ||
3150	    (adapter->hw.mac.type == e1000_82572)) {
3151		tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
3152		tarc |= SPEED_MODE_BIT;
3153		E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3154	} else if (adapter->hw.mac.type == e1000_80003es2lan) {
3155		tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
3156		tarc |= 1;
3157		E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3158		tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(1));
3159		tarc |= 1;
3160		E1000_WRITE_REG(&adapter->hw, E1000_TARC(1), tarc);
3161	}
3162
3163	adapter->txd_cmd = E1000_TXD_CMD_IFCS;
3164	if (adapter->tx_int_delay.value > 0)
3165		adapter->txd_cmd |= E1000_TXD_CMD_IDE;
3166
3167	/* Program the Transmit Control Register */
3168	tctl = E1000_READ_REG(&adapter->hw, E1000_TCTL);
3169	tctl &= ~E1000_TCTL_CT;
3170	tctl |= (E1000_TCTL_PSP | E1000_TCTL_RTLC | E1000_TCTL_EN |
3171		   (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT));
3172
3173	if (adapter->hw.mac.type >= e1000_82571)
3174		tctl |= E1000_TCTL_MULR;
3175
3176	/* This write will effectively turn on the transmit unit. */
3177	E1000_WRITE_REG(&adapter->hw, E1000_TCTL, tctl);
3178
3179}
3180
3181
3182/*********************************************************************
3183 *
3184 *  Free all transmit rings.
3185 *
3186 **********************************************************************/
3187static void
3188em_free_transmit_structures(struct adapter *adapter)
3189{
3190	struct tx_ring *txr = adapter->tx_rings;
3191
3192	for (int i = 0; i < adapter->num_queues; i++, txr++) {
3193		EM_TX_LOCK(txr);
3194		em_free_transmit_buffers(txr);
3195		em_dma_free(adapter, &txr->txdma);
3196		EM_TX_UNLOCK(txr);
3197		EM_TX_LOCK_DESTROY(txr);
3198	}
3199
3200	free(adapter->tx_rings, M_DEVBUF);
3201}
3202
3203/*********************************************************************
3204 *
3205 *  Free transmit ring related data structures.
3206 *
3207 **********************************************************************/
3208static void
3209em_free_transmit_buffers(struct tx_ring *txr)
3210{
3211	struct adapter		*adapter = txr->adapter;
3212	struct em_buffer	*txbuf;
3213
3214	INIT_DEBUGOUT("free_transmit_ring: begin");
3215
3216	if (txr->tx_buffers == NULL)
3217		return;
3218
3219	for (int i = 0; i < adapter->num_tx_desc; i++) {
3220		txbuf = &txr->tx_buffers[i];
3221		if (txbuf->m_head != NULL) {
3222			bus_dmamap_sync(txr->txtag, txbuf->map,
3223			    BUS_DMASYNC_POSTWRITE);
3224			bus_dmamap_unload(txr->txtag,
3225			    txbuf->map);
3226			m_freem(txbuf->m_head);
3227			txbuf->m_head = NULL;
3228			if (txbuf->map != NULL) {
3229				bus_dmamap_destroy(txr->txtag,
3230				    txbuf->map);
3231				txbuf->map = NULL;
3232			}
3233		} else if (txbuf->map != NULL) {
3234			bus_dmamap_unload(txr->txtag,
3235			    txbuf->map);
3236			bus_dmamap_destroy(txr->txtag,
3237			    txbuf->map);
3238			txbuf->map = NULL;
3239		}
3240	}
3241#if __FreeBSD_version >= 800000
3242	if (txr->br != NULL)
3243		buf_ring_free(txr->br, M_DEVBUF);
3244#endif
3245	if (txr->tx_buffers != NULL) {
3246		free(txr->tx_buffers, M_DEVBUF);
3247		txr->tx_buffers = NULL;
3248	}
3249	if (txr->txtag != NULL) {
3250		bus_dma_tag_destroy(txr->txtag);
3251		txr->txtag = NULL;
3252	}
3253	return;
3254}
3255
3256
3257/*********************************************************************
3258 *
3259 *  The offload context needs to be set when we transfer the first
3260 *  packet of a particular protocol (TCP/UDP). This routine has been
3261 *  enhanced to deal with inserted VLAN headers, and IPV6 (not complete)
3262 *
3263 *  Added back the old method of keeping the current context type
3264 *  and not setting if unnecessary, as this is reported to be a
3265 *  big performance win.  -jfv
3266 **********************************************************************/
3267static void
3268em_transmit_checksum_setup(struct tx_ring *txr, struct mbuf *mp,
3269    u32 *txd_upper, u32 *txd_lower)
3270{
3271	struct adapter			*adapter = txr->adapter;
3272	struct e1000_context_desc	*TXD = NULL;
3273	struct em_buffer *tx_buffer;
3274	struct ether_vlan_header *eh;
3275	struct ip *ip = NULL;
3276	struct ip6_hdr *ip6;
3277	int cur, ehdrlen;
3278	u32 cmd, hdr_len, ip_hlen;
3279	u16 etype;
3280	u8 ipproto;
3281
3282
3283	cmd = hdr_len = ipproto = 0;
3284	*txd_upper = *txd_lower = 0;
3285	cur = txr->next_avail_desc;
3286
3287	/*
3288	 * Determine where frame payload starts.
3289	 * Jump over vlan headers if already present,
3290	 * helpful for QinQ too.
3291	 */
3292	eh = mtod(mp, struct ether_vlan_header *);
3293	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
3294		etype = ntohs(eh->evl_proto);
3295		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3296	} else {
3297		etype = ntohs(eh->evl_encap_proto);
3298		ehdrlen = ETHER_HDR_LEN;
3299	}
3300
3301	/*
3302	 * We only support TCP/UDP for IPv4 and IPv6 for the moment.
3303	 * TODO: Support SCTP too when it hits the tree.
3304	 */
3305	switch (etype) {
3306	case ETHERTYPE_IP:
3307		ip = (struct ip *)(mp->m_data + ehdrlen);
3308		ip_hlen = ip->ip_hl << 2;
3309
3310		/* Setup of IP header checksum. */
3311		if (mp->m_pkthdr.csum_flags & CSUM_IP) {
3312			/*
3313			 * Start offset for header checksum calculation.
3314			 * End offset for header checksum calculation.
3315			 * Offset of place to put the checksum.
3316			 */
3317			TXD = (struct e1000_context_desc *)
3318			    &txr->tx_base[cur];
3319			TXD->lower_setup.ip_fields.ipcss = ehdrlen;
3320			TXD->lower_setup.ip_fields.ipcse =
3321			    htole16(ehdrlen + ip_hlen);
3322			TXD->lower_setup.ip_fields.ipcso =
3323			    ehdrlen + offsetof(struct ip, ip_sum);
3324			cmd |= E1000_TXD_CMD_IP;
3325			*txd_upper |= E1000_TXD_POPTS_IXSM << 8;
3326		}
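		/*
		 * Example for an untagged IPv4 frame with a 20-byte header:
		 * ehdrlen = 14 and ip_hlen = 20, so ipcss = 14, ipcse = 34
		 * and ipcso = 14 + offsetof(struct ip, ip_sum) = 24.
		 */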
3327
3328		hdr_len = ehdrlen + ip_hlen;
3329		ipproto = ip->ip_p;
3330		break;
3331
3332	case ETHERTYPE_IPV6:
3333		ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
3334		ip_hlen = sizeof(struct ip6_hdr); /* XXX: No header stacking. */
3335
3336		/* IPv6 doesn't have a header checksum. */
3337
3338		hdr_len = ehdrlen + ip_hlen;
3339		ipproto = ip6->ip6_nxt;
3340		break;
3341
3342	default:
3343		return;
3344	}
3345
3346	switch (ipproto) {
3347	case IPPROTO_TCP:
3348		if (mp->m_pkthdr.csum_flags & CSUM_TCP) {
3349			*txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
3350			*txd_upper |= E1000_TXD_POPTS_TXSM << 8;
3351			/* no need for context if already set */
3352			if (txr->last_hw_offload == CSUM_TCP)
3353				return;
3354			txr->last_hw_offload = CSUM_TCP;
3355			/*
3356			 * Start offset for payload checksum calculation.
3357			 * End offset for payload checksum calculation.
3358			 * Offset of place to put the checksum.
3359			 */
3360			TXD = (struct e1000_context_desc *)
3361			    &txr->tx_base[cur];
3362			TXD->upper_setup.tcp_fields.tucss = hdr_len;
3363			TXD->upper_setup.tcp_fields.tucse = htole16(0);
3364			TXD->upper_setup.tcp_fields.tucso =
3365			    hdr_len + offsetof(struct tcphdr, th_sum);
3366			cmd |= E1000_TXD_CMD_TCP;
3367		}
3368		break;
3369	case IPPROTO_UDP:
3370	{
3371		if (mp->m_pkthdr.csum_flags & CSUM_UDP) {
3372			*txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
3373			*txd_upper |= E1000_TXD_POPTS_TXSM << 8;
3374			/* no need for context if already set */
3375			if (txr->last_hw_offload == CSUM_UDP)
3376				return;
3377			txr->last_hw_offload = CSUM_UDP;
3378			/*
3379			 * Start offset for payload checksum calculation.
3380			 * End offset for payload checksum calculation.
3381			 * Offset of place to put the checksum.
3382			 */
3383			TXD = (struct e1000_context_desc *)
3384			    &txr->tx_base[cur];
3385			TXD->upper_setup.tcp_fields.tucss = hdr_len;
3386			TXD->upper_setup.tcp_fields.tucse = htole16(0);
3387			TXD->upper_setup.tcp_fields.tucso =
3388			    hdr_len + offsetof(struct udphdr, uh_sum);
3389		}
3390		/* Fall Thru */
3391	}
3392	default:
3393		break;
3394	}
3395
3396	if (TXD == NULL)
3397		return;
3398	TXD->tcp_seg_setup.data = htole32(0);
3399	TXD->cmd_and_length =
3400	    htole32(adapter->txd_cmd | E1000_TXD_CMD_DEXT | cmd);
3401	tx_buffer = &txr->tx_buffers[cur];
3402	tx_buffer->m_head = NULL;
3403	tx_buffer->next_eop = -1;
3404
3405	if (++cur == adapter->num_tx_desc)
3406		cur = 0;
3407
3408	txr->tx_avail--;
3409	txr->next_avail_desc = cur;
3410}
3411
3412
3413/**********************************************************************
3414 *
3415 *  Setup work for hardware segmentation offload (TSO)
3416 *
3417 **********************************************************************/
3418static bool
3419em_tso_setup(struct tx_ring *txr, struct mbuf *mp, u32 *txd_upper,
3420   u32 *txd_lower)
3421{
3422	struct adapter			*adapter = txr->adapter;
3423	struct e1000_context_desc	*TXD;
3424	struct em_buffer		*tx_buffer;
3425	struct ether_vlan_header	*eh;
3426	struct ip			*ip;
3427	struct ip6_hdr			*ip6;
3428	struct tcphdr			*th;
3429	int cur, ehdrlen, hdr_len, ip_hlen, isip6;
3430	u16 etype;
3431
3432	/*
3433	 * This function could/should be extended to support IP/IPv6
3434	 * fragmentation as well.  But as they say, one step at a time.
3435	 */
3436
3437	/*
3438	 * Determine where frame payload starts.
3439	 * Jump over vlan headers if already present,
3440	 * helpful for QinQ too.
3441	 */
3442	eh = mtod(mp, struct ether_vlan_header *);
3443	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
3444		etype = ntohs(eh->evl_proto);
3445		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3446	} else {
3447		etype = ntohs(eh->evl_encap_proto);
3448		ehdrlen = ETHER_HDR_LEN;
3449	}
3450
3451	/* Ensure we have at least the IP+TCP header in the first mbuf. */
3452	if (mp->m_len < ehdrlen + sizeof(struct ip) + sizeof(struct tcphdr))
3453		return FALSE;	/* -1 */
3454
3455	/*
3456	 * We only support TCP for IPv4 and IPv6 (notyet) for the moment.
3457	 * TODO: Support SCTP too when it hits the tree.
3458	 */
3459	switch (etype) {
3460	case ETHERTYPE_IP:
3461		isip6 = 0;
3462		ip = (struct ip *)(mp->m_data + ehdrlen);
3463		if (ip->ip_p != IPPROTO_TCP)
3464			return FALSE;	/* 0 */
3465		ip->ip_len = 0;
3466		ip->ip_sum = 0;
3467		ip_hlen = ip->ip_hl << 2;
3468		if (mp->m_len < ehdrlen + ip_hlen + sizeof(struct tcphdr))
3469			return FALSE;	/* -1 */
3470		th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
3471#if 1
3472		th->th_sum = in_pseudo(ip->ip_src.s_addr,
3473		    ip->ip_dst.s_addr, htons(IPPROTO_TCP));
3474#else
3475		th->th_sum = mp->m_pkthdr.csum_data;
3476#endif
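		/*
		 * The pseudo-header checksum seeded above deliberately
		 * omits the TCP length; the hardware is expected to fold
		 * in the per-segment length as it cuts each packet.
		 */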
3477		break;
3478	case ETHERTYPE_IPV6:
3479		isip6 = 1;
3480		return FALSE;			/* Not supported yet. */
3481		ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
3482		if (ip6->ip6_nxt != IPPROTO_TCP)
3483			return FALSE;	/* 0 */
3484		ip6->ip6_plen = 0;
3485		ip_hlen = sizeof(struct ip6_hdr); /* XXX: no header stacking. */
3486		if (mp->m_len < ehdrlen + ip_hlen + sizeof(struct tcphdr))
3487			return FALSE;	/* -1 */
3488		th = (struct tcphdr *)((caddr_t)ip6 + ip_hlen);
3489#if 0
3490		th->th_sum = in6_pseudo(ip6->ip6_src, ip6->ip6_dst,
3491		    htons(IPPROTO_TCP));	/* XXX: function notyet. */
3492#else
3493		th->th_sum = mp->m_pkthdr.csum_data;
3494#endif
3495		break;
3496	default:
3497		return FALSE;
3498	}
3499	hdr_len = ehdrlen + ip_hlen + (th->th_off << 2);
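	/* E.g. untagged IPv4/TCP without options: 14 + 20 + 20 = 54 bytes */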
3500
3501	*txd_lower = (E1000_TXD_CMD_DEXT |	/* Extended descr type */
3502		      E1000_TXD_DTYP_D |	/* Data descr type */
3503		      E1000_TXD_CMD_TSE);	/* Do TSE on this packet */
3504
3505	/* IP and/or TCP header checksum calculation and insertion. */
3506	*txd_upper = ((isip6 ? 0 : E1000_TXD_POPTS_IXSM) |
3507		      E1000_TXD_POPTS_TXSM) << 8;
3508
3509	cur = txr->next_avail_desc;
3510	tx_buffer = &txr->tx_buffers[cur];
3511	TXD = (struct e1000_context_desc *) &txr->tx_base[cur];
3512
3513	/* IPv6 doesn't have a header checksum. */
3514	if (!isip6) {
3515		/*
3516		 * Start offset for header checksum calculation.
3517		 * End offset for header checksum calculation.
3518		 * Offset of place to put the checksum.
3519		 */
3520		TXD->lower_setup.ip_fields.ipcss = ehdrlen;
3521		TXD->lower_setup.ip_fields.ipcse =
3522		    htole16(ehdrlen + ip_hlen - 1);
3523		TXD->lower_setup.ip_fields.ipcso =
3524		    ehdrlen + offsetof(struct ip, ip_sum);
3525	}
3526	/*
3527	 * Start offset for payload checksum calculation.
3528	 * End offset for payload checksum calculation.
3529	 * Offset of place to put the checksum.
3530	 */
3531	TXD->upper_setup.tcp_fields.tucss =
3532	    ehdrlen + ip_hlen;
3533	TXD->upper_setup.tcp_fields.tucse = 0;
3534	TXD->upper_setup.tcp_fields.tucso =
3535	    ehdrlen + ip_hlen + offsetof(struct tcphdr, th_sum);
3536	/*
3537	 * Payload size per packet w/o any headers.
3538	 * Length of all headers up to payload.
3539	 */
3540	TXD->tcp_seg_setup.fields.mss = htole16(mp->m_pkthdr.tso_segsz);
3541	TXD->tcp_seg_setup.fields.hdr_len = hdr_len;
3542
3543	TXD->cmd_and_length = htole32(adapter->txd_cmd |
3544				E1000_TXD_CMD_DEXT |	/* Extended descr */
3545				E1000_TXD_CMD_TSE |	/* TSE context */
3546				(isip6 ? 0 : E1000_TXD_CMD_IP) |
3547				E1000_TXD_CMD_TCP |	/* Do TCP checksum */
3548				(mp->m_pkthdr.len - (hdr_len))); /* Total len */
3549
3550	tx_buffer->m_head = NULL;
3551	tx_buffer->next_eop = -1;
3552
3553	if (++cur == adapter->num_tx_desc)
3554		cur = 0;
3555
3556	txr->tx_avail--;
3557	txr->next_avail_desc = cur;
3558	txr->tx_tso = TRUE;
3559
3560	return TRUE;
3561}
3562
3563
3564/**********************************************************************
3565 *
3566 *  Examine each tx_buffer in the used queue. If the hardware is done
3567 *  processing the packet then free associated resources. The
3568 *  tx_buffer is put back on the free queue.
3569 *
3570 **********************************************************************/
3571static bool
3572em_txeof(struct tx_ring *txr)
3573{
3574	struct adapter	*adapter = txr->adapter;
3575	int first, last, done, num_avail;
3576	struct em_buffer *tx_buffer;
3577	struct e1000_tx_desc *tx_desc, *eop_desc;
3578	struct ifnet   *ifp = adapter->ifp;
3579
3580	EM_TX_LOCK_ASSERT(txr);
3581
3582	if (txr->tx_avail == adapter->num_tx_desc)
3583		return (FALSE);
3584
3585	num_avail = txr->tx_avail;
3586	first = txr->next_to_clean;
3587	tx_desc = &txr->tx_base[first];
3588	tx_buffer = &txr->tx_buffers[first];
3589	last = tx_buffer->next_eop;
3590	eop_desc = &txr->tx_base[last];
3591
3592	/*
3593	 * Get the index of the first descriptor
3594	 * AFTER the EOP of the first packet, so
3595	 * that we can do a simple comparison in
3596	 * the inner while loop.
3597	 */
3598	if (++last == adapter->num_tx_desc)
3599		last = 0;
3600	done = last;
3601
3602	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3603	    BUS_DMASYNC_POSTREAD);
3604
3605	while (eop_desc->upper.fields.status & E1000_TXD_STAT_DD) {
3606		/* We clean the range of the packet */
3607		while (first != done) {
3608			tx_desc->upper.data = 0;
3609			tx_desc->lower.data = 0;
3610			tx_desc->buffer_addr = 0;
3611			++num_avail;
3612
3613			if (tx_buffer->m_head) {
3614				ifp->if_opackets++;
3615				bus_dmamap_sync(txr->txtag,
3616				    tx_buffer->map,
3617				    BUS_DMASYNC_POSTWRITE);
3618				bus_dmamap_unload(txr->txtag,
3619				    tx_buffer->map);
3620
3621				m_freem(tx_buffer->m_head);
3622				tx_buffer->m_head = NULL;
3623			}
3624			tx_buffer->next_eop = -1;
3625			txr->watchdog_time = ticks;
3626
3627			if (++first == adapter->num_tx_desc)
3628				first = 0;
3629
3630			tx_buffer = &txr->tx_buffers[first];
3631			tx_desc = &txr->tx_base[first];
3632		}
3633		/* See if we can continue to the next packet */
3634		last = tx_buffer->next_eop;
3635		if (last != -1) {
3636			eop_desc = &txr->tx_base[last];
3637			/* Get new done point */
3638			if (++last == adapter->num_tx_desc)
				last = 0;
3639			done = last;
3640		} else
3641			break;
3642	}
3643	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3644	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3645
3646	txr->next_to_clean = first;
3647
3648	/*
3649	 * If we have enough room, clear IFF_DRV_OACTIVE to
3650	 * tell the stack that it is OK to send packets.
3651	 * If there are no pending descriptors, clear the watchdog.
3652	 */
3653	if (num_avail > EM_TX_CLEANUP_THRESHOLD) {
3654		ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
3655		if (num_avail == adapter->num_tx_desc) {
3656			txr->watchdog_check = FALSE;
3657			txr->tx_avail = num_avail;
3658			return (FALSE);
3659		}
3660	}
3661
3662	txr->tx_avail = num_avail;
3663	return (TRUE);
3664}
3665
3666
3667/*********************************************************************
3668 *
3669 *  Refresh RX descriptor mbufs from system mbuf buffer pool.
3670 *
3671 **********************************************************************/
3672static void
3673em_refresh_mbufs(struct rx_ring *rxr, int limit)
3674{
3675	struct adapter		*adapter = rxr->adapter;
3676	struct mbuf		*m;
3677	bus_dma_segment_t	segs[1];
3678	bus_dmamap_t		map;
3679	struct em_buffer	*rxbuf;
3680	int			i, error, nsegs, cleaned;
3681
3682	i = rxr->next_to_refresh;
3683	cleaned = -1;
3684	while (i != limit) {
3685		m = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR);
3686		if (m == NULL)
3687			goto update;
3688		m->m_len = m->m_pkthdr.len = MCLBYTES;
3689
3690		if (adapter->max_frame_size <= (MCLBYTES - ETHER_ALIGN))
3691			m_adj(m, ETHER_ALIGN);
3692
3693		/*
3694		 * Using memory from the mbuf cluster pool, invoke the
3695		 * bus_dma machinery to arrange the memory mapping.
3696		 */
3697		error = bus_dmamap_load_mbuf_sg(rxr->rxtag, rxr->rx_sparemap,
3698		    m, segs, &nsegs, BUS_DMA_NOWAIT);
3699		if (error != 0) {
3700			m_free(m);
3701			goto update;
3702		}
3703
3704		/* If nsegs is wrong then the stack is corrupt. */
3705		KASSERT(nsegs == 1, ("Too many segments returned!"));
3706
3707		rxbuf = &rxr->rx_buffers[i];
3708		if (rxbuf->m_head != NULL)
3709			bus_dmamap_unload(rxr->rxtag, rxbuf->map);
3710
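		/*
		 * The new mbuf was DMA-loaded on the spare map above, so
		 * swap the spare with this slot's map: the slot takes the
		 * freshly loaded map and the old one becomes the new spare.
		 */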
3711		map = rxbuf->map;
3712		rxbuf->map = rxr->rx_sparemap;
3713		rxr->rx_sparemap = map;
3714		bus_dmamap_sync(rxr->rxtag,
3715		    rxbuf->map, BUS_DMASYNC_PREREAD);
3716		rxbuf->m_head = m;
3717		rxr->rx_base[i].buffer_addr = htole64(segs[0].ds_addr);
3718
3719		cleaned = i;
3720		/* Calculate next index */
3721		if (++i == adapter->num_rx_desc)
3722			i = 0;
3723		/* This is the work marker for refresh */
3724		rxr->next_to_refresh = i;
3725	}
3726update:
3727	bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
3728	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3729	if (cleaned != -1) /* Update tail index */
3730		E1000_WRITE_REG(&adapter->hw,
3731		    E1000_RDT(rxr->me), cleaned);
3732
3733	return;
3734}
3735
3736
3737/*********************************************************************
3738 *
3739 *  Allocate memory for rx_buffer structures. Since we use one
3740 *  rx_buffer per received packet, the maximum number of rx_buffer's
3741 *  that we'll need is equal to the number of receive descriptors
3742 *  that we've allocated.
3743 *
3744 **********************************************************************/
3745static int
3746em_allocate_receive_buffers(struct rx_ring *rxr)
3747{
3748	struct adapter		*adapter = rxr->adapter;
3749	device_t		dev = adapter->dev;
3750	struct em_buffer	*rxbuf;
3751	int			error;
3752
3753	rxr->rx_buffers = malloc(sizeof(struct em_buffer) *
3754	    adapter->num_rx_desc, M_DEVBUF, M_NOWAIT | M_ZERO);
3755	if (rxr->rx_buffers == NULL) {
3756		device_printf(dev, "Unable to allocate rx_buffer memory\n");
3757		return (ENOMEM);
3758	}
3759
3760	error = bus_dma_tag_create(bus_get_dma_tag(dev), /* parent */
3761				1, 0,			/* alignment, bounds */
3762				BUS_SPACE_MAXADDR,	/* lowaddr */
3763				BUS_SPACE_MAXADDR,	/* highaddr */
3764				NULL, NULL,		/* filter, filterarg */
3765				MCLBYTES,		/* maxsize */
3766				1,			/* nsegments */
3767				MCLBYTES,		/* maxsegsize */
3768				0,			/* flags */
3769				NULL,			/* lockfunc */
3770				NULL,			/* lockarg */
3771				&rxr->rxtag);
3772	if (error) {
3773		device_printf(dev, "%s: bus_dma_tag_create failed %d\n",
3774		    __func__, error);
3775		goto fail;
3776	}
3777
3778	/* Create the spare map (used by getbuf) */
3779	error = bus_dmamap_create(rxr->rxtag, BUS_DMA_NOWAIT,
3780	     &rxr->rx_sparemap);
3781	if (error) {
3782		device_printf(dev, "%s: bus_dmamap_create failed: %d\n",
3783		    __func__, error);
3784		goto fail;
3785	}
3786
3787	rxbuf = rxr->rx_buffers;
3788	for (int i = 0; i < adapter->num_rx_desc; i++, rxbuf++) {
3790		error = bus_dmamap_create(rxr->rxtag, BUS_DMA_NOWAIT,
3791		    &rxbuf->map);
3792		if (error) {
3793			device_printf(dev, "%s: bus_dmamap_create failed: %d\n",
3794			    __func__, error);
3795			goto fail;
3796		}
3797	}
3798
3799	return (0);
3800
3801fail:
3802	em_free_receive_structures(adapter);
3803	return (error);
3804}
3805
3806
3807/*********************************************************************
3808 *
3809 *  Initialize a receive ring and its buffers.
3810 *
3811 **********************************************************************/
3812static int
3813em_setup_receive_ring(struct rx_ring *rxr)
3814{
3815	struct	adapter 	*adapter = rxr->adapter;
3816	struct em_buffer	*rxbuf;
3817	bus_dma_segment_t	seg[1];
3818	int			rsize, nsegs, error;
3819
3820
3821	/* Clear the ring contents */
3822	EM_RX_LOCK(rxr);
3823	rsize = roundup2(adapter->num_rx_desc *
3824	    sizeof(struct e1000_rx_desc), EM_DBA_ALIGN);
3825	bzero((void *)rxr->rx_base, rsize);
3826
3827	/*
3828	** Free current RX buffer structs and their mbufs
3829	*/
3830	for (int i = 0; i < adapter->num_rx_desc; i++) {
3831		rxbuf = &rxr->rx_buffers[i];
3832		if (rxbuf->m_head != NULL) {
3833			bus_dmamap_sync(rxr->rxtag, rxbuf->map,
3834			    BUS_DMASYNC_POSTREAD);
3835			bus_dmamap_unload(rxr->rxtag, rxbuf->map);
3836			m_freem(rxbuf->m_head);
3837		}
3838	}
3839
3840	/* Now replenish the mbufs */
3841	for (int j = 0; j != adapter->num_rx_desc; ++j) {
3842
3843		rxbuf = &rxr->rx_buffers[j];
3844		rxbuf->m_head = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR);
3845		if (rxbuf->m_head == NULL) {
3846			EM_RX_UNLOCK(rxr);
			return (ENOBUFS);
		}
3847		rxbuf->m_head->m_len = MCLBYTES;
3848		rxbuf->m_head->m_flags &= ~M_HASFCS; /* we strip it */
3849		rxbuf->m_head->m_pkthdr.len = MCLBYTES;
3850
3851		/* Get the memory mapping */
3852		error = bus_dmamap_load_mbuf_sg(rxr->rxtag,
3853		    rxbuf->map, rxbuf->m_head, seg,
3854		    &nsegs, BUS_DMA_NOWAIT);
3855		if (error != 0) {
3856			m_freem(rxbuf->m_head);
3857			rxbuf->m_head = NULL;
			EM_RX_UNLOCK(rxr);
3858			return (error);
3859		}
3860		bus_dmamap_sync(rxr->rxtag,
3861		    rxbuf->map, BUS_DMASYNC_PREREAD);
3862
3863		/* Update descriptor */
3864		rxr->rx_base[j].buffer_addr = htole64(seg[0].ds_addr);
3865	}
3866
3867
3868	/* Setup our descriptor indices */
3869	rxr->next_to_check = 0;
3870	rxr->next_to_refresh = 0;
3871
3872	bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
3873	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3874
3875	EM_RX_UNLOCK(rxr);
3876	return (0);
3877}
3878
3879/*********************************************************************
3880 *
3881 *  Initialize all receive rings.
3882 *
3883 **********************************************************************/
3884static int
3885em_setup_receive_structures(struct adapter *adapter)
3886{
3887	struct rx_ring *rxr = adapter->rx_rings;
3888	int j;
3889
3890	for (j = 0; j < adapter->num_queues; j++, rxr++)
3891		if (em_setup_receive_ring(rxr))
3892			goto fail;
3893
3894	return (0);
3895fail:
3896	/*
3897	 * Free the RX buffers allocated so far. Ring 'j' is the one
3898	 * that failed, and it may hold partially allocated buffers,
3899	 * so include it in the cleanup as well.
3900	 */
3901	for (int i = 0; i <= j; ++i) {
3902		rxr = &adapter->rx_rings[i];
3903		for (int n = 0; n < adapter->num_rx_desc; n++) {
3904			struct em_buffer *rxbuf;
3905			rxbuf = &rxr->rx_buffers[n];
3906			if (rxbuf->m_head != NULL) {
3907				bus_dmamap_sync(rxr->rxtag, rxbuf->map,
3908			  	  BUS_DMASYNC_POSTREAD);
3909				bus_dmamap_unload(rxr->rxtag, rxbuf->map);
3910				m_freem(rxbuf->m_head);
3911				rxbuf->m_head = NULL;
3912			}
3913		}
3914	}
3915
3916	return (ENOBUFS);
3917}
3918
3919/*********************************************************************
3920 *
3921 *  Free all receive rings.
3922 *
3923 **********************************************************************/
3924static void
3925em_free_receive_structures(struct adapter *adapter)
3926{
3927	struct rx_ring *rxr = adapter->rx_rings;
3928
3929	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
3930		em_free_receive_buffers(rxr);
3931		/* Free the ring memory as well */
3932		em_dma_free(adapter, &rxr->rxdma);
3933		EM_RX_LOCK_DESTROY(rxr);
3934	}
3935
3936	free(adapter->rx_rings, M_DEVBUF);
3937}
3938
3939
3940/*********************************************************************
3941 *
3942 *  Free receive ring data structures
3943 *
3944 **********************************************************************/
3945static void
3946em_free_receive_buffers(struct rx_ring *rxr)
3947{
3948	struct adapter		*adapter = rxr->adapter;
3949	struct em_buffer	*rxbuf = NULL;
3950
3951	INIT_DEBUGOUT("free_receive_buffers: begin");
3952
3953	if (rxr->rx_sparemap) {
3954		bus_dmamap_destroy(rxr->rxtag, rxr->rx_sparemap);
3955		rxr->rx_sparemap = NULL;
3956	}
3957
3958	if (rxr->rx_buffers != NULL) {
3959		for (int i = 0; i < adapter->num_rx_desc; i++) {
3960			rxbuf = &rxr->rx_buffers[i];
3961			if (rxbuf->map != NULL) {
3962				bus_dmamap_sync(rxr->rxtag, rxbuf->map,
3963				    BUS_DMASYNC_POSTREAD);
3964				bus_dmamap_unload(rxr->rxtag, rxbuf->map);
3965				bus_dmamap_destroy(rxr->rxtag, rxbuf->map);
3966			}
3967			if (rxbuf->m_head != NULL) {
3968				m_freem(rxbuf->m_head);
3969				rxbuf->m_head = NULL;
3970			}
3971		}
3972		free(rxr->rx_buffers, M_DEVBUF);
3973		rxr->rx_buffers = NULL;
3974	}
3975
3976	if (rxr->rxtag != NULL) {
3977		bus_dma_tag_destroy(rxr->rxtag);
3978		rxr->rxtag = NULL;
3979	}
3980
3981	return;
3982}
3983
3984
3985/*********************************************************************
3986 *
3987 *  Enable receive unit.
3988 *
3989 **********************************************************************/
3990#define MAX_INTS_PER_SEC	8000
3991#define DEFAULT_ITR	     (1000000000/(MAX_INTS_PER_SEC * 256))
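/*
 * Worked out: the ITR register counts in 256ns units, so
 * 1000000000 / (8000 * 256) ~= 488, and writing ~488 throttles the
 * device to roughly MAX_INTS_PER_SEC (8000) interrupts per second.
 */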
3992
3993static void
3994em_initialize_receive_unit(struct adapter *adapter)
3995{
3996	struct rx_ring	*rxr = adapter->rx_rings;
3997	struct ifnet	*ifp = adapter->ifp;
3998	struct e1000_hw	*hw = &adapter->hw;
3999	u64	bus_addr;
4000	u32	rctl, rxcsum;
4001
4002	INIT_DEBUGOUT("em_initialize_receive_unit: begin");
4003
4004	/*
4005	 * Make sure receives are disabled while setting
4006	 * up the descriptor ring
4007	 */
4008	rctl = E1000_READ_REG(hw, E1000_RCTL);
4009	E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
4010
4011	E1000_WRITE_REG(&adapter->hw, E1000_RADV,
4012	    adapter->rx_abs_int_delay.value);
4013	/*
4014	 * Set the interrupt throttling rate. Value is calculated
4015	 * as DEFAULT_ITR = 1/(MAX_INTS_PER_SEC * 256ns)
4016	 */
4017	E1000_WRITE_REG(hw, E1000_ITR, DEFAULT_ITR);
4018
4019	/*
4020	** When using MSIX interrupts we need to throttle
4021	** using the EITR register (82574 only)
4022	*/
4023	if (hw->mac.type == e1000_82574)
4024		for (int i = 0; i < 4; i++)
4025			E1000_WRITE_REG(hw, E1000_EITR_82574(i),
4026			    DEFAULT_ITR);
4027
4028	/* Disable accelerated acknowledgement */
4029	if (adapter->hw.mac.type == e1000_82574)
4030		E1000_WRITE_REG(hw, E1000_RFCTL, E1000_RFCTL_ACK_DIS);
4031
4032	if (ifp->if_capenable & IFCAP_RXCSUM) {
4033		rxcsum = E1000_READ_REG(hw, E1000_RXCSUM);
4034		rxcsum |= (E1000_RXCSUM_IPOFL | E1000_RXCSUM_TUOFL);
4035		E1000_WRITE_REG(hw, E1000_RXCSUM, rxcsum);
4036	}
4037
4038	/*
4039	** XXX TEMPORARY WORKAROUND: on some systems with 82573
4040	** long latencies are observed, like Lenovo X60. This
4041	** change eliminates the problem, but since having positive
4042	** values in RDTR is a known source of problems on other
4043	** platforms another solution is being sought.
4044	*/
4045	if (hw->mac.type == e1000_82573)
4046		E1000_WRITE_REG(hw, E1000_RDTR, 0x20);
4047
4048	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4049		/* Setup the Base and Length of the Rx Descriptor Ring */
4050		bus_addr = rxr->rxdma.dma_paddr;
4051		E1000_WRITE_REG(hw, E1000_RDLEN(i),
4052		    adapter->num_rx_desc * sizeof(struct e1000_rx_desc));
4053		E1000_WRITE_REG(hw, E1000_RDBAH(i), (u32)(bus_addr >> 32));
4054		E1000_WRITE_REG(hw, E1000_RDBAL(i), (u32)bus_addr);
4055		/* Setup the Head and Tail Descriptor Pointers */
4056		E1000_WRITE_REG(hw, E1000_RDH(i), 0);
4057		E1000_WRITE_REG(hw, E1000_RDT(i), adapter->num_rx_desc - 1);
4058	}
4059
4060	/* Setup the Receive Control Register */
4061	rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
4062	rctl |= E1000_RCTL_EN | E1000_RCTL_BAM |
4063	    E1000_RCTL_LBM_NO | E1000_RCTL_RDMTS_HALF |
4064	    (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
4065
4066	/* Strip the CRC */
4067	rctl |= E1000_RCTL_SECRC;
4068
4069	/* Make sure VLAN Filters are off */
4070	rctl &= ~E1000_RCTL_VFE;
4071	rctl &= ~E1000_RCTL_SBP;
4072	rctl |= E1000_RCTL_SZ_2048;
4073	if (ifp->if_mtu > ETHERMTU)
4074		rctl |= E1000_RCTL_LPE;
4075	else
4076		rctl &= ~E1000_RCTL_LPE;
4077
4078	/* Write out the settings */
4079	E1000_WRITE_REG(hw, E1000_RCTL, rctl);
4080
4081	return;
4082}
4083
4084
4085/*********************************************************************
4086 *
4087 *  This routine executes in interrupt context. It replenishes
4088 *  the mbufs in the descriptor ring and passes data which has
4089 *  been DMA'ed into host memory up to the stack.
4090 *
4091 *  We loop at most count times if count is > 0, or until done if
4092 *  count < 0.
4093 *
4094 *  For polling we also now return the number of cleaned packets
4095 *********************************************************************/
4096static bool
4097em_rxeof(struct rx_ring *rxr, int count, int *done)
4098{
4099	struct adapter		*adapter = rxr->adapter;
4100	struct ifnet		*ifp = adapter->ifp;
4101	struct mbuf		*mp, *sendmp;
4102	u8			status = 0;
4103	u16 			len;
4104	int			i, processed, rxdone = 0;
4105	bool			eop;
4106	struct e1000_rx_desc	*cur;
4107
4108	EM_RX_LOCK(rxr);
4109
4110	for (i = rxr->next_to_check, processed = 0; count != 0;) {
4111
4112		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
4113			break;
4114
4115		bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4116		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
4117
4118		cur = &rxr->rx_base[i];
4119		status = cur->status;
4120		mp = sendmp = NULL;
4121
4122		if ((status & E1000_RXD_STAT_DD) == 0)
4123			break;
4124
4125		len = le16toh(cur->length);
4126		eop = (status & E1000_RXD_STAT_EOP) != 0;
4127		count--;
4128
4129		if ((cur->errors & E1000_RXD_ERR_FRAME_ERR_MASK) == 0) {
4130
4131			/* Assign correct length to the current fragment */
4132			mp = rxr->rx_buffers[i].m_head;
4133			mp->m_len = len;
4134
4135			if (rxr->fmp == NULL) {
4136				mp->m_pkthdr.len = len;
4137				rxr->fmp = mp; /* Store the first mbuf */
4138				rxr->lmp = mp;
4139			} else {
4140				/* Chain mbuf's together */
4141				mp->m_flags &= ~M_PKTHDR;
4142				rxr->lmp->m_next = mp;
4143				rxr->lmp = rxr->lmp->m_next;
4144				rxr->fmp->m_pkthdr.len += len;
4145			}
4146
4147			if (eop) {
4148				rxr->fmp->m_pkthdr.rcvif = ifp;
4149				ifp->if_ipackets++;
4150				em_receive_checksum(cur, rxr->fmp);
4151#ifndef __NO_STRICT_ALIGNMENT
4152				if (adapter->max_frame_size >
4153				    (MCLBYTES - ETHER_ALIGN) &&
4154				    em_fixup_rx(rxr) != 0)
4155					goto skip;
4156#endif
4157				if (status & E1000_RXD_STAT_VP) {
4158					rxr->fmp->m_pkthdr.ether_vtag =
4159					    (le16toh(cur->special) &
4160					    E1000_RXD_SPC_VLAN_MASK);
4161					rxr->fmp->m_flags |= M_VLANTAG;
4162				}
4163#ifdef EM_MULTIQUEUE
4164				rxr->fmp->m_pkthdr.flowid = curcpu;
4165				rxr->fmp->m_flags |= M_FLOWID;
4166#endif
4167#ifndef __NO_STRICT_ALIGNMENT
4168skip:
4169#endif
4170				sendmp = rxr->fmp;
4171				rxr->fmp = NULL;
4172				rxr->lmp = NULL;
4173			}
4174		} else {
4175			ifp->if_ierrors++;
4176			/* Reuse loaded DMA map and just update mbuf chain */
4177			mp = rxr->rx_buffers[i].m_head;
4178			mp->m_len = mp->m_pkthdr.len = MCLBYTES;
4179			mp->m_data = mp->m_ext.ext_buf;
4180			mp->m_next = NULL;
4181			if (adapter->max_frame_size <=
4182			    (MCLBYTES - ETHER_ALIGN))
4183				m_adj(mp, ETHER_ALIGN);
4184			if (rxr->fmp != NULL) {
4185				m_freem(rxr->fmp);
4186				rxr->fmp = NULL;
4187				rxr->lmp = NULL;
4188			}
4189			sendmp = NULL;
4190		}
4191
4192		/* Zero out the receive descriptors status. */
4193		cur->status = 0;
4194		++rxdone;	/* cumulative for POLL */
4195		++processed;
4196
4197		/* Advance our pointers to the next descriptor. */
4198		if (++i == adapter->num_rx_desc)
4199			i = 0;
4200
4201		/* Send to the stack */
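		/*
		 * Drop the RX lock while handing the packet to the
		 * stack: if_input() may re-enter the driver, and
		 * next_to_check can move while we are unlocked, so
		 * re-read it afterwards.
		 */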
4202		if (sendmp != NULL) {
4203			rxr->next_to_check = i;
4204			EM_RX_UNLOCK(rxr);
4205			(*ifp->if_input)(ifp, sendmp);
4206			EM_RX_LOCK(rxr);
4207			i = rxr->next_to_check;
4208		}
4209
4210		/* Only refresh mbufs every 8 descriptors */
4211		if (processed == 8) {
4212			em_refresh_mbufs(rxr, i);
4213			processed = 0;
4214		}
4215	}
4216
4217	/* Catch any remaining refresh work */
4218	if (processed != 0) {
4219		em_refresh_mbufs(rxr, i);
4220		processed = 0;
4221	}
4222
4223	rxr->next_to_check = i;
4224	if (done != NULL)
4225		*done = rxdone;
4226	EM_RX_UNLOCK(rxr);
4227
4228	return ((status & E1000_RXD_STAT_DD) ? TRUE : FALSE);
4229}
4230
4231#ifndef __NO_STRICT_ALIGNMENT
4232/*
4233 * When jumbo frames are enabled we should realign the entire payload on
4234 * architectures with strict alignment. This is a serious design mistake
4235 * in the 8254x, as it nullifies the benefit of DMA. The 8254x only allows
4236 * RX buffer sizes of 2048/4096/8192/16384; what we really want is 2048 -
4237 * ETHER_ALIGN so the payload comes out aligned. On architectures without
4238 * strict alignment restrictions the 8254x still performs unaligned memory
4239 * accesses, which reduce performance as well. To avoid copying an entire
4240 * frame to realign it, we allocate a new mbuf, copy the ethernet header
4241 * into it, and prepend the new mbuf to the existing mbuf chain.
4242 *
4243 * Be aware that best performance from the 8254x is achieved only when
4244 * jumbo frames are not used at all on architectures with strict alignment.
4245 */
4246static int
4247em_fixup_rx(struct rx_ring *rxr)
4248{
4249	struct adapter *adapter = rxr->adapter;
4250	struct mbuf *m, *n;
4251	int error;
4252
4253	error = 0;
4254	m = rxr->fmp;
4255	if (m->m_len <= (MCLBYTES - ETHER_HDR_LEN)) {
4256		bcopy(m->m_data, m->m_data + ETHER_HDR_LEN, m->m_len);
4257		m->m_data += ETHER_HDR_LEN;
4258	} else {
4259		MGETHDR(n, M_DONTWAIT, MT_DATA);
4260		if (n != NULL) {
4261			bcopy(m->m_data, n->m_data, ETHER_HDR_LEN);
4262			m->m_data += ETHER_HDR_LEN;
4263			m->m_len -= ETHER_HDR_LEN;
4264			n->m_len = ETHER_HDR_LEN;
4265			M_MOVE_PKTHDR(n, m);
4266			n->m_next = m;
4267			rxr->fmp = n;
4268		} else {
4269			adapter->dropped_pkts++;
4270			m_freem(rxr->fmp);
4271			rxr->fmp = NULL;
4272			error = ENOMEM;
4273		}
4274	}
4275
4276	return (error);
4277}
4278#endif
4279
4280/*********************************************************************
4281 *
4282 *  Verify that the hardware indicated that the checksum is valid.
4283 *  Inform the stack about the status of checksum so that stack
4284 *  doesn't spend time verifying the checksum.
4285 *
4286 *********************************************************************/
4287static void
4288em_receive_checksum(struct e1000_rx_desc *rx_desc, struct mbuf *mp)
4289{
4290	/* Ignore Checksum bit is set */
4291	if (rx_desc->status & E1000_RXD_STAT_IXSM) {
4292		mp->m_pkthdr.csum_flags = 0;
4293		return;
4294	}
4295
4296	if (rx_desc->status & E1000_RXD_STAT_IPCS) {
4297		/* Did it pass? */
4298		if (!(rx_desc->errors & E1000_RXD_ERR_IPE)) {
4299			/* IP Checksum Good */
4300			mp->m_pkthdr.csum_flags = CSUM_IP_CHECKED;
4301			mp->m_pkthdr.csum_flags |= CSUM_IP_VALID;
4302
4303		} else {
4304			mp->m_pkthdr.csum_flags = 0;
4305		}
4306	}
4307
4308	if (rx_desc->status & E1000_RXD_STAT_TCPCS) {
4309		/* Did it pass? */
4310		if (!(rx_desc->errors & E1000_RXD_ERR_TCPE)) {
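			/*
			 * 0xffff in csum_data together with
			 * CSUM_PSEUDO_HDR tells the stack the TCP/UDP
			 * checksum has already been verified in full.
			 */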
4311			mp->m_pkthdr.csum_flags |=
4312			(CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
4313			mp->m_pkthdr.csum_data = htons(0xffff);
4314		}
4315	}
4316}
4317
4318/*
4319 * This routine is run via a vlan
4320 * config EVENT.
4321 */
4322static void
4323em_register_vlan(void *arg, struct ifnet *ifp, u16 vtag)
4324{
4325	struct adapter	*adapter = ifp->if_softc;
4326	u32		index, bit;
4327
4328	if (ifp->if_softc !=  arg)   /* Not our event */
4329		return;
4330
4331	if ((vtag == 0) || (vtag > 4095))       /* Invalid ID */
4332		return;
4333
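	/*
	 * The VFTA is 4096 bits held in 128 32-bit words: bits 11:5
	 * of the VLAN ID select the word, bits 4:0 the bit within it.
	 */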
4334	index = (vtag >> 5) & 0x7F;
4335	bit = vtag & 0x1F;
4336	em_shadow_vfta[index] |= (1 << bit);
4337	++adapter->num_vlans;
4338	/* Re-init to load the changes */
4339	em_init(adapter);
4340}
4341
4342/*
4343 * This routine is run via a vlan
4344 * unconfig EVENT.
4345 */
4346static void
4347em_unregister_vlan(void *arg, struct ifnet *ifp, u16 vtag)
4348{
4349	struct adapter	*adapter = ifp->if_softc;
4350	u32		index, bit;
4351
4352	if (ifp->if_softc !=  arg)
4353		return;
4354
4355	if ((vtag == 0) || (vtag > 4095))       /* Invalid */
4356		return;
4357
4358	index = (vtag >> 5) & 0x7F;
4359	bit = vtag & 0x1F;
4360	em_shadow_vfta[index] &= ~(1 << bit);
4361	--adapter->num_vlans;
4362	/* Re-init to load the changes */
4363	em_init(adapter);
4364}
4365
4366static void
4367em_setup_vlan_hw_support(struct adapter *adapter)
4368{
4369	struct e1000_hw *hw = &adapter->hw;
4370	u32             reg;
4371
4372	/*
4373	** We get here through init_locked, meaning a
4374	** soft reset has already cleared the VFTA and
4375	** other state, so if no vlans have been
4376	** registered there is nothing to do.
4377	*/
4378	if (adapter->num_vlans == 0)
4379		return;
4380
4381	/*
4382	** A soft reset zero's out the VFTA, so
4383	** we need to repopulate it now.
4384	*/
4385	for (int i = 0; i < EM_VFTA_SIZE; i++)
4386		if (em_shadow_vfta[i] != 0)
4387			E1000_WRITE_REG_ARRAY(hw, E1000_VFTA,
4388			    i, em_shadow_vfta[i]);
4389
4390	reg = E1000_READ_REG(hw, E1000_CTRL);
4391	reg |= E1000_CTRL_VME;
4392	E1000_WRITE_REG(hw, E1000_CTRL, reg);
4393
4394	/* Enable the Filter Table */
4395	reg = E1000_READ_REG(hw, E1000_RCTL);
4396	reg &= ~E1000_RCTL_CFIEN;
4397	reg |= E1000_RCTL_VFE;
4398	E1000_WRITE_REG(hw, E1000_RCTL, reg);
4399
4400	/* Update the frame size */
4401	E1000_WRITE_REG(&adapter->hw, E1000_RLPML,
4402	    adapter->max_frame_size + VLAN_TAG_SIZE);
4403}
4404
4405static void
4406em_enable_intr(struct adapter *adapter)
4407{
4408	struct e1000_hw *hw = &adapter->hw;
4409	u32 ims_mask = IMS_ENABLE_MASK;
4410
4411	if (hw->mac.type == e1000_82574) {
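		/* 82574 MSIX: let the queue vector causes auto-clear via EIAC. */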
4412		E1000_WRITE_REG(hw, EM_EIAC, EM_MSIX_MASK);
4413		ims_mask |= EM_MSIX_MASK;
4414	}
4415	E1000_WRITE_REG(hw, E1000_IMS, ims_mask);
4416}
4417
4418static void
4419em_disable_intr(struct adapter *adapter)
4420{
4421	struct e1000_hw *hw = &adapter->hw;
4422
4423	if (hw->mac.type == e1000_82574)
4424		E1000_WRITE_REG(hw, EM_EIAC, 0);
4425	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
4426}
4427
4428/*
4429 * Bit of a misnomer, what this really means is
4430 * to enable OS management of the system... aka
4431 * to disable special hardware management features
4432 */
4433static void
4434em_init_manageability(struct adapter *adapter)
4435{
4436	/* A shared code workaround */
4437#define E1000_82542_MANC2H E1000_MANC2H
4438	if (adapter->has_manage) {
4439		int manc2h = E1000_READ_REG(&adapter->hw, E1000_MANC2H);
4440		int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
4441
4442		/* disable hardware interception of ARP */
4443		manc &= ~(E1000_MANC_ARP_EN);
4444
4445		/* enable receiving management packets to the host */
4446		manc |= E1000_MANC_EN_MNG2HOST;
4447#define E1000_MNG2HOST_PORT_623 (1 << 5)
4448#define E1000_MNG2HOST_PORT_664 (1 << 6)
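		/* 623 and 664 are the standard ASF/RMCP remote management ports. */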
4449		manc2h |= E1000_MNG2HOST_PORT_623;
4450		manc2h |= E1000_MNG2HOST_PORT_664;
4451		E1000_WRITE_REG(&adapter->hw, E1000_MANC2H, manc2h);
4452		E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
4453	}
4454}
4455
4456/*
4457 * Give control back to hardware management
4458 * controller if there is one.
4459 */
4460static void
4461em_release_manageability(struct adapter *adapter)
4462{
4463	if (adapter->has_manage) {
4464		int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
4465
4466		/* re-enable hardware interception of ARP */
4467		manc |= E1000_MANC_ARP_EN;
4468		manc &= ~E1000_MANC_EN_MNG2HOST;
4469
4470		E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
4471	}
4472}
4473
4474/*
4475 * em_get_hw_control sets the {CTRL_EXT|FWSM}:DRV_LOAD bit.
4476 * For ASF and Pass Through versions of f/w this means
4477 * that the driver is loaded. For AMT version type f/w
4478 * this means that the network i/f is open.
4479 */
4480static void
4481em_get_hw_control(struct adapter *adapter)
4482{
4483	u32 ctrl_ext, swsm;
4484
4485	if (adapter->hw.mac.type == e1000_82573) {
4486		swsm = E1000_READ_REG(&adapter->hw, E1000_SWSM);
4487		E1000_WRITE_REG(&adapter->hw, E1000_SWSM,
4488		    swsm | E1000_SWSM_DRV_LOAD);
4489		return;
4490	}
4491	/* else */
4492	ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4493	E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
4494	    ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
4495	return;
4496}
4497
4498/*
4499 * em_release_hw_control resets {CTRL_EXT|FWSM}:DRV_LOAD bit.
4500 * For ASF and Pass Through versions of f/w this means that
4501 * the driver is no longer loaded. For AMT versions of the
4502 * f/w this means that the network i/f is closed.
4503 */
4504static void
4505em_release_hw_control(struct adapter *adapter)
4506{
4507	u32 ctrl_ext, swsm;
4508
4509	if (!adapter->has_manage)
4510		return;
4511
4512	if (adapter->hw.mac.type == e1000_82573) {
4513		swsm = E1000_READ_REG(&adapter->hw, E1000_SWSM);
4514		E1000_WRITE_REG(&adapter->hw, E1000_SWSM,
4515		    swsm & ~E1000_SWSM_DRV_LOAD);
4516		return;
4517	}
4518	/* else */
4519	ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4520	E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
4521	    ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
4522	return;
4523}
4524
4525static int
4526em_is_valid_ether_addr(u8 *addr)
4527{
4528	char zero_addr[6] = { 0, 0, 0, 0, 0, 0 };
4529
4530	if ((addr[0] & 1) || (!bcmp(addr, zero_addr, ETHER_ADDR_LEN))) {
4531		return (FALSE);
4532	}
4533
4534	return (TRUE);
4535}
4536
4537/*
4538** Parse the interface capabilities with regard
4539** to both system management and wake-on-lan for
4540** later use.
4541*/
4542static void
4543em_get_wakeup(device_t dev)
4544{
4545	struct adapter	*adapter = device_get_softc(dev);
4546	u16		eeprom_data = 0, device_id, apme_mask;
4547
4548	adapter->has_manage = e1000_enable_mng_pass_thru(&adapter->hw);
4549	apme_mask = EM_EEPROM_APME;
4550
4551	switch (adapter->hw.mac.type) {
4552	case e1000_82573:
4553	case e1000_82583:
4554		adapter->has_amt = TRUE;
4555		/* Falls thru */
4556	case e1000_82571:
4557	case e1000_82572:
4558	case e1000_80003es2lan:
4559		if (adapter->hw.bus.func == 1)
4560			e1000_read_nvm(&adapter->hw,
4561			    NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data);
4562		else
4563			e1000_read_nvm(&adapter->hw,
4564			    NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
4565		break;
4567	case e1000_ich8lan:
4568	case e1000_ich9lan:
4569	case e1000_ich10lan:
4570	case e1000_pchlan:
4571		apme_mask = E1000_WUC_APME;
4572		adapter->has_amt = TRUE;
4573		eeprom_data = E1000_READ_REG(&adapter->hw, E1000_WUC);
4574		break;
4575	default:
4576		e1000_read_nvm(&adapter->hw,
4577		    NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
4578		break;
4579	}
4580	if (eeprom_data & apme_mask)
4581		adapter->wol = (E1000_WUFC_MAG | E1000_WUFC_MC);
4582	/*
4583	 * We have the eeprom settings, now apply the special cases
4584	 * where the eeprom may be wrong or the board won't support
4585	 * wake on lan on a particular port.
4586	 */
4587	device_id = pci_get_device(dev);
4588	switch (device_id) {
4589	case E1000_DEV_ID_82571EB_FIBER:
4590		/* Wake events only supported on port A for dual fiber
4591		 * regardless of eeprom setting */
4592		if (E1000_READ_REG(&adapter->hw, E1000_STATUS) &
4593		    E1000_STATUS_FUNC_1)
4594			adapter->wol = 0;
4595		break;
4596	case E1000_DEV_ID_82571EB_QUAD_COPPER:
4597	case E1000_DEV_ID_82571EB_QUAD_FIBER:
4598	case E1000_DEV_ID_82571EB_QUAD_COPPER_LP:
4599		/* If quad port adapter, disable WoL on all but port A */
4600		if (global_quad_port_a != 0)
4601			adapter->wol = 0;
4602		/* Reset for multiple quad port adapters */
4603		if (++global_quad_port_a == 4)
4604			global_quad_port_a = 0;
4605		break;
4606	}
4607	return;
4608}
4609
4610
4611/*
4612 * Enable PCI Wake On Lan capability
4613 */
4614static void
4615em_enable_wakeup(device_t dev)
4616{
4617	struct adapter	*adapter = device_get_softc(dev);
4618	struct ifnet	*ifp = adapter->ifp;
4619	u32		pmc, ctrl, ctrl_ext, rctl;
4620	u16     	status;
4621
4622	if ((pci_find_extcap(dev, PCIY_PMG, &pmc) != 0))
4623		return;
4624
4625	/* Advertise the wakeup capability */
4626	ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL);
4627	ctrl |= (E1000_CTRL_SWDPIN2 | E1000_CTRL_SWDPIN3);
4628	E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl);
4629	E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
4630
4631	if ((adapter->hw.mac.type == e1000_ich8lan) ||
4632	    (adapter->hw.mac.type == e1000_pchlan) ||
4633	    (adapter->hw.mac.type == e1000_ich9lan) ||
4634	    (adapter->hw.mac.type == e1000_ich10lan)) {
4635		e1000_disable_gig_wol_ich8lan(&adapter->hw);
4636		e1000_hv_phy_powerdown_workaround_ich8lan(&adapter->hw);
4637	}
4638
4639	/* Keep the laser running on Fiber adapters */
4640	if (adapter->hw.phy.media_type == e1000_media_type_fiber ||
4641	    adapter->hw.phy.media_type == e1000_media_type_internal_serdes) {
4642		ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4643		ctrl_ext |= E1000_CTRL_EXT_SDP3_DATA;
4644		E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, ctrl_ext);
4645	}
4646
4647	/*
4648	** Determine type of Wakeup: note that wol
4649	** is set with all bits on by default.
4650	*/
4651	if ((ifp->if_capenable & IFCAP_WOL_MAGIC) == 0)
4652		adapter->wol &= ~E1000_WUFC_MAG;
4653
4654	if ((ifp->if_capenable & IFCAP_WOL_MCAST) == 0)
4655		adapter->wol &= ~E1000_WUFC_MC;
4656	else {
4657		rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
4658		rctl |= E1000_RCTL_MPE;
4659		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, rctl);
4660	}
4661
4662	if (adapter->hw.mac.type == e1000_pchlan) {
4663		if (em_enable_phy_wakeup(adapter))
4664			return;
4665	} else {
4666		E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
4667		E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
4668	}
4669
4670	if (adapter->hw.phy.type == e1000_phy_igp_3)
4671		e1000_igp3_phy_powerdown_workaround_ich8lan(&adapter->hw);
4672
4673	/* Request PME */
4674	status = pci_read_config(dev, pmc + PCIR_POWER_STATUS, 2);
4675	status &= ~(PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE);
4676	if (ifp->if_capenable & IFCAP_WOL)
4677		status |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE;
4678	pci_write_config(dev, pmc + PCIR_POWER_STATUS, status, 2);
4679
4680	return;
4681}
4682
4683/*
4684** WOL in the newer chipset interfaces (pchlan)
4685** requires the settings to be copied into the phy
4686*/
4687static int
4688em_enable_phy_wakeup(struct adapter *adapter)
4689{
4690	struct e1000_hw *hw = &adapter->hw;
4691	u32 mreg, ret = 0;
4692	u16 preg;
4693
4694	/* copy MAC RARs to PHY RARs */
4695	for (int i = 0; i < adapter->hw.mac.rar_entry_count; i++) {
4696		mreg = E1000_READ_REG(hw, E1000_RAL(i));
4697		e1000_write_phy_reg(hw, BM_RAR_L(i), (u16)(mreg & 0xFFFF));
4698		e1000_write_phy_reg(hw, BM_RAR_M(i),
4699		    (u16)((mreg >> 16) & 0xFFFF));
4700		mreg = E1000_READ_REG(hw, E1000_RAH(i));
4701		e1000_write_phy_reg(hw, BM_RAR_H(i), (u16)(mreg & 0xFFFF));
4702		e1000_write_phy_reg(hw, BM_RAR_CTRL(i),
4703		    (u16)((mreg >> 16) & 0xFFFF));
4704	}
4705
4706	/* copy MAC MTA to PHY MTA */
4707	for (int i = 0; i < adapter->hw.mac.mta_reg_count; i++) {
4708		mreg = E1000_READ_REG_ARRAY(hw, E1000_MTA, i);
4709		e1000_write_phy_reg(hw, BM_MTA(i), (u16)(mreg & 0xFFFF));
4710		e1000_write_phy_reg(hw, BM_MTA(i) + 1,
4711		    (u16)((mreg >> 16) & 0xFFFF));
4712	}
4713
4714	/* configure PHY Rx Control register */
4715	e1000_read_phy_reg(&adapter->hw, BM_RCTL, &preg);
4716	mreg = E1000_READ_REG(hw, E1000_RCTL);
4717	if (mreg & E1000_RCTL_UPE)
4718		preg |= BM_RCTL_UPE;
4719	if (mreg & E1000_RCTL_MPE)
4720		preg |= BM_RCTL_MPE;
4721	preg &= ~(BM_RCTL_MO_MASK);
4722	if (mreg & E1000_RCTL_MO_3)
4723		preg |= (((mreg & E1000_RCTL_MO_3) >> E1000_RCTL_MO_SHIFT)
4724				<< BM_RCTL_MO_SHIFT);
4725	if (mreg & E1000_RCTL_BAM)
4726		preg |= BM_RCTL_BAM;
4727	if (mreg & E1000_RCTL_PMCF)
4728		preg |= BM_RCTL_PMCF;
4729	mreg = E1000_READ_REG(hw, E1000_CTRL);
4730	if (mreg & E1000_CTRL_RFCE)
4731		preg |= BM_RCTL_RFCE;
4732	e1000_write_phy_reg(&adapter->hw, BM_RCTL, preg);
4733
4734	/* enable PHY wakeup in MAC register */
4735	E1000_WRITE_REG(hw, E1000_WUC,
4736	    E1000_WUC_PHY_WAKE | E1000_WUC_PME_EN);
4737	E1000_WRITE_REG(hw, E1000_WUFC, adapter->wol);
4738
4739	/* configure and enable PHY wakeup in PHY registers */
4740	e1000_write_phy_reg(&adapter->hw, BM_WUFC, adapter->wol);
4741	e1000_write_phy_reg(&adapter->hw, BM_WUC, E1000_WUC_PME_EN);
4742
4743	/* activate PHY wakeup */
4744	ret = hw->phy.ops.acquire(hw);
4745	if (ret) {
4746		printf("Could not acquire PHY\n");
4747		return ret;
4748	}
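	/*
	 * The wakeup registers live on PHY page 769
	 * (BM_WUC_ENABLE_PAGE); select that page first, then set the
	 * enable and host-wakeup bits.
	 */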
4749	e1000_write_phy_reg_mdic(hw, IGP01E1000_PHY_PAGE_SELECT,
4750	                         (BM_WUC_ENABLE_PAGE << IGP_PAGE_SHIFT));
4751	ret = e1000_read_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, &preg);
4752	if (ret) {
4753		printf("Could not read PHY page 769\n");
4754		goto out;
4755	}
4756	preg |= BM_WUC_ENABLE_BIT | BM_WUC_HOST_WU_BIT;
4757	ret = e1000_write_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, preg);
4758	if (ret)
4759		printf("Could not set PHY Host Wakeup bit\n");
4760out:
4761	hw->phy.ops.release(hw);
4762
4763	return ret;
4764}
4765
4766static void
4767em_led_func(void *arg, int onoff)
4768{
4769	struct adapter	*adapter = arg;
4770
4771	EM_CORE_LOCK(adapter);
4772	if (onoff) {
4773		e1000_setup_led(&adapter->hw);
4774		e1000_led_on(&adapter->hw);
4775	} else {
4776		e1000_led_off(&adapter->hw);
4777		e1000_cleanup_led(&adapter->hw);
4778	}
4779	EM_CORE_UNLOCK(adapter);
4780}
4781
4782/**********************************************************************
4783 *
4784 *  Update the board statistics counters.
4785 *
4786 **********************************************************************/
4787static void
4788em_update_stats_counters(struct adapter *adapter)
4789{
4790	struct ifnet   *ifp;
4791
4792	if (adapter->hw.phy.media_type == e1000_media_type_copper ||
4793	   (E1000_READ_REG(&adapter->hw, E1000_STATUS) & E1000_STATUS_LU)) {
4794		adapter->stats.symerrs += E1000_READ_REG(&adapter->hw, E1000_SYMERRS);
4795		adapter->stats.sec += E1000_READ_REG(&adapter->hw, E1000_SEC);
4796	}
4797	adapter->stats.crcerrs += E1000_READ_REG(&adapter->hw, E1000_CRCERRS);
4798	adapter->stats.mpc += E1000_READ_REG(&adapter->hw, E1000_MPC);
4799	adapter->stats.scc += E1000_READ_REG(&adapter->hw, E1000_SCC);
4800	adapter->stats.ecol += E1000_READ_REG(&adapter->hw, E1000_ECOL);
4801
4802	adapter->stats.mcc += E1000_READ_REG(&adapter->hw, E1000_MCC);
4803	adapter->stats.latecol += E1000_READ_REG(&adapter->hw, E1000_LATECOL);
4804	adapter->stats.colc += E1000_READ_REG(&adapter->hw, E1000_COLC);
4805	adapter->stats.dc += E1000_READ_REG(&adapter->hw, E1000_DC);
4806	adapter->stats.rlec += E1000_READ_REG(&adapter->hw, E1000_RLEC);
4807	adapter->stats.xonrxc += E1000_READ_REG(&adapter->hw, E1000_XONRXC);
4808	adapter->stats.xontxc += E1000_READ_REG(&adapter->hw, E1000_XONTXC);
4809	adapter->stats.xoffrxc += E1000_READ_REG(&adapter->hw, E1000_XOFFRXC);
4810	adapter->stats.xofftxc += E1000_READ_REG(&adapter->hw, E1000_XOFFTXC);
4811	adapter->stats.fcruc += E1000_READ_REG(&adapter->hw, E1000_FCRUC);
4812	adapter->stats.prc64 += E1000_READ_REG(&adapter->hw, E1000_PRC64);
4813	adapter->stats.prc127 += E1000_READ_REG(&adapter->hw, E1000_PRC127);
4814	adapter->stats.prc255 += E1000_READ_REG(&adapter->hw, E1000_PRC255);
4815	adapter->stats.prc511 += E1000_READ_REG(&adapter->hw, E1000_PRC511);
4816	adapter->stats.prc1023 += E1000_READ_REG(&adapter->hw, E1000_PRC1023);
4817	adapter->stats.prc1522 += E1000_READ_REG(&adapter->hw, E1000_PRC1522);
4818	adapter->stats.gprc += E1000_READ_REG(&adapter->hw, E1000_GPRC);
4819	adapter->stats.bprc += E1000_READ_REG(&adapter->hw, E1000_BPRC);
4820	adapter->stats.mprc += E1000_READ_REG(&adapter->hw, E1000_MPRC);
4821	adapter->stats.gptc += E1000_READ_REG(&adapter->hw, E1000_GPTC);
4822
4823	/* For the 64-bit byte counters the low dword must be read first. */
4824	/* Both registers clear on the read of the high dword */
4825
4826	adapter->stats.gorc += E1000_READ_REG(&adapter->hw, E1000_GORCL) +
	    ((u64)E1000_READ_REG(&adapter->hw, E1000_GORCH) << 32);
4827	adapter->stats.gotc += E1000_READ_REG(&adapter->hw, E1000_GOTCL) +
	    ((u64)E1000_READ_REG(&adapter->hw, E1000_GOTCH) << 32);
4828
4829	adapter->stats.rnbc += E1000_READ_REG(&adapter->hw, E1000_RNBC);
4830	adapter->stats.ruc += E1000_READ_REG(&adapter->hw, E1000_RUC);
4831	adapter->stats.rfc += E1000_READ_REG(&adapter->hw, E1000_RFC);
4832	adapter->stats.roc += E1000_READ_REG(&adapter->hw, E1000_ROC);
4833	adapter->stats.rjc += E1000_READ_REG(&adapter->hw, E1000_RJC);
4834
4835	adapter->stats.tor += E1000_READ_REG(&adapter->hw, E1000_TORL) +
	    ((u64)E1000_READ_REG(&adapter->hw, E1000_TORH) << 32);
4836	adapter->stats.tot += E1000_READ_REG(&adapter->hw, E1000_TOTL) +
	    ((u64)E1000_READ_REG(&adapter->hw, E1000_TOTH) << 32);
4837
4838	adapter->stats.tpr += E1000_READ_REG(&adapter->hw, E1000_TPR);
4839	adapter->stats.tpt += E1000_READ_REG(&adapter->hw, E1000_TPT);
4840	adapter->stats.ptc64 += E1000_READ_REG(&adapter->hw, E1000_PTC64);
4841	adapter->stats.ptc127 += E1000_READ_REG(&adapter->hw, E1000_PTC127);
4842	adapter->stats.ptc255 += E1000_READ_REG(&adapter->hw, E1000_PTC255);
4843	adapter->stats.ptc511 += E1000_READ_REG(&adapter->hw, E1000_PTC511);
4844	adapter->stats.ptc1023 += E1000_READ_REG(&adapter->hw, E1000_PTC1023);
4845	adapter->stats.ptc1522 += E1000_READ_REG(&adapter->hw, E1000_PTC1522);
4846	adapter->stats.mptc += E1000_READ_REG(&adapter->hw, E1000_MPTC);
4847	adapter->stats.bptc += E1000_READ_REG(&adapter->hw, E1000_BPTC);
4848
4849	if (adapter->hw.mac.type >= e1000_82543) {
4850		adapter->stats.algnerrc +=
4851		E1000_READ_REG(&adapter->hw, E1000_ALGNERRC);
4852		adapter->stats.rxerrc +=
4853		E1000_READ_REG(&adapter->hw, E1000_RXERRC);
4854		adapter->stats.tncrs +=
4855		E1000_READ_REG(&adapter->hw, E1000_TNCRS);
4856		adapter->stats.cexterr +=
4857		E1000_READ_REG(&adapter->hw, E1000_CEXTERR);
4858		adapter->stats.tsctc +=
4859		E1000_READ_REG(&adapter->hw, E1000_TSCTC);
4860		adapter->stats.tsctfc +=
4861		E1000_READ_REG(&adapter->hw, E1000_TSCTFC);
4862	}
4863	ifp = adapter->ifp;
4864
4865	ifp->if_collisions = adapter->stats.colc;
4866
4867	/* Rx Errors */
4868	ifp->if_ierrors = adapter->dropped_pkts + adapter->stats.rxerrc +
4869	    adapter->stats.crcerrs + adapter->stats.algnerrc +
4870	    adapter->stats.ruc + adapter->stats.roc +
4871	    adapter->stats.mpc + adapter->stats.cexterr;
4872
4873	/* Tx Errors */
4874	ifp->if_oerrors = adapter->stats.ecol +
4875	    adapter->stats.latecol + adapter->watchdog_events;
4876}
4877
4878
4879/*
4880 * Add sysctl variables, one per statistic, to the system.
4881 */
4882static void
4883em_add_hw_stats(struct adapter *adapter)
4884{
4885
4886	device_t dev = adapter->dev;
4887
4888	struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev);
4889	struct sysctl_oid *tree = device_get_sysctl_tree(dev);
4890	struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree);
4891	struct e1000_hw_stats *stats = &adapter->stats;
4892
4893	struct sysctl_oid *stat_node, *int_node, *host_node;
4894	struct sysctl_oid_list *stat_list, *int_list, *host_list;
4895
4896	/* Driver Statistics */
4897	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "link_irq",
4898			CTLFLAG_RD, &adapter->link_irq, 0,
4899			"Link MSIX IRQ Handled");
4900	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "mbuf_alloc_fail",
4901			 CTLFLAG_RD, &adapter->mbuf_alloc_failed,
4902			 "Std mbuf failed");
4903	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "cluster_alloc_fail",
4904			 CTLFLAG_RD, &adapter->mbuf_cluster_failed,
4905			 "Std mbuf cluster failed");
4906	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "dropped",
4907			CTLFLAG_RD, &adapter->dropped_pkts,
4908			"Driver dropped packets");
4909	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_dma_fail",
4910			CTLFLAG_RD, &adapter->no_tx_dma_setup,
4911			"Driver tx dma failure in xmit");
4912
4913	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_high_water",
4914			CTLFLAG_RD, &adapter->hw.fc.high_water, 0,
4915			"Flow Control High Watermark");
4916	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_low_water",
4917			CTLFLAG_RD, &adapter->hw.fc.low_water, 0,
4918			"Flow Control Low Watermark");
4919
4920	/* MAC stats get their own sub node */
4921
4922	stat_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "mac_stats",
4923				    CTLFLAG_RD, NULL, "Statistics");
4924	stat_list = SYSCTL_CHILDREN(stat_node);
4925
4926	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "excess_coll",
4927			CTLFLAG_RD, &stats->ecol,
4928			"Excessive collisions");
4929	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "symbol_errors",
4930			CTLFLAG_RD, &adapter->stats.symerrs,
4931			"Symbol Errors");
4932	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "sequence_errors",
4933			CTLFLAG_RD, &adapter->stats.sec,
4934			"Sequence Errors");
4935	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "defer_count",
4936			CTLFLAG_RD, &adapter->stats.dc,
4937			"Defer Count");
4938	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "missed_packets",
4939			CTLFLAG_RD, &adapter->stats.mpc,
4940			"Missed Packets");
4941	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_no_buff",
4942			CTLFLAG_RD, &adapter->stats.rnbc,
4943			"Receive No Buffers");
4944	/* RLEC is inaccurate on some hardware, calculate our own. */
4945/* 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_len_errs", */
4946/* 			CTLFLAG_RD, adapter->stats.roc + adapter->stats.ruc, */
4947/* 			"Receive Length Errors"); */
4948
4949	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_errs",
4950			CTLFLAG_RD, &adapter->stats.rxerrc,
4951			"Receive Errors");
4952	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "crc_errs",
4953			CTLFLAG_RD, &adapter->stats.crcerrs,
4954			"CRC errors");
4955	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "alignment_errs",
4956			CTLFLAG_RD, &adapter->stats.algnerrc,
4957			"Alignment Errors");
4958	/* On 82575 these are collision counts */
4959	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "coll_ext_errs",
4960			CTLFLAG_RD, &adapter->stats.cexterr,
4961			"Collision/Carrier extension errors");
4962	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_overruns",
4963			CTLFLAG_RD, &adapter->rx_overruns,
4964			"RX overruns");
4965	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "watchdog_timeouts",
4966			CTLFLAG_RD, &adapter->watchdog_events,
4967			"Watchdog timeouts");
4968	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xon_recvd",
4969			CTLFLAG_RD, &adapter->stats.xonrxc,
4970			"XON Received");
4971	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xon_txd",
4972			CTLFLAG_RD, &adapter->stats.xontxc,
4973			"XON Transmitted");
4974	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xoff_recvd",
4975			CTLFLAG_RD, &adapter->stats.xoffrxc,
4976			"XOFF Received");
4977	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xoff_txd",
4978			CTLFLAG_RD, &adapter->stats.xofftxc,
4979			"XOFF Transmitted");
4980
4981	/* Packet Reception Stats */
4982	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "total_pkts_recvd",
4983			CTLFLAG_RD, &adapter->stats.tpr,
4984			"Total Packets Received");
4985	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_recvd",
4986			CTLFLAG_RD, &adapter->stats.gprc,
4987			"Good Packets Received");
4988	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_recvd",
4989			CTLFLAG_RD, &adapter->stats.bprc,
4990			"Broadcast Packets Received");
4991	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_recvd",
4992			CTLFLAG_RD, &adapter->stats.mprc,
4993			"Multicast Packets Received");
4994	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_64",
4995			CTLFLAG_RD, &adapter->stats.prc64,
4996			"64 byte frames received");
4997	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_65_127",
4998			CTLFLAG_RD, &adapter->stats.prc127,
4999			"65-127 byte frames received");
5000	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_128_255",
5001			CTLFLAG_RD, &adapter->stats.prc255,
5002			"128-255 byte frames received");
5003	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_256_511",
5004			CTLFLAG_RD, &adapter->stats.prc511,
5005			"256-511 byte frames received");
5006	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_512_1023",
5007			CTLFLAG_RD, &adapter->stats.prc1023,
5008			"512-1023 byte frames received");
5009	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_1024_1522",
5010			CTLFLAG_RD, &adapter->stats.prc1522,
5011			"1024-1522 byte frames received");
5012	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_recvd",
5013			CTLFLAG_RD, &adapter->stats.gorc,
5014			"Good Octets Received");
5015
5016	/* Packet Transmission Stats */
5017	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_txd",
5018			CTLFLAG_RD, &adapter->stats.gotc,
5019			"Good Octets Transmitted");
5020	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "total_pkts_txd",
5021			CTLFLAG_RD, &adapter->stats.tpt,
5022			"Total Packets Transmitted");
5023	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd",
5024			CTLFLAG_RD, &adapter->stats.gptc,
5025			"Good Packets Transmitted");
5026	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_txd",
5027			CTLFLAG_RD, &adapter->stats.bptc,
5028			"Broadcast Packets Transmitted");
5029	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_txd",
5030			CTLFLAG_RD, &adapter->stats.mptc,
5031			"Multicast Packets Transmitted");
5032	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_64",
5033			CTLFLAG_RD, &adapter->stats.ptc64,
5034			"64 byte frames transmitted");
5035	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_65_127",
5036			CTLFLAG_RD, &adapter->stats.ptc127,
5037			"65-127 byte frames transmitted");
5038	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_128_255",
5039			CTLFLAG_RD, &adapter->stats.ptc255,
5040			"128-255 byte frames transmitted");
5041	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_256_511",
5042			CTLFLAG_RD, &adapter->stats.ptc511,
5043			"256-511 byte frames transmitted");
5044	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_512_1023",
5045			CTLFLAG_RD, &adapter->stats.ptc1023,
5046			"512-1023 byte frames transmitted");
5047	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_1024_1522",
5048			CTLFLAG_RD, &adapter->stats.ptc1522,
5049			"1024-1522 byte frames transmitted");
5050	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tso_txd",
5051			CTLFLAG_RD, &adapter->stats.tsctc,
5052			"TSO Contexts Transmitted");
5053	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tso_ctx_fail",
5054			CTLFLAG_RD, &adapter->stats.tsctfc,
5055			"TSO Contexts Failed");
5056
5057
5058	/* Interrupt Stats */
5059
5060	int_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "interrupts",
5061				    CTLFLAG_RD, NULL, "Interrupt Statistics");
5062	int_list = SYSCTL_CHILDREN(int_node);
5063
5064	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "asserts",
5065			CTLFLAG_RD, &adapter->stats.iac,
5066			"Interrupt Assertion Count");
5067
5068	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_pkt_timer",
5069			CTLFLAG_RD, &adapter->stats.icrxptc,
5070			"Interrupt Cause Rx Pkt Timer Expire Count");
5071
5072	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_abs_timer",
5073			CTLFLAG_RD, &adapter->stats.icrxatc,
5074			"Interrupt Cause Rx Abs Timer Expire Count");
5075
5076	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_pkt_timer",
5077			CTLFLAG_RD, &adapter->stats.ictxptc,
5078			"Interrupt Cause Tx Pkt Timer Expire Count");
5079
5080	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_abs_timer",
5081			CTLFLAG_RD, &adapter->stats.ictxatc,
5082			"Interrupt Cause Tx Abs Timer Expire Count");
5083
5084	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_queue_empty",
5085			CTLFLAG_RD, &adapter->stats.ictxqec,
5086			"Interrupt Cause Tx Queue Empty Count");
5087
5088	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_queue_min_thresh",
5089			CTLFLAG_RD, &adapter->stats.ictxqmtc,
5090			"Interrupt Cause Tx Queue Min Thresh Count");
5091
5092	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_desc_min_thresh",
5093			CTLFLAG_RD, &adapter->stats.icrxdmtc,
5094			"Interrupt Cause Rx Desc Min Thresh Count");
5095
5096	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_overrun",
5097			CTLFLAG_RD, &adapter->stats.icrxoc,
5098			"Interrupt Cause Receiver Overrun Count");
5099
5100	/* Host to Card Stats */
5101
5102	host_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "host",
5103				    CTLFLAG_RD, NULL,
5104				    "Host to Card Statistics");
5105
5106	host_list = SYSCTL_CHILDREN(host_node);
5107
5108	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_tx_pkt",
5109			CTLFLAG_RD, &adapter->stats.cbtmpc,
5110			"Circuit Breaker Tx Packet Count");
5111
5112	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "host_tx_pkt_discard",
5113			CTLFLAG_RD, &adapter->stats.htdpmc,
5114			"Host Transmit Discarded Packets");
5115
5116	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "rx_pkt",
5117			CTLFLAG_RD, &adapter->stats.rpthc,
5118			"Rx Packets To Host");
5119
5120	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_rx_pkts",
5121			CTLFLAG_RD, &adapter->stats.cbrmpc,
5122			"Circuit Breaker Rx Packet Count");
5123
5124	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_rx_pkt_drop",
5125			CTLFLAG_RD, &adapter->stats.cbrdpc,
5126			"Circuit Breaker Rx Dropped Count");
5127
5128	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "tx_good_pkt",
5129			CTLFLAG_RD, &adapter->stats.hgptc,
5130			"Host Good Packets Tx Count");
5131
5132	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_tx_pkt_drop",
5133			CTLFLAG_RD, &adapter->stats.htcbdpc,
5134			"Host Tx Circuit Breaker Dropped Count");
5135
5136	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "rx_good_bytes",
5137			CTLFLAG_RD, &adapter->stats.hgorc,
5138			"Host Good Octets Received Count");
5139
5140	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "tx_good_bytes",
5141			CTLFLAG_RD, &adapter->stats.hgotc,
5142			"Host Good Octets Transmit Count");
5143
5144	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "length_errors",
5145			CTLFLAG_RD, &adapter->stats.lenerrs,
5146			"Length Errors");
5147
5148	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "serdes_violation_pkt",
5149			CTLFLAG_RD, &adapter->stats.scvpc,
5150			"SerDes/SGMII Code Violation Pkt Count");
5151
5152	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "header_redir_missed",
5153			CTLFLAG_RD, &adapter->stats.hrmpc,
5154			"Header Redirection Missed Packet Count");
5155
5158}
5159
5160/**********************************************************************
5161 *
5162 *  This routine provides a way to dump out the adapter eeprom,
5163 *  often a useful debug/service tool. This only dumps the first
5164 *  32 words; the data that matters lives in that range.
5165 *
5166 **********************************************************************/
5167
5168static int
5169em_sysctl_nvm_info(SYSCTL_HANDLER_ARGS)
5170{
5171	struct adapter *adapter;
5172	int error;
5173	int result;
5174
5175	result = -1;
5176	error = sysctl_handle_int(oidp, &result, 0, req);
5177
5178	if (error || !req->newptr)
5179		return (error);
5180
5181	/*
5182	 * This value will cause a hex dump of the
5183	 * first 32 16-bit words of the EEPROM to
5184	 * the screen.
5185	 */
5186	if (result == 1) {
5187		adapter = (struct adapter *)arg1;
5188		em_print_nvm_info(adapter);
5189	}
5190
5191	return (error);
5192}
5193
5194static void
5195em_print_nvm_info(struct adapter *adapter)
5196{
5197	u16	eeprom_data;
5198	int	i, j, row = 0;
5199
5200	/* It's a bit crude, but it gets the job done. */
5201	printf("\nInterface EEPROM Dump:\n");
5202	printf("Offset\n0x0000  ");
5203	for (i = 0, j = 0; i < 32; i++, j++) {
5204		if (j == 8) { /* Make the offset block */
5205			j = 0; ++row;
5206			printf("\n0x00%x0  ", row);
5207		}
5208		e1000_read_nvm(&adapter->hw, i, 1, &eeprom_data);
5209		printf("%04x ", eeprom_data);
5210	}
5211	printf("\n");
5212}
5213
5214static int
5215em_sysctl_int_delay(SYSCTL_HANDLER_ARGS)
5216{
5217	struct em_int_delay_info *info;
5218	struct adapter *adapter;
5219	u32 regval;
5220	int error, usecs, ticks;
5221
5222	info = (struct em_int_delay_info *)arg1;
5223	usecs = info->value;
5224	error = sysctl_handle_int(oidp, &usecs, 0, req);
5225	if (error != 0 || req->newptr == NULL)
5226		return (error);
5227	if (usecs < 0 || usecs > EM_TICKS_TO_USECS(65535))
5228		return (EINVAL);
5229	info->value = usecs;
5230	ticks = EM_USECS_TO_TICKS(usecs);
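	/* The delay registers count in units of 1.024us hardware ticks. */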
5231
5232	adapter = info->adapter;
5233
5234	EM_CORE_LOCK(adapter);
5235	regval = E1000_READ_OFFSET(&adapter->hw, info->offset);
5236	regval = (regval & ~0xffff) | (ticks & 0xffff);
5237	/* Handle a few special cases. */
5238	switch (info->offset) {
5239	case E1000_RDTR:
5240		break;
5241	case E1000_TIDV:
5242		if (ticks == 0) {
5243			adapter->txd_cmd &= ~E1000_TXD_CMD_IDE;
5244			/* Don't write 0 into the TIDV register. */
5245			regval++;
5246		} else
5247			adapter->txd_cmd |= E1000_TXD_CMD_IDE;
5248		break;
5249	}
5250	E1000_WRITE_OFFSET(&adapter->hw, info->offset, regval);
5251	EM_CORE_UNLOCK(adapter);
5252	return (0);
5253}
5254
5255static void
5256em_add_int_delay_sysctl(struct adapter *adapter, const char *name,
5257	const char *description, struct em_int_delay_info *info,
5258	int offset, int value)
5259{
5260	info->adapter = adapter;
5261	info->offset = offset;
5262	info->value = value;
5263	SYSCTL_ADD_PROC(device_get_sysctl_ctx(adapter->dev),
5264	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
5265	    OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW,
5266	    info, 0, em_sysctl_int_delay, "I", description);
5267}
5268
5269static void
5270em_add_rx_process_limit(struct adapter *adapter, const char *name,
5271	const char *description, int *limit, int value)
5272{
5273	*limit = value;
5274	SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev),
5275	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
5276	    OID_AUTO, name, CTLFLAG_RW, limit, value, description);
5277}
5278
5279
5280