/******************************************************************************

  Copyright (c) 2001-2010, Intel Corporation
  All rights reserved.

  Redistribution and use in source and binary forms, with or without
  modification, are permitted provided that the following conditions are met:

   1. Redistributions of source code must retain the above copyright notice,
      this list of conditions and the following disclaimer.

   2. Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.

   3. Neither the name of the Intel Corporation nor the names of its
      contributors may be used to endorse or promote products derived from
      this software without specific prior written permission.

  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  POSSIBILITY OF SUCH DAMAGE.

******************************************************************************/
/*$FreeBSD: head/sys/dev/e1000/if_em.c 209959 2010-07-12 21:47:30Z jfv $*/

#ifdef HAVE_KERNEL_OPTION_HEADERS
#include "opt_device_polling.h"
#include "opt_inet.h"
#endif

#include <sys/param.h>
#include <sys/systm.h>
#if __FreeBSD_version >= 800000
#include <sys/buf_ring.h>
#endif
#include <sys/bus.h>
#include <sys/endian.h>
#include <sys/kernel.h>
#include <sys/kthread.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/module.h>
#include <sys/rman.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/sysctl.h>
#include <sys/taskqueue.h>
#include <sys/eventhandler.h>
#include <machine/bus.h>
#include <machine/resource.h>

#include <net/bpf.h>
#include <net/ethernet.h>
#include <net/if.h>
#include <net/if_arp.h>
#include <net/if_dl.h>
#include <net/if_media.h>

#include <net/if_types.h>
#include <net/if_vlan_var.h>

#include <netinet/in_systm.h>
#include <netinet/in.h>
#include <netinet/if_ether.h>
#include <netinet/ip.h>
#include <netinet/ip6.h>
#include <netinet/tcp.h>
#include <netinet/udp.h>

#include <machine/in_cksum.h>
#include <dev/led/led.h>
#include <dev/pci/pcivar.h>
#include <dev/pci/pcireg.h>

#include "e1000_api.h"
#include "e1000_82571.h"
#include "if_em.h"

/*********************************************************************
 *  Set this to one to display debug statistics
 *********************************************************************/
int	em_display_debug_stats = 0;

/*********************************************************************
 *  Driver version:
 *********************************************************************/
char em_driver_version[] = "7.0.5";


/*********************************************************************
 *  PCI Device ID Table
 *
 *  Used by probe to select devices to load on.
 *  The last field stores an index into em_strings.
 *  The last entry must be all 0s.
 *
 *  { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index }
 *********************************************************************/

static em_vendor_info_t em_vendor_info_array[] =
{
	/* Intel(R) PRO/1000 Network Connection */
	{ 0x8086, E1000_DEV_ID_82571EB_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_SERDES_DUAL,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_SERDES_QUAD,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER_LP,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_QUAD_FIBER,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571PT_QUAD_COPPER,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82572EI_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82572EI_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82572EI_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82572EI,		PCI_ANY_ID, PCI_ANY_ID, 0},

	{ 0x8086, E1000_DEV_ID_82573E,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82573E_IAMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82573L,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82583V,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_SPT,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_SPT,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_DPT,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_DPT,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IGP_M_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IGP_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IGP_C,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IFE,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IFE_GT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IFE_G,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IGP_M,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_82567V_3,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IGP_M_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IGP_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IGP_C,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IGP_M,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IGP_M_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IFE,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IFE_GT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IFE_G,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_BM,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82574L,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82574LA,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH10_R_BM_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH10_R_BM_LF,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH10_R_BM_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH10_D_BM_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH10_D_BM_LF,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_M_HV_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_M_HV_LC,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_D_HV_DM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_D_HV_DC,	PCI_ANY_ID, PCI_ANY_ID, 0},
	/* required last entry */
	{ 0, 0, 0, 0, 0}
};

/*********************************************************************
 *  Table of branding strings for all supported NICs.
 *********************************************************************/

static char *em_strings[] = {
	"Intel(R) PRO/1000 Network Connection"
};

/*********************************************************************
 *  Function prototypes
 *********************************************************************/
static int	em_probe(device_t);
static int	em_attach(device_t);
static int	em_detach(device_t);
static int	em_shutdown(device_t);
static int	em_suspend(device_t);
static int	em_resume(device_t);
static void	em_start(struct ifnet *);
static void	em_start_locked(struct ifnet *, struct tx_ring *);
#ifdef EM_MULTIQUEUE
static int	em_mq_start(struct ifnet *, struct mbuf *);
static int	em_mq_start_locked(struct ifnet *,
		    struct tx_ring *, struct mbuf *);
static void	em_qflush(struct ifnet *);
#endif
static int	em_ioctl(struct ifnet *, u_long, caddr_t);
static void	em_init(void *);
static void	em_init_locked(struct adapter *);
static void	em_stop(void *);
static void	em_media_status(struct ifnet *, struct ifmediareq *);
static int	em_media_change(struct ifnet *);
static void	em_identify_hardware(struct adapter *);
static int	em_allocate_pci_resources(struct adapter *);
static int	em_allocate_legacy(struct adapter *);
static int	em_allocate_msix(struct adapter *);
static int	em_allocate_queues(struct adapter *);
static int	em_setup_msix(struct adapter *);
static void	em_free_pci_resources(struct adapter *);
static void	em_local_timer(void *);
static void	em_reset(struct adapter *);
static void	em_setup_interface(device_t, struct adapter *);

static void	em_setup_transmit_structures(struct adapter *);
static void	em_initialize_transmit_unit(struct adapter *);
static int	em_allocate_transmit_buffers(struct tx_ring *);
static void	em_free_transmit_structures(struct adapter *);
static void	em_free_transmit_buffers(struct tx_ring *);

static int	em_setup_receive_structures(struct adapter *);
static int	em_allocate_receive_buffers(struct rx_ring *);
static void	em_initialize_receive_unit(struct adapter *);
static void	em_free_receive_structures(struct adapter *);
static void	em_free_receive_buffers(struct rx_ring *);

static void	em_enable_intr(struct adapter *);
static void	em_disable_intr(struct adapter *);
static void	em_update_stats_counters(struct adapter *);
static void	em_add_hw_stats(struct adapter *adapter);
static bool	em_txeof(struct tx_ring *);
static bool	em_rxeof(struct rx_ring *, int, int *);
#ifndef __NO_STRICT_ALIGNMENT
static int	em_fixup_rx(struct rx_ring *);
#endif
static void	em_receive_checksum(struct e1000_rx_desc *, struct mbuf *);
static void	em_transmit_checksum_setup(struct tx_ring *, struct mbuf *,
		    u32 *, u32 *);
static bool	em_tso_setup(struct tx_ring *, struct mbuf *, u32 *, u32 *);
static void	em_set_promisc(struct adapter *);
static void	em_disable_promisc(struct adapter *);
static void	em_set_multi(struct adapter *);
static void	em_update_link_status(struct adapter *);
static void	em_refresh_mbufs(struct rx_ring *, int);
static void	em_register_vlan(void *, struct ifnet *, u16);
static void	em_unregister_vlan(void *, struct ifnet *, u16);
static void	em_setup_vlan_hw_support(struct adapter *);
static int	em_xmit(struct tx_ring *, struct mbuf **);
static int	em_dma_malloc(struct adapter *, bus_size_t,
		    struct em_dma_alloc *, int);
static void	em_dma_free(struct adapter *, struct em_dma_alloc *);
static int	em_sysctl_nvm_info(SYSCTL_HANDLER_ARGS);
static void	em_print_nvm_info(struct adapter *);
static int	em_is_valid_ether_addr(u8 *);
static int	em_sysctl_int_delay(SYSCTL_HANDLER_ARGS);
static void	em_add_int_delay_sysctl(struct adapter *, const char *,
		    const char *, struct em_int_delay_info *, int, int);
/* Management and WOL Support */
static void	em_init_manageability(struct adapter *);
static void	em_release_manageability(struct adapter *);
static void	em_get_hw_control(struct adapter *);
static void	em_release_hw_control(struct adapter *);
static void	em_get_wakeup(device_t);
static void	em_enable_wakeup(device_t);
static int	em_enable_phy_wakeup(struct adapter *);
static void	em_led_func(void *, int);

static int	em_irq_fast(void *);

/* MSIX handlers */
static void	em_msix_tx(void *);
static void	em_msix_rx(void *);
static void	em_msix_link(void *);
static void	em_handle_tx(void *context, int pending);
static void	em_handle_rx(void *context, int pending);
static void	em_handle_link(void *context, int pending);

static void	em_add_rx_process_limit(struct adapter *, const char *,
		    const char *, int *, int);

#ifdef DEVICE_POLLING
static poll_handler_t em_poll;
#endif /* DEVICE_POLLING */

/*********************************************************************
 *  FreeBSD Device Interface Entry Points
 *********************************************************************/

static device_method_t em_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe, em_probe),
	DEVMETHOD(device_attach, em_attach),
	DEVMETHOD(device_detach, em_detach),
	DEVMETHOD(device_shutdown, em_shutdown),
	DEVMETHOD(device_suspend, em_suspend),
	DEVMETHOD(device_resume, em_resume),
	{0, 0}
};

static driver_t em_driver = {
	"em", em_methods, sizeof(struct adapter),
};

devclass_t em_devclass;
DRIVER_MODULE(em, pci, em_driver, em_devclass, 0, 0);
MODULE_DEPEND(em, pci, 1, 1, 1);
MODULE_DEPEND(em, ether, 1, 1, 1);

/*********************************************************************
 *  Tunable default values.
 *********************************************************************/

#define EM_TICKS_TO_USECS(ticks)	((1024 * (ticks) + 500) / 1000)
#define EM_USECS_TO_TICKS(usecs)	((1000 * (usecs) + 512) / 1024)
#define M_TSO_LEN			66
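
/*
 * Illustrative arithmetic (an addition, not in the original source):
 * the interrupt delay registers count in 1.024 usec hardware ticks,
 * so with the rounding above
 *	EM_USECS_TO_TICKS(66) == (1000 * 66 + 512) / 1024 == 64
 *	EM_TICKS_TO_USECS(64) == (1024 * 64 + 500) / 1000 == 66
 * i.e. a 66 usec setting round-trips through 64 ticks.
 */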

/* Allow common code without TSO */
#ifndef CSUM_TSO
#define CSUM_TSO	0
#endif

static int em_tx_int_delay_dflt = EM_TICKS_TO_USECS(EM_TIDV);
static int em_rx_int_delay_dflt = EM_TICKS_TO_USECS(EM_RDTR);
TUNABLE_INT("hw.em.tx_int_delay", &em_tx_int_delay_dflt);
TUNABLE_INT("hw.em.rx_int_delay", &em_rx_int_delay_dflt);

static int em_tx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_TADV);
static int em_rx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_RADV);
TUNABLE_INT("hw.em.tx_abs_int_delay", &em_tx_abs_int_delay_dflt);
TUNABLE_INT("hw.em.rx_abs_int_delay", &em_rx_abs_int_delay_dflt);

static int em_rxd = EM_DEFAULT_RXD;
static int em_txd = EM_DEFAULT_TXD;
TUNABLE_INT("hw.em.rxd", &em_rxd);
TUNABLE_INT("hw.em.txd", &em_txd);

static int em_smart_pwr_down = FALSE;
TUNABLE_INT("hw.em.smart_pwr_down", &em_smart_pwr_down);

/* Controls whether promiscuous also shows bad packets */
static int em_debug_sbp = FALSE;
TUNABLE_INT("hw.em.sbp", &em_debug_sbp);

/* Local controls for MSI/MSIX */
#ifdef EM_MULTIQUEUE
static int em_enable_msix = TRUE;
static int em_msix_queues = 2; /* for 82574, can be 1 or 2 */
#else
static int em_enable_msix = FALSE;
static int em_msix_queues = 0; /* disable */
#endif
TUNABLE_INT("hw.em.enable_msix", &em_enable_msix);
TUNABLE_INT("hw.em.msix_queues", &em_msix_queues);

/* How many packets rxeof tries to clean at a time */
static int em_rx_process_limit = 100;
TUNABLE_INT("hw.em.rx_process_limit", &em_rx_process_limit);

/* Flow control setting - default to FULL */
static int em_fc_setting = e1000_fc_full;
TUNABLE_INT("hw.em.fc_setting", &em_fc_setting);

/*
** Shadow VFTA table; this is needed because
** the real vlan filter table gets cleared during
** a soft reset and the driver needs to be able
** to repopulate it.
*/
static u32 em_shadow_vfta[EM_VFTA_SIZE];
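
/*
 * Illustrative layout (an assumption, mirroring the hardware VFTA
 * registers): VLAN id 'vid' corresponds to bit (vid & 0x1f) of the
 * 32-bit word at index (vid >> 5), so the table covers 4096 ids.
 */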

/* Global used in WOL setup with multiport cards */
static int global_quad_port_a = 0;

/*********************************************************************
 *  Device identification routine
 *
 *  em_probe determines if the driver should be loaded on an
 *  adapter based on the PCI vendor/device ID of the adapter.
 *
 *  return BUS_PROBE_DEFAULT on success, positive on failure
 *********************************************************************/

static int
em_probe(device_t dev)
{
	char		adapter_name[60];
	u16		pci_vendor_id = 0;
	u16		pci_device_id = 0;
	u16		pci_subvendor_id = 0;
	u16		pci_subdevice_id = 0;
	em_vendor_info_t *ent;

	INIT_DEBUGOUT("em_probe: begin");

	pci_vendor_id = pci_get_vendor(dev);
	if (pci_vendor_id != EM_VENDOR_ID)
		return (ENXIO);

	pci_device_id = pci_get_device(dev);
	pci_subvendor_id = pci_get_subvendor(dev);
	pci_subdevice_id = pci_get_subdevice(dev);

	ent = em_vendor_info_array;
	while (ent->vendor_id != 0) {
		if ((pci_vendor_id == ent->vendor_id) &&
		    (pci_device_id == ent->device_id) &&

		    ((pci_subvendor_id == ent->subvendor_id) ||
		    (ent->subvendor_id == PCI_ANY_ID)) &&

		    ((pci_subdevice_id == ent->subdevice_id) ||
		    (ent->subdevice_id == PCI_ANY_ID))) {
			sprintf(adapter_name, "%s %s",
				em_strings[ent->index],
				em_driver_version);
			device_set_desc_copy(dev, adapter_name);
			return (BUS_PROBE_DEFAULT);
		}
		ent++;
	}

	return (ENXIO);
}

/*********************************************************************
 *  Device initialization routine
 *
 *  The attach entry point is called when the driver is being loaded.
 *  This routine identifies the type of hardware, allocates all resources
 *  and initializes the hardware.
 *
 *  return 0 on success, positive on failure
 *********************************************************************/

static int
em_attach(device_t dev)
{
	struct adapter	*adapter;
	int		error = 0;

	INIT_DEBUGOUT("em_attach: begin");

	adapter = device_get_softc(dev);
	adapter->dev = adapter->osdep.dev = dev;
	EM_CORE_LOCK_INIT(adapter, device_get_nameunit(dev));

	/* SYSCTL stuff */
	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
	    OID_AUTO, "nvm", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
	    em_sysctl_nvm_info, "I", "NVM Information");

	callout_init_mtx(&adapter->timer, &adapter->core_mtx, 0);

	/* Determine hardware and mac info */
	em_identify_hardware(adapter);

	/* Setup PCI resources */
	if (em_allocate_pci_resources(adapter)) {
		device_printf(dev, "Allocation of PCI resources failed\n");
		error = ENXIO;
		goto err_pci;
	}

	/*
	** For ICH8 and family we need to
	** map the flash memory, and this
	** must happen after the MAC is
	** identified
	*/
	if ((adapter->hw.mac.type == e1000_ich8lan) ||
	    (adapter->hw.mac.type == e1000_pchlan) ||
	    (adapter->hw.mac.type == e1000_ich9lan) ||
	    (adapter->hw.mac.type == e1000_ich10lan)) {
		int rid = EM_BAR_TYPE_FLASH;
		adapter->flash = bus_alloc_resource_any(dev,
		    SYS_RES_MEMORY, &rid, RF_ACTIVE);
		if (adapter->flash == NULL) {
			device_printf(dev, "Mapping of Flash failed\n");
			error = ENXIO;
			goto err_pci;
		}
		/* This is used in the shared code */
		adapter->hw.flash_address = (u8 *)adapter->flash;
		adapter->osdep.flash_bus_space_tag =
		    rman_get_bustag(adapter->flash);
		adapter->osdep.flash_bus_space_handle =
		    rman_get_bushandle(adapter->flash);
	}

	/* Do Shared Code initialization */
	if (e1000_setup_init_funcs(&adapter->hw, TRUE)) {
		device_printf(dev, "Setup of Shared code failed\n");
		error = ENXIO;
		goto err_pci;
	}

	e1000_get_bus_info(&adapter->hw);

	/* Set up some sysctls for the tunable interrupt delays */
	em_add_int_delay_sysctl(adapter, "rx_int_delay",
	    "receive interrupt delay in usecs", &adapter->rx_int_delay,
	    E1000_REGISTER(&adapter->hw, E1000_RDTR), em_rx_int_delay_dflt);
	em_add_int_delay_sysctl(adapter, "tx_int_delay",
	    "transmit interrupt delay in usecs", &adapter->tx_int_delay,
	    E1000_REGISTER(&adapter->hw, E1000_TIDV), em_tx_int_delay_dflt);
	em_add_int_delay_sysctl(adapter, "rx_abs_int_delay",
	    "receive interrupt delay limit in usecs",
	    &adapter->rx_abs_int_delay,
	    E1000_REGISTER(&adapter->hw, E1000_RADV),
	    em_rx_abs_int_delay_dflt);
	em_add_int_delay_sysctl(adapter, "tx_abs_int_delay",
	    "transmit interrupt delay limit in usecs",
	    &adapter->tx_abs_int_delay,
	    E1000_REGISTER(&adapter->hw, E1000_TADV),
	    em_tx_abs_int_delay_dflt);
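
	/*
	 * Illustrative usage (an addition, not in the original source):
	 * each knob above appears as a per-device sysctl, e.g.
	 *	# sysctl dev.em.0.rx_int_delay=32
	 * and em_sysctl_int_delay() converts the usec value back into
	 * 1.024 usec hardware ticks before writing the register.
	 */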

	/* Sysctls for limiting the amount of work done in the taskqueue */
	em_add_rx_process_limit(adapter, "rx_processing_limit",
	    "max number of rx packets to process", &adapter->rx_process_limit,
	    em_rx_process_limit);

	/*
	 * Validate the number of transmit and receive descriptors. They
	 * must not exceed the hardware maximum, and must be a multiple
	 * of EM_DBA_ALIGN.
	 */
	if (((em_txd * sizeof(struct e1000_tx_desc)) % EM_DBA_ALIGN) != 0 ||
	    (em_txd > EM_MAX_TXD) || (em_txd < EM_MIN_TXD)) {
		device_printf(dev, "Using %d TX descriptors instead of %d!\n",
		    EM_DEFAULT_TXD, em_txd);
		adapter->num_tx_desc = EM_DEFAULT_TXD;
	} else
		adapter->num_tx_desc = em_txd;

	if (((em_rxd * sizeof(struct e1000_rx_desc)) % EM_DBA_ALIGN) != 0 ||
	    (em_rxd > EM_MAX_RXD) || (em_rxd < EM_MIN_RXD)) {
		device_printf(dev, "Using %d RX descriptors instead of %d!\n",
		    EM_DEFAULT_RXD, em_rxd);
		adapter->num_rx_desc = EM_DEFAULT_RXD;
	} else
		adapter->num_rx_desc = em_rxd;

	adapter->hw.mac.autoneg = DO_AUTO_NEG;
	adapter->hw.phy.autoneg_wait_to_complete = FALSE;
	adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;

	/* Copper options */
	if (adapter->hw.phy.media_type == e1000_media_type_copper) {
		adapter->hw.phy.mdix = AUTO_ALL_MODES;
		adapter->hw.phy.disable_polarity_correction = FALSE;
		adapter->hw.phy.ms_type = EM_MASTER_SLAVE;
	}

	/*
	 * Set the frame limits assuming
	 * standard ethernet sized frames.
	 */
	adapter->max_frame_size = ETHERMTU + ETHER_HDR_LEN + ETHERNET_FCS_SIZE;
	adapter->min_frame_size = ETH_ZLEN + ETHERNET_FCS_SIZE;

	/*
	 * This controls when hardware reports transmit completion
	 * status.
	 */
	adapter->hw.mac.report_tx_early = 1;

	/*
	** Get queue/ring memory
	*/
	if (em_allocate_queues(adapter)) {
		error = ENOMEM;
		goto err_pci;
	}

	/*
	** Start from a known state; this is
	** important for reading the nvm and
	** mac address from it.
	*/
	e1000_reset_hw(&adapter->hw);

	/* Make sure we have a good EEPROM before we read from it */
	if (e1000_validate_nvm_checksum(&adapter->hw) < 0) {
		/*
		** Some PCI-E parts fail the first check due to
		** the link being in sleep state; call it again,
		** and if it fails a second time it's a real issue.
		*/
		if (e1000_validate_nvm_checksum(&adapter->hw) < 0) {
			device_printf(dev,
			    "The EEPROM Checksum Is Not Valid\n");
			error = EIO;
			goto err_late;
		}
	}

	/* Copy the permanent MAC address out of the EEPROM */
	if (e1000_read_mac_addr(&adapter->hw) < 0) {
		device_printf(dev, "EEPROM read error while reading MAC"
		    " address\n");
		error = EIO;
		goto err_late;
	}

	if (!em_is_valid_ether_addr(adapter->hw.mac.addr)) {
		device_printf(dev, "Invalid MAC address\n");
		error = EIO;
		goto err_late;
	}

	/*
	**  Do interrupt configuration
	*/
	if (adapter->msix > 1) /* Do MSIX */
		error = em_allocate_msix(adapter);
	else  /* MSI or Legacy */
		error = em_allocate_legacy(adapter);
	if (error)
		goto err_late;

	/*
	 * Get Wake-on-Lan and Management info for later use
	 */
	em_get_wakeup(dev);

	/* Setup OS specific network interface */
	em_setup_interface(dev, adapter);

	em_reset(adapter);

	/* Initialize statistics */
	em_update_stats_counters(adapter);

	adapter->hw.mac.get_link_status = 1;
	em_update_link_status(adapter);

	/* Indicate SOL/IDER usage */
	if (e1000_check_reset_block(&adapter->hw))
		device_printf(dev,
		    "PHY reset is blocked due to SOL/IDER session.\n");

	/* Register for VLAN events */
	adapter->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
	    em_register_vlan, adapter, EVENTHANDLER_PRI_FIRST);
	adapter->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
	    em_unregister_vlan, adapter, EVENTHANDLER_PRI_FIRST);

	em_add_hw_stats(adapter);

	/* Non-AMT based hardware can now take control from firmware */
	if (adapter->has_manage && !adapter->has_amt)
		em_get_hw_control(adapter);

	/* Tell the stack that the interface is not active */
	adapter->ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);

	adapter->led_dev = led_create(em_led_func, adapter,
	    device_get_nameunit(dev));

	INIT_DEBUGOUT("em_attach: end");

	return (0);

err_late:
	em_free_transmit_structures(adapter);
	em_free_receive_structures(adapter);
	em_release_hw_control(adapter);
err_pci:
	em_free_pci_resources(adapter);
	EM_CORE_LOCK_DESTROY(adapter);

	return (error);
}

/*********************************************************************
 *  Device removal routine
 *
 *  The detach entry point is called when the driver is being removed.
 *  This routine stops the adapter and deallocates all the resources
 *  that were allocated for driver operation.
 *
 *  return 0 on success, positive on failure
 *********************************************************************/

static int
em_detach(device_t dev)
{
	struct adapter	*adapter = device_get_softc(dev);
	struct ifnet	*ifp = adapter->ifp;

	INIT_DEBUGOUT("em_detach: begin");

	/* Make sure VLANS are not using driver */
	if (adapter->ifp->if_vlantrunk != NULL) {
		device_printf(dev,"Vlan in use, detach first\n");
		return (EBUSY);
	}

#ifdef DEVICE_POLLING
	if (ifp->if_capenable & IFCAP_POLLING)
		ether_poll_deregister(ifp);
#endif

	if (adapter->led_dev != NULL)
		led_destroy(adapter->led_dev);

	EM_CORE_LOCK(adapter);
	adapter->in_detach = 1;
	em_stop(adapter);
	EM_CORE_UNLOCK(adapter);
	EM_CORE_LOCK_DESTROY(adapter);

	e1000_phy_hw_reset(&adapter->hw);

	em_release_manageability(adapter);
	em_release_hw_control(adapter);

	/* Unregister VLAN events */
	if (adapter->vlan_attach != NULL)
		EVENTHANDLER_DEREGISTER(vlan_config, adapter->vlan_attach);
	if (adapter->vlan_detach != NULL)
		EVENTHANDLER_DEREGISTER(vlan_unconfig, adapter->vlan_detach);

	ether_ifdetach(adapter->ifp);
	callout_drain(&adapter->timer);

	em_free_pci_resources(adapter);
	bus_generic_detach(dev);
	if_free(ifp);

	em_free_transmit_structures(adapter);
	em_free_receive_structures(adapter);

	em_release_hw_control(adapter);

	return (0);
}

/*********************************************************************
 *
 *  Shutdown entry point
 *
 **********************************************************************/

static int
em_shutdown(device_t dev)
{
	return em_suspend(dev);
}

/*
 * Suspend/resume device methods.
 */
static int
em_suspend(device_t dev)
{
	struct adapter *adapter = device_get_softc(dev);

	EM_CORE_LOCK(adapter);

	em_release_manageability(adapter);
	em_release_hw_control(adapter);
	em_enable_wakeup(dev);

	EM_CORE_UNLOCK(adapter);

	return bus_generic_suspend(dev);
}

static int
em_resume(device_t dev)
{
	struct adapter *adapter = device_get_softc(dev);
	struct ifnet *ifp = adapter->ifp;

	EM_CORE_LOCK(adapter);
	em_init_locked(adapter);
	em_init_manageability(adapter);
	EM_CORE_UNLOCK(adapter);
	em_start(ifp);

	return bus_generic_resume(dev);
}


/*********************************************************************
 *  Transmit entry point
 *
 *  em_start is called by the stack to initiate a transmit.
 *  The driver will remain in this routine as long as there are
 *  packets to transmit and transmit resources are available.
 *  In case resources are not available, the stack is notified and
 *  the packet is requeued.
 **********************************************************************/

#ifdef EM_MULTIQUEUE
static int
em_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr, struct mbuf *m)
{
	struct adapter	*adapter = txr->adapter;
	struct mbuf	*next;
	int		err = 0, enq = 0;

	if ((ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) !=
	    IFF_DRV_RUNNING || adapter->link_active == 0) {
		if (m != NULL)
			err = drbr_enqueue(ifp, txr->br, m);
		return (err);
	}

	/* Call cleanup if number of TX descriptors low */
	if (txr->tx_avail <= EM_TX_CLEANUP_THRESHOLD)
		em_txeof(txr);

	enq = 0;
	if (m == NULL) {
		next = drbr_dequeue(ifp, txr->br);
	} else if (drbr_needs_enqueue(ifp, txr->br)) {
		if ((err = drbr_enqueue(ifp, txr->br, m)) != 0)
			return (err);
		next = drbr_dequeue(ifp, txr->br);
	} else
		next = m;

	/* Process the queue */
	while (next != NULL) {
		if ((err = em_xmit(txr, &next)) != 0) {
			if (next != NULL)
				err = drbr_enqueue(ifp, txr->br, next);
			break;
		}
		enq++;
		drbr_stats_update(ifp, next->m_pkthdr.len, next->m_flags);
		ETHER_BPF_MTAP(ifp, next);
		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
			break;
		if (txr->tx_avail < EM_MAX_SCATTER) {
			ifp->if_drv_flags |= IFF_DRV_OACTIVE;
			break;
		}
		next = drbr_dequeue(ifp, txr->br);
	}

	if (enq > 0) {
		/* Set the watchdog */
		txr->watchdog_check = TRUE;
		txr->watchdog_time = ticks;
	}
	return (err);
}

/*
** Multiqueue-capable stack interface; this is not
** yet truly multiqueue, but that is coming...
*/
static int
em_mq_start(struct ifnet *ifp, struct mbuf *m)
{
	struct adapter	*adapter = ifp->if_softc;
	struct tx_ring	*txr;
	int		i, error = 0;

	/* Which queue to use */
	if ((m->m_flags & M_FLOWID) != 0)
		i = m->m_pkthdr.flowid % adapter->num_queues;
	else
		i = curcpu % adapter->num_queues;

	txr = &adapter->tx_rings[i];

	if (EM_TX_TRYLOCK(txr)) {
		error = em_mq_start_locked(ifp, txr, m);
		EM_TX_UNLOCK(txr);
	} else
		error = drbr_enqueue(ifp, txr->br, m);

	return (error);
}

/*
** Flush all ring buffers
*/
static void
em_qflush(struct ifnet *ifp)
{
	struct adapter	*adapter = ifp->if_softc;
	struct tx_ring	*txr = adapter->tx_rings;
	struct mbuf	*m;

	for (int i = 0; i < adapter->num_queues; i++, txr++) {
		EM_TX_LOCK(txr);
		while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
			m_freem(m);
		EM_TX_UNLOCK(txr);
	}
	if_qflush(ifp);
}

#endif /* EM_MULTIQUEUE */

static void
em_start_locked(struct ifnet *ifp, struct tx_ring *txr)
{
	struct adapter	*adapter = ifp->if_softc;
	struct mbuf	*m_head;

	EM_TX_LOCK_ASSERT(txr);

	if ((ifp->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) !=
	    IFF_DRV_RUNNING)
		return;

	if (!adapter->link_active)
		return;

	/* Call cleanup if number of TX descriptors low */
	if (txr->tx_avail <= EM_TX_CLEANUP_THRESHOLD)
		em_txeof(txr);

	while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
		if (txr->tx_avail < EM_MAX_SCATTER) {
			ifp->if_drv_flags |= IFF_DRV_OACTIVE;
			break;
		}
		IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
		if (m_head == NULL)
			break;
		/*
		 *  Encapsulation can modify our pointer and/or make it
		 *  NULL on failure.  In that event, we can't requeue.
		 */
		if (em_xmit(txr, &m_head)) {
			if (m_head == NULL)
				break;
			ifp->if_drv_flags |= IFF_DRV_OACTIVE;
			IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
			break;
		}

		/* Send a copy of the frame to the BPF listener */
		ETHER_BPF_MTAP(ifp, m_head);

		/* Set timeout in case hardware has problems transmitting. */
		txr->watchdog_time = ticks;
		txr->watchdog_check = TRUE;
	}

	return;
}

static void
em_start(struct ifnet *ifp)
{
	struct adapter	*adapter = ifp->if_softc;
	struct tx_ring	*txr = adapter->tx_rings;

	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
		EM_TX_LOCK(txr);
		em_start_locked(ifp, txr);
		EM_TX_UNLOCK(txr);
	}
	return;
}

/*********************************************************************
 *  Ioctl entry point
 *
 *  em_ioctl is called when the user wants to configure the
 *  interface.
 *
 *  return 0 on success, positive on failure
 **********************************************************************/

static int
em_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
{
	struct adapter	*adapter = ifp->if_softc;
	struct ifreq *ifr = (struct ifreq *)data;
#ifdef INET
	struct ifaddr *ifa = (struct ifaddr *)data;
#endif
	int error = 0;

	if (adapter->in_detach)
		return (error);

	switch (command) {
	case SIOCSIFADDR:
#ifdef INET
		if (ifa->ifa_addr->sa_family == AF_INET) {
			/*
			 * XXX
			 * Since resetting hardware takes a very long time
999			 * initialize the hardware only when it is absolutely
1000			 * required.
1001			 */
1002			ifp->if_flags |= IFF_UP;
1003			if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) {
1004				EM_CORE_LOCK(adapter);
1005				em_init_locked(adapter);
1006				EM_CORE_UNLOCK(adapter);
1007			}
1008			arp_ifinit(ifp, ifa);
1009		} else
1010#endif
1011			error = ether_ioctl(ifp, command, data);
1012		break;
1013	case SIOCSIFMTU:
1014	    {
1015		int max_frame_size;
1016
1017		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFMTU (Set Interface MTU)");
1018
1019		EM_CORE_LOCK(adapter);
1020		switch (adapter->hw.mac.type) {
1021		case e1000_82571:
1022		case e1000_82572:
1023		case e1000_ich9lan:
1024		case e1000_ich10lan:
1025		case e1000_82574:
1026		case e1000_80003es2lan:	/* 9K Jumbo Frame size */
1027			max_frame_size = 9234;
1028			break;
1029		case e1000_pchlan:
1030			max_frame_size = 4096;
1031			break;
1032			/* Adapters that do not support jumbo frames */
1033		case e1000_82583:
1034		case e1000_ich8lan:
1035			max_frame_size = ETHER_MAX_LEN;
1036			break;
1037		default:
1038			max_frame_size = MAX_JUMBO_FRAME_SIZE;
1039		}
1040		if (ifr->ifr_mtu > max_frame_size - ETHER_HDR_LEN -
1041		    ETHER_CRC_LEN) {
1042			EM_CORE_UNLOCK(adapter);
1043			error = EINVAL;
1044			break;
1045		}
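
		/*
		 * Worked example (an addition, not in the original source):
		 * with a 9234-byte max_frame_size the largest MTU accepted
		 * above is 9234 - ETHER_HDR_LEN (14) - ETHER_CRC_LEN (4),
		 * i.e. 9216 bytes.
		 */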

		ifp->if_mtu = ifr->ifr_mtu;
		adapter->max_frame_size =
		    ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
		em_init_locked(adapter);
		EM_CORE_UNLOCK(adapter);
		break;
	    }
	case SIOCSIFFLAGS:
		IOCTL_DEBUGOUT("ioctl rcv'd:\
		    SIOCSIFFLAGS (Set Interface Flags)");
		EM_CORE_LOCK(adapter);
		if (ifp->if_flags & IFF_UP) {
			if ((ifp->if_drv_flags & IFF_DRV_RUNNING)) {
				if ((ifp->if_flags ^ adapter->if_flags) &
				    (IFF_PROMISC | IFF_ALLMULTI)) {
					em_disable_promisc(adapter);
					em_set_promisc(adapter);
				}
			} else
				em_init_locked(adapter);
		} else
			if (ifp->if_drv_flags & IFF_DRV_RUNNING)
				em_stop(adapter);
		adapter->if_flags = ifp->if_flags;
		EM_CORE_UNLOCK(adapter);
		break;
	case SIOCADDMULTI:
	case SIOCDELMULTI:
		IOCTL_DEBUGOUT("ioctl rcv'd: SIOC(ADD|DEL)MULTI");
		if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
			EM_CORE_LOCK(adapter);
			em_disable_intr(adapter);
			em_set_multi(adapter);
#ifdef DEVICE_POLLING
			if (!(ifp->if_capenable & IFCAP_POLLING))
#endif
				em_enable_intr(adapter);
			EM_CORE_UNLOCK(adapter);
		}
		break;
	case SIOCSIFMEDIA:
		/* Check SOL/IDER usage */
		EM_CORE_LOCK(adapter);
		if (e1000_check_reset_block(&adapter->hw)) {
			EM_CORE_UNLOCK(adapter);
			device_printf(adapter->dev, "Media change is"
			    " blocked due to SOL/IDER session.\n");
			break;
		}
		EM_CORE_UNLOCK(adapter);
	case SIOCGIFMEDIA:
		IOCTL_DEBUGOUT("ioctl rcv'd: \
		    SIOCxIFMEDIA (Get/Set Interface Media)");
		error = ifmedia_ioctl(ifp, ifr, &adapter->media, command);
		break;
	case SIOCSIFCAP:
	    {
		int mask, reinit;

		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFCAP (Set Capabilities)");
		reinit = 0;
		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
#ifdef DEVICE_POLLING
		if (mask & IFCAP_POLLING) {
			if (ifr->ifr_reqcap & IFCAP_POLLING) {
				error = ether_poll_register(em_poll, ifp);
				if (error)
					return (error);
				EM_CORE_LOCK(adapter);
				em_disable_intr(adapter);
				ifp->if_capenable |= IFCAP_POLLING;
				EM_CORE_UNLOCK(adapter);
			} else {
				error = ether_poll_deregister(ifp);
				/* Enable interrupt even in error case */
				EM_CORE_LOCK(adapter);
				em_enable_intr(adapter);
				ifp->if_capenable &= ~IFCAP_POLLING;
				EM_CORE_UNLOCK(adapter);
			}
		}
#endif
		if (mask & IFCAP_HWCSUM) {
			ifp->if_capenable ^= IFCAP_HWCSUM;
			reinit = 1;
		}
		if (mask & IFCAP_TSO4) {
			ifp->if_capenable ^= IFCAP_TSO4;
			reinit = 1;
		}
		if (mask & IFCAP_VLAN_HWTAGGING) {
			ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
			reinit = 1;
		}
		if (mask & IFCAP_VLAN_HWFILTER) {
			ifp->if_capenable ^= IFCAP_VLAN_HWFILTER;
			reinit = 1;
		}
		if ((mask & IFCAP_WOL) &&
		    (ifp->if_capabilities & IFCAP_WOL) != 0) {
			if (mask & IFCAP_WOL_MCAST)
				ifp->if_capenable ^= IFCAP_WOL_MCAST;
			if (mask & IFCAP_WOL_MAGIC)
				ifp->if_capenable ^= IFCAP_WOL_MAGIC;
		}
		if (reinit && (ifp->if_drv_flags & IFF_DRV_RUNNING))
			em_init(adapter);
		VLAN_CAPABILITIES(ifp);
		break;
	    }

	default:
		error = ether_ioctl(ifp, command, data);
		break;
	}

	return (error);
}


/*********************************************************************
 *  Init entry point
 *
 *  This routine is used in two ways. It is used by the stack as
 *  the init entry point in the network interface structure. It is also used
 *  by the driver as a hw/sw initialization routine to get to a
 *  consistent state.
 *
 *  return 0 on success, positive on failure
 **********************************************************************/

static void
em_init_locked(struct adapter *adapter)
{
	struct ifnet	*ifp = adapter->ifp;
	device_t	dev = adapter->dev;
	u32		pba;

	INIT_DEBUGOUT("em_init: begin");

	EM_CORE_LOCK_ASSERT(adapter);

	em_disable_intr(adapter);
	callout_stop(&adapter->timer);

	/*
	 * Packet Buffer Allocation (PBA)
	 * Writing PBA sets the receive portion of the buffer
	 * the remainder is used for the transmit buffer.
	 */
	switch (adapter->hw.mac.type) {
	/* Total Packet Buffer on these is 48K */
	case e1000_82571:
	case e1000_82572:
	case e1000_80003es2lan:
			pba = E1000_PBA_32K; /* 32K for Rx, 16K for Tx */
		break;
	case e1000_82573: /* 82573: Total Packet Buffer is 32K */
			pba = E1000_PBA_12K; /* 12K for Rx, 20K for Tx */
		break;
	case e1000_82574:
	case e1000_82583:
			pba = E1000_PBA_20K; /* 20K for Rx, 20K for Tx */
		break;
	case e1000_ich9lan:
	case e1000_ich10lan:
	case e1000_pchlan:
		pba = E1000_PBA_10K;
		break;
	case e1000_ich8lan:
		pba = E1000_PBA_8K;
		break;
	default:
		if (adapter->max_frame_size > 8192)
			pba = E1000_PBA_40K; /* 40K for Rx, 24K for Tx */
		else
			pba = E1000_PBA_48K; /* 48K for Rx, 16K for Tx */
	}

	INIT_DEBUGOUT1("em_init: pba=%dK",pba);
	E1000_WRITE_REG(&adapter->hw, E1000_PBA, pba);

	/* Get the latest mac address, User can use a LAA */
	bcopy(IF_LLADDR(adapter->ifp), adapter->hw.mac.addr,
	      ETHER_ADDR_LEN);

	/* Put the address into the Receive Address Array */
	e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);

	/*
	 * With the 82571 adapter, RAR[0] may be overwritten
	 * when the other port is reset, so we make a duplicate
	 * in RAR[14] for that eventuality; this assures
	 * the interface continues to function.
	 */
	if (adapter->hw.mac.type == e1000_82571) {
		e1000_set_laa_state_82571(&adapter->hw, TRUE);
		e1000_rar_set(&adapter->hw, adapter->hw.mac.addr,
		    E1000_RAR_ENTRIES - 1);
	}

	/* Initialize the hardware */
	em_reset(adapter);
	em_update_link_status(adapter);

	/* Setup VLAN support, basic and offload if available */
	E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);

	/* Use real VLAN Filter support? */
	if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) {
		if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
			/* Use real VLAN Filter support */
			em_setup_vlan_hw_support(adapter);
		else {
			u32 ctrl;
			ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL);
			ctrl |= E1000_CTRL_VME;
			E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl);
		}
	}

	/* Set hardware offload abilities */
	ifp->if_hwassist = 0;
	if (ifp->if_capenable & IFCAP_TXCSUM)
		ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP);
	if (ifp->if_capenable & IFCAP_TSO4)
		ifp->if_hwassist |= CSUM_TSO;

	/* Configure for OS presence */
	em_init_manageability(adapter);

	/* Prepare transmit descriptors and buffers */
	em_setup_transmit_structures(adapter);
	em_initialize_transmit_unit(adapter);

	/* Setup Multicast table */
	em_set_multi(adapter);

	/* Prepare receive descriptors and buffers */
	if (em_setup_receive_structures(adapter)) {
		device_printf(dev, "Could not setup receive structures\n");
		em_stop(adapter);
		return;
	}
	em_initialize_receive_unit(adapter);

	/* Don't lose promiscuous settings */
	em_set_promisc(adapter);

	ifp->if_drv_flags |= IFF_DRV_RUNNING;
	ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;

	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
	e1000_clear_hw_cntrs_base_generic(&adapter->hw);

	/* MSI/X configuration for 82574 */
	if (adapter->hw.mac.type == e1000_82574) {
		int tmp;
		tmp = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
		tmp |= E1000_CTRL_EXT_PBA_CLR;
		E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, tmp);
		/* Set the IVAR - interrupt vector routing. */
		E1000_WRITE_REG(&adapter->hw, E1000_IVAR, adapter->ivars);
	}

#ifdef DEVICE_POLLING
	/*
	 * Only enable interrupts if we are not polling; make sure
	 * they are off otherwise.
	 */
	if (ifp->if_capenable & IFCAP_POLLING)
		em_disable_intr(adapter);
	else
#endif /* DEVICE_POLLING */
		em_enable_intr(adapter);

	/* AMT based hardware can now take control from firmware */
	if (adapter->has_manage && adapter->has_amt)
		em_get_hw_control(adapter);

	/* Don't reset the phy next time init gets called */
	adapter->hw.phy.reset_disable = TRUE;
}

static void
em_init(void *arg)
{
	struct adapter *adapter = arg;

	EM_CORE_LOCK(adapter);
	em_init_locked(adapter);
	EM_CORE_UNLOCK(adapter);
}


#ifdef DEVICE_POLLING
/*********************************************************************
 *
 *  Legacy polling routine: note this only works with a single queue
 *
 *********************************************************************/
static int
em_poll(struct ifnet *ifp, enum poll_cmd cmd, int count)
{
	struct adapter *adapter = ifp->if_softc;
	struct tx_ring	*txr = adapter->tx_rings;
	struct rx_ring	*rxr = adapter->rx_rings;
	u32		reg_icr;
	int		rx_done;

	EM_CORE_LOCK(adapter);
	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
		EM_CORE_UNLOCK(adapter);
		return (0);
	}

	if (cmd == POLL_AND_CHECK_STATUS) {
		reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
		if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
			callout_stop(&adapter->timer);
			adapter->hw.mac.get_link_status = 1;
			em_update_link_status(adapter);
			callout_reset(&adapter->timer, hz,
			    em_local_timer, adapter);
		}
	}
	EM_CORE_UNLOCK(adapter);

	em_rxeof(rxr, count, &rx_done);

	EM_TX_LOCK(txr);
	em_txeof(txr);
#ifdef EM_MULTIQUEUE
	if (!drbr_empty(ifp, txr->br))
		em_mq_start_locked(ifp, txr, NULL);
#else
	if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
		em_start_locked(ifp, txr);
#endif
	EM_TX_UNLOCK(txr);

	return (rx_done);
}
#endif /* DEVICE_POLLING */


/*********************************************************************
 *
 *  Fast Legacy/MSI Combined Interrupt Service routine
 *
 *********************************************************************/
static int
em_irq_fast(void *arg)
{
	struct adapter	*adapter = arg;
	struct ifnet	*ifp;
	u32		reg_icr;

	ifp = adapter->ifp;

	reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);

	/* Hot eject?  */
	if (reg_icr == 0xffffffff)
		return FILTER_STRAY;

	/* Definitely not our interrupt.  */
	if (reg_icr == 0x0)
		return FILTER_STRAY;

	/*
	 * Starting with the 82571 chip, bit 31 should be used to
	 * determine whether the interrupt belongs to us.
	 */
	if (adapter->hw.mac.type >= e1000_82571 &&
	    (reg_icr & E1000_ICR_INT_ASSERTED) == 0)
		return FILTER_STRAY;

	em_disable_intr(adapter);
	taskqueue_enqueue(adapter->tq, &adapter->que_task);

	/* Link status change */
	if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
		adapter->hw.mac.get_link_status = 1;
		taskqueue_enqueue(taskqueue_fast, &adapter->link_task);
	}

	if (reg_icr & E1000_ICR_RXO)
		adapter->rx_overruns++;
	return FILTER_HANDLED;
}

/* Combined RX/TX handler, used by Legacy and MSI */
static void
em_handle_que(void *context, int pending)
{
	struct adapter	*adapter = context;
	struct ifnet	*ifp = adapter->ifp;
	struct tx_ring	*txr = adapter->tx_rings;
	struct rx_ring	*rxr = adapter->rx_rings;
	bool		more;


	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
		more = em_rxeof(rxr, adapter->rx_process_limit, NULL);

		EM_TX_LOCK(txr);
		if (em_txeof(txr))
			more = TRUE;
#ifdef EM_MULTIQUEUE
		if (!drbr_empty(ifp, txr->br))
			em_mq_start_locked(ifp, txr, NULL);
#else
		if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
			em_start_locked(ifp, txr);
#endif
		EM_TX_UNLOCK(txr);
		if (more) {
			taskqueue_enqueue(adapter->tq, &adapter->que_task);
			return;
		}
	}

	em_enable_intr(adapter);
	return;
}


/*********************************************************************
 *
 *  MSIX Interrupt Service Routines
 *
 **********************************************************************/
static void
em_msix_tx(void *arg)
{
	struct tx_ring *txr = arg;
	struct adapter *adapter = txr->adapter;
	bool		more;

	++txr->tx_irq;
	EM_TX_LOCK(txr);
	more = em_txeof(txr);
	EM_TX_UNLOCK(txr);
	if (more)
		taskqueue_enqueue(txr->tq, &txr->tx_task);
	else
		/* Reenable this interrupt */
		E1000_WRITE_REG(&adapter->hw, E1000_IMS, txr->ims);
	return;
}

/*********************************************************************
 *
 *  MSIX RX Interrupt Service routine
 *
 **********************************************************************/

static void
em_msix_rx(void *arg)
{
	struct rx_ring	*rxr = arg;
	struct adapter	*adapter = rxr->adapter;
	bool		more;

	++rxr->rx_irq;
	more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
	if (more)
		taskqueue_enqueue(rxr->tq, &rxr->rx_task);
	else
		/* Reenable this interrupt */
		E1000_WRITE_REG(&adapter->hw, E1000_IMS, rxr->ims);
	return;
}

/*********************************************************************
 *
 *  MSIX Link Fast Interrupt Service routine
 *
 **********************************************************************/
static void
em_msix_link(void *arg)
{
	struct adapter	*adapter = arg;
	u32		reg_icr;

	++adapter->link_irq;
	reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);

	if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
		adapter->hw.mac.get_link_status = 1;
		em_handle_link(adapter, 0);
	} else
		E1000_WRITE_REG(&adapter->hw, E1000_IMS,
		    EM_MSIX_LINK | E1000_IMS_LSC);
	return;
}

static void
em_handle_rx(void *context, int pending)
{
	struct rx_ring	*rxr = context;
	struct adapter	*adapter = rxr->adapter;
	bool		more;

	more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
	if (more)
		taskqueue_enqueue(rxr->tq, &rxr->rx_task);
	else
		/* Reenable this interrupt */
		E1000_WRITE_REG(&adapter->hw, E1000_IMS, rxr->ims);
}

static void
em_handle_tx(void *context, int pending)
{
	struct tx_ring	*txr = context;
	struct adapter	*adapter = txr->adapter;
	struct ifnet	*ifp = adapter->ifp;

	if (!EM_TX_TRYLOCK(txr))
		return;

	em_txeof(txr);

#ifdef EM_MULTIQUEUE
	if (!drbr_empty(ifp, txr->br))
		em_mq_start_locked(ifp, txr, NULL);
#else
	if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
		em_start_locked(ifp, txr);
#endif
	E1000_WRITE_REG(&adapter->hw, E1000_IMS, txr->ims);
	EM_TX_UNLOCK(txr);
}

static void
em_handle_link(void *context, int pending)
{
	struct adapter	*adapter = context;
	struct ifnet *ifp = adapter->ifp;

	if (!(ifp->if_drv_flags & IFF_DRV_RUNNING))
		return;

	EM_CORE_LOCK(adapter);
	callout_stop(&adapter->timer);
	em_update_link_status(adapter);
	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
	E1000_WRITE_REG(&adapter->hw, E1000_IMS,
	    EM_MSIX_LINK | E1000_IMS_LSC);
	EM_CORE_UNLOCK(adapter);
}


/*********************************************************************
 *
 *  Media Ioctl callback
 *
 *  This routine is called whenever the user queries the status of
 *  the interface using ifconfig.
 *
 **********************************************************************/
static void
em_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
{
	struct adapter *adapter = ifp->if_softc;
	u_char fiber_type = IFM_1000_SX;

	INIT_DEBUGOUT("em_media_status: begin");

	EM_CORE_LOCK(adapter);
	em_update_link_status(adapter);

	ifmr->ifm_status = IFM_AVALID;
	ifmr->ifm_active = IFM_ETHER;

	if (!adapter->link_active) {
		EM_CORE_UNLOCK(adapter);
		return;
	}

	ifmr->ifm_status |= IFM_ACTIVE;

	if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
	    (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
		ifmr->ifm_active |= fiber_type | IFM_FDX;
	} else {
		switch (adapter->link_speed) {
		case 10:
			ifmr->ifm_active |= IFM_10_T;
			break;
		case 100:
			ifmr->ifm_active |= IFM_100_TX;
			break;
		case 1000:
			ifmr->ifm_active |= IFM_1000_T;
			break;
		}
		if (adapter->link_duplex == FULL_DUPLEX)
			ifmr->ifm_active |= IFM_FDX;
		else
			ifmr->ifm_active |= IFM_HDX;
	}
	EM_CORE_UNLOCK(adapter);
}

/*********************************************************************
 *
 *  Media Ioctl callback
 *
 *  This routine is called when the user changes speed/duplex using
 *  the media/mediaopt option with ifconfig.
 *
 **********************************************************************/
static int
em_media_change(struct ifnet *ifp)
{
	struct adapter *adapter = ifp->if_softc;
	struct ifmedia  *ifm = &adapter->media;

	INIT_DEBUGOUT("em_media_change: begin");

	if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
		return (EINVAL);

	EM_CORE_LOCK(adapter);
	switch (IFM_SUBTYPE(ifm->ifm_media)) {
	case IFM_AUTO:
		adapter->hw.mac.autoneg = DO_AUTO_NEG;
		adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
		break;
	case IFM_1000_LX:
	case IFM_1000_SX:
	case IFM_1000_T:
		adapter->hw.mac.autoneg = DO_AUTO_NEG;
		adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
		break;
	case IFM_100_TX:
		adapter->hw.mac.autoneg = FALSE;
		adapter->hw.phy.autoneg_advertised = 0;
		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
			adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_FULL;
		else
			adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_HALF;
		break;
	case IFM_10_T:
		adapter->hw.mac.autoneg = FALSE;
		adapter->hw.phy.autoneg_advertised = 0;
		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
			adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_FULL;
		else
			adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_HALF;
		break;
	default:
		device_printf(adapter->dev, "Unsupported media type\n");
	}

	/* As the speed/duplex settings may have changed we need to
	 * reset the PHY.
	 */
1708	adapter->hw.phy.reset_disable = FALSE;
1709
1710	em_init_locked(adapter);
1711	EM_CORE_UNLOCK(adapter);
1712
1713	return (0);
1714}
1715
1716/*********************************************************************
1717 *
1718 *  This routine maps the mbufs to tx descriptors.
1719 *
1720 *  return 0 on success, positive on failure
1721 **********************************************************************/
1722
1723static int
1724em_xmit(struct tx_ring *txr, struct mbuf **m_headp)
1725{
1726	struct adapter		*adapter = txr->adapter;
1727	bus_dma_segment_t	segs[EM_MAX_SCATTER];
1728	bus_dmamap_t		map;
1729	struct em_buffer	*tx_buffer, *tx_buffer_mapped;
1730	struct e1000_tx_desc	*ctxd = NULL;
1731	struct mbuf		*m_head;
1732	u32			txd_upper, txd_lower, txd_used, txd_saved;
1733	int			nsegs, i, j, first, last = 0;
1734	int			error, do_tso, tso_desc = 0;
1735
1736	m_head = *m_headp;
1737	txd_upper = txd_lower = txd_used = txd_saved = 0;
1738	do_tso = ((m_head->m_pkthdr.csum_flags & CSUM_TSO) != 0);
1739
1740	/*
1741	** When doing checksum offload, it is critical to
1742	** make sure the first mbuf has more than header,
1743	** because that routine expects data to be present.
1744	*/
1745	if ((m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD) &&
1746	    (m_head->m_len < ETHER_HDR_LEN + sizeof(struct ip))) {
1747		m_head = m_pullup(m_head, ETHER_HDR_LEN + sizeof(struct ip));
1748		*m_headp = m_head;
1749		if (m_head == NULL)
1750			return (ENOBUFS);
1751	}
1752
1753	/*
1754	 * TSO workaround:
1755	 *  If an mbuf is only header we need
1756	 *     to pull 4 bytes of data into it.
1757	 */
1758	if (do_tso && (m_head->m_len <= M_TSO_LEN)) {
1759		m_head = m_pullup(m_head, M_TSO_LEN + 4);
1760		*m_headp = m_head;
1761		if (m_head == NULL)
1762			return (ENOBUFS);
1763	}
1764
1765	/*
1766	 * Map the packet for DMA.
1767	 *
1768	 * Capture the first descriptor index;
1769	 * this descriptor will record the index
1770	 * of the EOP, which is the only one that
1771	 * now gets a DONE bit writeback.
1772	 */
1773	first = txr->next_avail_desc;
1774	tx_buffer = &txr->tx_buffers[first];
1775	tx_buffer_mapped = tx_buffer;
1776	map = tx_buffer->map;
1777
1778	error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
1779	    *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
1780
1781	/*
1782	 * There are two types of errors we can (try) to handle:
1783	 * - EFBIG means the mbuf chain was too long and bus_dma ran
1784	 *   out of segments.  Defragment the mbuf chain and try again.
1785	 * - ENOMEM means bus_dma could not obtain enough bounce buffers
1786	 *   at this point in time.  Defer sending and try again later.
1787	 * All other errors, in particular EINVAL, are fatal and prevent the
1788	 * mbuf chain from ever going through.  Drop it and report error.
1789	 */
1790	if (error == EFBIG) {
1791		struct mbuf *m;
1792
1793		m = m_defrag(*m_headp, M_DONTWAIT);
1794		if (m == NULL) {
1795			adapter->mbuf_alloc_failed++;
1796			m_freem(*m_headp);
1797			*m_headp = NULL;
1798			return (ENOBUFS);
1799		}
1800		*m_headp = m;
1801
1802		/* Try it again */
1803		error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
1804		    *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
1805
1806		if (error) {
1807			adapter->no_tx_dma_setup++;
1808			m_freem(*m_headp);
1809			*m_headp = NULL;
1810			return (error);
1811		}
1812	} else if (error != 0) {
1813		adapter->no_tx_dma_setup++;
		m_freem(*m_headp);	/* fatal; drop the chain as the comment above says */
		*m_headp = NULL;
1814		return (error);
1815	}
1816
1817	/*
1818	 * TSO Hardware workaround, if this packet is not
1819	 * TSO, and is only a single descriptor long, and
1820	 * it follows a TSO burst, then we need to add a
1821	 * sentinel descriptor to prevent premature writeback.
1822	 */
1823	if ((do_tso == 0) && (txr->tx_tso == TRUE)) {
1824		if (nsegs == 1)
1825			tso_desc = TRUE;
1826		txr->tx_tso = FALSE;
1827	}
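	/*
	 * Illustrative example (editor's note): if a single 1000-byte
	 * segment follows a TSO burst, the descriptor loop below emits a
	 * 996-byte data descriptor plus a separate 4-byte sentinel
	 * descriptor, so a premature DONE writeback cannot occur.
	 */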
1828
1829	if (nsegs > (txr->tx_avail - 2)) {
1830		txr->no_desc_avail++;
1831		bus_dmamap_unload(txr->txtag, map);
1832		return (ENOBUFS);
1833	}
1834	m_head = *m_headp;
1835
1836	/* Do hardware assists */
1837#if __FreeBSD_version >= 700000
1838	if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
1839		error = em_tso_setup(txr, m_head, &txd_upper, &txd_lower);
1840		if (error != TRUE)
1841			return (ENXIO); /* something foobar */
1842			return (ENXIO); /* TSO context setup failed */
1843		tso_desc = TRUE;
1844	} else
1845#endif
1846	if (m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD)
1847		em_transmit_checksum_setup(txr,  m_head,
1848		    &txd_upper, &txd_lower);
1849
1850	i = txr->next_avail_desc;
1851
1852	/* Set up our transmit descriptors */
1853	for (j = 0; j < nsegs; j++) {
1854		bus_size_t seg_len;
1855		bus_addr_t seg_addr;
1856
1857		tx_buffer = &txr->tx_buffers[i];
1858		ctxd = &txr->tx_base[i];
1859		seg_addr = segs[j].ds_addr;
1860		seg_len  = segs[j].ds_len;
1861		/*
1862		** TSO Workaround:
1863		** If this is the last descriptor, we want to
1864		** split it so we have a small final sentinel
1865		*/
1866		if (tso_desc && (j == (nsegs - 1)) && (seg_len > 8)) {
1867			seg_len -= 4;
1868			ctxd->buffer_addr = htole64(seg_addr);
1869			ctxd->lower.data = htole32(
1870			    adapter->txd_cmd | txd_lower | seg_len);
1871			ctxd->upper.data =
1872			    htole32(txd_upper);
1873			if (++i == adapter->num_tx_desc)
1874				i = 0;
1875			/* Now make the sentinel */
1876			++txd_used; /* using an extra txd */
1877			ctxd = &txr->tx_base[i];
1878			tx_buffer = &txr->tx_buffers[i];
1879			ctxd->buffer_addr =
1880			    htole64(seg_addr + seg_len);
1881			ctxd->lower.data = htole32(
1882			    adapter->txd_cmd | txd_lower | 4);
1883			ctxd->upper.data =
1884			    htole32(txd_upper);
1885			last = i;
1886			if (++i == adapter->num_tx_desc)
1887				i = 0;
1888		} else {
1889			ctxd->buffer_addr = htole64(seg_addr);
1890			ctxd->lower.data = htole32(
1891			    adapter->txd_cmd | txd_lower | seg_len);
1892			ctxd->upper.data =
1893			    htole32(txd_upper);
1894			last = i;
1895			if (++i == adapter->num_tx_desc)
1896				i = 0;
1897		}
1898		tx_buffer->m_head = NULL;
1899		tx_buffer->next_eop = -1;
1900	}
1901
1902	txr->next_avail_desc = i;
1903	txr->tx_avail -= nsegs;
1904	if (tso_desc) /* TSO used an extra for sentinel */
1905		txr->tx_avail -= txd_used;
1906
1907	if (m_head->m_flags & M_VLANTAG) {
1908		/* Set the vlan id. */
1909		ctxd->upper.fields.special =
1910		    htole16(m_head->m_pkthdr.ether_vtag);
1911		/* Tell hardware to add tag */
1912		ctxd->lower.data |= htole32(E1000_TXD_CMD_VLE);
1913	}
1914
1915	tx_buffer->m_head = m_head;
1916	tx_buffer_mapped->map = tx_buffer->map;
1917	tx_buffer->map = map;
1918	bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE);
1919
1920	/*
1921	 * Last Descriptor of Packet
1922	 * needs End Of Packet (EOP)
1923	 * and Report Status (RS)
1924	 */
1925	ctxd->lower.data |=
1926	    htole32(E1000_TXD_CMD_EOP | E1000_TXD_CMD_RS);
1927	/*
1928	 * Keep track in the first buffer which
1929	 * descriptor will be written back
1930	 */
1931	tx_buffer = &txr->tx_buffers[first];
1932	tx_buffer->next_eop = last;
1933
1934	/*
1935	 * Advance the Transmit Descriptor Tail (TDT), this tells the E1000
1936	 * that this frame is available to transmit.
1937	 */
1938	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
1939	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1940	E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me), i);
1941
1942	return (0);
1943}
1944
1945static void
1946em_set_promisc(struct adapter *adapter)
1947{
1948	struct ifnet	*ifp = adapter->ifp;
1949	u32		reg_rctl;
1950
1951	reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
1952
1953	if (ifp->if_flags & IFF_PROMISC) {
1954		reg_rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
1955		/* Turn this on if you want to see bad packets */
1956		if (em_debug_sbp)
1957			reg_rctl |= E1000_RCTL_SBP;
1958		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
1959	} else if (ifp->if_flags & IFF_ALLMULTI) {
1960		reg_rctl |= E1000_RCTL_MPE;
1961		reg_rctl &= ~E1000_RCTL_UPE;
1962		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
1963	}
1964}
1965
1966static void
1967em_disable_promisc(struct adapter *adapter)
1968{
1969	u32	reg_rctl;
1970
1971	reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
1972
1973	reg_rctl &=  (~E1000_RCTL_UPE);
1974	reg_rctl &=  (~E1000_RCTL_MPE);
1975	reg_rctl &=  (~E1000_RCTL_SBP);
1976	E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
1977}
1978
1979
1980/*********************************************************************
1981 *  Multicast Update
1982 *
1983 *  This routine is called whenever multicast address list is updated.
1984 *
1985 **********************************************************************/
1986
1987static void
1988em_set_multi(struct adapter *adapter)
1989{
1990	struct ifnet	*ifp = adapter->ifp;
1991	struct ifmultiaddr *ifma;
1992	u32 reg_rctl = 0;
1993	u8  *mta; /* Multicast array memory */
1994	int mcnt = 0;
1995
1996	IOCTL_DEBUGOUT("em_set_multi: begin");
1997
1998	if (adapter->hw.mac.type == e1000_82542 &&
1999	    adapter->hw.revision_id == E1000_REVISION_2) {
2000		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2001		if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
2002			e1000_pci_clear_mwi(&adapter->hw);
2003		reg_rctl |= E1000_RCTL_RST;
2004		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2005		msec_delay(5);
2006	}
2007
2008	/* Allocate temporary memory to setup array */
2009	mta = malloc(sizeof(u8) *
2010	    (ETH_ADDR_LEN * MAX_NUM_MULTICAST_ADDRESSES),
2011	    M_DEVBUF, M_NOWAIT | M_ZERO);
2012	if (mta == NULL)
2013		panic("em_set_multi memory failure\n");
2014
2015#if __FreeBSD_version < 800000
2016	IF_ADDR_LOCK(ifp);
2017#else
2018	if_maddr_rlock(ifp);
2019#endif
2020	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
2021		if (ifma->ifma_addr->sa_family != AF_LINK)
2022			continue;
2023
2024		if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
2025			break;
2026
2027		bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
2028		    &mta[mcnt * ETH_ADDR_LEN], ETH_ADDR_LEN);
2029		mcnt++;
2030	}
2031#if __FreeBSD_version < 800000
2032	IF_ADDR_UNLOCK(ifp);
2033#else
2034	if_maddr_runlock(ifp);
2035#endif
2036	if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES) {
2037		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2038		reg_rctl |= E1000_RCTL_MPE;
2039		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2040	} else
2041		e1000_update_mc_addr_list(&adapter->hw, mta, mcnt);
2042
2043	if (adapter->hw.mac.type == e1000_82542 &&
2044	    adapter->hw.revision_id == E1000_REVISION_2) {
2045		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2046		reg_rctl &= ~E1000_RCTL_RST;
2047		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2048		msec_delay(5);
2049		if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
2050			e1000_pci_set_mwi(&adapter->hw);
2051	}
2052	free(mta, M_DEVBUF);
2053}
2054
2055
2056/*********************************************************************
2057 *  Timer routine
2058 *
2059 *  This routine checks for link status and updates statistics.
2060 *
2061 **********************************************************************/
2062
2063static void
2064em_local_timer(void *arg)
2065{
2066	struct adapter	*adapter = arg;
2067	struct ifnet	*ifp = adapter->ifp;
2068	struct tx_ring	*txr = adapter->tx_rings;
2069
2070	EM_CORE_LOCK_ASSERT(adapter);
2071
2072	em_update_link_status(adapter);
2073	em_update_stats_counters(adapter);
2074
2075	/* Reset LAA into RAR[0] on 82571 */
2076	if (e1000_get_laa_state_82571(&adapter->hw) == TRUE)
2077		e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);
2078
2079	/*
2080	** Check how long it has been since any descriptor was cleaned
2081	*/
2082	for (int i = 0; i < adapter->num_queues; i++, txr++) {
2083		EM_TX_LOCK(txr);
2084		if (txr->watchdog_check == FALSE) {
2085			EM_TX_UNLOCK(txr);
2086			continue;
2087		}
2088		if ((ticks - txr->watchdog_time) > EM_WATCHDOG)
2089			goto hung;
2090		EM_TX_UNLOCK(txr);
2091	}
2092
2093	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
2094	return;
2095hung:
2096	device_printf(adapter->dev, "Watchdog timeout -- resetting\n");
2097	ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2098	adapter->watchdog_events++;
2099	EM_TX_UNLOCK(txr);
2100	em_init_locked(adapter);
2101}
2102
2103
2104static void
2105em_update_link_status(struct adapter *adapter)
2106{
2107	struct e1000_hw *hw = &adapter->hw;
2108	struct ifnet *ifp = adapter->ifp;
2109	device_t dev = adapter->dev;
2110	u32 link_check = 0;
2111
2112	/* Get the cached link value or read phy for real */
2113	switch (hw->phy.media_type) {
2114	case e1000_media_type_copper:
2115		if (hw->mac.get_link_status) {
2116			/* Do the work to read phy */
2117			e1000_check_for_link(hw);
2118			link_check = !hw->mac.get_link_status;
2119			if (link_check) /* ESB2 fix */
2120				e1000_cfg_on_link_up(hw);
2121		} else
2122			link_check = TRUE;
2123		break;
2124	case e1000_media_type_fiber:
2125		e1000_check_for_link(hw);
2126		link_check = (E1000_READ_REG(hw, E1000_STATUS) &
2127		    E1000_STATUS_LU);
2128		break;
2129	case e1000_media_type_internal_serdes:
2130		e1000_check_for_link(hw);
2131		link_check = adapter->hw.mac.serdes_has_link;
2132		break;
2133	default:
2134	case e1000_media_type_unknown:
2135		break;
2136	}
2137
2138	/* Now check for a transition */
2139	if (link_check && (adapter->link_active == 0)) {
2140		e1000_get_speed_and_duplex(hw, &adapter->link_speed,
2141		    &adapter->link_duplex);
2142		/* Check if we must disable SPEED_MODE bit on PCI-E */
2143		if ((adapter->link_speed != SPEED_1000) &&
2144		    ((hw->mac.type == e1000_82571) ||
2145		    (hw->mac.type == e1000_82572))) {
2146			int tarc0;
2147			tarc0 = E1000_READ_REG(hw, E1000_TARC(0));
2148			tarc0 &= ~SPEED_MODE_BIT;
2149			E1000_WRITE_REG(hw, E1000_TARC(0), tarc0);
2150		}
2151		if (bootverbose)
2152			device_printf(dev, "Link is up %d Mbps %s\n",
2153			    adapter->link_speed,
2154			    ((adapter->link_duplex == FULL_DUPLEX) ?
2155			    "Full Duplex" : "Half Duplex"));
2156		adapter->link_active = 1;
2157		adapter->smartspeed = 0;
2158		ifp->if_baudrate = adapter->link_speed * 1000000;
2159		if_link_state_change(ifp, LINK_STATE_UP);
2160	} else if (!link_check && (adapter->link_active == 1)) {
2161		ifp->if_baudrate = adapter->link_speed = 0;
2162		adapter->link_duplex = 0;
2163		if (bootverbose)
2164			device_printf(dev, "Link is Down\n");
2165		adapter->link_active = 0;
2166		/* Link down, disable watchdog */
2167		// JFV change later
2168		//adapter->watchdog_check = FALSE;
2169		if_link_state_change(ifp, LINK_STATE_DOWN);
2170	}
2171}
2172
2173/*********************************************************************
2174 *
2175 *  This routine disables all traffic on the adapter by issuing a
2176 *  global reset on the MAC and deallocates TX/RX buffers.
2177 *
2178 *  This routine should always be called with BOTH the CORE
2179 *  and TX locks.
2180 **********************************************************************/
2181
2182static void
2183em_stop(void *arg)
2184{
2185	struct adapter	*adapter = arg;
2186	struct ifnet	*ifp = adapter->ifp;
2187	struct tx_ring	*txr = adapter->tx_rings;
2188
2189	EM_CORE_LOCK_ASSERT(adapter);
2190
2191	INIT_DEBUGOUT("em_stop: begin");
2192
2193	em_disable_intr(adapter);
2194	callout_stop(&adapter->timer);
2195
2196	/* Tell the stack that the interface is no longer active */
2197	ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
2198
2199	/* Unarm watchdog timer. */
2200	for (int i = 0; i < adapter->num_queues; i++, txr++) {
2201		EM_TX_LOCK(txr);
2202		txr->watchdog_check = FALSE;
2203		EM_TX_UNLOCK(txr);
2204	}
2205
2206	e1000_reset_hw(&adapter->hw);
2207	E1000_WRITE_REG(&adapter->hw, E1000_WUC, 0);
2208
2209	e1000_led_off(&adapter->hw);
2210	e1000_cleanup_led(&adapter->hw);
2211}
2212
2213
2214/*********************************************************************
2215 *
2216 *  Determine hardware revision.
2217 *
2218 **********************************************************************/
2219static void
2220em_identify_hardware(struct adapter *adapter)
2221{
2222	device_t dev = adapter->dev;
2223
2224	/* Make sure our PCI config space has the necessary stuff set */
2225	adapter->hw.bus.pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2);
2226	if (!((adapter->hw.bus.pci_cmd_word & PCIM_CMD_BUSMASTEREN) &&
2227	    (adapter->hw.bus.pci_cmd_word & PCIM_CMD_MEMEN))) {
2228		device_printf(dev, "Memory Access and/or Bus Master bits "
2229		    "were not set!\n");
2230		adapter->hw.bus.pci_cmd_word |=
2231		    (PCIM_CMD_BUSMASTEREN | PCIM_CMD_MEMEN);
2232		pci_write_config(dev, PCIR_COMMAND,
2233		    adapter->hw.bus.pci_cmd_word, 2);
2234	}
2235
2236	/* Save off the information about this board */
2237	adapter->hw.vendor_id = pci_get_vendor(dev);
2238	adapter->hw.device_id = pci_get_device(dev);
2239	adapter->hw.revision_id = pci_read_config(dev, PCIR_REVID, 1);
2240	adapter->hw.subsystem_vendor_id =
2241	    pci_read_config(dev, PCIR_SUBVEND_0, 2);
2242	adapter->hw.subsystem_device_id =
2243	    pci_read_config(dev, PCIR_SUBDEV_0, 2);
2244
2245	/* Do Shared Code Init and Setup */
2246	if (e1000_set_mac_type(&adapter->hw)) {
2247		device_printf(dev, "Setup init failure\n");
2248		return;
2249	}
2250}
2251
2252static int
2253em_allocate_pci_resources(struct adapter *adapter)
2254{
2255	device_t	dev = adapter->dev;
2256	int		rid;
2257
2258	rid = PCIR_BAR(0);
2259	adapter->memory = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
2260	    &rid, RF_ACTIVE);
2261	if (adapter->memory == NULL) {
2262		device_printf(dev, "Unable to allocate bus resource: memory\n");
2263		return (ENXIO);
2264	}
2265	adapter->osdep.mem_bus_space_tag =
2266	    rman_get_bustag(adapter->memory);
2267	adapter->osdep.mem_bus_space_handle =
2268	    rman_get_bushandle(adapter->memory);
2269	adapter->hw.hw_addr = (u8 *)&adapter->osdep.mem_bus_space_handle;
2270
2271	/* Default to a single queue */
2272	adapter->num_queues = 1;
2273
2274	/*
2275	 * Setup MSI/X or MSI if PCI Express
2276	 */
2277	adapter->msix = em_setup_msix(adapter);
2278
2279	adapter->hw.back = &adapter->osdep;
2280
2281	return (0);
2282}
2283
2284/*********************************************************************
2285 *
2286 *  Setup the Legacy or MSI Interrupt handler
2287 *
2288 **********************************************************************/
2289int
2290em_allocate_legacy(struct adapter *adapter)
2291{
2292	device_t dev = adapter->dev;
2293	int error, rid = 0;
2294
2295	/* Manually turn off all interrupts */
2296	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2297
2298	if (adapter->msix == 1) /* using MSI */
2299		rid = 1;
2300	/* We allocate a single interrupt resource */
2301	adapter->res = bus_alloc_resource_any(dev,
2302	    SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2303	if (adapter->res == NULL) {
2304		device_printf(dev, "Unable to allocate bus resource: "
2305		    "interrupt\n");
2306		return (ENXIO);
2307	}
2308
2309	/*
2310	 * Allocate a fast interrupt and the associated
2311	 * deferred processing contexts.
2312	 */
2313	TASK_INIT(&adapter->que_task, 0, em_handle_que, adapter);
2314	TASK_INIT(&adapter->link_task, 0, em_handle_link, adapter);
2315	adapter->tq = taskqueue_create_fast("em_taskq", M_NOWAIT,
2316	    taskqueue_thread_enqueue, &adapter->tq);
2317	taskqueue_start_threads(&adapter->tq, 1, PI_NET, "%s taskq",
2318	    device_get_nameunit(adapter->dev));
2319	if ((error = bus_setup_intr(dev, adapter->res, INTR_TYPE_NET,
2320	    em_irq_fast, NULL, adapter, &adapter->tag)) != 0) {
2321		device_printf(dev, "Failed to register fast interrupt "
2322			    "handler: %d\n", error);
2323		taskqueue_free(adapter->tq);
2324		adapter->tq = NULL;
2325		return (error);
2326	}
2327
2328	return (0);
2329}
2330
2331/*********************************************************************
2332 *
2333 *  Setup the MSIX Interrupt handlers
2334 *   This is not really multiqueue; rather,
2335 *   it is just multiple interrupt vectors.
2336 *
2337 **********************************************************************/
2338int
2339em_allocate_msix(struct adapter *adapter)
2340{
2341	device_t	dev = adapter->dev;
2342	struct		tx_ring *txr = adapter->tx_rings;
2343	struct		rx_ring *rxr = adapter->rx_rings;
2344	int		error, rid, vector = 0;
2345
2346
2347	/* Make sure all interrupts are disabled */
2348	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2349
2350	/* First set up ring resources */
2351	for (int i = 0; i < adapter->num_queues; i++, txr++, rxr++) {
2352
2353		/* RX ring */
2354		rid = vector + 1;
2355
2356		rxr->res = bus_alloc_resource_any(dev,
2357		    SYS_RES_IRQ, &rid, RF_ACTIVE);
2358		if (rxr->res == NULL) {
2359			device_printf(dev,
2360			    "Unable to allocate bus resource: "
2361			    "RX MSIX Interrupt %d\n", i);
2362			return (ENXIO);
2363		}
2364		if ((error = bus_setup_intr(dev, rxr->res,
2365		    INTR_TYPE_NET | INTR_MPSAFE, NULL, em_msix_rx,
2366		    rxr, &rxr->tag)) != 0) {
2367			device_printf(dev, "Failed to register RX handler\n");
2368			return (error);
2369		}
2370		rxr->msix = vector++; /* NOTE increment vector for TX */
2371		TASK_INIT(&rxr->rx_task, 0, em_handle_rx, rxr);
2372		rxr->tq = taskqueue_create_fast("em_rxq", M_NOWAIT,
2373		    taskqueue_thread_enqueue, &rxr->tq);
2374		taskqueue_start_threads(&rxr->tq, 1, PI_NET, "%s rxq",
2375		    device_get_nameunit(adapter->dev));
2376		/*
2377		** Set the bit to enable interrupt
2378		** in E1000_IMS -- bits 20 and 21
2379		** are for RX0 and RX1, note this has
2380		** NOTHING to do with the MSIX vector
2381		*/
2382		rxr->ims = 1 << (20 + i);
2383		adapter->ivars |= (8 | rxr->msix) << (i * 4);
2384
2385		/* TX ring */
2386		rid = vector + 1;
2387		txr->res = bus_alloc_resource_any(dev,
2388		    SYS_RES_IRQ, &rid, RF_ACTIVE);
2389		if (txr->res == NULL) {
2390			device_printf(dev,
2391			    "Unable to allocate bus resource: "
2392			    "TX MSIX Interrupt %d\n", i);
2393			return (ENXIO);
2394		}
2395		if ((error = bus_setup_intr(dev, txr->res,
2396		    INTR_TYPE_NET | INTR_MPSAFE, NULL, em_msix_tx,
2397		    txr, &txr->tag)) != 0) {
2398			device_printf(dev, "Failed to register TX handler\n");
2399			return (error);
2400		}
2401		txr->msix = vector++; /* Increment vector for next pass */
2402		TASK_INIT(&txr->tx_task, 0, em_handle_tx, txr);
2403		txr->tq = taskqueue_create_fast("em_txq", M_NOWAIT,
2404		    taskqueue_thread_enqueue, &txr->tq);
2405		taskqueue_start_threads(&txr->tq, 1, PI_NET, "%s txq",
2406		    device_get_nameunit(adapter->dev));
2407		/*
2408		** Set the bit to enable interrupt
2409		** in E1000_IMS -- bits 22 and 23
2410		** are for TX0 and TX1, note this has
2411		** NOTHING to do with the MSIX vector
2412		*/
2413		txr->ims = 1 << (22 + i);
2414		adapter->ivars |= (8 | txr->msix) << (8 + (i * 4));
2415	}
2416
2417	/* Link interrupt */
2418	++rid;
2419	adapter->res = bus_alloc_resource_any(dev,
2420	    SYS_RES_IRQ, &rid, RF_ACTIVE);
2421	if (!adapter->res) {
2422		device_printf(dev, "Unable to allocate "
2423		    "bus resource: Link interrupt [%d]\n", rid);
2424		return (ENXIO);
2425	}
2426	/* Set the link handler function */
2427	error = bus_setup_intr(dev, adapter->res,
2428	    INTR_TYPE_NET | INTR_MPSAFE, NULL,
2429	    em_msix_link, adapter, &adapter->tag);
2430	if (error) {
2431		adapter->res = NULL;
2432		device_printf(dev, "Failed to register LINK handler\n");
2433		return (error);
2434	}
2435	adapter->linkvec = vector;
2436	adapter->ivars |= (8 | vector) << 16;
2437	adapter->ivars |= 0x80000000;
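	/*
	 * Worked example (editor's note) for a single queue pair on the
	 * 82574: RX0 uses vector 0, TX0 vector 1 and the link vector 2, so
	 * ivars = (8 | 0) << 0 | (8 | 1) << 8 | (8 | 2) << 16 | 0x80000000,
	 * where the 8 in each field is assumed to be the per-entry valid
	 * bit and bit 31 the final control bit the hardware expects.
	 */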
2438
2439	return (0);
2440}
2441
2442
2443static void
2444em_free_pci_resources(struct adapter *adapter)
2445{
2446	device_t	dev = adapter->dev;
2447	struct tx_ring	*txr;
2448	struct rx_ring	*rxr;
2449	int		rid;
2450
2451
2452	/*
2453	** Release all the queue interrupt resources:
2454	*/
2455	for (int i = 0; i < adapter->num_queues; i++) {
2456		txr = &adapter->tx_rings[i];
2457		rxr = &adapter->rx_rings[i];
2458		rid = txr->msix + 1;
2459		if (txr->tag != NULL) {
2460			bus_teardown_intr(dev, txr->res, txr->tag);
2461			txr->tag = NULL;
2462		}
2463		if (txr->res != NULL)
2464			bus_release_resource(dev, SYS_RES_IRQ,
2465			    rid, txr->res);
2466		rid = rxr->msix + 1;
2467		if (rxr->tag != NULL) {
2468			bus_teardown_intr(dev, rxr->res, rxr->tag);
2469			rxr->tag = NULL;
2470		}
2471		if (rxr->res != NULL)
2472			bus_release_resource(dev, SYS_RES_IRQ,
2473			    rid, rxr->res);
2474	}
2475
2476	if (adapter->linkvec) /* we are doing MSIX */
2477		rid = adapter->linkvec + 1;
2478	else
2479		rid = (adapter->msix != 0) ? 1 : 0;
2480
2481	if (adapter->tag != NULL) {
2482		bus_teardown_intr(dev, adapter->res, adapter->tag);
2483		adapter->tag = NULL;
2484	}
2485
2486	if (adapter->res != NULL)
2487		bus_release_resource(dev, SYS_RES_IRQ, rid, adapter->res);
2488
2489
2490	if (adapter->msix)
2491		pci_release_msi(dev);
2492
2493	if (adapter->msix_mem != NULL)
2494		bus_release_resource(dev, SYS_RES_MEMORY,
2495		    PCIR_BAR(EM_MSIX_BAR), adapter->msix_mem);
2496
2497	if (adapter->memory != NULL)
2498		bus_release_resource(dev, SYS_RES_MEMORY,
2499		    PCIR_BAR(0), adapter->memory);
2500
2501	if (adapter->flash != NULL)
2502		bus_release_resource(dev, SYS_RES_MEMORY,
2503		    EM_FLASH, adapter->flash);
2504}
2505
2506/*
2507 * Setup MSI or MSI/X
2508 */
2509static int
2510em_setup_msix(struct adapter *adapter)
2511{
2512	device_t dev = adapter->dev;
2513	int val = 0;
2514
2515
2516	/* Setup MSI/X for Hartwell */
2517	if ((adapter->hw.mac.type == e1000_82574) &&
2518	    (em_enable_msix == TRUE)) {
2519		/* Map the MSIX BAR */
2520		int rid = PCIR_BAR(EM_MSIX_BAR);
2521		adapter->msix_mem = bus_alloc_resource_any(dev,
2522		    SYS_RES_MEMORY, &rid, RF_ACTIVE);
2523		if (!adapter->msix_mem) {
2524			/* May not be enabled */
2525			device_printf(adapter->dev,
2526			    "Unable to map MSIX table\n");
2527			goto msi;
2528		}
2529		val = pci_msix_count(dev);
2530		if (val != 5) {
2531			bus_release_resource(dev, SYS_RES_MEMORY,
2532			    PCIR_BAR(EM_MSIX_BAR), adapter->msix_mem);
2533			adapter->msix_mem = NULL;
2534			device_printf(adapter->dev,
2535			    "MSIX vectors wrong, using MSI\n");
2536			goto msi;
2537		}
2538		if (em_msix_queues == 2) {
2539			val = 5;
2540			adapter->num_queues = 2;
2541		} else {
2542			val = 3;
2543			adapter->num_queues = 1;
2544		}
2545		if (pci_alloc_msix(dev, &val) == 0) {
2546			device_printf(adapter->dev,
2547			    "Using MSIX interrupts "
2548			    "with %d vectors\n", val);
2549		}
2550
2551		return (val);
2552	}
2553msi:
2554	val = pci_msi_count(dev);
2555	if (val == 1 && pci_alloc_msi(dev, &val) == 0) {
2556		adapter->msix = 1;
2557		device_printf(adapter->dev, "Using MSI interrupt\n");
2558		return (val);
2559	}
2560	/* Should only happen due to manual intervention */
2561	device_printf(adapter->dev, "Setup MSIX failure\n");
2562	return (0);
2563}
2564
2565
2566/*********************************************************************
2567 *
2568 *  Initialize the hardware to a configuration
2569 *  as specified by the adapter structure.
2570 *
2571 **********************************************************************/
2572static void
2573em_reset(struct adapter *adapter)
2574{
2575	device_t	dev = adapter->dev;
2576	struct e1000_hw	*hw = &adapter->hw;
2577	u16		rx_buffer_size;
2578
2579	INIT_DEBUGOUT("em_reset: begin");
2580
2581	/* Disable smart power down by default on newer adapters. */
2582	if (!em_smart_pwr_down && (hw->mac.type == e1000_82571 ||
2583	    hw->mac.type == e1000_82572)) {
2584		u16 phy_tmp = 0;
2585
2586		/* Speed up time to link by disabling smart power down. */
2587		e1000_read_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, &phy_tmp);
2588		phy_tmp &= ~IGP02E1000_PM_SPD;
2589		e1000_write_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, phy_tmp);
2590	}
2591
2592	/*
2593	 * These parameters control the automatic generation (Tx) and
2594	 * response (Rx) to Ethernet PAUSE frames.
2595	 * - High water mark should allow for at least two frames to be
2596	 *   received after sending an XOFF.
2597	 * - Low water mark works best when it is very near the high water mark.
2598	 *   This allows the receiver to restart by sending XON when it has
2599	 *   drained a bit. Here we use an arbitrary value of 1500 which will
2600	 *   restart after one full frame is pulled from the buffer. There
2601	 *   could be several smaller frames in the buffer and if so they will
2602	 *   not trigger the XON until their total number reduces the buffer
2603	 *   by 1500.
2604	 * - The pause time is fairly large at 1000 x 512ns = 512 usec.
2605	 */
2606	rx_buffer_size = ((E1000_READ_REG(hw, E1000_PBA) & 0xffff) << 10 );
2607
2608	hw->fc.high_water = rx_buffer_size -
2609	    roundup2(adapter->max_frame_size, 1024);
2610	hw->fc.low_water = hw->fc.high_water - 1500;
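	/*
	 * Worked example (editor's note, assuming typical values): if
	 * E1000_PBA reports 0x0030, the RX allocation is 48 KB, so
	 * rx_buffer_size = 48 * 1024 = 49152.  With a 1518-byte maximum
	 * frame, roundup2(1518, 1024) = 2048, giving high_water = 47104
	 * and low_water = 47104 - 1500 = 45604.
	 */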
2611
2612	if (hw->mac.type == e1000_80003es2lan)
2613		hw->fc.pause_time = 0xFFFF;
2614	else
2615		hw->fc.pause_time = EM_FC_PAUSE_TIME;
2616
2617	hw->fc.send_xon = TRUE;
2618
2619	/* Set flow control, using the tunable value if it is sane */
2620	if ((em_fc_setting >= 0) && (em_fc_setting < 4))
2621		hw->fc.requested_mode = em_fc_setting;
2622	else
2623		hw->fc.requested_mode = e1000_fc_none;
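	/*
	 * For reference (editor's note; values as defined by the e1000
	 * shared code): 0 = e1000_fc_none, 1 = e1000_fc_rx_pause,
	 * 2 = e1000_fc_tx_pause and 3 = e1000_fc_full, hence the 0..3
	 * sanity range checked above.
	 */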
2624
2625	/* Override - workaround for PCHLAN issue */
2626	if (hw->mac.type == e1000_pchlan)
2627		hw->fc.requested_mode = e1000_fc_rx_pause;
2628
2629	/* Issue a global reset */
2630	e1000_reset_hw(hw);
2631	E1000_WRITE_REG(hw, E1000_WUC, 0);
2632
2633	if (e1000_init_hw(hw) < 0) {
2634		device_printf(dev, "Hardware Initialization Failed\n");
2635		return;
2636	}
2637
2638	E1000_WRITE_REG(hw, E1000_VET, ETHERTYPE_VLAN);
2639	e1000_get_phy_info(hw);
2640	e1000_check_for_link(hw);
2641	return;
2642}
2643
2644/*********************************************************************
2645 *
2646 *  Setup networking device structure and register an interface.
2647 *
2648 **********************************************************************/
2649static void
2650em_setup_interface(device_t dev, struct adapter *adapter)
2651{
2652	struct ifnet   *ifp;
2653
2654	INIT_DEBUGOUT("em_setup_interface: begin");
2655
2656	ifp = adapter->ifp = if_alloc(IFT_ETHER);
2657	if (ifp == NULL)
2658		panic("%s: can not if_alloc()", device_get_nameunit(dev));
2659	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
2660	ifp->if_mtu = ETHERMTU;
2661	ifp->if_init =  em_init;
2662	ifp->if_softc = adapter;
2663	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
2664	ifp->if_ioctl = em_ioctl;
2665	ifp->if_start = em_start;
2666	IFQ_SET_MAXLEN(&ifp->if_snd, adapter->num_tx_desc - 1);
2667	ifp->if_snd.ifq_drv_maxlen = adapter->num_tx_desc - 1;
2668	IFQ_SET_READY(&ifp->if_snd);
2669
2670	ether_ifattach(ifp, adapter->hw.mac.addr);
2671
2672	ifp->if_capabilities = ifp->if_capenable = 0;
2673
2674#ifdef EM_MULTIQUEUE
2675	/* Multiqueue tx functions */
2676	ifp->if_transmit = em_mq_start;
2677	ifp->if_qflush = em_qflush;
2678#endif
2679
2680	ifp->if_capabilities |= IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM;
2681	ifp->if_capenable |= IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM;
2682
2683	/* Enable TSO by default, can disable with ifconfig */
2684	ifp->if_capabilities |= IFCAP_TSO4;
2685	ifp->if_capenable |= IFCAP_TSO4;
2686
2687	/*
2688	 * Tell the upper layer(s) we
2689	 * support full VLAN capability
2690	 */
2691	ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header);
2692	ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU;
2693	ifp->if_capenable |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU;
2694
2695	/*
2696	** Don't turn this on by default: if vlans are
2697	** created on another pseudo device (e.g. lagg),
2698	** vlan events are not passed through and operation
2699	** breaks, though with HW FILTER off it works. If
2700	** using vlans directly on the em driver you can
2701	** enable this and get full hardware tag filtering.
2702	*/
2703	ifp->if_capabilities |= IFCAP_VLAN_HWFILTER;
2704
2705#ifdef DEVICE_POLLING
2706	ifp->if_capabilities |= IFCAP_POLLING;
2707#endif
2708
2709	/* Enable only WOL MAGIC by default */
2710	if (adapter->wol) {
2711		ifp->if_capabilities |= IFCAP_WOL;
2712		ifp->if_capenable |= IFCAP_WOL_MAGIC;
2713	}
2714
2715	/*
2716	 * Specify the media types supported by this adapter and register
2717	 * callbacks to update media and link information
2718	 */
2719	ifmedia_init(&adapter->media, IFM_IMASK,
2720	    em_media_change, em_media_status);
2721	if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
2722	    (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
2723		u_char fiber_type = IFM_1000_SX;	/* default type */
2724
2725		ifmedia_add(&adapter->media, IFM_ETHER | fiber_type | IFM_FDX,
2726			    0, NULL);
2727		ifmedia_add(&adapter->media, IFM_ETHER | fiber_type, 0, NULL);
2728	} else {
2729		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T, 0, NULL);
2730		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T | IFM_FDX,
2731			    0, NULL);
2732		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX,
2733			    0, NULL);
2734		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX | IFM_FDX,
2735			    0, NULL);
2736		if (adapter->hw.phy.type != e1000_phy_ife) {
2737			ifmedia_add(&adapter->media,
2738				IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
2739			ifmedia_add(&adapter->media,
2740				IFM_ETHER | IFM_1000_T, 0, NULL);
2741		}
2742	}
2743	ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL);
2744	ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO);
2745}
2746
2747
2748/*
2749 * Manage DMA'able memory.
2750 */
2751static void
2752em_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
2753{
2754	if (error)
2755		return;
2756	*(bus_addr_t *) arg = segs[0].ds_addr;
2757}
2758
2759static int
2760em_dma_malloc(struct adapter *adapter, bus_size_t size,
2761        struct em_dma_alloc *dma, int mapflags)
2762{
2763	int error;
2764
2765	error = bus_dma_tag_create(bus_get_dma_tag(adapter->dev), /* parent */
2766				EM_DBA_ALIGN, 0,	/* alignment, bounds */
2767				BUS_SPACE_MAXADDR,	/* lowaddr */
2768				BUS_SPACE_MAXADDR,	/* highaddr */
2769				NULL, NULL,		/* filter, filterarg */
2770				size,			/* maxsize */
2771				1,			/* nsegments */
2772				size,			/* maxsegsize */
2773				0,			/* flags */
2774				NULL,			/* lockfunc */
2775				NULL,			/* lockarg */
2776				&dma->dma_tag);
2777	if (error) {
2778		device_printf(adapter->dev,
2779		    "%s: bus_dma_tag_create failed: %d\n",
2780		    __func__, error);
2781		goto fail_0;
2782	}
2783
2784	error = bus_dmamem_alloc(dma->dma_tag, (void**) &dma->dma_vaddr,
2785	    BUS_DMA_NOWAIT | BUS_DMA_COHERENT, &dma->dma_map);
2786	if (error) {
2787		device_printf(adapter->dev,
2788		    "%s: bus_dmamem_alloc(%ju) failed: %d\n",
2789		    __func__, (uintmax_t)size, error);
2790		goto fail_2;
2791	}
2792
2793	dma->dma_paddr = 0;
2794	error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
2795	    size, em_dmamap_cb, &dma->dma_paddr, mapflags | BUS_DMA_NOWAIT);
2796	if (error || dma->dma_paddr == 0) {
2797		device_printf(adapter->dev,
2798		    "%s: bus_dmamap_load failed: %d\n",
2799		    __func__, error);
2800		goto fail_3;
2801	}
2802
2803	return (0);
2804
2805fail_3:
2806	bus_dmamap_unload(dma->dma_tag, dma->dma_map);
2807fail_2:
2808	bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
2809	bus_dma_tag_destroy(dma->dma_tag);
2810fail_0:
2811	dma->dma_map = NULL;
2812	dma->dma_tag = NULL;
2813
2814	return (error);
2815}
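/*
 * Usage sketch (editor's note): callers pair these helpers, e.g.
 * em_dma_malloc(adapter, tsize, &txr->txdma, BUS_DMA_NOWAIT) to back a
 * descriptor ring and em_dma_free(adapter, &txr->txdma) on teardown,
 * exactly as em_allocate_queues() below does.
 */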
2816
2817static void
2818em_dma_free(struct adapter *adapter, struct em_dma_alloc *dma)
2819{
2820	if (dma->dma_tag == NULL)
2821		return;
2822	if (dma->dma_map != NULL) {
2823		bus_dmamap_sync(dma->dma_tag, dma->dma_map,
2824		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
2825		bus_dmamap_unload(dma->dma_tag, dma->dma_map);
2826		bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
2827		dma->dma_map = NULL;
2828	}
2829	bus_dma_tag_destroy(dma->dma_tag);
2830	dma->dma_tag = NULL;
2831}
2832
2833
2834/*********************************************************************
2835 *
2836 *  Allocate memory for the transmit and receive rings, and then
2837 *  the descriptors associated with each, called only once at attach.
2838 *
2839 **********************************************************************/
2840static int
2841em_allocate_queues(struct adapter *adapter)
2842{
2843	device_t		dev = adapter->dev;
2844	struct tx_ring		*txr = NULL;
2845	struct rx_ring		*rxr = NULL;
2846	int rsize, tsize, error = E1000_SUCCESS;
2847	int txconf = 0, rxconf = 0;
2848
2849
2850	/* Allocate the TX ring struct memory */
2851	if (!(adapter->tx_rings =
2852	    (struct tx_ring *) malloc(sizeof(struct tx_ring) *
2853	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2854		device_printf(dev, "Unable to allocate TX ring memory\n");
2855		error = ENOMEM;
2856		goto fail;
2857	}
2858
2859	/* Now allocate the RX */
2860	if (!(adapter->rx_rings =
2861	    (struct rx_ring *) malloc(sizeof(struct rx_ring) *
2862	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2863		device_printf(dev, "Unable to allocate RX ring memory\n");
2864		error = ENOMEM;
2865		goto rx_fail;
2866	}
2867
2868	tsize = roundup2(adapter->num_tx_desc *
2869	    sizeof(struct e1000_tx_desc), EM_DBA_ALIGN);
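	/*
	 * Sizing example (editor's note, assuming the usual defaults):
	 * a legacy descriptor is 16 bytes, so 1024 TX descriptors give
	 * tsize = roundup2(1024 * 16, EM_DBA_ALIGN) = 16384 bytes,
	 * already a multiple of the assumed 128-byte alignment.
	 */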
2870	/*
2871	 * Now set up the TX queues, txconf is needed to handle the
2872	 * possibility that things fail midcourse and we need to
2873	 * undo memory gracefully
2874	 */
2875	for (int i = 0; i < adapter->num_queues; i++, txconf++) {
2876		/* Set up some basics */
2877		txr = &adapter->tx_rings[i];
2878		txr->adapter = adapter;
2879		txr->me = i;
2880
2881		/* Initialize the TX lock */
2882		snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
2883		    device_get_nameunit(dev), txr->me);
2884		mtx_init(&txr->tx_mtx, txr->mtx_name, NULL, MTX_DEF);
2885
2886		if (em_dma_malloc(adapter, tsize,
2887			&txr->txdma, BUS_DMA_NOWAIT)) {
2888			device_printf(dev,
2889			    "Unable to allocate TX Descriptor memory\n");
2890			error = ENOMEM;
2891			goto err_tx_desc;
2892		}
2893		txr->tx_base = (struct e1000_tx_desc *)txr->txdma.dma_vaddr;
2894		bzero((void *)txr->tx_base, tsize);
2895
2896		if (em_allocate_transmit_buffers(txr)) {
2897			device_printf(dev,
2898			    "Critical Failure setting up transmit buffers\n");
2899			error = ENOMEM;
2900			goto err_tx_desc;
2901		}
2902#if __FreeBSD_version >= 800000
2903		/* Allocate a buf ring */
2904		txr->br = buf_ring_alloc(4096, M_DEVBUF,
2905		    M_WAITOK, &txr->tx_mtx);
2906#endif
2907	}
2908
2909	/*
2910	 * Next the RX queues...
2911	 */
2912	rsize = roundup2(adapter->num_rx_desc *
2913	    sizeof(struct e1000_rx_desc), EM_DBA_ALIGN);
2914	for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
2915		rxr = &adapter->rx_rings[i];
2916		rxr->adapter = adapter;
2917		rxr->me = i;
2918
2919		/* Initialize the RX lock */
2920		snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
2921		    device_get_nameunit(dev), rxr->me);
2922		mtx_init(&rxr->rx_mtx, rxr->mtx_name, NULL, MTX_DEF);
2923
2924		if (em_dma_malloc(adapter, rsize,
2925			&rxr->rxdma, BUS_DMA_NOWAIT)) {
2926			device_printf(dev,
2927			    "Unable to allocate RX Descriptor memory\n");
2928			error = ENOMEM;
2929			goto err_rx_desc;
2930		}
2931		rxr->rx_base = (struct e1000_rx_desc *)rxr->rxdma.dma_vaddr;
2932		bzero((void *)rxr->rx_base, rsize);
2933
2934		/* Allocate receive buffers for the ring */
2935		if (em_allocate_receive_buffers(rxr)) {
2936			device_printf(dev,
2937			    "Critical Failure setting up receive buffers\n");
2938			error = ENOMEM;
2939			goto err_rx_desc;
2940		}
2941	}
2942
2943	return (0);
2944
2945err_rx_desc:
2946	for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
2947		em_dma_free(adapter, &rxr->rxdma);
2948err_tx_desc:
2949	for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
2950		em_dma_free(adapter, &txr->txdma);
2951	free(adapter->rx_rings, M_DEVBUF);
2952rx_fail:
2953#if __FreeBSD_version >= 800000
2954	if (txr != NULL && txr->br != NULL)
		buf_ring_free(txr->br, M_DEVBUF);
2955#endif
2956	free(adapter->tx_rings, M_DEVBUF);
2957fail:
2958	return (error);
2959}
2960
2961
2962/*********************************************************************
2963 *
2964 *  Allocate memory for tx_buffer structures. The tx_buffer stores all
2965 *  the information needed to transmit a packet on the wire. This is
2966 *  called only once at attach, setup is done every reset.
2967 *
2968 **********************************************************************/
2969static int
2970em_allocate_transmit_buffers(struct tx_ring *txr)
2971{
2972	struct adapter *adapter = txr->adapter;
2973	device_t dev = adapter->dev;
2974	struct em_buffer *txbuf;
2975	int error, i;
2976
2977	/*
2978	 * Setup DMA descriptor areas.
2979	 */
2980	if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
2981			       1, 0,			/* alignment, bounds */
2982			       BUS_SPACE_MAXADDR,	/* lowaddr */
2983			       BUS_SPACE_MAXADDR,	/* highaddr */
2984			       NULL, NULL,		/* filter, filterarg */
2985			       EM_TSO_SIZE,		/* maxsize */
2986			       EM_MAX_SCATTER,		/* nsegments */
2987			       PAGE_SIZE,		/* maxsegsize */
2988			       0,			/* flags */
2989			       NULL,			/* lockfunc */
2990			       NULL,			/* lockfuncarg */
2991			       &txr->txtag))) {
2992		device_printf(dev,"Unable to allocate TX DMA tag\n");
2993		goto fail;
2994	}
2995
2996	if (!(txr->tx_buffers =
2997	    (struct em_buffer *) malloc(sizeof(struct em_buffer) *
2998	    adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2999		device_printf(dev, "Unable to allocate tx_buffer memory\n");
3000		error = ENOMEM;
3001		goto fail;
3002	}
3003
3004	/* Create the descriptor buffer dma maps */
3005	txbuf = txr->tx_buffers;
3006	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3007		error = bus_dmamap_create(txr->txtag, 0, &txbuf->map);
3008		if (error != 0) {
3009			device_printf(dev, "Unable to create TX DMA map\n");
3010			goto fail;
3011		}
3012	}
3013
3014	return 0;
3015fail:
3016	/* We free all, it handles case where we are in the middle */
3017	em_free_transmit_structures(adapter);
3018	return (error);
3019}
3020
3021/*********************************************************************
3022 *
3023 *  Initialize a transmit ring.
3024 *
3025 **********************************************************************/
3026static void
3027em_setup_transmit_ring(struct tx_ring *txr)
3028{
3029	struct adapter *adapter = txr->adapter;
3030	struct em_buffer *txbuf;
3031	int i;
3032
3033	/* Clear the old descriptor contents */
3034	EM_TX_LOCK(txr);
3035	bzero((void *)txr->tx_base,
3036	      (sizeof(struct e1000_tx_desc)) * adapter->num_tx_desc);
3037	/* Reset indices */
3038	txr->next_avail_desc = 0;
3039	txr->next_to_clean = 0;
3040
3041	/* Free any existing tx buffers. */
3042	txbuf = txr->tx_buffers;
3043	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3044		if (txbuf->m_head != NULL) {
3045			bus_dmamap_sync(txr->txtag, txbuf->map,
3046			    BUS_DMASYNC_POSTWRITE);
3047			bus_dmamap_unload(txr->txtag, txbuf->map);
3048			m_freem(txbuf->m_head);
3049			txbuf->m_head = NULL;
3050		}
3051		/* clear the watch index */
3052		txbuf->next_eop = -1;
3053	}
3054
3055	/* Set number of descriptors available */
3056	txr->tx_avail = adapter->num_tx_desc;
3057
3058	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3059	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3060	EM_TX_UNLOCK(txr);
3061}
3062
3063/*********************************************************************
3064 *
3065 *  Initialize all transmit rings.
3066 *
3067 **********************************************************************/
3068static void
3069em_setup_transmit_structures(struct adapter *adapter)
3070{
3071	struct tx_ring *txr = adapter->tx_rings;
3072
3073	for (int i = 0; i < adapter->num_queues; i++, txr++)
3074		em_setup_transmit_ring(txr);
3075
3076	return;
3077}
3078
3079/*********************************************************************
3080 *
3081 *  Enable transmit unit.
3082 *
3083 **********************************************************************/
3084static void
3085em_initialize_transmit_unit(struct adapter *adapter)
3086{
3087	struct tx_ring	*txr = adapter->tx_rings;
3088	struct e1000_hw	*hw = &adapter->hw;
3089	u32	tctl, tarc, tipg = 0;
3090
3091	INIT_DEBUGOUT("em_initialize_transmit_unit: begin");
3092
3093	for (int i = 0; i < adapter->num_queues; i++, txr++) {
3094		u64 bus_addr = txr->txdma.dma_paddr;
3095		/* Base and Len of TX Ring */
3096		E1000_WRITE_REG(hw, E1000_TDLEN(i),
3097	    	    adapter->num_tx_desc * sizeof(struct e1000_tx_desc));
3098		E1000_WRITE_REG(hw, E1000_TDBAH(i),
3099	    	    (u32)(bus_addr >> 32));
3100		E1000_WRITE_REG(hw, E1000_TDBAL(i),
3101	    	    (u32)bus_addr);
3102		/* Init the HEAD/TAIL indices */
3103		E1000_WRITE_REG(hw, E1000_TDT(i), 0);
3104		E1000_WRITE_REG(hw, E1000_TDH(i), 0);
3105
3106		HW_DEBUGOUT2("Base = %x, Length = %x\n",
3107		    E1000_READ_REG(&adapter->hw, E1000_TDBAL(i)),
3108		    E1000_READ_REG(&adapter->hw, E1000_TDLEN(i)));
3109
3110		txr->watchdog_check = FALSE;
3111	}
3112
3113	/* Set the default values for the Tx Inter Packet Gap timer */
3114	switch (adapter->hw.mac.type) {
3115	case e1000_82542:
3116		tipg = DEFAULT_82542_TIPG_IPGT;
3117		tipg |= DEFAULT_82542_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT;
3118		tipg |= DEFAULT_82542_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT;
3119		break;
3120	case e1000_80003es2lan:
3121		tipg = DEFAULT_82543_TIPG_IPGR1;
3122		tipg |= DEFAULT_80003ES2LAN_TIPG_IPGR2 <<
3123		    E1000_TIPG_IPGR2_SHIFT;
3124		break;
3125	default:
3126		if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
3127		    (adapter->hw.phy.media_type ==
3128		    e1000_media_type_internal_serdes))
3129			tipg = DEFAULT_82543_TIPG_IPGT_FIBER;
3130		else
3131			tipg = DEFAULT_82543_TIPG_IPGT_COPPER;
3132		tipg |= DEFAULT_82543_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT;
3133		tipg |= DEFAULT_82543_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT;
3134	}
3135
3136	E1000_WRITE_REG(&adapter->hw, E1000_TIPG, tipg);
3137	E1000_WRITE_REG(&adapter->hw, E1000_TIDV, adapter->tx_int_delay.value);
3138
3139	if (adapter->hw.mac.type >= e1000_82540)
3140		E1000_WRITE_REG(&adapter->hw, E1000_TADV,
3141		    adapter->tx_abs_int_delay.value);
3142
3143	if ((adapter->hw.mac.type == e1000_82571) ||
3144	    (adapter->hw.mac.type == e1000_82572)) {
3145		tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
3146		tarc |= SPEED_MODE_BIT;
3147		E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3148	} else if (adapter->hw.mac.type == e1000_80003es2lan) {
3149		tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
3150		tarc |= 1;
3151		E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3152		tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(1));
3153		tarc |= 1;
3154		E1000_WRITE_REG(&adapter->hw, E1000_TARC(1), tarc);
3155	}
3156
3157	adapter->txd_cmd = E1000_TXD_CMD_IFCS;
3158	if (adapter->tx_int_delay.value > 0)
3159		adapter->txd_cmd |= E1000_TXD_CMD_IDE;
3160
3161	/* Program the Transmit Control Register */
3162	tctl = E1000_READ_REG(&adapter->hw, E1000_TCTL);
3163	tctl &= ~E1000_TCTL_CT;
3164	tctl |= (E1000_TCTL_PSP | E1000_TCTL_RTLC | E1000_TCTL_EN |
3165		   (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT));
3166
3167	if (adapter->hw.mac.type >= e1000_82571)
3168		tctl |= E1000_TCTL_MULR;
3169
3170	/* This write will effectively turn on the transmit unit. */
3171	E1000_WRITE_REG(&adapter->hw, E1000_TCTL, tctl);
3172
3173}
3174
3175
3176/*********************************************************************
3177 *
3178 *  Free all transmit rings.
3179 *
3180 **********************************************************************/
3181static void
3182em_free_transmit_structures(struct adapter *adapter)
3183{
3184	struct tx_ring *txr = adapter->tx_rings;
3185
3186	for (int i = 0; i < adapter->num_queues; i++, txr++) {
3187		EM_TX_LOCK(txr);
3188		em_free_transmit_buffers(txr);
3189		em_dma_free(adapter, &txr->txdma);
3190		EM_TX_UNLOCK(txr);
3191		EM_TX_LOCK_DESTROY(txr);
3192	}
3193
3194	free(adapter->tx_rings, M_DEVBUF);
3195}
3196
3197/*********************************************************************
3198 *
3199 *  Free transmit ring related data structures.
3200 *
3201 **********************************************************************/
3202static void
3203em_free_transmit_buffers(struct tx_ring *txr)
3204{
3205	struct adapter		*adapter = txr->adapter;
3206	struct em_buffer	*txbuf;
3207
3208	INIT_DEBUGOUT("free_transmit_ring: begin");
3209
3210	if (txr->tx_buffers == NULL)
3211		return;
3212
3213	for (int i = 0; i < adapter->num_tx_desc; i++) {
3214		txbuf = &txr->tx_buffers[i];
3215		if (txbuf->m_head != NULL) {
3216			bus_dmamap_sync(txr->txtag, txbuf->map,
3217			    BUS_DMASYNC_POSTWRITE);
3218			bus_dmamap_unload(txr->txtag,
3219			    txbuf->map);
3220			m_freem(txbuf->m_head);
3221			txbuf->m_head = NULL;
3222			if (txbuf->map != NULL) {
3223				bus_dmamap_destroy(txr->txtag,
3224				    txbuf->map);
3225				txbuf->map = NULL;
3226			}
3227		} else if (txbuf->map != NULL) {
3228			bus_dmamap_unload(txr->txtag,
3229			    txbuf->map);
3230			bus_dmamap_destroy(txr->txtag,
3231			    txbuf->map);
3232			txbuf->map = NULL;
3233		}
3234	}
3235#if __FreeBSD_version >= 800000
3236	if (txr->br != NULL)
3237		buf_ring_free(txr->br, M_DEVBUF);
3238#endif
3239	if (txr->tx_buffers != NULL) {
3240		free(txr->tx_buffers, M_DEVBUF);
3241		txr->tx_buffers = NULL;
3242	}
3243	if (txr->txtag != NULL) {
3244		bus_dma_tag_destroy(txr->txtag);
3245		txr->txtag = NULL;
3246	}
3247	return;
3248}
3249
3250
3251/*********************************************************************
3252 *
3253 *  The offload context needs to be set when we transfer the first
3254 *  packet of a particular protocol (TCP/UDP). This routine has been
3255 *  enhanced to deal with inserted VLAN headers, and IPV6 (not complete)
3256 *
3257 *  Added back the old method of keeping the current context type
3258 *  and not setting if unnecessary, as this is reported to be a
3259 *  big performance win.  -jfv
3260 **********************************************************************/
3261static void
3262em_transmit_checksum_setup(struct tx_ring *txr, struct mbuf *mp,
3263    u32 *txd_upper, u32 *txd_lower)
3264{
3265	struct adapter			*adapter = txr->adapter;
3266	struct e1000_context_desc	*TXD = NULL;
3267	struct em_buffer *tx_buffer;
3268	struct ether_vlan_header *eh;
3269	struct ip *ip = NULL;
3270	struct ip6_hdr *ip6;
3271	int cur, ehdrlen;
3272	u32 cmd, hdr_len, ip_hlen;
3273	u16 etype;
3274	u8 ipproto;
3275
3276
3277	cmd = hdr_len = ipproto = 0;
3278	*txd_upper = *txd_lower = 0;
3279	cur = txr->next_avail_desc;
3280
3281	/*
3282	 * Determine where frame payload starts.
3283	 * Jump over vlan headers if already present,
3284	 * helpful for QinQ too.
3285	 */
3286	eh = mtod(mp, struct ether_vlan_header *);
3287	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
3288		etype = ntohs(eh->evl_proto);
3289		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3290	} else {
3291		etype = ntohs(eh->evl_encap_proto);
3292		ehdrlen = ETHER_HDR_LEN;
3293	}
3294
3295	/*
3296	 * We only support TCP/UDP for IPv4 and IPv6 for the moment.
3297	 * TODO: Support SCTP too when it hits the tree.
3298	 */
3299	switch (etype) {
3300	case ETHERTYPE_IP:
3301		ip = (struct ip *)(mp->m_data + ehdrlen);
3302		ip_hlen = ip->ip_hl << 2;
3303
3304		/* Setup of IP header checksum. */
3305		if (mp->m_pkthdr.csum_flags & CSUM_IP) {
3306			/*
3307			 * Start offset for header checksum calculation.
3308			 * End offset for header checksum calculation.
3309			 * Offset of place to put the checksum.
3310			 */
3311			TXD = (struct e1000_context_desc *)
3312			    &txr->tx_base[cur];
3313			TXD->lower_setup.ip_fields.ipcss = ehdrlen;
3314			TXD->lower_setup.ip_fields.ipcse =
3315			    htole16(ehdrlen + ip_hlen);
3316			TXD->lower_setup.ip_fields.ipcso =
3317			    ehdrlen + offsetof(struct ip, ip_sum);
3318			cmd |= E1000_TXD_CMD_IP;
3319			*txd_upper |= E1000_TXD_POPTS_IXSM << 8;
3320		}
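		/*
		 * Worked example (editor's note): for an untagged IPv4
		 * frame with no IP options, ehdrlen = 14 and ip_hlen = 20,
		 * so the context is loaded with ipcss = 14, ipcse = 34 and
		 * ipcso = 14 + 10 = 24 (offsetof(struct ip, ip_sum) is 10).
		 */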
3321
3322		hdr_len = ehdrlen + ip_hlen;
3323		ipproto = ip->ip_p;
3324		break;
3325
3326	case ETHERTYPE_IPV6:
3327		ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
3328		ip_hlen = sizeof(struct ip6_hdr); /* XXX: No header stacking. */
3329
3330		/* IPv6 doesn't have a header checksum. */
3331
3332		hdr_len = ehdrlen + ip_hlen;
3333		ipproto = ip6->ip6_nxt;
3334		break;
3335
3336	default:
3337		return;
3338	}
3339
3340	switch (ipproto) {
3341	case IPPROTO_TCP:
3342		if (mp->m_pkthdr.csum_flags & CSUM_TCP) {
3343			*txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
3344			*txd_upper |= E1000_TXD_POPTS_TXSM << 8;
3345			/* no need for context if already set */
3346			if (txr->last_hw_offload == CSUM_TCP)
3347				return;
3348			txr->last_hw_offload = CSUM_TCP;
3349			/*
3350			 * Start offset for payload checksum calculation.
3351			 * End offset for payload checksum calculation.
3352			 * Offset of place to put the checksum.
3353			 */
3354			TXD = (struct e1000_context_desc *)
3355			    &txr->tx_base[cur];
3356			TXD->upper_setup.tcp_fields.tucss = hdr_len;
3357			TXD->upper_setup.tcp_fields.tucse = htole16(0);
3358			TXD->upper_setup.tcp_fields.tucso =
3359			    hdr_len + offsetof(struct tcphdr, th_sum);
3360			cmd |= E1000_TXD_CMD_TCP;
3361		}
3362		break;
3363	case IPPROTO_UDP:
3364	{
3365		if (mp->m_pkthdr.csum_flags & CSUM_UDP) {
3366			*txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
3367			*txd_upper |= E1000_TXD_POPTS_TXSM << 8;
3368			/* no need for context if already set */
3369			if (txr->last_hw_offload == CSUM_UDP)
3370				return;
3371			txr->last_hw_offload = CSUM_UDP;
3372			/*
3373			 * Start offset for header checksum calculation.
3374			 * End offset for header checksum calculation.
3375			 * Offset of place to put the checksum.
3376			 */
3377			TXD = (struct e1000_context_desc *)
3378			    &txr->tx_base[cur];
3379			TXD->upper_setup.tcp_fields.tucss = hdr_len;
3380			TXD->upper_setup.tcp_fields.tucse = htole16(0);
3381			TXD->upper_setup.tcp_fields.tucso =
3382			    hdr_len + offsetof(struct udphdr, uh_sum);
3383		}
3384		/* FALLTHROUGH */
3385	}
3386	default:
3387		break;
3388	}
3389
3390	if (TXD == NULL)
3391		return;
3392	TXD->tcp_seg_setup.data = htole32(0);
3393	TXD->cmd_and_length =
3394	    htole32(adapter->txd_cmd | E1000_TXD_CMD_DEXT | cmd);
3395	tx_buffer = &txr->tx_buffers[cur];
3396	tx_buffer->m_head = NULL;
3397	tx_buffer->next_eop = -1;
3398
3399	if (++cur == adapter->num_tx_desc)
3400		cur = 0;
3401
3402	txr->tx_avail--;
3403	txr->next_avail_desc = cur;
3404}
3405
3406
3407/**********************************************************************
3408 *
3409 *  Setup work for hardware segmentation offload (TSO)
3410 *
3411 **********************************************************************/
3412static bool
3413em_tso_setup(struct tx_ring *txr, struct mbuf *mp, u32 *txd_upper,
3414   u32 *txd_lower)
3415{
3416	struct adapter			*adapter = txr->adapter;
3417	struct e1000_context_desc	*TXD;
3418	struct em_buffer		*tx_buffer;
3419	struct ether_vlan_header	*eh;
3420	struct ip			*ip;
3421	struct ip6_hdr			*ip6;
3422	struct tcphdr			*th;
3423	int cur, ehdrlen, hdr_len, ip_hlen, isip6;
3424	u16 etype;
3425
3426	/*
3427	 * This function could/should be extended to support IP/IPv6
3428	 * fragmentation as well.  But as they say, one step at a time.
3429	 */
3430
3431	/*
3432	 * Determine where frame payload starts.
3433	 * Jump over vlan headers if already present,
3434	 * helpful for QinQ too.
3435	 */
3436	eh = mtod(mp, struct ether_vlan_header *);
3437	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
3438		etype = ntohs(eh->evl_proto);
3439		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3440	} else {
3441		etype = ntohs(eh->evl_encap_proto);
3442		ehdrlen = ETHER_HDR_LEN;
3443	}
3444
3445	/* Ensure we have at least the IP+TCP header in the first mbuf. */
3446	if (mp->m_len < ehdrlen + sizeof(struct ip) + sizeof(struct tcphdr))
3447		return FALSE;	/* -1 */
3448
3449	/*
3450	 * We only support TCP for IPv4 at the moment (IPv6 not yet).
3451	 * TODO: Support SCTP too when it hits the tree.
3452	 */
3453	switch (etype) {
3454	case ETHERTYPE_IP:
3455		isip6 = 0;
3456		ip = (struct ip *)(mp->m_data + ehdrlen);
3457		if (ip->ip_p != IPPROTO_TCP)
3458			return FALSE;	/* 0 */
3459		ip->ip_len = 0;
3460		ip->ip_sum = 0;
3461		ip_hlen = ip->ip_hl << 2;
3462		if (mp->m_len < ehdrlen + ip_hlen + sizeof(struct tcphdr))
3463			return FALSE;	/* -1 */
3464		th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
3465#if 1
3466		th->th_sum = in_pseudo(ip->ip_src.s_addr,
3467		    ip->ip_dst.s_addr, htons(IPPROTO_TCP));
3468#else
3469		th->th_sum = mp->m_pkthdr.csum_data;
3470#endif
3471		break;
3472	case ETHERTYPE_IPV6:
3473		isip6 = 1;
3474		return FALSE;			/* Not supported yet. */
3475		ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
3476		if (ip6->ip6_nxt != IPPROTO_TCP)
3477			return FALSE;	/* 0 */
3478		ip6->ip6_plen = 0;
3479		ip_hlen = sizeof(struct ip6_hdr); /* XXX: no header stacking. */
3480		if (mp->m_len < ehdrlen + ip_hlen + sizeof(struct tcphdr))
3481			return FALSE;	/* -1 */
3482		th = (struct tcphdr *)((caddr_t)ip6 + ip_hlen);
3483#if 0
3484		th->th_sum = in6_pseudo(ip6->ip6_src, ip6->ip6_dst,
3485		    htons(IPPROTO_TCP));	/* XXX: function notyet. */
3486#else
3487		th->th_sum = mp->m_pkthdr.csum_data;
3488#endif
3489		break;
3490	default:
3491		return FALSE;
3492	}
3493	hdr_len = ehdrlen + ip_hlen + (th->th_off << 2);
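	/*
	 * Example (editor's note): a plain IPv4/TCP frame with no options
	 * has ehdrlen = 14, ip_hlen = 20 and th_off = 5, so
	 * hdr_len = 14 + 20 + 20 = 54; everything past byte 54 is
	 * segmented payload.
	 */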
3494
3495	*txd_lower = (E1000_TXD_CMD_DEXT |	/* Extended descr type */
3496		      E1000_TXD_DTYP_D |	/* Data descr type */
3497		      E1000_TXD_CMD_TSE);	/* Do TSE on this packet */
3498
3499	/* IP and/or TCP header checksum calculation and insertion. */
3500	*txd_upper = ((isip6 ? 0 : E1000_TXD_POPTS_IXSM) |
3501		      E1000_TXD_POPTS_TXSM) << 8;
3502
3503	cur = txr->next_avail_desc;
3504	tx_buffer = &txr->tx_buffers[cur];
3505	TXD = (struct e1000_context_desc *) &txr->tx_base[cur];
3506
3507	/* IPv6 doesn't have a header checksum. */
3508	if (!isip6) {
3509		/*
3510		 * Start offset for header checksum calculation.
3511		 * End offset for header checksum calculation.
3512		 * Offset of place to put the checksum.
3513		 */
3514		TXD->lower_setup.ip_fields.ipcss = ehdrlen;
3515		TXD->lower_setup.ip_fields.ipcse =
3516		    htole16(ehdrlen + ip_hlen - 1);
3517		TXD->lower_setup.ip_fields.ipcso =
3518		    ehdrlen + offsetof(struct ip, ip_sum);
3519	}
3520	/*
3521	 * Start offset for payload checksum calculation.
3522	 * End offset for payload checksum calculation.
3523	 * Offset of place to put the checksum.
3524	 */
3525	TXD->upper_setup.tcp_fields.tucss =
3526	    ehdrlen + ip_hlen;
3527	TXD->upper_setup.tcp_fields.tucse = 0;
3528	TXD->upper_setup.tcp_fields.tucso =
3529	    ehdrlen + ip_hlen + offsetof(struct tcphdr, th_sum);
3530	/*
3531	 * Payload size per packet w/o any headers.
3532	 * Length of all headers up to payload.
3533	 */
3534	TXD->tcp_seg_setup.fields.mss = htole16(mp->m_pkthdr.tso_segsz);
3535	TXD->tcp_seg_setup.fields.hdr_len = hdr_len;
3536
3537	TXD->cmd_and_length = htole32(adapter->txd_cmd |
3538				E1000_TXD_CMD_DEXT |	/* Extended descr */
3539				E1000_TXD_CMD_TSE |	/* TSE context */
3540				(isip6 ? 0 : E1000_TXD_CMD_IP) |
3541				E1000_TXD_CMD_TCP |	/* Do TCP checksum */
3542				(mp->m_pkthdr.len - (hdr_len))); /* Total len */
3543
3544	tx_buffer->m_head = NULL;
3545	tx_buffer->next_eop = -1;
3546
3547	if (++cur == adapter->num_tx_desc)
3548		cur = 0;
3549
3550	txr->tx_avail--;
3551	txr->next_avail_desc = cur;
3552	txr->tx_tso = TRUE;
3553
3554	return TRUE;
3555}
3556
3557
3558/**********************************************************************
3559 *
3560 *  Examine each tx_buffer in the used queue. If the hardware is done
3561 *  processing the packet then free associated resources. The
3562 *  tx_buffer is put back on the free queue.
3563 *
3564 **********************************************************************/
3565static bool
3566em_txeof(struct tx_ring *txr)
3567{
3568	struct adapter	*adapter = txr->adapter;
3569	int		first, last, done, num_avail;
3570	struct em_buffer *tx_buffer;
3571	struct e1000_tx_desc *tx_desc, *eop_desc;
3572	struct ifnet	*ifp = adapter->ifp;
3573
3574	EM_TX_LOCK_ASSERT(txr);
3575
3576	if (txr->tx_avail == adapter->num_tx_desc)
3577		return (FALSE);
3578
3579	num_avail = txr->tx_avail;
3580	first = txr->next_to_clean;
3581	tx_desc = &txr->tx_base[first];
3582	tx_buffer = &txr->tx_buffers[first];
3583	last = tx_buffer->next_eop;
3584	eop_desc = &txr->tx_base[last];
3585
3586	/*
3587	 * What this does is get the index of the
3588	 * first descriptor AFTER the EOP of the
3589	 * first packet, that way we can do the
3590	 * simple comparison on the inner while loop.
3591	 */
3592	if (++last == adapter->num_tx_desc)
3593		last = 0;
3594	done = last;
3595
3596	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3597	    BUS_DMASYNC_POSTREAD);
3598
3599	while (eop_desc->upper.fields.status & E1000_TXD_STAT_DD) {
3600		/* We clean the range of the packet */
3601		while (first != done) {
3602			tx_desc->upper.data = 0;
3603			tx_desc->lower.data = 0;
3604			tx_desc->buffer_addr = 0;
3605			++num_avail;
3606
3607			if (tx_buffer->m_head) {
3608				ifp->if_opackets++;
3609				bus_dmamap_sync(txr->txtag,
3610				    tx_buffer->map,
3611				    BUS_DMASYNC_POSTWRITE);
3612				bus_dmamap_unload(txr->txtag,
3613				    tx_buffer->map);
3614
3615				m_freem(tx_buffer->m_head);
3616				tx_buffer->m_head = NULL;
3617			}
3618			tx_buffer->next_eop = -1;
3619			txr->watchdog_time = ticks;
3620
3621			if (++first == adapter->num_tx_desc)
3622				first = 0;
3623
3624			tx_buffer = &txr->tx_buffers[first];
3625			tx_desc = &txr->tx_base[first];
3626		}
3627		/* See if we can continue to the next packet */
3628		last = tx_buffer->next_eop;
3629		if (last != -1) {
3630			eop_desc = &txr->tx_base[last];
3631			/* Get new done point */
3632			if (++last == adapter->num_tx_desc) last = 0;
3633			done = last;
3634		} else
3635			break;
3636	}
3637	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3638	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3639
3640	txr->next_to_clean = first;
3641
3642	/*
3643	 * If we have enough room, clear IFF_DRV_OACTIVE to
3644	 * tell the stack that it is OK to send packets.
3645	 * If there are no pending descriptors, clear the watchdog.
3646	 */
3647	if (num_avail > EM_TX_CLEANUP_THRESHOLD) {
3648		ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
3649		if (num_avail == adapter->num_tx_desc) {
3650			txr->watchdog_check = FALSE;
3651			txr->tx_avail = num_avail;
3652			return (FALSE);
3653		}
3654	}
3655
3656	txr->tx_avail = num_avail;
3657	return (TRUE);
3658}
3659
3660
3661/*********************************************************************
3662 *
3663 *  Refresh RX descriptor mbufs from system mbuf buffer pool.
3664 *
3665 **********************************************************************/
3666static void
3667em_refresh_mbufs(struct rx_ring *rxr, int limit)
3668{
3669	struct adapter		*adapter = rxr->adapter;
3670	struct mbuf		*m;
3671	bus_dma_segment_t	segs[1];
3672	bus_dmamap_t		map;
3673	struct em_buffer	*rxbuf;
3674	int			i, error, nsegs, cleaned;
3675
3676	i = rxr->next_to_refresh;
3677	cleaned = -1;
3678	while (i != limit) {
3679		m = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR);
3680		if (m == NULL)
3681			goto update;
3682		m->m_len = m->m_pkthdr.len = MCLBYTES;
3683
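		/*
		 * For standard frames, offset the payload by ETHER_ALIGN
		 * (2 bytes) so the IP header is 32-bit aligned.
		 */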
3684		if (adapter->max_frame_size <= (MCLBYTES - ETHER_ALIGN))
3685			m_adj(m, ETHER_ALIGN);
3686
3687		/*
3688		 * Using memory from the mbuf cluster pool, invoke the
3689		 * bus_dma machinery to arrange the memory mapping.
3690		 */
3691		error = bus_dmamap_load_mbuf_sg(rxr->rxtag, rxr->rx_sparemap,
3692		    m, segs, &nsegs, BUS_DMA_NOWAIT);
3693		if (error != 0) {
3694			m_free(m);
3695			goto update;
3696		}
3697
3698		/* If nsegs is wrong then the stack is corrupt. */
3699		KASSERT(nsegs == 1, ("Too many segments returned!"));
3700
3701		rxbuf = &rxr->rx_buffers[i];
3702		if (rxbuf->m_head != NULL)
3703			bus_dmamap_unload(rxr->rxtag, rxbuf->map);
3704
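		/* The new mbuf was loaded into the spare map; swap it in. */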
3705		map = rxbuf->map;
3706		rxbuf->map = rxr->rx_sparemap;
3707		rxr->rx_sparemap = map;
3708		bus_dmamap_sync(rxr->rxtag,
3709		    rxbuf->map, BUS_DMASYNC_PREREAD);
3710		rxbuf->m_head = m;
3711		rxr->rx_base[i].buffer_addr = htole64(segs[0].ds_addr);
3712
3713		cleaned = i;
3714		/* Calculate next index */
3715		if (++i == adapter->num_rx_desc)
3716			i = 0;
3717		/* This is the work marker for refresh */
3718		rxr->next_to_refresh = i;
3719	}
3720update:
3721	bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
3722	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3723	if (cleaned != -1) /* Update tail index */
3724		E1000_WRITE_REG(&adapter->hw,
3725		    E1000_RDT(rxr->me), cleaned);
3726
3727	return;
3728}
3729
3730
3731/*********************************************************************
3732 *
3733 *  Allocate memory for rx_buffer structures. Since we use one
3734 *  rx_buffer per received packet, the maximum number of rx_buffers
3735 *  that we'll need is equal to the number of receive descriptors
3736 *  that we've allocated.
3737 *
3738 **********************************************************************/
3739static int
3740em_allocate_receive_buffers(struct rx_ring *rxr)
3741{
3742	struct adapter		*adapter = rxr->adapter;
3743	device_t		dev = adapter->dev;
3744	struct em_buffer	*rxbuf;
3745	int			error;
3746
3747	rxr->rx_buffers = malloc(sizeof(struct em_buffer) *
3748	    adapter->num_rx_desc, M_DEVBUF, M_NOWAIT | M_ZERO);
3749	if (rxr->rx_buffers == NULL) {
3750		device_printf(dev, "Unable to allocate rx_buffer memory\n");
3751		return (ENOMEM);
3752	}
3753
3754	error = bus_dma_tag_create(bus_get_dma_tag(dev), /* parent */
3755				1, 0,			/* alignment, bounds */
3756				BUS_SPACE_MAXADDR,	/* lowaddr */
3757				BUS_SPACE_MAXADDR,	/* highaddr */
3758				NULL, NULL,		/* filter, filterarg */
3759				MCLBYTES,		/* maxsize */
3760				1,			/* nsegments */
3761				MCLBYTES,		/* maxsegsize */
3762				0,			/* flags */
3763				NULL,			/* lockfunc */
3764				NULL,			/* lockarg */
3765				&rxr->rxtag);
3766	if (error) {
3767		device_printf(dev, "%s: bus_dma_tag_create failed %d\n",
3768		    __func__, error);
3769		goto fail;
3770	}
3771
3772	/* Create the spare map (used by getbuf) */
3773	error = bus_dmamap_create(rxr->rxtag, BUS_DMA_NOWAIT,
3774	     &rxr->rx_sparemap);
3775	if (error) {
3776		device_printf(dev, "%s: bus_dmamap_create failed: %d\n",
3777		    __func__, error);
3778		goto fail;
3779	}
3780
3781	for (int i = 0; i < adapter->num_rx_desc; i++) {
3782		rxbuf = &rxr->rx_buffers[i];
3784		error = bus_dmamap_create(rxr->rxtag, BUS_DMA_NOWAIT,
3785		    &rxbuf->map);
3786		if (error) {
3787			device_printf(dev, "%s: bus_dmamap_create failed: %d\n",
3788			    __func__, error);
3789			goto fail;
3790		}
3791	}
3792
3793	return (0);
3794
3795fail:
3796	em_free_receive_structures(adapter);
3797	return (error);
3798}
3799
3800
3801/*********************************************************************
3802 *
3803 *  Initialize a receive ring and its buffers.
3804 *
3805 **********************************************************************/
3806static int
3807em_setup_receive_ring(struct rx_ring *rxr)
3808{
3809	struct	adapter 	*adapter = rxr->adapter;
3810	struct em_buffer	*rxbuf;
3811	bus_dma_segment_t	seg[1];
3812	int			rsize, nsegs, error;
3813
3815	/* Clear the ring contents */
3816	EM_RX_LOCK(rxr);
3817	rsize = roundup2(adapter->num_rx_desc *
3818	    sizeof(struct e1000_rx_desc), EM_DBA_ALIGN);
3819	bzero((void *)rxr->rx_base, rsize);
3820
3821	/*
3822	** Free current RX buffer structs and their mbufs
3823	*/
3824	for (int i = 0; i < adapter->num_rx_desc; i++) {
3825		rxbuf = &rxr->rx_buffers[i];
3826		if (rxbuf->m_head != NULL) {
3827			bus_dmamap_sync(rxr->rxtag, rxbuf->map,
3828			    BUS_DMASYNC_POSTREAD);
3829			bus_dmamap_unload(rxr->rxtag, rxbuf->map);
3830			m_freem(rxbuf->m_head);
3831		}
3832	}
3833
3834	/* Now replenish the mbufs */
3835	for (int j = 0; j < adapter->num_rx_desc; j++) {
3837		rxbuf = &rxr->rx_buffers[j];
3838		rxbuf->m_head = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR);
3839		if (rxbuf->m_head == NULL)
3840			panic("RX ring hdr initialization failed!\n");
3841		rxbuf->m_head->m_len = MCLBYTES;
3842		rxbuf->m_head->m_flags &= ~M_HASFCS; /* we strip it */
3843		rxbuf->m_head->m_pkthdr.len = MCLBYTES;
3844
3845		/* Get the memory mapping */
3846		error = bus_dmamap_load_mbuf_sg(rxr->rxtag,
3847		    rxbuf->map, rxbuf->m_head, seg,
3848		    &nsegs, BUS_DMA_NOWAIT);
3849		if (error != 0)
3850			panic("RX ring dma initialization failed!\n");
3851		bus_dmamap_sync(rxr->rxtag,
3852		    rxbuf->map, BUS_DMASYNC_PREREAD);
3853
3854		/* Update descriptor */
3855		rxr->rx_base[j].buffer_addr = htole64(seg[0].ds_addr);
3856	}
3857
3859	/* Setup our descriptor indices */
3860	rxr->next_to_check = 0;
3861	rxr->next_to_refresh = 0;
3862
3863	bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
3864	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3865
3866	EM_RX_UNLOCK(rxr);
3867	return (0);
3868}
3869
3870/*********************************************************************
3871 *
3872 *  Initialize all receive rings.
3873 *
3874 **********************************************************************/
3875static int
3876em_setup_receive_structures(struct adapter *adapter)
3877{
3878	struct rx_ring *rxr = adapter->rx_rings;
3879	int j;
3880
3881	for (j = 0; j < adapter->num_queues; j++, rxr++)
3882		if (em_setup_receive_ring(rxr))
3883			goto fail;
3884
3885	return (0);
3886fail:
3887	/*
3888	 * Free RX buffers allocated so far; we will only handle
3889	 * the rings that completed, since the failing case will
3890	 * have cleaned up after itself. 'j' failed, so it is the terminus.
3891	 */
3892	for (int i = 0; i < j; ++i) {
3893		rxr = &adapter->rx_rings[i];
3894		for (int n = 0; n < adapter->num_rx_desc; n++) {
3895			struct em_buffer *rxbuf;
3896			rxbuf = &rxr->rx_buffers[n];
3897			if (rxbuf->m_head != NULL) {
3898				bus_dmamap_sync(rxr->rxtag, rxbuf->map,
3899			  	  BUS_DMASYNC_POSTREAD);
3900				bus_dmamap_unload(rxr->rxtag, rxbuf->map);
3901				m_freem(rxbuf->m_head);
3902				rxbuf->m_head = NULL;
3903			}
3904		}
3905	}
3906
3907	return (ENOBUFS);
3908}
3909
3910/*********************************************************************
3911 *
3912 *  Free all receive rings.
3913 *
3914 **********************************************************************/
3915static void
3916em_free_receive_structures(struct adapter *adapter)
3917{
3918	struct rx_ring *rxr = adapter->rx_rings;
3919
3920	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
3921		em_free_receive_buffers(rxr);
3922		/* Free the ring memory as well */
3923		em_dma_free(adapter, &rxr->rxdma);
3924		EM_RX_LOCK_DESTROY(rxr);
3925	}
3926
3927	free(adapter->rx_rings, M_DEVBUF);
3928}
3929
3930
3931/*********************************************************************
3932 *
3933 *  Free receive ring data structures
3934 *
3935 **********************************************************************/
3936static void
3937em_free_receive_buffers(struct rx_ring *rxr)
3938{
3939	struct adapter		*adapter = rxr->adapter;
3940	struct em_buffer	*rxbuf = NULL;
3941
3942	INIT_DEBUGOUT("free_receive_buffers: begin");
3943
3944	if (rxr->rx_sparemap) {
3945		bus_dmamap_destroy(rxr->rxtag, rxr->rx_sparemap);
3946		rxr->rx_sparemap = NULL;
3947	}
3948
3949	if (rxr->rx_buffers != NULL) {
3950		for (int i = 0; i < adapter->num_rx_desc; i++) {
3951			rxbuf = &rxr->rx_buffers[i];
3952			if (rxbuf->map != NULL) {
3953				bus_dmamap_sync(rxr->rxtag, rxbuf->map,
3954				    BUS_DMASYNC_POSTREAD);
3955				bus_dmamap_unload(rxr->rxtag, rxbuf->map);
3956				bus_dmamap_destroy(rxr->rxtag, rxbuf->map);
3957			}
3958			if (rxbuf->m_head != NULL) {
3959				m_freem(rxbuf->m_head);
3960				rxbuf->m_head = NULL;
3961			}
3962		}
3963		free(rxr->rx_buffers, M_DEVBUF);
3964		rxr->rx_buffers = NULL;
3965	}
3966
3967	if (rxr->rxtag != NULL) {
3968		bus_dma_tag_destroy(rxr->rxtag);
3969		rxr->rxtag = NULL;
3970	}
3971
3972	return;
3973}
3974
3975
3976/*********************************************************************
3977 *
3978 *  Enable receive unit.
3979 *
3980 **********************************************************************/
3981#define MAX_INTS_PER_SEC	8000
3982#define DEFAULT_ITR	     1000000000/(MAX_INTS_PER_SEC * 256)
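/* ITR counts 256 ns units: 1e9 / (8000 * 256) ~= 488, i.e. ~8000 ints/sec */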
3983
3984static void
3985em_initialize_receive_unit(struct adapter *adapter)
3986{
3987	struct rx_ring	*rxr = adapter->rx_rings;
3988	struct ifnet	*ifp = adapter->ifp;
3989	struct e1000_hw	*hw = &adapter->hw;
3990	u64	bus_addr;
3991	u32	rctl, rxcsum;
3992
3993	INIT_DEBUGOUT("em_initialize_receive_units: begin");
3994
3995	/*
3996	 * Make sure receives are disabled while setting
3997	 * up the descriptor ring
3998	 */
3999	rctl = E1000_READ_REG(hw, E1000_RCTL);
4000	E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
4001
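	/* Set the receive absolute interrupt delay (RADV) */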
4002	E1000_WRITE_REG(hw, E1000_RADV,
4003	    adapter->rx_abs_int_delay.value);
4004	/*
4005	 * Set the interrupt throttling rate. Value is calculated
4006	 * as DEFAULT_ITR = 1/(MAX_INTS_PER_SEC * 256ns)
4007	 */
4008	E1000_WRITE_REG(hw, E1000_ITR, DEFAULT_ITR);
4009
4010	/*
4011	** When using MSIX interrupts we need to throttle
4012	** using the EITR register (82574 only)
4013	*/
4014	if (hw->mac.type == e1000_82574)
4015		for (int i = 0; i < 4; i++)
4016			E1000_WRITE_REG(hw, E1000_EITR_82574(i),
4017			    DEFAULT_ITR);
4018
4019	/* Disable accelerated acknowledgment */
4020	if (adapter->hw.mac.type == e1000_82574)
4021		E1000_WRITE_REG(hw, E1000_RFCTL, E1000_RFCTL_ACK_DIS);
4022
4023	if (ifp->if_capenable & IFCAP_RXCSUM) {
4024		rxcsum = E1000_READ_REG(hw, E1000_RXCSUM);
4025		rxcsum |= (E1000_RXCSUM_IPOFL | E1000_RXCSUM_TUOFL);
4026		E1000_WRITE_REG(hw, E1000_RXCSUM, rxcsum);
4027	}
4028
4029	/*
4030	** XXX TEMPORARY WORKAROUND: on some systems with 82573
4031	** long latencies are observed, like Lenovo X60. This
4032	** change eliminates the problem, but since having positive
4033	** values in RDTR is a known source of problems on other
4034	** platforms another solution is being sought.
4035	*/
4036	if (hw->mac.type == e1000_82573)
4037		E1000_WRITE_REG(hw, E1000_RDTR, 0x20);
4038
4039	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4040		/* Setup the Base and Length of the Rx Descriptor Ring */
4041		bus_addr = rxr->rxdma.dma_paddr;
4042		E1000_WRITE_REG(hw, E1000_RDLEN(i),
4043		    adapter->num_rx_desc * sizeof(struct e1000_rx_desc));
4044		E1000_WRITE_REG(hw, E1000_RDBAH(i), (u32)(bus_addr >> 32));
4045		E1000_WRITE_REG(hw, E1000_RDBAL(i), (u32)bus_addr);
4046		/* Setup the Head and Tail Descriptor Pointers */
4047		E1000_WRITE_REG(hw, E1000_RDH(i), 0);
4048		E1000_WRITE_REG(hw, E1000_RDT(i), adapter->num_rx_desc - 1);
4049	}
4050
4051	/* Setup the Receive Control Register */
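	/* Clear the multicast offset bits before inserting the filter type */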
4052	rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
4053	rctl |= E1000_RCTL_EN | E1000_RCTL_BAM |
4054	    E1000_RCTL_LBM_NO | E1000_RCTL_RDMTS_HALF |
4055	    (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
4056
4057	/* Strip the CRC */
4058	rctl |= E1000_RCTL_SECRC;
4059
4060	/* Make sure VLAN Filters are off */
4061	rctl &= ~E1000_RCTL_VFE;
4062	rctl &= ~E1000_RCTL_SBP;
4063	rctl |= E1000_RCTL_SZ_2048;
4064	if (ifp->if_mtu > ETHERMTU)
4065		rctl |= E1000_RCTL_LPE;
4066	else
4067		rctl &= ~E1000_RCTL_LPE;
4068
4069	/* Write out the settings */
4070	E1000_WRITE_REG(hw, E1000_RCTL, rctl);
4071
4072	return;
4073}
4074
4075
4076/*********************************************************************
4077 *
4078 *  This routine executes in interrupt context. It replenishes
4079 *  the mbufs in the descriptor ring and sends data which has been
4080 *  DMA'ed into host memory to the upper layer.
4081 *
4082 *  We loop at most count times if count is > 0, or until done if
4083 *  count < 0.
4084 *
4085 *  For polling we also now return the number of cleaned packets.
4086 *********************************************************************/
4087static bool
4088em_rxeof(struct rx_ring *rxr, int count, int *done)
4089{
4090	struct adapter		*adapter = rxr->adapter;
4091	struct ifnet		*ifp = adapter->ifp;
4092	struct mbuf		*mp, *sendmp;
4093	u8			status = 0;
4094	u16 			len;
4095	int			i, processed, rxdone = 0;
4096	bool			eop;
4097	struct e1000_rx_desc	*cur;
4098
4099	EM_RX_LOCK(rxr);
4100
4101	for (i = rxr->next_to_check, processed = 0; count != 0;) {
4102
4103		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
4104			break;
4105
4106		bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4107		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
4108
4109		cur = &rxr->rx_base[i];
4110		status = cur->status;
4111		mp = sendmp = NULL;
4112
4113		if ((status & E1000_RXD_STAT_DD) == 0)
4114			break;
4115
4116		len = le16toh(cur->length);
4117		eop = (status & E1000_RXD_STAT_EOP) != 0;
4118		count--;
4119
4120		if ((cur->errors & E1000_RXD_ERR_FRAME_ERR_MASK) == 0) {
4121
4122			/* Assign correct length to the current fragment */
4123			mp = rxr->rx_buffers[i].m_head;
4124			mp->m_len = len;
4125
4126			if (rxr->fmp == NULL) {
4127				mp->m_pkthdr.len = len;
4128				rxr->fmp = mp; /* Store the first mbuf */
4129				rxr->lmp = mp;
4130			} else {
4131				/* Chain mbuf's together */
4132				mp->m_flags &= ~M_PKTHDR;
4133				rxr->lmp->m_next = mp;
4134				rxr->lmp = rxr->lmp->m_next;
4135				rxr->fmp->m_pkthdr.len += len;
4136			}
4137
4138			if (eop) {
4139				rxr->fmp->m_pkthdr.rcvif = ifp;
4140				ifp->if_ipackets++;
4141				em_receive_checksum(cur, rxr->fmp);
4142#ifndef __NO_STRICT_ALIGNMENT
4143				if (adapter->max_frame_size >
4144				    (MCLBYTES - ETHER_ALIGN) &&
4145				    em_fixup_rx(rxr) != 0)
4146					goto skip;
4147#endif
4148				if (status & E1000_RXD_STAT_VP) {
4149					rxr->fmp->m_pkthdr.ether_vtag =
4150					    (le16toh(cur->special) &
4151					    E1000_RXD_SPC_VLAN_MASK);
4152					rxr->fmp->m_flags |= M_VLANTAG;
4153				}
4154#ifdef EM_MULTIQUEUE
4155				rxr->fmp->m_pkthdr.flowid = curcpu;
4156				rxr->fmp->m_flags |= M_FLOWID;
4157#endif
4158#ifndef __NO_STRICT_ALIGNMENT
4159skip:
4160#endif
4161				sendmp = rxr->fmp;
4162				rxr->fmp = NULL;
4163				rxr->lmp = NULL;
4164			}
4165		} else {
4166			ifp->if_ierrors++;
4167			/* Reuse loaded DMA map and just update mbuf chain */
4168			mp = rxr->rx_buffers[i].m_head;
4169			mp->m_len = mp->m_pkthdr.len = MCLBYTES;
4170			mp->m_data = mp->m_ext.ext_buf;
4171			mp->m_next = NULL;
4172			if (adapter->max_frame_size <=
4173			    (MCLBYTES - ETHER_ALIGN))
4174				m_adj(mp, ETHER_ALIGN);
4175			if (rxr->fmp != NULL) {
4176				m_freem(rxr->fmp);
4177				rxr->fmp = NULL;
4178				rxr->lmp = NULL;
4179			}
4180			sendmp = NULL;
4181		}
4182
4183		/* Zero out the receive descriptor's status. */
4184		cur->status = 0;
4185		++rxdone;	/* cumulative for POLL */
4186		++processed;
4187
4188		/* Advance our pointers to the next descriptor. */
4189		if (++i == adapter->num_rx_desc)
4190			i = 0;
4191
4192		/* Send to the stack */
4193		if (sendmp != NULL) {
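			/* Drop the RX lock around if_input; the stack must
			 * not be entered with the ring lock held. */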
4194			rxr->next_to_check = i;
4195			EM_RX_UNLOCK(rxr);
4196			(*ifp->if_input)(ifp, sendmp);
4197			EM_RX_LOCK(rxr);
4198			i = rxr->next_to_check;
4199		}
4200
4201		/* Only refresh mbufs every 8 descriptors */
4202		if (processed == 8) {
4203			em_refresh_mbufs(rxr, i);
4204			processed = 0;
4205		}
4206	}
4207
4208	/* Catch any remaining refresh work */
4209	if (processed != 0) {
4210		em_refresh_mbufs(rxr, i);
4211		processed = 0;
4212	}
4213
4214	rxr->next_to_check = i;
4215	if (done != NULL)
4216		*done = rxdone;
4217	EM_RX_UNLOCK(rxr);
4218
4219	return ((status & E1000_RXD_STAT_DD) ? TRUE : FALSE);
4220}
4221
4222#ifndef __NO_STRICT_ALIGNMENT
4223/*
4224 * When jumbo frames are enabled we should realign the entire payload on
4225 * architectures with strict alignment. This is a serious design mistake of
4226 * the 8254x as it nullifies DMA operations. The 8254x only allows the RX
4227 * buffer size to be 2048/4096/8192/16384. What we really want is
4228 * 2048 - ETHER_ALIGN to align its payload. On architectures without strict
4229 * alignment restrictions the 8254x still performs unaligned memory accesses,
4230 * which reduce performance too. To avoid copying an entire frame to realign
4231 * it, we allocate a new mbuf and copy the ethernet header to the new mbuf.
4232 * The new mbuf is prepended onto the existing mbuf chain.
4233 *
4234 * Be aware, the best performance of the 8254x is achieved only when jumbo
4235 * frames are not used at all on architectures with strict alignment.
4236 */
4237static int
4238em_fixup_rx(struct rx_ring *rxr)
4239{
4240	struct adapter *adapter = rxr->adapter;
4241	struct mbuf *m, *n;
4242	int error;
4243
4244	error = 0;
4245	m = rxr->fmp;
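	/*
	 * Shifting the frame up by ETHER_HDR_LEN (14) moves the IP header
	 * from offset 14 to offset 28 in the cluster, which is 4-byte
	 * aligned.
	 */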
4246	if (m->m_len <= (MCLBYTES - ETHER_HDR_LEN)) {
4247		bcopy(m->m_data, m->m_data + ETHER_HDR_LEN, m->m_len);
4248		m->m_data += ETHER_HDR_LEN;
4249	} else {
4250		MGETHDR(n, M_DONTWAIT, MT_DATA);
4251		if (n != NULL) {
4252			bcopy(m->m_data, n->m_data, ETHER_HDR_LEN);
4253			m->m_data += ETHER_HDR_LEN;
4254			m->m_len -= ETHER_HDR_LEN;
4255			n->m_len = ETHER_HDR_LEN;
4256			M_MOVE_PKTHDR(n, m);
4257			n->m_next = m;
4258			rxr->fmp = n;
4259		} else {
4260			adapter->dropped_pkts++;
4261			m_freem(rxr->fmp);
4262			rxr->fmp = NULL;
4263			error = ENOMEM;
4264		}
4265	}
4266
4267	return (error);
4268}
4269#endif
4270
4271/*********************************************************************
4272 *
4273 *  Verify that the hardware indicated that the checksum is valid.
4274 *  Inform the stack about the status of the checksum so that the
4275 *  stack doesn't spend time verifying it.
4276 *
4277 *********************************************************************/
4278static void
4279em_receive_checksum(struct e1000_rx_desc *rx_desc, struct mbuf *mp)
4280{
4281	/* The Ignore Checksum bit is set */
4282	if (rx_desc->status & E1000_RXD_STAT_IXSM) {
4283		mp->m_pkthdr.csum_flags = 0;
4284		return;
4285	}
4286
4287	if (rx_desc->status & E1000_RXD_STAT_IPCS) {
4288		/* Did it pass? */
4289		if (!(rx_desc->errors & E1000_RXD_ERR_IPE)) {
4290			/* IP Checksum Good */
4291			mp->m_pkthdr.csum_flags = CSUM_IP_CHECKED;
4292			mp->m_pkthdr.csum_flags |= CSUM_IP_VALID;
4293
4294		} else {
4295			mp->m_pkthdr.csum_flags = 0;
4296		}
4297	}
4298
4299	if (rx_desc->status & E1000_RXD_STAT_TCPCS) {
4300		/* Did it pass? */
4301		if (!(rx_desc->errors & E1000_RXD_ERR_TCPE)) {
4302			mp->m_pkthdr.csum_flags |=
4303			(CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
4304			mp->m_pkthdr.csum_data = htons(0xffff);
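			/* With CSUM_PSEUDO_HDR, 0xffff means fully verified */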
4305		}
4306	}
4307}
4308
4309/*
4310 * This routine is run via a vlan
4311 * config EVENT.
4312 */
4313static void
4314em_register_vlan(void *arg, struct ifnet *ifp, u16 vtag)
4315{
4316	struct adapter	*adapter = ifp->if_softc;
4317	u32		index, bit;
4318
4319	if (ifp->if_softc !=  arg)   /* Not our event */
4320		return;
4321
4322	if ((vtag == 0) || (vtag > 4095))       /* Invalid ID */
4323		return;
4324
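	/*
	 * The VFTA is 128 32-bit words: vtag[11:5] selects the
	 * word and vtag[4:0] the bit within it.
	 */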
4325	index = (vtag >> 5) & 0x7F;
4326	bit = vtag & 0x1F;
4327	em_shadow_vfta[index] |= (1 << bit);
4328	++adapter->num_vlans;
4329	/* Re-init to load the changes */
4330	em_init(adapter);
4331}
4332
4333/*
4334 * This routine is run via a vlan
4335 * unconfig EVENT.
4336 */
4337static void
4338em_unregister_vlan(void *arg, struct ifnet *ifp, u16 vtag)
4339{
4340	struct adapter	*adapter = ifp->if_softc;
4341	u32		index, bit;
4342
4343	if (ifp->if_softc !=  arg)
4344		return;
4345
4346	if ((vtag == 0) || (vtag > 4095))       /* Invalid */
4347		return;
4348
4349	index = (vtag >> 5) & 0x7F;
4350	bit = vtag & 0x1F;
4351	em_shadow_vfta[index] &= ~(1 << bit);
4352	--adapter->num_vlans;
4353	/* Re-init to load the changes */
4354	em_init(adapter);
4355}
4356
4357static void
4358em_setup_vlan_hw_support(struct adapter *adapter)
4359{
4360	struct e1000_hw *hw = &adapter->hw;
4361	u32             reg;
4362
4363	/*
4364	** We get here thru init_locked, meaning
4365	** a soft reset, this has already cleared
4366	** the VFTA and other state, so if there
4367	** have been no vlan's registered do nothing.
4368	*/
4369	if (adapter->num_vlans == 0)
4370		return;
4371
4372	/*
4373	** A soft reset zero's out the VFTA, so
4374	** we need to repopulate it now.
4375	*/
4376	for (int i = 0; i < EM_VFTA_SIZE; i++)
4377		if (em_shadow_vfta[i] != 0)
4378			E1000_WRITE_REG_ARRAY(hw, E1000_VFTA,
4379			    i, em_shadow_vfta[i]);
4380
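	/* Turn on VLAN tag handling in the MAC (CTRL.VME) */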
4381	reg = E1000_READ_REG(hw, E1000_CTRL);
4382	reg |= E1000_CTRL_VME;
4383	E1000_WRITE_REG(hw, E1000_CTRL, reg);
4384
4385	/* Enable the Filter Table */
4386	reg = E1000_READ_REG(hw, E1000_RCTL);
4387	reg &= ~E1000_RCTL_CFIEN;
4388	reg |= E1000_RCTL_VFE;
4389	E1000_WRITE_REG(hw, E1000_RCTL, reg);
4390
4391	/* Update the frame size */
4392	E1000_WRITE_REG(&adapter->hw, E1000_RLPML,
4393	    adapter->max_frame_size + VLAN_TAG_SIZE);
4394}
4395
4396static void
4397em_enable_intr(struct adapter *adapter)
4398{
4399	struct e1000_hw *hw = &adapter->hw;
4400	u32 ims_mask = IMS_ENABLE_MASK;
4401
4402	if (hw->mac.type == e1000_82574) {
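		/* 82574 MSIX: let the vector causes auto-clear (EIAC) */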
4403		E1000_WRITE_REG(hw, EM_EIAC, EM_MSIX_MASK);
4404		ims_mask |= EM_MSIX_MASK;
4405	}
4406	E1000_WRITE_REG(hw, E1000_IMS, ims_mask);
4407}
4408
4409static void
4410em_disable_intr(struct adapter *adapter)
4411{
4412	struct e1000_hw *hw = &adapter->hw;
4413
4414	if (hw->mac.type == e1000_82574)
4415		E1000_WRITE_REG(hw, EM_EIAC, 0);
4416	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
4417}
4418
4419/*
4420 * Bit of a misnomer, what this really means is
4421 * to enable OS management of the system... aka
4422 * to disable special hardware management features
4423 */
4424static void
4425em_init_manageability(struct adapter *adapter)
4426{
4427	/* A shared code workaround */
4428#define E1000_82542_MANC2H E1000_MANC2H
4429	if (adapter->has_manage) {
4430		int manc2h = E1000_READ_REG(&adapter->hw, E1000_MANC2H);
4431		int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
4432
4433		/* disable hardware interception of ARP */
4434		manc &= ~(E1000_MANC_ARP_EN);
4435
4436		/* enable receiving management packets to the host */
4437		manc |= E1000_MANC_EN_MNG2HOST;
4438#define E1000_MNG2HOST_PORT_623 (1 << 5)
4439#define E1000_MNG2HOST_PORT_664 (1 << 6)
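		/* Pass the ASF/RMCP management ports 623 and 664 to the host */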
4440		manc2h |= E1000_MNG2HOST_PORT_623;
4441		manc2h |= E1000_MNG2HOST_PORT_664;
4442		E1000_WRITE_REG(&adapter->hw, E1000_MANC2H, manc2h);
4443		E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
4444	}
4445}
4446
4447/*
4448 * Give control back to hardware management
4449 * controller if there is one.
4450 */
4451static void
4452em_release_manageability(struct adapter *adapter)
4453{
4454	if (adapter->has_manage) {
4455		int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
4456
4457		/* re-enable hardware interception of ARP */
4458		manc |= E1000_MANC_ARP_EN;
4459		manc &= ~E1000_MANC_EN_MNG2HOST;
4460
4461		E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
4462	}
4463}
4464
4465/*
4466 * em_get_hw_control sets the {CTRL_EXT|FWSM}:DRV_LOAD bit.
4467 * For ASF and Pass Through versions of f/w this means
4468 * that the driver is loaded. For AMT version type f/w
4469 * this means that the network i/f is open.
4470 */
4471static void
4472em_get_hw_control(struct adapter *adapter)
4473{
4474	u32 ctrl_ext, swsm;
4475
4476	if (adapter->hw.mac.type == e1000_82573) {
4477		swsm = E1000_READ_REG(&adapter->hw, E1000_SWSM);
4478		E1000_WRITE_REG(&adapter->hw, E1000_SWSM,
4479		    swsm | E1000_SWSM_DRV_LOAD);
4480		return;
4481	}
4482	/* else */
4483	ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4484	E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
4485	    ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
4486	return;
4487}
4488
4489/*
4490 * em_release_hw_control resets {CTRL_EXT|FWSM}:DRV_LOAD bit.
4491 * For ASF and Pass Through versions of f/w this means that
4492 * the driver is no longer loaded. For AMT versions of the
4493 * f/w this means that the network i/f is closed.
4494 */
4495static void
4496em_release_hw_control(struct adapter *adapter)
4497{
4498	u32 ctrl_ext, swsm;
4499
4500	if (!adapter->has_manage)
4501		return;
4502
4503	if (adapter->hw.mac.type == e1000_82573) {
4504		swsm = E1000_READ_REG(&adapter->hw, E1000_SWSM);
4505		E1000_WRITE_REG(&adapter->hw, E1000_SWSM,
4506		    swsm & ~E1000_SWSM_DRV_LOAD);
4507		return;
4508	}
4509	/* else */
4510	ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4511	E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
4512	    ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
4513	return;
4514}
4515
4516static int
4517em_is_valid_ether_addr(u8 *addr)
4518{
4519	char zero_addr[6] = { 0, 0, 0, 0, 0, 0 };
4520
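	/* Reject group (multicast/broadcast) addresses and all-zeros */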
4521	if ((addr[0] & 1) || (!bcmp(addr, zero_addr, ETHER_ADDR_LEN))) {
4522		return (FALSE);
4523	}
4524
4525	return (TRUE);
4526}
4527
4528/*
4529** Parse the interface capabilities with regard
4530** to both system management and wake-on-lan for
4531** later use.
4532*/
4533static void
4534em_get_wakeup(device_t dev)
4535{
4536	struct adapter	*adapter = device_get_softc(dev);
4537	u16		eeprom_data = 0, device_id, apme_mask;
4538
4539	adapter->has_manage = e1000_enable_mng_pass_thru(&adapter->hw);
4540	apme_mask = EM_EEPROM_APME;
4541
4542	switch (adapter->hw.mac.type) {
4543	case e1000_82573:
4544	case e1000_82583:
4545		adapter->has_amt = TRUE;
4546		/* Falls thru */
4547	case e1000_82571:
4548	case e1000_82572:
4549	case e1000_80003es2lan:
4550		if (adapter->hw.bus.func == 1) {
4551			e1000_read_nvm(&adapter->hw,
4552			    NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data);
4553			break;
4554		} else
4555			e1000_read_nvm(&adapter->hw,
4556			    NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
4557		break;
4558	case e1000_ich8lan:
4559	case e1000_ich9lan:
4560	case e1000_ich10lan:
4561	case e1000_pchlan:
4562		apme_mask = E1000_WUC_APME;
4563		adapter->has_amt = TRUE;
4564		eeprom_data = E1000_READ_REG(&adapter->hw, E1000_WUC);
4565		break;
4566	default:
4567		e1000_read_nvm(&adapter->hw,
4568		    NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
4569		break;
4570	}
4571	if (eeprom_data & apme_mask)
4572		adapter->wol = (E1000_WUFC_MAG | E1000_WUFC_MC);
4573	/*
4574	 * We have the eeprom settings, now apply the special cases
4575	 * where the eeprom may be wrong or the board won't support
4576	 * wake on lan on a particular port.
4577	 */
4578	device_id = pci_get_device(dev);
4579	switch (device_id) {
4580	case E1000_DEV_ID_82571EB_FIBER:
4581		/* Wake events only supported on port A for dual fiber
4582		 * regardless of eeprom setting */
4583		if (E1000_READ_REG(&adapter->hw, E1000_STATUS) &
4584		    E1000_STATUS_FUNC_1)
4585			adapter->wol = 0;
4586		break;
4587	case E1000_DEV_ID_82571EB_QUAD_COPPER:
4588	case E1000_DEV_ID_82571EB_QUAD_FIBER:
4589	case E1000_DEV_ID_82571EB_QUAD_COPPER_LP:
4590		/* if quad port adapter, disable WoL on all but port A */
4591		if (global_quad_port_a != 0)
4592			adapter->wol = 0;
4593		/* Reset for multiple quad port adapters */
4594		if (++global_quad_port_a == 4)
4595			global_quad_port_a = 0;
4596		break;
4597	}
4598	return;
4599}
4600
4601
4602/*
4603 * Enable PCI Wake On Lan capability
4604 */
4605static void
4606em_enable_wakeup(device_t dev)
4607{
4608	struct adapter	*adapter = device_get_softc(dev);
4609	struct ifnet	*ifp = adapter->ifp;
4610	u32		pmc, ctrl, ctrl_ext, rctl;
4611	u16     	status;
4612
4613	if ((pci_find_extcap(dev, PCIY_PMG, &pmc) != 0))
4614		return;
4615
4616	/* Advertise the wakeup capability */
4617	ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL);
4618	ctrl |= (E1000_CTRL_SWDPIN2 | E1000_CTRL_SWDPIN3);
4619	E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl);
4620	E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
4621
4622	if ((adapter->hw.mac.type == e1000_ich8lan) ||
4623	    (adapter->hw.mac.type == e1000_pchlan) ||
4624	    (adapter->hw.mac.type == e1000_ich9lan) ||
4625	    (adapter->hw.mac.type == e1000_ich10lan)) {
4626		e1000_disable_gig_wol_ich8lan(&adapter->hw);
4627		e1000_hv_phy_powerdown_workaround_ich8lan(&adapter->hw);
4628	}
4629
4630	/* Keep the laser running on Fiber adapters */
4631	if (adapter->hw.phy.media_type == e1000_media_type_fiber ||
4632	    adapter->hw.phy.media_type == e1000_media_type_internal_serdes) {
4633		ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4634		ctrl_ext |= E1000_CTRL_EXT_SDP3_DATA;
4635		E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, ctrl_ext);
4636	}
4637
4638	/*
4639	** Determine type of Wakeup: note that wol
4640	** is set with all bits on by default.
4641	*/
4642	if ((ifp->if_capenable & IFCAP_WOL_MAGIC) == 0)
4643		adapter->wol &= ~E1000_WUFC_MAG;
4644
4645	if ((ifp->if_capenable & IFCAP_WOL_MCAST) == 0)
4646		adapter->wol &= ~E1000_WUFC_MC;
4647	else {
4648		rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
4649		rctl |= E1000_RCTL_MPE;
4650		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, rctl);
4651	}
4652
4653	if (adapter->hw.mac.type == e1000_pchlan) {
4654		if (em_enable_phy_wakeup(adapter))
4655			return;
4656	} else {
4657		E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
4658		E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
4659	}
4660
4661	if (adapter->hw.phy.type == e1000_phy_igp_3)
4662		e1000_igp3_phy_powerdown_workaround_ich8lan(&adapter->hw);
4663
4664	/* Request PME */
4665	status = pci_read_config(dev, pmc + PCIR_POWER_STATUS, 2);
4666	status &= ~(PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE);
4667	if (ifp->if_capenable & IFCAP_WOL)
4668		status |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE;
4669	pci_write_config(dev, pmc + PCIR_POWER_STATUS, status, 2);
4670
4671	return;
4672}
4673
4674/*
4675** WOL in the newer chipset interfaces (pchlan)
4676** requires things to be copied into the PHY
4677*/
4678static int
4679em_enable_phy_wakeup(struct adapter *adapter)
4680{
4681	struct e1000_hw *hw = &adapter->hw;
4682	u32 mreg, ret = 0;
4683	u16 preg;
4684
4685	/* copy MAC RARs to PHY RARs */
4686	for (int i = 0; i < adapter->hw.mac.rar_entry_count; i++) {
4687		mreg = E1000_READ_REG(hw, E1000_RAL(i));
4688		e1000_write_phy_reg(hw, BM_RAR_L(i), (u16)(mreg & 0xFFFF));
4689		e1000_write_phy_reg(hw, BM_RAR_M(i),
4690		    (u16)((mreg >> 16) & 0xFFFF));
4691		mreg = E1000_READ_REG(hw, E1000_RAH(i));
4692		e1000_write_phy_reg(hw, BM_RAR_H(i), (u16)(mreg & 0xFFFF));
4693		e1000_write_phy_reg(hw, BM_RAR_CTRL(i),
4694		    (u16)((mreg >> 16) & 0xFFFF));
4695	}
4696
4697	/* copy MAC MTA to PHY MTA */
4698	for (int i = 0; i < adapter->hw.mac.mta_reg_count; i++) {
4699		mreg = E1000_READ_REG_ARRAY(hw, E1000_MTA, i);
4700		e1000_write_phy_reg(hw, BM_MTA(i), (u16)(mreg & 0xFFFF));
4701		e1000_write_phy_reg(hw, BM_MTA(i) + 1,
4702		    (u16)((mreg >> 16) & 0xFFFF));
4703	}
4704
4705	/* configure PHY Rx Control register */
4706	e1000_read_phy_reg(&adapter->hw, BM_RCTL, &preg);
4707	mreg = E1000_READ_REG(hw, E1000_RCTL);
4708	if (mreg & E1000_RCTL_UPE)
4709		preg |= BM_RCTL_UPE;
4710	if (mreg & E1000_RCTL_MPE)
4711		preg |= BM_RCTL_MPE;
4712	preg &= ~(BM_RCTL_MO_MASK);
4713	if (mreg & E1000_RCTL_MO_3)
4714		preg |= (((mreg & E1000_RCTL_MO_3) >> E1000_RCTL_MO_SHIFT)
4715				<< BM_RCTL_MO_SHIFT);
4716	if (mreg & E1000_RCTL_BAM)
4717		preg |= BM_RCTL_BAM;
4718	if (mreg & E1000_RCTL_PMCF)
4719		preg |= BM_RCTL_PMCF;
4720	mreg = E1000_READ_REG(hw, E1000_CTRL);
4721	if (mreg & E1000_CTRL_RFCE)
4722		preg |= BM_RCTL_RFCE;
4723	e1000_write_phy_reg(&adapter->hw, BM_RCTL, preg);
4724
4725	/* enable PHY wakeup in MAC register */
4726	E1000_WRITE_REG(hw, E1000_WUC,
4727	    E1000_WUC_PHY_WAKE | E1000_WUC_PME_EN);
4728	E1000_WRITE_REG(hw, E1000_WUFC, adapter->wol);
4729
4730	/* configure and enable PHY wakeup in PHY registers */
4731	e1000_write_phy_reg(&adapter->hw, BM_WUFC, adapter->wol);
4732	e1000_write_phy_reg(&adapter->hw, BM_WUC, E1000_WUC_PME_EN);
4733
4734	/* activate PHY wakeup */
4735	ret = hw->phy.ops.acquire(hw);
4736	if (ret) {
4737		printf("Could not acquire PHY\n");
4738		return ret;
4739	}
4740	e1000_write_phy_reg_mdic(hw, IGP01E1000_PHY_PAGE_SELECT,
4741	                         (BM_WUC_ENABLE_PAGE << IGP_PAGE_SHIFT));
4742	ret = e1000_read_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, &preg);
4743	if (ret) {
4744		printf("Could not read PHY page 769\n");
4745		goto out;
4746	}
4747	preg |= BM_WUC_ENABLE_BIT | BM_WUC_HOST_WU_BIT;
4748	ret = e1000_write_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, preg);
4749	if (ret)
4750		printf("Could not set PHY Host Wakeup bit\n");
4751out:
4752	hw->phy.ops.release(hw);
4753
4754	return ret;
4755}
4756
4757static void
4758em_led_func(void *arg, int onoff)
4759{
4760	struct adapter	*adapter = arg;
4761
4762	EM_CORE_LOCK(adapter);
4763	if (onoff) {
4764		e1000_setup_led(&adapter->hw);
4765		e1000_led_on(&adapter->hw);
4766	} else {
4767		e1000_led_off(&adapter->hw);
4768		e1000_cleanup_led(&adapter->hw);
4769	}
4770	EM_CORE_UNLOCK(adapter);
4771}
4772
4773/**********************************************************************
4774 *
4775 *  Update the board statistics counters.
4776 *
4777 **********************************************************************/
4778static void
4779em_update_stats_counters(struct adapter *adapter)
4780{
4781	struct ifnet   *ifp;
4782
4783	if (adapter->hw.phy.media_type == e1000_media_type_copper ||
4784	   (E1000_READ_REG(&adapter->hw, E1000_STATUS) & E1000_STATUS_LU)) {
4785		adapter->stats.symerrs += E1000_READ_REG(&adapter->hw, E1000_SYMERRS);
4786		adapter->stats.sec += E1000_READ_REG(&adapter->hw, E1000_SEC);
4787	}
4788	adapter->stats.crcerrs += E1000_READ_REG(&adapter->hw, E1000_CRCERRS);
4789	adapter->stats.mpc += E1000_READ_REG(&adapter->hw, E1000_MPC);
4790	adapter->stats.scc += E1000_READ_REG(&adapter->hw, E1000_SCC);
4791	adapter->stats.ecol += E1000_READ_REG(&adapter->hw, E1000_ECOL);
4792
4793	adapter->stats.mcc += E1000_READ_REG(&adapter->hw, E1000_MCC);
4794	adapter->stats.latecol += E1000_READ_REG(&adapter->hw, E1000_LATECOL);
4795	adapter->stats.colc += E1000_READ_REG(&adapter->hw, E1000_COLC);
4796	adapter->stats.dc += E1000_READ_REG(&adapter->hw, E1000_DC);
4797	adapter->stats.rlec += E1000_READ_REG(&adapter->hw, E1000_RLEC);
4798	adapter->stats.xonrxc += E1000_READ_REG(&adapter->hw, E1000_XONRXC);
4799	adapter->stats.xontxc += E1000_READ_REG(&adapter->hw, E1000_XONTXC);
4800	adapter->stats.xoffrxc += E1000_READ_REG(&adapter->hw, E1000_XOFFRXC);
4801	adapter->stats.xofftxc += E1000_READ_REG(&adapter->hw, E1000_XOFFTXC);
4802	adapter->stats.fcruc += E1000_READ_REG(&adapter->hw, E1000_FCRUC);
4803	adapter->stats.prc64 += E1000_READ_REG(&adapter->hw, E1000_PRC64);
4804	adapter->stats.prc127 += E1000_READ_REG(&adapter->hw, E1000_PRC127);
4805	adapter->stats.prc255 += E1000_READ_REG(&adapter->hw, E1000_PRC255);
4806	adapter->stats.prc511 += E1000_READ_REG(&adapter->hw, E1000_PRC511);
4807	adapter->stats.prc1023 += E1000_READ_REG(&adapter->hw, E1000_PRC1023);
4808	adapter->stats.prc1522 += E1000_READ_REG(&adapter->hw, E1000_PRC1522);
4809	adapter->stats.gprc += E1000_READ_REG(&adapter->hw, E1000_GPRC);
4810	adapter->stats.bprc += E1000_READ_REG(&adapter->hw, E1000_BPRC);
4811	adapter->stats.mprc += E1000_READ_REG(&adapter->hw, E1000_MPRC);
4812	adapter->stats.gptc += E1000_READ_REG(&adapter->hw, E1000_GPTC);
4813
4814	/* For the 64-bit byte counters the low dword must be read first. */
4815	/* Both registers clear on the read of the high dword */
4816
4817	adapter->stats.gorc += E1000_READ_REG(&adapter->hw, E1000_GORCL) + ((u64)E1000_READ_REG(&adapter->hw, E1000_GORCH) << 32);
4818	adapter->stats.gotc += E1000_READ_REG(&adapter->hw, E1000_GOTCL) + ((u64)E1000_READ_REG(&adapter->hw, E1000_GOTCH) << 32);
4819
4820	adapter->stats.rnbc += E1000_READ_REG(&adapter->hw, E1000_RNBC);
4821	adapter->stats.ruc += E1000_READ_REG(&adapter->hw, E1000_RUC);
4822	adapter->stats.rfc += E1000_READ_REG(&adapter->hw, E1000_RFC);
4823	adapter->stats.roc += E1000_READ_REG(&adapter->hw, E1000_ROC);
4824	adapter->stats.rjc += E1000_READ_REG(&adapter->hw, E1000_RJC);
4825
4826	adapter->stats.tor += E1000_READ_REG(&adapter->hw, E1000_TORL) + ((u64)E1000_READ_REG(&adapter->hw, E1000_TORH) << 32);
4827	adapter->stats.tot += E1000_READ_REG(&adapter->hw, E1000_TOTL) + ((u64)E1000_READ_REG(&adapter->hw, E1000_TOTH) << 32);
4828
4829	adapter->stats.tpr += E1000_READ_REG(&adapter->hw, E1000_TPR);
4830	adapter->stats.tpt += E1000_READ_REG(&adapter->hw, E1000_TPT);
4831	adapter->stats.ptc64 += E1000_READ_REG(&adapter->hw, E1000_PTC64);
4832	adapter->stats.ptc127 += E1000_READ_REG(&adapter->hw, E1000_PTC127);
4833	adapter->stats.ptc255 += E1000_READ_REG(&adapter->hw, E1000_PTC255);
4834	adapter->stats.ptc511 += E1000_READ_REG(&adapter->hw, E1000_PTC511);
4835	adapter->stats.ptc1023 += E1000_READ_REG(&adapter->hw, E1000_PTC1023);
4836	adapter->stats.ptc1522 += E1000_READ_REG(&adapter->hw, E1000_PTC1522);
4837	adapter->stats.mptc += E1000_READ_REG(&adapter->hw, E1000_MPTC);
4838	adapter->stats.bptc += E1000_READ_REG(&adapter->hw, E1000_BPTC);
4839
4840	if (adapter->hw.mac.type >= e1000_82543) {
4841		adapter->stats.algnerrc +=
4842		    E1000_READ_REG(&adapter->hw, E1000_ALGNERRC);
4843		adapter->stats.rxerrc +=
4844		    E1000_READ_REG(&adapter->hw, E1000_RXERRC);
4845		adapter->stats.tncrs +=
4846		    E1000_READ_REG(&adapter->hw, E1000_TNCRS);
4847		adapter->stats.cexterr +=
4848		    E1000_READ_REG(&adapter->hw, E1000_CEXTERR);
4849		adapter->stats.tsctc +=
4850		    E1000_READ_REG(&adapter->hw, E1000_TSCTC);
4851		adapter->stats.tsctfc +=
4852		    E1000_READ_REG(&adapter->hw, E1000_TSCTFC);
4853	}
4854	ifp = adapter->ifp;
4855
4856	ifp->if_collisions = adapter->stats.colc;
4857
4858	/* Rx Errors */
4859	ifp->if_ierrors = adapter->dropped_pkts + adapter->stats.rxerrc +
4860	    adapter->stats.crcerrs + adapter->stats.algnerrc +
4861	    adapter->stats.ruc + adapter->stats.roc +
4862	    adapter->stats.mpc + adapter->stats.cexterr;
4863
4864	/* Tx Errors */
4865	ifp->if_oerrors = adapter->stats.ecol +
4866	    adapter->stats.latecol + adapter->watchdog_events;
4867}
4868
4869
4870/*
4871 * Add sysctl variables, one per statistic, to the system.
4872 */
4873static void
4874em_add_hw_stats(struct adapter *adapter)
4875{
4876
4877	device_t dev = adapter->dev;
4878
4879	struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev);
4880	struct sysctl_oid *tree = device_get_sysctl_tree(dev);
4881	struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree);
4883
4884	struct sysctl_oid *stat_node, *int_node, *host_node;
4885	struct sysctl_oid_list *stat_list, *int_list, *host_list;
4886
4887	/* Driver Statistics */
4888	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "link_irq",
4889			CTLFLAG_RD, &adapter->link_irq, 0,
4890			"Link MSIX IRQ Handled");
4891	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "mbuf_alloc_fail",
4892			 CTLFLAG_RD, &adapter->mbuf_alloc_failed,
4893			 "Std mbuf failed");
4894	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "cluster_alloc_fail",
4895			 CTLFLAG_RD, &adapter->mbuf_cluster_failed,
4896			 "Std mbuf cluster failed");
4897	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "dropped",
4898			CTLFLAG_RD, &adapter->dropped_pkts,
4899			"Driver dropped packets");
4900	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_dma_fail",
4901			CTLFLAG_RD, &adapter->no_tx_dma_setup,
4902			"Driver tx dma failure in xmit");
4903
4904	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_high_water",
4905			CTLFLAG_RD, &adapter->hw.fc.high_water, 0,
4906			"Flow Control High Watermark");
4907	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_low_water",
4908			CTLFLAG_RD, &adapter->hw.fc.low_water, 0,
4909			"Flow Control Low Watermark");
4910
4911	/* MAC stats get their own sub node */
4912
4913	stat_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "mac_stats",
4914				    CTLFLAG_RD, NULL, "Statistics");
4915	stat_list = SYSCTL_CHILDREN(stat_node);
4916
4917	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "excess_coll",
4918			CTLFLAG_RD, &adapter->stats.ecol,
4919			"Excessive collisions");
4920	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "symbol_errors",
4921			CTLFLAG_RD, &adapter->stats.symerrs,
4922			"Symbol Errors");
4923	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "sequence_errors",
4924			CTLFLAG_RD, &adapter->stats.sec,
4925			"Sequence Errors");
4926	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "defer_count",
4927			CTLFLAG_RD, &adapter->stats.dc,
4928			"Defer Count");
4929	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "missed_packets",
4930			CTLFLAG_RD, &adapter->stats.mpc,
4931			"Missed Packets");
4932	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_no_buff",
4933			CTLFLAG_RD, &adapter->stats.rnbc,
4934			"Receive No Buffers");
4935	/* RLEC is inaccurate on some hardware, calculate our own. */
4936/* 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_len_errs", */
4937/* 			CTLFLAG_RD, adapter->stats.roc + adapter->stats.ruc, */
4938/* 			"Receive Length Errors"); */
4939
4940	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_errs",
4941			CTLFLAG_RD, &adapter->stats.rxerrc,
4942			"Receive Errors");
4943	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "crc_errs",
4944			CTLFLAG_RD, &adapter->stats.crcerrs,
4945			"CRC errors");
4946	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "alignment_errs",
4947			CTLFLAG_RD, &adapter->stats.algnerrc,
4948			"Alignment Errors");
4949	/* On 82575 these are collision counts */
4950	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "coll_ext_errs",
4951			CTLFLAG_RD, &adapter->stats.cexterr,
4952			"Collision/Carrier extension errors");
4953	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_overruns",
4954			CTLFLAG_RD, &adapter->rx_overruns,
4955			"RX overruns");
4956	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "watchdog_timeouts",
4957			CTLFLAG_RD, &adapter->watchdog_events,
4958			"Watchdog timeouts");
4959	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xon_recvd",
4960			CTLFLAG_RD, &adapter->stats.xonrxc,
4961			"XON Received");
4962	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xon_txd",
4963			CTLFLAG_RD, &adapter->stats.xontxc,
4964			"XON Transmitted");
4965	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xoff_recvd",
4966			CTLFLAG_RD, &adapter->stats.xoffrxc,
4967			"XOFF Received");
4968	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xoff_txd",
4969			CTLFLAG_RD, &adapter->stats.xofftxc,
4970			"XOFF Transmitted");
4971
4972	/* Packet Reception Stats */
4973	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "total_pkts_recvd",
4974			CTLFLAG_RD, &adapter->stats.tpr,
4975			"Total Packets Received");
4976	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_recvd",
4977			CTLFLAG_RD, &adapter->stats.gprc,
4978			"Good Packets Received");
4979	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_recvd",
4980			CTLFLAG_RD, &adapter->stats.bprc,
4981			"Broadcast Packets Received");
4982	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_recvd",
4983			CTLFLAG_RD, &adapter->stats.mprc,
4984			"Multicast Packets Received");
4985	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_64",
4986			CTLFLAG_RD, &adapter->stats.prc64,
4987			"64 byte frames received");
4988	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_65_127",
4989			CTLFLAG_RD, &adapter->stats.prc127,
4990			"65-127 byte frames received");
4991	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_128_255",
4992			CTLFLAG_RD, &adapter->stats.prc255,
4993			"128-255 byte frames received");
4994	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_256_511",
4995			CTLFLAG_RD, &adapter->stats.prc511,
4996			"256-511 byte frames received");
4997	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_512_1023",
4998			CTLFLAG_RD, &adapter->stats.prc1023,
4999			"512-1023 byte frames received");
5000	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_1024_1522",
5001			CTLFLAG_RD, &adapter->stats.prc1522,
5002			"1024-1522 byte frames received");
5003	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_recvd",
5004			CTLFLAG_RD, &adapter->stats.gorc,
5005			"Good Octets Received");
5006
5007	/* Packet Transmission Stats */
5008	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_txd",
5009			CTLFLAG_RD, &adapter->stats.gotc,
5010			"Good Octets Transmitted");
5011	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "total_pkts_txd",
5012			CTLFLAG_RD, &adapter->stats.tpt,
5013			"Total Packets Transmitted");
5014	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd",
5015			CTLFLAG_RD, &adapter->stats.gptc,
5016			"Good Packets Transmitted");
5017	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_txd",
5018			CTLFLAG_RD, &adapter->stats.bptc,
5019			"Broadcast Packets Transmitted");
5020	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_txd",
5021			CTLFLAG_RD, &adapter->stats.mptc,
5022			"Multicast Packets Transmitted");
5023	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_64",
5024			CTLFLAG_RD, &adapter->stats.ptc64,
5025			"64 byte frames transmitted");
5026	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_65_127",
5027			CTLFLAG_RD, &adapter->stats.ptc127,
5028			"65-127 byte frames transmitted");
5029	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_128_255",
5030			CTLFLAG_RD, &adapter->stats.ptc255,
5031			"128-255 byte frames transmitted");
5032	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_256_511",
5033			CTLFLAG_RD, &adapter->stats.ptc511,
5034			"256-511 byte frames transmitted");
5035	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_512_1023",
5036			CTLFLAG_RD, &adapter->stats.ptc1023,
5037			"512-1023 byte frames transmitted");
5038	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_1024_1522",
5039			CTLFLAG_RD, &adapter->stats.ptc1522,
5040			"1024-1522 byte frames transmitted");
5041	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tso_txd",
5042			CTLFLAG_RD, &adapter->stats.tsctc,
5043			"TSO Contexts Transmitted");
5044	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tso_ctx_fail",
5045			CTLFLAG_RD, &adapter->stats.tsctfc,
5046			"TSO Contexts Failed");
5047
5049	/* Interrupt Stats */
5050
5051	int_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "interrupts",
5052				    CTLFLAG_RD, NULL, "Interrupt Statistics");
5053	int_list = SYSCTL_CHILDREN(int_node);
5054
5055	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "asserts",
5056			CTLFLAG_RD, &adapter->stats.iac,
5057			"Interrupt Assertion Count");
5058
5059	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_pkt_timer",
5060			CTLFLAG_RD, &adapter->stats.icrxptc,
5061			"Interrupt Cause Rx Pkt Timer Expire Count");
5062
5063	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_abs_timer",
5064			CTLFLAG_RD, &adapter->stats.icrxatc,
5065			"Interrupt Cause Rx Abs Timer Expire Count");
5066
5067	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_pkt_timer",
5068			CTLFLAG_RD, &adapter->stats.ictxptc,
5069			"Interrupt Cause Tx Pkt Timer Expire Count");
5070
5071	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_abs_timer",
5072			CTLFLAG_RD, &adapter->stats.ictxatc,
5073			"Interrupt Cause Tx Abs Timer Expire Count");
5074
5075	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_queue_empty",
5076			CTLFLAG_RD, &adapter->stats.ictxqec,
5077			"Interrupt Cause Tx Queue Empty Count");
5078
5079	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_queue_min_thresh",
5080			CTLFLAG_RD, &adapter->stats.ictxqmtc,
5081			"Interrupt Cause Tx Queue Min Thresh Count");
5082
5083	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_desc_min_thresh",
5084			CTLFLAG_RD, &adapter->stats.icrxdmtc,
5085			"Interrupt Cause Rx Desc Min Thresh Count");
5086
5087	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_overrun",
5088			CTLFLAG_RD, &adapter->stats.icrxoc,
5089			"Interrupt Cause Receiver Overrun Count");
5090
5091	/* Host to Card Stats */
5092
5093	host_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "host",
5094				    CTLFLAG_RD, NULL,
5095				    "Host to Card Statistics");
5096
5097	host_list = SYSCTL_CHILDREN(host_node);
5098
5099	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_tx_pkt",
5100			CTLFLAG_RD, &adapter->stats.cbtmpc,
5101			"Circuit Breaker Tx Packet Count");
5102
5103	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "host_tx_pkt_discard",
5104			CTLFLAG_RD, &adapter->stats.htdpmc,
5105			"Host Transmit Discarded Packets");
5106
5107	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "rx_pkt",
5108			CTLFLAG_RD, &adapter->stats.rpthc,
5109			"Rx Packets To Host");
5110
5111	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_rx_pkts",
5112			CTLFLAG_RD, &adapter->stats.cbrmpc,
5113			"Circuit Breaker Rx Packet Count");
5114
5115	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_rx_pkt_drop",
5116			CTLFLAG_RD, &adapter->stats.cbrdpc,
5117			"Circuit Breaker Rx Dropped Count");
5118
5119	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "tx_good_pkt",
5120			CTLFLAG_RD, &adapter->stats.hgptc,
5121			"Host Good Packets Tx Count");
5122
5123	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_tx_pkt_drop",
5124			CTLFLAG_RD, &adapter->stats.htcbdpc,
5125			"Host Tx Circuit Breaker Dropped Count");
5126
5127	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "rx_good_bytes",
5128			CTLFLAG_RD, &adapter->stats.hgorc,
5129			"Host Good Octets Received Count");
5130
5131	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "tx_good_bytes",
5132			CTLFLAG_RD, &adapter->stats.hgotc,
5133			"Host Good Octets Transmit Count");
5134
5135	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "length_errors",
5136			CTLFLAG_RD, &adapter->stats.lenerrs,
5137			"Length Errors");
5138
5139	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "serdes_violation_pkt",
5140			CTLFLAG_RD, &adapter->stats.scvpc,
5141			"SerDes/SGMII Code Violation Pkt Count");
5142
5143	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "header_redir_missed",
5144			CTLFLAG_RD, &adapter->stats.hrmpc,
5145			"Header Redirection Missed Packet Count");
5149}
5150
5151/**********************************************************************
5152 *
5153 *  This routine provides a way to dump out the adapter eeprom,
5154 *  often a useful debug/service tool. This only dumps the first
5155 *  32 words; the stuff that matters is within that extent.
5156 *
5157 **********************************************************************/
5158
5159static int
5160em_sysctl_nvm_info(SYSCTL_HANDLER_ARGS)
5161{
5162	struct adapter *adapter;
5163	int error;
5164	int result;
5165
5166	result = -1;
5167	error = sysctl_handle_int(oidp, &result, 0, req);
5168
5169	if (error || !req->newptr)
5170		return (error);
5171
5172	/*
5173	 * This value will cause a hex dump of the
5174	 * first 32 16-bit words of the EEPROM to
5175	 * the screen.
5176	 */
5177	if (result == 1) {
5178		adapter = (struct adapter *)arg1;
5179		em_print_nvm_info(adapter);
5180	}
5181
5182	return (error);
5183}
5184
5185static void
5186em_print_nvm_info(struct adapter *adapter)
5187{
5188	u16	eeprom_data;
5189	int	i, j, row = 0;
5190
5191	/* It's a bit crude, but it gets the job done */
5192	printf("\nInterface EEPROM Dump:\n");
5193	printf("Offset\n0x0000  ");
5194	for (i = 0, j = 0; i < 32; i++, j++) {
5195		if (j == 8) { /* Make the offset block */
5196			j = 0; ++row;
5197			printf("\n0x00%x0  ", row);
5198		}
5199		e1000_read_nvm(&adapter->hw, i, 1, &eeprom_data);
5200		printf("%04x ", eeprom_data);
5201	}
5202	printf("\n");
5203}
5204
5205static int
5206em_sysctl_int_delay(SYSCTL_HANDLER_ARGS)
5207{
5208	struct em_int_delay_info *info;
5209	struct adapter *adapter;
5210	u32 regval;
5211	int error, usecs, ticks;
5212
5213	info = (struct em_int_delay_info *)arg1;
5214	usecs = info->value;
5215	error = sysctl_handle_int(oidp, &usecs, 0, req);
5216	if (error != 0 || req->newptr == NULL)
5217		return (error);
5218	if (usecs < 0 || usecs > EM_TICKS_TO_USECS(65535))
5219		return (EINVAL);
5220	info->value = usecs;
5221	ticks = EM_USECS_TO_TICKS(usecs);
5222
5223	adapter = info->adapter;
5224
5225	EM_CORE_LOCK(adapter);
5226	regval = E1000_READ_OFFSET(&adapter->hw, info->offset);
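	/* The delay value occupies the low 16 bits of the register */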
5227	regval = (regval & ~0xffff) | (ticks & 0xffff);
5228	/* Handle a few special cases. */
5229	switch (info->offset) {
5230	case E1000_RDTR:
5231		break;
5232	case E1000_TIDV:
5233		if (ticks == 0) {
5234			adapter->txd_cmd &= ~E1000_TXD_CMD_IDE;
5235			/* Don't write 0 into the TIDV register. */
5236			regval++;
5237		} else
5238			adapter->txd_cmd |= E1000_TXD_CMD_IDE;
5239		break;
5240	}
5241	E1000_WRITE_OFFSET(&adapter->hw, info->offset, regval);
5242	EM_CORE_UNLOCK(adapter);
5243	return (0);
5244}
5245
5246static void
5247em_add_int_delay_sysctl(struct adapter *adapter, const char *name,
5248	const char *description, struct em_int_delay_info *info,
5249	int offset, int value)
5250{
5251	info->adapter = adapter;
5252	info->offset = offset;
5253	info->value = value;
5254	SYSCTL_ADD_PROC(device_get_sysctl_ctx(adapter->dev),
5255	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
5256	    OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW,
5257	    info, 0, em_sysctl_int_delay, "I", description);
5258}
5259
5260static void
5261em_add_rx_process_limit(struct adapter *adapter, const char *name,
5262	const char *description, int *limit, int value)
5263{
5264	*limit = value;
5265	SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev),
5266	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
5267	    OID_AUTO, name, CTLFLAG_RW, limit, value, description);
5268}