/******************************************************************************

  Copyright (c) 2001-2010, Intel Corporation
  All rights reserved.

  Redistribution and use in source and binary forms, with or without
  modification, are permitted provided that the following conditions are met:

   1. Redistributions of source code must retain the above copyright notice,
      this list of conditions and the following disclaimer.

   2. Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.

   3. Neither the name of the Intel Corporation nor the names of its
      contributors may be used to endorse or promote products derived from
      this software without specific prior written permission.

  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  POSSIBILITY OF SUCH DAMAGE.

******************************************************************************/
/*$FreeBSD: head/sys/dev/e1000/if_em.c 212303 2010-09-07 20:13:08Z jfv $*/

#ifdef HAVE_KERNEL_OPTION_HEADERS
#include "opt_device_polling.h"
#include "opt_inet.h"
#endif

#include <sys/param.h>
#include <sys/systm.h>
#if __FreeBSD_version >= 800000
#include <sys/buf_ring.h>
#endif
#include <sys/bus.h>
#include <sys/endian.h>
#include <sys/kernel.h>
#include <sys/kthread.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/module.h>
#include <sys/rman.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/sysctl.h>
#include <sys/taskqueue.h>
#include <sys/eventhandler.h>
#include <machine/bus.h>
#include <machine/resource.h>

#include <net/bpf.h>
#include <net/ethernet.h>
#include <net/if.h>
#include <net/if_arp.h>
#include <net/if_dl.h>
#include <net/if_media.h>

#include <net/if_types.h>
#include <net/if_vlan_var.h>

#include <netinet/in_systm.h>
#include <netinet/in.h>
#include <netinet/if_ether.h>
#include <netinet/ip.h>
#include <netinet/ip6.h>
#include <netinet/tcp.h>
#include <netinet/udp.h>

#include <machine/in_cksum.h>
#include <dev/led/led.h>
#include <dev/pci/pcivar.h>
#include <dev/pci/pcireg.h>

#include "e1000_api.h"
#include "e1000_82571.h"
#include "if_em.h"

/*********************************************************************
 *  Set this to one to display debug statistics
 *********************************************************************/
int	em_display_debug_stats = 0;

/*********************************************************************
 *  Driver version:
 *********************************************************************/
char em_driver_version[] = "7.0.6";

/*********************************************************************
 *  PCI Device ID Table
 *
 *  Used by probe to select devices to load on
 *  Last field stores an index into em_strings
 *  Last entry must be all 0s
 *
 *  { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index }
 *********************************************************************/

static em_vendor_info_t em_vendor_info_array[] =
{
	/* Intel(R) PRO/1000 Network Connection */
	{ 0x8086, E1000_DEV_ID_82571EB_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_SERDES_DUAL,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_SERDES_QUAD,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER_LP,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_QUAD_FIBER,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571PT_QUAD_COPPER,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82572EI_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82572EI_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82572EI_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82572EI,		PCI_ANY_ID, PCI_ANY_ID, 0},

	{ 0x8086, E1000_DEV_ID_82573E,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82573E_IAMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82573L,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82583V,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_SPT,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_SPT,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_DPT,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_DPT,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IGP_M_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IGP_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IGP_C,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IFE,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IFE_GT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IFE_G,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IGP_M,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_82567V_3,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IGP_M_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IGP_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IGP_C,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IGP_M,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IGP_M_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IFE,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IFE_GT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IFE_G,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_BM,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82574L,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82574LA,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH10_R_BM_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH10_R_BM_LF,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH10_R_BM_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH10_D_BM_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH10_D_BM_LF,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_M_HV_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_M_HV_LC,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_D_HV_DM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_D_HV_DC,	PCI_ANY_ID, PCI_ANY_ID, 0},
	/* required last entry */
	{ 0, 0, 0, 0, 0}
};

/*********************************************************************
 *  Table of branding strings for all supported NICs.
 *********************************************************************/

static char *em_strings[] = {
	"Intel(R) PRO/1000 Network Connection"
};

/*********************************************************************
 *  Function prototypes
 *********************************************************************/
static int	em_probe(device_t);
static int	em_attach(device_t);
static int	em_detach(device_t);
static int	em_shutdown(device_t);
static int	em_suspend(device_t);
static int	em_resume(device_t);
static void	em_start(struct ifnet *);
static void	em_start_locked(struct ifnet *, struct tx_ring *);
#ifdef EM_MULTIQUEUE
static int	em_mq_start(struct ifnet *, struct mbuf *);
static int	em_mq_start_locked(struct ifnet *,
		    struct tx_ring *, struct mbuf *);
static void	em_qflush(struct ifnet *);
#endif
static int	em_ioctl(struct ifnet *, u_long, caddr_t);
static void	em_init(void *);
static void	em_init_locked(struct adapter *);
static void	em_stop(void *);
static void	em_media_status(struct ifnet *, struct ifmediareq *);
static int	em_media_change(struct ifnet *);
static void	em_identify_hardware(struct adapter *);
static int	em_allocate_pci_resources(struct adapter *);
static int	em_allocate_legacy(struct adapter *);
static int	em_allocate_msix(struct adapter *);
static int	em_allocate_queues(struct adapter *);
static int	em_setup_msix(struct adapter *);
static void	em_free_pci_resources(struct adapter *);
static void	em_local_timer(void *);
static void	em_reset(struct adapter *);
static int	em_setup_interface(device_t, struct adapter *);

static void	em_setup_transmit_structures(struct adapter *);
static void	em_initialize_transmit_unit(struct adapter *);
static int	em_allocate_transmit_buffers(struct tx_ring *);
static void	em_free_transmit_structures(struct adapter *);
static void	em_free_transmit_buffers(struct tx_ring *);

static int	em_setup_receive_structures(struct adapter *);
static int	em_allocate_receive_buffers(struct rx_ring *);
static void	em_initialize_receive_unit(struct adapter *);
static void	em_free_receive_structures(struct adapter *);
static void	em_free_receive_buffers(struct rx_ring *);

static void	em_enable_intr(struct adapter *);
static void	em_disable_intr(struct adapter *);
static void	em_update_stats_counters(struct adapter *);
static void	em_add_hw_stats(struct adapter *adapter);
static bool	em_txeof(struct tx_ring *);
static bool	em_rxeof(struct rx_ring *, int, int *);
#ifndef __NO_STRICT_ALIGNMENT
static int	em_fixup_rx(struct rx_ring *);
#endif
static void	em_receive_checksum(struct e1000_rx_desc *, struct mbuf *);
static void	em_transmit_checksum_setup(struct tx_ring *, struct mbuf *,
		    u32 *, u32 *);
static bool	em_tso_setup(struct tx_ring *, struct mbuf *, u32 *, u32 *);
static void	em_set_promisc(struct adapter *);
static void	em_disable_promisc(struct adapter *);
static void	em_set_multi(struct adapter *);
static void	em_update_link_status(struct adapter *);
static void	em_refresh_mbufs(struct rx_ring *, int);
static void	em_register_vlan(void *, struct ifnet *, u16);
static void	em_unregister_vlan(void *, struct ifnet *, u16);
static void	em_setup_vlan_hw_support(struct adapter *);
static int	em_xmit(struct tx_ring *, struct mbuf **);
static int	em_dma_malloc(struct adapter *, bus_size_t,
		    struct em_dma_alloc *, int);
static void	em_dma_free(struct adapter *, struct em_dma_alloc *);
static int	em_sysctl_nvm_info(SYSCTL_HANDLER_ARGS);
static void	em_print_nvm_info(struct adapter *);
static int	em_is_valid_ether_addr(u8 *);
static int	em_sysctl_int_delay(SYSCTL_HANDLER_ARGS);
static void	em_add_int_delay_sysctl(struct adapter *, const char *,
		    const char *, struct em_int_delay_info *, int, int);
/* Management and WOL Support */
static void	em_init_manageability(struct adapter *);
static void	em_release_manageability(struct adapter *);
static void	em_get_hw_control(struct adapter *);
static void	em_release_hw_control(struct adapter *);
static void	em_get_wakeup(device_t);
static void	em_enable_wakeup(device_t);
static int	em_enable_phy_wakeup(struct adapter *);
static void	em_led_func(void *, int);

static int	em_irq_fast(void *);

/* MSIX handlers */
static void	em_msix_tx(void *);
static void	em_msix_rx(void *);
static void	em_msix_link(void *);
static void	em_handle_tx(void *context, int pending);
static void	em_handle_rx(void *context, int pending);
static void	em_handle_link(void *context, int pending);

static void	em_add_rx_process_limit(struct adapter *, const char *,
		    const char *, int *, int);

static __inline void em_rx_discard(struct rx_ring *, int);

#ifdef DEVICE_POLLING
static poll_handler_t em_poll;
#endif /* DEVICE_POLLING */

/*********************************************************************
 *  FreeBSD Device Interface Entry Points
 *********************************************************************/

static device_method_t em_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe, em_probe),
	DEVMETHOD(device_attach, em_attach),
	DEVMETHOD(device_detach, em_detach),
	DEVMETHOD(device_shutdown, em_shutdown),
	DEVMETHOD(device_suspend, em_suspend),
	DEVMETHOD(device_resume, em_resume),
	{0, 0}
};

static driver_t em_driver = {
	"em", em_methods, sizeof(struct adapter),
};

devclass_t em_devclass;
DRIVER_MODULE(em, pci, em_driver, em_devclass, 0, 0);
MODULE_DEPEND(em, pci, 1, 1, 1);
MODULE_DEPEND(em, ether, 1, 1, 1);

/*********************************************************************
 *  Tunable default values.
 *********************************************************************/

#define EM_TICKS_TO_USECS(ticks)	((1024 * (ticks) + 500) / 1000)
#define EM_USECS_TO_TICKS(usecs)	((1000 * (usecs) + 512) / 1024)
#define M_TSO_LEN			66
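
/*
 * The interrupt delay registers tick in units of 1.024 usecs, hence the
 * 1024/1000 scaling (with rounding) in the conversion macros above.
 * M_TSO_LEN is the assumed maximum header length for the TSO path,
 * roughly an ethernet + IP + TCP header with options.
 */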

/* Allow common code without TSO */
#ifndef CSUM_TSO
#define CSUM_TSO	0
#endif

static int em_tx_int_delay_dflt = EM_TICKS_TO_USECS(EM_TIDV);
static int em_rx_int_delay_dflt = EM_TICKS_TO_USECS(EM_RDTR);
TUNABLE_INT("hw.em.tx_int_delay", &em_tx_int_delay_dflt);
TUNABLE_INT("hw.em.rx_int_delay", &em_rx_int_delay_dflt);

static int em_tx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_TADV);
static int em_rx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_RADV);
TUNABLE_INT("hw.em.tx_abs_int_delay", &em_tx_abs_int_delay_dflt);
TUNABLE_INT("hw.em.rx_abs_int_delay", &em_rx_abs_int_delay_dflt);
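
/*
 * The "abs" values program the RADV/TADV absolute timers, which cap the
 * total delay measured from the first packet, while the plain delay
 * values (RDTR/TIDV) restart whenever another packet arrives.
 */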

static int em_rxd = EM_DEFAULT_RXD;
static int em_txd = EM_DEFAULT_TXD;
TUNABLE_INT("hw.em.rxd", &em_rxd);
TUNABLE_INT("hw.em.txd", &em_txd);

static int em_smart_pwr_down = FALSE;
TUNABLE_INT("hw.em.smart_pwr_down", &em_smart_pwr_down);

/* Controls whether promiscuous also shows bad packets */
static int em_debug_sbp = FALSE;
TUNABLE_INT("hw.em.sbp", &em_debug_sbp);

/* Local controls for MSI/MSIX */
#ifdef EM_MULTIQUEUE
static int em_enable_msix = TRUE;
static int em_msix_queues = 2; /* for 82574, can be 1 or 2 */
#else
static int em_enable_msix = FALSE;
static int em_msix_queues = 0; /* disable */
#endif
TUNABLE_INT("hw.em.enable_msix", &em_enable_msix);
TUNABLE_INT("hw.em.msix_queues", &em_msix_queues);

/* How many packets rxeof tries to clean at a time */
static int em_rx_process_limit = 100;
TUNABLE_INT("hw.em.rx_process_limit", &em_rx_process_limit);

/* Flow control setting - default to FULL */
static int em_fc_setting = e1000_fc_full;
TUNABLE_INT("hw.em.fc_setting", &em_fc_setting);
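
/*
 * Valid settings follow the e1000_fc_mode enum in the shared code:
 * 0 = none, 1 = rx pause, 2 = tx pause, 3 = full.
 */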

/*
** Shadow VFTA table: this is needed because the real
** VLAN filter table gets cleared during a soft reset
** and the driver needs to be able to repopulate it.
*/
static u32 em_shadow_vfta[EM_VFTA_SIZE];

/* Global used in WOL setup with multiport cards */
static int global_quad_port_a = 0;

/*********************************************************************
 *  Device identification routine
 *
 *  em_probe determines if the driver should be loaded on the
 *  adapter based on the PCI vendor/device ID of the adapter.
 *
 *  return BUS_PROBE_DEFAULT on success, positive on failure
 *********************************************************************/

static int
em_probe(device_t dev)
{
	char		adapter_name[60];
	u16		pci_vendor_id = 0;
	u16		pci_device_id = 0;
	u16		pci_subvendor_id = 0;
	u16		pci_subdevice_id = 0;
	em_vendor_info_t *ent;

	INIT_DEBUGOUT("em_probe: begin");

	pci_vendor_id = pci_get_vendor(dev);
	if (pci_vendor_id != EM_VENDOR_ID)
		return (ENXIO);

	pci_device_id = pci_get_device(dev);
	pci_subvendor_id = pci_get_subvendor(dev);
	pci_subdevice_id = pci_get_subdevice(dev);

	ent = em_vendor_info_array;
	while (ent->vendor_id != 0) {
		if ((pci_vendor_id == ent->vendor_id) &&
		    (pci_device_id == ent->device_id) &&

		    ((pci_subvendor_id == ent->subvendor_id) ||
		    (ent->subvendor_id == PCI_ANY_ID)) &&

		    ((pci_subdevice_id == ent->subdevice_id) ||
		    (ent->subdevice_id == PCI_ANY_ID))) {
			sprintf(adapter_name, "%s %s",
				em_strings[ent->index],
				em_driver_version);
			device_set_desc_copy(dev, adapter_name);
			return (BUS_PROBE_DEFAULT);
		}
		ent++;
	}

	return (ENXIO);
}

/*********************************************************************
 *  Device initialization routine
 *
 *  The attach entry point is called when the driver is being loaded.
 *  This routine identifies the type of hardware, allocates all resources
 *  and initializes the hardware.
 *
 *  return 0 on success, positive on failure
 *********************************************************************/

static int
em_attach(device_t dev)
{
	struct adapter	*adapter;
	int		error = 0;

	INIT_DEBUGOUT("em_attach: begin");

	adapter = device_get_softc(dev);
	adapter->dev = adapter->osdep.dev = dev;
	EM_CORE_LOCK_INIT(adapter, device_get_nameunit(dev));

	/* SYSCTL stuff */
	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
	    OID_AUTO, "nvm", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
	    em_sysctl_nvm_info, "I", "NVM Information");

	callout_init_mtx(&adapter->timer, &adapter->core_mtx, 0);

	/* Determine hardware and mac info */
	em_identify_hardware(adapter);

	/* Setup PCI resources */
	if (em_allocate_pci_resources(adapter)) {
		device_printf(dev, "Allocation of PCI resources failed\n");
		error = ENXIO;
		goto err_pci;
	}

	/*
	** For ICH8 and family we need to
	** map the flash memory, and this
	** must happen after the MAC is
	** identified
	*/
	if ((adapter->hw.mac.type == e1000_ich8lan) ||
	    (adapter->hw.mac.type == e1000_pchlan) ||
	    (adapter->hw.mac.type == e1000_ich9lan) ||
	    (adapter->hw.mac.type == e1000_ich10lan)) {
		int rid = EM_BAR_TYPE_FLASH;
		adapter->flash = bus_alloc_resource_any(dev,
		    SYS_RES_MEMORY, &rid, RF_ACTIVE);
		if (adapter->flash == NULL) {
			device_printf(dev, "Mapping of Flash failed\n");
			error = ENXIO;
			goto err_pci;
		}
		/* This is used in the shared code */
		adapter->hw.flash_address = (u8 *)adapter->flash;
		adapter->osdep.flash_bus_space_tag =
		    rman_get_bustag(adapter->flash);
		adapter->osdep.flash_bus_space_handle =
		    rman_get_bushandle(adapter->flash);
	}

	/* Do Shared Code initialization */
	if (e1000_setup_init_funcs(&adapter->hw, TRUE)) {
		device_printf(dev, "Setup of Shared code failed\n");
		error = ENXIO;
		goto err_pci;
	}

	e1000_get_bus_info(&adapter->hw);

	/* Set up some sysctls for the tunable interrupt delays */
	em_add_int_delay_sysctl(adapter, "rx_int_delay",
	    "receive interrupt delay in usecs", &adapter->rx_int_delay,
	    E1000_REGISTER(&adapter->hw, E1000_RDTR), em_rx_int_delay_dflt);
	em_add_int_delay_sysctl(adapter, "tx_int_delay",
	    "transmit interrupt delay in usecs", &adapter->tx_int_delay,
	    E1000_REGISTER(&adapter->hw, E1000_TIDV), em_tx_int_delay_dflt);
	em_add_int_delay_sysctl(adapter, "rx_abs_int_delay",
	    "receive interrupt delay limit in usecs",
	    &adapter->rx_abs_int_delay,
	    E1000_REGISTER(&adapter->hw, E1000_RADV),
	    em_rx_abs_int_delay_dflt);
	em_add_int_delay_sysctl(adapter, "tx_abs_int_delay",
	    "transmit interrupt delay limit in usecs",
	    &adapter->tx_abs_int_delay,
	    E1000_REGISTER(&adapter->hw, E1000_TADV),
	    em_tx_abs_int_delay_dflt);

	/* Sysctls for limiting the amount of work done in the taskqueue */
	em_add_rx_process_limit(adapter, "rx_processing_limit",
	    "max number of rx packets to process", &adapter->rx_process_limit,
	    em_rx_process_limit);

	/*
	 * Validate number of transmit and receive descriptors. It
	 * must not exceed hardware maximum, and must be multiple
	 * of EM_DBA_ALIGN.
	 */
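	/*
	 * For example, with EM_DBA_ALIGN at 128 and 16-byte descriptors,
	 * these checks require ring sizes that are multiples of 8.
	 */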
	if (((em_txd * sizeof(struct e1000_tx_desc)) % EM_DBA_ALIGN) != 0 ||
	    (em_txd > EM_MAX_TXD) || (em_txd < EM_MIN_TXD)) {
		device_printf(dev, "Using %d TX descriptors instead of %d!\n",
		    EM_DEFAULT_TXD, em_txd);
		adapter->num_tx_desc = EM_DEFAULT_TXD;
	} else
		adapter->num_tx_desc = em_txd;

	if (((em_rxd * sizeof(struct e1000_rx_desc)) % EM_DBA_ALIGN) != 0 ||
	    (em_rxd > EM_MAX_RXD) || (em_rxd < EM_MIN_RXD)) {
		device_printf(dev, "Using %d RX descriptors instead of %d!\n",
		    EM_DEFAULT_RXD, em_rxd);
		adapter->num_rx_desc = EM_DEFAULT_RXD;
	} else
		adapter->num_rx_desc = em_rxd;

	adapter->hw.mac.autoneg = DO_AUTO_NEG;
	adapter->hw.phy.autoneg_wait_to_complete = FALSE;
	adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;

	/* Copper options */
	if (adapter->hw.phy.media_type == e1000_media_type_copper) {
		adapter->hw.phy.mdix = AUTO_ALL_MODES;
		adapter->hw.phy.disable_polarity_correction = FALSE;
		adapter->hw.phy.ms_type = EM_MASTER_SLAVE;
	}

	/*
	 * Set the frame limits assuming
	 * standard ethernet sized frames.
	 */
	adapter->max_frame_size = ETHERMTU + ETHER_HDR_LEN + ETHERNET_FCS_SIZE;
	adapter->min_frame_size = ETH_ZLEN + ETHERNET_FCS_SIZE;

	/*
	 * This controls when hardware reports transmit completion
	 * status.
	 */
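	/*
	 * With report_tx_early set, status is written back as soon as
	 * the packet has been fetched into the transmit FIFO rather
	 * than when it is actually on the wire (this follows the shared
	 * code's handling of the flag).
	 */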
	adapter->hw.mac.report_tx_early = 1;

	/*
	** Get queue/ring memory
	*/
	if (em_allocate_queues(adapter)) {
		error = ENOMEM;
		goto err_pci;
	}

	/* Allocate multicast array memory. */
	adapter->mta = malloc(sizeof(u8) * ETH_ADDR_LEN *
	    MAX_NUM_MULTICAST_ADDRESSES, M_DEVBUF, M_NOWAIT);
	if (adapter->mta == NULL) {
		device_printf(dev, "Can not allocate multicast setup array\n");
		error = ENOMEM;
		goto err_late;
	}

	/*
	** Start from a known state; this is
	** important for reading the NVM and
	** MAC address from it.
	*/
	e1000_reset_hw(&adapter->hw);

	/* Make sure we have a good EEPROM before we read from it */
	if (e1000_validate_nvm_checksum(&adapter->hw) < 0) {
		/*
		** Some PCI-E parts fail the first check due to
		** the link being in sleep state; call it again.
		** If it fails a second time, it's a real issue.
		*/
		if (e1000_validate_nvm_checksum(&adapter->hw) < 0) {
			device_printf(dev,
			    "The EEPROM Checksum Is Not Valid\n");
			error = EIO;
			goto err_late;
		}
	}

	/* Copy the permanent MAC address out of the EEPROM */
	if (e1000_read_mac_addr(&adapter->hw) < 0) {
		device_printf(dev, "EEPROM read error while reading MAC"
		    " address\n");
		error = EIO;
		goto err_late;
	}

	if (!em_is_valid_ether_addr(adapter->hw.mac.addr)) {
		device_printf(dev, "Invalid MAC address\n");
		error = EIO;
		goto err_late;
	}

	/*
	**  Do interrupt configuration
	*/
	if (adapter->msix > 1) /* Do MSIX */
		error = em_allocate_msix(adapter);
	else  /* MSI or Legacy */
		error = em_allocate_legacy(adapter);
	if (error)
		goto err_late;

	/*
	 * Get Wake-on-Lan and Management info for later use
	 */
	em_get_wakeup(dev);

	/* Setup OS specific network interface */
	if (em_setup_interface(dev, adapter) != 0)
		goto err_late;

	em_reset(adapter);

	/* Initialize statistics */
	em_update_stats_counters(adapter);

	adapter->hw.mac.get_link_status = 1;
	em_update_link_status(adapter);

	/* Indicate SOL/IDER usage */
	if (e1000_check_reset_block(&adapter->hw))
		device_printf(dev,
		    "PHY reset is blocked due to SOL/IDER session.\n");

	/* Register for VLAN events */
	adapter->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
	    em_register_vlan, adapter, EVENTHANDLER_PRI_FIRST);
	adapter->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
	    em_unregister_vlan, adapter, EVENTHANDLER_PRI_FIRST);

	em_add_hw_stats(adapter);

	/* Non-AMT based hardware can now take control from firmware */
	if (adapter->has_manage && !adapter->has_amt)
		em_get_hw_control(adapter);

	/* Tell the stack that the interface is not active */
	adapter->ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);

	adapter->led_dev = led_create(em_led_func, adapter,
	    device_get_nameunit(dev));

	INIT_DEBUGOUT("em_attach: end");

	return (0);

err_late:
	em_free_transmit_structures(adapter);
	em_free_receive_structures(adapter);
	em_release_hw_control(adapter);
	if (adapter->ifp != NULL)
		if_free(adapter->ifp);
err_pci:
	em_free_pci_resources(adapter);
	free(adapter->mta, M_DEVBUF);
	EM_CORE_LOCK_DESTROY(adapter);

	return (error);
}

/*********************************************************************
 *  Device removal routine
 *
 *  The detach entry point is called when the driver is being removed.
 *  This routine stops the adapter and deallocates all the resources
 *  that were allocated for driver operation.
 *
 *  return 0 on success, positive on failure
 *********************************************************************/

static int
em_detach(device_t dev)
{
	struct adapter	*adapter = device_get_softc(dev);
	struct ifnet	*ifp = adapter->ifp;

	INIT_DEBUGOUT("em_detach: begin");

	/* Make sure VLANS are not using driver */
	if (adapter->ifp->if_vlantrunk != NULL) {
		device_printf(dev, "Vlan in use, detach first\n");
		return (EBUSY);
	}

#ifdef DEVICE_POLLING
	if (ifp->if_capenable & IFCAP_POLLING)
		ether_poll_deregister(ifp);
#endif

	if (adapter->led_dev != NULL)
		led_destroy(adapter->led_dev);

	EM_CORE_LOCK(adapter);
	adapter->in_detach = 1;
	em_stop(adapter);
	EM_CORE_UNLOCK(adapter);
	EM_CORE_LOCK_DESTROY(adapter);

	e1000_phy_hw_reset(&adapter->hw);

	em_release_manageability(adapter);
	em_release_hw_control(adapter);

	/* Unregister VLAN events */
	if (adapter->vlan_attach != NULL)
		EVENTHANDLER_DEREGISTER(vlan_config, adapter->vlan_attach);
	if (adapter->vlan_detach != NULL)
		EVENTHANDLER_DEREGISTER(vlan_unconfig, adapter->vlan_detach);

	ether_ifdetach(adapter->ifp);
	callout_drain(&adapter->timer);

	em_free_pci_resources(adapter);
	bus_generic_detach(dev);
	if_free(ifp);

	em_free_transmit_structures(adapter);
	em_free_receive_structures(adapter);

	em_release_hw_control(adapter);
	free(adapter->mta, M_DEVBUF);

	return (0);
}

/*********************************************************************
 *
 *  Shutdown entry point
 *
 **********************************************************************/

static int
em_shutdown(device_t dev)
{
	return em_suspend(dev);
}

/*
 * Suspend/resume device methods.
 */
static int
em_suspend(device_t dev)
{
	struct adapter *adapter = device_get_softc(dev);

	EM_CORE_LOCK(adapter);

	em_release_manageability(adapter);
	em_release_hw_control(adapter);
	em_enable_wakeup(dev);

	EM_CORE_UNLOCK(adapter);

	return bus_generic_suspend(dev);
}

static int
em_resume(device_t dev)
{
	struct adapter *adapter = device_get_softc(dev);
	struct ifnet *ifp = adapter->ifp;

	EM_CORE_LOCK(adapter);
	em_init_locked(adapter);
	em_init_manageability(adapter);
	EM_CORE_UNLOCK(adapter);
	em_start(ifp);

	return bus_generic_resume(dev);
}

/*********************************************************************
 *  Transmit entry point
 *
 *  em_start is called by the stack to initiate a transmit.
 *  The driver will remain in this routine as long as there are
 *  packets to transmit and transmit resources are available.
 *  If resources are not available, the stack is notified and
 *  the packet is requeued.
 **********************************************************************/

#ifdef EM_MULTIQUEUE
static int
em_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr, struct mbuf *m)
{
	struct adapter	*adapter = txr->adapter;
	struct mbuf	*next;
	int		err = 0, enq = 0;

	if ((ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) !=
	    IFF_DRV_RUNNING || adapter->link_active == 0) {
		if (m != NULL)
			err = drbr_enqueue(ifp, txr->br, m);
		return (err);
	}

	/* Call cleanup if number of TX descriptors low */
	if (txr->tx_avail <= EM_TX_CLEANUP_THRESHOLD)
		em_txeof(txr);

	enq = 0;
	if (m == NULL) {
		next = drbr_dequeue(ifp, txr->br);
	} else if (drbr_needs_enqueue(ifp, txr->br)) {
		if ((err = drbr_enqueue(ifp, txr->br, m)) != 0)
			return (err);
		next = drbr_dequeue(ifp, txr->br);
	} else
		next = m;

	/* Process the queue */
	while (next != NULL) {
		if ((err = em_xmit(txr, &next)) != 0) {
			if (next != NULL)
				err = drbr_enqueue(ifp, txr->br, next);
			break;
		}
		enq++;
		drbr_stats_update(ifp, next->m_pkthdr.len, next->m_flags);
		ETHER_BPF_MTAP(ifp, next);
		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
			break;
		if (txr->tx_avail < EM_MAX_SCATTER) {
			ifp->if_drv_flags |= IFF_DRV_OACTIVE;
			break;
		}
		next = drbr_dequeue(ifp, txr->br);
	}

	if (enq > 0) {
		/* Set the watchdog */
		txr->watchdog_check = TRUE;
		txr->watchdog_time = ticks;
	}
	return (err);
}

/*
** Multiqueue capable stack interface; this is not
** yet truly multiqueue, but that is coming...
*/
static int
em_mq_start(struct ifnet *ifp, struct mbuf *m)
{
	struct adapter	*adapter = ifp->if_softc;
	struct tx_ring	*txr;
	int		i, error = 0;

	/* Which queue to use */
	if ((m->m_flags & M_FLOWID) != 0)
		i = m->m_pkthdr.flowid % adapter->num_queues;
	else
		i = curcpu % adapter->num_queues;
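	/*
	 * Using the flowid (set by RSS-capable hardware or the stack)
	 * keeps every packet of a flow on the same ring, preserving
	 * per-flow ordering.
	 */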

	txr = &adapter->tx_rings[i];

	if (EM_TX_TRYLOCK(txr)) {
		error = em_mq_start_locked(ifp, txr, m);
		EM_TX_UNLOCK(txr);
	} else
		error = drbr_enqueue(ifp, txr->br, m);

	return (error);
}

/*
** Flush all ring buffers
*/
static void
em_qflush(struct ifnet *ifp)
{
	struct adapter	*adapter = ifp->if_softc;
	struct tx_ring	*txr = adapter->tx_rings;
	struct mbuf	*m;

	for (int i = 0; i < adapter->num_queues; i++, txr++) {
		EM_TX_LOCK(txr);
		while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
			m_freem(m);
		EM_TX_UNLOCK(txr);
	}
	if_qflush(ifp);
}

#endif /* EM_MULTIQUEUE */

static void
em_start_locked(struct ifnet *ifp, struct tx_ring *txr)
{
	struct adapter	*adapter = ifp->if_softc;
	struct mbuf	*m_head;

	EM_TX_LOCK_ASSERT(txr);

	if ((ifp->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) !=
	    IFF_DRV_RUNNING)
		return;

	if (!adapter->link_active)
		return;

	/* Call cleanup if number of TX descriptors low */
	if (txr->tx_avail <= EM_TX_CLEANUP_THRESHOLD)
		em_txeof(txr);

	while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
		if (txr->tx_avail < EM_MAX_SCATTER) {
			ifp->if_drv_flags |= IFF_DRV_OACTIVE;
			break;
		}
		IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
		if (m_head == NULL)
			break;
		/*
		 *  Encapsulation can modify our pointer, and/or make it
		 *  NULL on failure.  In that event, we can't requeue.
		 */
		if (em_xmit(txr, &m_head)) {
			if (m_head == NULL)
				break;
			ifp->if_drv_flags |= IFF_DRV_OACTIVE;
			IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
			break;
		}

		/* Send a copy of the frame to the BPF listener */
		ETHER_BPF_MTAP(ifp, m_head);

		/* Set timeout in case hardware has problems transmitting. */
		txr->watchdog_time = ticks;
		txr->watchdog_check = TRUE;
	}

	return;
}

static void
em_start(struct ifnet *ifp)
{
	struct adapter	*adapter = ifp->if_softc;
	struct tx_ring	*txr = adapter->tx_rings;

	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
		EM_TX_LOCK(txr);
		em_start_locked(ifp, txr);
		EM_TX_UNLOCK(txr);
	}
	return;
}

/*********************************************************************
 *  Ioctl entry point
 *
 *  em_ioctl is called when the user wants to configure the
 *  interface.
 *
 *  return 0 on success, positive on failure
 **********************************************************************/

static int
em_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
{
	struct adapter	*adapter = ifp->if_softc;
	struct ifreq *ifr = (struct ifreq *)data;
#ifdef INET
	struct ifaddr *ifa = (struct ifaddr *)data;
#endif
	int error = 0;

	if (adapter->in_detach)
		return (error);

	switch (command) {
	case SIOCSIFADDR:
#ifdef INET
		if (ifa->ifa_addr->sa_family == AF_INET) {
			/*
			 * XXX
			 * Since resetting hardware takes a very long time
			 * and results in link renegotiation, we only
			 * initialize the hardware when it is absolutely
			 * required.
			 */
			ifp->if_flags |= IFF_UP;
			if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) {
				EM_CORE_LOCK(adapter);
				em_init_locked(adapter);
				EM_CORE_UNLOCK(adapter);
			}
			arp_ifinit(ifp, ifa);
		} else
#endif
			error = ether_ioctl(ifp, command, data);
		break;
	case SIOCSIFMTU:
	    {
		int max_frame_size;

		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFMTU (Set Interface MTU)");

		EM_CORE_LOCK(adapter);
		switch (adapter->hw.mac.type) {
		case e1000_82571:
		case e1000_82572:
		case e1000_ich9lan:
		case e1000_ich10lan:
		case e1000_82574:
		case e1000_80003es2lan:	/* 9K Jumbo Frame size */
			max_frame_size = 9234;
			break;
		case e1000_pchlan:
			max_frame_size = 4096;
			break;
			/* Adapters that do not support jumbo frames */
		case e1000_82583:
		case e1000_ich8lan:
			max_frame_size = ETHER_MAX_LEN;
			break;
		default:
			max_frame_size = MAX_JUMBO_FRAME_SIZE;
		}
		if (ifr->ifr_mtu > max_frame_size - ETHER_HDR_LEN -
		    ETHER_CRC_LEN) {
			EM_CORE_UNLOCK(adapter);
			error = EINVAL;
			break;
		}

		ifp->if_mtu = ifr->ifr_mtu;
		adapter->max_frame_size =
		    ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
		em_init_locked(adapter);
		EM_CORE_UNLOCK(adapter);
		break;
	    }
	case SIOCSIFFLAGS:
		IOCTL_DEBUGOUT("ioctl rcv'd: "
		    "SIOCSIFFLAGS (Set Interface Flags)");
		EM_CORE_LOCK(adapter);
		if (ifp->if_flags & IFF_UP) {
			if ((ifp->if_drv_flags & IFF_DRV_RUNNING)) {
				if ((ifp->if_flags ^ adapter->if_flags) &
				    (IFF_PROMISC | IFF_ALLMULTI)) {
					em_disable_promisc(adapter);
					em_set_promisc(adapter);
				}
			} else
				em_init_locked(adapter);
		} else
			if (ifp->if_drv_flags & IFF_DRV_RUNNING)
				em_stop(adapter);
		adapter->if_flags = ifp->if_flags;
		EM_CORE_UNLOCK(adapter);
		break;
	case SIOCADDMULTI:
	case SIOCDELMULTI:
		IOCTL_DEBUGOUT("ioctl rcv'd: SIOC(ADD|DEL)MULTI");
		if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
			EM_CORE_LOCK(adapter);
			em_disable_intr(adapter);
			em_set_multi(adapter);
#ifdef DEVICE_POLLING
			if (!(ifp->if_capenable & IFCAP_POLLING))
#endif
				em_enable_intr(adapter);
			EM_CORE_UNLOCK(adapter);
		}
		break;
	case SIOCSIFMEDIA:
		/* Check SOL/IDER usage */
		EM_CORE_LOCK(adapter);
		if (e1000_check_reset_block(&adapter->hw)) {
			EM_CORE_UNLOCK(adapter);
			device_printf(adapter->dev, "Media change is"
			    " blocked due to SOL/IDER session.\n");
			break;
		}
		EM_CORE_UNLOCK(adapter);
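		/* FALLTHROUGH */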
	case SIOCGIFMEDIA:
		IOCTL_DEBUGOUT("ioctl rcv'd: "
		    "SIOCxIFMEDIA (Get/Set Interface Media)");
		error = ifmedia_ioctl(ifp, ifr, &adapter->media, command);
		break;
	case SIOCSIFCAP:
	    {
		int mask, reinit;

		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFCAP (Set Capabilities)");
		reinit = 0;
		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
#ifdef DEVICE_POLLING
		if (mask & IFCAP_POLLING) {
			if (ifr->ifr_reqcap & IFCAP_POLLING) {
				error = ether_poll_register(em_poll, ifp);
				if (error)
					return (error);
				EM_CORE_LOCK(adapter);
				em_disable_intr(adapter);
				ifp->if_capenable |= IFCAP_POLLING;
				EM_CORE_UNLOCK(adapter);
			} else {
				error = ether_poll_deregister(ifp);
				/* Enable interrupt even in error case */
				EM_CORE_LOCK(adapter);
				em_enable_intr(adapter);
				ifp->if_capenable &= ~IFCAP_POLLING;
				EM_CORE_UNLOCK(adapter);
			}
		}
#endif
		if (mask & IFCAP_HWCSUM) {
			ifp->if_capenable ^= IFCAP_HWCSUM;
			reinit = 1;
		}
		if (mask & IFCAP_TSO4) {
			ifp->if_capenable ^= IFCAP_TSO4;
			reinit = 1;
		}
		if (mask & IFCAP_VLAN_HWTAGGING) {
			ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
			reinit = 1;
		}
		if (mask & IFCAP_VLAN_HWFILTER) {
			ifp->if_capenable ^= IFCAP_VLAN_HWFILTER;
			reinit = 1;
		}
		if ((mask & IFCAP_WOL) &&
		    (ifp->if_capabilities & IFCAP_WOL) != 0) {
			if (mask & IFCAP_WOL_MCAST)
				ifp->if_capenable ^= IFCAP_WOL_MCAST;
			if (mask & IFCAP_WOL_MAGIC)
				ifp->if_capenable ^= IFCAP_WOL_MAGIC;
		}
		if (reinit && (ifp->if_drv_flags & IFF_DRV_RUNNING))
			em_init(adapter);
		VLAN_CAPABILITIES(ifp);
		break;
	    }

	default:
		error = ether_ioctl(ifp, command, data);
		break;
	}

	return (error);
}

/*********************************************************************
 *  Init entry point
 *
 *  This routine is used in two ways: by the stack as the init entry
 *  point in the network interface structure, and by the driver as a
 *  hw/sw initialization routine to bring the adapter to a
 *  consistent state.
 *
 **********************************************************************/

static void
em_init_locked(struct adapter *adapter)
{
	struct ifnet	*ifp = adapter->ifp;
	device_t	dev = adapter->dev;
	u32		pba;

	INIT_DEBUGOUT("em_init: begin");

	EM_CORE_LOCK_ASSERT(adapter);

	em_disable_intr(adapter);
	callout_stop(&adapter->timer);

	/*
	 * Packet Buffer Allocation (PBA)
	 * Writing PBA sets the receive portion of the buffer;
	 * the remainder is used for the transmit buffer.
	 */
	switch (adapter->hw.mac.type) {
	/* Total Packet Buffer on these is 48K */
	case e1000_82571:
	case e1000_82572:
	case e1000_80003es2lan:
		pba = E1000_PBA_32K; /* 32K for Rx, 16K for Tx */
		break;
	case e1000_82573: /* 82573: Total Packet Buffer is 32K */
		pba = E1000_PBA_12K; /* 12K for Rx, 20K for Tx */
		break;
	case e1000_82574:
	case e1000_82583:
		pba = E1000_PBA_20K; /* 20K for Rx, 20K for Tx */
		break;
	case e1000_ich9lan:
	case e1000_ich10lan:
	case e1000_pchlan:
		pba = E1000_PBA_10K;
		break;
	case e1000_ich8lan:
		pba = E1000_PBA_8K;
		break;
	default:
		if (adapter->max_frame_size > 8192)
			pba = E1000_PBA_40K; /* 40K for Rx, 24K for Tx */
		else
			pba = E1000_PBA_48K; /* 48K for Rx, 16K for Tx */
	}

	INIT_DEBUGOUT1("em_init: pba=%dK", pba);
	E1000_WRITE_REG(&adapter->hw, E1000_PBA, pba);

	/* Get the latest mac address; user can use a LAA */
	bcopy(IF_LLADDR(adapter->ifp), adapter->hw.mac.addr,
	    ETHER_ADDR_LEN);

	/* Put the address into the Receive Address Array */
	e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);

	/*
	 * With the 82571 adapter, RAR[0] may be overwritten
	 * when the other port is reset, so we keep a duplicate
	 * in RAR[14] for that eventuality; this assures
	 * the interface continues to function.
	 */
	if (adapter->hw.mac.type == e1000_82571) {
		e1000_set_laa_state_82571(&adapter->hw, TRUE);
		e1000_rar_set(&adapter->hw, adapter->hw.mac.addr,
		    E1000_RAR_ENTRIES - 1);
	}

	/* Initialize the hardware */
	em_reset(adapter);
	em_update_link_status(adapter);

	/* Setup VLAN support, basic and offload if available */
	E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);

	/* Use real VLAN Filter support? */
	if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) {
		if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
			/* Use real VLAN Filter support */
			em_setup_vlan_hw_support(adapter);
		else {
			u32 ctrl;
			ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL);
			ctrl |= E1000_CTRL_VME;
			E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl);
		}
	}

	/* Set hardware offload abilities */
	ifp->if_hwassist = 0;
	if (ifp->if_capenable & IFCAP_TXCSUM)
		ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP);
	if (ifp->if_capenable & IFCAP_TSO4)
		ifp->if_hwassist |= CSUM_TSO;

	/* Configure for OS presence */
	em_init_manageability(adapter);

	/* Prepare transmit descriptors and buffers */
	em_setup_transmit_structures(adapter);
	em_initialize_transmit_unit(adapter);

	/* Setup Multicast table */
	em_set_multi(adapter);

	/* Prepare receive descriptors and buffers */
	if (em_setup_receive_structures(adapter)) {
		device_printf(dev, "Could not setup receive structures\n");
		em_stop(adapter);
		return;
	}
	em_initialize_receive_unit(adapter);

	/* Don't lose promiscuous settings */
	em_set_promisc(adapter);

	ifp->if_drv_flags |= IFF_DRV_RUNNING;
	ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;

	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
	e1000_clear_hw_cntrs_base_generic(&adapter->hw);

	/* MSI/X configuration for 82574 */
	if (adapter->hw.mac.type == e1000_82574) {
		int tmp;
		tmp = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
		tmp |= E1000_CTRL_EXT_PBA_CLR;
		E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, tmp);
		/* Set the IVAR - interrupt vector routing. */
		E1000_WRITE_REG(&adapter->hw, E1000_IVAR, adapter->ivars);
	}

#ifdef DEVICE_POLLING
	/*
	 * Only enable interrupts if we are not polling; make sure
	 * they are off otherwise.
	 */
	if (ifp->if_capenable & IFCAP_POLLING)
		em_disable_intr(adapter);
	else
#endif /* DEVICE_POLLING */
		em_enable_intr(adapter);

	/* AMT based hardware can now take control from firmware */
	if (adapter->has_manage && adapter->has_amt)
		em_get_hw_control(adapter);

	/* Don't reset the phy next time init gets called */
	adapter->hw.phy.reset_disable = TRUE;
}

static void
em_init(void *arg)
{
	struct adapter *adapter = arg;

	EM_CORE_LOCK(adapter);
	em_init_locked(adapter);
	EM_CORE_UNLOCK(adapter);
}

#ifdef DEVICE_POLLING
/*********************************************************************
 *
 *  Legacy polling routine: note this only works with a single queue
 *
 *********************************************************************/
static int
em_poll(struct ifnet *ifp, enum poll_cmd cmd, int count)
{
	struct adapter *adapter = ifp->if_softc;
	struct tx_ring	*txr = adapter->tx_rings;
	struct rx_ring	*rxr = adapter->rx_rings;
	u32		reg_icr;
	int		rx_done;

	EM_CORE_LOCK(adapter);
	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
		EM_CORE_UNLOCK(adapter);
		return (0);
	}

	if (cmd == POLL_AND_CHECK_STATUS) {
		reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
		if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
			callout_stop(&adapter->timer);
			adapter->hw.mac.get_link_status = 1;
			em_update_link_status(adapter);
			callout_reset(&adapter->timer, hz,
			    em_local_timer, adapter);
		}
	}
	EM_CORE_UNLOCK(adapter);

	em_rxeof(rxr, count, &rx_done);

	EM_TX_LOCK(txr);
	em_txeof(txr);
#ifdef EM_MULTIQUEUE
	if (!drbr_empty(ifp, txr->br))
		em_mq_start_locked(ifp, txr, NULL);
#else
	if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
		em_start_locked(ifp, txr);
#endif
	EM_TX_UNLOCK(txr);

	return (rx_done);
}
#endif /* DEVICE_POLLING */

/*********************************************************************
 *
 *  Fast Legacy/MSI Combined Interrupt Service routine
 *
 *********************************************************************/
static int
em_irq_fast(void *arg)
{
	struct adapter	*adapter = arg;
	struct ifnet	*ifp;
	u32		reg_icr;

	ifp = adapter->ifp;

	reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);

	/* Hot eject?  */
	if (reg_icr == 0xffffffff)
		return FILTER_STRAY;

	/* Definitely not our interrupt.  */
	if (reg_icr == 0x0)
		return FILTER_STRAY;

	/*
	 * Starting with the 82571 chip, bit 31 should be used to
	 * determine whether the interrupt belongs to us.
	 */
	if (adapter->hw.mac.type >= e1000_82571 &&
	    (reg_icr & E1000_ICR_INT_ASSERTED) == 0)
		return FILTER_STRAY;

	em_disable_intr(adapter);
	taskqueue_enqueue(adapter->tq, &adapter->que_task);
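	/*
	 * Interrupts stay masked here; em_handle_que() re-enables them
	 * once the deferred RX/TX work has been processed.
	 */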
1443
1444	/* Link status change */
1445	if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
1446		adapter->hw.mac.get_link_status = 1;
1447		taskqueue_enqueue(taskqueue_fast, &adapter->link_task);
1448	}
1449
1450	if (reg_icr & E1000_ICR_RXO)
1451		adapter->rx_overruns++;
1452	return FILTER_HANDLED;
1453}
1454
1455/* Combined RX/TX handler, used by Legacy and MSI */
1456static void
1457em_handle_que(void *context, int pending)
1458{
1459	struct adapter	*adapter = context;
1460	struct ifnet	*ifp = adapter->ifp;
1461	struct tx_ring	*txr = adapter->tx_rings;
1462	struct rx_ring	*rxr = adapter->rx_rings;
1463	bool		more;
1464
1465
1466	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1467		more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
1468
1469		EM_TX_LOCK(txr);
1470		if (em_txeof(txr))
1471			more = TRUE;
1472#ifdef EM_MULTIQUEUE
1473		if (!drbr_empty(ifp, txr->br))
1474			em_mq_start_locked(ifp, txr, NULL);
1475#else
1476		if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1477			em_start_locked(ifp, txr);
1478#endif
1479		EM_TX_UNLOCK(txr);
1480		if (more) {
1481			taskqueue_enqueue(adapter->tq, &adapter->que_task);
1482			return;
1483		}
1484	}
1485
1486	em_enable_intr(adapter);
1487	return;
1488}
1489
1490
1491/*********************************************************************
1492 *
1493 *  MSIX Interrupt Service Routines
1494 *
1495 **********************************************************************/
1496static void
1497em_msix_tx(void *arg)
1498{
1499	struct tx_ring *txr = arg;
1500	struct adapter *adapter = txr->adapter;
1501	bool		more;
1502
1503	++txr->tx_irq;
1504	EM_TX_LOCK(txr);
1505	more = em_txeof(txr);
1506	EM_TX_UNLOCK(txr);
1507	if (more)
1508		taskqueue_enqueue(txr->tq, &txr->tx_task);
1509	else
1510		/* Reenable this interrupt */
1511		E1000_WRITE_REG(&adapter->hw, E1000_IMS, txr->ims);
1512	return;
1513}
1514
1515/*********************************************************************
1516 *
1517 *  MSIX RX Interrupt Service routine
1518 *
1519 **********************************************************************/
1520
1521static void
1522em_msix_rx(void *arg)
1523{
1524	struct rx_ring	*rxr = arg;
1525	struct adapter	*adapter = rxr->adapter;
1526	bool		more;
1527
1528	++rxr->rx_irq;
1529	more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
1530	if (more)
1531		taskqueue_enqueue(rxr->tq, &rxr->rx_task);
1532	else
1533		/* Reenable this interrupt */
1534		E1000_WRITE_REG(&adapter->hw, E1000_IMS, rxr->ims);
1535	return;
1536}
1537
1538/*********************************************************************
1539 *
1540 *  MSIX Link Fast Interrupt Service routine
1541 *
1542 **********************************************************************/
1543static void
1544em_msix_link(void *arg)
1545{
1546	struct adapter	*adapter = arg;
1547	u32		reg_icr;
1548
1549	++adapter->link_irq;
1550	reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1551
1552	if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
1553		adapter->hw.mac.get_link_status = 1;
1554		em_handle_link(adapter, 0);
1555	} else
1556		E1000_WRITE_REG(&adapter->hw, E1000_IMS,
1557		    EM_MSIX_LINK | E1000_IMS_LSC);
1558	return;
1559}
1560
1561static void
1562em_handle_rx(void *context, int pending)
1563{
1564	struct rx_ring	*rxr = context;
1565	struct adapter	*adapter = rxr->adapter;
1566        bool            more;
1567
1568	more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
1569	if (more)
1570		taskqueue_enqueue(rxr->tq, &rxr->rx_task);
1571	else
1572		/* Reenable this interrupt */
1573		E1000_WRITE_REG(&adapter->hw, E1000_IMS, rxr->ims);
1574}
1575
1576static void
1577em_handle_tx(void *context, int pending)
1578{
1579	struct tx_ring	*txr = context;
1580	struct adapter	*adapter = txr->adapter;
1581	struct ifnet	*ifp = adapter->ifp;
1582
1583	if (!EM_TX_TRYLOCK(txr))
1584		return;
1585
1586	em_txeof(txr);
1587
1588#ifdef EM_MULTIQUEUE
1589	if (!drbr_empty(ifp, txr->br))
1590		em_mq_start_locked(ifp, txr, NULL);
1591#else
1592	if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1593		em_start_locked(ifp, txr);
1594#endif
1595	E1000_WRITE_REG(&adapter->hw, E1000_IMS, txr->ims);
1596	EM_TX_UNLOCK(txr);
1597}
1598
1599static void
1600em_handle_link(void *context, int pending)
1601{
1602	struct adapter	*adapter = context;
1603	struct ifnet *ifp = adapter->ifp;
1604
1605	if (!(ifp->if_drv_flags & IFF_DRV_RUNNING))
1606		return;
1607
1608	EM_CORE_LOCK(adapter);
1609	callout_stop(&adapter->timer);
1610	em_update_link_status(adapter);
1611	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
1612	E1000_WRITE_REG(&adapter->hw, E1000_IMS,
1613	    EM_MSIX_LINK | E1000_IMS_LSC);
1614	EM_CORE_UNLOCK(adapter);
1615}
1616
1617
1618/*********************************************************************
1619 *
1620 *  Media Ioctl callback
1621 *
1622 *  This routine is called whenever the user queries the status of
1623 *  the interface using ifconfig.
1624 *
1625 **********************************************************************/
1626static void
1627em_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
1628{
1629	struct adapter *adapter = ifp->if_softc;
1630	u_char fiber_type = IFM_1000_SX;
1631
1632	INIT_DEBUGOUT("em_media_status: begin");
1633
1634	EM_CORE_LOCK(adapter);
1635	em_update_link_status(adapter);
1636
1637	ifmr->ifm_status = IFM_AVALID;
1638	ifmr->ifm_active = IFM_ETHER;
1639
1640	if (!adapter->link_active) {
1641		EM_CORE_UNLOCK(adapter);
1642		return;
1643	}
1644
1645	ifmr->ifm_status |= IFM_ACTIVE;
1646
1647	if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
1648	    (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
1649		ifmr->ifm_active |= fiber_type | IFM_FDX;
1650	} else {
1651		switch (adapter->link_speed) {
1652		case 10:
1653			ifmr->ifm_active |= IFM_10_T;
1654			break;
1655		case 100:
1656			ifmr->ifm_active |= IFM_100_TX;
1657			break;
1658		case 1000:
1659			ifmr->ifm_active |= IFM_1000_T;
1660			break;
1661		}
1662		if (adapter->link_duplex == FULL_DUPLEX)
1663			ifmr->ifm_active |= IFM_FDX;
1664		else
1665			ifmr->ifm_active |= IFM_HDX;
1666	}
1667	EM_CORE_UNLOCK(adapter);
1668}
1669
1670/*********************************************************************
1671 *
1672 *  Media Ioctl callback
1673 *
1674 *  This routine is called when the user changes speed/duplex using
1675 *  media/mediopt option with ifconfig.
1676 *
1677 **********************************************************************/
1678static int
1679em_media_change(struct ifnet *ifp)
1680{
1681	struct adapter *adapter = ifp->if_softc;
1682	struct ifmedia  *ifm = &adapter->media;
1683
1684	INIT_DEBUGOUT("em_media_change: begin");
1685
1686	if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
1687		return (EINVAL);
1688
1689	EM_CORE_LOCK(adapter);
1690	switch (IFM_SUBTYPE(ifm->ifm_media)) {
1691	case IFM_AUTO:
1692		adapter->hw.mac.autoneg = DO_AUTO_NEG;
1693		adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
1694		break;
1695	case IFM_1000_LX:
1696	case IFM_1000_SX:
1697	case IFM_1000_T:
1698		adapter->hw.mac.autoneg = DO_AUTO_NEG;
1699		adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
1700		break;
1701	case IFM_100_TX:
1702		adapter->hw.mac.autoneg = FALSE;
1703		adapter->hw.phy.autoneg_advertised = 0;
1704		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1705			adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_FULL;
1706		else
1707			adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_HALF;
1708		break;
1709	case IFM_10_T:
1710		adapter->hw.mac.autoneg = FALSE;
1711		adapter->hw.phy.autoneg_advertised = 0;
1712		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1713			adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_FULL;
1714		else
1715			adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_HALF;
1716		break;
1717	default:
1718		device_printf(adapter->dev, "Unsupported media type\n");
1719	}
1720
1721	/* As the speed/duplex settings my have changed we need to
1722	 * reset the PHY.
1723	 */
1724	adapter->hw.phy.reset_disable = FALSE;
1725
1726	em_init_locked(adapter);
1727	EM_CORE_UNLOCK(adapter);
1728
1729	return (0);
1730}
1731
1732/*********************************************************************
1733 *
1734 *  This routine maps the mbufs to tx descriptors.
1735 *
1736 *  return 0 on success, positive on failure
1737 **********************************************************************/
1738
1739static int
1740em_xmit(struct tx_ring *txr, struct mbuf **m_headp)
1741{
1742	struct adapter		*adapter = txr->adapter;
1743	bus_dma_segment_t	segs[EM_MAX_SCATTER];
1744	bus_dmamap_t		map;
1745	struct em_buffer	*tx_buffer, *tx_buffer_mapped;
1746	struct e1000_tx_desc	*ctxd = NULL;
1747	struct mbuf		*m_head;
1748	u32			txd_upper, txd_lower, txd_used, txd_saved;
1749	int			nsegs, i, j, first, last = 0;
1750	int			error, do_tso, tso_desc = 0;
1751
1752	m_head = *m_headp;
1753	txd_upper = txd_lower = txd_used = txd_saved = 0;
1754	do_tso = ((m_head->m_pkthdr.csum_flags & CSUM_TSO) != 0);
1755
1756	/*
1757	** When doing checksum offload, it is critical to
1758	** make sure the first mbuf has more than header,
1759	** because that routine expects data to be present.
1760	*/
1761	if ((m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD) &&
1762	    (m_head->m_len < ETHER_HDR_LEN + sizeof(struct ip))) {
1763		m_head = m_pullup(m_head, ETHER_HDR_LEN + sizeof(struct ip));
1764		*m_headp = m_head;
1765		if (m_head == NULL)
1766			return (ENOBUFS);
1767	}
1768
1769	/*
1770	 * TSO workaround:
1771	 *  If an mbuf is only header we need
1772	 *     to pull 4 bytes of data into it.
1773	 */
1774	if (do_tso && (m_head->m_len <= M_TSO_LEN)) {
1775		m_head = m_pullup(m_head, M_TSO_LEN + 4);
1776		*m_headp = m_head;
1777		if (m_head == NULL)
1778			return (ENOBUFS);
1779	}
1780
1781	/*
1782	 * Map the packet for DMA
1783	 *
1784	 * Capture the first descriptor index,
1785	 * this descriptor will have the index
1786	 * of the EOP which is the only one that
1787	 * now gets a DONE bit writeback.
1788	 */
1789	first = txr->next_avail_desc;
1790	tx_buffer = &txr->tx_buffers[first];
1791	tx_buffer_mapped = tx_buffer;
1792	map = tx_buffer->map;
1793
1794	error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
1795	    *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
1796
1797	/*
1798	 * There are two types of errors we can (try) to handle:
1799	 * - EFBIG means the mbuf chain was too long and bus_dma ran
1800	 *   out of segments.  Defragment the mbuf chain and try again.
1801	 * - ENOMEM means bus_dma could not obtain enough bounce buffers
1802	 *   at this point in time.  Defer sending and try again later.
1803	 * All other errors, in particular EINVAL, are fatal and prevent the
1804	 * mbuf chain from ever going through.  Drop it and report error.
1805	 */
1806	if (error == EFBIG) {
1807		struct mbuf *m;
1808
1809		m = m_defrag(*m_headp, M_DONTWAIT);
1810		if (m == NULL) {
1811			adapter->mbuf_alloc_failed++;
1812			m_freem(*m_headp);
1813			*m_headp = NULL;
1814			return (ENOBUFS);
1815		}
1816		*m_headp = m;
1817
1818		/* Try it again */
1819		error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
1820		    *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
1821
1822		if (error) {
1823			adapter->no_tx_dma_setup++;
1824			m_freem(*m_headp);
1825			*m_headp = NULL;
1826			return (error);
1827		}
1828	} else if (error != 0) {
1829		adapter->no_tx_dma_setup++;
1830		return (error);
1831	}
1832
1833	/*
1834	 * TSO Hardware workaround, if this packet is not
1835	 * TSO, and is only a single descriptor long, and
1836	 * it follows a TSO burst, then we need to add a
1837	 * sentinel descriptor to prevent premature writeback.
1838	 */
1839	if ((do_tso == 0) && (txr->tx_tso == TRUE)) {
1840		if (nsegs == 1)
1841			tso_desc = TRUE;
1842		txr->tx_tso = FALSE;
1843	}
1844
	if (nsegs > (txr->tx_avail - 2)) {
		txr->no_desc_avail++;
		bus_dmamap_unload(txr->txtag, map);
		return (ENOBUFS);
	}
1850	m_head = *m_headp;
1851
1852	/* Do hardware assists */
1853#if __FreeBSD_version >= 700000
1854	if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
1855		error = em_tso_setup(txr, m_head, &txd_upper, &txd_lower);
1856		if (error != TRUE)
1857			return (ENXIO); /* something foobar */
1858		/* we need to make a final sentinel transmit desc */
1859		tso_desc = TRUE;
1860	} else
1861#endif
1862	if (m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD)
1863		em_transmit_checksum_setup(txr,  m_head,
1864		    &txd_upper, &txd_lower);
1865
1866	i = txr->next_avail_desc;
1867
1868	/* Set up our transmit descriptors */
1869	for (j = 0; j < nsegs; j++) {
1870		bus_size_t seg_len;
1871		bus_addr_t seg_addr;
1872
1873		tx_buffer = &txr->tx_buffers[i];
1874		ctxd = &txr->tx_base[i];
1875		seg_addr = segs[j].ds_addr;
1876		seg_len  = segs[j].ds_len;
1877		/*
1878		** TSO Workaround:
1879		** If this is the last descriptor, we want to
1880		** split it so we have a small final sentinel
1881		*/
		if (tso_desc && (j == (nsegs - 1)) && (seg_len > 8)) {
			seg_len -= 4;
			ctxd->buffer_addr = htole64(seg_addr);
			ctxd->lower.data = htole32(
			    adapter->txd_cmd | txd_lower | seg_len);
			ctxd->upper.data = htole32(txd_upper);
			if (++i == adapter->num_tx_desc)
				i = 0;
			/* Now make the sentinel */
			++txd_used; /* using an extra txd */
			ctxd = &txr->tx_base[i];
			tx_buffer = &txr->tx_buffers[i];
			ctxd->buffer_addr =
			    htole64(seg_addr + seg_len);
			ctxd->lower.data = htole32(
			    adapter->txd_cmd | txd_lower | 4);
			ctxd->upper.data = htole32(txd_upper);
			last = i;
			if (++i == adapter->num_tx_desc)
				i = 0;
		} else {
			ctxd->buffer_addr = htole64(seg_addr);
			ctxd->lower.data = htole32(
			    adapter->txd_cmd | txd_lower | seg_len);
			ctxd->upper.data = htole32(txd_upper);
			last = i;
			if (++i == adapter->num_tx_desc)
				i = 0;
		}
1914		tx_buffer->m_head = NULL;
1915		tx_buffer->next_eop = -1;
1916	}
1917
1918	txr->next_avail_desc = i;
1919	txr->tx_avail -= nsegs;
1920	if (tso_desc) /* TSO used an extra for sentinel */
1921		txr->tx_avail -= txd_used;
1922
	if (m_head->m_flags & M_VLANTAG) {
		/* Set the vlan id. */
		ctxd->upper.fields.special =
		    htole16(m_head->m_pkthdr.ether_vtag);
		/* Tell hardware to add tag */
		ctxd->lower.data |= htole32(E1000_TXD_CMD_VLE);
	}

	tx_buffer->m_head = m_head;
	tx_buffer_mapped->map = tx_buffer->map;
	tx_buffer->map = map;
	bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE);
1935
1936        /*
1937         * Last Descriptor of Packet
1938	 * needs End Of Packet (EOP)
1939	 * and Report Status (RS)
1940         */
1941        ctxd->lower.data |=
1942	    htole32(E1000_TXD_CMD_EOP | E1000_TXD_CMD_RS);
1943	/*
1944	 * Keep track in the first buffer which
1945	 * descriptor will be written back
1946	 */
1947	tx_buffer = &txr->tx_buffers[first];
1948	tx_buffer->next_eop = last;
1949
1950	/*
1951	 * Advance the Transmit Descriptor Tail (TDT), this tells the E1000
1952	 * that this frame is available to transmit.
1953	 */
1954	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
1955	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1956	E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me), i);
1957
1958	return (0);
1959}
1960
1961static void
1962em_set_promisc(struct adapter *adapter)
1963{
1964	struct ifnet	*ifp = adapter->ifp;
1965	u32		reg_rctl;
1966
1967	reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
1968
1969	if (ifp->if_flags & IFF_PROMISC) {
1970		reg_rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
1971		/* Turn this on if you want to see bad packets */
1972		if (em_debug_sbp)
1973			reg_rctl |= E1000_RCTL_SBP;
1974		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
1975	} else if (ifp->if_flags & IFF_ALLMULTI) {
1976		reg_rctl |= E1000_RCTL_MPE;
1977		reg_rctl &= ~E1000_RCTL_UPE;
1978		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
1979	}
1980}
1981
1982static void
1983em_disable_promisc(struct adapter *adapter)
1984{
1985	u32	reg_rctl;
1986
1987	reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
1988
1989	reg_rctl &=  (~E1000_RCTL_UPE);
1990	reg_rctl &=  (~E1000_RCTL_MPE);
1991	reg_rctl &=  (~E1000_RCTL_SBP);
1992	E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
1993}
1994
1995
1996/*********************************************************************
1997 *  Multicast Update
1998 *
1999 *  This routine is called whenever multicast address list is updated.
2000 *
2001 **********************************************************************/
2002
2003static void
2004em_set_multi(struct adapter *adapter)
2005{
2006	struct ifnet	*ifp = adapter->ifp;
2007	struct ifmultiaddr *ifma;
2008	u32 reg_rctl = 0;
2009	u8  *mta; /* Multicast array memory */
2010	int mcnt = 0;
2011
2012	IOCTL_DEBUGOUT("em_set_multi: begin");
2013
2014	mta = adapter->mta;
2015	bzero(mta, sizeof(u8) * ETH_ADDR_LEN * MAX_NUM_MULTICAST_ADDRESSES);
2016
2017	if (adapter->hw.mac.type == e1000_82542 &&
2018	    adapter->hw.revision_id == E1000_REVISION_2) {
2019		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2020		if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
2021			e1000_pci_clear_mwi(&adapter->hw);
2022		reg_rctl |= E1000_RCTL_RST;
2023		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2024		msec_delay(5);
2025	}
2026
2027#if __FreeBSD_version < 800000
2028	IF_ADDR_LOCK(ifp);
2029#else
2030	if_maddr_rlock(ifp);
2031#endif
2032	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
2033		if (ifma->ifma_addr->sa_family != AF_LINK)
2034			continue;
2035
2036		if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
2037			break;
2038
2039		bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
2040		    &mta[mcnt * ETH_ADDR_LEN], ETH_ADDR_LEN);
2041		mcnt++;
2042	}
2043#if __FreeBSD_version < 800000
2044	IF_ADDR_UNLOCK(ifp);
2045#else
2046	if_maddr_runlock(ifp);
2047#endif
2048	if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES) {
2049		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2050		reg_rctl |= E1000_RCTL_MPE;
2051		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2052	} else
2053		e1000_update_mc_addr_list(&adapter->hw, mta, mcnt);
2054
2055	if (adapter->hw.mac.type == e1000_82542 &&
2056	    adapter->hw.revision_id == E1000_REVISION_2) {
2057		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2058		reg_rctl &= ~E1000_RCTL_RST;
2059		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2060		msec_delay(5);
2061		if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
2062			e1000_pci_set_mwi(&adapter->hw);
2063	}
2064}
2065
2066
2067/*********************************************************************
2068 *  Timer routine
2069 *
2070 *  This routine checks for link status and updates statistics.
2071 *
2072 **********************************************************************/
2073
2074static void
2075em_local_timer(void *arg)
2076{
2077	struct adapter	*adapter = arg;
2078	struct ifnet	*ifp = adapter->ifp;
2079	struct tx_ring	*txr = adapter->tx_rings;
2080
2081	EM_CORE_LOCK_ASSERT(adapter);
2082
2083	em_update_link_status(adapter);
2084	em_update_stats_counters(adapter);
2085
2086	/* Reset LAA into RAR[0] on 82571 */
2087	if (e1000_get_laa_state_82571(&adapter->hw) == TRUE)
2088		e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);
2089
2090	/*
2091	** Check for time since any descriptor was cleaned
2092	*/
2093	for (int i = 0; i < adapter->num_queues; i++, txr++) {
2094		EM_TX_LOCK(txr);
2095		if (txr->watchdog_check == FALSE) {
2096			EM_TX_UNLOCK(txr);
2097			continue;
2098		}
2099		if ((ticks - txr->watchdog_time) > EM_WATCHDOG)
2100			goto hung;
2101		EM_TX_UNLOCK(txr);
2102	}
2103
2104	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
2105	return;
2106hung:
2107	device_printf(adapter->dev, "Watchdog timeout -- resetting\n");
2108	ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2109	adapter->watchdog_events++;
2110	EM_TX_UNLOCK(txr);
2111	em_init_locked(adapter);
2112}
2113
2114
2115static void
2116em_update_link_status(struct adapter *adapter)
2117{
2118	struct e1000_hw *hw = &adapter->hw;
2119	struct ifnet *ifp = adapter->ifp;
2120	device_t dev = adapter->dev;
2121	u32 link_check = 0;
2122
2123	/* Get the cached link value or read phy for real */
2124	switch (hw->phy.media_type) {
2125	case e1000_media_type_copper:
2126		if (hw->mac.get_link_status) {
2127			/* Do the work to read phy */
2128			e1000_check_for_link(hw);
2129			link_check = !hw->mac.get_link_status;
2130			if (link_check) /* ESB2 fix */
2131				e1000_cfg_on_link_up(hw);
2132		} else
2133			link_check = TRUE;
2134		break;
2135	case e1000_media_type_fiber:
2136		e1000_check_for_link(hw);
2137		link_check = (E1000_READ_REG(hw, E1000_STATUS) &
2138                                 E1000_STATUS_LU);
2139		break;
2140	case e1000_media_type_internal_serdes:
2141		e1000_check_for_link(hw);
2142		link_check = adapter->hw.mac.serdes_has_link;
2143		break;
2144	default:
2145	case e1000_media_type_unknown:
2146		break;
2147	}
2148
2149	/* Now check for a transition */
2150	if (link_check && (adapter->link_active == 0)) {
2151		e1000_get_speed_and_duplex(hw, &adapter->link_speed,
2152		    &adapter->link_duplex);
2153		/* Check if we must disable SPEED_MODE bit on PCI-E */
2154		if ((adapter->link_speed != SPEED_1000) &&
2155		    ((hw->mac.type == e1000_82571) ||
2156		    (hw->mac.type == e1000_82572))) {
2157			int tarc0;
2158			tarc0 = E1000_READ_REG(hw, E1000_TARC(0));
2159			tarc0 &= ~SPEED_MODE_BIT;
2160			E1000_WRITE_REG(hw, E1000_TARC(0), tarc0);
2161		}
2162		if (bootverbose)
2163			device_printf(dev, "Link is up %d Mbps %s\n",
2164			    adapter->link_speed,
2165			    ((adapter->link_duplex == FULL_DUPLEX) ?
2166			    "Full Duplex" : "Half Duplex"));
2167		adapter->link_active = 1;
2168		adapter->smartspeed = 0;
2169		ifp->if_baudrate = adapter->link_speed * 1000000;
2170		if_link_state_change(ifp, LINK_STATE_UP);
2171	} else if (!link_check && (adapter->link_active == 1)) {
2172		ifp->if_baudrate = adapter->link_speed = 0;
2173		adapter->link_duplex = 0;
2174		if (bootverbose)
2175			device_printf(dev, "Link is Down\n");
2176		adapter->link_active = 0;
2177		/* Link down, disable watchdog */
2178		// JFV change later
2179		//adapter->watchdog_check = FALSE;
2180		if_link_state_change(ifp, LINK_STATE_DOWN);
2181	}
2182}
2183
2184/*********************************************************************
2185 *
2186 *  This routine disables all traffic on the adapter by issuing a
2187 *  global reset on the MAC and deallocates TX/RX buffers.
2188 *
2189 *  This routine should always be called with BOTH the CORE
2190 *  and TX locks.
2191 **********************************************************************/
2192
2193static void
2194em_stop(void *arg)
2195{
2196	struct adapter	*adapter = arg;
2197	struct ifnet	*ifp = adapter->ifp;
2198	struct tx_ring	*txr = adapter->tx_rings;
2199
2200	EM_CORE_LOCK_ASSERT(adapter);
2201
2202	INIT_DEBUGOUT("em_stop: begin");
2203
2204	em_disable_intr(adapter);
2205	callout_stop(&adapter->timer);
2206
2207	/* Tell the stack that the interface is no longer active */
2208	ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
2209
	/* Unarm watchdog timer. */
2211	for (int i = 0; i < adapter->num_queues; i++, txr++) {
2212		EM_TX_LOCK(txr);
2213		txr->watchdog_check = FALSE;
2214		EM_TX_UNLOCK(txr);
2215	}
2216
2217	e1000_reset_hw(&adapter->hw);
2218	E1000_WRITE_REG(&adapter->hw, E1000_WUC, 0);
2219
2220	e1000_led_off(&adapter->hw);
2221	e1000_cleanup_led(&adapter->hw);
2222}
2223
2224
2225/*********************************************************************
2226 *
2227 *  Determine hardware revision.
2228 *
2229 **********************************************************************/
2230static void
2231em_identify_hardware(struct adapter *adapter)
2232{
2233	device_t dev = adapter->dev;
2234
2235	/* Make sure our PCI config space has the necessary stuff set */
2236	adapter->hw.bus.pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2);
2237	if (!((adapter->hw.bus.pci_cmd_word & PCIM_CMD_BUSMASTEREN) &&
2238	    (adapter->hw.bus.pci_cmd_word & PCIM_CMD_MEMEN))) {
2239		device_printf(dev, "Memory Access and/or Bus Master bits "
2240		    "were not set!\n");
2241		adapter->hw.bus.pci_cmd_word |=
2242		(PCIM_CMD_BUSMASTEREN | PCIM_CMD_MEMEN);
2243		pci_write_config(dev, PCIR_COMMAND,
2244		    adapter->hw.bus.pci_cmd_word, 2);
2245	}
2246
2247	/* Save off the information about this board */
2248	adapter->hw.vendor_id = pci_get_vendor(dev);
2249	adapter->hw.device_id = pci_get_device(dev);
2250	adapter->hw.revision_id = pci_read_config(dev, PCIR_REVID, 1);
2251	adapter->hw.subsystem_vendor_id =
2252	    pci_read_config(dev, PCIR_SUBVEND_0, 2);
2253	adapter->hw.subsystem_device_id =
2254	    pci_read_config(dev, PCIR_SUBDEV_0, 2);
2255
2256	/* Do Shared Code Init and Setup */
2257	if (e1000_set_mac_type(&adapter->hw)) {
2258		device_printf(dev, "Setup init failure\n");
2259		return;
2260	}
2261}
2262
2263static int
2264em_allocate_pci_resources(struct adapter *adapter)
2265{
2266	device_t	dev = adapter->dev;
2267	int		rid;
2268
2269	rid = PCIR_BAR(0);
2270	adapter->memory = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
2271	    &rid, RF_ACTIVE);
2272	if (adapter->memory == NULL) {
2273		device_printf(dev, "Unable to allocate bus resource: memory\n");
2274		return (ENXIO);
2275	}
2276	adapter->osdep.mem_bus_space_tag =
2277	    rman_get_bustag(adapter->memory);
2278	adapter->osdep.mem_bus_space_handle =
2279	    rman_get_bushandle(adapter->memory);
2280	adapter->hw.hw_addr = (u8 *)&adapter->osdep.mem_bus_space_handle;
2281
2282	/* Default to a single queue */
2283	adapter->num_queues = 1;
2284
2285	/*
2286	 * Setup MSI/X or MSI if PCI Express
2287	 */
2288	adapter->msix = em_setup_msix(adapter);
2289
2290	adapter->hw.back = &adapter->osdep;
2291
2292	return (0);
2293}
2294
2295/*********************************************************************
2296 *
2297 *  Setup the Legacy or MSI Interrupt handler
2298 *
2299 **********************************************************************/
2300int
2301em_allocate_legacy(struct adapter *adapter)
2302{
2303	device_t dev = adapter->dev;
2304	int error, rid = 0;
2305
2306	/* Manually turn off all interrupts */
2307	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2308
2309	if (adapter->msix == 1) /* using MSI */
2310		rid = 1;
2311	/* We allocate a single interrupt resource */
2312	adapter->res = bus_alloc_resource_any(dev,
2313	    SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2314	if (adapter->res == NULL) {
2315		device_printf(dev, "Unable to allocate bus resource: "
2316		    "interrupt\n");
2317		return (ENXIO);
2318	}
2319
2320	/*
2321	 * Allocate a fast interrupt and the associated
2322	 * deferred processing contexts.
2323	 */
2324	TASK_INIT(&adapter->que_task, 0, em_handle_que, adapter);
2325	TASK_INIT(&adapter->link_task, 0, em_handle_link, adapter);
2326	adapter->tq = taskqueue_create_fast("em_taskq", M_NOWAIT,
2327	    taskqueue_thread_enqueue, &adapter->tq);
2328	taskqueue_start_threads(&adapter->tq, 1, PI_NET, "%s taskq",
2329	    device_get_nameunit(adapter->dev));
2330	if ((error = bus_setup_intr(dev, adapter->res, INTR_TYPE_NET,
2331	    em_irq_fast, NULL, adapter, &adapter->tag)) != 0) {
2332		device_printf(dev, "Failed to register fast interrupt "
2333			    "handler: %d\n", error);
2334		taskqueue_free(adapter->tq);
2335		adapter->tq = NULL;
2336		return (error);
2337	}
2338
2339	return (0);
2340}
2341
2342/*********************************************************************
2343 *
2344 *  Setup the MSIX Interrupt handlers
 *   This is not really Multiqueue; rather,
 *   it's just multiple interrupt vectors.
2347 *
2348 **********************************************************************/
2349int
2350em_allocate_msix(struct adapter *adapter)
2351{
2352	device_t	dev = adapter->dev;
2353	struct		tx_ring *txr = adapter->tx_rings;
2354	struct		rx_ring *rxr = adapter->rx_rings;
2355	int		error, rid, vector = 0;
2356
2357
2358	/* Make sure all interrupts are disabled */
2359	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2360
2361	/* First set up ring resources */
2362	for (int i = 0; i < adapter->num_queues; i++, txr++, rxr++) {
2363
2364		/* RX ring */
2365		rid = vector + 1;
2366
2367		rxr->res = bus_alloc_resource_any(dev,
2368		    SYS_RES_IRQ, &rid, RF_ACTIVE);
2369		if (rxr->res == NULL) {
2370			device_printf(dev,
2371			    "Unable to allocate bus resource: "
2372			    "RX MSIX Interrupt %d\n", i);
2373			return (ENXIO);
2374		}
2375		if ((error = bus_setup_intr(dev, rxr->res,
2376		    INTR_TYPE_NET | INTR_MPSAFE, NULL, em_msix_rx,
2377		    rxr, &rxr->tag)) != 0) {
2378			device_printf(dev, "Failed to register RX handler");
2379			return (error);
2380		}
2381		rxr->msix = vector++; /* NOTE increment vector for TX */
2382		TASK_INIT(&rxr->rx_task, 0, em_handle_rx, rxr);
2383		rxr->tq = taskqueue_create_fast("em_rxq", M_NOWAIT,
2384		    taskqueue_thread_enqueue, &rxr->tq);
2385		taskqueue_start_threads(&rxr->tq, 1, PI_NET, "%s rxq",
2386		    device_get_nameunit(adapter->dev));
2387		/*
2388		** Set the bit to enable interrupt
2389		** in E1000_IMS -- bits 20 and 21
2390		** are for RX0 and RX1, note this has
2391		** NOTHING to do with the MSIX vector
2392		*/
2393		rxr->ims = 1 << (20 + i);
2394		adapter->ivars |= (8 | rxr->msix) << (i * 4);
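		/*
		 * The IVAR encoding used here packs 4-bit fields:
		 * the low 3 bits select the MSIX vector and 0x8
		 * marks the entry valid, e.g. RX queue 0 on vector
		 * 0 stores 0x8 in bits 0-3.
		 */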
2395
2396		/* TX ring */
2397		rid = vector + 1;
2398		txr->res = bus_alloc_resource_any(dev,
2399		    SYS_RES_IRQ, &rid, RF_ACTIVE);
2400		if (txr->res == NULL) {
2401			device_printf(dev,
2402			    "Unable to allocate bus resource: "
2403			    "TX MSIX Interrupt %d\n", i);
2404			return (ENXIO);
2405		}
2406		if ((error = bus_setup_intr(dev, txr->res,
2407		    INTR_TYPE_NET | INTR_MPSAFE, NULL, em_msix_tx,
2408		    txr, &txr->tag)) != 0) {
2409			device_printf(dev, "Failed to register TX handler");
2410			return (error);
2411		}
2412		txr->msix = vector++; /* Increment vector for next pass */
2413		TASK_INIT(&txr->tx_task, 0, em_handle_tx, txr);
2414		txr->tq = taskqueue_create_fast("em_txq", M_NOWAIT,
2415		    taskqueue_thread_enqueue, &txr->tq);
2416		taskqueue_start_threads(&txr->tq, 1, PI_NET, "%s txq",
2417		    device_get_nameunit(adapter->dev));
2418		/*
2419		** Set the bit to enable interrupt
2420		** in E1000_IMS -- bits 22 and 23
2421		** are for TX0 and TX1, note this has
2422		** NOTHING to do with the MSIX vector
2423		*/
2424		txr->ims = 1 << (22 + i);
2425		adapter->ivars |= (8 | txr->msix) << (8 + (i * 4));
2426	}
2427
2428	/* Link interrupt */
2429	++rid;
2430	adapter->res = bus_alloc_resource_any(dev,
2431	    SYS_RES_IRQ, &rid, RF_ACTIVE);
	if (!adapter->res) {
		device_printf(dev, "Unable to allocate "
		    "bus resource: Link interrupt [%d]\n", rid);
		return (ENXIO);
	}
2437	/* Set the link handler function */
2438	error = bus_setup_intr(dev, adapter->res,
2439	    INTR_TYPE_NET | INTR_MPSAFE, NULL,
2440	    em_msix_link, adapter, &adapter->tag);
2441	if (error) {
2442		adapter->res = NULL;
2443		device_printf(dev, "Failed to register LINK handler");
2444		return (error);
2445	}
2446	adapter->linkvec = vector;
2447	adapter->ivars |=  (8 | vector) << 16;
2448	adapter->ivars |= 0x80000000;
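	/*
	 * The link vector lands in bits 16-19 with the same
	 * (0x8 | vector) valid encoding as the queue entries
	 * above; bit 31 is set per the 82574 IVAR convention
	 * this driver follows.
	 */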
2449
2450	return (0);
2451}
2452
2453
2454static void
2455em_free_pci_resources(struct adapter *adapter)
2456{
2457	device_t	dev = adapter->dev;
2458	struct tx_ring	*txr;
2459	struct rx_ring	*rxr;
2460	int		rid;
2461
2462
2463	/*
2464	** Release all the queue interrupt resources:
2465	*/
2466	for (int i = 0; i < adapter->num_queues; i++) {
2467		txr = &adapter->tx_rings[i];
2468		rxr = &adapter->rx_rings[i];
		rid = txr->msix + 1;
2470		if (txr->tag != NULL) {
2471			bus_teardown_intr(dev, txr->res, txr->tag);
2472			txr->tag = NULL;
2473		}
2474		if (txr->res != NULL)
2475			bus_release_resource(dev, SYS_RES_IRQ,
2476			    rid, txr->res);
		rid = rxr->msix + 1;
2478		if (rxr->tag != NULL) {
2479			bus_teardown_intr(dev, rxr->res, rxr->tag);
2480			rxr->tag = NULL;
2481		}
2482		if (rxr->res != NULL)
2483			bus_release_resource(dev, SYS_RES_IRQ,
2484			    rid, rxr->res);
2485	}
2486
	if (adapter->linkvec) /* we are doing MSIX */
		rid = adapter->linkvec + 1;
	else
		rid = (adapter->msix != 0) ? 1 : 0;
2491
2492	if (adapter->tag != NULL) {
2493		bus_teardown_intr(dev, adapter->res, adapter->tag);
2494		adapter->tag = NULL;
2495	}
2496
2497	if (adapter->res != NULL)
2498		bus_release_resource(dev, SYS_RES_IRQ, rid, adapter->res);
2499
2500
2501	if (adapter->msix)
2502		pci_release_msi(dev);
2503
2504	if (adapter->msix_mem != NULL)
2505		bus_release_resource(dev, SYS_RES_MEMORY,
2506		    PCIR_BAR(EM_MSIX_BAR), adapter->msix_mem);
2507
2508	if (adapter->memory != NULL)
2509		bus_release_resource(dev, SYS_RES_MEMORY,
2510		    PCIR_BAR(0), adapter->memory);
2511
2512	if (adapter->flash != NULL)
2513		bus_release_resource(dev, SYS_RES_MEMORY,
2514		    EM_FLASH, adapter->flash);
2515}
2516
2517/*
2518 * Setup MSI or MSI/X
2519 */
2520static int
2521em_setup_msix(struct adapter *adapter)
2522{
2523	device_t dev = adapter->dev;
2524	int val = 0;
2525
2526
2527	/* Setup MSI/X for Hartwell */
2528	if ((adapter->hw.mac.type == e1000_82574) &&
2529	    (em_enable_msix == TRUE)) {
2530		/* Map the MSIX BAR */
2531		int rid = PCIR_BAR(EM_MSIX_BAR);
2532		adapter->msix_mem = bus_alloc_resource_any(dev,
2533		    SYS_RES_MEMORY, &rid, RF_ACTIVE);
		if (!adapter->msix_mem) {
			/* May not be enabled */
			device_printf(adapter->dev,
			    "Unable to map MSIX table\n");
			goto msi;
		}
2540		val = pci_msix_count(dev);
2541		if (val != 5) {
2542			bus_release_resource(dev, SYS_RES_MEMORY,
2543			    PCIR_BAR(EM_MSIX_BAR), adapter->msix_mem);
2544			adapter->msix_mem = NULL;
			device_printf(adapter->dev,
			    "MSIX vectors wrong, using MSI\n");
2547			goto msi;
2548		}
2549		if (em_msix_queues == 2) {
2550			val = 5;
2551			adapter->num_queues = 2;
2552		} else {
2553			val = 3;
2554			adapter->num_queues = 1;
2555		}
2556		if (pci_alloc_msix(dev, &val) == 0) {
2557			device_printf(adapter->dev,
2558			    "Using MSIX interrupts "
2559			    "with %d vectors\n", val);
2560		}
2561
2562		return (val);
2563	}
msi:
	val = pci_msi_count(dev);
	if (val == 1 && pci_alloc_msi(dev, &val) == 0) {
		adapter->msix = 1;
		device_printf(adapter->dev, "Using an MSI interrupt\n");
		return (val);
	}
2571	/* Should only happen due to manual configuration */
2572	device_printf(adapter->dev,"No MSI/MSIX using a Legacy IRQ\n");
2573	return (0);
2574}
2575
2576
2577/*********************************************************************
2578 *
2579 *  Initialize the hardware to a configuration
2580 *  as specified by the adapter structure.
2581 *
2582 **********************************************************************/
2583static void
2584em_reset(struct adapter *adapter)
2585{
2586	device_t	dev = adapter->dev;
2587	struct e1000_hw	*hw = &adapter->hw;
2588	u16		rx_buffer_size;
2589
2590	INIT_DEBUGOUT("em_reset: begin");
2591
2592	/* Set up smart power down as default off on newer adapters. */
2593	if (!em_smart_pwr_down && (hw->mac.type == e1000_82571 ||
2594	    hw->mac.type == e1000_82572)) {
2595		u16 phy_tmp = 0;
2596
2597		/* Speed up time to link by disabling smart power down. */
2598		e1000_read_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, &phy_tmp);
2599		phy_tmp &= ~IGP02E1000_PM_SPD;
2600		e1000_write_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, phy_tmp);
2601	}
2602
2603	/*
2604	 * These parameters control the automatic generation (Tx) and
2605	 * response (Rx) to Ethernet PAUSE frames.
2606	 * - High water mark should allow for at least two frames to be
2607	 *   received after sending an XOFF.
2608	 * - Low water mark works best when it is very near the high water mark.
2609	 *   This allows the receiver to restart by sending XON when it has
 *   drained a bit. Here we use an arbitrary value of 1500, which will
2611	 *   restart after one full frame is pulled from the buffer. There
2612	 *   could be several smaller frames in the buffer and if so they will
2613	 *   not trigger the XON until their total number reduces the buffer
2614	 *   by 1500.
2615	 * - The pause time is fairly large at 1000 x 512ns = 512 usec.
2616	 */
2617	rx_buffer_size = ((E1000_READ_REG(hw, E1000_PBA) & 0xffff) << 10 );
2618
2619	hw->fc.high_water = rx_buffer_size -
2620	    roundup2(adapter->max_frame_size, 1024);
2621	hw->fc.low_water = hw->fc.high_water - 1500;
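	/*
	 * Illustrative arithmetic only: if the PBA reads 0x20
	 * (32 KB, so 0x20 << 10 = 32768 bytes) and the max frame
	 * is 1522 bytes, rounded up to 2048, then:
	 *
	 *	high_water = 32768 - 2048 = 30720
	 *	low_water  = 30720 - 1500 = 29220
	 */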
2622
2623	if (hw->mac.type == e1000_80003es2lan)
2624		hw->fc.pause_time = 0xFFFF;
2625	else
2626		hw->fc.pause_time = EM_FC_PAUSE_TIME;
2627
2628	hw->fc.send_xon = TRUE;
2629
	/* Set flow control, use the tunable value only if sane */
	if ((em_fc_setting >= 0) && (em_fc_setting < 4))
		hw->fc.requested_mode = em_fc_setting;
2633	else
2634		hw->fc.requested_mode = e1000_fc_none;
2635
2636	/* Override - workaround for PCHLAN issue */
2637	if (hw->mac.type == e1000_pchlan)
		hw->fc.requested_mode = e1000_fc_rx_pause;
2639
2640	/* Issue a global reset */
2641	e1000_reset_hw(hw);
2642	E1000_WRITE_REG(hw, E1000_WUC, 0);
2643
2644	if (e1000_init_hw(hw) < 0) {
2645		device_printf(dev, "Hardware Initialization Failed\n");
2646		return;
2647	}
2648
2649	E1000_WRITE_REG(hw, E1000_VET, ETHERTYPE_VLAN);
2650	e1000_get_phy_info(hw);
2651	e1000_check_for_link(hw);
2652	return;
2653}
2654
2655/*********************************************************************
2656 *
2657 *  Setup networking device structure and register an interface.
2658 *
2659 **********************************************************************/
2660static int
2661em_setup_interface(device_t dev, struct adapter *adapter)
2662{
2663	struct ifnet   *ifp;
2664
2665	INIT_DEBUGOUT("em_setup_interface: begin");
2666
2667	ifp = adapter->ifp = if_alloc(IFT_ETHER);
2668	if (ifp == NULL) {
2669		device_printf(dev, "can not allocate ifnet structure\n");
2670		return (-1);
2671	}
2672	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
2673	ifp->if_mtu = ETHERMTU;
2674	ifp->if_init =  em_init;
2675	ifp->if_softc = adapter;
2676	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
2677	ifp->if_ioctl = em_ioctl;
2678	ifp->if_start = em_start;
2679	IFQ_SET_MAXLEN(&ifp->if_snd, adapter->num_tx_desc - 1);
2680	ifp->if_snd.ifq_drv_maxlen = adapter->num_tx_desc - 1;
2681	IFQ_SET_READY(&ifp->if_snd);
2682
2683	ether_ifattach(ifp, adapter->hw.mac.addr);
2684
2685	ifp->if_capabilities = ifp->if_capenable = 0;
2686
2687#ifdef EM_MULTIQUEUE
2688	/* Multiqueue tx functions */
2689	ifp->if_transmit = em_mq_start;
2690	ifp->if_qflush = em_qflush;
2691#endif
2692
2693	ifp->if_capabilities |= IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM;
2694	ifp->if_capenable |= IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM;
2695
2696	/* Enable TSO by default, can disable with ifconfig */
2697	ifp->if_capabilities |= IFCAP_TSO4;
2698	ifp->if_capenable |= IFCAP_TSO4;
2699
2700	/*
2701	 * Tell the upper layer(s) we
2702	 * support full VLAN capability
2703	 */
2704	ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header);
2705	ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU;
2706	ifp->if_capenable |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU;
2707
2708	/*
2709	** Dont turn this on by default, if vlans are
2710	** created on another pseudo device (eg. lagg)
2711	** then vlan events are not passed thru, breaking
2712	** operation, but with HW FILTER off it works. If
2713	** using vlans directly on the em driver you can
2714	** enable this and get full hardware tag filtering.
2715	*/
2716	ifp->if_capabilities |= IFCAP_VLAN_HWFILTER;
2717
2718#ifdef DEVICE_POLLING
2719	ifp->if_capabilities |= IFCAP_POLLING;
2720#endif
2721
2722	/* Enable only WOL MAGIC by default */
2723	if (adapter->wol) {
2724		ifp->if_capabilities |= IFCAP_WOL;
2725		ifp->if_capenable |= IFCAP_WOL_MAGIC;
2726	}
2727
2728	/*
2729	 * Specify the media types supported by this adapter and register
2730	 * callbacks to update media and link information
2731	 */
2732	ifmedia_init(&adapter->media, IFM_IMASK,
2733	    em_media_change, em_media_status);
2734	if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
2735	    (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
2736		u_char fiber_type = IFM_1000_SX;	/* default type */
2737
2738		ifmedia_add(&adapter->media, IFM_ETHER | fiber_type | IFM_FDX,
2739			    0, NULL);
2740		ifmedia_add(&adapter->media, IFM_ETHER | fiber_type, 0, NULL);
2741	} else {
2742		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T, 0, NULL);
2743		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T | IFM_FDX,
2744			    0, NULL);
2745		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX,
2746			    0, NULL);
2747		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX | IFM_FDX,
2748			    0, NULL);
2749		if (adapter->hw.phy.type != e1000_phy_ife) {
2750			ifmedia_add(&adapter->media,
2751				IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
2752			ifmedia_add(&adapter->media,
2753				IFM_ETHER | IFM_1000_T, 0, NULL);
2754		}
2755	}
2756	ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL);
2757	ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO);
2758	return (0);
2759}
2760
2761
2762/*
2763 * Manage DMA'able memory.
2764 */
2765static void
2766em_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
2767{
2768	if (error)
2769		return;
2770	*(bus_addr_t *) arg = segs[0].ds_addr;
2771}
2772
2773static int
2774em_dma_malloc(struct adapter *adapter, bus_size_t size,
2775        struct em_dma_alloc *dma, int mapflags)
2776{
2777	int error;
2778
2779	error = bus_dma_tag_create(bus_get_dma_tag(adapter->dev), /* parent */
2780				EM_DBA_ALIGN, 0,	/* alignment, bounds */
2781				BUS_SPACE_MAXADDR,	/* lowaddr */
2782				BUS_SPACE_MAXADDR,	/* highaddr */
2783				NULL, NULL,		/* filter, filterarg */
2784				size,			/* maxsize */
2785				1,			/* nsegments */
2786				size,			/* maxsegsize */
2787				0,			/* flags */
2788				NULL,			/* lockfunc */
2789				NULL,			/* lockarg */
2790				&dma->dma_tag);
2791	if (error) {
2792		device_printf(adapter->dev,
2793		    "%s: bus_dma_tag_create failed: %d\n",
2794		    __func__, error);
2795		goto fail_0;
2796	}
2797
2798	error = bus_dmamem_alloc(dma->dma_tag, (void**) &dma->dma_vaddr,
2799	    BUS_DMA_NOWAIT | BUS_DMA_COHERENT, &dma->dma_map);
2800	if (error) {
2801		device_printf(adapter->dev,
2802		    "%s: bus_dmamem_alloc(%ju) failed: %d\n",
2803		    __func__, (uintmax_t)size, error);
2804		goto fail_2;
2805	}
2806
2807	dma->dma_paddr = 0;
2808	error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
2809	    size, em_dmamap_cb, &dma->dma_paddr, mapflags | BUS_DMA_NOWAIT);
2810	if (error || dma->dma_paddr == 0) {
2811		device_printf(adapter->dev,
2812		    "%s: bus_dmamap_load failed: %d\n",
2813		    __func__, error);
2814		goto fail_3;
2815	}
2816
2817	return (0);
2818
2819fail_3:
2820	bus_dmamap_unload(dma->dma_tag, dma->dma_map);
2821fail_2:
2822	bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
2823	bus_dma_tag_destroy(dma->dma_tag);
2824fail_0:
2825	dma->dma_map = NULL;
2826	dma->dma_tag = NULL;
2827
2828	return (error);
2829}
2830
2831static void
2832em_dma_free(struct adapter *adapter, struct em_dma_alloc *dma)
2833{
2834	if (dma->dma_tag == NULL)
2835		return;
2836	if (dma->dma_map != NULL) {
2837		bus_dmamap_sync(dma->dma_tag, dma->dma_map,
2838		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
2839		bus_dmamap_unload(dma->dma_tag, dma->dma_map);
2840		bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
2841		dma->dma_map = NULL;
2842	}
2843	bus_dma_tag_destroy(dma->dma_tag);
2844	dma->dma_tag = NULL;
2845}
2846
2847
2848/*********************************************************************
2849 *
2850 *  Allocate memory for the transmit and receive rings, and then
2851 *  the descriptors associated with each, called only once at attach.
2852 *
2853 **********************************************************************/
2854static int
2855em_allocate_queues(struct adapter *adapter)
2856{
2857	device_t		dev = adapter->dev;
2858	struct tx_ring		*txr = NULL;
2859	struct rx_ring		*rxr = NULL;
2860	int rsize, tsize, error = E1000_SUCCESS;
2861	int txconf = 0, rxconf = 0;
2862
2863
2864	/* Allocate the TX ring struct memory */
2865	if (!(adapter->tx_rings =
2866	    (struct tx_ring *) malloc(sizeof(struct tx_ring) *
2867	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2868		device_printf(dev, "Unable to allocate TX ring memory\n");
2869		error = ENOMEM;
2870		goto fail;
2871	}
2872
2873	/* Now allocate the RX */
2874	if (!(adapter->rx_rings =
2875	    (struct rx_ring *) malloc(sizeof(struct rx_ring) *
2876	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2877		device_printf(dev, "Unable to allocate RX ring memory\n");
2878		error = ENOMEM;
2879		goto rx_fail;
2880	}
2881
2882	tsize = roundup2(adapter->num_tx_desc *
2883	    sizeof(struct e1000_tx_desc), EM_DBA_ALIGN);
2884	/*
2885	 * Now set up the TX queues, txconf is needed to handle the
2886	 * possibility that things fail midcourse and we need to
2887	 * undo memory gracefully
2888	 */
2889	for (int i = 0; i < adapter->num_queues; i++, txconf++) {
2890		/* Set up some basics */
2891		txr = &adapter->tx_rings[i];
2892		txr->adapter = adapter;
2893		txr->me = i;
2894
2895		/* Initialize the TX lock */
2896		snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
2897		    device_get_nameunit(dev), txr->me);
2898		mtx_init(&txr->tx_mtx, txr->mtx_name, NULL, MTX_DEF);
2899
2900		if (em_dma_malloc(adapter, tsize,
2901			&txr->txdma, BUS_DMA_NOWAIT)) {
2902			device_printf(dev,
2903			    "Unable to allocate TX Descriptor memory\n");
2904			error = ENOMEM;
2905			goto err_tx_desc;
2906		}
2907		txr->tx_base = (struct e1000_tx_desc *)txr->txdma.dma_vaddr;
2908		bzero((void *)txr->tx_base, tsize);
2909
		if (em_allocate_transmit_buffers(txr)) {
2911			device_printf(dev,
2912			    "Critical Failure setting up transmit buffers\n");
2913			error = ENOMEM;
2914			goto err_tx_desc;
2915        	}
2916#if __FreeBSD_version >= 800000
2917		/* Allocate a buf ring */
2918		txr->br = buf_ring_alloc(4096, M_DEVBUF,
2919		    M_WAITOK, &txr->tx_mtx);
2920#endif
2921	}
2922
2923	/*
2924	 * Next the RX queues...
2925	 */
2926	rsize = roundup2(adapter->num_rx_desc *
2927	    sizeof(struct e1000_rx_desc), EM_DBA_ALIGN);
2928	for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
2929		rxr = &adapter->rx_rings[i];
2930		rxr->adapter = adapter;
2931		rxr->me = i;
2932
2933		/* Initialize the RX lock */
2934		snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
2935		    device_get_nameunit(dev), txr->me);
2936		mtx_init(&rxr->rx_mtx, rxr->mtx_name, NULL, MTX_DEF);
2937
2938		if (em_dma_malloc(adapter, rsize,
2939			&rxr->rxdma, BUS_DMA_NOWAIT)) {
2940			device_printf(dev,
2941			    "Unable to allocate RxDescriptor memory\n");
2942			error = ENOMEM;
2943			goto err_rx_desc;
2944		}
2945		rxr->rx_base = (struct e1000_rx_desc *)rxr->rxdma.dma_vaddr;
2946		bzero((void *)rxr->rx_base, rsize);
2947
		/* Allocate receive buffers for the ring */
2949		if (em_allocate_receive_buffers(rxr)) {
2950			device_printf(dev,
2951			    "Critical Failure setting up receive buffers\n");
2952			error = ENOMEM;
2953			goto err_rx_desc;
2954		}
2955	}
2956
2957	return (0);
2958
2959err_rx_desc:
2960	for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
2961		em_dma_free(adapter, &rxr->rxdma);
2962err_tx_desc:
2963	for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
2964		em_dma_free(adapter, &txr->txdma);
2965	free(adapter->rx_rings, M_DEVBUF);
2966rx_fail:
2967#if __FreeBSD_version >= 800000
2968	buf_ring_free(txr->br, M_DEVBUF);
2969#endif
2970	free(adapter->tx_rings, M_DEVBUF);
2971fail:
2972	return (error);
2973}
2974
2975
2976/*********************************************************************
2977 *
2978 *  Allocate memory for tx_buffer structures. The tx_buffer stores all
2979 *  the information needed to transmit a packet on the wire. This is
2980 *  called only once at attach, setup is done every reset.
2981 *
2982 **********************************************************************/
2983static int
2984em_allocate_transmit_buffers(struct tx_ring *txr)
2985{
2986	struct adapter *adapter = txr->adapter;
2987	device_t dev = adapter->dev;
2988	struct em_buffer *txbuf;
2989	int error, i;
2990
2991	/*
2992	 * Setup DMA descriptor areas.
2993	 */
2994	if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
2995			       1, 0,			/* alignment, bounds */
2996			       BUS_SPACE_MAXADDR,	/* lowaddr */
2997			       BUS_SPACE_MAXADDR,	/* highaddr */
2998			       NULL, NULL,		/* filter, filterarg */
2999			       EM_TSO_SIZE,		/* maxsize */
3000			       EM_MAX_SCATTER,		/* nsegments */
3001			       PAGE_SIZE,		/* maxsegsize */
3002			       0,			/* flags */
3003			       NULL,			/* lockfunc */
3004			       NULL,			/* lockfuncarg */
3005			       &txr->txtag))) {
3006		device_printf(dev,"Unable to allocate TX DMA tag\n");
3007		goto fail;
3008	}
3009
3010	if (!(txr->tx_buffers =
3011	    (struct em_buffer *) malloc(sizeof(struct em_buffer) *
3012	    adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3013		device_printf(dev, "Unable to allocate tx_buffer memory\n");
3014		error = ENOMEM;
3015		goto fail;
3016	}
3017
	/* Create the descriptor buffer dma maps */
3019	txbuf = txr->tx_buffers;
3020	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3021		error = bus_dmamap_create(txr->txtag, 0, &txbuf->map);
3022		if (error != 0) {
3023			device_printf(dev, "Unable to create TX DMA map\n");
3024			goto fail;
3025		}
3026	}
3027
3028	return 0;
3029fail:
3030	/* We free all, it handles case where we are in the middle */
3031	em_free_transmit_structures(adapter);
3032	return (error);
3033}
3034
3035/*********************************************************************
3036 *
3037 *  Initialize a transmit ring.
3038 *
3039 **********************************************************************/
3040static void
3041em_setup_transmit_ring(struct tx_ring *txr)
3042{
3043	struct adapter *adapter = txr->adapter;
3044	struct em_buffer *txbuf;
3045	int i;
3046
3047	/* Clear the old descriptor contents */
3048	EM_TX_LOCK(txr);
3049	bzero((void *)txr->tx_base,
3050	      (sizeof(struct e1000_tx_desc)) * adapter->num_tx_desc);
3051	/* Reset indices */
3052	txr->next_avail_desc = 0;
3053	txr->next_to_clean = 0;
3054
3055	/* Free any existing tx buffers. */
	txbuf = txr->tx_buffers;
3057	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3058		if (txbuf->m_head != NULL) {
3059			bus_dmamap_sync(txr->txtag, txbuf->map,
3060			    BUS_DMASYNC_POSTWRITE);
3061			bus_dmamap_unload(txr->txtag, txbuf->map);
3062			m_freem(txbuf->m_head);
3063			txbuf->m_head = NULL;
3064		}
3065		/* clear the watch index */
3066		txbuf->next_eop = -1;
3067        }
3068
3069	/* Set number of descriptors available */
3070	txr->tx_avail = adapter->num_tx_desc;
3071
3072	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3073	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3074	EM_TX_UNLOCK(txr);
3075}
3076
3077/*********************************************************************
3078 *
3079 *  Initialize all transmit rings.
3080 *
3081 **********************************************************************/
3082static void
3083em_setup_transmit_structures(struct adapter *adapter)
3084{
3085	struct tx_ring *txr = adapter->tx_rings;
3086
3087	for (int i = 0; i < adapter->num_queues; i++, txr++)
3088		em_setup_transmit_ring(txr);
3089
3090	return;
3091}
3092
3093/*********************************************************************
3094 *
3095 *  Enable transmit unit.
3096 *
3097 **********************************************************************/
3098static void
3099em_initialize_transmit_unit(struct adapter *adapter)
3100{
3101	struct tx_ring	*txr = adapter->tx_rings;
3102	struct e1000_hw	*hw = &adapter->hw;
3103	u32	tctl, tarc, tipg = 0;
3104
3105	 INIT_DEBUGOUT("em_initialize_transmit_unit: begin");
3106
3107	for (int i = 0; i < adapter->num_queues; i++, txr++) {
3108		u64 bus_addr = txr->txdma.dma_paddr;
3109		/* Base and Len of TX Ring */
3110		E1000_WRITE_REG(hw, E1000_TDLEN(i),
3111	    	    adapter->num_tx_desc * sizeof(struct e1000_tx_desc));
3112		E1000_WRITE_REG(hw, E1000_TDBAH(i),
3113	    	    (u32)(bus_addr >> 32));
3114		E1000_WRITE_REG(hw, E1000_TDBAL(i),
3115	    	    (u32)bus_addr);
3116		/* Init the HEAD/TAIL indices */
3117		E1000_WRITE_REG(hw, E1000_TDT(i), 0);
3118		E1000_WRITE_REG(hw, E1000_TDH(i), 0);
3119
3120		HW_DEBUGOUT2("Base = %x, Length = %x\n",
3121		    E1000_READ_REG(&adapter->hw, E1000_TDBAL(i)),
3122		    E1000_READ_REG(&adapter->hw, E1000_TDLEN(i)));
3123
3124		txr->watchdog_check = FALSE;
3125	}
3126
3127	/* Set the default values for the Tx Inter Packet Gap timer */
3128	switch (adapter->hw.mac.type) {
3129	case e1000_82542:
3130		tipg = DEFAULT_82542_TIPG_IPGT;
3131		tipg |= DEFAULT_82542_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT;
3132		tipg |= DEFAULT_82542_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT;
3133		break;
3134	case e1000_80003es2lan:
3135		tipg = DEFAULT_82543_TIPG_IPGR1;
3136		tipg |= DEFAULT_80003ES2LAN_TIPG_IPGR2 <<
3137		    E1000_TIPG_IPGR2_SHIFT;
3138		break;
3139	default:
3140		if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
3141		    (adapter->hw.phy.media_type ==
3142		    e1000_media_type_internal_serdes))
3143			tipg = DEFAULT_82543_TIPG_IPGT_FIBER;
3144		else
3145			tipg = DEFAULT_82543_TIPG_IPGT_COPPER;
3146		tipg |= DEFAULT_82543_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT;
3147		tipg |= DEFAULT_82543_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT;
3148	}
3149
3150	E1000_WRITE_REG(&adapter->hw, E1000_TIPG, tipg);
3151	E1000_WRITE_REG(&adapter->hw, E1000_TIDV, adapter->tx_int_delay.value);
3152
	if (adapter->hw.mac.type >= e1000_82540)
3154		E1000_WRITE_REG(&adapter->hw, E1000_TADV,
3155		    adapter->tx_abs_int_delay.value);
3156
3157	if ((adapter->hw.mac.type == e1000_82571) ||
3158	    (adapter->hw.mac.type == e1000_82572)) {
3159		tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
3160		tarc |= SPEED_MODE_BIT;
3161		E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3162	} else if (adapter->hw.mac.type == e1000_80003es2lan) {
3163		tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
3164		tarc |= 1;
3165		E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3166		tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(1));
3167		tarc |= 1;
3168		E1000_WRITE_REG(&adapter->hw, E1000_TARC(1), tarc);
3169	}
3170
3171	adapter->txd_cmd = E1000_TXD_CMD_IFCS;
3172	if (adapter->tx_int_delay.value > 0)
3173		adapter->txd_cmd |= E1000_TXD_CMD_IDE;
3174
3175	/* Program the Transmit Control Register */
3176	tctl = E1000_READ_REG(&adapter->hw, E1000_TCTL);
3177	tctl &= ~E1000_TCTL_CT;
3178	tctl |= (E1000_TCTL_PSP | E1000_TCTL_RTLC | E1000_TCTL_EN |
3179		   (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT));
3180
3181	if (adapter->hw.mac.type >= e1000_82571)
3182		tctl |= E1000_TCTL_MULR;
3183
3184	/* This write will effectively turn on the transmit unit. */
3185	E1000_WRITE_REG(&adapter->hw, E1000_TCTL, tctl);
3186
3187}
3188
3189
3190/*********************************************************************
3191 *
3192 *  Free all transmit rings.
3193 *
3194 **********************************************************************/
3195static void
3196em_free_transmit_structures(struct adapter *adapter)
3197{
3198	struct tx_ring *txr = adapter->tx_rings;
3199
3200	for (int i = 0; i < adapter->num_queues; i++, txr++) {
3201		EM_TX_LOCK(txr);
3202		em_free_transmit_buffers(txr);
3203		em_dma_free(adapter, &txr->txdma);
3204		EM_TX_UNLOCK(txr);
3205		EM_TX_LOCK_DESTROY(txr);
3206	}
3207
3208	free(adapter->tx_rings, M_DEVBUF);
3209}
3210
3211/*********************************************************************
3212 *
3213 *  Free transmit ring related data structures.
3214 *
3215 **********************************************************************/
3216static void
3217em_free_transmit_buffers(struct tx_ring *txr)
3218{
3219	struct adapter		*adapter = txr->adapter;
3220	struct em_buffer	*txbuf;
3221
3222	INIT_DEBUGOUT("free_transmit_ring: begin");
3223
3224	if (txr->tx_buffers == NULL)
3225		return;
3226
3227	for (int i = 0; i < adapter->num_tx_desc; i++) {
3228		txbuf = &txr->tx_buffers[i];
3229		if (txbuf->m_head != NULL) {
3230			bus_dmamap_sync(txr->txtag, txbuf->map,
3231			    BUS_DMASYNC_POSTWRITE);
3232			bus_dmamap_unload(txr->txtag,
3233			    txbuf->map);
3234			m_freem(txbuf->m_head);
3235			txbuf->m_head = NULL;
3236			if (txbuf->map != NULL) {
3237				bus_dmamap_destroy(txr->txtag,
3238				    txbuf->map);
3239				txbuf->map = NULL;
3240			}
3241		} else if (txbuf->map != NULL) {
3242			bus_dmamap_unload(txr->txtag,
3243			    txbuf->map);
3244			bus_dmamap_destroy(txr->txtag,
3245			    txbuf->map);
3246			txbuf->map = NULL;
3247		}
3248	}
3249#if __FreeBSD_version >= 800000
3250	if (txr->br != NULL)
3251		buf_ring_free(txr->br, M_DEVBUF);
3252#endif
3253	if (txr->tx_buffers != NULL) {
3254		free(txr->tx_buffers, M_DEVBUF);
3255		txr->tx_buffers = NULL;
3256	}
3257	if (txr->txtag != NULL) {
3258		bus_dma_tag_destroy(txr->txtag);
3259		txr->txtag = NULL;
3260	}
3261	return;
3262}
3263
3264
3265/*********************************************************************
3266 *
3267 *  The offload context needs to be set when we transfer the first
3268 *  packet of a particular protocol (TCP/UDP). This routine has been
3269 *  enhanced to deal with inserted VLAN headers, and IPV6 (not complete)
3270 *
3271 *  Added back the old method of keeping the current context type
3272 *  and not setting if unnecessary, as this is reported to be a
3273 *  big performance win.  -jfv
3274 **********************************************************************/
3275static void
3276em_transmit_checksum_setup(struct tx_ring *txr, struct mbuf *mp,
3277    u32 *txd_upper, u32 *txd_lower)
3278{
3279	struct adapter			*adapter = txr->adapter;
3280	struct e1000_context_desc	*TXD = NULL;
3281	struct em_buffer *tx_buffer;
3282	struct ether_vlan_header *eh;
3283	struct ip *ip = NULL;
3284	struct ip6_hdr *ip6;
3285	int cur, ehdrlen;
3286	u32 cmd, hdr_len, ip_hlen;
3287	u16 etype;
3288	u8 ipproto;
3289
3290
3291	cmd = hdr_len = ipproto = 0;
3292	*txd_upper = *txd_lower = 0;
3293	cur = txr->next_avail_desc;
3294
3295	/*
3296	 * Determine where frame payload starts.
3297	 * Jump over vlan headers if already present,
3298	 * helpful for QinQ too.
3299	 */
3300	eh = mtod(mp, struct ether_vlan_header *);
3301	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
3302		etype = ntohs(eh->evl_proto);
3303		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3304	} else {
3305		etype = ntohs(eh->evl_encap_proto);
3306		ehdrlen = ETHER_HDR_LEN;
3307	}
3308
3309	/*
3310	 * We only support TCP/UDP for IPv4 and IPv6 for the moment.
3311	 * TODO: Support SCTP too when it hits the tree.
3312	 */
3313	switch (etype) {
3314	case ETHERTYPE_IP:
3315		ip = (struct ip *)(mp->m_data + ehdrlen);
3316		ip_hlen = ip->ip_hl << 2;
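		/* ip_hl counts 32-bit words, e.g. ip_hl == 5 -> 20 bytes */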
3317
3318		/* Setup of IP header checksum. */
3319		if (mp->m_pkthdr.csum_flags & CSUM_IP) {
3320			/*
3321			 * Start offset for header checksum calculation.
3322			 * End offset for header checksum calculation.
3323			 * Offset of place to put the checksum.
3324			 */
3325			TXD = (struct e1000_context_desc *)
3326			    &txr->tx_base[cur];
3327			TXD->lower_setup.ip_fields.ipcss = ehdrlen;
3328			TXD->lower_setup.ip_fields.ipcse =
3329			    htole16(ehdrlen + ip_hlen);
3330			TXD->lower_setup.ip_fields.ipcso =
3331			    ehdrlen + offsetof(struct ip, ip_sum);
3332			cmd |= E1000_TXD_CMD_IP;
3333			*txd_upper |= E1000_TXD_POPTS_IXSM << 8;
3334		}
3335
3336		hdr_len = ehdrlen + ip_hlen;
3337		ipproto = ip->ip_p;
3338		break;
3339
3340	case ETHERTYPE_IPV6:
3341		ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
3342		ip_hlen = sizeof(struct ip6_hdr); /* XXX: No header stacking. */
3343
3344		/* IPv6 doesn't have a header checksum. */
3345
3346		hdr_len = ehdrlen + ip_hlen;
3347		ipproto = ip6->ip6_nxt;
3348		break;
3349
3350	default:
3351		return;
3352	}
3353
3354	switch (ipproto) {
3355	case IPPROTO_TCP:
3356		if (mp->m_pkthdr.csum_flags & CSUM_TCP) {
3357			*txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
3358			*txd_upper |= E1000_TXD_POPTS_TXSM << 8;
3359			/* no need for context if already set */
3360			if (txr->last_hw_offload == CSUM_TCP)
3361				return;
3362			txr->last_hw_offload = CSUM_TCP;
3363			/*
3364			 * Start offset for payload checksum calculation.
3365			 * End offset for payload checksum calculation.
3366			 * Offset of place to put the checksum.
3367			 */
3368			TXD = (struct e1000_context_desc *)
3369			    &txr->tx_base[cur];
3370			TXD->upper_setup.tcp_fields.tucss = hdr_len;
3371			TXD->upper_setup.tcp_fields.tucse = htole16(0);
3372			TXD->upper_setup.tcp_fields.tucso =
3373			    hdr_len + offsetof(struct tcphdr, th_sum);
3374			cmd |= E1000_TXD_CMD_TCP;
3375		}
3376		break;
3377	case IPPROTO_UDP:
3378	{
3379		if (mp->m_pkthdr.csum_flags & CSUM_UDP) {
3380			*txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
3381			*txd_upper |= E1000_TXD_POPTS_TXSM << 8;
3382			/* no need for context if already set */
3383			if (txr->last_hw_offload == CSUM_UDP)
3384				return;
3385			txr->last_hw_offload = CSUM_UDP;
3386			/*
3387			 * Start offset for header checksum calculation.
3388			 * End offset for header checksum calculation.
3389			 * Offset of place to put the checksum.
3390			 */
3391			TXD = (struct e1000_context_desc *)
3392			    &txr->tx_base[cur];
3393			TXD->upper_setup.tcp_fields.tucss = hdr_len;
3394			TXD->upper_setup.tcp_fields.tucse = htole16(0);
3395			TXD->upper_setup.tcp_fields.tucso =
3396			    hdr_len + offsetof(struct udphdr, uh_sum);
3397		}
		/* FALLTHROUGH */
3399	}
3400	default:
3401		break;
3402	}
3403
3404	if (TXD == NULL)
3405		return;
3406	TXD->tcp_seg_setup.data = htole32(0);
3407	TXD->cmd_and_length =
3408	    htole32(adapter->txd_cmd | E1000_TXD_CMD_DEXT | cmd);
3409	tx_buffer = &txr->tx_buffers[cur];
3410	tx_buffer->m_head = NULL;
3411	tx_buffer->next_eop = -1;
3412
3413	if (++cur == adapter->num_tx_desc)
3414		cur = 0;
3415
3416	txr->tx_avail--;
3417	txr->next_avail_desc = cur;
3418}
3419
3420
3421/**********************************************************************
3422 *
3423 *  Setup work for hardware segmentation offload (TSO)
3424 *
3425 **********************************************************************/
3426static bool
3427em_tso_setup(struct tx_ring *txr, struct mbuf *mp, u32 *txd_upper,
3428   u32 *txd_lower)
3429{
3430	struct adapter			*adapter = txr->adapter;
3431	struct e1000_context_desc	*TXD;
3432	struct em_buffer		*tx_buffer;
3433	struct ether_vlan_header	*eh;
3434	struct ip			*ip;
3435	struct ip6_hdr			*ip6;
3436	struct tcphdr			*th;
3437	int cur, ehdrlen, hdr_len, ip_hlen, isip6;
3438	u16 etype;
3439
3440	/*
3441	 * This function could/should be extended to support IP/IPv6
3442	 * fragmentation as well.  But as they say, one step at a time.
3443	 */
3444
3445	/*
3446	 * Determine where frame payload starts.
3447	 * Jump over vlan headers if already present,
3448	 * helpful for QinQ too.
3449	 */
3450	eh = mtod(mp, struct ether_vlan_header *);
3451	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
3452		etype = ntohs(eh->evl_proto);
3453		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3454	} else {
3455		etype = ntohs(eh->evl_encap_proto);
3456		ehdrlen = ETHER_HDR_LEN;
3457	}
3458
3459	/* Ensure we have at least the IP+TCP header in the first mbuf. */
3460	if (mp->m_len < ehdrlen + sizeof(struct ip) + sizeof(struct tcphdr))
3461		return FALSE;	/* -1 */
3462
3463	/*
	 * We only support TCP for IPv4 (IPv6 not yet) for the moment.
3465	 * TODO: Support SCTP too when it hits the tree.
3466	 */
3467	switch (etype) {
3468	case ETHERTYPE_IP:
3469		isip6 = 0;
3470		ip = (struct ip *)(mp->m_data + ehdrlen);
3471		if (ip->ip_p != IPPROTO_TCP)
3472			return FALSE;	/* 0 */
3473		ip->ip_len = 0;
3474		ip->ip_sum = 0;
3475		ip_hlen = ip->ip_hl << 2;
3476		if (mp->m_len < ehdrlen + ip_hlen + sizeof(struct tcphdr))
3477			return FALSE;	/* -1 */
3478		th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
3479#if 1
3480		th->th_sum = in_pseudo(ip->ip_src.s_addr,
3481		    ip->ip_dst.s_addr, htons(IPPROTO_TCP));
3482#else
3483		th->th_sum = mp->m_pkthdr.csum_data;
3484#endif
3485		break;
3486	case ETHERTYPE_IPV6:
3487		isip6 = 1;
3488		return FALSE;			/* Not supported yet. */
3489		ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
3490		if (ip6->ip6_nxt != IPPROTO_TCP)
3491			return FALSE;	/* 0 */
3492		ip6->ip6_plen = 0;
3493		ip_hlen = sizeof(struct ip6_hdr); /* XXX: no header stacking. */
3494		if (mp->m_len < ehdrlen + ip_hlen + sizeof(struct tcphdr))
3495			return FALSE;	/* -1 */
3496		th = (struct tcphdr *)((caddr_t)ip6 + ip_hlen);
3497#if 0
3498		th->th_sum = in6_pseudo(ip6->ip6_src, ip->ip6_dst,
3499		    htons(IPPROTO_TCP));	/* XXX: function notyet. */
3500#else
3501		th->th_sum = mp->m_pkthdr.csum_data;
3502#endif
3503		break;
3504	default:
3505		return FALSE;
3506	}
3507	hdr_len = ehdrlen + ip_hlen + (th->th_off << 2);
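	/*
	 * For a plain frame this is 14 (Ethernet) + 20 (IP) +
	 * 20 (TCP, th_off == 5) = 54 bytes of header; everything
	 * beyond hdr_len is segmented payload.
	 */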
3508
3509	*txd_lower = (E1000_TXD_CMD_DEXT |	/* Extended descr type */
3510		      E1000_TXD_DTYP_D |	/* Data descr type */
3511		      E1000_TXD_CMD_TSE);	/* Do TSE on this packet */
3512
3513	/* IP and/or TCP header checksum calculation and insertion. */
3514	*txd_upper = ((isip6 ? 0 : E1000_TXD_POPTS_IXSM) |
3515		      E1000_TXD_POPTS_TXSM) << 8;
3516
3517	cur = txr->next_avail_desc;
3518	tx_buffer = &txr->tx_buffers[cur];
3519	TXD = (struct e1000_context_desc *) &txr->tx_base[cur];
3520
3521	/* IPv6 doesn't have a header checksum. */
3522	if (!isip6) {
3523		/*
3524		 * Start offset for header checksum calculation.
3525		 * End offset for header checksum calculation.
		 * Offset of place to put the checksum.
3527		 */
3528		TXD->lower_setup.ip_fields.ipcss = ehdrlen;
3529		TXD->lower_setup.ip_fields.ipcse =
3530		    htole16(ehdrlen + ip_hlen - 1);
3531		TXD->lower_setup.ip_fields.ipcso =
3532		    ehdrlen + offsetof(struct ip, ip_sum);
3533	}
3534	/*
3535	 * Start offset for payload checksum calculation.
3536	 * End offset for payload checksum calculation.
3537	 * Offset of place to put the checksum.
3538	 */
3539	TXD->upper_setup.tcp_fields.tucss =
3540	    ehdrlen + ip_hlen;
3541	TXD->upper_setup.tcp_fields.tucse = 0;
3542	TXD->upper_setup.tcp_fields.tucso =
3543	    ehdrlen + ip_hlen + offsetof(struct tcphdr, th_sum);
3544	/*
3545	 * Payload size per packet w/o any headers.
3546	 * Length of all headers up to payload.
3547	 */
3548	TXD->tcp_seg_setup.fields.mss = htole16(mp->m_pkthdr.tso_segsz);
3549	TXD->tcp_seg_setup.fields.hdr_len = hdr_len;
3550
3551	TXD->cmd_and_length = htole32(adapter->txd_cmd |
3552				E1000_TXD_CMD_DEXT |	/* Extended descr */
3553				E1000_TXD_CMD_TSE |	/* TSE context */
3554				(isip6 ? 0 : E1000_TXD_CMD_IP) |
3555				E1000_TXD_CMD_TCP |	/* Do TCP checksum */
3556				(mp->m_pkthdr.len - (hdr_len))); /* Total len */
3557
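	/*
	 * The context descriptor consumes a ring slot of its own,
	 * so account for it just like a data descriptor below.
	 */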
3558	tx_buffer->m_head = NULL;
3559	tx_buffer->next_eop = -1;
3560
3561	if (++cur == adapter->num_tx_desc)
3562		cur = 0;
3563
3564	txr->tx_avail--;
3565	txr->next_avail_desc = cur;
3566	txr->tx_tso = TRUE;
3567
3568	return TRUE;
3569}
3570
3571
3572/**********************************************************************
3573 *
3574 *  Examine each tx_buffer in the used queue. If the hardware is done
3575 *  processing the packet then free associated resources. The
3576 *  tx_buffer is put back on the free queue.
3577 *
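 *  Returns TRUE while descriptors remain outstanding on the ring,
 *  FALSE once it has been fully cleaned.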
3578 **********************************************************************/
3579static bool
3580em_txeof(struct tx_ring *txr)
3581{
3582	struct adapter	*adapter = txr->adapter;
3583        int first, last, done, num_avail;
3584        struct em_buffer *tx_buffer;
3585        struct e1000_tx_desc   *tx_desc, *eop_desc;
3586	struct ifnet   *ifp = adapter->ifp;
3587
3588	EM_TX_LOCK_ASSERT(txr);
3589
3590        if (txr->tx_avail == adapter->num_tx_desc)
3591                return (FALSE);
3592
3593        num_avail = txr->tx_avail;
3594        first = txr->next_to_clean;
3595        tx_desc = &txr->tx_base[first];
3596        tx_buffer = &txr->tx_buffers[first];
3597	last = tx_buffer->next_eop;
3598        eop_desc = &txr->tx_base[last];
3599
3600	/*
3601	 * Get the index of the first descriptor
3602	 * AFTER the EOP of the first packet, so
3603	 * the inner while loop can use a simple
3604	 * inequality comparison.
3605	 */
3606	if (++last == adapter->num_tx_desc)
3607 		last = 0;
3608	done = last;
3609
3610        bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3611            BUS_DMASYNC_POSTREAD);
3612
3613        while (eop_desc->upper.fields.status & E1000_TXD_STAT_DD) {
3614		/* We clean the range of the packet */
3615		while (first != done) {
3616                	tx_desc->upper.data = 0;
3617                	tx_desc->lower.data = 0;
3618                	tx_desc->buffer_addr = 0;
3619                	++num_avail;
3620
3621			if (tx_buffer->m_head) {
3622				ifp->if_opackets++;
3623				bus_dmamap_sync(txr->txtag,
3624				    tx_buffer->map,
3625				    BUS_DMASYNC_POSTWRITE);
3626				bus_dmamap_unload(txr->txtag,
3627				    tx_buffer->map);
3628
3629                        	m_freem(tx_buffer->m_head);
3630                        	tx_buffer->m_head = NULL;
3631                	}
3632			tx_buffer->next_eop = -1;
3633			txr->watchdog_time = ticks;
3634
3635	                if (++first == adapter->num_tx_desc)
3636				first = 0;
3637
3638	                tx_buffer = &txr->tx_buffers[first];
3639			tx_desc = &txr->tx_base[first];
3640		}
3641		/* See if we can continue to the next packet */
3642		last = tx_buffer->next_eop;
3643		if (last != -1) {
3644        		eop_desc = &txr->tx_base[last];
3645			/* Get new done point */
3646			if (++last == adapter->num_tx_desc)
				last = 0;
3647			done = last;
3648		} else
3649			break;
3650        }
3651        bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3652            BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3653
3654        txr->next_to_clean = first;
3655
3656        /*
3657         * If we have enough room, clear IFF_DRV_OACTIVE to
3658         * tell the stack that it is OK to send packets.
3659         * If there are no pending descriptors, clear the watchdog.
3660         */
3661        if (num_avail > EM_TX_CLEANUP_THRESHOLD) {
3662                ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
3663                if (num_avail == adapter->num_tx_desc) {
3664			txr->watchdog_check = FALSE;
3665        		txr->tx_avail = num_avail;
3666			return (FALSE);
3667		}
3668        }
3669
3670        txr->tx_avail = num_avail;
3671	return (TRUE);
3672}
3673
3674
3675/*********************************************************************
3676 *
3677 *  Refresh RX descriptor mbufs from system mbuf buffer pool.
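 *
 *  Walks from next_to_refresh up to (but not including) 'limit',
 *  allocating a fresh cluster for each empty slot, then publishes
 *  the refreshed descriptors to the hardware via the RDT register.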
3678 *
3679 **********************************************************************/
3680static void
3681em_refresh_mbufs(struct rx_ring *rxr, int limit)
3682{
3683	struct adapter		*adapter = rxr->adapter;
3684	struct mbuf		*m;
3685	bus_dma_segment_t	segs[1];
3686	struct em_buffer	*rxbuf;
3687	int			i, error, nsegs, cleaned;
3688
3689	i = rxr->next_to_refresh;
3690	cleaned = -1;
3691	while (i != limit) {
3692		rxbuf = &rxr->rx_buffers[i];
3693		/*
3694		** Entries that still have a buffer can
3695		** only be due to an error; reuse them
3696		** and their loaded maps as-is.
3697		*/
3698		if (rxbuf->m_head != NULL)
3699			goto reuse;
3700		m = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR);
3701		/*
3702		** If we have a temporary resource shortage
3703		** that causes a failure, just abort refresh
3704		** for now, we will return to this point when
3705		** reinvoked from em_rxeof.
3706		*/
3707		if (m == NULL)
3708			goto update;
3709		m->m_len = m->m_pkthdr.len = MCLBYTES;
3710
3711		if (adapter->max_frame_size <= (MCLBYTES - ETHER_ALIGN))
3712			m_adj(m, ETHER_ALIGN);
3713
3714		/* Use bus_dma machinery to setup the memory mapping  */
3715		error = bus_dmamap_load_mbuf_sg(rxr->rxtag, rxbuf->map,
3716		    m, segs, &nsegs, BUS_DMA_NOWAIT);
3717		if (error != 0) {
3718			m_free(m);
3719			goto update;
3720		}
3721
3722		/* If nsegs is wrong then the stack is corrupt. */
3723		KASSERT(nsegs == 1, ("Too many segments returned!"));
3724
3725		bus_dmamap_sync(rxr->rxtag,
3726		    rxbuf->map, BUS_DMASYNC_PREREAD);
3727		rxbuf->m_head = m;
3728		rxr->rx_base[i].buffer_addr = htole64(segs[0].ds_addr);
3729reuse:
3730		cleaned = i;
3731		/* Calculate next index */
3732		if (++i == adapter->num_rx_desc)
3733			i = 0;
3734		/* This is the work marker for refresh */
3735		rxr->next_to_refresh = i;
3736	}
3737update:
3738	/*
3739	** Update the tail pointer only if, and
3740	** only as far as, we actually refreshed.
3741	*/
3742	if (cleaned != -1) /* Update tail index */
3743		E1000_WRITE_REG(&adapter->hw,
3744		    E1000_RDT(rxr->me), cleaned);
3745
3746	return;
3747}
3748
3749
3750/*********************************************************************
3751 *
3752 *  Allocate memory for rx_buffer structures. Since we use one
3753 *  rx_buffer per received packet, the maximum number of rx_buffers
3754 *  that we'll need is equal to the number of receive descriptors
3755 *  that we've allocated.
3756 *
3757 **********************************************************************/
3758static int
3759em_allocate_receive_buffers(struct rx_ring *rxr)
3760{
3761	struct adapter		*adapter = rxr->adapter;
3762	device_t		dev = adapter->dev;
3763	struct em_buffer	*rxbuf;
3764	int			error;
3765
3766	rxr->rx_buffers = malloc(sizeof(struct em_buffer) *
3767	    adapter->num_rx_desc, M_DEVBUF, M_NOWAIT | M_ZERO);
3768	if (rxr->rx_buffers == NULL) {
3769		device_printf(dev, "Unable to allocate rx_buffer memory\n");
3770		return (ENOMEM);
3771	}
3772
3773	error = bus_dma_tag_create(bus_get_dma_tag(dev), /* parent */
3774				1, 0,			/* alignment, bounds */
3775				BUS_SPACE_MAXADDR,	/* lowaddr */
3776				BUS_SPACE_MAXADDR,	/* highaddr */
3777				NULL, NULL,		/* filter, filterarg */
3778				MCLBYTES,		/* maxsize */
3779				1,			/* nsegments */
3780				MCLBYTES,		/* maxsegsize */
3781				0,			/* flags */
3782				NULL,			/* lockfunc */
3783				NULL,			/* lockarg */
3784				&rxr->rxtag);
3785	if (error) {
3786		device_printf(dev, "%s: bus_dma_tag_create failed %d\n",
3787		    __func__, error);
3788		goto fail;
3789	}
3790
3791	rxbuf = rxr->rx_buffers;
3792	for (int i = 0; i < adapter->num_rx_desc; i++, rxbuf++) {
3794		error = bus_dmamap_create(rxr->rxtag, BUS_DMA_NOWAIT,
3795		    &rxbuf->map);
3796		if (error) {
3797			device_printf(dev, "%s: bus_dmamap_create failed: %d\n",
3798			    __func__, error);
3799			goto fail;
3800		}
3801	}
3802
3803	return (0);
3804
3805fail:
3806	em_free_receive_structures(adapter);
3807	return (error);
3808}
3809
3810
3811/*********************************************************************
3812 *
3813 *  Initialize a receive ring and its buffers.
3814 *
3815 **********************************************************************/
3816static int
3817em_setup_receive_ring(struct rx_ring *rxr)
3818{
3819	struct	adapter 	*adapter = rxr->adapter;
3820	struct em_buffer	*rxbuf;
3821	bus_dma_segment_t	seg[1];
3822	int			rsize, nsegs, error;
3823
3824
3825	/* Clear the ring contents */
3826	EM_RX_LOCK(rxr);
3827	rsize = roundup2(adapter->num_rx_desc *
3828	    sizeof(struct e1000_rx_desc), EM_DBA_ALIGN);
3829	bzero((void *)rxr->rx_base, rsize);
3830
3831	/*
3832	** Free current RX buffer structs and their mbufs
3833	*/
3834	for (int i = 0; i < adapter->num_rx_desc; i++) {
3835		rxbuf = &rxr->rx_buffers[i];
3836		if (rxbuf->m_head != NULL) {
3837			bus_dmamap_sync(rxr->rxtag, rxbuf->map,
3838			    BUS_DMASYNC_POSTREAD);
3839			bus_dmamap_unload(rxr->rxtag, rxbuf->map);
3840			m_freem(rxbuf->m_head);
3841		}
3842	}
3843
3844	/* Now replenish the mbufs */
3845	for (int j = 0; j != adapter->num_rx_desc; ++j) {
3846
3847		rxbuf = &rxr->rx_buffers[j];
3848		rxbuf->m_head = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR);
3849		if (rxbuf->m_head == NULL) {
3850			EM_RX_UNLOCK(rxr);
			return (ENOBUFS);
		}
3851		rxbuf->m_head->m_len = MCLBYTES;
3852		rxbuf->m_head->m_flags &= ~M_HASFCS; /* we strip it */
3853		rxbuf->m_head->m_pkthdr.len = MCLBYTES;
3854
3855		/* Get the memory mapping */
3856		error = bus_dmamap_load_mbuf_sg(rxr->rxtag,
3857		    rxbuf->map, rxbuf->m_head, seg,
3858		    &nsegs, BUS_DMA_NOWAIT);
3859		if (error != 0) {
3860			m_freem(rxbuf->m_head);
3861			rxbuf->m_head = NULL;
			EM_RX_UNLOCK(rxr);
3862			return (error);
3863		}
3864		bus_dmamap_sync(rxr->rxtag,
3865		    rxbuf->map, BUS_DMASYNC_PREREAD);
3866
3867		/* Update descriptor */
3868		rxr->rx_base[j].buffer_addr = htole64(seg[0].ds_addr);
3869	}
3870
3871
3872	/* Setup our descriptor indices */
3873	rxr->next_to_check = 0;
3874	rxr->next_to_refresh = 0;
3875
3876	bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
3877	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3878
3879	EM_RX_UNLOCK(rxr);
3880	return (0);
3881}
3882
3883/*********************************************************************
3884 *
3885 *  Initialize all receive rings.
3886 *
3887 **********************************************************************/
3888static int
3889em_setup_receive_structures(struct adapter *adapter)
3890{
3891	struct rx_ring *rxr = adapter->rx_rings;
3892	int j;
3893
3894	for (j = 0; j < adapter->num_queues; j++, rxr++)
3895		if (em_setup_receive_ring(rxr))
3896			goto fail;
3897
3898	return (0);
3899fail:
3900	/*
3901	 * Free the RX buffers allocated so far; we only handle
3902	 * the rings that completed, since the failing ring will
3903	 * have cleaned up after itself. 'j' failed, so it's the terminus.
3904	 */
3905	for (int i = 0; i < j; ++i) {
3906		rxr = &adapter->rx_rings[i];
3907		for (int n = 0; n < adapter->num_rx_desc; n++) {
3908			struct em_buffer *rxbuf;
3909			rxbuf = &rxr->rx_buffers[n];
3910			if (rxbuf->m_head != NULL) {
3911				bus_dmamap_sync(rxr->rxtag, rxbuf->map,
3912			  	  BUS_DMASYNC_POSTREAD);
3913				bus_dmamap_unload(rxr->rxtag, rxbuf->map);
3914				m_freem(rxbuf->m_head);
3915				rxbuf->m_head = NULL;
3916			}
3917		}
3918	}
3919
3920	return (ENOBUFS);
3921}
3922
3923/*********************************************************************
3924 *
3925 *  Free all receive rings.
3926 *
3927 **********************************************************************/
3928static void
3929em_free_receive_structures(struct adapter *adapter)
3930{
3931	struct rx_ring *rxr = adapter->rx_rings;
3932
3933	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
3934		em_free_receive_buffers(rxr);
3935		/* Free the ring memory as well */
3936		em_dma_free(adapter, &rxr->rxdma);
3937		EM_RX_LOCK_DESTROY(rxr);
3938	}
3939
3940	free(adapter->rx_rings, M_DEVBUF);
3941}
3942
3943
3944/*********************************************************************
3945 *
3946 *  Free receive ring data structures
3947 *
3948 **********************************************************************/
3949static void
3950em_free_receive_buffers(struct rx_ring *rxr)
3951{
3952	struct adapter		*adapter = rxr->adapter;
3953	struct em_buffer	*rxbuf = NULL;
3954
3955	INIT_DEBUGOUT("free_receive_buffers: begin");
3956
3957	if (rxr->rx_buffers != NULL) {
3958		for (int i = 0; i < adapter->num_rx_desc; i++) {
3959			rxbuf = &rxr->rx_buffers[i];
3960			if (rxbuf->map != NULL) {
3961				bus_dmamap_sync(rxr->rxtag, rxbuf->map,
3962				    BUS_DMASYNC_POSTREAD);
3963				bus_dmamap_unload(rxr->rxtag, rxbuf->map);
3964				bus_dmamap_destroy(rxr->rxtag, rxbuf->map);
3965			}
3966			if (rxbuf->m_head != NULL) {
3967				m_freem(rxbuf->m_head);
3968				rxbuf->m_head = NULL;
3969			}
3970		}
3971		free(rxr->rx_buffers, M_DEVBUF);
3972		rxr->rx_buffers = NULL;
3973	}
3974
3975	if (rxr->rxtag != NULL) {
3976		bus_dma_tag_destroy(rxr->rxtag);
3977		rxr->rxtag = NULL;
3978	}
3979
3980	return;
3981}
3982
3983
3984/*********************************************************************
3985 *
3986 *  Enable receive unit.
3987 *
3988 **********************************************************************/
3989#define MAX_INTS_PER_SEC	8000
3990#define DEFAULT_ITR	(1000000000/(MAX_INTS_PER_SEC * 256))
3991
3992static void
3993em_initialize_receive_unit(struct adapter *adapter)
3994{
3995	struct rx_ring	*rxr = adapter->rx_rings;
3996	struct ifnet	*ifp = adapter->ifp;
3997	struct e1000_hw	*hw = &adapter->hw;
3998	u64	bus_addr;
3999	u32	rctl, rxcsum;
4000
4001	INIT_DEBUGOUT("em_initialize_receive_unit: begin");
4002
4003	/*
4004	 * Make sure receives are disabled while setting
4005	 * up the descriptor ring
4006	 */
4007	rctl = E1000_READ_REG(hw, E1000_RCTL);
4008	E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
4009
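	/* Set the receive absolute interrupt-delay limit (RADV) */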
4010	E1000_WRITE_REG(&adapter->hw, E1000_RADV,
4011	    adapter->rx_abs_int_delay.value);
4012	/*
4013	 * Set the interrupt throttling rate. Value is calculated
4014	 * as DEFAULT_ITR = 1/(MAX_INTS_PER_SEC * 256ns)
4015	 */
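	/* e.g. 10^9 / (8000 * 256) = 488, one interrupt every ~125 us */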
4016	E1000_WRITE_REG(hw, E1000_ITR, DEFAULT_ITR);
4017
4018	/*
4019	** When using MSIX interrupts we need to throttle
4020	** using the EITR register (82574 only)
4021	*/
4022	if (hw->mac.type == e1000_82574) {
4023		for (int i = 0; i < 4; i++)
4024			E1000_WRITE_REG(hw, E1000_EITR_82574(i),
4025			    DEFAULT_ITR);
4026
4027		/* Disable accelerated acknowledge on the 82574 */
4028		E1000_WRITE_REG(hw, E1000_RFCTL, E1000_RFCTL_ACK_DIS);
4029	}
4030
4031	if (ifp->if_capenable & IFCAP_RXCSUM) {
4032		rxcsum = E1000_READ_REG(hw, E1000_RXCSUM);
4033		rxcsum |= (E1000_RXCSUM_IPOFL | E1000_RXCSUM_TUOFL);
4034		E1000_WRITE_REG(hw, E1000_RXCSUM, rxcsum);
4035	}
4036
4037	/*
4038	** XXX TEMPORARY WORKAROUND: on some systems with 82573
4039	** long latencies are observed, like Lenovo X60. This
4040	** change eliminates the problem, but since having positive
4041	** values in RDTR is a known source of problems on other
4042	** platforms another solution is being sought.
4043	*/
4044	if (hw->mac.type == e1000_82573)
4045		E1000_WRITE_REG(hw, E1000_RDTR, 0x20);
4046
4047	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4048		/* Setup the Base and Length of the Rx Descriptor Ring */
4049		bus_addr = rxr->rxdma.dma_paddr;
4050		E1000_WRITE_REG(hw, E1000_RDLEN(i),
4051		    adapter->num_rx_desc * sizeof(struct e1000_rx_desc));
4052		E1000_WRITE_REG(hw, E1000_RDBAH(i), (u32)(bus_addr >> 32));
4053		E1000_WRITE_REG(hw, E1000_RDBAL(i), (u32)bus_addr);
4054		/* Setup the Head and Tail Descriptor Pointers */
4055		E1000_WRITE_REG(hw, E1000_RDH(i), 0);
4056		E1000_WRITE_REG(hw, E1000_RDT(i), adapter->num_rx_desc - 1);
4057	}
4058
4059	/* Setup the Receive Control Register */
4060	rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
4061	rctl |= E1000_RCTL_EN | E1000_RCTL_BAM |
4062	    E1000_RCTL_LBM_NO | E1000_RCTL_RDMTS_HALF |
4063	    (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
4064
4065        /* Strip the CRC */
4066        rctl |= E1000_RCTL_SECRC;
4067
4068        /* Make sure VLAN Filters are off */
4069        rctl &= ~E1000_RCTL_VFE;
4070	rctl &= ~E1000_RCTL_SBP;
4071	rctl |= E1000_RCTL_SZ_2048;
4072	if (ifp->if_mtu > ETHERMTU)
4073		rctl |= E1000_RCTL_LPE;
4074	else
4075		rctl &= ~E1000_RCTL_LPE;
4076
4077	/* Write out the settings */
4078	E1000_WRITE_REG(hw, E1000_RCTL, rctl);
4079
4080	return;
4081}
4082
4083
4084/*********************************************************************
4085 *
4086 *  This routine executes in interrupt context. It replenishes
4087 *  the mbufs in the descriptor ring and passes data that has been
4088 *  DMA'ed into host memory up to the upper layer.
4089 *
4090 *  We loop at most count times if count is > 0, or until done if
4091 *  count < 0.
4092 *
4093 *  For polling, the number of packets cleaned is also returned via *done.
4094 *********************************************************************/
4095static bool
4096em_rxeof(struct rx_ring *rxr, int count, int *done)
4097{
4098	struct adapter		*adapter = rxr->adapter;
4099	struct ifnet		*ifp = adapter->ifp;
4100	struct mbuf		*mp, *sendmp;
4101	u8			status = 0;
4102	u16 			len;
4103	int			i, processed, rxdone = 0;
4104	bool			eop;
4105	struct e1000_rx_desc	*cur;
4106
4107	EM_RX_LOCK(rxr);
4108
4109	for (i = rxr->next_to_check, processed = 0; count != 0;) {
4110
4111		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
4112			break;
4113
4114		bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4115		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
4116
4117		cur = &rxr->rx_base[i];
4118		status = cur->status;
4119		mp = sendmp = NULL;
4120
4121		if ((status & E1000_RXD_STAT_DD) == 0)
4122			break;
4123
4124		len = le16toh(cur->length);
4125		eop = (status & E1000_RXD_STAT_EOP) != 0;
4126		count--;
4127
4128		if (((cur->errors & E1000_RXD_ERR_FRAME_ERR_MASK) == 0) &&
4129		    (rxr->discard == FALSE)) {
4130
4131			/* Assign correct length to the current fragment */
4132			mp = rxr->rx_buffers[i].m_head;
4133			mp->m_len = len;
4134
4135			/* Clear the slot; em_refresh_mbufs will refill it */
4136			rxr->rx_buffers[i].m_head = NULL;
4137
4138			if (rxr->fmp == NULL) {
4139				mp->m_pkthdr.len = len;
4140				rxr->fmp = mp; /* Store the first mbuf */
4141				rxr->lmp = mp;
4142			} else {
4143				/* Chain mbufs together */
4144				mp->m_flags &= ~M_PKTHDR;
4145				rxr->lmp->m_next = mp;
4146				rxr->lmp = rxr->lmp->m_next;
4147				rxr->fmp->m_pkthdr.len += len;
4148			}
4149
4150			if (eop) {
4151				rxr->fmp->m_pkthdr.rcvif = ifp;
4152				ifp->if_ipackets++;
4153				em_receive_checksum(cur, rxr->fmp);
4154#ifndef __NO_STRICT_ALIGNMENT
4155				if (adapter->max_frame_size >
4156				    (MCLBYTES - ETHER_ALIGN) &&
4157				    em_fixup_rx(rxr) != 0)
4158					goto skip;
4159#endif
4160				if (status & E1000_RXD_STAT_VP) {
4161					rxr->fmp->m_pkthdr.ether_vtag =
4162					    (le16toh(cur->special) &
4163					    E1000_RXD_SPC_VLAN_MASK);
4164					rxr->fmp->m_flags |= M_VLANTAG;
4165				}
4166#ifdef EM_MULTIQUEUE
4167				rxr->fmp->m_pkthdr.flowid = curcpu;
4168				rxr->fmp->m_flags |= M_FLOWID;
4169#endif
4170#ifndef __NO_STRICT_ALIGNMENT
4171skip:
4172#endif
4173				sendmp = rxr->fmp;
4174				rxr->fmp = NULL;
4175				rxr->lmp = NULL;
4176			}
4177		} else {
4178			ifp->if_ierrors++;
4179			++rxr->rx_discarded;
4180			if (!eop) /* Catch subsequent segs */
4181				rxr->discard = TRUE;
4182			else
4183				rxr->discard = FALSE;
4184			em_rx_discard(rxr, i);
4185			sendmp = NULL;
4186		}
4187
4188		/* Zero out the receive descriptor's status. */
4189		cur->status = 0;
4190		++rxdone;	/* cumulative for POLL */
4191		++processed;
4192
4193		/* Advance our pointers to the next descriptor. */
4194		if (++i == adapter->num_rx_desc)
4195			i = 0;
4196
4197		/* Send to the stack */
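		/*
		** The RX lock is dropped around if_input, so stash and
		** reload our ring position across the call.
		*/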
4198		if (sendmp != NULL) {
4199			rxr->next_to_check = i;
4200			EM_RX_UNLOCK(rxr);
4201			(*ifp->if_input)(ifp, sendmp);
4202			EM_RX_LOCK(rxr);
4203			i = rxr->next_to_check;
4204		}
4205
4206		/* Only refresh mbufs every 8 descriptors */
4207		if (processed == 8) {
4208			em_refresh_mbufs(rxr, i);
4209			processed = 0;
4210		}
4211	}
4212
4213	/* Catch any remaining refresh work */
4214	if (processed != 0) {
4215		em_refresh_mbufs(rxr, i);
4216		processed = 0;
4217	}
4218
4219	rxr->next_to_check = i;
4220	if (done != NULL)
4221		*done = rxdone;
4222	EM_RX_UNLOCK(rxr);
4223
4224	return ((status & E1000_RXD_STAT_DD) ? TRUE : FALSE);
4225}
4226
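/*
** Discard a frame: free any partially assembled mbuf chain and
** reset the current slot's mbuf for reuse, keeping its loaded
** DMA map intact.
*/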
4227static __inline void
4228em_rx_discard(struct rx_ring *rxr, int i)
4229{
4230	struct em_buffer	*rbuf;
4231	struct mbuf		*m;
4232
4233	rbuf = &rxr->rx_buffers[i];
4234	/* Free any previous pieces */
4235	if (rxr->fmp != NULL) {
4236		rxr->fmp->m_flags |= M_PKTHDR;
4237		m_freem(rxr->fmp);
4238		rxr->fmp = NULL;
4239		rxr->lmp = NULL;
4240	}
4241
4242	/* Reset state, keep loaded DMA map and reuse */
4243	m = rbuf->m_head;
4244	m->m_len = m->m_pkthdr.len = MCLBYTES;
4245	m->m_flags |= M_PKTHDR;
4246	m->m_data = m->m_ext.ext_buf;
4247	m->m_next = NULL;
4248
4249	return;
4250}
4251
4252#ifndef __NO_STRICT_ALIGNMENT
4253/*
4254 * When jumbo frames are enabled we must realign the entire payload on
4255 * architectures with strict alignment. This is a serious design mistake
4256 * of the 8254x, as it undermines the benefit of DMA: the RX buffer size
4257 * may only be 2048/4096/8192/16384, while what we really want is
4258 * 2048 - ETHER_ALIGN so the payload lands aligned. Even on architectures
4259 * without strict alignment the 8254x performs unaligned accesses, which
4260 * reduces performance too. To avoid copying an entire frame to realign
4261 * it, we allocate a new mbuf, copy the ethernet header into it, and
4262 * prepend the new mbuf to the existing chain.
4263 *
4264 * Be aware: best performance of the 8254x on strict-alignment
4265 * architectures is achieved only when jumbo frames are not used at all.
4266 */
4267static int
4268em_fixup_rx(struct rx_ring *rxr)
4269{
4270	struct adapter *adapter = rxr->adapter;
4271	struct mbuf *m, *n;
4272	int error;
4273
4274	error = 0;
4275	m = rxr->fmp;
4276	if (m->m_len <= (MCLBYTES - ETHER_HDR_LEN)) {
4277		bcopy(m->m_data, m->m_data + ETHER_HDR_LEN, m->m_len);
4278		m->m_data += ETHER_HDR_LEN;
4279	} else {
4280		MGETHDR(n, M_DONTWAIT, MT_DATA);
4281		if (n != NULL) {
4282			bcopy(m->m_data, n->m_data, ETHER_HDR_LEN);
4283			m->m_data += ETHER_HDR_LEN;
4284			m->m_len -= ETHER_HDR_LEN;
4285			n->m_len = ETHER_HDR_LEN;
4286			M_MOVE_PKTHDR(n, m);
4287			n->m_next = m;
4288			rxr->fmp = n;
4289		} else {
4290			adapter->dropped_pkts++;
4291			m_freem(rxr->fmp);
4292			rxr->fmp = NULL;
4293			error = ENOMEM;
4294		}
4295	}
4296
4297	return (error);
4298}
4299#endif
4300
4301/*********************************************************************
4302 *
4303 *  Verify that the hardware indicated that the checksum is valid.
4304 *  Inform the stack about the status of checksum so that stack
4305 *  doesn't spend time verifying the checksum.
4306 *
4307 *********************************************************************/
4308static void
4309em_receive_checksum(struct e1000_rx_desc *rx_desc, struct mbuf *mp)
4310{
4311	/* The Ignore Checksum bit is set; report nothing to the stack */
4312	if (rx_desc->status & E1000_RXD_STAT_IXSM) {
4313		mp->m_pkthdr.csum_flags = 0;
4314		return;
4315	}
4316
4317	if (rx_desc->status & E1000_RXD_STAT_IPCS) {
4318		/* Did it pass? */
4319		if (!(rx_desc->errors & E1000_RXD_ERR_IPE)) {
4320			/* IP Checksum Good */
4321			mp->m_pkthdr.csum_flags = CSUM_IP_CHECKED;
4322			mp->m_pkthdr.csum_flags |= CSUM_IP_VALID;
4323
4324		} else {
4325			mp->m_pkthdr.csum_flags = 0;
4326		}
4327	}
4328
4329	if (rx_desc->status & E1000_RXD_STAT_TCPCS) {
4330		/* Did it pass? */
4331		if (!(rx_desc->errors & E1000_RXD_ERR_TCPE)) {
4332			mp->m_pkthdr.csum_flags |=
4333			(CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
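			/* 0xffff: pseudo-header checksum already fully verified */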
4334			mp->m_pkthdr.csum_data = htons(0xffff);
4335		}
4336	}
4337}
4338
4339/*
4340 * This routine is run via a vlan
4341 * config EVENT
4342 */
4343static void
4344em_register_vlan(void *arg, struct ifnet *ifp, u16 vtag)
4345{
4346	struct adapter	*adapter = ifp->if_softc;
4347	u32		index, bit;
4348
4349	if (ifp->if_softc !=  arg)   /* Not our event */
4350		return;
4351
4352	if ((vtag == 0) || (vtag > 4095))       /* Invalid ID */
4353                return;
4354
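	/*
	** The 4096-bit VLAN filter table is 128 32-bit words; the
	** upper 7 bits of the tag select the word and the low 5
	** bits select the bit within it.
	*/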
4355	index = (vtag >> 5) & 0x7F;
4356	bit = vtag & 0x1F;
4357	em_shadow_vfta[index] |= (1 << bit);
4358	++adapter->num_vlans;
4359	/* Re-init to load the changes */
4360	em_init(adapter);
4361}
4362
4363/*
4364 * This routine is run via a vlan
4365 * unconfig EVENT
4366 */
4367static void
4368em_unregister_vlan(void *arg, struct ifnet *ifp, u16 vtag)
4369{
4370	struct adapter	*adapter = ifp->if_softc;
4371	u32		index, bit;
4372
4373	if (ifp->if_softc !=  arg)
4374		return;
4375
4376	if ((vtag == 0) || (vtag > 4095))       /* Invalid */
4377                return;
4378
4379	index = (vtag >> 5) & 0x7F;
4380	bit = vtag & 0x1F;
4381	em_shadow_vfta[index] &= ~(1 << bit);
4382	--adapter->num_vlans;
4383	/* Re-init to load the changes */
4384	em_init(adapter);
4385}
4386
4387static void
4388em_setup_vlan_hw_support(struct adapter *adapter)
4389{
4390	struct e1000_hw *hw = &adapter->hw;
4391	u32             reg;
4392
4393	/*
4394	** We get here thru init_locked, meaning a
4395	** soft reset that has already cleared the
4396	** VFTA and other state; if no vlans have
4397	** been registered, there is nothing to do.
4398	*/
4399	if (adapter->num_vlans == 0)
4400                return;
4401
4402	/*
4403	** A soft reset zeroes out the VFTA, so
4404	** we need to repopulate it now.
4405	*/
4406	for (int i = 0; i < EM_VFTA_SIZE; i++)
4407                if (em_shadow_vfta[i] != 0)
4408			E1000_WRITE_REG_ARRAY(hw, E1000_VFTA,
4409                            i, em_shadow_vfta[i]);
4410
4411	reg = E1000_READ_REG(hw, E1000_CTRL);
4412	reg |= E1000_CTRL_VME;
4413	E1000_WRITE_REG(hw, E1000_CTRL, reg);
4414
4415	/* Enable the Filter Table */
4416	reg = E1000_READ_REG(hw, E1000_RCTL);
4417	reg &= ~E1000_RCTL_CFIEN;
4418	reg |= E1000_RCTL_VFE;
4419	E1000_WRITE_REG(hw, E1000_RCTL, reg);
4420
4421	/* Update the frame size */
4422	E1000_WRITE_REG(&adapter->hw, E1000_RLPML,
4423	    adapter->max_frame_size + VLAN_TAG_SIZE);
4424}
4425
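/*
** Enable interrupts; the 82574 under MSIX also needs its
** extended auto-clear mask (EIAC) armed alongside IMS.
*/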
4426static void
4427em_enable_intr(struct adapter *adapter)
4428{
4429	struct e1000_hw *hw = &adapter->hw;
4430	u32 ims_mask = IMS_ENABLE_MASK;
4431
4432	if (hw->mac.type == e1000_82574) {
4433		E1000_WRITE_REG(hw, EM_EIAC, EM_MSIX_MASK);
4434		ims_mask |= EM_MSIX_MASK;
4435	}
4436	E1000_WRITE_REG(hw, E1000_IMS, ims_mask);
4437}
4438
4439static void
4440em_disable_intr(struct adapter *adapter)
4441{
4442	struct e1000_hw *hw = &adapter->hw;
4443
4444	if (hw->mac.type == e1000_82574)
4445		E1000_WRITE_REG(hw, EM_EIAC, 0);
4446	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
4447}
4448
4449/*
4450 * Bit of a misnomer, what this really means is
4451 * to enable OS management of the system... aka
4452 * to disable special hardware management features
4453 */
4454static void
4455em_init_manageability(struct adapter *adapter)
4456{
4457	/* A shared code workaround */
4458#define E1000_82542_MANC2H E1000_MANC2H
4459	if (adapter->has_manage) {
4460		int manc2h = E1000_READ_REG(&adapter->hw, E1000_MANC2H);
4461		int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
4462
4463		/* disable hardware interception of ARP */
4464		manc &= ~(E1000_MANC_ARP_EN);
4465
4466                /* enable receiving management packets to the host */
4467		manc |= E1000_MANC_EN_MNG2HOST;
4468#define E1000_MNG2HOST_PORT_623 (1 << 5)
4469#define E1000_MNG2HOST_PORT_664 (1 << 6)
4470		manc2h |= E1000_MNG2HOST_PORT_623;
4471		manc2h |= E1000_MNG2HOST_PORT_664;
4472		E1000_WRITE_REG(&adapter->hw, E1000_MANC2H, manc2h);
4473		E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
4474	}
4475}
4476
4477/*
4478 * Give control back to hardware management
4479 * controller if there is one.
4480 */
4481static void
4482em_release_manageability(struct adapter *adapter)
4483{
4484	if (adapter->has_manage) {
4485		int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
4486
4487		/* re-enable hardware interception of ARP */
4488		manc |= E1000_MANC_ARP_EN;
4489		manc &= ~E1000_MANC_EN_MNG2HOST;
4490
4491		E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
4492	}
4493}
4494
4495/*
4496 * em_get_hw_control sets the {CTRL_EXT|FWSM}:DRV_LOAD bit.
4497 * For ASF and Pass Through versions of f/w this means
4498 * that the driver is loaded. For AMT version type f/w
4499 * this means that the network i/f is open.
4500 */
4501static void
4502em_get_hw_control(struct adapter *adapter)
4503{
4504	u32 ctrl_ext, swsm;
4505
4506	if (adapter->hw.mac.type == e1000_82573) {
4507		swsm = E1000_READ_REG(&adapter->hw, E1000_SWSM);
4508		E1000_WRITE_REG(&adapter->hw, E1000_SWSM,
4509		    swsm | E1000_SWSM_DRV_LOAD);
4510		return;
4511	}
4512	/* else */
4513	ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4514	E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
4515	    ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
4516	return;
4517}
4518
4519/*
4520 * em_release_hw_control resets {CTRL_EXT|FWSM}:DRV_LOAD bit.
4521 * For ASF and Pass Through versions of f/w this means that
4522 * the driver is no longer loaded. For AMT versions of the
4523 * f/w this means that the network i/f is closed.
4524 */
4525static void
4526em_release_hw_control(struct adapter *adapter)
4527{
4528	u32 ctrl_ext, swsm;
4529
4530	if (!adapter->has_manage)
4531		return;
4532
4533	if (adapter->hw.mac.type == e1000_82573) {
4534		swsm = E1000_READ_REG(&adapter->hw, E1000_SWSM);
4535		E1000_WRITE_REG(&adapter->hw, E1000_SWSM,
4536		    swsm & ~E1000_SWSM_DRV_LOAD);
4537		return;
4538	}
4539	/* else */
4540	ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4541	E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
4542	    ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
4543	return;
4544}
4545
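/*
** A valid unicast address must have the group bit (low bit of
** the first octet) clear and must not be all zeros.
*/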
4546static int
4547em_is_valid_ether_addr(u8 *addr)
4548{
4549	char zero_addr[6] = { 0, 0, 0, 0, 0, 0 };
4550
4551	if ((addr[0] & 1) || (!bcmp(addr, zero_addr, ETHER_ADDR_LEN))) {
4552		return (FALSE);
4553	}
4554
4555	return (TRUE);
4556}
4557
4558/*
4559** Parse the interface capabilities with regard
4560** to both system management and wake-on-lan for
4561** later use.
4562*/
4563static void
4564em_get_wakeup(device_t dev)
4565{
4566	struct adapter	*adapter = device_get_softc(dev);
4567	u16		eeprom_data = 0, device_id, apme_mask;
4568
4569	adapter->has_manage = e1000_enable_mng_pass_thru(&adapter->hw);
4570	apme_mask = EM_EEPROM_APME;
4571
4572	switch (adapter->hw.mac.type) {
4573	case e1000_82573:
4574	case e1000_82583:
4575		adapter->has_amt = TRUE;
4576		/* Falls thru */
4577	case e1000_82571:
4578	case e1000_82572:
4579	case e1000_80003es2lan:
4580		if (adapter->hw.bus.func == 1)
4581			e1000_read_nvm(&adapter->hw,
4582			    NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data);
4583		else
4584			e1000_read_nvm(&adapter->hw,
4585			    NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
4586		break;
4588	case e1000_ich8lan:
4589	case e1000_ich9lan:
4590	case e1000_ich10lan:
4591	case e1000_pchlan:
4592		apme_mask = E1000_WUC_APME;
4593		adapter->has_amt = TRUE;
4594		eeprom_data = E1000_READ_REG(&adapter->hw, E1000_WUC);
4595		break;
4596	default:
4597		e1000_read_nvm(&adapter->hw,
4598		    NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
4599		break;
4600	}
4601	if (eeprom_data & apme_mask)
4602		adapter->wol = (E1000_WUFC_MAG | E1000_WUFC_MC);
4603	/*
4604	 * We have the eeprom settings; now apply the special cases
4605	 * where the eeprom may be wrong, or the board won't support
4606	 * wake on lan on a particular port.
4607	 */
4608	device_id = pci_get_device(dev);
4609        switch (device_id) {
4610	case E1000_DEV_ID_82571EB_FIBER:
4611		/* Wake events only supported on port A for dual fiber
4612		 * regardless of eeprom setting */
4613		if (E1000_READ_REG(&adapter->hw, E1000_STATUS) &
4614		    E1000_STATUS_FUNC_1)
4615			adapter->wol = 0;
4616		break;
4617	case E1000_DEV_ID_82571EB_QUAD_COPPER:
4618	case E1000_DEV_ID_82571EB_QUAD_FIBER:
4619	case E1000_DEV_ID_82571EB_QUAD_COPPER_LP:
4620                /* if quad port adapter, disable WoL on all but port A */
4621		if (global_quad_port_a != 0)
4622			adapter->wol = 0;
4623		/* Reset for multiple quad port adapters */
4624		if (++global_quad_port_a == 4)
4625			global_quad_port_a = 0;
4626                break;
4627	}
4628	return;
4629}
4630
4631
4632/*
4633 * Enable PCI Wake On Lan capability
4634 */
4635static void
4636em_enable_wakeup(device_t dev)
4637{
4638	struct adapter	*adapter = device_get_softc(dev);
4639	struct ifnet	*ifp = adapter->ifp;
4640	u32		pmc, ctrl, ctrl_ext, rctl;
4641	u16     	status;
4642
4643	if (pci_find_extcap(dev, PCIY_PMG, &pmc) != 0)
4644		return;
4645
4646	/* Advertise the wakeup capability */
4647	ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL);
4648	ctrl |= (E1000_CTRL_SWDPIN2 | E1000_CTRL_SWDPIN3);
4649	E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl);
4650	E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
4651
4652	if ((adapter->hw.mac.type == e1000_ich8lan) ||
4653	    (adapter->hw.mac.type == e1000_pchlan) ||
4654	    (adapter->hw.mac.type == e1000_ich9lan) ||
4655	    (adapter->hw.mac.type == e1000_ich10lan)) {
4656		e1000_disable_gig_wol_ich8lan(&adapter->hw);
4657		e1000_hv_phy_powerdown_workaround_ich8lan(&adapter->hw);
4658	}
4659
4660	/* Keep the laser running on Fiber adapters */
4661	if (adapter->hw.phy.media_type == e1000_media_type_fiber ||
4662	    adapter->hw.phy.media_type == e1000_media_type_internal_serdes) {
4663		ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4664		ctrl_ext |= E1000_CTRL_EXT_SDP3_DATA;
4665		E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, ctrl_ext);
4666	}
4667
4668	/*
4669	** Determine type of Wakeup: note that wol
4670	** is set with all bits on by default.
4671	*/
4672	if ((ifp->if_capenable & IFCAP_WOL_MAGIC) == 0)
4673		adapter->wol &= ~E1000_WUFC_MAG;
4674
4675	if ((ifp->if_capenable & IFCAP_WOL_MCAST) == 0)
4676		adapter->wol &= ~E1000_WUFC_MC;
4677	else {
4678		rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
4679		rctl |= E1000_RCTL_MPE;
4680		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, rctl);
4681	}
4682
4683	if (adapter->hw.mac.type == e1000_pchlan) {
4684		if (em_enable_phy_wakeup(adapter))
4685			return;
4686	} else {
4687		E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
4688		E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
4689	}
4690
4691	if (adapter->hw.phy.type == e1000_phy_igp_3)
4692		e1000_igp3_phy_powerdown_workaround_ich8lan(&adapter->hw);
4693
4694        /* Request PME */
4695        status = pci_read_config(dev, pmc + PCIR_POWER_STATUS, 2);
4696	status &= ~(PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE);
4697	if (ifp->if_capenable & IFCAP_WOL)
4698		status |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE;
4699        pci_write_config(dev, pmc + PCIR_POWER_STATUS, status, 2);
4700
4701	return;
4702}
4703
4704/*
4705** WOL on the newer chipset interfaces (pchlan)
4706** requires the MAC state to be copied into the PHY
4707*/
4708static int
4709em_enable_phy_wakeup(struct adapter *adapter)
4710{
4711	struct e1000_hw *hw = &adapter->hw;
4712	u32 mreg, ret = 0;
4713	u16 preg;
4714
4715	/* copy MAC RARs to PHY RARs */
4716	for (int i = 0; i < adapter->hw.mac.rar_entry_count; i++) {
4717		mreg = E1000_READ_REG(hw, E1000_RAL(i));
4718		e1000_write_phy_reg(hw, BM_RAR_L(i), (u16)(mreg & 0xFFFF));
4719		e1000_write_phy_reg(hw, BM_RAR_M(i),
4720		    (u16)((mreg >> 16) & 0xFFFF));
4721		mreg = E1000_READ_REG(hw, E1000_RAH(i));
4722		e1000_write_phy_reg(hw, BM_RAR_H(i), (u16)(mreg & 0xFFFF));
4723		e1000_write_phy_reg(hw, BM_RAR_CTRL(i),
4724		    (u16)((mreg >> 16) & 0xFFFF));
4725	}
4726
4727	/* copy MAC MTA to PHY MTA */
4728	for (int i = 0; i < adapter->hw.mac.mta_reg_count; i++) {
4729		mreg = E1000_READ_REG_ARRAY(hw, E1000_MTA, i);
4730		e1000_write_phy_reg(hw, BM_MTA(i), (u16)(mreg & 0xFFFF));
4731		e1000_write_phy_reg(hw, BM_MTA(i) + 1,
4732		    (u16)((mreg >> 16) & 0xFFFF));
4733	}
4734
4735	/* configure PHY Rx Control register */
4736	e1000_read_phy_reg(&adapter->hw, BM_RCTL, &preg);
4737	mreg = E1000_READ_REG(hw, E1000_RCTL);
4738	if (mreg & E1000_RCTL_UPE)
4739		preg |= BM_RCTL_UPE;
4740	if (mreg & E1000_RCTL_MPE)
4741		preg |= BM_RCTL_MPE;
4742	preg &= ~(BM_RCTL_MO_MASK);
4743	if (mreg & E1000_RCTL_MO_3)
4744		preg |= (((mreg & E1000_RCTL_MO_3) >> E1000_RCTL_MO_SHIFT)
4745				<< BM_RCTL_MO_SHIFT);
4746	if (mreg & E1000_RCTL_BAM)
4747		preg |= BM_RCTL_BAM;
4748	if (mreg & E1000_RCTL_PMCF)
4749		preg |= BM_RCTL_PMCF;
4750	mreg = E1000_READ_REG(hw, E1000_CTRL);
4751	if (mreg & E1000_CTRL_RFCE)
4752		preg |= BM_RCTL_RFCE;
4753	e1000_write_phy_reg(&adapter->hw, BM_RCTL, preg);
4754
4755	/* enable PHY wakeup in MAC register */
4756	E1000_WRITE_REG(hw, E1000_WUC,
4757	    E1000_WUC_PHY_WAKE | E1000_WUC_PME_EN);
4758	E1000_WRITE_REG(hw, E1000_WUFC, adapter->wol);
4759
4760	/* configure and enable PHY wakeup in PHY registers */
4761	e1000_write_phy_reg(&adapter->hw, BM_WUFC, adapter->wol);
4762	e1000_write_phy_reg(&adapter->hw, BM_WUC, E1000_WUC_PME_EN);
4763
4764	/* activate PHY wakeup */
4765	ret = hw->phy.ops.acquire(hw);
4766	if (ret) {
4767		printf("Could not acquire PHY\n");
4768		return ret;
4769	}
4770	e1000_write_phy_reg_mdic(hw, IGP01E1000_PHY_PAGE_SELECT,
4771	                         (BM_WUC_ENABLE_PAGE << IGP_PAGE_SHIFT));
4772	ret = e1000_read_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, &preg);
4773	if (ret) {
4774		printf("Could not read PHY page 769\n");
4775		goto out;
4776	}
4777	preg |= BM_WUC_ENABLE_BIT | BM_WUC_HOST_WU_BIT;
4778	ret = e1000_write_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, preg);
4779	if (ret)
4780		printf("Could not set PHY Host Wakeup bit\n");
4781out:
4782	hw->phy.ops.release(hw);
4783
4784	return ret;
4785}
4786
4787static void
4788em_led_func(void *arg, int onoff)
4789{
4790	struct adapter	*adapter = arg;
4791
4792	EM_CORE_LOCK(adapter);
4793	if (onoff) {
4794		e1000_setup_led(&adapter->hw);
4795		e1000_led_on(&adapter->hw);
4796	} else {
4797		e1000_led_off(&adapter->hw);
4798		e1000_cleanup_led(&adapter->hw);
4799	}
4800	EM_CORE_UNLOCK(adapter);
4801}
4802
4803/**********************************************************************
4804 *
4805 *  Update the board statistics counters.
4806 *
4807 **********************************************************************/
4808static void
4809em_update_stats_counters(struct adapter *adapter)
4810{
4811	struct ifnet   *ifp;
4812
4813	if (adapter->hw.phy.media_type == e1000_media_type_copper ||
4814	   (E1000_READ_REG(&adapter->hw, E1000_STATUS) & E1000_STATUS_LU)) {
4815		adapter->stats.symerrs += E1000_READ_REG(&adapter->hw, E1000_SYMERRS);
4816		adapter->stats.sec += E1000_READ_REG(&adapter->hw, E1000_SEC);
4817	}
4818	adapter->stats.crcerrs += E1000_READ_REG(&adapter->hw, E1000_CRCERRS);
4819	adapter->stats.mpc += E1000_READ_REG(&adapter->hw, E1000_MPC);
4820	adapter->stats.scc += E1000_READ_REG(&adapter->hw, E1000_SCC);
4821	adapter->stats.ecol += E1000_READ_REG(&adapter->hw, E1000_ECOL);
4822
4823	adapter->stats.mcc += E1000_READ_REG(&adapter->hw, E1000_MCC);
4824	adapter->stats.latecol += E1000_READ_REG(&adapter->hw, E1000_LATECOL);
4825	adapter->stats.colc += E1000_READ_REG(&adapter->hw, E1000_COLC);
4826	adapter->stats.dc += E1000_READ_REG(&adapter->hw, E1000_DC);
4827	adapter->stats.rlec += E1000_READ_REG(&adapter->hw, E1000_RLEC);
4828	adapter->stats.xonrxc += E1000_READ_REG(&adapter->hw, E1000_XONRXC);
4829	adapter->stats.xontxc += E1000_READ_REG(&adapter->hw, E1000_XONTXC);
4830	adapter->stats.xoffrxc += E1000_READ_REG(&adapter->hw, E1000_XOFFRXC);
4831	adapter->stats.xofftxc += E1000_READ_REG(&adapter->hw, E1000_XOFFTXC);
4832	adapter->stats.fcruc += E1000_READ_REG(&adapter->hw, E1000_FCRUC);
4833	adapter->stats.prc64 += E1000_READ_REG(&adapter->hw, E1000_PRC64);
4834	adapter->stats.prc127 += E1000_READ_REG(&adapter->hw, E1000_PRC127);
4835	adapter->stats.prc255 += E1000_READ_REG(&adapter->hw, E1000_PRC255);
4836	adapter->stats.prc511 += E1000_READ_REG(&adapter->hw, E1000_PRC511);
4837	adapter->stats.prc1023 += E1000_READ_REG(&adapter->hw, E1000_PRC1023);
4838	adapter->stats.prc1522 += E1000_READ_REG(&adapter->hw, E1000_PRC1522);
4839	adapter->stats.gprc += E1000_READ_REG(&adapter->hw, E1000_GPRC);
4840	adapter->stats.bprc += E1000_READ_REG(&adapter->hw, E1000_BPRC);
4841	adapter->stats.mprc += E1000_READ_REG(&adapter->hw, E1000_MPRC);
4842	adapter->stats.gptc += E1000_READ_REG(&adapter->hw, E1000_GPTC);
4843
4844	/* For the 64-bit byte counters the low dword must be read first. */
4845	/* Both registers clear on the read of the high dword */
4846
4847	adapter->stats.gorc += E1000_READ_REG(&adapter->hw, E1000_GORCL) +
	    ((u64)E1000_READ_REG(&adapter->hw, E1000_GORCH) << 32);
4848	adapter->stats.gotc += E1000_READ_REG(&adapter->hw, E1000_GOTCL) +
	    ((u64)E1000_READ_REG(&adapter->hw, E1000_GOTCH) << 32);
4849
4850	adapter->stats.rnbc += E1000_READ_REG(&adapter->hw, E1000_RNBC);
4851	adapter->stats.ruc += E1000_READ_REG(&adapter->hw, E1000_RUC);
4852	adapter->stats.rfc += E1000_READ_REG(&adapter->hw, E1000_RFC);
4853	adapter->stats.roc += E1000_READ_REG(&adapter->hw, E1000_ROC);
4854	adapter->stats.rjc += E1000_READ_REG(&adapter->hw, E1000_RJC);
4855
4856	adapter->stats.tor += E1000_READ_REG(&adapter->hw, E1000_TORL) +
	    ((u64)E1000_READ_REG(&adapter->hw, E1000_TORH) << 32);
4857	adapter->stats.tot += E1000_READ_REG(&adapter->hw, E1000_TOTL) +
	    ((u64)E1000_READ_REG(&adapter->hw, E1000_TOTH) << 32);
4858
4859	adapter->stats.tpr += E1000_READ_REG(&adapter->hw, E1000_TPR);
4860	adapter->stats.tpt += E1000_READ_REG(&adapter->hw, E1000_TPT);
4861	adapter->stats.ptc64 += E1000_READ_REG(&adapter->hw, E1000_PTC64);
4862	adapter->stats.ptc127 += E1000_READ_REG(&adapter->hw, E1000_PTC127);
4863	adapter->stats.ptc255 += E1000_READ_REG(&adapter->hw, E1000_PTC255);
4864	adapter->stats.ptc511 += E1000_READ_REG(&adapter->hw, E1000_PTC511);
4865	adapter->stats.ptc1023 += E1000_READ_REG(&adapter->hw, E1000_PTC1023);
4866	adapter->stats.ptc1522 += E1000_READ_REG(&adapter->hw, E1000_PTC1522);
4867	adapter->stats.mptc += E1000_READ_REG(&adapter->hw, E1000_MPTC);
4868	adapter->stats.bptc += E1000_READ_REG(&adapter->hw, E1000_BPTC);
4869
4870	if (adapter->hw.mac.type >= e1000_82543) {
4871		adapter->stats.algnerrc +=
4872		E1000_READ_REG(&adapter->hw, E1000_ALGNERRC);
4873		adapter->stats.rxerrc +=
4874		E1000_READ_REG(&adapter->hw, E1000_RXERRC);
4875		adapter->stats.tncrs +=
4876		E1000_READ_REG(&adapter->hw, E1000_TNCRS);
4877		adapter->stats.cexterr +=
4878		E1000_READ_REG(&adapter->hw, E1000_CEXTERR);
4879		adapter->stats.tsctc +=
4880		E1000_READ_REG(&adapter->hw, E1000_TSCTC);
4881		adapter->stats.tsctfc +=
4882		E1000_READ_REG(&adapter->hw, E1000_TSCTFC);
4883	}
4884	ifp = adapter->ifp;
4885
4886	ifp->if_collisions = adapter->stats.colc;
4887
4888	/* Rx Errors */
4889	ifp->if_ierrors = adapter->dropped_pkts + adapter->stats.rxerrc +
4890	    adapter->stats.crcerrs + adapter->stats.algnerrc +
4891	    adapter->stats.ruc + adapter->stats.roc +
4892	    adapter->stats.mpc + adapter->stats.cexterr;
4893
4894	/* Tx Errors */
4895	ifp->if_oerrors = adapter->stats.ecol +
4896	    adapter->stats.latecol + adapter->watchdog_events;
4897}
4898
4899
4900/*
4901 * Add sysctl variables, one per statistic, to the system.
4902 */
4903static void
4904em_add_hw_stats(struct adapter *adapter)
4905{
4906
4907	device_t dev = adapter->dev;
4908
4909	struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev);
4910	struct sysctl_oid *tree = device_get_sysctl_tree(dev);
4911	struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree);
4912	struct e1000_hw_stats *stats = &adapter->stats;
4913
4914	struct sysctl_oid *stat_node, *int_node, *host_node;
4915	struct sysctl_oid_list *stat_list, *int_list, *host_list;
4916
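	/*
	** All nodes below hang off this device's sysctl tree,
	** e.g. dev.em.0.mac_stats.excess_coll for unit 0.
	*/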
4917	/* Driver Statistics */
4918	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "link_irq",
4919			CTLFLAG_RD, &adapter->link_irq, 0,
4920			"Link MSIX IRQ Handled");
4921	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "mbuf_alloc_fail",
4922			 CTLFLAG_RD, &adapter->mbuf_alloc_failed,
4923			 "Std mbuf failed");
4924	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "cluster_alloc_fail",
4925			 CTLFLAG_RD, &adapter->mbuf_cluster_failed,
4926			 "Std mbuf cluster failed");
4927	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "dropped",
4928			CTLFLAG_RD, &adapter->dropped_pkts,
4929			"Driver dropped packets");
4930	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_dma_fail",
4931			CTLFLAG_RD, &adapter->no_tx_dma_setup,
4932			"Driver tx dma failure in xmit");
4933
4934	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_high_water",
4935			CTLFLAG_RD, &adapter->hw.fc.high_water, 0,
4936			"Flow Control High Watermark");
4937	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_low_water",
4938			CTLFLAG_RD, &adapter->hw.fc.low_water, 0,
4939			"Flow Control Low Watermark");
4940
4941	/* MAC stats get their own sub-node */
4942
4943	stat_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "mac_stats",
4944				    CTLFLAG_RD, NULL, "Statistics");
4945	stat_list = SYSCTL_CHILDREN(stat_node);
4946
4947	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "excess_coll",
4948			CTLFLAG_RD, &stats->ecol,
4949			"Excessive collisions");
4950	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "symbol_errors",
4951			CTLFLAG_RD, &adapter->stats.symerrs,
4952			"Symbol Errors");
4953	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "sequence_errors",
4954			CTLFLAG_RD, &adapter->stats.sec,
4955			"Sequence Errors");
4956	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "defer_count",
4957			CTLFLAG_RD, &adapter->stats.dc,
4958			"Defer Count");
4959	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "missed_packets",
4960			CTLFLAG_RD, &adapter->stats.mpc,
4961			"Missed Packets");
4962	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_no_buff",
4963			CTLFLAG_RD, &adapter->stats.rnbc,
4964			"Receive No Buffers");
4965	/* RLEC is inaccurate on some hardware, calculate our own. */
4966/* 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_len_errs", */
4967/* 			CTLFLAG_RD, adapter->stats.roc + adapter->stats.ruc, */
4968/* 			"Receive Length Errors"); */
4969
4970	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_errs",
4971			CTLFLAG_RD, &adapter->stats.rxerrc,
4972			"Receive Errors");
4973	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "crc_errs",
4974			CTLFLAG_RD, &adapter->stats.crcerrs,
4975			"CRC errors");
4976	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "alignment_errs",
4977			CTLFLAG_RD, &adapter->stats.algnerrc,
4978			"Alignment Errors");
4979	/* On 82575 these are collision counts */
4980	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "coll_ext_errs",
4981			CTLFLAG_RD, &adapter->stats.cexterr,
4982			"Collision/Carrier extension errors");
4983	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_overruns",
4984			CTLFLAG_RD, &adapter->rx_overruns,
4985			"RX overruns");
4986	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "watchdog_timeouts",
4987			CTLFLAG_RD, &adapter->watchdog_events,
4988			"Watchdog timeouts");
4989	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xon_recvd",
4990			CTLFLAG_RD, &adapter->stats.xonrxc,
4991			"XON Received");
4992	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xon_txd",
4993			CTLFLAG_RD, &adapter->stats.xontxc,
4994			"XON Transmitted");
4995	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xoff_recvd",
4996			CTLFLAG_RD, &adapter->stats.xoffrxc,
4997			"XOFF Received");
4998	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xoff_txd",
4999			CTLFLAG_RD, &adapter->stats.xofftxc,
5000			"XOFF Transmitted");
5001
5002	/* Packet Reception Stats */
5003	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "total_pkts_recvd",
5004			CTLFLAG_RD, &adapter->stats.tpr,
5005			"Total Packets Received ");
5006	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_recvd",
5007			CTLFLAG_RD, &adapter->stats.gprc,
5008			"Good Packets Received");
5009	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_recvd",
5010			CTLFLAG_RD, &adapter->stats.bprc,
5011			"Broadcast Packets Received");
5012	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_recvd",
5013			CTLFLAG_RD, &adapter->stats.mprc,
5014			"Multicast Packets Received");
5015	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_64",
5016			CTLFLAG_RD, &adapter->stats.prc64,
5017			"64 byte frames received ");
5018	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_65_127",
5019			CTLFLAG_RD, &adapter->stats.prc127,
5020			"65-127 byte frames received");
5021	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_128_255",
5022			CTLFLAG_RD, &adapter->stats.prc255,
5023			"128-255 byte frames received");
5024	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_256_511",
5025			CTLFLAG_RD, &adapter->stats.prc511,
5026			"256-511 byte frames received");
5027	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_512_1023",
5028			CTLFLAG_RD, &adapter->stats.prc1023,
5029			"512-1023 byte frames received");
5030	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_1024_1522",
5031			CTLFLAG_RD, &adapter->stats.prc1522,
5032			"1023-1522 byte frames received");
5033 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_recvd",
5034 			CTLFLAG_RD, &adapter->stats.gorc,
5035 			"Good Octets Received");
5036
5037	/* Packet Transmission Stats */
5038 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_txd",
5039 			CTLFLAG_RD, &adapter->stats.gotc,
5040 			"Good Octets Transmitted");
5041	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "total_pkts_txd",
5042			CTLFLAG_RD, &adapter->stats.tpt,
5043			"Total Packets Transmitted");
5044	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd",
5045			CTLFLAG_RD, &adapter->stats.gptc,
5046			"Good Packets Transmitted");
5047	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_txd",
5048			CTLFLAG_RD, &adapter->stats.bptc,
5049			"Broadcast Packets Transmitted");
5050	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_txd",
5051			CTLFLAG_RD, &adapter->stats.mptc,
5052			"Multicast Packets Transmitted");
5053	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_64",
5054			CTLFLAG_RD, &adapter->stats.ptc64,
5055			"64 byte frames transmitted ");
5056	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_65_127",
5057			CTLFLAG_RD, &adapter->stats.ptc127,
5058			"65-127 byte frames transmitted");
5059	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_128_255",
5060			CTLFLAG_RD, &adapter->stats.ptc255,
5061			"128-255 byte frames transmitted");
5062	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_256_511",
5063			CTLFLAG_RD, &adapter->stats.ptc511,
5064			"256-511 byte frames transmitted");
5065	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_512_1023",
5066			CTLFLAG_RD, &adapter->stats.ptc1023,
5067			"512-1023 byte frames transmitted");
5068	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_1024_1522",
5069			CTLFLAG_RD, &adapter->stats.ptc1522,
5070			"1024-1522 byte frames transmitted");
5071	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tso_txd",
5072			CTLFLAG_RD, &adapter->stats.tsctc,
5073			"TSO Contexts Transmitted");
5074	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tso_ctx_fail",
5075			CTLFLAG_RD, &adapter->stats.tsctfc,
5076			"TSO Contexts Failed");
5077
5078
5079	/* Interrupt Stats */
5080
5081	int_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "interrupts",
5082				    CTLFLAG_RD, NULL, "Interrupt Statistics");
5083	int_list = SYSCTL_CHILDREN(int_node);
5084
5085	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "asserts",
5086			CTLFLAG_RD, &adapter->stats.iac,
5087			"Interrupt Assertion Count");
5088
5089	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_pkt_timer",
5090			CTLFLAG_RD, &adapter->stats.icrxptc,
5091			"Interrupt Cause Rx Pkt Timer Expire Count");
5092
5093	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_abs_timer",
5094			CTLFLAG_RD, &adapter->stats.icrxatc,
5095			"Interrupt Cause Rx Abs Timer Expire Count");
5096
5097	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_pkt_timer",
5098			CTLFLAG_RD, &adapter->stats.ictxptc,
5099			"Interrupt Cause Tx Pkt Timer Expire Count");
5100
5101	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_abs_timer",
5102			CTLFLAG_RD, &adapter->stats.ictxatc,
5103			"Interrupt Cause Tx Abs Timer Expire Count");
5104
5105	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_queue_empty",
5106			CTLFLAG_RD, &adapter->stats.ictxqec,
5107			"Interrupt Cause Tx Queue Empty Count");
5108
5109	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_queue_min_thresh",
5110			CTLFLAG_RD, &adapter->stats.ictxqmtc,
5111			"Interrupt Cause Tx Queue Min Thresh Count");
5112
5113	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_desc_min_thresh",
5114			CTLFLAG_RD, &adapter->stats.icrxdmtc,
5115			"Interrupt Cause Rx Desc Min Thresh Count");
5116
5117	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_overrun",
5118			CTLFLAG_RD, &adapter->stats.icrxoc,
5119			"Interrupt Cause Receiver Overrun Count");
5120
5121	/* Host to Card Stats */
5122
5123	host_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "host",
5124				    CTLFLAG_RD, NULL,
5125				    "Host to Card Statistics");
5126
5127	host_list = SYSCTL_CHILDREN(host_node);
5128
5129	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_tx_pkt",
5130			CTLFLAG_RD, &adapter->stats.cbtmpc,
5131			"Circuit Breaker Tx Packet Count");
5132
5133	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "host_tx_pkt_discard",
5134			CTLFLAG_RD, &adapter->stats.htdpmc,
5135			"Host Transmit Discarded Packets");
5136
5137	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "rx_pkt",
5138			CTLFLAG_RD, &adapter->stats.rpthc,
5139			"Rx Packets To Host");
5140
5141	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_rx_pkts",
5142			CTLFLAG_RD, &adapter->stats.cbrmpc,
5143			"Circuit Breaker Rx Packet Count");
5144
5145	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_rx_pkt_drop",
5146			CTLFLAG_RD, &adapter->stats.cbrdpc,
5147			"Circuit Breaker Rx Dropped Count");
5148
5149	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "tx_good_pkt",
5150			CTLFLAG_RD, &adapter->stats.hgptc,
5151			"Host Good Packets Tx Count");
5152
5153	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_tx_pkt_drop",
5154			CTLFLAG_RD, &adapter->stats.htcbdpc,
5155			"Host Tx Circuit Breaker Dropped Count");
5156
5157	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "rx_good_bytes",
5158			CTLFLAG_RD, &adapter->stats.hgorc,
5159			"Host Good Octets Received Count");
5160
5161	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "tx_good_bytes",
5162			CTLFLAG_RD, &adapter->stats.hgotc,
5163			"Host Good Octets Transmit Count");
5164
5165	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "length_errors",
5166			CTLFLAG_RD, &adapter->stats.lenerrs,
5167			"Length Errors");
5168
5169	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "serdes_violation_pkt",
5170			CTLFLAG_RD, &adapter->stats.scvpc,
5171			"SerDes/SGMII Code Violation Pkt Count");
5172
5173	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "header_redir_missed",
5174			CTLFLAG_RD, &adapter->stats.hrmpc,
5175			"Header Redirection Missed Packet Count");
5176
5177}
5178
5179/**********************************************************************
5180 *
5181 *  This routine provides a way to dump out the adapter eeprom,
5182 *  often a useful debug/service tool. This only dumps the first
5183 *  32 words; the data that matters lies within that extent.
5184 *
5185 **********************************************************************/
5186static int
5187em_sysctl_nvm_info(SYSCTL_HANDLER_ARGS)
5188{
5189	struct adapter *adapter;
5190	int error;
5191	int result;
5192
5193	result = -1;
5194	error = sysctl_handle_int(oidp, &result, 0, req);
5195
5196	if (error || !req->newptr)
5197		return (error);
5198
5199	/*
5200	 * This value will cause a hex dump of the
5201	 * first 32 16-bit words of the EEPROM to
5202	 * the screen.
5203	 */
5204	if (result == 1) {
5205		adapter = (struct adapter *)arg1;
5206		em_print_nvm_info(adapter);
5207        }
5208
5209	return (error);
5210}
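
/*
** Usage sketch, assuming this handler is attached as the "nvm"
** OID under the device's sysctl tree at attach time:
**     sysctl dev.em.0.nvm=1
** Writing 1 triggers the EEPROM dump to the console.
*/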
5211
5212static void
5213em_print_nvm_info(struct adapter *adapter)
5214{
5215	u16	eeprom_data;
5216	int	i, j, row = 0;
5217
5218	/* It's a bit crude, but it gets the job done */
5219	printf("\nInterface EEPROM Dump:\n");
5220	printf("Offset\n0x0000  ");
5221	for (i = 0, j = 0; i < 32; i++, j++) {
5222		if (j == 8) { /* Make the offset block */
5223			j = 0; ++row;
5224			printf("\n0x00%x0  ",row);
5225		}
5226		e1000_read_nvm(&adapter->hw, i, 1, &eeprom_data);
5227		printf("%04x ", eeprom_data);
5228	}
5229	printf("\n");
5230}
5231
5232static int
5233em_sysctl_int_delay(SYSCTL_HANDLER_ARGS)
5234{
5235	struct em_int_delay_info *info;
5236	struct adapter *adapter;
5237	u32 regval;
5238	int error, usecs, ticks;
5239
5240	info = (struct em_int_delay_info *)arg1;
5241	usecs = info->value;
5242	error = sysctl_handle_int(oidp, &usecs, 0, req);
5243	if (error != 0 || req->newptr == NULL)
5244		return (error);
5245	if (usecs < 0 || usecs > EM_TICKS_TO_USECS(65535))
5246		return (EINVAL);
5247	info->value = usecs;
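	/* Delay registers tick in 1.024 us units (hence the 65535 bound) */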
5248	ticks = EM_USECS_TO_TICKS(usecs);
5249
5250	adapter = info->adapter;
5251
5252	EM_CORE_LOCK(adapter);
5253	regval = E1000_READ_OFFSET(&adapter->hw, info->offset);
5254	regval = (regval & ~0xffff) | (ticks & 0xffff);
5255	/* Handle a few special cases. */
5256	switch (info->offset) {
5257	case E1000_RDTR:
5258		break;
5259	case E1000_TIDV:
5260		if (ticks == 0) {
5261			adapter->txd_cmd &= ~E1000_TXD_CMD_IDE;
5262			/* Don't write 0 into the TIDV register. */
5263			regval++;
5264		} else
5265			adapter->txd_cmd |= E1000_TXD_CMD_IDE;
5266		break;
5267	}
5268	E1000_WRITE_OFFSET(&adapter->hw, info->offset, regval);
5269	EM_CORE_UNLOCK(adapter);
5270	return (0);
5271}
5272
5273static void
5274em_add_int_delay_sysctl(struct adapter *adapter, const char *name,
5275	const char *description, struct em_int_delay_info *info,
5276	int offset, int value)
5277{
5278	info->adapter = adapter;
5279	info->offset = offset;
5280	info->value = value;
5281	SYSCTL_ADD_PROC(device_get_sysctl_ctx(adapter->dev),
5282	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
5283	    OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW,
5284	    info, 0, em_sysctl_int_delay, "I", description);
5285}
5286
5287static void
5288em_add_rx_process_limit(struct adapter *adapter, const char *name,
5289	const char *description, int *limit, int value)
5290{
5291	*limit = value;
5292	SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev),
5293	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
5294	    OID_AUTO, name, CTLFLAG_RW, limit, value, description);
5295}
5296
5297
5298