/******************************************************************************

  Copyright (c) 2001-2010, Intel Corporation
  All rights reserved.

  Redistribution and use in source and binary forms, with or without
  modification, are permitted provided that the following conditions are met:

   1. Redistributions of source code must retain the above copyright notice,
      this list of conditions and the following disclaimer.

   2. Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.

   3. Neither the name of the Intel Corporation nor the names of its
      contributors may be used to endorse or promote products derived from
      this software without specific prior written permission.

  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  POSSIBILITY OF SUCH DAMAGE.

******************************************************************************/
/*$FreeBSD: head/sys/dev/e1000/if_em.c 206403 2010-04-08 19:13:42Z jfv $*/

#ifdef HAVE_KERNEL_OPTION_HEADERS
#include "opt_device_polling.h"
#include "opt_inet.h"
#endif

#include <sys/param.h>
#include <sys/systm.h>
#if __FreeBSD_version >= 800000
#include <sys/buf_ring.h>
#endif
#include <sys/bus.h>
#include <sys/endian.h>
#include <sys/kernel.h>
#include <sys/kthread.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/module.h>
#include <sys/rman.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/sysctl.h>
#include <sys/taskqueue.h>
#include <sys/eventhandler.h>
#include <machine/bus.h>
#include <machine/resource.h>

#include <net/bpf.h>
#include <net/ethernet.h>
#include <net/if.h>
#include <net/if_arp.h>
#include <net/if_dl.h>
#include <net/if_media.h>

#include <net/if_types.h>
#include <net/if_vlan_var.h>

#include <netinet/in_systm.h>
#include <netinet/in.h>
#include <netinet/if_ether.h>
#include <netinet/ip.h>
#include <netinet/ip6.h>
#include <netinet/tcp.h>
#include <netinet/udp.h>

#include <machine/in_cksum.h>
#include <dev/led/led.h>
#include <dev/pci/pcivar.h>
#include <dev/pci/pcireg.h>

#include "e1000_api.h"
#include "e1000_82571.h"
#include "if_em.h"

/*********************************************************************
 *  Set this to one to display debug statistics
 *********************************************************************/
int	em_display_debug_stats = 0;

/*********************************************************************
 *  Driver version:
 *********************************************************************/
char em_driver_version[] = "7.0.2";


/*********************************************************************
 *  PCI Device ID Table
 *
 *  Used by em_probe to select which devices the driver attaches to;
 *  the last field stores an index into e1000_strings.
 *  The last entry must be all 0s.
 *
 *  { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index }
 *********************************************************************/

static em_vendor_info_t em_vendor_info_array[] =
{
	/* Intel(R) PRO/1000 Network Connection */
	{ 0x8086, E1000_DEV_ID_82571EB_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_SERDES_DUAL,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_SERDES_QUAD,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER_LP,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_QUAD_FIBER,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571PT_QUAD_COPPER,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82572EI_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82572EI_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82572EI_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82572EI,		PCI_ANY_ID, PCI_ANY_ID, 0},

	{ 0x8086, E1000_DEV_ID_82573E,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82573E_IAMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82573L,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82583V,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_SPT,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_SPT,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_DPT,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_DPT,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IGP_M_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IGP_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IGP_C,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IFE,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IFE_GT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IFE_G,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IGP_M,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_82567V_3,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IGP_M_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IGP_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IGP_C,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IGP_M,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IGP_M_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IFE,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IFE_GT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IFE_G,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_BM,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82574L,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82574LA,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH10_R_BM_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH10_R_BM_LF,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH10_R_BM_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH10_D_BM_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH10_D_BM_LF,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_M_HV_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_M_HV_LC,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_D_HV_DM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_D_HV_DC,	PCI_ANY_ID, PCI_ANY_ID, 0},
	/* required last entry */
	{ 0, 0, 0, 0, 0}
};

/*********************************************************************
 *  Table of branding strings for all supported NICs.
 *********************************************************************/

static char *em_strings[] = {
	"Intel(R) PRO/1000 Network Connection"
};

/*********************************************************************
 *  Function prototypes
 *********************************************************************/
static int	em_probe(device_t);
static int	em_attach(device_t);
static int	em_detach(device_t);
static int	em_shutdown(device_t);
static int	em_suspend(device_t);
static int	em_resume(device_t);
static void	em_start(struct ifnet *);
static void	em_start_locked(struct ifnet *, struct tx_ring *);
#if __FreeBSD_version >= 800000
static int	em_mq_start(struct ifnet *, struct mbuf *);
static int	em_mq_start_locked(struct ifnet *,
		    struct tx_ring *, struct mbuf *);
static void	em_qflush(struct ifnet *);
#endif
static int	em_ioctl(struct ifnet *, u_long, caddr_t);
static void	em_init(void *);
static void	em_init_locked(struct adapter *);
static void	em_stop(void *);
static void	em_media_status(struct ifnet *, struct ifmediareq *);
static int	em_media_change(struct ifnet *);
static void	em_identify_hardware(struct adapter *);
static int	em_allocate_pci_resources(struct adapter *);
static int	em_allocate_legacy(struct adapter *);
static int	em_allocate_msix(struct adapter *);
static int	em_allocate_queues(struct adapter *);
static int	em_setup_msix(struct adapter *);
static void	em_free_pci_resources(struct adapter *);
static void	em_local_timer(void *);
static void	em_reset(struct adapter *);
static void	em_setup_interface(device_t, struct adapter *);

static void	em_setup_transmit_structures(struct adapter *);
static void	em_initialize_transmit_unit(struct adapter *);
static int	em_allocate_transmit_buffers(struct tx_ring *);
static void	em_free_transmit_structures(struct adapter *);
static void	em_free_transmit_buffers(struct tx_ring *);

static int	em_setup_receive_structures(struct adapter *);
static int	em_allocate_receive_buffers(struct rx_ring *);
static void	em_initialize_receive_unit(struct adapter *);
static void	em_free_receive_structures(struct adapter *);
static void	em_free_receive_buffers(struct rx_ring *);

static void	em_enable_intr(struct adapter *);
static void	em_disable_intr(struct adapter *);
static void	em_update_stats_counters(struct adapter *);
static bool	em_txeof(struct tx_ring *);
static int	em_rxeof(struct rx_ring *, int);
#ifndef __NO_STRICT_ALIGNMENT
static int	em_fixup_rx(struct rx_ring *);
#endif
static void	em_receive_checksum(struct e1000_rx_desc *, struct mbuf *);
static void	em_transmit_checksum_setup(struct tx_ring *, struct mbuf *,
		    u32 *, u32 *);
static bool	em_tso_setup(struct tx_ring *, struct mbuf *, u32 *, u32 *);
static void	em_set_promisc(struct adapter *);
static void	em_disable_promisc(struct adapter *);
static void	em_set_multi(struct adapter *);
static void	em_print_hw_stats(struct adapter *);
static void	em_update_link_status(struct adapter *);
static void	em_refresh_mbufs(struct rx_ring *, int);
static void	em_register_vlan(void *, struct ifnet *, u16);
static void	em_unregister_vlan(void *, struct ifnet *, u16);
static void	em_setup_vlan_hw_support(struct adapter *);
static int	em_xmit(struct tx_ring *, struct mbuf **);
static int	em_dma_malloc(struct adapter *, bus_size_t,
		    struct em_dma_alloc *, int);
static void	em_dma_free(struct adapter *, struct em_dma_alloc *);
static void	em_print_debug_info(struct adapter *);
static void	em_print_nvm_info(struct adapter *);
static int	em_is_valid_ether_addr(u8 *);
static int	em_sysctl_stats(SYSCTL_HANDLER_ARGS);
static int	em_sysctl_debug_info(SYSCTL_HANDLER_ARGS);
static int	em_sysctl_int_delay(SYSCTL_HANDLER_ARGS);
static void	em_add_int_delay_sysctl(struct adapter *, const char *,
		    const char *, struct em_int_delay_info *, int, int);
/* Management and WOL Support */
static void	em_init_manageability(struct adapter *);
static void	em_release_manageability(struct adapter *);
static void	em_get_hw_control(struct adapter *);
static void	em_release_hw_control(struct adapter *);
static void	em_get_wakeup(device_t);
static void	em_enable_wakeup(device_t);
static int	em_enable_phy_wakeup(struct adapter *);
static void	em_led_func(void *, int);

static int	em_irq_fast(void *);

/* MSIX handlers */
static void	em_msix_tx(void *);
static void	em_msix_rx(void *);
static void	em_msix_link(void *);
static void	em_handle_tx(void *context, int pending);
static void	em_handle_rx(void *context, int pending);
static void	em_handle_link(void *context, int pending);

static void	em_add_rx_process_limit(struct adapter *, const char *,
		    const char *, int *, int);

#ifdef DEVICE_POLLING
static poll_handler_t em_poll;
#endif /* DEVICE_POLLING */

/*********************************************************************
 *  FreeBSD Device Interface Entry Points
 *********************************************************************/

static device_method_t em_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe, em_probe),
	DEVMETHOD(device_attach, em_attach),
	DEVMETHOD(device_detach, em_detach),
	DEVMETHOD(device_shutdown, em_shutdown),
	DEVMETHOD(device_suspend, em_suspend),
	DEVMETHOD(device_resume, em_resume),
	{0, 0}
};

static driver_t em_driver = {
	"em", em_methods, sizeof(struct adapter),
};

devclass_t em_devclass;
DRIVER_MODULE(em, pci, em_driver, em_devclass, 0, 0);
MODULE_DEPEND(em, pci, 1, 1, 1);
MODULE_DEPEND(em, ether, 1, 1, 1);

/*********************************************************************
 *  Tunable default values.
 *********************************************************************/

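/*
 * The hardware's interrupt-delay timers count in units of 1.024 usecs,
 * so these macros round-convert between the two scales; for example,
 * EM_USECS_TO_TICKS(100) = (100000 + 512) / 1024 = 98 ticks, and
 * EM_TICKS_TO_USECS(98) = (1024 * 98 + 500) / 1000 = 100 usecs.
 */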
#define EM_TICKS_TO_USECS(ticks)	((1024 * (ticks) + 500) / 1000)
#define EM_USECS_TO_TICKS(usecs)	((1000 * (usecs) + 512) / 1024)
#define M_TSO_LEN			66

/* Allow common code without TSO */
#ifndef CSUM_TSO
#define CSUM_TSO	0
#endif

static int em_tx_int_delay_dflt = EM_TICKS_TO_USECS(EM_TIDV);
static int em_rx_int_delay_dflt = EM_TICKS_TO_USECS(EM_RDTR);
TUNABLE_INT("hw.em.tx_int_delay", &em_tx_int_delay_dflt);
TUNABLE_INT("hw.em.rx_int_delay", &em_rx_int_delay_dflt);

static int em_tx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_TADV);
static int em_rx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_RADV);
TUNABLE_INT("hw.em.tx_abs_int_delay", &em_tx_abs_int_delay_dflt);
TUNABLE_INT("hw.em.rx_abs_int_delay", &em_rx_abs_int_delay_dflt);

static int em_rxd = EM_DEFAULT_RXD;
static int em_txd = EM_DEFAULT_TXD;
TUNABLE_INT("hw.em.rxd", &em_rxd);
TUNABLE_INT("hw.em.txd", &em_txd);

static int em_smart_pwr_down = FALSE;
TUNABLE_INT("hw.em.smart_pwr_down", &em_smart_pwr_down);

/* Controls whether promiscuous also shows bad packets */
static int em_debug_sbp = FALSE;
TUNABLE_INT("hw.em.sbp", &em_debug_sbp);

/* Local controls for MSI/MSIX */
static int em_enable_msix = TRUE;
static int em_msix_queues = 2; /* for 82574, can be 1 or 2 */
TUNABLE_INT("hw.em.enable_msix", &em_enable_msix);
TUNABLE_INT("hw.em.msix_queues", &em_msix_queues);

/* How many packets rxeof tries to clean at a time */
static int em_rx_process_limit = 100;
TUNABLE_INT("hw.em.rx_process_limit", &em_rx_process_limit);

/* Flow control setting - default to FULL */
static int em_fc_setting = e1000_fc_full;
TUNABLE_INT("hw.em.fc_setting", &em_fc_setting);
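
/*
 * All of the hw.em.* knobs above are boot-time loader tunables; a
 * hypothetical /boot/loader.conf entry might look like:
 *
 *	hw.em.rxd="1024"
 *	hw.em.rx_int_delay="32"
 *
 * Descriptor counts are validated against the hardware limits in
 * em_attach() below, and fall back to the defaults if out of range.
 */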

/*
** Shadow VFTA table: this is needed because
** the real VLAN filter table gets cleared during
** a soft reset, and the driver needs to be able
** to repopulate it.
*/
static u32 em_shadow_vfta[EM_VFTA_SIZE];

/* Global used in WOL setup with multiport cards */
static int global_quad_port_a = 0;

375 *  Device identification routine
376 *
377 *  em_probe determines if the driver should be loaded on
378 *  adapter based on PCI vendor/device id of the adapter.
379 *
380 *  return BUS_PROBE_DEFAULT on success, positive on failure
381 *********************************************************************/
382
383static int
384em_probe(device_t dev)
385{
386	char		adapter_name[60];
387	u16		pci_vendor_id = 0;
388	u16		pci_device_id = 0;
389	u16		pci_subvendor_id = 0;
390	u16		pci_subdevice_id = 0;
391	em_vendor_info_t *ent;
392
393	INIT_DEBUGOUT("em_probe: begin");
394
395	pci_vendor_id = pci_get_vendor(dev);
396	if (pci_vendor_id != EM_VENDOR_ID)
397		return (ENXIO);
398
399	pci_device_id = pci_get_device(dev);
400	pci_subvendor_id = pci_get_subvendor(dev);
401	pci_subdevice_id = pci_get_subdevice(dev);
402
403	ent = em_vendor_info_array;
404	while (ent->vendor_id != 0) {
405		if ((pci_vendor_id == ent->vendor_id) &&
406		    (pci_device_id == ent->device_id) &&
407
408		    ((pci_subvendor_id == ent->subvendor_id) ||
409		    (ent->subvendor_id == PCI_ANY_ID)) &&
410
411		    ((pci_subdevice_id == ent->subdevice_id) ||
412		    (ent->subdevice_id == PCI_ANY_ID))) {
413			sprintf(adapter_name, "%s %s",
414				em_strings[ent->index],
415				em_driver_version);
416			device_set_desc_copy(dev, adapter_name);
417			return (BUS_PROBE_DEFAULT);
418		}
419		ent++;
420	}
421
422	return (ENXIO);
423}

/*********************************************************************
 *  Device initialization routine
 *
 *  The attach entry point is called when the driver is being loaded.
 *  This routine identifies the type of hardware, allocates all
 *  resources, and initializes the hardware.
 *
 *  return 0 on success, positive on failure
 *********************************************************************/

static int
em_attach(device_t dev)
{
	struct adapter	*adapter;
	int		error = 0;

	INIT_DEBUGOUT("em_attach: begin");

	adapter = device_get_softc(dev);
	adapter->dev = adapter->osdep.dev = dev;
	EM_CORE_LOCK_INIT(adapter, device_get_nameunit(dev));

	/* SYSCTL stuff */
	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
	    OID_AUTO, "debug", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
	    em_sysctl_debug_info, "I", "Debug Information");

	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
	    OID_AUTO, "stats", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
	    em_sysctl_stats, "I", "Statistics");
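
	/*
	 * The two nodes above hang off the per-device sysctl tree, so
	 * they should appear as e.g. dev.em.0.debug and dev.em.0.stats;
	 * writing 1 to either triggers the corresponding report.
	 */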

	callout_init_mtx(&adapter->timer, &adapter->core_mtx, 0);

	/* Determine hardware and mac info */
	em_identify_hardware(adapter);

	/* Setup PCI resources */
	if (em_allocate_pci_resources(adapter)) {
		device_printf(dev, "Allocation of PCI resources failed\n");
		error = ENXIO;
		goto err_pci;
	}

	/*
	** For ICH8 and family we need to
	** map the flash memory, and this
	** must happen after the MAC is
	** identified.
	*/
	if ((adapter->hw.mac.type == e1000_ich8lan) ||
	    (adapter->hw.mac.type == e1000_pchlan) ||
	    (adapter->hw.mac.type == e1000_ich9lan) ||
	    (adapter->hw.mac.type == e1000_ich10lan)) {
		int rid = EM_BAR_TYPE_FLASH;
		adapter->flash = bus_alloc_resource_any(dev,
		    SYS_RES_MEMORY, &rid, RF_ACTIVE);
		if (adapter->flash == NULL) {
			device_printf(dev, "Mapping of Flash failed\n");
			error = ENXIO;
			goto err_pci;
		}
		/* This is used in the shared code */
		adapter->hw.flash_address = (u8 *)adapter->flash;
		adapter->osdep.flash_bus_space_tag =
		    rman_get_bustag(adapter->flash);
		adapter->osdep.flash_bus_space_handle =
		    rman_get_bushandle(adapter->flash);
	}

	/* Do Shared Code initialization */
	if (e1000_setup_init_funcs(&adapter->hw, TRUE)) {
		device_printf(dev, "Setup of Shared code failed\n");
		error = ENXIO;
		goto err_pci;
	}

	e1000_get_bus_info(&adapter->hw);

	/* Set up some sysctls for the tunable interrupt delays */
	em_add_int_delay_sysctl(adapter, "rx_int_delay",
	    "receive interrupt delay in usecs", &adapter->rx_int_delay,
	    E1000_REGISTER(&adapter->hw, E1000_RDTR), em_rx_int_delay_dflt);
	em_add_int_delay_sysctl(adapter, "tx_int_delay",
	    "transmit interrupt delay in usecs", &adapter->tx_int_delay,
	    E1000_REGISTER(&adapter->hw, E1000_TIDV), em_tx_int_delay_dflt);
	em_add_int_delay_sysctl(adapter, "rx_abs_int_delay",
	    "receive interrupt delay limit in usecs",
	    &adapter->rx_abs_int_delay,
	    E1000_REGISTER(&adapter->hw, E1000_RADV),
	    em_rx_abs_int_delay_dflt);
	em_add_int_delay_sysctl(adapter, "tx_abs_int_delay",
	    "transmit interrupt delay limit in usecs",
	    &adapter->tx_abs_int_delay,
	    E1000_REGISTER(&adapter->hw, E1000_TADV),
	    em_tx_abs_int_delay_dflt);

	/* Sysctls for limiting the amount of work done in the taskqueue */
	em_add_rx_process_limit(adapter, "rx_processing_limit",
	    "max number of rx packets to process", &adapter->rx_process_limit,
	    em_rx_process_limit);

	/*
	 * Validate the number of transmit and receive descriptors. It
	 * must not exceed the hardware maximum, and must be a multiple
	 * of EM_DBA_ALIGN.
	 */
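	/*
	 * Each legacy descriptor is 16 bytes, so e.g. the default of
	 * 1024 descriptors gives a 16KB ring, which trivially satisfies
	 * the EM_DBA_ALIGN test below.
	 */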
	if (((em_txd * sizeof(struct e1000_tx_desc)) % EM_DBA_ALIGN) != 0 ||
	    (em_txd > EM_MAX_TXD) || (em_txd < EM_MIN_TXD)) {
		device_printf(dev, "Using %d TX descriptors instead of %d!\n",
		    EM_DEFAULT_TXD, em_txd);
		adapter->num_tx_desc = EM_DEFAULT_TXD;
	} else
		adapter->num_tx_desc = em_txd;

	if (((em_rxd * sizeof(struct e1000_rx_desc)) % EM_DBA_ALIGN) != 0 ||
	    (em_rxd > EM_MAX_RXD) || (em_rxd < EM_MIN_RXD)) {
		device_printf(dev, "Using %d RX descriptors instead of %d!\n",
		    EM_DEFAULT_RXD, em_rxd);
		adapter->num_rx_desc = EM_DEFAULT_RXD;
	} else
		adapter->num_rx_desc = em_rxd;

	adapter->hw.mac.autoneg = DO_AUTO_NEG;
	adapter->hw.phy.autoneg_wait_to_complete = FALSE;
	adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;

	/* Copper options */
	if (adapter->hw.phy.media_type == e1000_media_type_copper) {
		adapter->hw.phy.mdix = AUTO_ALL_MODES;
		adapter->hw.phy.disable_polarity_correction = FALSE;
		adapter->hw.phy.ms_type = EM_MASTER_SLAVE;
	}

	/*
	 * Set the frame limits assuming
	 * standard ethernet sized frames.
	 */
	adapter->max_frame_size = ETHERMTU + ETHER_HDR_LEN + ETHERNET_FCS_SIZE;
	adapter->min_frame_size = ETH_ZLEN + ETHERNET_FCS_SIZE;

	/*
	 * This controls when hardware reports transmit completion
	 * status.
	 */
	adapter->hw.mac.report_tx_early = 1;

	/*
	** Get queue/ring memory
	*/
	if (em_allocate_queues(adapter)) {
		error = ENOMEM;
		goto err_pci;
	}

	/*
	** Start from a known state: this is
	** important for reading the NVM and
	** MAC address from it.
	*/
	e1000_reset_hw(&adapter->hw);

	/* Make sure we have a good EEPROM before we read from it */
	if (e1000_validate_nvm_checksum(&adapter->hw) < 0) {
		/*
		** Some PCI-E parts fail the first check due to
		** the link being in sleep state; call it again,
		** and if it fails a second time it's a real issue.
		*/
		if (e1000_validate_nvm_checksum(&adapter->hw) < 0) {
			device_printf(dev,
			    "The EEPROM Checksum Is Not Valid\n");
			error = EIO;
			goto err_late;
		}
	}

	/* Copy the permanent MAC address out of the EEPROM */
	if (e1000_read_mac_addr(&adapter->hw) < 0) {
		device_printf(dev, "EEPROM read error while reading MAC"
		    " address\n");
		error = EIO;
		goto err_late;
	}

	if (!em_is_valid_ether_addr(adapter->hw.mac.addr)) {
		device_printf(dev, "Invalid MAC address\n");
		error = EIO;
		goto err_late;
	}

	/*
	**  Do interrupt configuration
	*/
	if (adapter->msix > 1) /* Do MSIX */
		error = em_allocate_msix(adapter);
	else  /* MSI or Legacy */
		error = em_allocate_legacy(adapter);
	if (error)
		goto err_late;

	/*
	 * Get Wake-on-Lan and Management info for later use
	 */
	em_get_wakeup(dev);

	/* Setup OS specific network interface */
	em_setup_interface(dev, adapter);

	em_reset(adapter);

	/* Initialize statistics */
	em_update_stats_counters(adapter);

	adapter->hw.mac.get_link_status = 1;
	em_update_link_status(adapter);

	/* Indicate SOL/IDER usage */
	if (e1000_check_reset_block(&adapter->hw))
		device_printf(dev,
		    "PHY reset is blocked due to SOL/IDER session.\n");

	/* Register for VLAN events */
	adapter->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
	    em_register_vlan, adapter, EVENTHANDLER_PRI_FIRST);
	adapter->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
	    em_unregister_vlan, adapter, EVENTHANDLER_PRI_FIRST);

	/* Non-AMT based hardware can now take control from firmware */
	if (adapter->has_manage && !adapter->has_amt)
		em_get_hw_control(adapter);

	/* Tell the stack that the interface is not active */
	adapter->ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);

	adapter->led_dev = led_create(em_led_func, adapter,
	    device_get_nameunit(dev));

	INIT_DEBUGOUT("em_attach: end");

	return (0);

err_late:
	em_free_transmit_structures(adapter);
	em_free_receive_structures(adapter);
	em_release_hw_control(adapter);
err_pci:
	em_free_pci_resources(adapter);
	EM_CORE_LOCK_DESTROY(adapter);

	return (error);
}

/*********************************************************************
 *  Device removal routine
 *
 *  The detach entry point is called when the driver is being removed.
 *  This routine stops the adapter and deallocates all the resources
 *  that were allocated for driver operation.
 *
 *  return 0 on success, positive on failure
 *********************************************************************/

static int
em_detach(device_t dev)
{
	struct adapter	*adapter = device_get_softc(dev);
	struct ifnet	*ifp = adapter->ifp;

	INIT_DEBUGOUT("em_detach: begin");

	/* Make sure VLANs are not using the driver */
	if (adapter->ifp->if_vlantrunk != NULL) {
		device_printf(dev, "Vlan in use, detach first\n");
		return (EBUSY);
	}

#ifdef DEVICE_POLLING
	if (ifp->if_capenable & IFCAP_POLLING)
		ether_poll_deregister(ifp);
#endif

	EM_CORE_LOCK(adapter);
	adapter->in_detach = 1;
	em_stop(adapter);
	EM_CORE_UNLOCK(adapter);
	EM_CORE_LOCK_DESTROY(adapter);

	e1000_phy_hw_reset(&adapter->hw);

	em_release_manageability(adapter);
	em_release_hw_control(adapter);

	/* Unregister VLAN events */
	if (adapter->vlan_attach != NULL)
		EVENTHANDLER_DEREGISTER(vlan_config, adapter->vlan_attach);
	if (adapter->vlan_detach != NULL)
		EVENTHANDLER_DEREGISTER(vlan_unconfig, adapter->vlan_detach);

	ether_ifdetach(adapter->ifp);
	callout_drain(&adapter->timer);

	em_free_pci_resources(adapter);
	bus_generic_detach(dev);
	if_free(ifp);

	em_free_transmit_structures(adapter);
	em_free_receive_structures(adapter);

	em_release_hw_control(adapter);

	return (0);
}

/*********************************************************************
 *
 *  Shutdown entry point
 *
 **********************************************************************/

static int
em_shutdown(device_t dev)
{
	return em_suspend(dev);
}

/*
 * Suspend/resume device methods.
 */
static int
em_suspend(device_t dev)
{
	struct adapter *adapter = device_get_softc(dev);

	EM_CORE_LOCK(adapter);

	em_release_manageability(adapter);
	em_release_hw_control(adapter);
	em_enable_wakeup(dev);

	EM_CORE_UNLOCK(adapter);

	return bus_generic_suspend(dev);
}

static int
em_resume(device_t dev)
{
	struct adapter *adapter = device_get_softc(dev);
	struct ifnet *ifp = adapter->ifp;

	if (adapter->led_dev != NULL)
		led_destroy(adapter->led_dev);

	EM_CORE_LOCK(adapter);
	em_init_locked(adapter);
	em_init_manageability(adapter);
	EM_CORE_UNLOCK(adapter);
	em_start(ifp);

	return bus_generic_resume(dev);
}


/*********************************************************************
 *  Transmit entry point
 *
 *  em_start is called by the stack to initiate a transmit.
 *  The driver will remain in this routine as long as there are
 *  packets to transmit and transmit resources are available.
 *  If resources are not available, the stack is notified and
 *  the packet is requeued.
 **********************************************************************/

#if __FreeBSD_version >= 800000
static int
em_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr, struct mbuf *m)
{
	struct adapter	*adapter = txr->adapter;
	struct mbuf	*next;
	int		err = 0, enq = 0;

	if ((ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) !=
	    IFF_DRV_RUNNING || adapter->link_active == 0) {
		if (m != NULL)
			err = drbr_enqueue(ifp, txr->br, m);
		return (err);
	}

	if (m == NULL) {
		next = drbr_dequeue(ifp, txr->br);
	} else if (drbr_needs_enqueue(ifp, txr->br)) {
		if ((err = drbr_enqueue(ifp, txr->br, m)) != 0)
			return (err);
		next = drbr_dequeue(ifp, txr->br);
	} else
		next = m;

	/* Process the queue */
	while (next != NULL) {
		if ((err = em_xmit(txr, &next)) != 0) {
			if (next != NULL)
				err = drbr_enqueue(ifp, txr->br, next);
			break;
		}
		enq++;
		drbr_stats_update(ifp, next->m_pkthdr.len, next->m_flags);
		ETHER_BPF_MTAP(ifp, next);
		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
			break;
		next = drbr_dequeue(ifp, txr->br);
	}

	if (enq > 0) {
		/* Set the watchdog */
		txr->watchdog_check = TRUE;
	}
	return (err);
}

/*
** Multiqueue-capable stack interface; this is not
** yet truly multiqueue, but that is coming...
*/
static int
em_mq_start(struct ifnet *ifp, struct mbuf *m)
{
	struct adapter	*adapter = ifp->if_softc;
	struct tx_ring	*txr;
	int 		i, error = 0;

	/*
	** Pick a queue: a stack-supplied flow id keeps all packets
	** of a flow on one ring; otherwise hash on the current cpu.
	*/
	if ((m->m_flags & M_FLOWID) != 0)
		i = m->m_pkthdr.flowid % adapter->num_queues;
	else
		i = curcpu % adapter->num_queues;

	txr = &adapter->tx_rings[i];

	if (EM_TX_TRYLOCK(txr)) {
		if (ifp->if_drv_flags & IFF_DRV_RUNNING)
			error = em_mq_start_locked(ifp, txr, m);
		EM_TX_UNLOCK(txr);
	} else
		error = drbr_enqueue(ifp, txr->br, m);

	return (error);
}

/*
** Flush all ring buffers
*/
static void
em_qflush(struct ifnet *ifp)
{
	struct adapter	*adapter = ifp->if_softc;
	struct tx_ring	*txr = adapter->tx_rings;
	struct mbuf	*m;

	for (int i = 0; i < adapter->num_queues; i++, txr++) {
		EM_TX_LOCK(txr);
		while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
			m_freem(m);
		EM_TX_UNLOCK(txr);
	}
	if_qflush(ifp);
}

#endif /* __FreeBSD_version >= 800000 */

static void
em_start_locked(struct ifnet *ifp, struct tx_ring *txr)
{
	struct adapter	*adapter = ifp->if_softc;
	struct mbuf	*m_head;

	EM_TX_LOCK_ASSERT(txr);

	if ((ifp->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) !=
	    IFF_DRV_RUNNING)
		return;

	if (!adapter->link_active)
		return;

	while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
		IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
		if (m_head == NULL)
			break;
		/*
		 *  Encapsulation can modify our pointer, and/or make it
		 *  NULL on failure.  In that event, we can't requeue.
		 */
		if (em_xmit(txr, &m_head)) {
			if (m_head == NULL)
				break;
			ifp->if_drv_flags |= IFF_DRV_OACTIVE;
			IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
			break;
		}

		/* Send a copy of the frame to the BPF listener */
		ETHER_BPF_MTAP(ifp, m_head);

		/* Set timeout in case hardware has problems transmitting. */
		txr->watchdog_check = TRUE;
	}

	return;
}

static void
em_start(struct ifnet *ifp)
{
	struct adapter	*adapter = ifp->if_softc;
	struct tx_ring	*txr = adapter->tx_rings;

	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
		EM_TX_LOCK(txr);
		em_start_locked(ifp, txr);
		EM_TX_UNLOCK(txr);
	}
	return;
}

/*********************************************************************
 *  Ioctl entry point
 *
 *  em_ioctl is called when the user wants to configure the
 *  interface.
 *
 *  return 0 on success, positive on failure
 **********************************************************************/

static int
em_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
{
	struct adapter	*adapter = ifp->if_softc;
	struct ifreq *ifr = (struct ifreq *)data;
#ifdef INET
	struct ifaddr *ifa = (struct ifaddr *)data;
#endif
	int error = 0;

	if (adapter->in_detach)
		return (error);

	switch (command) {
	case SIOCSIFADDR:
#ifdef INET
		if (ifa->ifa_addr->sa_family == AF_INET) {
			/*
			 * XXX
			 * Since resetting hardware takes a very long time
			 * and results in link renegotiation, we initialize
			 * the hardware only when it is absolutely required.
			 */
			ifp->if_flags |= IFF_UP;
			if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) {
				EM_CORE_LOCK(adapter);
				em_init_locked(adapter);
				EM_CORE_UNLOCK(adapter);
			}
			arp_ifinit(ifp, ifa);
		} else
#endif
			error = ether_ioctl(ifp, command, data);
		break;
	case SIOCSIFMTU:
	    {
		int max_frame_size;

		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFMTU (Set Interface MTU)");

		EM_CORE_LOCK(adapter);
		switch (adapter->hw.mac.type) {
		case e1000_82571:
		case e1000_82572:
		case e1000_ich9lan:
		case e1000_ich10lan:
		case e1000_82574:
		case e1000_80003es2lan:	/* 9K Jumbo Frame size */
			max_frame_size = 9234;
			break;
		case e1000_pchlan:
			max_frame_size = 4096;
			break;
			/* Adapters that do not support jumbo frames */
		case e1000_82583:
		case e1000_ich8lan:
			max_frame_size = ETHER_MAX_LEN;
			break;
		default:
			max_frame_size = MAX_JUMBO_FRAME_SIZE;
		}
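		/*
		 * The MTU excludes the 14-byte Ethernet header and the
		 * 4-byte CRC, so e.g. a 9234-byte frame limit permits
		 * an MTU of up to 9216 bytes.
		 */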
		if (ifr->ifr_mtu > max_frame_size - ETHER_HDR_LEN -
		    ETHER_CRC_LEN) {
			EM_CORE_UNLOCK(adapter);
			error = EINVAL;
			break;
		}

		ifp->if_mtu = ifr->ifr_mtu;
		adapter->max_frame_size =
		    ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
		em_init_locked(adapter);
		EM_CORE_UNLOCK(adapter);
		break;
	    }
	case SIOCSIFFLAGS:
		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFFLAGS (Set Interface Flags)");
		EM_CORE_LOCK(adapter);
		if (ifp->if_flags & IFF_UP) {
			if ((ifp->if_drv_flags & IFF_DRV_RUNNING)) {
				if ((ifp->if_flags ^ adapter->if_flags) &
				    (IFF_PROMISC | IFF_ALLMULTI)) {
					em_disable_promisc(adapter);
					em_set_promisc(adapter);
				}
			} else
				em_init_locked(adapter);
		} else
			if (ifp->if_drv_flags & IFF_DRV_RUNNING)
				em_stop(adapter);
		adapter->if_flags = ifp->if_flags;
		EM_CORE_UNLOCK(adapter);
		break;
	case SIOCADDMULTI:
	case SIOCDELMULTI:
		IOCTL_DEBUGOUT("ioctl rcv'd: SIOC(ADD|DEL)MULTI");
		if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
			EM_CORE_LOCK(adapter);
			em_disable_intr(adapter);
			em_set_multi(adapter);
#ifdef DEVICE_POLLING
			if (!(ifp->if_capenable & IFCAP_POLLING))
#endif
				em_enable_intr(adapter);
			EM_CORE_UNLOCK(adapter);
		}
		break;
	case SIOCSIFMEDIA:
		/* Check SOL/IDER usage */
		EM_CORE_LOCK(adapter);
		if (e1000_check_reset_block(&adapter->hw)) {
			EM_CORE_UNLOCK(adapter);
			device_printf(adapter->dev, "Media change is"
			    " blocked due to SOL/IDER session.\n");
			break;
		}
		EM_CORE_UNLOCK(adapter);
		/* FALLTHROUGH */
	case SIOCGIFMEDIA:
		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCxIFMEDIA (Get/Set Interface Media)");
		error = ifmedia_ioctl(ifp, ifr, &adapter->media, command);
		break;
	case SIOCSIFCAP:
	    {
		int mask, reinit;

		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFCAP (Set Capabilities)");
		reinit = 0;
		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
#ifdef DEVICE_POLLING
		if (mask & IFCAP_POLLING) {
			if (ifr->ifr_reqcap & IFCAP_POLLING) {
				error = ether_poll_register(em_poll, ifp);
				if (error)
					return (error);
				EM_CORE_LOCK(adapter);
				em_disable_intr(adapter);
				ifp->if_capenable |= IFCAP_POLLING;
				EM_CORE_UNLOCK(adapter);
			} else {
				error = ether_poll_deregister(ifp);
				/* Enable interrupt even in error case */
				EM_CORE_LOCK(adapter);
				em_enable_intr(adapter);
				ifp->if_capenable &= ~IFCAP_POLLING;
				EM_CORE_UNLOCK(adapter);
			}
		}
#endif
		if (mask & IFCAP_HWCSUM) {
			ifp->if_capenable ^= IFCAP_HWCSUM;
			reinit = 1;
		}
		if (mask & IFCAP_TSO4) {
			ifp->if_capenable ^= IFCAP_TSO4;
			reinit = 1;
		}
		if (mask & IFCAP_VLAN_HWTAGGING) {
			ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
			reinit = 1;
		}
		if (mask & IFCAP_VLAN_HWFILTER) {
			ifp->if_capenable ^= IFCAP_VLAN_HWFILTER;
			reinit = 1;
		}
		if ((mask & IFCAP_WOL) &&
		    (ifp->if_capabilities & IFCAP_WOL) != 0) {
			if (mask & IFCAP_WOL_MCAST)
				ifp->if_capenable ^= IFCAP_WOL_MCAST;
			if (mask & IFCAP_WOL_MAGIC)
				ifp->if_capenable ^= IFCAP_WOL_MAGIC;
		}
		if (reinit && (ifp->if_drv_flags & IFF_DRV_RUNNING))
			em_init(adapter);
		VLAN_CAPABILITIES(ifp);
		break;
	    }

	default:
		error = ether_ioctl(ifp, command, data);
		break;
	}

	return (error);
}


/*********************************************************************
 *  Init entry point
 *
 *  This routine is used in two ways. It is used by the stack as
 *  an init entry point in the network interface structure. It is
 *  also used by the driver as a hw/sw initialization routine to
 *  get the hardware and software to a consistent state.
 *
 **********************************************************************/

static void
em_init_locked(struct adapter *adapter)
{
	struct ifnet	*ifp = adapter->ifp;
	device_t	dev = adapter->dev;
	u32		pba;

	INIT_DEBUGOUT("em_init: begin");

	EM_CORE_LOCK_ASSERT(adapter);

	em_disable_intr(adapter);
	callout_stop(&adapter->timer);

	/*
	 * Packet Buffer Allocation (PBA)
	 * Writing PBA sets the receive portion of the buffer;
	 * the remainder is used for the transmit buffer.
	 */
	switch (adapter->hw.mac.type) {
	/* Total Packet Buffer on these is 48K */
	case e1000_82571:
	case e1000_82572:
	case e1000_80003es2lan:
		pba = E1000_PBA_32K; /* 32K for Rx, 16K for Tx */
		break;
	case e1000_82573: /* 82573: Total Packet Buffer is 32K */
		pba = E1000_PBA_12K; /* 12K for Rx, 20K for Tx */
		break;
	case e1000_82574:
	case e1000_82583:
		pba = E1000_PBA_20K; /* 20K for Rx, 20K for Tx */
		break;
	case e1000_ich9lan:
	case e1000_ich10lan:
	case e1000_pchlan:
		pba = E1000_PBA_10K;
		break;
	case e1000_ich8lan:
		pba = E1000_PBA_8K;
		break;
	default:
		if (adapter->max_frame_size > 8192)
			pba = E1000_PBA_40K; /* 40K for Rx, 24K for Tx */
		else
			pba = E1000_PBA_48K; /* 48K for Rx, 16K for Tx */
	}

	INIT_DEBUGOUT1("em_init: pba=%dK",pba);
	E1000_WRITE_REG(&adapter->hw, E1000_PBA, pba);

	/* Get the latest mac address; the user can use a LAA */
	bcopy(IF_LLADDR(adapter->ifp), adapter->hw.mac.addr,
	    ETHER_ADDR_LEN);

	/* Put the address into the Receive Address Array */
	e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);

	/*
	 * With the 82571 adapter, RAR[0] may be overwritten
	 * when the other port is reset, so we keep a duplicate
	 * in RAR[14] for that eventuality; this assures
	 * the interface continues to function.
	 */
	if (adapter->hw.mac.type == e1000_82571) {
		e1000_set_laa_state_82571(&adapter->hw, TRUE);
		e1000_rar_set(&adapter->hw, adapter->hw.mac.addr,
		    E1000_RAR_ENTRIES - 1);
	}

	/* Initialize the hardware */
	em_reset(adapter);
	em_update_link_status(adapter);

	/* Setup VLAN support, basic and offload if available */
	E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);

	/* Use real VLAN Filter support? */
	if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) {
		if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
			/* Use real VLAN Filter support */
			em_setup_vlan_hw_support(adapter);
		else {
			u32 ctrl;
			ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL);
			ctrl |= E1000_CTRL_VME;
			E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl);
		}
	}

	/* Set hardware offload abilities */
	ifp->if_hwassist = 0;
	if (ifp->if_capenable & IFCAP_TXCSUM)
		ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP);
	if (ifp->if_capenable & IFCAP_TSO4)
		ifp->if_hwassist |= CSUM_TSO;

	/* Configure for OS presence */
	em_init_manageability(adapter);

	/* Prepare transmit descriptors and buffers */
	em_setup_transmit_structures(adapter);
	em_initialize_transmit_unit(adapter);

	/* Setup Multicast table */
	em_set_multi(adapter);

	/* Prepare receive descriptors and buffers */
	if (em_setup_receive_structures(adapter)) {
		device_printf(dev, "Could not setup receive structures\n");
		em_stop(adapter);
		return;
	}
	em_initialize_receive_unit(adapter);

	/* Don't lose promiscuous settings */
	em_set_promisc(adapter);

	ifp->if_drv_flags |= IFF_DRV_RUNNING;
	ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;

	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
	e1000_clear_hw_cntrs_base_generic(&adapter->hw);

	/* MSI/X configuration for 82574 */
	if (adapter->hw.mac.type == e1000_82574) {
		int tmp;
		tmp = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
		tmp |= E1000_CTRL_EXT_PBA_CLR;
		E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, tmp);
		/* Set the IVAR - interrupt vector routing. */
		E1000_WRITE_REG(&adapter->hw, E1000_IVAR, adapter->ivars);
	}

#ifdef DEVICE_POLLING
	/*
	 * Only enable interrupts if we are not polling; make sure
	 * they are off otherwise.
	 */
	if (ifp->if_capenable & IFCAP_POLLING)
		em_disable_intr(adapter);
	else
#endif /* DEVICE_POLLING */
		em_enable_intr(adapter);

	/* AMT based hardware can now take control from firmware */
	if (adapter->has_manage && adapter->has_amt)
		em_get_hw_control(adapter);

	/* Don't reset the phy next time init gets called */
	adapter->hw.phy.reset_disable = TRUE;
}

static void
em_init(void *arg)
{
	struct adapter *adapter = arg;

	EM_CORE_LOCK(adapter);
	em_init_locked(adapter);
	EM_CORE_UNLOCK(adapter);
}


#ifdef DEVICE_POLLING
/*********************************************************************
 *
 *  Legacy polling routine: note this only works with single queue
 *
 *********************************************************************/
static int
em_poll(struct ifnet *ifp, enum poll_cmd cmd, int count)
{
	struct adapter *adapter = ifp->if_softc;
	struct tx_ring	*txr = adapter->tx_rings;
	struct rx_ring	*rxr = adapter->rx_rings;
	u32		reg_icr, rx_done = 0;

	EM_CORE_LOCK(adapter);
	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
		EM_CORE_UNLOCK(adapter);
		return (rx_done);
	}

	if (cmd == POLL_AND_CHECK_STATUS) {
		reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
		if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
			callout_stop(&adapter->timer);
			adapter->hw.mac.get_link_status = 1;
			em_update_link_status(adapter);
			callout_reset(&adapter->timer, hz,
			    em_local_timer, adapter);
		}
	}
	EM_CORE_UNLOCK(adapter);

	rx_done = em_rxeof(rxr, count);

	EM_TX_LOCK(txr);
	em_txeof(txr);
#if __FreeBSD_version >= 800000
	if (!drbr_empty(ifp, txr->br))
		em_mq_start_locked(ifp, txr, NULL);
#else
	if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
		em_start_locked(ifp, txr);
#endif
	EM_TX_UNLOCK(txr);

	return (rx_done);
}
#endif /* DEVICE_POLLING */


/*********************************************************************
 *
 *  Fast Legacy/MSI Combined Interrupt Service routine
 *
 *********************************************************************/
static int
em_irq_fast(void *arg)
{
	struct adapter	*adapter = arg;
	struct ifnet	*ifp;
	u32		reg_icr;

	ifp = adapter->ifp;

	reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);

	/* Hot eject?  */
	if (reg_icr == 0xffffffff)
		return FILTER_STRAY;

	/* Definitely not our interrupt.  */
	if (reg_icr == 0x0)
		return FILTER_STRAY;

	/*
	 * Starting with the 82571 chip, bit 31 should be used to
	 * determine whether the interrupt belongs to us.
	 */
	if (adapter->hw.mac.type >= e1000_82571 &&
	    (reg_icr & E1000_ICR_INT_ASSERTED) == 0)
		return FILTER_STRAY;

	em_disable_intr(adapter);
	taskqueue_enqueue(adapter->tq, &adapter->que_task);

	/* Link status change */
	if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
		adapter->hw.mac.get_link_status = 1;
		taskqueue_enqueue(taskqueue_fast, &adapter->link_task);
	}

	if (reg_icr & E1000_ICR_RXO)
		adapter->rx_overruns++;
	return FILTER_HANDLED;
}
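
/*
 * Note that em_irq_fast() is registered as a filter handler and so runs
 * in primary interrupt context: it only reads/acks the cause register
 * and defers the actual rx/tx cleanup to the que taskqueue, which
 * re-enables interrupts in em_handle_que() when it is done.
 */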

/* Combined RX/TX handler, used by Legacy and MSI */
static void
em_handle_que(void *context, int pending)
{
	struct adapter	*adapter = context;
	struct ifnet	*ifp = adapter->ifp;
	struct tx_ring	*txr = adapter->tx_rings;
	struct rx_ring	*rxr = adapter->rx_rings;
	u32		loop = EM_MAX_LOOP;
	bool		more_rx, more_tx;

	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
		EM_TX_LOCK(txr);
		do {
			more_rx = em_rxeof(rxr, adapter->rx_process_limit);
			more_tx = em_txeof(txr);
		} while (loop-- && (more_rx || more_tx));

#if __FreeBSD_version >= 800000
		if (!drbr_empty(ifp, txr->br))
			em_mq_start_locked(ifp, txr, NULL);
#else
		if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
			em_start_locked(ifp, txr);
#endif
		if (more_rx || more_tx)
			taskqueue_enqueue(adapter->tq, &adapter->que_task);

		EM_TX_UNLOCK(txr);
	}

	em_enable_intr(adapter);
	return;
}


/*********************************************************************
 *
 *  MSIX Interrupt Service Routines
 *
 **********************************************************************/
static void
em_msix_tx(void *arg)
{
	struct tx_ring *txr = arg;
	struct adapter *adapter = txr->adapter;
	bool		more;

	++txr->tx_irq;
	EM_TX_LOCK(txr);
	more = em_txeof(txr);
	EM_TX_UNLOCK(txr);
	if (more)
		taskqueue_enqueue(txr->tq, &txr->tx_task);
	else
		/* Reenable this interrupt */
		E1000_WRITE_REG(&adapter->hw, E1000_IMS, txr->ims);
	return;
}

/*********************************************************************
 *
 *  MSIX RX Interrupt Service routine
 *
 **********************************************************************/

static void
em_msix_rx(void *arg)
{
	struct rx_ring	*rxr = arg;
	struct adapter	*adapter = rxr->adapter;
	bool		more;

	++rxr->rx_irq;
	more = em_rxeof(rxr, adapter->rx_process_limit);
	if (more)
		taskqueue_enqueue(rxr->tq, &rxr->rx_task);
	else
		/* Reenable this interrupt */
		E1000_WRITE_REG(&adapter->hw, E1000_IMS, rxr->ims);
	return;
}

/*********************************************************************
 *
 *  MSIX Link Fast Interrupt Service routine
 *
 **********************************************************************/
static void
em_msix_link(void *arg)
{
	struct adapter	*adapter = arg;
	u32		reg_icr;

	++adapter->link_irq;
	reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);

	if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
		adapter->hw.mac.get_link_status = 1;
		taskqueue_enqueue(taskqueue_fast, &adapter->link_task);
	} else
		E1000_WRITE_REG(&adapter->hw, E1000_IMS,
		    EM_MSIX_LINK | E1000_IMS_LSC);
	return;
}
static void
em_handle_rx(void *context, int pending)
{
	struct rx_ring	*rxr = context;
	struct adapter	*adapter = rxr->adapter;
	u32		loop = EM_MAX_LOOP;
	bool		more;

	do {
		more = em_rxeof(rxr, adapter->rx_process_limit);
	} while (loop-- && more);
	/* Reenable this interrupt */
	E1000_WRITE_REG(&adapter->hw, E1000_IMS, rxr->ims);
}

static void
em_handle_tx(void *context, int pending)
{
	struct tx_ring	*txr = context;
	struct adapter	*adapter = txr->adapter;
	struct ifnet	*ifp = adapter->ifp;
	u32		loop = EM_MAX_LOOP;
	bool		more;

	if (!EM_TX_TRYLOCK(txr))
		return;
	do {
		more = em_txeof(txr);
	} while (loop-- && more);

#if __FreeBSD_version >= 800000
	if (!drbr_empty(ifp, txr->br))
		em_mq_start_locked(ifp, txr, NULL);
#else
	if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
		em_start_locked(ifp, txr);
#endif
	E1000_WRITE_REG(&adapter->hw, E1000_IMS, txr->ims);
	EM_TX_UNLOCK(txr);
}

static void
em_handle_link(void *context, int pending)
{
	struct adapter	*adapter = context;
	struct ifnet *ifp = adapter->ifp;

	if (!(ifp->if_drv_flags & IFF_DRV_RUNNING))
		return;

	EM_CORE_LOCK(adapter);
	callout_stop(&adapter->timer);
	em_update_link_status(adapter);
	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
	E1000_WRITE_REG(&adapter->hw, E1000_IMS,
	    EM_MSIX_LINK | E1000_IMS_LSC);
	EM_CORE_UNLOCK(adapter);
}


/*********************************************************************
 *
 *  Media Ioctl callback
 *
 *  This routine is called whenever the user queries the status of
 *  the interface using ifconfig.
 *
 **********************************************************************/
static void
em_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
{
	struct adapter *adapter = ifp->if_softc;
	u_char fiber_type = IFM_1000_SX;

	INIT_DEBUGOUT("em_media_status: begin");

	EM_CORE_LOCK(adapter);
	em_update_link_status(adapter);

	ifmr->ifm_status = IFM_AVALID;
	ifmr->ifm_active = IFM_ETHER;

	if (!adapter->link_active) {
		EM_CORE_UNLOCK(adapter);
		return;
	}

	ifmr->ifm_status |= IFM_ACTIVE;

	if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
	    (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
		ifmr->ifm_active |= fiber_type | IFM_FDX;
	} else {
		switch (adapter->link_speed) {
		case 10:
			ifmr->ifm_active |= IFM_10_T;
			break;
		case 100:
			ifmr->ifm_active |= IFM_100_TX;
			break;
		case 1000:
			ifmr->ifm_active |= IFM_1000_T;
			break;
		}
		if (adapter->link_duplex == FULL_DUPLEX)
			ifmr->ifm_active |= IFM_FDX;
		else
			ifmr->ifm_active |= IFM_HDX;
	}
	EM_CORE_UNLOCK(adapter);
}

/*********************************************************************
 *
 *  Media Ioctl callback
 *
 *  This routine is called when the user changes speed/duplex using
 *  the media/mediaopt option with ifconfig.
 *
 **********************************************************************/
static int
em_media_change(struct ifnet *ifp)
{
	struct adapter *adapter = ifp->if_softc;
	struct ifmedia  *ifm = &adapter->media;

	INIT_DEBUGOUT("em_media_change: begin");

	if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
		return (EINVAL);

	EM_CORE_LOCK(adapter);
	switch (IFM_SUBTYPE(ifm->ifm_media)) {
	case IFM_AUTO:
		adapter->hw.mac.autoneg = DO_AUTO_NEG;
		adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
		break;
	case IFM_1000_LX:
	case IFM_1000_SX:
	case IFM_1000_T:
		adapter->hw.mac.autoneg = DO_AUTO_NEG;
		adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
		break;
	case IFM_100_TX:
		adapter->hw.mac.autoneg = FALSE;
		adapter->hw.phy.autoneg_advertised = 0;
		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
			adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_FULL;
		else
			adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_HALF;
		break;
	case IFM_10_T:
		adapter->hw.mac.autoneg = FALSE;
		adapter->hw.phy.autoneg_advertised = 0;
		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
			adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_FULL;
		else
			adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_HALF;
		break;
	default:
		device_printf(adapter->dev, "Unsupported media type\n");
	}

	/*
	 * As the speed/duplex settings may have changed we need to
	 * reset the PHY.
	 */
	adapter->hw.phy.reset_disable = FALSE;

	em_init_locked(adapter);
	EM_CORE_UNLOCK(adapter);

	return (0);
}

/*********************************************************************
 *
 *  This routine maps the mbufs to tx descriptors.
 *
 *  return 0 on success, positive on failure
 **********************************************************************/

static int
em_xmit(struct tx_ring *txr, struct mbuf **m_headp)
{
	struct adapter		*adapter = txr->adapter;
	bus_dma_segment_t	segs[EM_MAX_SCATTER];
	bus_dmamap_t		map;
	struct em_buffer	*tx_buffer, *tx_buffer_mapped;
	struct e1000_tx_desc	*ctxd = NULL;
	struct mbuf		*m_head;
	u32			txd_upper, txd_lower, txd_used, txd_saved;
	int			nsegs, i, j, first, last = 0;
	int			error, do_tso, tso_desc = 0;

	m_head = *m_headp;
	txd_upper = txd_lower = txd_used = txd_saved = 0;
	do_tso = ((m_head->m_pkthdr.csum_flags & CSUM_TSO) != 0);

	/*
	 * TSO workaround:
	 *  If an mbuf is only a header, we need
	 *  to pull 4 bytes of data into it.
	 */
	if (do_tso && (m_head->m_len <= M_TSO_LEN)) {
		m_head = m_pullup(m_head, M_TSO_LEN + 4);
		*m_headp = m_head;
		if (m_head == NULL)
			return (ENOBUFS);
	}
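	/*
	 * (M_TSO_LEN is 66 bytes, roughly a maximal Ethernet + IP + TCP
	 * header, so the pullup above should guarantee the first mbuf
	 * of a TSO packet carries at least a few bytes of payload.)
	 */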

	/*
	 * Map the packet for DMA
	 *
	 * Capture the first descriptor index;
	 * this descriptor will have the index
	 * of the EOP, which is the only one that
	 * now gets a DONE bit writeback.
	 */
	first = txr->next_avail_desc;
	tx_buffer = &txr->tx_buffers[first];
	tx_buffer_mapped = tx_buffer;
	map = tx_buffer->map;

	error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
	    *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);

	/*
	 * There are two types of errors we can (try) to handle:
	 * - EFBIG means the mbuf chain was too long and bus_dma ran
	 *   out of segments.  Defragment the mbuf chain and try again.
	 * - ENOMEM means bus_dma could not obtain enough bounce buffers
	 *   at this point in time.  Defer sending and try again later.
	 * All other errors, in particular EINVAL, are fatal and prevent the
	 * mbuf chain from ever going through.  Drop it and report error.
	 */
	if (error == EFBIG) {
		struct mbuf *m;

		m = m_defrag(*m_headp, M_DONTWAIT);
		if (m == NULL) {
			adapter->mbuf_alloc_failed++;
			m_freem(*m_headp);
			*m_headp = NULL;
			return (ENOBUFS);
		}
		*m_headp = m;

		/* Try it again */
		error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
		    *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);

		if (error) {
			adapter->no_tx_dma_setup++;
			m_freem(*m_headp);
			*m_headp = NULL;
			return (error);
		}
	} else if (error != 0) {
		adapter->no_tx_dma_setup++;
		return (error);
	}

	/*
	 * TSO Hardware workaround: if this packet is not
	 * TSO, is only a single descriptor long, and
	 * follows a TSO burst, then we need to add a
	 * sentinel descriptor to prevent premature writeback.
	 */
	if ((do_tso == 0) && (txr->tx_tso == TRUE)) {
		if (nsegs == 1)
			tso_desc = TRUE;
		txr->tx_tso = FALSE;
	}
1802
1803	if (nsegs > (txr->tx_avail - 2)) {
1804		txr->no_desc_avail++;
1805		bus_dmamap_unload(txr->txtag, map);
1806		return (ENOBUFS);
1807	}
1808	m_head = *m_headp;
1809
1810	/* Do hardware assists */
1811#if __FreeBSD_version >= 700000
1812	if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
1813		error = em_tso_setup(txr, m_head, &txd_upper, &txd_lower);
1814		if (error != TRUE)
1815			return (ENXIO); /* something foobar */
1816		/* we need to make a final sentinel transmit desc */
1817		tso_desc = TRUE;
1818	} else
1819#endif
1820	if (m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD)
1821		em_transmit_checksum_setup(txr, m_head,
1822		    &txd_upper, &txd_lower);
1823
1824	i = txr->next_avail_desc;
1825
1826	/* Set up our transmit descriptors */
1827	for (j = 0; j < nsegs; j++) {
1828		bus_size_t seg_len;
1829		bus_addr_t seg_addr;
1830
1831		tx_buffer = &txr->tx_buffers[i];
1832		ctxd = &txr->tx_base[i];
1833		seg_addr = segs[j].ds_addr;
1834		seg_len  = segs[j].ds_len;
1835		/*
1836		** TSO Workaround:
1837		** If this is the last descriptor, we want to
1838		** split it so we have a small final sentinel
1839		*/
1840		if (tso_desc && (j == (nsegs - 1)) && (seg_len > 8)) {
1841			seg_len -= 4;
1842			ctxd->buffer_addr = htole64(seg_addr);
1843			ctxd->lower.data = htole32(
1844			adapter->txd_cmd | txd_lower | seg_len);
1845			ctxd->upper.data =
1846			    htole32(txd_upper);
1847			if (++i == adapter->num_tx_desc)
1848				i = 0;
1849			/* Now make the sentinel */
1850			++txd_used; /* using an extra txd */
1851			ctxd = &txr->tx_base[i];
1852			tx_buffer = &txr->tx_buffers[i];
1853			ctxd->buffer_addr =
1854			    htole64(seg_addr + seg_len);
1855			ctxd->lower.data = htole32(
1856			adapter->txd_cmd | txd_lower | 4);
1857			ctxd->upper.data =
1858			    htole32(txd_upper);
1859			last = i;
1860			if (++i == adapter->num_tx_desc)
1861				i = 0;
1862		} else {
1863			ctxd->buffer_addr = htole64(seg_addr);
1864			ctxd->lower.data = htole32(
1865			adapter->txd_cmd | txd_lower | seg_len);
1866			ctxd->upper.data =
1867			    htole32(txd_upper);
1868			last = i;
1869			if (++i == adapter->num_tx_desc)
1870				i = 0;
1871		}
1872		tx_buffer->m_head = NULL;
1873		tx_buffer->next_eop = -1;
1874	}
1875
1876	txr->next_avail_desc = i;
1877	txr->tx_avail -= nsegs;
1878	if (tso_desc) /* TSO used an extra for sentinel */
1879		txr->tx_avail -= txd_used;
1880
1881	if (m_head->m_flags & M_VLANTAG) {
1882		/* Set the vlan id. */
1883		ctxd->upper.fields.special =
1884		    htole16(m_head->m_pkthdr.ether_vtag);
1885		/* Tell hardware to add tag */
1886		ctxd->lower.data |= htole32(E1000_TXD_CMD_VLE);
1887	}
1888
1889	tx_buffer->m_head = m_head;
1890	tx_buffer_mapped->map = tx_buffer->map;
1891	tx_buffer->map = map;
1892	bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE);
1893
1894	/*
1895	 * Last Descriptor of Packet
1896	 * needs End Of Packet (EOP)
1897	 * and Report Status (RS)
1898	 */
1899	ctxd->lower.data |=
1900	    htole32(E1000_TXD_CMD_EOP | E1000_TXD_CMD_RS);
1901	/*
1902	 * Keep track in the first buffer which
1903	 * descriptor will be written back
1904	 */
1905	tx_buffer = &txr->tx_buffers[first];
1906	tx_buffer->next_eop = last;
1907
1908	/*
1909	 * Advance the Transmit Descriptor Tail (TDT), this tells the E1000
1910	 * that this frame is available to transmit.
1911	 */
1912	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
1913	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1914	E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me), i);
1915	txr->watchdog_time = ticks;
1916
1917	/* Call cleanup if number of TX descriptors low */
1918	if (txr->tx_avail <= EM_TX_CLEANUP_THRESHOLD)
1919		em_txeof(txr);
1920
1921	return (0);
1922}
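
/*
 * Worked example (illustrative): a non-TSO packet that maps to three
 * DMA segments with first == 10 consumes descriptors 10, 11 and 12;
 * 'last' ends up as 12, only descriptor 12 gets EOP|RS set, and
 * tx_buffers[10].next_eop = 12, so em_txeof() can poll the DD bit of
 * descriptor 12 to learn that the whole packet has completed.
 */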
1923
1924static void
1925em_set_promisc(struct adapter *adapter)
1926{
1927	struct ifnet	*ifp = adapter->ifp;
1928	u32		reg_rctl;
1929
1930	reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
1931
1932	if (ifp->if_flags & IFF_PROMISC) {
1933		reg_rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
1934		/* Turn this on if you want to see bad packets */
1935		if (em_debug_sbp)
1936			reg_rctl |= E1000_RCTL_SBP;
1937		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
1938	} else if (ifp->if_flags & IFF_ALLMULTI) {
1939		reg_rctl |= E1000_RCTL_MPE;
1940		reg_rctl &= ~E1000_RCTL_UPE;
1941		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
1942	}
1943}
1944
1945static void
1946em_disable_promisc(struct adapter *adapter)
1947{
1948	u32	reg_rctl;
1949
1950	reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
1951
1952	reg_rctl &=  (~E1000_RCTL_UPE);
1953	reg_rctl &=  (~E1000_RCTL_MPE);
1954	reg_rctl &=  (~E1000_RCTL_SBP);
1955	E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
1956}
1957
1958
1959/*********************************************************************
1960 *  Multicast Update
1961 *
1962 *  This routine is called whenever multicast address list is updated.
1963 *
1964 **********************************************************************/
1965
1966static void
1967em_set_multi(struct adapter *adapter)
1968{
1969	struct ifnet	*ifp = adapter->ifp;
1970	struct ifmultiaddr *ifma;
1971	u32 reg_rctl = 0;
1972	u8  *mta; /* Multicast array memory */
1973	int mcnt = 0;
1974
1975	IOCTL_DEBUGOUT("em_set_multi: begin");
1976
1977	if (adapter->hw.mac.type == e1000_82542 &&
1978	    adapter->hw.revision_id == E1000_REVISION_2) {
1979		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
1980		if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
1981			e1000_pci_clear_mwi(&adapter->hw);
1982		reg_rctl |= E1000_RCTL_RST;
1983		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
1984		msec_delay(5);
1985	}
1986
1987	/* Allocate temporary memory to setup array */
1988	mta = malloc(sizeof(u8) *
1989	    (ETH_ADDR_LEN * MAX_NUM_MULTICAST_ADDRESSES),
1990	    M_DEVBUF, M_NOWAIT | M_ZERO);
1991	if (mta == NULL)
1992		panic("em_set_multi memory failure\n");
1993
1994#if __FreeBSD_version < 800000
1995	IF_ADDR_LOCK(ifp);
1996#else
1997	if_maddr_rlock(ifp);
1998#endif
1999	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
2000		if (ifma->ifma_addr->sa_family != AF_LINK)
2001			continue;
2002
2003		if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
2004			break;
2005
2006		bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
2007		    &mta[mcnt * ETH_ADDR_LEN], ETH_ADDR_LEN);
2008		mcnt++;
2009	}
2010#if __FreeBSD_version < 800000
2011	IF_ADDR_UNLOCK(ifp);
2012#else
2013	if_maddr_runlock(ifp);
2014#endif
2015	if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES) {
2016		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2017		reg_rctl |= E1000_RCTL_MPE;
2018		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2019	} else
2020		e1000_update_mc_addr_list(&adapter->hw, mta, mcnt);
2021
2022	if (adapter->hw.mac.type == e1000_82542 &&
2023	    adapter->hw.revision_id == E1000_REVISION_2) {
2024		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2025		reg_rctl &= ~E1000_RCTL_RST;
2026		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2027		msec_delay(5);
2028		if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
2029			e1000_pci_set_mwi(&adapter->hw);
2030	}
2031	free(mta, M_DEVBUF);
2032}
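
/*
 * Layout note (illustrative): mta is a flat array of ETH_ADDR_LEN (6)
 * byte entries, so the first address occupies mta[0..5], the second
 * mta[6..11], and so on up to MAX_NUM_MULTICAST_ADDRESSES entries --
 * the packed format e1000_update_mc_addr_list() expects.
 */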
2033
2034
2035/*********************************************************************
2036 *  Timer routine
2037 *
2038 *  This routine checks for link status and updates statistics.
2039 *
2040 **********************************************************************/
2041
2042static void
2043em_local_timer(void *arg)
2044{
2045	struct adapter	*adapter = arg;
2046	struct ifnet	*ifp = adapter->ifp;
2047	struct tx_ring	*txr = adapter->tx_rings;
2048
2049	EM_CORE_LOCK_ASSERT(adapter);
2050
2051	em_update_link_status(adapter);
2052	em_update_stats_counters(adapter);
2053
2054	/* Reset LAA into RAR[0] on 82571 */
2055	if (e1000_get_laa_state_82571(&adapter->hw) == TRUE)
2056		e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);
2057
2058	if (em_display_debug_stats && (ifp->if_drv_flags & IFF_DRV_RUNNING))
2059		em_print_hw_stats(adapter);
2060
2061	/*
2062	** Check for time since any descriptor was cleaned
2063	*/
2064	for (int i = 0; i < adapter->num_queues; i++, txr++) {
2065		EM_TX_LOCK(txr);
2066		if (txr->watchdog_check == FALSE) {
2067			EM_TX_UNLOCK(txr);
2068			continue;
2069		}
2070		if ((ticks - txr->watchdog_time) > EM_WATCHDOG)
2071			goto hung;
2072		EM_TX_UNLOCK(txr);
2073	}
2074
2075	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
2076	return;
2077hung:
2078	device_printf(adapter->dev, "Watchdog timeout -- resetting\n");
2079	ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2080	adapter->watchdog_events++;
2081	EM_TX_UNLOCK(txr);
2082	em_init_locked(adapter);
2083}
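
/*
 * Note on the watchdog above (illustrative): watchdog_time is stamped
 * with 'ticks' whenever a descriptor is posted or cleaned, so
 * (ticks - watchdog_time) > EM_WATCHDOG only becomes true when a ring
 * has made no forward progress for the whole timeout (EM_WATCHDOG is
 * assumed here to be a tick count, i.e. a multiple of hz).
 */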
2084
2085
2086static void
2087em_update_link_status(struct adapter *adapter)
2088{
2089	struct e1000_hw *hw = &adapter->hw;
2090	struct ifnet *ifp = adapter->ifp;
2091	device_t dev = adapter->dev;
2092	u32 link_check = 0;
2093
2094	/* Get the cached link value or read phy for real */
2095	switch (hw->phy.media_type) {
2096	case e1000_media_type_copper:
2097		if (hw->mac.get_link_status) {
2098			/* Do the work to read phy */
2099			e1000_check_for_link(hw);
2100			link_check = !hw->mac.get_link_status;
2101			if (link_check) /* ESB2 fix */
2102				e1000_cfg_on_link_up(hw);
2103		} else
2104			link_check = TRUE;
2105		break;
2106	case e1000_media_type_fiber:
2107		e1000_check_for_link(hw);
2108		link_check = (E1000_READ_REG(hw, E1000_STATUS) &
2109                                 E1000_STATUS_LU);
2110		break;
2111	case e1000_media_type_internal_serdes:
2112		e1000_check_for_link(hw);
2113		link_check = adapter->hw.mac.serdes_has_link;
2114		break;
2115	default:
2116	case e1000_media_type_unknown:
2117		break;
2118	}
2119
2120	/* Now check for a transition */
2121	if (link_check && (adapter->link_active == 0)) {
2122		e1000_get_speed_and_duplex(hw, &adapter->link_speed,
2123		    &adapter->link_duplex);
2124		/* Check if we must disable SPEED_MODE bit on PCI-E */
2125		if ((adapter->link_speed != SPEED_1000) &&
2126		    ((hw->mac.type == e1000_82571) ||
2127		    (hw->mac.type == e1000_82572))) {
2128			int tarc0;
2129			tarc0 = E1000_READ_REG(hw, E1000_TARC(0));
2130			tarc0 &= ~SPEED_MODE_BIT;
2131			E1000_WRITE_REG(hw, E1000_TARC(0), tarc0);
2132		}
2133		if (bootverbose)
2134			device_printf(dev, "Link is up %d Mbps %s\n",
2135			    adapter->link_speed,
2136			    ((adapter->link_duplex == FULL_DUPLEX) ?
2137			    "Full Duplex" : "Half Duplex"));
2138		adapter->link_active = 1;
2139		adapter->smartspeed = 0;
2140		ifp->if_baudrate = adapter->link_speed * 1000000;
2141		if_link_state_change(ifp, LINK_STATE_UP);
2142	} else if (!link_check && (adapter->link_active == 1)) {
2143		ifp->if_baudrate = adapter->link_speed = 0;
2144		adapter->link_duplex = 0;
2145		if (bootverbose)
2146			device_printf(dev, "Link is Down\n");
2147		adapter->link_active = 0;
2148		/* Link down, disable watchdog */
2149		// JFV change later
2150		//adapter->watchdog_check = FALSE;
2151		if_link_state_change(ifp, LINK_STATE_DOWN);
2152	}
2153}
2154
2155/*********************************************************************
2156 *
2157 *  This routine disables all traffic on the adapter by issuing a
2158 *  global reset on the MAC and deallocates TX/RX buffers.
2159 *
2160 *  This routine should always be called with BOTH the CORE
2161 *  and TX locks.
2162 **********************************************************************/
2163
2164static void
2165em_stop(void *arg)
2166{
2167	struct adapter	*adapter = arg;
2168	struct ifnet	*ifp = adapter->ifp;
2169	struct tx_ring	*txr = adapter->tx_rings;
2170
2171	EM_CORE_LOCK_ASSERT(adapter);
2172
2173	INIT_DEBUGOUT("em_stop: begin");
2174
2175	em_disable_intr(adapter);
2176	callout_stop(&adapter->timer);
2177
2178	/* Tell the stack that the interface is no longer active */
2179	ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
2180
2181	/* Unarm watchdog timer. */
2182	for (int i = 0; i < adapter->num_queues; i++, txr++) {
2183		EM_TX_LOCK(txr);
2184		txr->watchdog_check = FALSE;
2185		EM_TX_UNLOCK(txr);
2186	}
2187
2188	e1000_reset_hw(&adapter->hw);
2189	E1000_WRITE_REG(&adapter->hw, E1000_WUC, 0);
2190
2191	e1000_led_off(&adapter->hw);
2192	e1000_cleanup_led(&adapter->hw);
2193}
2194
2195
2196/*********************************************************************
2197 *
2198 *  Determine hardware revision.
2199 *
2200 **********************************************************************/
2201static void
2202em_identify_hardware(struct adapter *adapter)
2203{
2204	device_t dev = adapter->dev;
2205
2206	/* Make sure our PCI config space has the necessary stuff set */
2207	adapter->hw.bus.pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2);
2208	if (!((adapter->hw.bus.pci_cmd_word & PCIM_CMD_BUSMASTEREN) &&
2209	    (adapter->hw.bus.pci_cmd_word & PCIM_CMD_MEMEN))) {
2210		device_printf(dev, "Memory Access and/or Bus Master bits "
2211		    "were not set!\n");
2212		adapter->hw.bus.pci_cmd_word |=
2213		(PCIM_CMD_BUSMASTEREN | PCIM_CMD_MEMEN);
2214		pci_write_config(dev, PCIR_COMMAND,
2215		    adapter->hw.bus.pci_cmd_word, 2);
2216	}
2217
2218	/* Save off the information about this board */
2219	adapter->hw.vendor_id = pci_get_vendor(dev);
2220	adapter->hw.device_id = pci_get_device(dev);
2221	adapter->hw.revision_id = pci_read_config(dev, PCIR_REVID, 1);
2222	adapter->hw.subsystem_vendor_id =
2223	    pci_read_config(dev, PCIR_SUBVEND_0, 2);
2224	adapter->hw.subsystem_device_id =
2225	    pci_read_config(dev, PCIR_SUBDEV_0, 2);
2226
2227	/* Do Shared Code Init and Setup */
2228	if (e1000_set_mac_type(&adapter->hw)) {
2229		device_printf(dev, "Setup init failure\n");
2230		return;
2231	}
2232}
2233
2234static int
2235em_allocate_pci_resources(struct adapter *adapter)
2236{
2237	device_t	dev = adapter->dev;
2238	int		rid;
2239
2240	rid = PCIR_BAR(0);
2241	adapter->memory = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
2242	    &rid, RF_ACTIVE);
2243	if (adapter->memory == NULL) {
2244		device_printf(dev, "Unable to allocate bus resource: memory\n");
2245		return (ENXIO);
2246	}
2247	adapter->osdep.mem_bus_space_tag =
2248	    rman_get_bustag(adapter->memory);
2249	adapter->osdep.mem_bus_space_handle =
2250	    rman_get_bushandle(adapter->memory);
2251	adapter->hw.hw_addr = (u8 *)&adapter->osdep.mem_bus_space_handle;
2252
2253	/* Default to a single queue */
2254	adapter->num_queues = 1;
2255
2256	/*
2257	 * Setup MSI/X or MSI if PCI Express
2258	 */
2259	adapter->msix = em_setup_msix(adapter);
2260
2261	adapter->hw.back = &adapter->osdep;
2262
2263	return (0);
2264}
2265
2266/*********************************************************************
2267 *
2268 *  Setup the Legacy or MSI Interrupt handler
2269 *
2270 **********************************************************************/
2271int
2272em_allocate_legacy(struct adapter *adapter)
2273{
2274	device_t dev = adapter->dev;
2275	int error, rid = 0;
2276
2277	/* Manually turn off all interrupts */
2278	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2279
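	/* rid 0 selects the legacy INTx resource; MSI messages start at rid 1 */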
2280	if (adapter->msix == 1) /* using MSI */
2281		rid = 1;
2282	/* We allocate a single interrupt resource */
2283	adapter->res = bus_alloc_resource_any(dev,
2284	    SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2285	if (adapter->res == NULL) {
2286		device_printf(dev, "Unable to allocate bus resource: "
2287		    "interrupt\n");
2288		return (ENXIO);
2289	}
2290
2291	/*
2292	 * Allocate a fast interrupt and the associated
2293	 * deferred processing contexts.
2294	 */
2295	TASK_INIT(&adapter->que_task, 0, em_handle_que, adapter);
2296	TASK_INIT(&adapter->link_task, 0, em_handle_link, adapter);
2297	adapter->tq = taskqueue_create_fast("em_taskq", M_NOWAIT,
2298	    taskqueue_thread_enqueue, &adapter->tq);
2299	taskqueue_start_threads(&adapter->tq, 1, PI_NET, "%s taskq",
2300	    device_get_nameunit(adapter->dev));
2301	if ((error = bus_setup_intr(dev, adapter->res, INTR_TYPE_NET,
2302	    em_irq_fast, NULL, adapter, &adapter->tag)) != 0) {
2303		device_printf(dev, "Failed to register fast interrupt "
2304			    "handler: %d\n", error);
2305		taskqueue_free(adapter->tq);
2306		adapter->tq = NULL;
2307		return (error);
2308	}
2309
2310	return (0);
2311}
2312
2313/*********************************************************************
2314 *
2315 *  Setup the MSIX Interrupt handlers
2316 *   This is not really multiqueue, rather
2317 *   it's just multiple interrupt vectors.
2318 *
2319 **********************************************************************/
2320int
2321em_allocate_msix(struct adapter *adapter)
2322{
2323	device_t	dev = adapter->dev;
2324	struct		tx_ring *txr = adapter->tx_rings;
2325	struct		rx_ring *rxr = adapter->rx_rings;
2326	int		error, rid, vector = 0;
2327
2328
2329	/* Make sure all interrupts are disabled */
2330	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2331
2332	/* First set up ring resources */
2333	for (int i = 0; i < adapter->num_queues; i++, txr++, rxr++) {
2334
2335		/* RX ring */
2336		rid = vector + 1;
2337
2338		rxr->res = bus_alloc_resource_any(dev,
2339		    SYS_RES_IRQ, &rid, RF_ACTIVE);
2340		if (rxr->res == NULL) {
2341			device_printf(dev,
2342			    "Unable to allocate bus resource: "
2343			    "RX MSIX Interrupt %d\n", i);
2344			return (ENXIO);
2345		}
2346		if ((error = bus_setup_intr(dev, rxr->res,
2347		    INTR_TYPE_NET | INTR_MPSAFE, NULL, em_msix_rx,
2348		    rxr, &rxr->tag)) != 0) {
2349			device_printf(dev, "Failed to register RX handler");
2350			return (error);
2351		}
2352		rxr->msix = vector++; /* NOTE increment vector for TX */
2353		TASK_INIT(&rxr->rx_task, 0, em_handle_rx, rxr);
2354		rxr->tq = taskqueue_create_fast("em_rxq", M_NOWAIT,
2355		    taskqueue_thread_enqueue, &rxr->tq);
2356		taskqueue_start_threads(&rxr->tq, 1, PI_NET, "%s rxq",
2357		    device_get_nameunit(adapter->dev));
2358		/*
2359		** Set the bit to enable interrupt
2360		** in E1000_IMS -- bits 20 and 21
2361		** are for RX0 and RX1, note this has
2362		** NOTHING to do with the MSIX vector
2363		*/
2364		rxr->ims = 1 << (20 + i);
2365		adapter->ivars |= (8 | rxr->msix) << (i * 4);
2366
2367		/* TX ring */
2368		rid = vector + 1;
2369		txr->res = bus_alloc_resource_any(dev,
2370		    SYS_RES_IRQ, &rid, RF_ACTIVE);
2371		if (txr->res == NULL) {
2372			device_printf(dev,
2373			    "Unable to allocate bus resource: "
2374			    "TX MSIX Interrupt %d\n", i);
2375			return (ENXIO);
2376		}
2377		if ((error = bus_setup_intr(dev, txr->res,
2378		    INTR_TYPE_NET | INTR_MPSAFE, NULL, em_msix_tx,
2379		    txr, &txr->tag)) != 0) {
2380			device_printf(dev, "Failed to register TX handler");
2381			return (error);
2382		}
2383		txr->msix = vector++; /* Increment vector for next pass */
2384		TASK_INIT(&txr->tx_task, 0, em_handle_tx, txr);
2385		txr->tq = taskqueue_create_fast("em_txq", M_NOWAIT,
2386		    taskqueue_thread_enqueue, &txr->tq);
2387		taskqueue_start_threads(&txr->tq, 1, PI_NET, "%s txq",
2388		    device_get_nameunit(adapter->dev));
2389		/*
2390		** Set the bit to enable interrupt
2391		** in E1000_IMS -- bits 22 and 23
2392		** are for TX0 and TX1, note this has
2393		** NOTHING to do with the MSIX vector
2394		*/
2395		txr->ims = 1 << (22 + i);
2396		adapter->ivars |= (8 | txr->msix) << (8 + (i * 4));
2397	}
2398
2399	/* Link interrupt */
2400	++rid;
2401	adapter->res = bus_alloc_resource_any(dev,
2402	    SYS_RES_IRQ, &rid, RF_ACTIVE);
2403	if (!adapter->res) {
2404		device_printf(dev,"Unable to allocate "
2405		    "bus resource: Link interrupt [%d]\n", rid);
2406		return (ENXIO);
2407        }
2408	/* Set the link handler function */
2409	error = bus_setup_intr(dev, adapter->res,
2410	    INTR_TYPE_NET | INTR_MPSAFE, NULL,
2411	    em_msix_link, adapter, &adapter->tag);
2412	if (error) {
2413		adapter->res = NULL;
2414		device_printf(dev, "Failed to register LINK handler");
2415		return (error);
2416	}
2417	adapter->linkvec = vector;
2418	adapter->ivars |=  (8 | vector) << 16;
2419	adapter->ivars |= 0x80000000;
2420	TASK_INIT(&adapter->link_task, 0, em_handle_link, adapter);
2421	adapter->tq = taskqueue_create_fast("em_link", M_NOWAIT,
2422	    taskqueue_thread_enqueue, &adapter->tq);
2423	taskqueue_start_threads(&adapter->tq, 1, PI_NET, "%s linkq",
2424	    device_get_nameunit(adapter->dev));
2425
2426	return (0);
2427}
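
/*
 * Worked example (illustrative, single queue): the RX ring takes
 * vector 0, the TX ring vector 1 and the link interrupt vector 2,
 * so the loop and link setup above accumulate:
 *	ivars  = (8 | 0) << 0	= 0x00000008	(RX0  -> vector 0)
 *	ivars |= (8 | 1) << 8	= 0x00000908	(TX0  -> vector 1)
 *	ivars |= (8 | 2) << 16	= 0x000A0908	(link -> vector 2)
 *	ivars |= 0x80000000	= 0x800A0908
 */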
2428
2429
2430static void
2431em_free_pci_resources(struct adapter *adapter)
2432{
2433	device_t	dev = adapter->dev;
2434	struct tx_ring	*txr;
2435	struct rx_ring	*rxr;
2436	int		rid;
2437
2438
2439	/*
2440	** Release all the queue interrupt resources:
2441	*/
2442	for (int i = 0; i < adapter->num_queues; i++) {
2443		txr = &adapter->tx_rings[i];
2444		rxr = &adapter->rx_rings[i];
2445		rid = txr->msix +1;
2446		if (txr->tag != NULL) {
2447			bus_teardown_intr(dev, txr->res, txr->tag);
2448			txr->tag = NULL;
2449		}
2450		if (txr->res != NULL)
2451			bus_release_resource(dev, SYS_RES_IRQ,
2452			    rid, txr->res);
2453		rid = rxr->msix +1;
2454		if (rxr->tag != NULL) {
2455			bus_teardown_intr(dev, rxr->res, rxr->tag);
2456			rxr->tag = NULL;
2457		}
2458		if (rxr->res != NULL)
2459			bus_release_resource(dev, SYS_RES_IRQ,
2460			    rid, rxr->res);
2461	}
2462
2463	if (adapter->linkvec) /* we are doing MSIX */
2464		rid = adapter->linkvec + 1;
2465	else
2466		rid = (adapter->msix != 0) ? 1 : 0;
2467
2468	if (adapter->tag != NULL) {
2469		bus_teardown_intr(dev, adapter->res, adapter->tag);
2470		adapter->tag = NULL;
2471	}
2472
2473	if (adapter->res != NULL)
2474		bus_release_resource(dev, SYS_RES_IRQ, rid, adapter->res);
2475
2476
2477	if (adapter->msix)
2478		pci_release_msi(dev);
2479
2480	if (adapter->msix_mem != NULL)
2481		bus_release_resource(dev, SYS_RES_MEMORY,
2482		    PCIR_BAR(EM_MSIX_BAR), adapter->msix_mem);
2483
2484	if (adapter->memory != NULL)
2485		bus_release_resource(dev, SYS_RES_MEMORY,
2486		    PCIR_BAR(0), adapter->memory);
2487
2488	if (adapter->flash != NULL)
2489		bus_release_resource(dev, SYS_RES_MEMORY,
2490		    EM_FLASH, adapter->flash);
2491}
2492
2493/*
2494 * Setup MSI or MSI/X
2495 */
2496static int
2497em_setup_msix(struct adapter *adapter)
2498{
2499	device_t dev = adapter->dev;
2500	int val = 0;
2501
2502
2503	/* Setup MSI/X for Hartwell */
2504	if ((adapter->hw.mac.type == e1000_82574) &&
2505	    (em_enable_msix == TRUE)) {
2506		/* Map the MSIX BAR */
2507		int rid = PCIR_BAR(EM_MSIX_BAR);
2508		adapter->msix_mem = bus_alloc_resource_any(dev,
2509		    SYS_RES_MEMORY, &rid, RF_ACTIVE);
2510		if (!adapter->msix_mem) {
2511			/* May not be enabled */
2512			device_printf(adapter->dev,
2513			    "Unable to map MSIX table\n");
2514			goto msi;
2515		}
2516		val = pci_msix_count(dev);
2517		if (val != 5) {
2518			bus_release_resource(dev, SYS_RES_MEMORY,
2519			    PCIR_BAR(EM_MSIX_BAR), adapter->msix_mem);
2520			adapter->msix_mem = NULL;
2521			device_printf(adapter->dev,
2522			    "MSIX vectors wrong, using MSI\n");
2523			goto msi;
2524		}
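		/*
		 * Note (illustrative): the 82574 exposes exactly five
		 * MSI-X vectors, which is why val == 5 is required
		 * above; with em_msix_queues == 2 all five are used
		 * (2 RX, 2 TX, 1 link), otherwise three (1 RX, 1 TX,
		 * 1 link).
		 */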
2525		if (em_msix_queues == 2) {
2526			val = 5;
2527			adapter->num_queues = 2;
2528		} else {
2529			val = 3;
2530			adapter->num_queues = 1;
2531		}
2532		if (pci_alloc_msix(dev, &val) == 0) {
2533			device_printf(adapter->dev,
2534			    "Using MSIX interrupts "
2535			    "with %d vectors\n", val);
2536		}
2537
2538		return (val);
2539	}
2540msi:
2541	val = pci_msi_count(dev);
2542	if (val == 1 && pci_alloc_msi(dev, &val) == 0) {
2543		adapter->msix = 1;
2544		device_printf(adapter->dev, "Using MSI interrupt\n");
2545		return (val);
2546	}
2547	/* Should only happen due to manual intervention */
2548	device_printf(adapter->dev, "Setup MSIX failure\n");
2549	return (0);
2550}
2551
2552
2553/*********************************************************************
2554 *
2555 *  Initialize the hardware to a configuration
2556 *  as specified by the adapter structure.
2557 *
2558 **********************************************************************/
2559static void
2560em_reset(struct adapter *adapter)
2561{
2562	device_t	dev = adapter->dev;
2563	struct e1000_hw	*hw = &adapter->hw;
2564	u16		rx_buffer_size;
2565
2566	INIT_DEBUGOUT("em_reset: begin");
2567
2568	/* Set up smart power down as default off on newer adapters. */
2569	if (!em_smart_pwr_down && (hw->mac.type == e1000_82571 ||
2570	    hw->mac.type == e1000_82572)) {
2571		u16 phy_tmp = 0;
2572
2573		/* Speed up time to link by disabling smart power down. */
2574		e1000_read_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, &phy_tmp);
2575		phy_tmp &= ~IGP02E1000_PM_SPD;
2576		e1000_write_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, phy_tmp);
2577	}
2578
2579	/*
2580	 * These parameters control the automatic generation (Tx) and
2581	 * response (Rx) to Ethernet PAUSE frames.
2582	 * - High water mark should allow for at least two frames to be
2583	 *   received after sending an XOFF.
2584	 * - Low water mark works best when it is very near the high water mark.
2585	 *   This allows the receiver to restart by sending XON when it has
2586	 *   drained a bit. Here we use an arbitrary value of 1500 which will
2587	 *   restart after one full frame is pulled from the buffer. There
2588	 *   could be several smaller frames in the buffer and if so they will
2589	 *   not trigger the XON until their total number reduces the buffer
2590	 *   by 1500.
2591	 * - The pause time is fairly large at 1000 x 512ns = 512 usec.
2592	 */
2593	rx_buffer_size = ((E1000_READ_REG(hw, E1000_PBA) & 0xffff) << 10 );
2594
2595	hw->fc.high_water = rx_buffer_size -
2596	    roundup2(adapter->max_frame_size, 1024);
2597	hw->fc.low_water = hw->fc.high_water - 1500;
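
	/*
	 * Worked example (illustrative): with 48 KB of RX packet buffer
	 * reported by PBA and a 1518 byte max frame, rx_buffer_size =
	 * 48 << 10 = 49152, high_water = 49152 - roundup2(1518, 1024) =
	 * 49152 - 2048 = 47104, and low_water = 47104 - 1500 = 45604.
	 */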
2598
2599	if (hw->mac.type == e1000_80003es2lan)
2600		hw->fc.pause_time = 0xFFFF;
2601	else
2602		hw->fc.pause_time = EM_FC_PAUSE_TIME;
2603
2604	hw->fc.send_xon = TRUE;
2605
2606	/* Set flow control, use the tunable value if sane */
2607	if ((em_fc_setting >= 0) && (em_fc_setting < 4))
2608		hw->fc.requested_mode = em_fc_setting;
2609	else
2610		hw->fc.requested_mode = e1000_fc_none;
2611
2612	/* Override - workaround for PCHLAN issue */
2613	if (hw->mac.type == e1000_pchlan)
2614		hw->fc.requested_mode = e1000_fc_rx_pause;
2615
2616	/* Issue a global reset */
2617	e1000_reset_hw(hw);
2618	E1000_WRITE_REG(hw, E1000_WUC, 0);
2619
2620	if (e1000_init_hw(hw) < 0) {
2621		device_printf(dev, "Hardware Initialization Failed\n");
2622		return;
2623	}
2624
2625	E1000_WRITE_REG(hw, E1000_VET, ETHERTYPE_VLAN);
2626	e1000_get_phy_info(hw);
2627	e1000_check_for_link(hw);
2628	return;
2629}
2630
2631/*********************************************************************
2632 *
2633 *  Setup networking device structure and register an interface.
2634 *
2635 **********************************************************************/
2636static void
2637em_setup_interface(device_t dev, struct adapter *adapter)
2638{
2639	struct ifnet   *ifp;
2640
2641	INIT_DEBUGOUT("em_setup_interface: begin");
2642
2643	ifp = adapter->ifp = if_alloc(IFT_ETHER);
2644	if (ifp == NULL)
2645		panic("%s: can not if_alloc()", device_get_nameunit(dev));
2646	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
2647	ifp->if_mtu = ETHERMTU;
2648	ifp->if_init =  em_init;
2649	ifp->if_softc = adapter;
2650	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
2651	ifp->if_ioctl = em_ioctl;
2652	ifp->if_start = em_start;
2653	IFQ_SET_MAXLEN(&ifp->if_snd, adapter->num_tx_desc - 1);
2654	ifp->if_snd.ifq_drv_maxlen = adapter->num_tx_desc - 1;
2655	IFQ_SET_READY(&ifp->if_snd);
2656
2657	ether_ifattach(ifp, adapter->hw.mac.addr);
2658
2659	ifp->if_capabilities = ifp->if_capenable = 0;
2660
2661#if __FreeBSD_version >= 800000
2662	/* Multiqueue tx functions */
2663	ifp->if_transmit = em_mq_start;
2664	ifp->if_qflush = em_qflush;
2665#endif
2666
2667	ifp->if_capabilities |= IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM;
2668	ifp->if_capenable |= IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM;
2669
2670	/* Enable TSO by default, can disable with ifconfig */
2671	ifp->if_capabilities |= IFCAP_TSO4;
2672	ifp->if_capenable |= IFCAP_TSO4;
2673
2674	/*
2675	 * Tell the upper layer(s) we
2676	 * support full VLAN capability
2677	 */
2678	ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header);
2679	ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU;
2680	ifp->if_capenable |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU;
2681
2682	/*
2683	** Don't turn this on by default: if vlans are
2684	** created on another pseudo device (e.g. lagg)
2685	** then vlan events are not passed through, breaking
2686	** operation, but with HW FILTER off it works. If
2687	** using vlans directly on the em driver you can
2688	** enable this and get full hardware tag filtering.
2689	*/
2690	ifp->if_capabilities |= IFCAP_VLAN_HWFILTER;
2691
2692#ifdef DEVICE_POLLING
2693	ifp->if_capabilities |= IFCAP_POLLING;
2694#endif
2695
2696	/* Enable All WOL methods by default */
2697	if (adapter->wol) {
2698		ifp->if_capabilities |= IFCAP_WOL;
2699		ifp->if_capenable |= IFCAP_WOL;
2700	}
2701
2702	/*
2703	 * Specify the media types supported by this adapter and register
2704	 * callbacks to update media and link information
2705	 */
2706	ifmedia_init(&adapter->media, IFM_IMASK,
2707	    em_media_change, em_media_status);
2708	if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
2709	    (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
2710		u_char fiber_type = IFM_1000_SX;	/* default type */
2711
2712		ifmedia_add(&adapter->media, IFM_ETHER | fiber_type | IFM_FDX,
2713			    0, NULL);
2714		ifmedia_add(&adapter->media, IFM_ETHER | fiber_type, 0, NULL);
2715	} else {
2716		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T, 0, NULL);
2717		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T | IFM_FDX,
2718			    0, NULL);
2719		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX,
2720			    0, NULL);
2721		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX | IFM_FDX,
2722			    0, NULL);
2723		if (adapter->hw.phy.type != e1000_phy_ife) {
2724			ifmedia_add(&adapter->media,
2725				IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
2726			ifmedia_add(&adapter->media,
2727				IFM_ETHER | IFM_1000_T, 0, NULL);
2728		}
2729	}
2730	ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL);
2731	ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO);
2732}
2733
2734
2735/*
2736 * Manage DMA'able memory.
2737 */
2738static void
2739em_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
2740{
2741	if (error)
2742		return;
2743	*(bus_addr_t *) arg = segs[0].ds_addr;
2744}
2745
2746static int
2747em_dma_malloc(struct adapter *adapter, bus_size_t size,
2748        struct em_dma_alloc *dma, int mapflags)
2749{
2750	int error;
2751
2752	error = bus_dma_tag_create(bus_get_dma_tag(adapter->dev), /* parent */
2753				EM_DBA_ALIGN, 0,	/* alignment, bounds */
2754				BUS_SPACE_MAXADDR,	/* lowaddr */
2755				BUS_SPACE_MAXADDR,	/* highaddr */
2756				NULL, NULL,		/* filter, filterarg */
2757				size,			/* maxsize */
2758				1,			/* nsegments */
2759				size,			/* maxsegsize */
2760				0,			/* flags */
2761				NULL,			/* lockfunc */
2762				NULL,			/* lockarg */
2763				&dma->dma_tag);
2764	if (error) {
2765		device_printf(adapter->dev,
2766		    "%s: bus_dma_tag_create failed: %d\n",
2767		    __func__, error);
2768		goto fail_0;
2769	}
2770
2771	error = bus_dmamem_alloc(dma->dma_tag, (void**) &dma->dma_vaddr,
2772	    BUS_DMA_NOWAIT | BUS_DMA_COHERENT, &dma->dma_map);
2773	if (error) {
2774		device_printf(adapter->dev,
2775		    "%s: bus_dmamem_alloc(%ju) failed: %d\n",
2776		    __func__, (uintmax_t)size, error);
2777		goto fail_2;
2778	}
2779
2780	dma->dma_paddr = 0;
2781	error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
2782	    size, em_dmamap_cb, &dma->dma_paddr, mapflags | BUS_DMA_NOWAIT);
2783	if (error || dma->dma_paddr == 0) {
2784		device_printf(adapter->dev,
2785		    "%s: bus_dmamap_load failed: %d\n",
2786		    __func__, error);
2787		goto fail_3;
2788	}
2789
2790	return (0);
2791
2792fail_3:
2793	bus_dmamap_unload(dma->dma_tag, dma->dma_map);
2794fail_2:
2795	bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
2796	bus_dma_tag_destroy(dma->dma_tag);
2797fail_0:
2798	dma->dma_map = NULL;
2799	dma->dma_tag = NULL;
2800
2801	return (error);
2802}
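
/*
 * Usage note (illustrative): em_allocate_queues() below obtains each
 * descriptor ring with em_dma_malloc(adapter, tsize, &txr->txdma,
 * BUS_DMA_NOWAIT) and releases it with em_dma_free(); dma_vaddr is
 * the ring's KVA and dma_paddr is the bus address later programmed
 * into TDBAL/TDBAH.
 */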
2803
2804static void
2805em_dma_free(struct adapter *adapter, struct em_dma_alloc *dma)
2806{
2807	if (dma->dma_tag == NULL)
2808		return;
2809	if (dma->dma_map != NULL) {
2810		bus_dmamap_sync(dma->dma_tag, dma->dma_map,
2811		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
2812		bus_dmamap_unload(dma->dma_tag, dma->dma_map);
2813		bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
2814		dma->dma_map = NULL;
2815	}
2816	bus_dma_tag_destroy(dma->dma_tag);
2817	dma->dma_tag = NULL;
2818}
2819
2820
2821/*********************************************************************
2822 *
2823 *  Allocate memory for the transmit and receive rings, and then
2824 *  the descriptors associated with each, called only once at attach.
2825 *
2826 **********************************************************************/
2827static int
2828em_allocate_queues(struct adapter *adapter)
2829{
2830	device_t		dev = adapter->dev;
2831	struct tx_ring		*txr = NULL;
2832	struct rx_ring		*rxr = NULL;
2833	int rsize, tsize, error = E1000_SUCCESS;
2834	int txconf = 0, rxconf = 0;
2835
2836
2837	/* Allocate the TX ring struct memory */
2838	if (!(adapter->tx_rings =
2839	    (struct tx_ring *) malloc(sizeof(struct tx_ring) *
2840	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2841		device_printf(dev, "Unable to allocate TX ring memory\n");
2842		error = ENOMEM;
2843		goto fail;
2844	}
2845
2846	/* Now allocate the RX */
2847	if (!(adapter->rx_rings =
2848	    (struct rx_ring *) malloc(sizeof(struct rx_ring) *
2849	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2850		device_printf(dev, "Unable to allocate RX ring memory\n");
2851		error = ENOMEM;
2852		goto rx_fail;
2853	}
2854
2855	tsize = roundup2(adapter->num_tx_desc *
2856	    sizeof(struct e1000_tx_desc), EM_DBA_ALIGN);
2857	/*
2858	 * Now set up the TX queues, txconf is needed to handle the
2859	 * possibility that things fail midcourse and we need to
2860	 * undo memory gracefully
2861	 */
2862	for (int i = 0; i < adapter->num_queues; i++, txconf++) {
2863		/* Set up some basics */
2864		txr = &adapter->tx_rings[i];
2865		txr->adapter = adapter;
2866		txr->me = i;
2867
2868		/* Initialize the TX lock */
2869		snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
2870		    device_get_nameunit(dev), txr->me);
2871		mtx_init(&txr->tx_mtx, txr->mtx_name, NULL, MTX_DEF);
2872
2873		if (em_dma_malloc(adapter, tsize,
2874			&txr->txdma, BUS_DMA_NOWAIT)) {
2875			device_printf(dev,
2876			    "Unable to allocate TX Descriptor memory\n");
2877			error = ENOMEM;
2878			goto err_tx_desc;
2879		}
2880		txr->tx_base = (struct e1000_tx_desc *)txr->txdma.dma_vaddr;
2881		bzero((void *)txr->tx_base, tsize);
2882
2883        	if (em_allocate_transmit_buffers(txr)) {
2884			device_printf(dev,
2885			    "Critical Failure setting up transmit buffers\n");
2886			error = ENOMEM;
2887			goto err_tx_desc;
2888        	}
2889#if __FreeBSD_version >= 800000
2890		/* Allocate a buf ring */
2891		txr->br = buf_ring_alloc(4096, M_DEVBUF,
2892		    M_WAITOK, &txr->tx_mtx);
2893#endif
2894	}
2895
2896	/*
2897	 * Next the RX queues...
2898	 */
2899	rsize = roundup2(adapter->num_rx_desc *
2900	    sizeof(struct e1000_rx_desc), EM_DBA_ALIGN);
2901	for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
2902		rxr = &adapter->rx_rings[i];
2903		rxr->adapter = adapter;
2904		rxr->me = i;
2905
2906		/* Initialize the RX lock */
2907		snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
2908		    device_get_nameunit(dev), rxr->me);
2909		mtx_init(&rxr->rx_mtx, rxr->mtx_name, NULL, MTX_DEF);
2910
2911		if (em_dma_malloc(adapter, rsize,
2912			&rxr->rxdma, BUS_DMA_NOWAIT)) {
2913			device_printf(dev,
2914			    "Unable to allocate RxDescriptor memory\n");
2915			error = ENOMEM;
2916			goto err_rx_desc;
2917		}
2918		rxr->rx_base = (struct e1000_rx_desc *)rxr->rxdma.dma_vaddr;
2919		bzero((void *)rxr->rx_base, rsize);
2920
2921        	/* Allocate receive buffers for the ring*/
2922		if (em_allocate_receive_buffers(rxr)) {
2923			device_printf(dev,
2924			    "Critical Failure setting up receive buffers\n");
2925			error = ENOMEM;
2926			goto err_rx_desc;
2927		}
2928	}
2929
2930	return (0);
2931
2932err_rx_desc:
2933	for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
2934		em_dma_free(adapter, &rxr->rxdma);
2935err_tx_desc:
2936	for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
2937		em_dma_free(adapter, &txr->txdma);
2938	free(adapter->rx_rings, M_DEVBUF);
2939rx_fail:
#if __FreeBSD_version >= 800000
2940	buf_ring_free(txr->br, M_DEVBUF);
#endif
2941	free(adapter->tx_rings, M_DEVBUF);
2942fail:
2943	return (error);
2944}
2945
2946
2947/*********************************************************************
2948 *
2949 *  Allocate memory for tx_buffer structures. The tx_buffer stores all
2950 *  the information needed to transmit a packet on the wire. This is
2951 *  called only once at attach, setup is done every reset.
2952 *
2953 **********************************************************************/
2954static int
2955em_allocate_transmit_buffers(struct tx_ring *txr)
2956{
2957	struct adapter *adapter = txr->adapter;
2958	device_t dev = adapter->dev;
2959	struct em_buffer *txbuf;
2960	int error, i;
2961
2962	/*
2963	 * Setup DMA descriptor areas.
2964	 */
2965	if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
2966			       1, 0,			/* alignment, bounds */
2967			       BUS_SPACE_MAXADDR,	/* lowaddr */
2968			       BUS_SPACE_MAXADDR,	/* highaddr */
2969			       NULL, NULL,		/* filter, filterarg */
2970			       EM_TSO_SIZE,		/* maxsize */
2971			       EM_MAX_SCATTER,		/* nsegments */
2972			       PAGE_SIZE,		/* maxsegsize */
2973			       0,			/* flags */
2974			       NULL,			/* lockfunc */
2975			       NULL,			/* lockfuncarg */
2976			       &txr->txtag))) {
2977		device_printf(dev,"Unable to allocate TX DMA tag\n");
2978		goto fail;
2979	}
2980
2981	if (!(txr->tx_buffers =
2982	    (struct em_buffer *) malloc(sizeof(struct em_buffer) *
2983	    adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2984		device_printf(dev, "Unable to allocate tx_buffer memory\n");
2985		error = ENOMEM;
2986		goto fail;
2987	}
2988
2989        /* Create the descriptor buffer dma maps */
2990	txbuf = txr->tx_buffers;
2991	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
2992		error = bus_dmamap_create(txr->txtag, 0, &txbuf->map);
2993		if (error != 0) {
2994			device_printf(dev, "Unable to create TX DMA map\n");
2995			goto fail;
2996		}
2997	}
2998
2999	return 0;
3000fail:
3001	/* We free everything; this handles the case where we failed partway through */
3002	em_free_transmit_structures(adapter);
3003	return (error);
3004}
3005
3006/*********************************************************************
3007 *
3008 *  Initialize a transmit ring.
3009 *
3010 **********************************************************************/
3011static void
3012em_setup_transmit_ring(struct tx_ring *txr)
3013{
3014	struct adapter *adapter = txr->adapter;
3015	struct em_buffer *txbuf;
3016	int i;
3017
3018	/* Clear the old descriptor contents */
3019	EM_TX_LOCK(txr);
3020	bzero((void *)txr->tx_base,
3021	      (sizeof(struct e1000_tx_desc)) * adapter->num_tx_desc);
3022	/* Reset indices */
3023	txr->next_avail_desc = 0;
3024	txr->next_to_clean = 0;
3025
3026	/* Free any existing tx buffers. */
3027        txbuf = txr->tx_buffers;
3028	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3029		if (txbuf->m_head != NULL) {
3030			bus_dmamap_sync(txr->txtag, txbuf->map,
3031			    BUS_DMASYNC_POSTWRITE);
3032			bus_dmamap_unload(txr->txtag, txbuf->map);
3033			m_freem(txbuf->m_head);
3034			txbuf->m_head = NULL;
3035		}
3036		/* clear the watch index */
3037		txbuf->next_eop = -1;
3038        }
3039
3040	/* Set number of descriptors available */
3041	txr->tx_avail = adapter->num_tx_desc;
3042
3043	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3044	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3045	EM_TX_UNLOCK(txr);
3046}
3047
3048/*********************************************************************
3049 *
3050 *  Initialize all transmit rings.
3051 *
3052 **********************************************************************/
3053static void
3054em_setup_transmit_structures(struct adapter *adapter)
3055{
3056	struct tx_ring *txr = adapter->tx_rings;
3057
3058	for (int i = 0; i < adapter->num_queues; i++, txr++)
3059		em_setup_transmit_ring(txr);
3060
3061	return;
3062}
3063
3064/*********************************************************************
3065 *
3066 *  Enable transmit unit.
3067 *
3068 **********************************************************************/
3069static void
3070em_initialize_transmit_unit(struct adapter *adapter)
3071{
3072	struct tx_ring	*txr = adapter->tx_rings;
3073	struct e1000_hw	*hw = &adapter->hw;
3074	u32	tctl, tarc, tipg = 0;
3075
3076	 INIT_DEBUGOUT("em_initialize_transmit_unit: begin");
3077
3078	for (int i = 0; i < adapter->num_queues; i++, txr++) {
3079		u64 bus_addr = txr->txdma.dma_paddr;
3080		/* Base and Len of TX Ring */
3081		E1000_WRITE_REG(hw, E1000_TDLEN(i),
3082	    	    adapter->num_tx_desc * sizeof(struct e1000_tx_desc));
3083		E1000_WRITE_REG(hw, E1000_TDBAH(i),
3084	    	    (u32)(bus_addr >> 32));
3085		E1000_WRITE_REG(hw, E1000_TDBAL(i),
3086	    	    (u32)bus_addr);
3087		/* Init the HEAD/TAIL indices */
3088		E1000_WRITE_REG(hw, E1000_TDT(i), 0);
3089		E1000_WRITE_REG(hw, E1000_TDH(i), 0);
3090
3091		HW_DEBUGOUT2("Base = %x, Length = %x\n",
3092		    E1000_READ_REG(&adapter->hw, E1000_TDBAL(i)),
3093		    E1000_READ_REG(&adapter->hw, E1000_TDLEN(i)));
3094
3095		txr->watchdog_check = FALSE;
3096	}
3097
3098	/* Set the default values for the Tx Inter Packet Gap timer */
3099	switch (adapter->hw.mac.type) {
3100	case e1000_82542:
3101		tipg = DEFAULT_82542_TIPG_IPGT;
3102		tipg |= DEFAULT_82542_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT;
3103		tipg |= DEFAULT_82542_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT;
3104		break;
3105	case e1000_80003es2lan:
3106		tipg = DEFAULT_82543_TIPG_IPGR1;
3107		tipg |= DEFAULT_80003ES2LAN_TIPG_IPGR2 <<
3108		    E1000_TIPG_IPGR2_SHIFT;
3109		break;
3110	default:
3111		if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
3112		    (adapter->hw.phy.media_type ==
3113		    e1000_media_type_internal_serdes))
3114			tipg = DEFAULT_82543_TIPG_IPGT_FIBER;
3115		else
3116			tipg = DEFAULT_82543_TIPG_IPGT_COPPER;
3117		tipg |= DEFAULT_82543_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT;
3118		tipg |= DEFAULT_82543_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT;
3119	}
3120
3121	E1000_WRITE_REG(&adapter->hw, E1000_TIPG, tipg);
3122	E1000_WRITE_REG(&adapter->hw, E1000_TIDV, adapter->tx_int_delay.value);
3123
3124	if (adapter->hw.mac.type >= e1000_82540)
3125		E1000_WRITE_REG(&adapter->hw, E1000_TADV,
3126		    adapter->tx_abs_int_delay.value);
3127
3128	if ((adapter->hw.mac.type == e1000_82571) ||
3129	    (adapter->hw.mac.type == e1000_82572)) {
3130		tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
3131		tarc |= SPEED_MODE_BIT;
3132		E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3133	} else if (adapter->hw.mac.type == e1000_80003es2lan) {
3134		tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
3135		tarc |= 1;
3136		E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3137		tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(1));
3138		tarc |= 1;
3139		E1000_WRITE_REG(&adapter->hw, E1000_TARC(1), tarc);
3140	}
3141
3142	adapter->txd_cmd = E1000_TXD_CMD_IFCS;
3143	if (adapter->tx_int_delay.value > 0)
3144		adapter->txd_cmd |= E1000_TXD_CMD_IDE;
3145
3146	/* Program the Transmit Control Register */
3147	tctl = E1000_READ_REG(&adapter->hw, E1000_TCTL);
3148	tctl &= ~E1000_TCTL_CT;
3149	tctl |= (E1000_TCTL_PSP | E1000_TCTL_RTLC | E1000_TCTL_EN |
3150		   (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT));
3151
3152	if (adapter->hw.mac.type >= e1000_82571)
3153		tctl |= E1000_TCTL_MULR;
3154
3155	/* This write will effectively turn on the transmit unit. */
3156	E1000_WRITE_REG(&adapter->hw, E1000_TCTL, tctl);
3157
3158}
3159
3160
3161/*********************************************************************
3162 *
3163 *  Free all transmit rings.
3164 *
3165 **********************************************************************/
3166static void
3167em_free_transmit_structures(struct adapter *adapter)
3168{
3169	struct tx_ring *txr = adapter->tx_rings;
3170
3171	for (int i = 0; i < adapter->num_queues; i++, txr++) {
3172		EM_TX_LOCK(txr);
3173		em_free_transmit_buffers(txr);
3174		em_dma_free(adapter, &txr->txdma);
3175		EM_TX_UNLOCK(txr);
3176		EM_TX_LOCK_DESTROY(txr);
3177	}
3178
3179	free(adapter->tx_rings, M_DEVBUF);
3180}
3181
3182/*********************************************************************
3183 *
3184 *  Free transmit ring related data structures.
3185 *
3186 **********************************************************************/
3187static void
3188em_free_transmit_buffers(struct tx_ring *txr)
3189{
3190	struct adapter		*adapter = txr->adapter;
3191	struct em_buffer	*txbuf;
3192
3193	INIT_DEBUGOUT("free_transmit_ring: begin");
3194
3195	if (txr->tx_buffers == NULL)
3196		return;
3197
3198	for (int i = 0; i < adapter->num_tx_desc; i++) {
3199		txbuf = &txr->tx_buffers[i];
3200		if (txbuf->m_head != NULL) {
3201			bus_dmamap_sync(txr->txtag, txbuf->map,
3202			    BUS_DMASYNC_POSTWRITE);
3203			bus_dmamap_unload(txr->txtag,
3204			    txbuf->map);
3205			m_freem(txbuf->m_head);
3206			txbuf->m_head = NULL;
3207			if (txbuf->map != NULL) {
3208				bus_dmamap_destroy(txr->txtag,
3209				    txbuf->map);
3210				txbuf->map = NULL;
3211			}
3212		} else if (txbuf->map != NULL) {
3213			bus_dmamap_unload(txr->txtag,
3214			    txbuf->map);
3215			bus_dmamap_destroy(txr->txtag,
3216			    txbuf->map);
3217			txbuf->map = NULL;
3218		}
3219	}
3220#if __FreeBSD_version >= 800000
3221	if (txr->br != NULL)
3222		buf_ring_free(txr->br, M_DEVBUF);
3223#endif
3224	if (txr->tx_buffers != NULL) {
3225		free(txr->tx_buffers, M_DEVBUF);
3226		txr->tx_buffers = NULL;
3227	}
3228	if (txr->txtag != NULL) {
3229		bus_dma_tag_destroy(txr->txtag);
3230		txr->txtag = NULL;
3231	}
3232	return;
3233}
3234
3235
3236/*********************************************************************
3237 *
3238 *  The offload context needs to be set when we transfer the first
3239 *  packet of a particular protocol (TCP/UDP). This routine has been
3240 *  enhanced to deal with inserted VLAN headers, and IPV6 (not complete)
3241 *
3242 *  Added back the old method of keeping the current context type
3243 *  and not setting if unnecessary, as this is reported to be a
3244 *  big performance win.  -jfv
3245 **********************************************************************/
3246static void
3247em_transmit_checksum_setup(struct tx_ring *txr, struct mbuf *mp,
3248    u32 *txd_upper, u32 *txd_lower)
3249{
3250	struct adapter			*adapter = txr->adapter;
3251	struct e1000_context_desc	*TXD = NULL;
3252	struct em_buffer *tx_buffer;
3253	struct ether_vlan_header *eh;
3254	struct ip *ip = NULL;
3255	struct ip6_hdr *ip6;
3256	int cur, ehdrlen;
3257	u32 cmd, hdr_len, ip_hlen;
3258	u16 etype;
3259	u8 ipproto;
3260
3261
3262	cmd = hdr_len = ipproto = 0;
3263	cur = txr->next_avail_desc;
3264
3265	/*
3266	 * Determine where frame payload starts.
3267	 * Jump over vlan headers if already present,
3268	 * helpful for QinQ too.
3269	 */
3270	eh = mtod(mp, struct ether_vlan_header *);
3271	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
3272		etype = ntohs(eh->evl_proto);
3273		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3274	} else {
3275		etype = ntohs(eh->evl_encap_proto);
3276		ehdrlen = ETHER_HDR_LEN;
3277	}
3278
3279	/*
3280	 * We only support TCP/UDP for IPv4 and IPv6 for the moment.
3281	 * TODO: Support SCTP too when it hits the tree.
3282	 */
3283	switch (etype) {
3284	case ETHERTYPE_IP:
3285		ip = (struct ip *)(mp->m_data + ehdrlen);
3286		ip_hlen = ip->ip_hl << 2;
3287
3288		/* Setup of IP header checksum. */
3289		if (mp->m_pkthdr.csum_flags & CSUM_IP) {
3290			/*
3291			 * Start offset for header checksum calculation.
3292			 * End offset for header checksum calculation.
3293			 * Offset of place to put the checksum.
3294			 */
3295			TXD = (struct e1000_context_desc *)
3296			    &txr->tx_base[cur];
3297			TXD->lower_setup.ip_fields.ipcss = ehdrlen;
3298			TXD->lower_setup.ip_fields.ipcse =
3299			    htole16(ehdrlen + ip_hlen);
3300			TXD->lower_setup.ip_fields.ipcso =
3301			    ehdrlen + offsetof(struct ip, ip_sum);
3302			cmd |= E1000_TXD_CMD_IP;
3303			*txd_upper |= E1000_TXD_POPTS_IXSM << 8;
3304		}
3305
3306		if (mp->m_len < ehdrlen + ip_hlen)
3307			return;	/* failure */
3308
3309		hdr_len = ehdrlen + ip_hlen;
3310		ipproto = ip->ip_p;
3311
3312		break;
3313	case ETHERTYPE_IPV6:
3314		ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
3315		ip_hlen = sizeof(struct ip6_hdr); /* XXX: No header stacking. */
3316
3317		if (mp->m_len < ehdrlen + ip_hlen)
3318			return;	/* failure */
3319
3320		/* IPv6 doesn't have a header checksum. */
3321
3322		hdr_len = ehdrlen + ip_hlen;
3323		ipproto = ip6->ip6_nxt;
3324
3325		break;
3326	default:
3327		*txd_upper = 0;
3328		*txd_lower = 0;
3329		return;
3330	}
3331
3332	switch (ipproto) {
3333	case IPPROTO_TCP:
3334		if (mp->m_pkthdr.csum_flags & CSUM_TCP) {
3335			*txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
3336			*txd_upper |= E1000_TXD_POPTS_TXSM << 8;
3337			/* no need for context if already set */
3338			if (txr->last_hw_offload == CSUM_TCP)
3339				return;
3340			txr->last_hw_offload = CSUM_TCP;
3341			/*
3342			 * Start offset for payload checksum calculation.
3343			 * End offset for payload checksum calculation.
3344			 * Offset of place to put the checksum.
3345			 */
3346			TXD = (struct e1000_context_desc *)
3347			    &txr->tx_base[cur];
3348			TXD->upper_setup.tcp_fields.tucss = hdr_len;
3349			TXD->upper_setup.tcp_fields.tucse = htole16(0);
3350			TXD->upper_setup.tcp_fields.tucso =
3351			    hdr_len + offsetof(struct tcphdr, th_sum);
3352			cmd |= E1000_TXD_CMD_TCP;
3353		}
3354		break;
3355	case IPPROTO_UDP:
3356	{
3357		if (mp->m_pkthdr.csum_flags & CSUM_UDP) {
3358			*txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
3359			*txd_upper |= E1000_TXD_POPTS_TXSM << 8;
3360			/* no need for context if already set */
3361			if (txr->last_hw_offload == CSUM_UDP)
3362				return;
3363			txr->last_hw_offload = CSUM_UDP;
3364			/*
3365			 * Start offset for payload checksum calculation.
3366			 * End offset for payload checksum calculation.
3367			 * Offset of place to put the checksum.
3368			 */
3369			TXD = (struct e1000_context_desc *)
3370			    &txr->tx_base[cur];
3371			TXD->upper_setup.tcp_fields.tucss = hdr_len;
3372			TXD->upper_setup.tcp_fields.tucse = htole16(0);
3373			TXD->upper_setup.tcp_fields.tucso =
3374			    hdr_len + offsetof(struct udphdr, uh_sum);
3375		}
3376		/* Fall Thru */
3377	}
3378	default:
3379		break;
3380	}
3381
3382	TXD->tcp_seg_setup.data = htole32(0);
3383	TXD->cmd_and_length =
3384	    htole32(adapter->txd_cmd | E1000_TXD_CMD_DEXT | cmd);
3385	tx_buffer = &txr->tx_buffers[cur];
3386	tx_buffer->m_head = NULL;
3387	tx_buffer->next_eop = -1;
3388
3389	if (++cur == adapter->num_tx_desc)
3390		cur = 0;
3391
3392	txr->tx_avail--;
3393	txr->next_avail_desc = cur;
3394}
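
/*
 * Worked example (illustrative): for an untagged IPv4/TCP frame with
 * no IP options, ehdrlen = 14 and ip_hlen = 20, so the context
 * descriptor is written with ipcss = 14, ipcse = 34, ipcso = 14 +
 * offsetof(struct ip, ip_sum) = 24, tucss = 34 and tucso = 34 +
 * offsetof(struct tcphdr, th_sum) = 50.
 */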
3395
3396
3397/**********************************************************************
3398 *
3399 *  Setup work for hardware segmentation offload (TSO)
3400 *
3401 **********************************************************************/
3402static bool
3403em_tso_setup(struct tx_ring *txr, struct mbuf *mp, u32 *txd_upper,
3404   u32 *txd_lower)
3405{
3406	struct adapter			*adapter = txr->adapter;
3407	struct e1000_context_desc	*TXD;
3408	struct em_buffer		*tx_buffer;
3409	struct ether_vlan_header	*eh;
3410	struct ip			*ip;
3411	struct ip6_hdr			*ip6;
3412	struct tcphdr			*th;
3413	int cur, ehdrlen, hdr_len, ip_hlen, isip6;
3414	u16 etype;
3415
3416	/*
3417	 * This function could/should be extended to support IP/IPv6
3418	 * fragmentation as well.  But as they say, one step at a time.
3419	 */
3420
3421	/*
3422	 * Determine where frame payload starts.
3423	 * Jump over vlan headers if already present,
3424	 * helpful for QinQ too.
3425	 */
3426	eh = mtod(mp, struct ether_vlan_header *);
3427	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
3428		etype = ntohs(eh->evl_proto);
3429		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3430	} else {
3431		etype = ntohs(eh->evl_encap_proto);
3432		ehdrlen = ETHER_HDR_LEN;
3433	}
3434
3435	/* Ensure we have at least the IP+TCP header in the first mbuf. */
3436	if (mp->m_len < ehdrlen + sizeof(struct ip) + sizeof(struct tcphdr))
3437		return FALSE;	/* -1 */
3438
3439	/*
3440	 * We only support TCP for IPv4 for the moment; IPv6 is not yet done.
3441	 * TODO: Support SCTP too when it hits the tree.
3442	 */
3443	switch (etype) {
3444	case ETHERTYPE_IP:
3445		isip6 = 0;
3446		ip = (struct ip *)(mp->m_data + ehdrlen);
3447		if (ip->ip_p != IPPROTO_TCP)
3448			return FALSE;	/* 0 */
3449		ip->ip_len = 0;
3450		ip->ip_sum = 0;
3451		ip_hlen = ip->ip_hl << 2;
3452		if (mp->m_len < ehdrlen + ip_hlen + sizeof(struct tcphdr))
3453			return FALSE;	/* -1 */
3454		th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
3455#if 1
3456		th->th_sum = in_pseudo(ip->ip_src.s_addr,
3457		    ip->ip_dst.s_addr, htons(IPPROTO_TCP));
3458#else
3459		th->th_sum = mp->m_pkthdr.csum_data;
3460#endif
3461		break;
3462	case ETHERTYPE_IPV6:
3463		isip6 = 1;
3464		return FALSE;			/* Not supported yet. */
3465		ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
3466		if (ip6->ip6_nxt != IPPROTO_TCP)
3467			return FALSE;	/* 0 */
3468		ip6->ip6_plen = 0;
3469		ip_hlen = sizeof(struct ip6_hdr); /* XXX: no header stacking. */
3470		if (mp->m_len < ehdrlen + ip_hlen + sizeof(struct tcphdr))
3471			return FALSE;	/* -1 */
3472		th = (struct tcphdr *)((caddr_t)ip6 + ip_hlen);
3473#if 0
3474		th->th_sum = in6_pseudo(ip6->ip6_src, ip->ip6_dst,
3475		    htons(IPPROTO_TCP));	/* XXX: function notyet. */
3476#else
3477		th->th_sum = mp->m_pkthdr.csum_data;
3478#endif
3479		break;
3480	default:
3481		return FALSE;
3482	}
3483	hdr_len = ehdrlen + ip_hlen + (th->th_off << 2);
3484
3485	*txd_lower = (E1000_TXD_CMD_DEXT |	/* Extended descr type */
3486		      E1000_TXD_DTYP_D |	/* Data descr type */
3487		      E1000_TXD_CMD_TSE);	/* Do TSE on this packet */
3488
3489	/* IP and/or TCP header checksum calculation and insertion. */
3490	*txd_upper = ((isip6 ? 0 : E1000_TXD_POPTS_IXSM) |
3491		      E1000_TXD_POPTS_TXSM) << 8;
3492
3493	cur = txr->next_avail_desc;
3494	tx_buffer = &txr->tx_buffers[cur];
3495	TXD = (struct e1000_context_desc *) &txr->tx_base[cur];
3496
3497	/* IPv6 doesn't have a header checksum. */
3498	if (!isip6) {
3499		/*
3500		 * Start offset for header checksum calculation.
3501		 * End offset for header checksum calculation.
3502		 * Offset of place to put the checksum.
3503		 */
3504		TXD->lower_setup.ip_fields.ipcss = ehdrlen;
3505		TXD->lower_setup.ip_fields.ipcse =
3506		    htole16(ehdrlen + ip_hlen - 1);
3507		TXD->lower_setup.ip_fields.ipcso =
3508		    ehdrlen + offsetof(struct ip, ip_sum);
3509	}
3510	/*
3511	 * Start offset for payload checksum calculation.
3512	 * End offset for payload checksum calculation.
3513	 * Offset of place to put the checksum.
3514	 */
3515	TXD->upper_setup.tcp_fields.tucss =
3516	    ehdrlen + ip_hlen;
3517	TXD->upper_setup.tcp_fields.tucse = 0;
3518	TXD->upper_setup.tcp_fields.tucso =
3519	    ehdrlen + ip_hlen + offsetof(struct tcphdr, th_sum);
3520	/*
3521	 * Payload size per packet w/o any headers.
3522	 * Length of all headers up to payload.
3523	 */
3524	TXD->tcp_seg_setup.fields.mss = htole16(mp->m_pkthdr.tso_segsz);
3525	TXD->tcp_seg_setup.fields.hdr_len = hdr_len;
3526
3527	TXD->cmd_and_length = htole32(adapter->txd_cmd |
3528				E1000_TXD_CMD_DEXT |	/* Extended descr */
3529				E1000_TXD_CMD_TSE |	/* TSE context */
3530				(isip6 ? 0 : E1000_TXD_CMD_IP) |
3531				E1000_TXD_CMD_TCP |	/* Do TCP checksum */
3532				(mp->m_pkthdr.len - (hdr_len))); /* Total len */
3533
3534	tx_buffer->m_head = NULL;
3535	tx_buffer->next_eop = -1;
3536
3537	if (++cur == adapter->num_tx_desc)
3538		cur = 0;
3539
3540	txr->tx_avail--;
3541	txr->next_avail_desc = cur;
3542	txr->tx_tso = TRUE;
3543
3544	return TRUE;
3545}
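
/*
 * Editor's note (assumed usage; the caller is not shown in this excerpt):
 * the TSO setup routine above consumes exactly one context descriptor, and
 * the transmit path is expected to OR the returned *txd_lower/*txd_upper
 * flag words into each data descriptor of the packet; the programmed TSO
 * context remains in effect until another context descriptor is written.
 */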
3546
3547
3548/**********************************************************************
3549 *
3550 *  Examine each tx_buffer in the used queue. If the hardware is done
3551 *  processing the packet then free associated resources. The
3552 *  tx_buffer is put back on the free queue.
3553 *
3554 **********************************************************************/
3555static bool
3556em_txeof(struct tx_ring *txr)
3557{
3558	struct adapter	*adapter = txr->adapter;
3559	int first, last, done, num_avail;
3560	struct em_buffer *tx_buffer;
3561	struct e1000_tx_desc *tx_desc, *eop_desc;
3562	struct ifnet   *ifp = adapter->ifp;
3563
3564	EM_TX_LOCK_ASSERT(txr);
3565
3566	if (txr->tx_avail == adapter->num_tx_desc)
3567		return (FALSE);
3568
3569	num_avail = txr->tx_avail;
3570	first = txr->next_to_clean;
3571	tx_desc = &txr->tx_base[first];
3572	tx_buffer = &txr->tx_buffers[first];
3573	last = tx_buffer->next_eop;
3574	eop_desc = &txr->tx_base[last];
3575
3576	/*
3577	 * What this does is get the index of the
3578	 * first descriptor AFTER the EOP of the
3579	 * first packet, that way we can do the
3580	 * simple comparison on the inner while loop.
3581	 */
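	/*
	 * Editor's worked example: with, say, a 256-entry ring and the
	 * first packet's EOP in slot 255, ++last wraps and done == 0,
	 * so the inner loop below cleans slots first..255 and stops.
	 */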
3582	if (++last == adapter->num_tx_desc)
3583		last = 0;
3584	done = last;
3585
3586	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3587	    BUS_DMASYNC_POSTREAD);
3588
3589	while (eop_desc->upper.fields.status & E1000_TXD_STAT_DD) {
3590		/* We clean the range of the packet */
3591		while (first != done) {
3592			tx_desc->upper.data = 0;
3593			tx_desc->lower.data = 0;
3594			tx_desc->buffer_addr = 0;
3595			++num_avail;
3596
3597			if (tx_buffer->m_head) {
3598				ifp->if_opackets++;
3599				bus_dmamap_sync(txr->txtag,
3600				    tx_buffer->map,
3601				    BUS_DMASYNC_POSTWRITE);
3602				bus_dmamap_unload(txr->txtag,
3603				    tx_buffer->map);
3604
3605				m_freem(tx_buffer->m_head);
3606				tx_buffer->m_head = NULL;
3607			}
3608			tx_buffer->next_eop = -1;
3609			txr->watchdog_time = ticks;
3610
3611			if (++first == adapter->num_tx_desc)
3612				first = 0;
3613
3614			tx_buffer = &txr->tx_buffers[first];
3615			tx_desc = &txr->tx_base[first];
3616		}
3617		/* See if we can continue to the next packet */
3618		last = tx_buffer->next_eop;
3619		if (last != -1) {
3620			eop_desc = &txr->tx_base[last];
3621			/* Get new done point */
3622			if (++last == adapter->num_tx_desc) last = 0;
3623			done = last;
3624		} else
3625			break;
3626	}
3627	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3628	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3629
3630	txr->next_to_clean = first;
3631
3632	/*
3633	 * If we have enough room, clear IFF_DRV_OACTIVE to
3634	 * tell the stack that it is OK to send packets.
3635	 * If there are no pending descriptors, clear the watchdog.
3636	 */
3637	if (num_avail > EM_TX_CLEANUP_THRESHOLD) {
3638		ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
3639		if (num_avail == adapter->num_tx_desc) {
3640			txr->watchdog_check = FALSE;
3641			txr->tx_avail = num_avail;
3642			return (FALSE);
3643		}
3644	}
3645
3646	txr->tx_avail = num_avail;
3647	return (TRUE);
3648}
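/*
 * Editor's note: TRUE from em_txeof() means descriptors are still
 * pending completion (keeping the watchdog armed); FALSE is returned
 * only when the ring is, or has just become, completely clean.
 */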
3649
3650
3651/*********************************************************************
3652 *
3653 *  Refresh RX descriptor mbufs from system mbuf buffer pool.
3654 *
3655 **********************************************************************/
3656static void
3657em_refresh_mbufs(struct rx_ring *rxr, int limit)
3658{
3659	struct adapter		*adapter = rxr->adapter;
3660	struct mbuf		*m;
3661	bus_dma_segment_t	segs[1];
3662	bus_dmamap_t		map;
3663	struct em_buffer	*rxbuf;
3664	int			i, error, nsegs, cleaned;
3665
3666	i = rxr->next_to_refresh;
3667	cleaned = -1;
3668	while (i != limit) {
3669		m = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR);
3670		if (m == NULL)
3671			goto update;
3672		m->m_len = m->m_pkthdr.len = MCLBYTES;
3673
3674		if (adapter->max_frame_size <= (MCLBYTES - ETHER_ALIGN))
3675			m_adj(m, ETHER_ALIGN);
3676
3677		/*
3678		 * Using memory from the mbuf cluster pool, invoke the
3679		 * bus_dma machinery to arrange the memory mapping.
3680		 */
3681		error = bus_dmamap_load_mbuf_sg(rxr->rxtag, rxr->rx_sparemap,
3682		    m, segs, &nsegs, BUS_DMA_NOWAIT);
3683		if (error != 0) {
3684			m_free(m);
3685			goto update;
3686		}
3687
3688		/* If nsegs is wrong then the stack is corrupt. */
3689		KASSERT(nsegs == 1, ("Too many segments returned!"));
3690
3691		rxbuf = &rxr->rx_buffers[i];
3692		if (rxbuf->m_head != NULL)
3693			bus_dmamap_unload(rxr->rxtag, rxbuf->map);
3694
3695		map = rxbuf->map;
3696		rxbuf->map = rxr->rx_sparemap;
3697		rxr->rx_sparemap = map;
3698		bus_dmamap_sync(rxr->rxtag,
3699		    rxbuf->map, BUS_DMASYNC_PREREAD);
3700		rxbuf->m_head = m;
3701		rxr->rx_base[i].buffer_addr = htole64(segs[0].ds_addr);
3702
3703		cleaned = i;
3704		/* Calculate next index */
3705		if (++i == adapter->num_rx_desc)
3706			i = 0;
3707		/* This is the work marker for refresh */
3708		rxr->next_to_refresh = i;
3709	}
3710update:
3711	bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
3712	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3713	if (cleaned != -1) /* Update tail index */
3714		E1000_WRITE_REG(&adapter->hw,
3715		    E1000_RDT(rxr->me), cleaned);
3716
3717	return;
3718}
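/*
 * Editor's note: writing RDT at 'cleaned' hands every descriptor up
 * to and including that slot back to the hardware; em_rxeof() below
 * batches its calls here so this register write happens at most once
 * per eight received descriptors.
 */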
3719
3720
3721/*********************************************************************
3722 *
3723 *  Allocate memory for rx_buffer structures. Since we use one
3724 *  rx_buffer per received packet, the maximum number of rx_buffer's
3725 *  that we'll need is equal to the number of receive descriptors
3726 *  that we've allocated.
3727 *
3728 **********************************************************************/
3729static int
3730em_allocate_receive_buffers(struct rx_ring *rxr)
3731{
3732	struct adapter		*adapter = rxr->adapter;
3733	device_t		dev = adapter->dev;
3734	struct em_buffer	*rxbuf;
3735	int			error;
3736
3737	rxr->rx_buffers = malloc(sizeof(struct em_buffer) *
3738	    adapter->num_rx_desc, M_DEVBUF, M_NOWAIT | M_ZERO);
3739	if (rxr->rx_buffers == NULL) {
3740		device_printf(dev, "Unable to allocate rx_buffer memory\n");
3741		return (ENOMEM);
3742	}
3743
3744	error = bus_dma_tag_create(bus_get_dma_tag(dev), /* parent */
3745				1, 0,			/* alignment, bounds */
3746				BUS_SPACE_MAXADDR,	/* lowaddr */
3747				BUS_SPACE_MAXADDR,	/* highaddr */
3748				NULL, NULL,		/* filter, filterarg */
3749				MCLBYTES,		/* maxsize */
3750				1,			/* nsegments */
3751				MCLBYTES,		/* maxsegsize */
3752				0,			/* flags */
3753				NULL,			/* lockfunc */
3754				NULL,			/* lockarg */
3755				&rxr->rxtag);
3756	if (error) {
3757		device_printf(dev, "%s: bus_dma_tag_create failed %d\n",
3758		    __func__, error);
3759		goto fail;
3760	}
3761
3762	/* Create the spare map (used by getbuf) */
3763	error = bus_dmamap_create(rxr->rxtag, BUS_DMA_NOWAIT,
3764	     &rxr->rx_sparemap);
3765	if (error) {
3766		device_printf(dev, "%s: bus_dmamap_create failed: %d\n",
3767		    __func__, error);
3768		goto fail;
3769	}
3770
3771	/* Create a DMA map for each descriptor's buffer. */
3772	for (int i = 0; i < adapter->num_rx_desc; i++) {
3773		rxbuf = &rxr->rx_buffers[i];
3774		error = bus_dmamap_create(rxr->rxtag, BUS_DMA_NOWAIT,
3775		    &rxbuf->map);
3776		if (error) {
3777			device_printf(dev, "%s: bus_dmamap_create failed: %d\n",
3778			    __func__, error);
3779			goto fail;
3780		}
3781	}
3782
3783	return (0);
3784
3785fail:
3786	em_free_receive_structures(adapter);
3787	return (error);
3788}
3789
3790
3791/*********************************************************************
3792 *
3793 *  Initialize a receive ring and its buffers.
3794 *
3795 **********************************************************************/
3796static int
3797em_setup_receive_ring(struct rx_ring *rxr)
3798{
3799	struct	adapter 	*adapter = rxr->adapter;
3800	struct em_buffer	*rxbuf;
3801	bus_dma_segment_t	seg[1];
3802	int			rsize, nsegs, error;
3803
3804
3805	/* Clear the ring contents */
3806	EM_RX_LOCK(rxr);
3807	rsize = roundup2(adapter->num_rx_desc *
3808	    sizeof(struct e1000_rx_desc), EM_DBA_ALIGN);
3809	bzero((void *)rxr->rx_base, rsize);
3810
3811	/*
3812	** Free current RX buffer structs and their mbufs
3813	*/
3814	for (int i = 0; i < adapter->num_rx_desc; i++) {
3815		rxbuf = &rxr->rx_buffers[i];
3816		if (rxbuf->m_head != NULL) {
3817			bus_dmamap_sync(rxr->rxtag, rxbuf->map,
3818			    BUS_DMASYNC_POSTREAD);
3819			bus_dmamap_unload(rxr->rxtag, rxbuf->map);
3820			m_freem(rxbuf->m_head);
3821		}
3822	}
3823
3824	/* Now replenish the mbufs */
3825	for (int j = 0; j != adapter->num_rx_desc; ++j) {
3826
3827		rxbuf = &rxr->rx_buffers[j];
3828		rxbuf->m_head = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR);
3829		if (rxbuf->m_head == NULL)
3830			panic("RX ring hdr initialization failed!\n");
3831		rxbuf->m_head->m_len = MCLBYTES;
3832		rxbuf->m_head->m_flags &= ~M_HASFCS; /* we strip it */
3833		rxbuf->m_head->m_pkthdr.len = MCLBYTES;
3834
3835		/* Get the memory mapping */
3836		error = bus_dmamap_load_mbuf_sg(rxr->rxtag,
3837		    rxbuf->map, rxbuf->m_head, seg,
3838		    &nsegs, BUS_DMA_NOWAIT);
3839		if (error != 0)
3840			panic("RX ring dma initialization failed!\n");
3841		bus_dmamap_sync(rxr->rxtag,
3842		    rxbuf->map, BUS_DMASYNC_PREREAD);
3843
3844		/* Update descriptor */
3845		rxr->rx_base[j].buffer_addr = htole64(seg[0].ds_addr);
3846	}
3847
3848
3849	/* Setup our descriptor indices */
3850	rxr->next_to_check = 0;
3851	rxr->next_to_refresh = 0;
3852
3853	bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
3854	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3855
3856	EM_RX_UNLOCK(rxr);
3857	return (0);
3858}
3859
3860/*********************************************************************
3861 *
3862 *  Initialize all receive rings.
3863 *
3864 **********************************************************************/
3865static int
3866em_setup_receive_structures(struct adapter *adapter)
3867{
3868	struct rx_ring *rxr = adapter->rx_rings;
3869	int j;
3870
3871	for (j = 0; j < adapter->num_queues; j++, rxr++)
3872		if (em_setup_receive_ring(rxr))
3873			goto fail;
3874
3875	return (0);
3876fail:
3877	/*
3878	 * Free the RX buffers allocated so far; we only handle
3879	 * the rings that completed, since the failing case will have
3880	 * cleaned up after itself. Ring 'j' failed, so it is the terminus.
3881	 */
3882	for (int i = 0; i < j; ++i) {
3883		rxr = &adapter->rx_rings[i];
3884		for (int n = 0; n < adapter->num_rx_desc; n++) {
3885			struct em_buffer *rxbuf;
3886			rxbuf = &rxr->rx_buffers[n];
3887			if (rxbuf->m_head != NULL) {
3888				bus_dmamap_sync(rxr->rxtag, rxbuf->map,
3889			  	  BUS_DMASYNC_POSTREAD);
3890				bus_dmamap_unload(rxr->rxtag, rxbuf->map);
3891				m_freem(rxbuf->m_head);
3892				rxbuf->m_head = NULL;
3893			}
3894		}
3895	}
3896
3897	return (ENOBUFS);
3898}
3899
3900/*********************************************************************
3901 *
3902 *  Free all receive rings.
3903 *
3904 **********************************************************************/
3905static void
3906em_free_receive_structures(struct adapter *adapter)
3907{
3908	struct rx_ring *rxr = adapter->rx_rings;
3909
3910	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
3911		em_free_receive_buffers(rxr);
3912		/* Free the ring memory as well */
3913		em_dma_free(adapter, &rxr->rxdma);
3914		EM_RX_LOCK_DESTROY(rxr);
3915	}
3916
3917	free(adapter->rx_rings, M_DEVBUF);
3918}
3919
3920
3921/*********************************************************************
3922 *
3923 *  Free receive ring data structures
3924 *
3925 **********************************************************************/
3926static void
3927em_free_receive_buffers(struct rx_ring *rxr)
3928{
3929	struct adapter		*adapter = rxr->adapter;
3930	struct em_buffer	*rxbuf = NULL;
3931
3932	INIT_DEBUGOUT("free_receive_buffers: begin");
3933
3934	if (rxr->rx_sparemap) {
3935		bus_dmamap_destroy(rxr->rxtag, rxr->rx_sparemap);
3936		rxr->rx_sparemap = NULL;
3937	}
3938
3939	if (rxr->rx_buffers != NULL) {
3940		for (int i = 0; i < adapter->num_rx_desc; i++) {
3941			rxbuf = &rxr->rx_buffers[i];
3942			if (rxbuf->map != NULL) {
3943				bus_dmamap_sync(rxr->rxtag, rxbuf->map,
3944				    BUS_DMASYNC_POSTREAD);
3945				bus_dmamap_unload(rxr->rxtag, rxbuf->map);
3946				bus_dmamap_destroy(rxr->rxtag, rxbuf->map);
3947			}
3948			if (rxbuf->m_head != NULL) {
3949				m_freem(rxbuf->m_head);
3950				rxbuf->m_head = NULL;
3951			}
3952		}
3953		free(rxr->rx_buffers, M_DEVBUF);
3954		rxr->rx_buffers = NULL;
3955	}
3956
3957	if (rxr->rxtag != NULL) {
3958		bus_dma_tag_destroy(rxr->rxtag);
3959		rxr->rxtag = NULL;
3960	}
3961
3962	return;
3963}
3964
3965
3966/*********************************************************************
3967 *
3968 *  Enable receive unit.
3969 *
3970 **********************************************************************/
3971#define MAX_INTS_PER_SEC	8000
3972#define DEFAULT_ITR	     1000000000/(MAX_INTS_PER_SEC * 256)
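/*
 * Editor's note: the ITR register counts in 256 ns units, so the
 * define above works out to 1000000000 / (8000 * 256) = 488, i.e. a
 * minimum inter-interrupt gap of roughly 125 us (8000 ints/sec).
 */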
3973
3974static void
3975em_initialize_receive_unit(struct adapter *adapter)
3976{
3977	struct rx_ring	*rxr = adapter->rx_rings;
3978	struct ifnet	*ifp = adapter->ifp;
3979	struct e1000_hw	*hw = &adapter->hw;
3980	u64	bus_addr;
3981	u32	rctl, rxcsum;
3982
3983	INIT_DEBUGOUT("em_initialize_receive_unit: begin");
3984
3985	/*
3986	 * Make sure receives are disabled while setting
3987	 * up the descriptor ring
3988	 */
3989	rctl = E1000_READ_REG(hw, E1000_RCTL);
3990	E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
3991
3992	E1000_WRITE_REG(&adapter->hw, E1000_RADV,
3993	    adapter->rx_abs_int_delay.value);
3994	/*
3995	 * Set the interrupt throttling rate. Value is calculated
3996	 * as DEFAULT_ITR = 1/(MAX_INTS_PER_SEC * 256ns)
3997	 */
3998	E1000_WRITE_REG(hw, E1000_ITR, DEFAULT_ITR);
3999
4000	/*
4001	** When using MSIX interrupts we need to throttle
4002	** using the EITR register (82574 only)
4003	*/
4004	if (adapter->msix)
4005		for (int i = 0; i < 4; i++)
4006			E1000_WRITE_REG(hw, E1000_EITR_82574(i),
4007			    DEFAULT_ITR);
4008
4009	/* Disable accelerated acknowledge */
4010	if (adapter->hw.mac.type == e1000_82574)
4011		E1000_WRITE_REG(hw, E1000_RFCTL, E1000_RFCTL_ACK_DIS);
4012
4013	if (ifp->if_capenable & IFCAP_RXCSUM) {
4014		rxcsum = E1000_READ_REG(hw, E1000_RXCSUM);
4015		rxcsum |= (E1000_RXCSUM_IPOFL | E1000_RXCSUM_TUOFL);
4016		E1000_WRITE_REG(hw, E1000_RXCSUM, rxcsum);
4017	}
4018
4019	/*
4020	** XXX TEMPORARY WORKAROUND: on some systems with 82573
4021	** long latencies are observed, like Lenovo X60. This
4022	** change eliminates the problem, but since having positive
4023	** values in RDTR is a known source of problems on other
4024	** platforms another solution is being sought.
4025	*/
4026	if (hw->mac.type == e1000_82573)
4027		E1000_WRITE_REG(hw, E1000_RDTR, 0x20);
4028
4029	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4030		/* Setup the Base and Length of the Rx Descriptor Ring */
4031		bus_addr = rxr->rxdma.dma_paddr;
4032		E1000_WRITE_REG(hw, E1000_RDLEN(i),
4033		    adapter->num_rx_desc * sizeof(struct e1000_rx_desc));
4034		E1000_WRITE_REG(hw, E1000_RDBAH(i), (u32)(bus_addr >> 32));
4035		E1000_WRITE_REG(hw, E1000_RDBAL(i), (u32)bus_addr);
4036		/* Setup the Head and Tail Descriptor Pointers */
4037		E1000_WRITE_REG(hw, E1000_RDH(i), 0);
4038		E1000_WRITE_REG(hw, E1000_RDT(i), adapter->num_rx_desc - 1);
4039	}
4040
4041	/* Setup the Receive Control Register */
4042	rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
4043	rctl |= E1000_RCTL_EN | E1000_RCTL_BAM |
4044	    E1000_RCTL_LBM_NO | E1000_RCTL_RDMTS_HALF |
4045	    (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
4046
4047	/* Strip the CRC */
4048	rctl |= E1000_RCTL_SECRC;
4049
4050	/* Make sure VLAN Filters are off */
4051	rctl &= ~E1000_RCTL_VFE;
4052	rctl &= ~E1000_RCTL_SBP;
4053	rctl |= E1000_RCTL_SZ_2048;
4054	if (ifp->if_mtu > ETHERMTU)
4055		rctl |= E1000_RCTL_LPE;
4056	else
4057		rctl &= ~E1000_RCTL_LPE;
4058
4059	/* Write out the settings */
4060	E1000_WRITE_REG(hw, E1000_RCTL, rctl);
4061
4062	return;
4063}
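/*
 * Editor's note: with RDH = 0 and RDT = num_rx_desc - 1 the hardware
 * owns all but one descriptor slot; head == tail would instead mean
 * "ring empty", so one slot is always held back.
 */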
4064
4065
4066/*********************************************************************
4067 *
4068 *  This routine executes in interrupt context. It replenishes
4069 *  the mbufs in the descriptor and sends data which has been
4070 *  dma'ed into host memory to upper layer.
4071 *
4072 *  We loop at most count times if count is > 0, or until done if
4073 *  count < 0.
4074 *
4075 *  For polling we also now return the number of cleaned packets
4076 *********************************************************************/
4077static int
4078em_rxeof(struct rx_ring *rxr, int count)
4079{
4080	struct adapter		*adapter = rxr->adapter;
4081	struct ifnet		*ifp = adapter->ifp;
4082	struct mbuf		*mp, *sendmp;
4083	u8			status;
4084	u16 			len;
4085	int			i, processed, rxdone = 0;
4086	bool			eop;
4087	struct e1000_rx_desc	*cur;
4088
4089	EM_RX_LOCK(rxr);
4090
4091	for (i = rxr->next_to_check, processed = 0; count != 0;) {
4092
4093		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
4094			break;
4095
4096		bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4097		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
4098
4099		cur = &rxr->rx_base[i];
4100		status = cur->status;
4101		mp = sendmp = NULL;
4102
4103		if ((status & E1000_RXD_STAT_DD) == 0)
4104			break;
4105
4106		len = le16toh(cur->length);
4107		eop = (status & E1000_RXD_STAT_EOP) != 0;
4108		count--;
4109
4110		if ((cur->errors & E1000_RXD_ERR_FRAME_ERR_MASK) == 0) {
4111
4112			/* Assign correct length to the current fragment */
4113			mp = rxr->rx_buffers[i].m_head;
4114			mp->m_len = len;
4115
4116			if (rxr->fmp == NULL) {
4117				mp->m_pkthdr.len = len;
4118				rxr->fmp = mp; /* Store the first mbuf */
4119				rxr->lmp = mp;
4120			} else {
4121				/* Chain mbuf's together */
4122				mp->m_flags &= ~M_PKTHDR;
4123				rxr->lmp->m_next = mp;
4124				rxr->lmp = rxr->lmp->m_next;
4125				rxr->fmp->m_pkthdr.len += len;
4126			}
4127
4128			if (eop) {
4129				rxr->fmp->m_pkthdr.rcvif = ifp;
4130				ifp->if_ipackets++;
4131				em_receive_checksum(cur, rxr->fmp);
4132#ifndef __NO_STRICT_ALIGNMENT
4133				if (adapter->max_frame_size >
4134				    (MCLBYTES - ETHER_ALIGN) &&
4135				    em_fixup_rx(rxr) != 0)
4136					goto skip;
4137#endif
4138				if (status & E1000_RXD_STAT_VP) {
4139					rxr->fmp->m_pkthdr.ether_vtag =
4140					    (le16toh(cur->special) &
4141					    E1000_RXD_SPC_VLAN_MASK);
4142					rxr->fmp->m_flags |= M_VLANTAG;
4143				}
4144#ifndef __NO_STRICT_ALIGNMENT
4145skip:
4146#endif
4147				sendmp = rxr->fmp;
4148				rxr->fmp = NULL;
4149				rxr->lmp = NULL;
4150			}
4151		} else {
4152			ifp->if_ierrors++;
4153			/* Reuse loaded DMA map and just update mbuf chain */
4154			mp = rxr->rx_buffers[i].m_head;
4155			mp->m_len = mp->m_pkthdr.len = MCLBYTES;
4156			mp->m_data = mp->m_ext.ext_buf;
4157			mp->m_next = NULL;
4158			if (adapter->max_frame_size <=
4159			    (MCLBYTES - ETHER_ALIGN))
4160				m_adj(mp, ETHER_ALIGN);
4161			if (rxr->fmp != NULL) {
4162				m_freem(rxr->fmp);
4163				rxr->fmp = NULL;
4164				rxr->lmp = NULL;
4165			}
4166			sendmp = NULL;
4167		}
4168
4169		/* Zero out the receive descriptors status. */
4170		cur->status = 0;
4171		++rxdone;	/* cumulative for POLL */
4172		++processed;
4173
4174		/* Advance our pointers to the next descriptor. */
4175		if (++i == adapter->num_rx_desc)
4176			i = 0;
4177
4178		/* Send to the stack */
4179		if (sendmp != NULL)
4180			(*ifp->if_input)(ifp, sendmp);
4181
4182		/* Only refresh mbufs every 8 descriptors */
4183		if (processed == 8) {
4184			em_refresh_mbufs(rxr, i);
4185			processed = 0;
4186		}
4187	}
4188
4189	/* Catch any remaining refresh work */
4190	if (processed != 0) {
4191		em_refresh_mbufs(rxr, i);
4192		processed = 0;
4193	}
4194
4195	rxr->next_to_check = i;
4196
4197	EM_RX_UNLOCK(rxr);
4198	return (rxdone);
4199}
4200
4201#ifndef __NO_STRICT_ALIGNMENT
4202/*
4203 * When jumbo frames are enabled we should realign the entire payload on
4204 * architectures with strict alignment. This is a serious design mistake of
4205 * the 8254x, as it nullifies the benefit of DMA. The 8254x only allows the
4206 * RX buffer size to be 2048/4096/8192/16384; what we really want is
4207 * 2048 - ETHER_ALIGN, which would align the payload. On architectures
4208 * without strict alignment restrictions the 8254x still performs unaligned
4209 * memory accesses, which reduce performance there too. To avoid copying an
4210 * entire frame just to align it, we allocate a new mbuf, copy only the
4211 * ethernet header into it, and prepend that mbuf to the existing chain.
4212 *
4213 * Be aware that the best performance of the 8254x is achieved only when
4214 * jumbo frames are not used at all on architectures with strict alignment.
4215 */
4216static int
4217em_fixup_rx(struct rx_ring *rxr)
4218{
4219	struct adapter *adapter = rxr->adapter;
4220	struct mbuf *m, *n;
4221	int error;
4222
4223	error = 0;
4224	m = rxr->fmp;
4225	if (m->m_len <= (MCLBYTES - ETHER_HDR_LEN)) {
4226		bcopy(m->m_data, m->m_data + ETHER_HDR_LEN, m->m_len);
4227		m->m_data += ETHER_HDR_LEN;
4228	} else {
4229		MGETHDR(n, M_DONTWAIT, MT_DATA);
4230		if (n != NULL) {
4231			bcopy(m->m_data, n->m_data, ETHER_HDR_LEN);
4232			m->m_data += ETHER_HDR_LEN;
4233			m->m_len -= ETHER_HDR_LEN;
4234			n->m_len = ETHER_HDR_LEN;
4235			M_MOVE_PKTHDR(n, m);
4236			n->m_next = m;
4237			rxr->fmp = n;
4238		} else {
4239			adapter->dropped_pkts++;
4240			m_freem(rxr->fmp);
4241			rxr->fmp = NULL;
4242			error = ENOMEM;
4243		}
4244	}
4245
4246	return (error);
4247}
4248#endif
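/*
 * Editor's sketch of the em_fixup_rx() transformation: a frame that
 * arrived as
 *
 *	[cluster: 14-byte ethernet header | misaligned payload]
 *
 * leaves either as the same cluster with m_data advanced by
 * ETHER_HDR_LEN (payload now 32-bit aligned) or, when the cluster has
 * no spare room, as
 *
 *	[new mbuf: header] -> [cluster: payload]
 *
 * so at most ETHER_HDR_LEN bytes are ever copied per frame.
 */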
4249
4250/*********************************************************************
4251 *
4252 *  Verify that the hardware indicated that the checksum is valid.
4253 *  Inform the stack about the status of checksum so that stack
4254 *  doesn't spend time verifying the checksum.
4255 *
4256 *********************************************************************/
4257static void
4258em_receive_checksum(struct e1000_rx_desc *rx_desc, struct mbuf *mp)
4259{
4260	/* Ignore Checksum bit is set */
4261	if (rx_desc->status & E1000_RXD_STAT_IXSM) {
4262		mp->m_pkthdr.csum_flags = 0;
4263		return;
4264	}
4265
4266	if (rx_desc->status & E1000_RXD_STAT_IPCS) {
4267		/* Did it pass? */
4268		if (!(rx_desc->errors & E1000_RXD_ERR_IPE)) {
4269			/* IP Checksum Good */
4270			mp->m_pkthdr.csum_flags = CSUM_IP_CHECKED;
4271			mp->m_pkthdr.csum_flags |= CSUM_IP_VALID;
4272
4273		} else {
4274			mp->m_pkthdr.csum_flags = 0;
4275		}
4276	}
4277
4278	if (rx_desc->status & E1000_RXD_STAT_TCPCS) {
4279		/* Did it pass? */
4280		if (!(rx_desc->errors & E1000_RXD_ERR_TCPE)) {
4281			mp->m_pkthdr.csum_flags |=
4282			(CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
4283			mp->m_pkthdr.csum_data = htons(0xffff);
4284		}
4285	}
4286}
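/*
 * Editor's sketch (assumed stack-side behavior, not part of this
 * driver): a transport consumer typically honors these flags along
 * the lines of
 *
 *	if (m->m_pkthdr.csum_flags & CSUM_DATA_VALID) {
 *		if (m->m_pkthdr.csum_flags & CSUM_PSEUDO_HDR)
 *			the checksum is taken as verified, using the
 *			0xffff placed in csum_data above;
 *	}
 *
 * and only falls back to a software checksum otherwise.
 */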
4287
4288/*
4289 * This routine is run via a vlan
4290 * config EVENT
4291 */
4292static void
4293em_register_vlan(void *arg, struct ifnet *ifp, u16 vtag)
4294{
4295	struct adapter	*adapter = ifp->if_softc;
4296	u32		index, bit;
4297
4298	if (ifp->if_softc != arg)	/* Not our event */
4299		return;
4300
4301	if ((vtag == 0) || (vtag > 4095))	/* Invalid ID */
4302		return;
4303
4304	index = (vtag >> 5) & 0x7F;
4305	bit = vtag & 0x1F;
4306	em_shadow_vfta[index] |= (1 << bit);
4307	++adapter->num_vlans;
4308	/* Re-init to load the changes */
4309	em_init(adapter);
4310}
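/*
 * Editor's worked example of the VFTA indexing above: vtag = 100
 * gives index = (100 >> 5) & 0x7F = 3 and bit = 100 & 0x1F = 4, so
 * bit 4 of shadow word 3 is set; 128 words x 32 bits cover the full
 * 4096-entry VLAN ID space.
 */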
4311
4312/*
4313 * This routine is run via a vlan
4314 * unconfig EVENT
4315 */
4316static void
4317em_unregister_vlan(void *arg, struct ifnet *ifp, u16 vtag)
4318{
4319	struct adapter	*adapter = ifp->if_softc;
4320	u32		index, bit;
4321
4322	if (ifp->if_softc != arg)	/* Not our event */
4323		return;
4324
4325	if ((vtag == 0) || (vtag > 4095))	/* Invalid ID */
4326		return;
4327
4328	index = (vtag >> 5) & 0x7F;
4329	bit = vtag & 0x1F;
4330	em_shadow_vfta[index] &= ~(1 << bit);
4331	--adapter->num_vlans;
4332	/* Re-init to load the changes */
4333	em_init(adapter);
4334}
4335
4336static void
4337em_setup_vlan_hw_support(struct adapter *adapter)
4338{
4339	struct e1000_hw *hw = &adapter->hw;
4340	u32             reg;
4341
4342	/*
4343	** We get here through init_locked, meaning
4344	** a soft reset; that has already cleared
4345	** the VFTA and other state, so if no
4346	** vlans have been registered, do nothing.
4347	*/
4348	if (adapter->num_vlans == 0)
4349		return;
4350
4351	/*
4352	** A soft reset zeroes out the VFTA, so
4353	** we need to repopulate it now.
4354	*/
4355	for (int i = 0; i < EM_VFTA_SIZE; i++)
4356		if (em_shadow_vfta[i] != 0)
4357			E1000_WRITE_REG_ARRAY(hw, E1000_VFTA,
4358			    i, em_shadow_vfta[i]);
4359
4360	reg = E1000_READ_REG(hw, E1000_CTRL);
4361	reg |= E1000_CTRL_VME;
4362	E1000_WRITE_REG(hw, E1000_CTRL, reg);
4363
4364	/* Enable the Filter Table */
4365	reg = E1000_READ_REG(hw, E1000_RCTL);
4366	reg &= ~E1000_RCTL_CFIEN;
4367	reg |= E1000_RCTL_VFE;
4368	E1000_WRITE_REG(hw, E1000_RCTL, reg);
4369
4370	/* Update the frame size */
4371	E1000_WRITE_REG(&adapter->hw, E1000_RLPML,
4372	    adapter->max_frame_size + VLAN_TAG_SIZE);
4373}
4374
4375static void
4376em_enable_intr(struct adapter *adapter)
4377{
4378	struct e1000_hw *hw = &adapter->hw;
4379	u32 ims_mask = IMS_ENABLE_MASK;
4380
4381	if (adapter->msix) {
4382		E1000_WRITE_REG(hw, EM_EIAC, EM_MSIX_MASK);
4383		ims_mask |= EM_MSIX_MASK;
4384	}
4385	E1000_WRITE_REG(hw, E1000_IMS, ims_mask);
4386}
4387
4388static void
4389em_disable_intr(struct adapter *adapter)
4390{
4391	struct e1000_hw *hw = &adapter->hw;
4392
4393	if (adapter->msix)
4394		E1000_WRITE_REG(hw, EM_EIAC, 0);
4395	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
4396}
4397
4398/*
4399 * Bit of a misnomer, what this really means is
4400 * to enable OS management of the system... aka
4401 * to disable special hardware management features
4402 */
4403static void
4404em_init_manageability(struct adapter *adapter)
4405{
4406	/* A shared code workaround */
4407#define E1000_82542_MANC2H E1000_MANC2H
4408	if (adapter->has_manage) {
4409		int manc2h = E1000_READ_REG(&adapter->hw, E1000_MANC2H);
4410		int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
4411
4412		/* disable hardware interception of ARP */
4413		manc &= ~(E1000_MANC_ARP_EN);
4414
4415                /* enable receiving management packets to the host */
4416		manc |= E1000_MANC_EN_MNG2HOST;
4417#define E1000_MNG2HOST_PORT_623 (1 << 5)
4418#define E1000_MNG2HOST_PORT_664 (1 << 6)
4419		manc2h |= E1000_MNG2HOST_PORT_623;
4420		manc2h |= E1000_MNG2HOST_PORT_664;
4421		E1000_WRITE_REG(&adapter->hw, E1000_MANC2H, manc2h);
4422		E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
4423	}
4424}
4425
4426/*
4427 * Give control back to hardware management
4428 * controller if there is one.
4429 */
4430static void
4431em_release_manageability(struct adapter *adapter)
4432{
4433	if (adapter->has_manage) {
4434		int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
4435
4436		/* re-enable hardware interception of ARP */
4437		manc |= E1000_MANC_ARP_EN;
4438		manc &= ~E1000_MANC_EN_MNG2HOST;
4439
4440		E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
4441	}
4442}
4443
4444/*
4445 * em_get_hw_control sets the {CTRL_EXT|FWSM}:DRV_LOAD bit.
4446 * For ASF and Pass Through versions of f/w this means
4447 * that the driver is loaded. For AMT version type f/w
4448 * this means that the network i/f is open.
4449 */
4450static void
4451em_get_hw_control(struct adapter *adapter)
4452{
4453	u32 ctrl_ext, swsm;
4454
4455	if (adapter->hw.mac.type == e1000_82573) {
4456		swsm = E1000_READ_REG(&adapter->hw, E1000_SWSM);
4457		E1000_WRITE_REG(&adapter->hw, E1000_SWSM,
4458		    swsm | E1000_SWSM_DRV_LOAD);
4459		return;
4460	}
4461	/* else */
4462	ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4463	E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
4464	    ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
4465	return;
4466}
4467
4468/*
4469 * em_release_hw_control resets {CTRL_EXT|FWSM}:DRV_LOAD bit.
4470 * For ASF and Pass Through versions of f/w this means that
4471 * the driver is no longer loaded. For AMT versions of the
4472 * f/w this means that the network i/f is closed.
4473 */
4474static void
4475em_release_hw_control(struct adapter *adapter)
4476{
4477	u32 ctrl_ext, swsm;
4478
4479	if (!adapter->has_manage)
4480		return;
4481
4482	if (adapter->hw.mac.type == e1000_82573) {
4483		swsm = E1000_READ_REG(&adapter->hw, E1000_SWSM);
4484		E1000_WRITE_REG(&adapter->hw, E1000_SWSM,
4485		    swsm & ~E1000_SWSM_DRV_LOAD);
4486		return;
4487	}
4488	/* else */
4489	ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4490	E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
4491	    ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
4492	return;
4493}
4494
4495static int
4496em_is_valid_ether_addr(u8 *addr)
4497{
4498	char zero_addr[6] = { 0, 0, 0, 0, 0, 0 };
4499
4500	if ((addr[0] & 1) || (!bcmp(addr, zero_addr, ETHER_ADDR_LEN))) {
4501		return (FALSE);
4502	}
4503
4504	return (TRUE);
4505}
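/*
 * Editor's note: addr[0] & 1 tests the IEEE multicast/group bit of
 * the first octet on the wire, so this rejects, for example,
 * 01:00:5e:00:00:01 (IPv4 multicast) as well as the all-zero address.
 */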
4506
4507/*
4508** Parse the interface capabilities with regard
4509** to both system management and wake-on-lan for
4510** later use.
4511*/
4512static void
4513em_get_wakeup(device_t dev)
4514{
4515	struct adapter	*adapter = device_get_softc(dev);
4516	u16		eeprom_data = 0, device_id, apme_mask;
4517
4518	adapter->has_manage = e1000_enable_mng_pass_thru(&adapter->hw);
4519	apme_mask = EM_EEPROM_APME;
4520
4521	switch (adapter->hw.mac.type) {
4522	case e1000_82573:
4523	case e1000_82583:
4524		adapter->has_amt = TRUE;
4525		/* Falls thru */
4526	case e1000_82571:
4527	case e1000_82572:
4528	case e1000_80003es2lan:
4529		if (adapter->hw.bus.func == 1) {
4530			e1000_read_nvm(&adapter->hw,
4531			    NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data);
4532			break;
4533		} else
4534			e1000_read_nvm(&adapter->hw,
4535			    NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
4536		break;
4537	case e1000_ich8lan:
4538	case e1000_ich9lan:
4539	case e1000_ich10lan:
4540	case e1000_pchlan:
4541		apme_mask = E1000_WUC_APME;
4542		adapter->has_amt = TRUE;
4543		eeprom_data = E1000_READ_REG(&adapter->hw, E1000_WUC);
4544		break;
4545	default:
4546		e1000_read_nvm(&adapter->hw,
4547		    NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
4548		break;
4549	}
4550	if (eeprom_data & apme_mask)
4551		adapter->wol = (E1000_WUFC_MAG | E1000_WUFC_MC);
4552	/*
4553	 * We have the eeprom settings; now apply the special cases
4554	 * where the eeprom may be wrong or the board won't support
4555	 * wake on lan on a particular port.
4556	 */
4557	device_id = pci_get_device(dev);
4558	switch (device_id) {
4559	case E1000_DEV_ID_82571EB_FIBER:
4560		/* Wake events only supported on port A for dual fiber
4561		 * regardless of eeprom setting */
4562		if (E1000_READ_REG(&adapter->hw, E1000_STATUS) &
4563		    E1000_STATUS_FUNC_1)
4564			adapter->wol = 0;
4565		break;
4566	case E1000_DEV_ID_82571EB_QUAD_COPPER:
4567	case E1000_DEV_ID_82571EB_QUAD_FIBER:
4568	case E1000_DEV_ID_82571EB_QUAD_COPPER_LP:
4569		/* If a quad port adapter, disable WoL on all but port A */
4570		if (global_quad_port_a != 0)
4571			adapter->wol = 0;
4572		/* Reset for multiple quad port adapters */
4573		if (++global_quad_port_a == 4)
4574			global_quad_port_a = 0;
4575		break;
4576	}
4577	return;
4578}
4579
4580
4581/*
4582 * Enable PCI Wake On Lan capability
4583 */
4584static void
4585em_enable_wakeup(device_t dev)
4586{
4587	struct adapter	*adapter = device_get_softc(dev);
4588	struct ifnet	*ifp = adapter->ifp;
4589	u32		pmc, ctrl, ctrl_ext, rctl;
4590	u16     	status;
4591
4592	if (pci_find_extcap(dev, PCIY_PMG, &pmc) != 0)
4593		return;
4594
4595	/* Advertise the wakeup capability */
4596	ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL);
4597	ctrl |= (E1000_CTRL_SWDPIN2 | E1000_CTRL_SWDPIN3);
4598	E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl);
4599	E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
4600
4601	if ((adapter->hw.mac.type == e1000_ich8lan) ||
4602	    (adapter->hw.mac.type == e1000_pchlan) ||
4603	    (adapter->hw.mac.type == e1000_ich9lan) ||
4604	    (adapter->hw.mac.type == e1000_ich10lan)) {
4605		e1000_disable_gig_wol_ich8lan(&adapter->hw);
4606		e1000_hv_phy_powerdown_workaround_ich8lan(&adapter->hw);
4607	}
4608
4609	/* Keep the laser running on Fiber adapters */
4610	if (adapter->hw.phy.media_type == e1000_media_type_fiber ||
4611	    adapter->hw.phy.media_type == e1000_media_type_internal_serdes) {
4612		ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4613		ctrl_ext |= E1000_CTRL_EXT_SDP3_DATA;
4614		E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, ctrl_ext);
4615	}
4616
4617	/*
4618	** Determine type of Wakeup: note that wol
4619	** is set with all bits on by default.
4620	*/
4621	if ((ifp->if_capenable & IFCAP_WOL_MAGIC) == 0)
4622		adapter->wol &= ~E1000_WUFC_MAG;
4623
4624	if ((ifp->if_capenable & IFCAP_WOL_MCAST) == 0)
4625		adapter->wol &= ~E1000_WUFC_MC;
4626	else {
4627		rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
4628		rctl |= E1000_RCTL_MPE;
4629		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, rctl);
4630	}
4631
4632	if (adapter->hw.mac.type == e1000_pchlan) {
4633		if (em_enable_phy_wakeup(adapter))
4634			return;
4635	} else {
4636		E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
4637		E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
4638	}
4639
4640	if (adapter->hw.phy.type == e1000_phy_igp_3)
4641		e1000_igp3_phy_powerdown_workaround_ich8lan(&adapter->hw);
4642
4643	/* Request PME */
4644	status = pci_read_config(dev, pmc + PCIR_POWER_STATUS, 2);
4645	status &= ~(PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE);
4646	if (ifp->if_capenable & IFCAP_WOL)
4647		status |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE;
4648	pci_write_config(dev, pmc + PCIR_POWER_STATUS, status, 2);
4649
4650	return;
4651}
4652
4653/*
4654** WOL in the newer chipset interfaces (pchlan)
4655** requires things to be copied into the PHY
4656*/
4657static int
4658em_enable_phy_wakeup(struct adapter *adapter)
4659{
4660	struct e1000_hw *hw = &adapter->hw;
4661	u32 mreg, ret = 0;
4662	u16 preg;
4663
4664	/* copy MAC RARs to PHY RARs */
4665	for (int i = 0; i < adapter->hw.mac.rar_entry_count; i++) {
4666		mreg = E1000_READ_REG(hw, E1000_RAL(i));
4667		e1000_write_phy_reg(hw, BM_RAR_L(i), (u16)(mreg & 0xFFFF));
4668		e1000_write_phy_reg(hw, BM_RAR_M(i),
4669		    (u16)((mreg >> 16) & 0xFFFF));
4670		mreg = E1000_READ_REG(hw, E1000_RAH(i));
4671		e1000_write_phy_reg(hw, BM_RAR_H(i), (u16)(mreg & 0xFFFF));
4672		e1000_write_phy_reg(hw, BM_RAR_CTRL(i),
4673		    (u16)((mreg >> 16) & 0xFFFF));
4674	}
4675
4676	/* copy MAC MTA to PHY MTA */
4677	for (int i = 0; i < adapter->hw.mac.mta_reg_count; i++) {
4678		mreg = E1000_READ_REG_ARRAY(hw, E1000_MTA, i);
4679		e1000_write_phy_reg(hw, BM_MTA(i), (u16)(mreg & 0xFFFF));
4680		e1000_write_phy_reg(hw, BM_MTA(i) + 1,
4681		    (u16)((mreg >> 16) & 0xFFFF));
4682	}
4683
4684	/* configure PHY Rx Control register */
4685	e1000_read_phy_reg(&adapter->hw, BM_RCTL, &preg);
4686	mreg = E1000_READ_REG(hw, E1000_RCTL);
4687	if (mreg & E1000_RCTL_UPE)
4688		preg |= BM_RCTL_UPE;
4689	if (mreg & E1000_RCTL_MPE)
4690		preg |= BM_RCTL_MPE;
4691	preg &= ~(BM_RCTL_MO_MASK);
4692	if (mreg & E1000_RCTL_MO_3)
4693		preg |= (((mreg & E1000_RCTL_MO_3) >> E1000_RCTL_MO_SHIFT)
4694				<< BM_RCTL_MO_SHIFT);
4695	if (mreg & E1000_RCTL_BAM)
4696		preg |= BM_RCTL_BAM;
4697	if (mreg & E1000_RCTL_PMCF)
4698		preg |= BM_RCTL_PMCF;
4699	mreg = E1000_READ_REG(hw, E1000_CTRL);
4700	if (mreg & E1000_CTRL_RFCE)
4701		preg |= BM_RCTL_RFCE;
4702	e1000_write_phy_reg(&adapter->hw, BM_RCTL, preg);
4703
4704	/* enable PHY wakeup in MAC register */
4705	E1000_WRITE_REG(hw, E1000_WUC,
4706	    E1000_WUC_PHY_WAKE | E1000_WUC_PME_EN);
4707	E1000_WRITE_REG(hw, E1000_WUFC, adapter->wol);
4708
4709	/* configure and enable PHY wakeup in PHY registers */
4710	e1000_write_phy_reg(&adapter->hw, BM_WUFC, adapter->wol);
4711	e1000_write_phy_reg(&adapter->hw, BM_WUC, E1000_WUC_PME_EN);
4712
4713	/* activate PHY wakeup */
4714	ret = hw->phy.ops.acquire(hw);
4715	if (ret) {
4716		printf("Could not acquire PHY\n");
4717		return ret;
4718	}
4719	e1000_write_phy_reg_mdic(hw, IGP01E1000_PHY_PAGE_SELECT,
4720	                         (BM_WUC_ENABLE_PAGE << IGP_PAGE_SHIFT));
4721	ret = e1000_read_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, &preg);
4722	if (ret) {
4723		printf("Could not read PHY page 769\n");
4724		goto out;
4725	}
4726	preg |= BM_WUC_ENABLE_BIT | BM_WUC_HOST_WU_BIT;
4727	ret = e1000_write_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, preg);
4728	if (ret)
4729		printf("Could not set PHY Host Wakeup bit\n");
4730out:
4731	hw->phy.ops.release(hw);
4732
4733	return ret;
4734}
4735
4736static void
4737em_led_func(void *arg, int onoff)
4738{
4739	struct adapter	*adapter = arg;
4740
4741	EM_CORE_LOCK(adapter);
4742	if (onoff) {
4743		e1000_setup_led(&adapter->hw);
4744		e1000_led_on(&adapter->hw);
4745	} else {
4746		e1000_led_off(&adapter->hw);
4747		e1000_cleanup_led(&adapter->hw);
4748	}
4749	EM_CORE_UNLOCK(adapter);
4750}
4751
4752/**********************************************************************
4753 *
4754 *  Update the board statistics counters.
4755 *
4756 **********************************************************************/
4757static void
4758em_update_stats_counters(struct adapter *adapter)
4759{
4760	struct ifnet   *ifp;
4761
4762	if (adapter->hw.phy.media_type == e1000_media_type_copper ||
4763	   (E1000_READ_REG(&adapter->hw, E1000_STATUS) & E1000_STATUS_LU)) {
4764		adapter->stats.symerrs += E1000_READ_REG(&adapter->hw, E1000_SYMERRS);
4765		adapter->stats.sec += E1000_READ_REG(&adapter->hw, E1000_SEC);
4766	}
4767	adapter->stats.crcerrs += E1000_READ_REG(&adapter->hw, E1000_CRCERRS);
4768	adapter->stats.mpc += E1000_READ_REG(&adapter->hw, E1000_MPC);
4769	adapter->stats.scc += E1000_READ_REG(&adapter->hw, E1000_SCC);
4770	adapter->stats.ecol += E1000_READ_REG(&adapter->hw, E1000_ECOL);
4771
4772	adapter->stats.mcc += E1000_READ_REG(&adapter->hw, E1000_MCC);
4773	adapter->stats.latecol += E1000_READ_REG(&adapter->hw, E1000_LATECOL);
4774	adapter->stats.colc += E1000_READ_REG(&adapter->hw, E1000_COLC);
4775	adapter->stats.dc += E1000_READ_REG(&adapter->hw, E1000_DC);
4776	adapter->stats.rlec += E1000_READ_REG(&adapter->hw, E1000_RLEC);
4777	adapter->stats.xonrxc += E1000_READ_REG(&adapter->hw, E1000_XONRXC);
4778	adapter->stats.xontxc += E1000_READ_REG(&adapter->hw, E1000_XONTXC);
4779	adapter->stats.xoffrxc += E1000_READ_REG(&adapter->hw, E1000_XOFFRXC);
4780	adapter->stats.xofftxc += E1000_READ_REG(&adapter->hw, E1000_XOFFTXC);
4781	adapter->stats.fcruc += E1000_READ_REG(&adapter->hw, E1000_FCRUC);
4782	adapter->stats.prc64 += E1000_READ_REG(&adapter->hw, E1000_PRC64);
4783	adapter->stats.prc127 += E1000_READ_REG(&adapter->hw, E1000_PRC127);
4784	adapter->stats.prc255 += E1000_READ_REG(&adapter->hw, E1000_PRC255);
4785	adapter->stats.prc511 += E1000_READ_REG(&adapter->hw, E1000_PRC511);
4786	adapter->stats.prc1023 += E1000_READ_REG(&adapter->hw, E1000_PRC1023);
4787	adapter->stats.prc1522 += E1000_READ_REG(&adapter->hw, E1000_PRC1522);
4788	adapter->stats.gprc += E1000_READ_REG(&adapter->hw, E1000_GPRC);
4789	adapter->stats.bprc += E1000_READ_REG(&adapter->hw, E1000_BPRC);
4790	adapter->stats.mprc += E1000_READ_REG(&adapter->hw, E1000_MPRC);
4791	adapter->stats.gptc += E1000_READ_REG(&adapter->hw, E1000_GPTC);
4792
4793	/* For the 64-bit byte counters the low dword must be read first. */
4794	/* Both registers clear on the read of the high dword */
4795
4796	adapter->stats.gorc += E1000_READ_REG(&adapter->hw, E1000_GORCL) + ((u64)E1000_READ_REG(&adapter->hw, E1000_GORCH) << 32);
4797	adapter->stats.gotc += E1000_READ_REG(&adapter->hw, E1000_GOTCL) + ((u64)E1000_READ_REG(&adapter->hw, E1000_GOTCH) << 32);
4798
4799	adapter->stats.rnbc += E1000_READ_REG(&adapter->hw, E1000_RNBC);
4800	adapter->stats.ruc += E1000_READ_REG(&adapter->hw, E1000_RUC);
4801	adapter->stats.rfc += E1000_READ_REG(&adapter->hw, E1000_RFC);
4802	adapter->stats.roc += E1000_READ_REG(&adapter->hw, E1000_ROC);
4803	adapter->stats.rjc += E1000_READ_REG(&adapter->hw, E1000_RJC);
4804
4805	adapter->stats.tor += E1000_READ_REG(&adapter->hw, E1000_TORL) + ((u64)E1000_READ_REG(&adapter->hw, E1000_TORH) << 32);
4806	adapter->stats.tot += E1000_READ_REG(&adapter->hw, E1000_TOTL) + ((u64)E1000_READ_REG(&adapter->hw, E1000_TOTH) << 32);
4807
4808	adapter->stats.tpr += E1000_READ_REG(&adapter->hw, E1000_TPR);
4809	adapter->stats.tpt += E1000_READ_REG(&adapter->hw, E1000_TPT);
4810	adapter->stats.ptc64 += E1000_READ_REG(&adapter->hw, E1000_PTC64);
4811	adapter->stats.ptc127 += E1000_READ_REG(&adapter->hw, E1000_PTC127);
4812	adapter->stats.ptc255 += E1000_READ_REG(&adapter->hw, E1000_PTC255);
4813	adapter->stats.ptc511 += E1000_READ_REG(&adapter->hw, E1000_PTC511);
4814	adapter->stats.ptc1023 += E1000_READ_REG(&adapter->hw, E1000_PTC1023);
4815	adapter->stats.ptc1522 += E1000_READ_REG(&adapter->hw, E1000_PTC1522);
4816	adapter->stats.mptc += E1000_READ_REG(&adapter->hw, E1000_MPTC);
4817	adapter->stats.bptc += E1000_READ_REG(&adapter->hw, E1000_BPTC);
4818
4819	if (adapter->hw.mac.type >= e1000_82543) {
4820		adapter->stats.algnerrc +=
4821		E1000_READ_REG(&adapter->hw, E1000_ALGNERRC);
4822		adapter->stats.rxerrc +=
4823		E1000_READ_REG(&adapter->hw, E1000_RXERRC);
4824		adapter->stats.tncrs +=
4825		E1000_READ_REG(&adapter->hw, E1000_TNCRS);
4826		adapter->stats.cexterr +=
4827		E1000_READ_REG(&adapter->hw, E1000_CEXTERR);
4828		adapter->stats.tsctc +=
4829		E1000_READ_REG(&adapter->hw, E1000_TSCTC);
4830		adapter->stats.tsctfc +=
4831		E1000_READ_REG(&adapter->hw, E1000_TSCTFC);
4832	}
4833	ifp = adapter->ifp;
4834
4835	ifp->if_collisions = adapter->stats.colc;
4836
4837	/* Rx Errors */
4838	ifp->if_ierrors = adapter->dropped_pkts + adapter->stats.rxerrc +
4839	    adapter->stats.crcerrs + adapter->stats.algnerrc +
4840	    adapter->stats.ruc + adapter->stats.roc +
4841	    adapter->stats.mpc + adapter->stats.cexterr;
4842
4843	/* Tx Errors */
4844	ifp->if_oerrors = adapter->stats.ecol +
4845	    adapter->stats.latecol + adapter->watchdog_events;
4846}
4847
4848
4849/**********************************************************************
4850 *
4851 *  This routine is called only when em_display_debug_stats is enabled.
4852 *  This routine provides a way to take a look at important statistics
4853 *  maintained by the driver and hardware.
4854 *
4855 **********************************************************************/
4856static void
4857em_print_debug_info(struct adapter *adapter)
4858{
4859	device_t dev = adapter->dev;
4860	u8 *hw_addr = adapter->hw.hw_addr;
4861	struct rx_ring *rxr = adapter->rx_rings;
4862	struct tx_ring *txr = adapter->tx_rings;
4863
4864	device_printf(dev, "Adapter hardware address = %p \n", hw_addr);
4865	device_printf(dev, "CTRL = 0x%x RCTL = 0x%x \n",
4866	    E1000_READ_REG(&adapter->hw, E1000_CTRL),
4867	    E1000_READ_REG(&adapter->hw, E1000_RCTL));
4868	device_printf(dev, "Packet buffer = Tx=%dk Rx=%dk \n",
4869	    ((E1000_READ_REG(&adapter->hw, E1000_PBA) & 0xffff0000) >> 16),
4870	    (E1000_READ_REG(&adapter->hw, E1000_PBA) & 0xffff));
4871	device_printf(dev, "Flow control watermarks high = %d low = %d\n",
4872	    adapter->hw.fc.high_water,
4873	    adapter->hw.fc.low_water);
4874	device_printf(dev, "tx_int_delay = %d, tx_abs_int_delay = %d\n",
4875	    E1000_READ_REG(&adapter->hw, E1000_TIDV),
4876	    E1000_READ_REG(&adapter->hw, E1000_TADV));
4877	device_printf(dev, "rx_int_delay = %d, rx_abs_int_delay = %d\n",
4878	    E1000_READ_REG(&adapter->hw, E1000_RDTR),
4879	    E1000_READ_REG(&adapter->hw, E1000_RADV));
4880
4881	for (int i = 0; i < adapter->num_queues; i++, txr++) {
4882		device_printf(dev, "Queue(%d) tdh = %d, tdt = %d\n", i,
4883		    E1000_READ_REG(&adapter->hw, E1000_TDH(i)),
4884		    E1000_READ_REG(&adapter->hw, E1000_TDT(i)));
4885		device_printf(dev, "TX(%d) no descriptors avail event = %ld\n",
4886		    txr->me, txr->no_desc_avail);
4887		device_printf(dev, "TX(%d) MSIX IRQ Handled = %ld\n",
4888		    txr->me, txr->tx_irq);
4889		device_printf(dev, "Num Tx descriptors avail = %d\n",
4890		    txr->tx_avail);
4891		device_printf(dev, "Tx Descriptors not avail1 = %ld\n",
4892		    txr->no_desc_avail);
4893	}
4894	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4895		device_printf(dev, "RX(%d) MSIX IRQ Handled = %ld\n",
4896		    rxr->me, rxr->rx_irq);
4897		device_printf(dev, "hw rdh = %d, hw rdt = %d\n",
4898		    E1000_READ_REG(&adapter->hw, E1000_RDH(i)),
4899		    E1000_READ_REG(&adapter->hw, E1000_RDT(i)));
4900	}
4901	device_printf(dev, "Std mbuf failed = %ld\n",
4902	    adapter->mbuf_alloc_failed);
4903	device_printf(dev, "Std mbuf cluster failed = %ld\n",
4904	    adapter->mbuf_cluster_failed);
4905	device_printf(dev, "Driver dropped packets = %ld\n",
4906	    adapter->dropped_pkts);
4907}
4908
4909static void
4910em_print_hw_stats(struct adapter *adapter)
4911{
4912	device_t dev = adapter->dev;
4913
4914	device_printf(dev, "Excessive collisions = %lld\n",
4915	    (long long)adapter->stats.ecol);
4916#if	(DEBUG_HW > 0)  /* Don't output these errors normally */
4917	device_printf(dev, "Symbol errors = %lld\n",
4918	    (long long)adapter->stats.symerrs);
4919#endif
4920	device_printf(dev, "Sequence errors = %lld\n",
4921	    (long long)adapter->stats.sec);
4922	device_printf(dev, "Defer count = %lld\n",
4923	    (long long)adapter->stats.dc);
4924	device_printf(dev, "Missed Packets = %lld\n",
4925	    (long long)adapter->stats.mpc);
4926	device_printf(dev, "Receive No Buffers = %lld\n",
4927	    (long long)adapter->stats.rnbc);
4928	/* RLEC is inaccurate on some hardware, so calculate our own. */
4929	device_printf(dev, "Receive Length Errors = %lld\n",
4930	    ((long long)adapter->stats.roc + (long long)adapter->stats.ruc));
4931	device_printf(dev, "Receive errors = %lld\n",
4932	    (long long)adapter->stats.rxerrc);
4933	device_printf(dev, "Crc errors = %lld\n",
4934	    (long long)adapter->stats.crcerrs);
4935	device_printf(dev, "Alignment errors = %lld\n",
4936	    (long long)adapter->stats.algnerrc);
4937	device_printf(dev, "Collision/Carrier extension errors = %lld\n",
4938	    (long long)adapter->stats.cexterr);
4939	device_printf(dev, "watchdog timeouts = %ld\n",
4940	    adapter->watchdog_events);
4941	device_printf(dev, "XON Rcvd = %lld\n",
4942	    (long long)adapter->stats.xonrxc);
4943	device_printf(dev, "XON Xmtd = %lld\n",
4944	    (long long)adapter->stats.xontxc);
4945	device_printf(dev, "XOFF Rcvd = %lld\n",
4946	    (long long)adapter->stats.xoffrxc);
4947	device_printf(dev, "XOFF Xmtd = %lld\n",
4948	    (long long)adapter->stats.xofftxc);
4949	device_printf(dev, "Good Packets Rcvd = %lld\n",
4950	    (long long)adapter->stats.gprc);
4951	device_printf(dev, "Good Packets Xmtd = %lld\n",
4952	    (long long)adapter->stats.gptc);
4953	device_printf(dev, "TSO Contexts Xmtd = %lld\n",
4954	    (long long)adapter->stats.tsctc);
4955	device_printf(dev, "TSO Contexts Failed = %lld\n",
4956	    (long long)adapter->stats.tsctfc);
4957}
4958
4959/**********************************************************************
4960 *
4961 *  This routine provides a way to dump out the adapter eeprom,
4962 *  often a useful debug/service tool. This only dumps the first
4963 *  32 words, stuff that matters is in that extent.
4964 *
4965 **********************************************************************/
4966static void
4967em_print_nvm_info(struct adapter *adapter)
4968{
4969	u16	eeprom_data;
4970	int	i, j, row = 0;
4971
4972	/* It's a bit crude, but it gets the job done */
4973	printf("\nInterface EEPROM Dump:\n");
4974	printf("Offset\n0x0000  ");
4975	for (i = 0, j = 0; i < 32; i++, j++) {
4976		if (j == 8) { /* Make the offset block */
4977			j = 0; ++row;
4978			printf("\n0x00%x0  ", row);
4979		}
4980		e1000_read_nvm(&adapter->hw, i, 1, &eeprom_data);
4981		printf("%04x ", eeprom_data);
4982	}
4983	printf("\n");
4984}
4985
4986static int
4987em_sysctl_debug_info(SYSCTL_HANDLER_ARGS)
4988{
4989	struct adapter *adapter;
4990	int error;
4991	int result;
4992
4993	result = -1;
4994	error = sysctl_handle_int(oidp, &result, 0, req);
4995
4996	if (error || !req->newptr)
4997		return (error);
4998
4999	if (result == 1) {
5000		adapter = (struct adapter *)arg1;
5001		em_print_debug_info(adapter);
5002	}
5003	/*
5004	 * This value will cause a hex dump of the
5005	 * first 32 16-bit words of the EEPROM to
5006	 * the screen.
5007	 */
5008	if (result == 2) {
5009		adapter = (struct adapter *)arg1;
5010		em_print_nvm_info(adapter);
5011        }
5012
5013	return (error);
5014}
5015
5016
5017static int
5018em_sysctl_stats(SYSCTL_HANDLER_ARGS)
5019{
5020	struct adapter *adapter;
5021	int error;
5022	int result;
5023
5024	result = -1;
5025	error = sysctl_handle_int(oidp, &result, 0, req);
5026
5027	if (error || !req->newptr)
5028		return (error);
5029
5030	if (result == 1) {
5031		adapter = (struct adapter *)arg1;
5032		em_print_hw_stats(adapter);
5033	}
5034
5035	return (error);
5036}
5037
5038static int
5039em_sysctl_int_delay(SYSCTL_HANDLER_ARGS)
5040{
5041	struct em_int_delay_info *info;
5042	struct adapter *adapter;
5043	u32 regval;
5044	int error, usecs, ticks;
5045
5046	info = (struct em_int_delay_info *)arg1;
5047	usecs = info->value;
5048	error = sysctl_handle_int(oidp, &usecs, 0, req);
5049	if (error != 0 || req->newptr == NULL)
5050		return (error);
5051	if (usecs < 0 || usecs > EM_TICKS_TO_USECS(65535))
5052		return (EINVAL);
5053	info->value = usecs;
5054	ticks = EM_USECS_TO_TICKS(usecs);
5055
5056	adapter = info->adapter;
5057
5058	EM_CORE_LOCK(adapter);
5059	regval = E1000_READ_OFFSET(&adapter->hw, info->offset);
5060	regval = (regval & ~0xffff) | (ticks & 0xffff);
5061	/* Handle a few special cases. */
5062	switch (info->offset) {
5063	case E1000_RDTR:
5064		break;
5065	case E1000_TIDV:
5066		if (ticks == 0) {
5067			adapter->txd_cmd &= ~E1000_TXD_CMD_IDE;
5068			/* Don't write 0 into the TIDV register. */
5069			regval++;
5070		} else
5071			adapter->txd_cmd |= E1000_TXD_CMD_IDE;
5072		break;
5073	}
5074	E1000_WRITE_OFFSET(&adapter->hw, info->offset, regval);
5075	EM_CORE_UNLOCK(adapter);
5076	return (0);
5077}
5078
5079static void
5080em_add_int_delay_sysctl(struct adapter *adapter, const char *name,
5081	const char *description, struct em_int_delay_info *info,
5082	int offset, int value)
5083{
5084	info->adapter = adapter;
5085	info->offset = offset;
5086	info->value = value;
5087	SYSCTL_ADD_PROC(device_get_sysctl_ctx(adapter->dev),
5088	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
5089	    OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW,
5090	    info, 0, em_sysctl_int_delay, "I", description);
5091}
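/*
 * Editor's usage sketch (the tunable name is illustrative): the
 * handler above surfaces as a read/write sysctl under the device's
 * tree, e.g.
 *
 *	# sysctl dev.em.0.rx_int_delay=32
 *
 * which em_sysctl_int_delay() converts via EM_USECS_TO_TICKS() and
 * folds into the low 16 bits of the corresponding delay register.
 */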
5092
5093static void
5094em_add_rx_process_limit(struct adapter *adapter, const char *name,
5095	const char *description, int *limit, int value)
5096{
5097	*limit = value;
5098	SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev),
5099	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
5100	    OID_AUTO, name, CTLFLAG_RW, limit, value, description);
5101}
5102
5103
5104