if_em.c revision 206388
1/******************************************************************************
2
3  Copyright (c) 2001-2010, Intel Corporation
4  All rights reserved.
5
6  Redistribution and use in source and binary forms, with or without
7  modification, are permitted provided that the following conditions are met:
8
9   1. Redistributions of source code must retain the above copyright notice,
10      this list of conditions and the following disclaimer.
11
12   2. Redistributions in binary form must reproduce the above copyright
13      notice, this list of conditions and the following disclaimer in the
14      documentation and/or other materials provided with the distribution.
15
16   3. Neither the name of the Intel Corporation nor the names of its
17      contributors may be used to endorse or promote products derived from
18      this software without specific prior written permission.
19
20  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
24  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30  POSSIBILITY OF SUCH DAMAGE.
31
32******************************************************************************/
33/*$FreeBSD: head/sys/dev/e1000/if_em.c 206388 2010-04-08 00:50:43Z jfv $*/
34
35#ifdef HAVE_KERNEL_OPTION_HEADERS
36#include "opt_device_polling.h"
37#include "opt_inet.h"
38#endif
39
40#include <sys/param.h>
41#include <sys/systm.h>
42#if __FreeBSD_version >= 800000
43#include <sys/buf_ring.h>
44#endif
45#include <sys/bus.h>
46#include <sys/endian.h>
47#include <sys/kernel.h>
48#include <sys/kthread.h>
49#include <sys/malloc.h>
50#include <sys/mbuf.h>
51#include <sys/module.h>
52#include <sys/rman.h>
53#include <sys/socket.h>
54#include <sys/sockio.h>
55#include <sys/sysctl.h>
56#include <sys/taskqueue.h>
57#include <sys/eventhandler.h>
58#include <machine/bus.h>
59#include <machine/resource.h>
60
61#include <net/bpf.h>
62#include <net/ethernet.h>
63#include <net/if.h>
64#include <net/if_arp.h>
65#include <net/if_dl.h>
66#include <net/if_media.h>
67
68#include <net/if_types.h>
69#include <net/if_vlan_var.h>
70
71#include <netinet/in_systm.h>
72#include <netinet/in.h>
73#include <netinet/if_ether.h>
74#include <netinet/ip.h>
75#include <netinet/ip6.h>
76#include <netinet/tcp.h>
77#include <netinet/udp.h>
78
79#include <machine/in_cksum.h>
80#include <dev/led/led.h>
81#include <dev/pci/pcivar.h>
82#include <dev/pci/pcireg.h>
83
84#include "e1000_api.h"
85#include "e1000_82571.h"
86#include "if_em.h"
87
88/*********************************************************************
89 *  Set this to one to display debug statistics
90 *********************************************************************/
91int	em_display_debug_stats = 0;
92
93/*********************************************************************
94 *  Driver version:
95 *********************************************************************/
96char em_driver_version[] = "7.0.1";
97
98
99/*********************************************************************
100 *  PCI Device ID Table
101 *
102 *  Used by probe to select devices to load on
103 *  Last field stores an index into e1000_strings
104 *  Last entry must be all 0s
105 *
106 *  { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index }
107 *********************************************************************/
108
109static em_vendor_info_t em_vendor_info_array[] =
110{
111	/* Intel(R) PRO/1000 Network Connection */
112	{ 0x8086, E1000_DEV_ID_82571EB_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
113	{ 0x8086, E1000_DEV_ID_82571EB_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
114	{ 0x8086, E1000_DEV_ID_82571EB_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
115	{ 0x8086, E1000_DEV_ID_82571EB_SERDES_DUAL,
116						PCI_ANY_ID, PCI_ANY_ID, 0},
117	{ 0x8086, E1000_DEV_ID_82571EB_SERDES_QUAD,
118						PCI_ANY_ID, PCI_ANY_ID, 0},
119	{ 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER,
120						PCI_ANY_ID, PCI_ANY_ID, 0},
121	{ 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER_LP,
122						PCI_ANY_ID, PCI_ANY_ID, 0},
123	{ 0x8086, E1000_DEV_ID_82571EB_QUAD_FIBER,
124						PCI_ANY_ID, PCI_ANY_ID, 0},
125	{ 0x8086, E1000_DEV_ID_82571PT_QUAD_COPPER,
126						PCI_ANY_ID, PCI_ANY_ID, 0},
127	{ 0x8086, E1000_DEV_ID_82572EI_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
128	{ 0x8086, E1000_DEV_ID_82572EI_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
129	{ 0x8086, E1000_DEV_ID_82572EI_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
130	{ 0x8086, E1000_DEV_ID_82572EI,		PCI_ANY_ID, PCI_ANY_ID, 0},
131
132	{ 0x8086, E1000_DEV_ID_82573E,		PCI_ANY_ID, PCI_ANY_ID, 0},
133	{ 0x8086, E1000_DEV_ID_82573E_IAMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
134	{ 0x8086, E1000_DEV_ID_82573L,		PCI_ANY_ID, PCI_ANY_ID, 0},
135	{ 0x8086, E1000_DEV_ID_82583V,		PCI_ANY_ID, PCI_ANY_ID, 0},
136	{ 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_SPT,
137						PCI_ANY_ID, PCI_ANY_ID, 0},
138	{ 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_SPT,
139						PCI_ANY_ID, PCI_ANY_ID, 0},
140	{ 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_DPT,
141						PCI_ANY_ID, PCI_ANY_ID, 0},
142	{ 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_DPT,
143						PCI_ANY_ID, PCI_ANY_ID, 0},
144	{ 0x8086, E1000_DEV_ID_ICH8_IGP_M_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
145	{ 0x8086, E1000_DEV_ID_ICH8_IGP_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
146	{ 0x8086, E1000_DEV_ID_ICH8_IGP_C,	PCI_ANY_ID, PCI_ANY_ID, 0},
147	{ 0x8086, E1000_DEV_ID_ICH8_IFE,	PCI_ANY_ID, PCI_ANY_ID, 0},
148	{ 0x8086, E1000_DEV_ID_ICH8_IFE_GT,	PCI_ANY_ID, PCI_ANY_ID, 0},
149	{ 0x8086, E1000_DEV_ID_ICH8_IFE_G,	PCI_ANY_ID, PCI_ANY_ID, 0},
150	{ 0x8086, E1000_DEV_ID_ICH8_IGP_M,	PCI_ANY_ID, PCI_ANY_ID, 0},
151	{ 0x8086, E1000_DEV_ID_ICH8_82567V_3,	PCI_ANY_ID, PCI_ANY_ID, 0},
152	{ 0x8086, E1000_DEV_ID_ICH9_IGP_M_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
153	{ 0x8086, E1000_DEV_ID_ICH9_IGP_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
154	{ 0x8086, E1000_DEV_ID_ICH9_IGP_C,	PCI_ANY_ID, PCI_ANY_ID, 0},
155	{ 0x8086, E1000_DEV_ID_ICH9_IGP_M,	PCI_ANY_ID, PCI_ANY_ID, 0},
156	{ 0x8086, E1000_DEV_ID_ICH9_IGP_M_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
157	{ 0x8086, E1000_DEV_ID_ICH9_IFE,	PCI_ANY_ID, PCI_ANY_ID, 0},
158	{ 0x8086, E1000_DEV_ID_ICH9_IFE_GT,	PCI_ANY_ID, PCI_ANY_ID, 0},
159	{ 0x8086, E1000_DEV_ID_ICH9_IFE_G,	PCI_ANY_ID, PCI_ANY_ID, 0},
160	{ 0x8086, E1000_DEV_ID_ICH9_BM,		PCI_ANY_ID, PCI_ANY_ID, 0},
161	{ 0x8086, E1000_DEV_ID_82574L,		PCI_ANY_ID, PCI_ANY_ID, 0},
162	{ 0x8086, E1000_DEV_ID_82574LA,		PCI_ANY_ID, PCI_ANY_ID, 0},
163	{ 0x8086, E1000_DEV_ID_ICH10_R_BM_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
164	{ 0x8086, E1000_DEV_ID_ICH10_R_BM_LF,	PCI_ANY_ID, PCI_ANY_ID, 0},
165	{ 0x8086, E1000_DEV_ID_ICH10_R_BM_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
166	{ 0x8086, E1000_DEV_ID_ICH10_D_BM_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
167	{ 0x8086, E1000_DEV_ID_ICH10_D_BM_LF,	PCI_ANY_ID, PCI_ANY_ID, 0},
168	{ 0x8086, E1000_DEV_ID_PCH_M_HV_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
169	{ 0x8086, E1000_DEV_ID_PCH_M_HV_LC,	PCI_ANY_ID, PCI_ANY_ID, 0},
170	{ 0x8086, E1000_DEV_ID_PCH_D_HV_DM,	PCI_ANY_ID, PCI_ANY_ID, 0},
171	{ 0x8086, E1000_DEV_ID_PCH_D_HV_DC,	PCI_ANY_ID, PCI_ANY_ID, 0},
172	/* required last entry */
173	{ 0, 0, 0, 0, 0}
174};
175
176/*********************************************************************
177 *  Table of branding strings for all supported NICs.
178 *********************************************************************/
179
180static char *em_strings[] = {
181	"Intel(R) PRO/1000 Network Connection"
182};
183
184/*********************************************************************
185 *  Function prototypes
186 *********************************************************************/
187static int	em_probe(device_t);
188static int	em_attach(device_t);
189static int	em_detach(device_t);
190static int	em_shutdown(device_t);
191static int	em_suspend(device_t);
192static int	em_resume(device_t);
193static void	em_start(struct ifnet *);
194static void	em_start_locked(struct ifnet *, struct tx_ring *);
195#if __FreeBSD_version >= 800000
196static int	em_mq_start(struct ifnet *, struct mbuf *);
197static int	em_mq_start_locked(struct ifnet *,
198		    struct tx_ring *, struct mbuf *);
199static void	em_qflush(struct ifnet *);
200#endif
201static int	em_ioctl(struct ifnet *, u_long, caddr_t);
202static void	em_init(void *);
203static void	em_init_locked(struct adapter *);
204static void	em_stop(void *);
205static void	em_media_status(struct ifnet *, struct ifmediareq *);
206static int	em_media_change(struct ifnet *);
207static void	em_identify_hardware(struct adapter *);
208static int	em_allocate_pci_resources(struct adapter *);
209static int	em_allocate_legacy(struct adapter *);
210static int	em_allocate_msix(struct adapter *);
211static int	em_allocate_queues(struct adapter *);
212static int	em_setup_msix(struct adapter *);
213static void	em_free_pci_resources(struct adapter *);
214static void	em_local_timer(void *);
215static void	em_reset(struct adapter *);
216static void	em_setup_interface(device_t, struct adapter *);
217
218static void	em_setup_transmit_structures(struct adapter *);
219static void	em_initialize_transmit_unit(struct adapter *);
220static int	em_allocate_transmit_buffers(struct tx_ring *);
221static void	em_free_transmit_structures(struct adapter *);
222static void	em_free_transmit_buffers(struct tx_ring *);
223
224static int	em_setup_receive_structures(struct adapter *);
225static int	em_allocate_receive_buffers(struct rx_ring *);
226static void	em_initialize_receive_unit(struct adapter *);
227static void	em_free_receive_structures(struct adapter *);
228static void	em_free_receive_buffers(struct rx_ring *);
229
230static void	em_enable_intr(struct adapter *);
231static void	em_disable_intr(struct adapter *);
232static void	em_update_stats_counters(struct adapter *);
233static bool	em_txeof(struct tx_ring *);
234static int	em_rxeof(struct rx_ring *, int);
235#ifndef __NO_STRICT_ALIGNMENT
236static int	em_fixup_rx(struct rx_ring *);
237#endif
238static void	em_receive_checksum(struct e1000_rx_desc *, struct mbuf *);
239static void	em_transmit_checksum_setup(struct tx_ring *, struct mbuf *,
240		    u32 *, u32 *);
241static bool	em_tso_setup(struct tx_ring *, struct mbuf *, u32 *, u32 *);
242static void	em_set_promisc(struct adapter *);
243static void	em_disable_promisc(struct adapter *);
244static void	em_set_multi(struct adapter *);
245static void	em_print_hw_stats(struct adapter *);
246static void	em_update_link_status(struct adapter *);
247static void	em_refresh_mbufs(struct rx_ring *, int);
248static void	em_register_vlan(void *, struct ifnet *, u16);
249static void	em_unregister_vlan(void *, struct ifnet *, u16);
250static void	em_setup_vlan_hw_support(struct adapter *);
251static int	em_xmit(struct tx_ring *, struct mbuf **);
252static int	em_dma_malloc(struct adapter *, bus_size_t,
253		    struct em_dma_alloc *, int);
254static void	em_dma_free(struct adapter *, struct em_dma_alloc *);
255static void	em_print_debug_info(struct adapter *);
256static void	em_print_nvm_info(struct adapter *);
257static int 	em_is_valid_ether_addr(u8 *);
258static int	em_sysctl_stats(SYSCTL_HANDLER_ARGS);
259static int	em_sysctl_debug_info(SYSCTL_HANDLER_ARGS);
260static int	em_sysctl_int_delay(SYSCTL_HANDLER_ARGS);
261static void	em_add_int_delay_sysctl(struct adapter *, const char *,
262		    const char *, struct em_int_delay_info *, int, int);
263/* Management and WOL Support */
264static void	em_init_manageability(struct adapter *);
265static void	em_release_manageability(struct adapter *);
266static void     em_get_hw_control(struct adapter *);
267static void     em_release_hw_control(struct adapter *);
268static void	em_get_wakeup(device_t);
269static void     em_enable_wakeup(device_t);
270static int	em_enable_phy_wakeup(struct adapter *);
271static void	em_led_func(void *, int);
272
273static int	em_irq_fast(void *);
274
275/* MSIX handlers */
276static void	em_msix_tx(void *);
277static void	em_msix_rx(void *);
278static void	em_msix_link(void *);
279static void	em_handle_tx(void *context, int pending);
280static void	em_handle_rx(void *context, int pending);
281static void	em_handle_link(void *context, int pending);
282
283static void	em_add_rx_process_limit(struct adapter *, const char *,
284		    const char *, int *, int);
285
286#ifdef DEVICE_POLLING
287static poll_handler_t em_poll;
288#endif /* POLLING */
289
290/*********************************************************************
291 *  FreeBSD Device Interface Entry Points
292 *********************************************************************/
293
294static device_method_t em_methods[] = {
295	/* Device interface */
296	DEVMETHOD(device_probe, em_probe),
297	DEVMETHOD(device_attach, em_attach),
298	DEVMETHOD(device_detach, em_detach),
299	DEVMETHOD(device_shutdown, em_shutdown),
300	DEVMETHOD(device_suspend, em_suspend),
301	DEVMETHOD(device_resume, em_resume),
302	{0, 0}
303};
304
305static driver_t em_driver = {
306	"em", em_methods, sizeof(struct adapter),
307};
308
309devclass_t em_devclass;
310DRIVER_MODULE(em, pci, em_driver, em_devclass, 0, 0);
311MODULE_DEPEND(em, pci, 1, 1, 1);
312MODULE_DEPEND(em, ether, 1, 1, 1);
313
314/*********************************************************************
315 *  Tunable default values.
316 *********************************************************************/
317
318#define EM_TICKS_TO_USECS(ticks)	((1024 * (ticks) + 500) / 1000)
319#define EM_USECS_TO_TICKS(usecs)	((1000 * (usecs) + 512) / 1024)
320#define M_TSO_LEN			66
321
322/* Allow common code without TSO */
323#ifndef CSUM_TSO
324#define CSUM_TSO	0
325#endif
326
327static int em_tx_int_delay_dflt = EM_TICKS_TO_USECS(EM_TIDV);
328static int em_rx_int_delay_dflt = EM_TICKS_TO_USECS(EM_RDTR);
329TUNABLE_INT("hw.em.tx_int_delay", &em_tx_int_delay_dflt);
330TUNABLE_INT("hw.em.rx_int_delay", &em_rx_int_delay_dflt);
331
332static int em_tx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_TADV);
333static int em_rx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_RADV);
334TUNABLE_INT("hw.em.tx_abs_int_delay", &em_tx_abs_int_delay_dflt);
335TUNABLE_INT("hw.em.rx_abs_int_delay", &em_rx_abs_int_delay_dflt);
336
337static int em_rxd = EM_DEFAULT_RXD;
338static int em_txd = EM_DEFAULT_TXD;
339TUNABLE_INT("hw.em.rxd", &em_rxd);
340TUNABLE_INT("hw.em.txd", &em_txd);
341
342static int em_smart_pwr_down = FALSE;
343TUNABLE_INT("hw.em.smart_pwr_down", &em_smart_pwr_down);
344
345/* Controls whether promiscuous also shows bad packets */
346static int em_debug_sbp = FALSE;
347TUNABLE_INT("hw.em.sbp", &em_debug_sbp);
348
349/* Local controls for MSI/MSIX */
350static int em_enable_msix = TRUE;
351static int em_msix_queues = 2; /* for 82574, can be 1 or 2 */
352TUNABLE_INT("hw.em.enable_msix", &em_enable_msix);
353TUNABLE_INT("hw.em.msix_queues", &em_msix_queues);
354
355/* How many packets rxeof tries to clean at a time */
356static int em_rx_process_limit = 100;
357TUNABLE_INT("hw.em.rx_process_limit", &em_rx_process_limit);
358
359/* Flow control setting - default to FULL */
360static int em_fc_setting = e1000_fc_full;
361TUNABLE_INT("hw.em.fc_setting", &em_fc_setting);
362
363/*
364** Shadow VFTA table, this is needed because
365** the real vlan filter table gets cleared during
366** a soft reset and the driver needs to be able
367** to repopulate it.
368*/
369static u32 em_shadow_vfta[EM_VFTA_SIZE];
370
371/* Global used in WOL setup with multiport cards */
372static int global_quad_port_a = 0;
373
374/*********************************************************************
375 *  Device identification routine
376 *
377 *  em_probe determines if the driver should be loaded on
378 *  adapter based on PCI vendor/device id of the adapter.
379 *
380 *  return BUS_PROBE_DEFAULT on success, positive on failure
381 *********************************************************************/
382
383static int
384em_probe(device_t dev)
385{
386	char		adapter_name[60];
387	u16		pci_vendor_id = 0;
388	u16		pci_device_id = 0;
389	u16		pci_subvendor_id = 0;
390	u16		pci_subdevice_id = 0;
391	em_vendor_info_t *ent;
392
393	INIT_DEBUGOUT("em_probe: begin");
394
395	pci_vendor_id = pci_get_vendor(dev);
396	if (pci_vendor_id != EM_VENDOR_ID)
397		return (ENXIO);
398
399	pci_device_id = pci_get_device(dev);
400	pci_subvendor_id = pci_get_subvendor(dev);
401	pci_subdevice_id = pci_get_subdevice(dev);
402
403	ent = em_vendor_info_array;
404	while (ent->vendor_id != 0) {
405		if ((pci_vendor_id == ent->vendor_id) &&
406		    (pci_device_id == ent->device_id) &&
407
408		    ((pci_subvendor_id == ent->subvendor_id) ||
409		    (ent->subvendor_id == PCI_ANY_ID)) &&
410
411		    ((pci_subdevice_id == ent->subdevice_id) ||
412		    (ent->subdevice_id == PCI_ANY_ID))) {
413			sprintf(adapter_name, "%s %s",
414				em_strings[ent->index],
415				em_driver_version);
416			device_set_desc_copy(dev, adapter_name);
417			return (BUS_PROBE_DEFAULT);
418		}
419		ent++;
420	}
421
422	return (ENXIO);
423}
424
425/*********************************************************************
426 *  Device initialization routine
427 *
428 *  The attach entry point is called when the driver is being loaded.
429 *  This routine identifies the type of hardware, allocates all resources
430 *  and initializes the hardware.
431 *
432 *  return 0 on success, positive on failure
433 *********************************************************************/
434
435static int
436em_attach(device_t dev)
437{
438	struct adapter	*adapter;
439	int		error = 0;
440
441	INIT_DEBUGOUT("em_attach: begin");
442
443	adapter = device_get_softc(dev);
444	adapter->dev = adapter->osdep.dev = dev;
445	EM_CORE_LOCK_INIT(adapter, device_get_nameunit(dev));
446
447	/* SYSCTL stuff */
448	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
449	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
450	    OID_AUTO, "debug", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
451	    em_sysctl_debug_info, "I", "Debug Information");
452
453	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
454	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
455	    OID_AUTO, "stats", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
456	    em_sysctl_stats, "I", "Statistics");
457
458	callout_init_mtx(&adapter->timer, &adapter->core_mtx, 0);
459
460	/* Determine hardware and mac info */
461	em_identify_hardware(adapter);
462
463	/* Setup PCI resources */
464	if (em_allocate_pci_resources(adapter)) {
465		device_printf(dev, "Allocation of PCI resources failed\n");
466		error = ENXIO;
467		goto err_pci;
468	}
469
470	/*
471	** For ICH8 and family we need to
472	** map the flash memory, and this
473	** must happen after the MAC is
474	** identified
475	*/
476	if ((adapter->hw.mac.type == e1000_ich8lan) ||
477	    (adapter->hw.mac.type == e1000_pchlan) ||
478	    (adapter->hw.mac.type == e1000_ich9lan) ||
479	    (adapter->hw.mac.type == e1000_ich10lan)) {
480		int rid = EM_BAR_TYPE_FLASH;
481		adapter->flash = bus_alloc_resource_any(dev,
482		    SYS_RES_MEMORY, &rid, RF_ACTIVE);
483		if (adapter->flash == NULL) {
484			device_printf(dev, "Mapping of Flash failed\n");
485			error = ENXIO;
486			goto err_pci;
487		}
488		/* This is used in the shared code */
489		adapter->hw.flash_address = (u8 *)adapter->flash;
490		adapter->osdep.flash_bus_space_tag =
491		    rman_get_bustag(adapter->flash);
492		adapter->osdep.flash_bus_space_handle =
493		    rman_get_bushandle(adapter->flash);
494	}
495
496	/* Do Shared Code initialization */
497	if (e1000_setup_init_funcs(&adapter->hw, TRUE)) {
498		device_printf(dev, "Setup of Shared code failed\n");
499		error = ENXIO;
500		goto err_pci;
501	}
502
503	e1000_get_bus_info(&adapter->hw);
504
505	/* Set up some sysctls for the tunable interrupt delays */
506	em_add_int_delay_sysctl(adapter, "rx_int_delay",
507	    "receive interrupt delay in usecs", &adapter->rx_int_delay,
508	    E1000_REGISTER(&adapter->hw, E1000_RDTR), em_rx_int_delay_dflt);
509	em_add_int_delay_sysctl(adapter, "tx_int_delay",
510	    "transmit interrupt delay in usecs", &adapter->tx_int_delay,
511	    E1000_REGISTER(&adapter->hw, E1000_TIDV), em_tx_int_delay_dflt);
512	em_add_int_delay_sysctl(adapter, "rx_abs_int_delay",
513	    "receive interrupt delay limit in usecs",
514	    &adapter->rx_abs_int_delay,
515	    E1000_REGISTER(&adapter->hw, E1000_RADV),
516	    em_rx_abs_int_delay_dflt);
517	em_add_int_delay_sysctl(adapter, "tx_abs_int_delay",
518	    "transmit interrupt delay limit in usecs",
519	    &adapter->tx_abs_int_delay,
520	    E1000_REGISTER(&adapter->hw, E1000_TADV),
521	    em_tx_abs_int_delay_dflt);
522
523	/* Sysctls for limiting the amount of work done in the taskqueue */
524	em_add_rx_process_limit(adapter, "rx_processing_limit",
525	    "max number of rx packets to process", &adapter->rx_process_limit,
526	    em_rx_process_limit);
527
528	/*
529	 * Validate number of transmit and receive descriptors. It
530	 * must not exceed hardware maximum, and must be multiple
531	 * of E1000_DBA_ALIGN.
532	 */
533	if (((em_txd * sizeof(struct e1000_tx_desc)) % EM_DBA_ALIGN) != 0 ||
534	    (em_txd > EM_MAX_TXD) || (em_txd < EM_MIN_TXD)) {
535		device_printf(dev, "Using %d TX descriptors instead of %d!\n",
536		    EM_DEFAULT_TXD, em_txd);
537		adapter->num_tx_desc = EM_DEFAULT_TXD;
538	} else
539		adapter->num_tx_desc = em_txd;
540
541	if (((em_rxd * sizeof(struct e1000_rx_desc)) % EM_DBA_ALIGN) != 0 ||
542	    (em_rxd > EM_MAX_RXD) || (em_rxd < EM_MIN_RXD)) {
543		device_printf(dev, "Using %d RX descriptors instead of %d!\n",
544		    EM_DEFAULT_RXD, em_rxd);
545		adapter->num_rx_desc = EM_DEFAULT_RXD;
546	} else
547		adapter->num_rx_desc = em_rxd;
548
549	adapter->hw.mac.autoneg = DO_AUTO_NEG;
550	adapter->hw.phy.autoneg_wait_to_complete = FALSE;
551	adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
552
553	/* Copper options */
554	if (adapter->hw.phy.media_type == e1000_media_type_copper) {
555		adapter->hw.phy.mdix = AUTO_ALL_MODES;
556		adapter->hw.phy.disable_polarity_correction = FALSE;
557		adapter->hw.phy.ms_type = EM_MASTER_SLAVE;
558	}
559
560	/*
561	 * Set the frame limits assuming
562	 * standard ethernet sized frames.
563	 */
564	adapter->max_frame_size = ETHERMTU + ETHER_HDR_LEN + ETHERNET_FCS_SIZE;
565	adapter->min_frame_size = ETH_ZLEN + ETHERNET_FCS_SIZE;
566
567	/*
568	 * This controls when hardware reports transmit completion
569	 * status.
570	 */
571	adapter->hw.mac.report_tx_early = 1;
572
573	/*
574	** Get queue/ring memory
575	*/
576	if (em_allocate_queues(adapter)) {
577		error = ENOMEM;
578		goto err_pci;
579	}
580
581	/*
582	** Start from a known state, this is
583	** important in reading the nvm and
584	** mac from that.
585	*/
586	e1000_reset_hw(&adapter->hw);
587
588	/* Make sure we have a good EEPROM before we read from it */
589	if (e1000_validate_nvm_checksum(&adapter->hw) < 0) {
590		/*
591		** Some PCI-E parts fail the first check due to
592		** the link being in sleep state, call it again,
593		** if it fails a second time its a real issue.
594		*/
595		if (e1000_validate_nvm_checksum(&adapter->hw) < 0) {
596			device_printf(dev,
597			    "The EEPROM Checksum Is Not Valid\n");
598			error = EIO;
599			goto err_late;
600		}
601	}
602
603	/* Copy the permanent MAC address out of the EEPROM */
604	if (e1000_read_mac_addr(&adapter->hw) < 0) {
605		device_printf(dev, "EEPROM read error while reading MAC"
606		    " address\n");
607		error = EIO;
608		goto err_late;
609	}
610
611	if (!em_is_valid_ether_addr(adapter->hw.mac.addr)) {
612		device_printf(dev, "Invalid MAC address\n");
613		error = EIO;
614		goto err_late;
615	}
616
617	/*
618	**  Do interrupt configuration
619	*/
620	if (adapter->msix > 1) /* Do MSIX */
621		error = em_allocate_msix(adapter);
622	else  /* MSI or Legacy */
623		error = em_allocate_legacy(adapter);
624	if (error)
625		goto err_late;
626
627	/*
628	 * Get Wake-on-Lan and Management info for later use
629	 */
630	em_get_wakeup(dev);
631
632	/* Setup OS specific network interface */
633	em_setup_interface(dev, adapter);
634
635	em_reset(adapter);
636
637	/* Initialize statistics */
638	em_update_stats_counters(adapter);
639
640	adapter->hw.mac.get_link_status = 1;
641	em_update_link_status(adapter);
642
643	/* Indicate SOL/IDER usage */
644	if (e1000_check_reset_block(&adapter->hw))
645		device_printf(dev,
646		    "PHY reset is blocked due to SOL/IDER session.\n");
647
648	/* Register for VLAN events */
649	adapter->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
650	    em_register_vlan, adapter, EVENTHANDLER_PRI_FIRST);
651	adapter->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
652	    em_unregister_vlan, adapter, EVENTHANDLER_PRI_FIRST);
653
654	/* Non-AMT based hardware can now take control from firmware */
655	if (adapter->has_manage && !adapter->has_amt)
656		em_get_hw_control(adapter);
657
658	/* Tell the stack that the interface is not active */
659	adapter->ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
660
661	adapter->led_dev = led_create(em_led_func, adapter,
662	    device_get_nameunit(dev));
663
664	INIT_DEBUGOUT("em_attach: end");
665
666	return (0);
667
668err_late:
669	em_free_transmit_structures(adapter);
670	em_free_receive_structures(adapter);
671	em_release_hw_control(adapter);
672err_pci:
673	em_free_pci_resources(adapter);
674	EM_CORE_LOCK_DESTROY(adapter);
675
676	return (error);
677}
678
679/*********************************************************************
680 *  Device removal routine
681 *
682 *  The detach entry point is called when the driver is being removed.
683 *  This routine stops the adapter and deallocates all the resources
684 *  that were allocated for driver operation.
685 *
686 *  return 0 on success, positive on failure
687 *********************************************************************/
688
689static int
690em_detach(device_t dev)
691{
692	struct adapter	*adapter = device_get_softc(dev);
693	struct ifnet	*ifp = adapter->ifp;
694
695	INIT_DEBUGOUT("em_detach: begin");
696
697	/* Make sure VLANS are not using driver */
698	if (adapter->ifp->if_vlantrunk != NULL) {
699		device_printf(dev,"Vlan in use, detach first\n");
700		return (EBUSY);
701	}
702
703#ifdef DEVICE_POLLING
704	if (ifp->if_capenable & IFCAP_POLLING)
705		ether_poll_deregister(ifp);
706#endif
707
708	EM_CORE_LOCK(adapter);
709	adapter->in_detach = 1;
710	em_stop(adapter);
711	EM_CORE_UNLOCK(adapter);
712	EM_CORE_LOCK_DESTROY(adapter);
713
714	e1000_phy_hw_reset(&adapter->hw);
715
716	em_release_manageability(adapter);
717	em_release_hw_control(adapter);
718
719	/* Unregister VLAN events */
720	if (adapter->vlan_attach != NULL)
721		EVENTHANDLER_DEREGISTER(vlan_config, adapter->vlan_attach);
722	if (adapter->vlan_detach != NULL)
723		EVENTHANDLER_DEREGISTER(vlan_unconfig, adapter->vlan_detach);
724
725	ether_ifdetach(adapter->ifp);
726	callout_drain(&adapter->timer);
727
728	em_free_pci_resources(adapter);
729	bus_generic_detach(dev);
730	if_free(ifp);
731
732	em_free_transmit_structures(adapter);
733	em_free_receive_structures(adapter);
734
735	em_release_hw_control(adapter);
736
737	return (0);
738}
739
740/*********************************************************************
741 *
742 *  Shutdown entry point
743 *
744 **********************************************************************/
745
746static int
747em_shutdown(device_t dev)
748{
749	return em_suspend(dev);
750}
751
752/*
753 * Suspend/resume device methods.
754 */
755static int
756em_suspend(device_t dev)
757{
758	struct adapter *adapter = device_get_softc(dev);
759
760	EM_CORE_LOCK(adapter);
761
762        em_release_manageability(adapter);
763	em_release_hw_control(adapter);
764	em_enable_wakeup(dev);
765
766	EM_CORE_UNLOCK(adapter);
767
768	return bus_generic_suspend(dev);
769}
770
771static int
772em_resume(device_t dev)
773{
774	struct adapter *adapter = device_get_softc(dev);
775	struct ifnet *ifp = adapter->ifp;
776
777	if (adapter->led_dev != NULL)
778		led_destroy(adapter->led_dev);
779
780	EM_CORE_LOCK(adapter);
781	em_init_locked(adapter);
782	em_init_manageability(adapter);
783	EM_CORE_UNLOCK(adapter);
784	em_start(ifp);
785
786	return bus_generic_resume(dev);
787}
788
789
790/*********************************************************************
791 *  Transmit entry point
792 *
793 *  em_start is called by the stack to initiate a transmit.
794 *  The driver will remain in this routine as long as there are
795 *  packets to transmit and transmit resources are available.
796 *  In case resources are not available stack is notified and
797 *  the packet is requeued.
798 **********************************************************************/
799
800#if __FreeBSD_version >= 800000
801static int
802em_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr, struct mbuf *m)
803{
804	struct adapter  *adapter = txr->adapter;
805        struct mbuf     *next;
806        int             err = 0, enq = 0;
807
808	if ((ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) !=
809	    IFF_DRV_RUNNING || adapter->link_active == 0) {
810		if (m != NULL)
811			err = drbr_enqueue(ifp, txr->br, m);
812		return (err);
813	}
814
815	enq = 0;
816	if (m == NULL)
817		next = drbr_dequeue(ifp, txr->br);
818	else
819		next = m;
820
821	/* Process the queue */
822	while (next != NULL) {
823		if ((err = em_xmit(txr, &next)) != 0) {
824                        if (next != NULL)
825                                err = drbr_enqueue(ifp, txr->br, next);
826                        break;
827		}
828		enq++;
829		drbr_stats_update(ifp, next->m_pkthdr.len, next->m_flags);
830		ETHER_BPF_MTAP(ifp, next);
831		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
832                        break;
833		next = drbr_dequeue(ifp, txr->br);
834	}
835
836	if (enq > 0) {
837                /* Set the watchdog */
838                txr->watchdog_check = TRUE;
839	}
840	return (err);
841}
842
843/*
844** Multiqueue capable stack interface, this is not
845** yet truely multiqueue, but that is coming...
846*/
847static int
848em_mq_start(struct ifnet *ifp, struct mbuf *m)
849{
850	struct adapter	*adapter = ifp->if_softc;
851	struct tx_ring	*txr;
852	int 		i, error = 0;
853
854	/* Which queue to use */
855	if ((m->m_flags & M_FLOWID) != 0)
856                i = m->m_pkthdr.flowid % adapter->num_queues;
857	else
858		i = curcpu % adapter->num_queues;
859
860	txr = &adapter->tx_rings[i];
861
862	if (EM_TX_TRYLOCK(txr)) {
863		if (ifp->if_drv_flags & IFF_DRV_RUNNING)
864			error = em_mq_start_locked(ifp, txr, m);
865		EM_TX_UNLOCK(txr);
866	} else
867		error = drbr_enqueue(ifp, txr->br, m);
868
869	return (error);
870}
871
872/*
873** Flush all ring buffers
874*/
875static void
876em_qflush(struct ifnet *ifp)
877{
878	struct adapter  *adapter = ifp->if_softc;
879	struct tx_ring  *txr = adapter->tx_rings;
880	struct mbuf     *m;
881
882	for (int i = 0; i < adapter->num_queues; i++, txr++) {
883		EM_TX_LOCK(txr);
884		while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
885			m_freem(m);
886		EM_TX_UNLOCK(txr);
887	}
888	if_qflush(ifp);
889}
890
891#endif /* FreeBSD_version */
892
893static void
894em_start_locked(struct ifnet *ifp, struct tx_ring *txr)
895{
896	struct adapter	*adapter = ifp->if_softc;
897	struct mbuf	*m_head;
898
899	EM_TX_LOCK_ASSERT(txr);
900
901	if ((ifp->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) !=
902	    IFF_DRV_RUNNING)
903		return;
904
905	if (!adapter->link_active)
906		return;
907
908	while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
909
910                IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
911		if (m_head == NULL)
912			break;
913		/*
914		 *  Encapsulation can modify our pointer, and or make it
915		 *  NULL on failure.  In that event, we can't requeue.
916		 */
917		if (em_xmit(txr, &m_head)) {
918			if (m_head == NULL)
919				break;
920			ifp->if_drv_flags |= IFF_DRV_OACTIVE;
921			IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
922			break;
923		}
924
925		/* Send a copy of the frame to the BPF listener */
926		ETHER_BPF_MTAP(ifp, m_head);
927
928		/* Set timeout in case hardware has problems transmitting. */
929		txr->watchdog_check = TRUE;
930	}
931
932	return;
933}
934
935static void
936em_start(struct ifnet *ifp)
937{
938	struct adapter	*adapter = ifp->if_softc;
939	struct tx_ring	*txr = adapter->tx_rings;
940
941	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
942		EM_TX_LOCK(txr);
943		em_start_locked(ifp, txr);
944		EM_TX_UNLOCK(txr);
945	}
946	return;
947}
948
949/*********************************************************************
950 *  Ioctl entry point
951 *
952 *  em_ioctl is called when the user wants to configure the
953 *  interface.
954 *
955 *  return 0 on success, positive on failure
956 **********************************************************************/
957
958static int
959em_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
960{
961	struct adapter	*adapter = ifp->if_softc;
962	struct ifreq *ifr = (struct ifreq *)data;
963#ifdef INET
964	struct ifaddr *ifa = (struct ifaddr *)data;
965#endif
966	int error = 0;
967
968	if (adapter->in_detach)
969		return (error);
970
971	switch (command) {
972	case SIOCSIFADDR:
973#ifdef INET
974		if (ifa->ifa_addr->sa_family == AF_INET) {
975			/*
976			 * XXX
977			 * Since resetting hardware takes a very long time
978			 * and results in link renegotiation we only
979			 * initialize the hardware only when it is absolutely
980			 * required.
981			 */
982			ifp->if_flags |= IFF_UP;
983			if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) {
984				EM_CORE_LOCK(adapter);
985				em_init_locked(adapter);
986				EM_CORE_UNLOCK(adapter);
987			}
988			arp_ifinit(ifp, ifa);
989		} else
990#endif
991			error = ether_ioctl(ifp, command, data);
992		break;
993	case SIOCSIFMTU:
994	    {
995		int max_frame_size;
996
997		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFMTU (Set Interface MTU)");
998
999		EM_CORE_LOCK(adapter);
1000		switch (adapter->hw.mac.type) {
1001		case e1000_82571:
1002		case e1000_82572:
1003		case e1000_ich9lan:
1004		case e1000_ich10lan:
1005		case e1000_82574:
1006		case e1000_80003es2lan:	/* 9K Jumbo Frame size */
1007			max_frame_size = 9234;
1008			break;
1009		case e1000_pchlan:
1010			max_frame_size = 4096;
1011			break;
1012			/* Adapters that do not support jumbo frames */
1013		case e1000_82583:
1014		case e1000_ich8lan:
1015			max_frame_size = ETHER_MAX_LEN;
1016			break;
1017		default:
1018			max_frame_size = MAX_JUMBO_FRAME_SIZE;
1019		}
1020		if (ifr->ifr_mtu > max_frame_size - ETHER_HDR_LEN -
1021		    ETHER_CRC_LEN) {
1022			EM_CORE_UNLOCK(adapter);
1023			error = EINVAL;
1024			break;
1025		}
1026
1027		ifp->if_mtu = ifr->ifr_mtu;
1028		adapter->max_frame_size =
1029		    ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
1030		em_init_locked(adapter);
1031		EM_CORE_UNLOCK(adapter);
1032		break;
1033	    }
1034	case SIOCSIFFLAGS:
1035		IOCTL_DEBUGOUT("ioctl rcv'd:\
1036		    SIOCSIFFLAGS (Set Interface Flags)");
1037		EM_CORE_LOCK(adapter);
1038		if (ifp->if_flags & IFF_UP) {
1039			if ((ifp->if_drv_flags & IFF_DRV_RUNNING)) {
1040				if ((ifp->if_flags ^ adapter->if_flags) &
1041				    (IFF_PROMISC | IFF_ALLMULTI)) {
1042					em_disable_promisc(adapter);
1043					em_set_promisc(adapter);
1044				}
1045			} else
1046				em_init_locked(adapter);
1047		} else
1048			if (ifp->if_drv_flags & IFF_DRV_RUNNING)
1049				em_stop(adapter);
1050		adapter->if_flags = ifp->if_flags;
1051		EM_CORE_UNLOCK(adapter);
1052		break;
1053	case SIOCADDMULTI:
1054	case SIOCDELMULTI:
1055		IOCTL_DEBUGOUT("ioctl rcv'd: SIOC(ADD|DEL)MULTI");
1056		if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1057			EM_CORE_LOCK(adapter);
1058			em_disable_intr(adapter);
1059			em_set_multi(adapter);
1060#ifdef DEVICE_POLLING
1061			if (!(ifp->if_capenable & IFCAP_POLLING))
1062#endif
1063				em_enable_intr(adapter);
1064			EM_CORE_UNLOCK(adapter);
1065		}
1066		break;
1067	case SIOCSIFMEDIA:
1068		/* Check SOL/IDER usage */
1069		EM_CORE_LOCK(adapter);
1070		if (e1000_check_reset_block(&adapter->hw)) {
1071			EM_CORE_UNLOCK(adapter);
1072			device_printf(adapter->dev, "Media change is"
1073			    " blocked due to SOL/IDER session.\n");
1074			break;
1075		}
1076		EM_CORE_UNLOCK(adapter);
1077	case SIOCGIFMEDIA:
1078		IOCTL_DEBUGOUT("ioctl rcv'd: \
1079		    SIOCxIFMEDIA (Get/Set Interface Media)");
1080		error = ifmedia_ioctl(ifp, ifr, &adapter->media, command);
1081		break;
1082	case SIOCSIFCAP:
1083	    {
1084		int mask, reinit;
1085
1086		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFCAP (Set Capabilities)");
1087		reinit = 0;
1088		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
1089#ifdef DEVICE_POLLING
1090		if (mask & IFCAP_POLLING) {
1091			if (ifr->ifr_reqcap & IFCAP_POLLING) {
1092				error = ether_poll_register(em_poll, ifp);
1093				if (error)
1094					return (error);
1095				EM_CORE_LOCK(adapter);
1096				em_disable_intr(adapter);
1097				ifp->if_capenable |= IFCAP_POLLING;
1098				EM_CORE_UNLOCK(adapter);
1099			} else {
1100				error = ether_poll_deregister(ifp);
1101				/* Enable interrupt even in error case */
1102				EM_CORE_LOCK(adapter);
1103				em_enable_intr(adapter);
1104				ifp->if_capenable &= ~IFCAP_POLLING;
1105				EM_CORE_UNLOCK(adapter);
1106			}
1107		}
1108#endif
1109		if (mask & IFCAP_HWCSUM) {
1110			ifp->if_capenable ^= IFCAP_HWCSUM;
1111			reinit = 1;
1112		}
1113		if (mask & IFCAP_TSO4) {
1114			ifp->if_capenable ^= IFCAP_TSO4;
1115			reinit = 1;
1116		}
1117		if (mask & IFCAP_VLAN_HWTAGGING) {
1118			ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
1119			reinit = 1;
1120		}
1121		if (mask & IFCAP_VLAN_HWFILTER) {
1122			ifp->if_capenable ^= IFCAP_VLAN_HWFILTER;
1123			reinit = 1;
1124		}
1125		if ((mask & IFCAP_WOL) &&
1126		    (ifp->if_capabilities & IFCAP_WOL) != 0) {
1127			if (mask & IFCAP_WOL_MCAST)
1128				ifp->if_capenable ^= IFCAP_WOL_MCAST;
1129			if (mask & IFCAP_WOL_MAGIC)
1130				ifp->if_capenable ^= IFCAP_WOL_MAGIC;
1131		}
1132		if (reinit && (ifp->if_drv_flags & IFF_DRV_RUNNING))
1133			em_init(adapter);
1134		VLAN_CAPABILITIES(ifp);
1135		break;
1136	    }
1137
1138	default:
1139		error = ether_ioctl(ifp, command, data);
1140		break;
1141	}
1142
1143	return (error);
1144}
1145
1146
1147/*********************************************************************
1148 *  Init entry point
1149 *
1150 *  This routine is used in two ways. It is used by the stack as
1151 *  init entry point in network interface structure. It is also used
1152 *  by the driver as a hw/sw initialization routine to get to a
1153 *  consistent state.
1154 *
1155 *  return 0 on success, positive on failure
1156 **********************************************************************/
1157
1158static void
1159em_init_locked(struct adapter *adapter)
1160{
1161	struct ifnet	*ifp = adapter->ifp;
1162	device_t	dev = adapter->dev;
1163	u32		pba;
1164
1165	INIT_DEBUGOUT("em_init: begin");
1166
1167	EM_CORE_LOCK_ASSERT(adapter);
1168
1169	em_disable_intr(adapter);
1170	callout_stop(&adapter->timer);
1171
1172	/*
1173	 * Packet Buffer Allocation (PBA)
1174	 * Writing PBA sets the receive portion of the buffer
1175	 * the remainder is used for the transmit buffer.
1176	 */
1177	switch (adapter->hw.mac.type) {
1178	/* Total Packet Buffer on these is 48K */
1179	case e1000_82571:
1180	case e1000_82572:
1181	case e1000_80003es2lan:
1182			pba = E1000_PBA_32K; /* 32K for Rx, 16K for Tx */
1183		break;
1184	case e1000_82573: /* 82573: Total Packet Buffer is 32K */
1185			pba = E1000_PBA_12K; /* 12K for Rx, 20K for Tx */
1186		break;
1187	case e1000_82574:
1188	case e1000_82583:
1189			pba = E1000_PBA_20K; /* 20K for Rx, 20K for Tx */
1190		break;
1191	case e1000_ich9lan:
1192	case e1000_ich10lan:
1193	case e1000_pchlan:
1194		pba = E1000_PBA_10K;
1195		break;
1196	case e1000_ich8lan:
1197		pba = E1000_PBA_8K;
1198		break;
1199	default:
1200		if (adapter->max_frame_size > 8192)
1201			pba = E1000_PBA_40K; /* 40K for Rx, 24K for Tx */
1202		else
1203			pba = E1000_PBA_48K; /* 48K for Rx, 16K for Tx */
1204	}
1205
1206	INIT_DEBUGOUT1("em_init: pba=%dK",pba);
1207	E1000_WRITE_REG(&adapter->hw, E1000_PBA, pba);
1208
1209	/* Get the latest mac address, User can use a LAA */
1210        bcopy(IF_LLADDR(adapter->ifp), adapter->hw.mac.addr,
1211              ETHER_ADDR_LEN);
1212
1213	/* Put the address into the Receive Address Array */
1214	e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);
1215
1216	/*
1217	 * With the 82571 adapter, RAR[0] may be overwritten
1218	 * when the other port is reset, we make a duplicate
1219	 * in RAR[14] for that eventuality, this assures
1220	 * the interface continues to function.
1221	 */
1222	if (adapter->hw.mac.type == e1000_82571) {
1223		e1000_set_laa_state_82571(&adapter->hw, TRUE);
1224		e1000_rar_set(&adapter->hw, adapter->hw.mac.addr,
1225		    E1000_RAR_ENTRIES - 1);
1226	}
1227
1228	/* Initialize the hardware */
1229	em_reset(adapter);
1230	em_update_link_status(adapter);
1231
1232	/* Setup VLAN support, basic and offload if available */
1233	E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);
1234
1235	/* Use real VLAN Filter support? */
1236	if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) {
1237		if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
1238			/* Use real VLAN Filter support */
1239			em_setup_vlan_hw_support(adapter);
1240		else {
1241			u32 ctrl;
1242			ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL);
1243			ctrl |= E1000_CTRL_VME;
1244			E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl);
1245		}
1246	}
1247
1248	/* Set hardware offload abilities */
1249	ifp->if_hwassist = 0;
1250	if (ifp->if_capenable & IFCAP_TXCSUM)
1251		ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP);
1252	if (ifp->if_capenable & IFCAP_TSO4)
1253		ifp->if_hwassist |= CSUM_TSO;
1254
1255	/* Configure for OS presence */
1256	em_init_manageability(adapter);
1257
1258	/* Prepare transmit descriptors and buffers */
1259	em_setup_transmit_structures(adapter);
1260	em_initialize_transmit_unit(adapter);
1261
1262	/* Setup Multicast table */
1263	em_set_multi(adapter);
1264
1265	/* Prepare receive descriptors and buffers */
1266	if (em_setup_receive_structures(adapter)) {
1267		device_printf(dev, "Could not setup receive structures\n");
1268		em_stop(adapter);
1269		return;
1270	}
1271	em_initialize_receive_unit(adapter);
1272
1273	/* Don't lose promiscuous settings */
1274	em_set_promisc(adapter);
1275
1276	ifp->if_drv_flags |= IFF_DRV_RUNNING;
1277	ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
1278
1279	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
1280	e1000_clear_hw_cntrs_base_generic(&adapter->hw);
1281
1282	/* MSI/X configuration for 82574 */
1283	if (adapter->hw.mac.type == e1000_82574) {
1284		int tmp;
1285		tmp = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
1286		tmp |= E1000_CTRL_EXT_PBA_CLR;
1287		E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, tmp);
1288		/* Set the IVAR - interrupt vector routing. */
1289		E1000_WRITE_REG(&adapter->hw, E1000_IVAR, adapter->ivars);
1290	}
1291
1292#ifdef DEVICE_POLLING
1293	/*
1294	 * Only enable interrupts if we are not polling, make sure
1295	 * they are off otherwise.
1296	 */
1297	if (ifp->if_capenable & IFCAP_POLLING)
1298		em_disable_intr(adapter);
1299	else
1300#endif /* DEVICE_POLLING */
1301		em_enable_intr(adapter);
1302
1303	/* AMT based hardware can now take control from firmware */
1304	if (adapter->has_manage && adapter->has_amt)
1305		em_get_hw_control(adapter);
1306
1307	/* Don't reset the phy next time init gets called */
1308	adapter->hw.phy.reset_disable = TRUE;
1309}
1310
1311static void
1312em_init(void *arg)
1313{
1314	struct adapter *adapter = arg;
1315
1316	EM_CORE_LOCK(adapter);
1317	em_init_locked(adapter);
1318	EM_CORE_UNLOCK(adapter);
1319}
1320
1321
1322#ifdef DEVICE_POLLING
1323/*********************************************************************
1324 *
1325 *  Legacy polling routine: note this only works with single queue
1326 *
1327 *********************************************************************/
1328static int
1329em_poll(struct ifnet *ifp, enum poll_cmd cmd, int count)
1330{
1331	struct adapter *adapter = ifp->if_softc;
1332	struct tx_ring	*txr = adapter->tx_rings;
1333	struct rx_ring	*rxr = adapter->rx_rings;
1334	u32		reg_icr, rx_done = 0;
1335
1336	EM_CORE_LOCK(adapter);
1337	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
1338		EM_CORE_UNLOCK(adapter);
1339		return (rx_done);
1340	}
1341
1342	if (cmd == POLL_AND_CHECK_STATUS) {
1343		reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1344		if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
1345			callout_stop(&adapter->timer);
1346			adapter->hw.mac.get_link_status = 1;
1347			em_update_link_status(adapter);
1348			callout_reset(&adapter->timer, hz,
1349			    em_local_timer, adapter);
1350		}
1351	}
1352	EM_CORE_UNLOCK(adapter);
1353
1354	rx_done = em_rxeof(rxr, count);
1355
1356	EM_TX_LOCK(txr);
1357	em_txeof(txr);
1358#if __FreeBSD_version >= 800000
1359	if (!drbr_empty(ifp, txr->br))
1360		em_mq_start_locked(ifp, txr, NULL);
1361#else
1362	if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1363		em_start_locked(ifp, txr);
1364#endif
1365	EM_TX_UNLOCK(txr);
1366
1367	return (rx_done);
1368}
1369#endif /* DEVICE_POLLING */
1370
1371
1372/*********************************************************************
1373 *
1374 *  Fast Legacy/MSI Combined Interrupt Service routine
1375 *
1376 *********************************************************************/
1377static int
1378em_irq_fast(void *arg)
1379{
1380	struct adapter	*adapter = arg;
1381	struct ifnet	*ifp;
1382	u32		reg_icr;
1383
1384	ifp = adapter->ifp;
1385
1386	reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1387
1388	/* Hot eject?  */
1389	if (reg_icr == 0xffffffff)
1390		return FILTER_STRAY;
1391
1392	/* Definitely not our interrupt.  */
1393	if (reg_icr == 0x0)
1394		return FILTER_STRAY;
1395
1396	/*
1397	 * Starting with the 82571 chip, bit 31 should be used to
1398	 * determine whether the interrupt belongs to us.
1399	 */
1400	if (adapter->hw.mac.type >= e1000_82571 &&
1401	    (reg_icr & E1000_ICR_INT_ASSERTED) == 0)
1402		return FILTER_STRAY;
1403
1404	em_disable_intr(adapter);
1405	taskqueue_enqueue(adapter->tq, &adapter->que_task);
1406
1407	/* Link status change */
1408	if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
1409		adapter->hw.mac.get_link_status = 1;
1410		taskqueue_enqueue(taskqueue_fast, &adapter->link_task);
1411	}
1412
1413	if (reg_icr & E1000_ICR_RXO)
1414		adapter->rx_overruns++;
1415	return FILTER_HANDLED;
1416}
1417
1418/* Combined RX/TX handler, used by Legacy and MSI */
1419static void
1420em_handle_que(void *context, int pending)
1421{
1422	struct adapter	*adapter = context;
1423	struct ifnet	*ifp = adapter->ifp;
1424	struct tx_ring	*txr = adapter->tx_rings;
1425	struct rx_ring	*rxr = adapter->rx_rings;
1426	u32		loop = EM_MAX_LOOP;
1427	bool		more_rx, more_tx;
1428
1429
1430	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1431		EM_TX_LOCK(txr);
1432		do {
1433			more_rx = em_rxeof(rxr, adapter->rx_process_limit);
1434			more_tx = em_txeof(txr);
1435		} while (loop-- && (more_rx || more_tx));
1436
1437#if __FreeBSD_version >= 800000
1438		if (!drbr_empty(ifp, txr->br))
1439			em_mq_start_locked(ifp, txr, NULL);
1440#else
1441		if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1442			em_start_locked(ifp, txr);
1443#endif
1444		if (more_rx || more_tx)
1445			taskqueue_enqueue(adapter->tq, &adapter->que_task);
1446
1447		EM_TX_UNLOCK(txr);
1448	}
1449
1450	em_enable_intr(adapter);
1451	return;
1452}
1453
1454
1455/*********************************************************************
1456 *
1457 *  MSIX Interrupt Service Routines
1458 *
1459 **********************************************************************/
1460static void
1461em_msix_tx(void *arg)
1462{
1463	struct tx_ring *txr = arg;
1464	struct adapter *adapter = txr->adapter;
1465	bool		more;
1466
1467	++txr->tx_irq;
1468	EM_TX_LOCK(txr);
1469	more = em_txeof(txr);
1470	EM_TX_UNLOCK(txr);
1471	if (more)
1472		taskqueue_enqueue(txr->tq, &txr->tx_task);
1473	else
1474		/* Reenable this interrupt */
1475		E1000_WRITE_REG(&adapter->hw, E1000_IMS, txr->ims);
1476	return;
1477}
1478
1479/*********************************************************************
1480 *
1481 *  MSIX RX Interrupt Service routine
1482 *
1483 **********************************************************************/
1484
1485static void
1486em_msix_rx(void *arg)
1487{
1488	struct rx_ring	*rxr = arg;
1489	struct adapter	*adapter = rxr->adapter;
1490	bool		more;
1491
1492	++rxr->rx_irq;
1493	more = em_rxeof(rxr, adapter->rx_process_limit);
1494	if (more)
1495		taskqueue_enqueue(rxr->tq, &rxr->rx_task);
1496	else
1497		/* Reenable this interrupt */
1498		E1000_WRITE_REG(&adapter->hw, E1000_IMS, rxr->ims);
1499	return;
1500}
1501
1502/*********************************************************************
1503 *
1504 *  MSIX Link Fast Interrupt Service routine
1505 *
1506 **********************************************************************/
1507static void
1508em_msix_link(void *arg)
1509{
1510	struct adapter	*adapter = arg;
1511	u32		reg_icr;
1512
1513	++adapter->link_irq;
1514	reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1515
1516	if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
1517		adapter->hw.mac.get_link_status = 1;
1518		taskqueue_enqueue(taskqueue_fast, &adapter->link_task);
1519	} else
1520		E1000_WRITE_REG(&adapter->hw, E1000_IMS,
1521		    EM_MSIX_LINK | E1000_IMS_LSC);
1522	return;
1523}
1524
1525static void
1526em_handle_rx(void *context, int pending)
1527{
1528	struct rx_ring	*rxr = context;
1529	struct adapter	*adapter = rxr->adapter;
1530	u32		loop = EM_MAX_LOOP;
1531        bool            more;
1532
1533        do {
1534		more = em_rxeof(rxr, adapter->rx_process_limit);
1535        } while (loop-- && more);
1536        /* Reenable this interrupt */
1537	E1000_WRITE_REG(&adapter->hw, E1000_IMS, rxr->ims);
1538}
1539
1540static void
1541em_handle_tx(void *context, int pending)
1542{
1543	struct tx_ring	*txr = context;
1544	struct adapter	*adapter = txr->adapter;
1545	struct ifnet	*ifp = adapter->ifp;
1546	u32		loop = EM_MAX_LOOP;
1547        bool            more;
1548
1549	if (!EM_TX_TRYLOCK(txr))
1550		return;
1551	do {
1552		more = em_txeof(txr);
1553	} while (loop-- && more);
1554
1555#if __FreeBSD_version >= 800000
1556	if (!drbr_empty(ifp, txr->br))
1557		em_mq_start_locked(ifp, txr, NULL);
1558#else
1559	if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1560		em_start_locked(ifp, txr);
1561#endif
1562	E1000_WRITE_REG(&adapter->hw, E1000_IMS, txr->ims);
1563	EM_TX_UNLOCK(txr);
1564}
1565
1566static void
1567em_handle_link(void *context, int pending)
1568{
1569	struct adapter	*adapter = context;
1570	struct ifnet *ifp = adapter->ifp;
1571
1572	if (!(ifp->if_drv_flags & IFF_DRV_RUNNING))
1573		return;
1574
1575	EM_CORE_LOCK(adapter);
1576	callout_stop(&adapter->timer);
1577	em_update_link_status(adapter);
1578	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
1579	E1000_WRITE_REG(&adapter->hw, E1000_IMS,
1580	    EM_MSIX_LINK | E1000_IMS_LSC);
1581	EM_CORE_UNLOCK(adapter);
1582}
1583
1584
1585/*********************************************************************
1586 *
1587 *  Media Ioctl callback
1588 *
1589 *  This routine is called whenever the user queries the status of
1590 *  the interface using ifconfig.
1591 *
1592 **********************************************************************/
1593static void
1594em_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
1595{
1596	struct adapter *adapter = ifp->if_softc;
1597	u_char fiber_type = IFM_1000_SX;
1598
1599	INIT_DEBUGOUT("em_media_status: begin");
1600
1601	EM_CORE_LOCK(adapter);
1602	em_update_link_status(adapter);
1603
1604	ifmr->ifm_status = IFM_AVALID;
1605	ifmr->ifm_active = IFM_ETHER;
1606
1607	if (!adapter->link_active) {
1608		EM_CORE_UNLOCK(adapter);
1609		return;
1610	}
1611
1612	ifmr->ifm_status |= IFM_ACTIVE;
1613
1614	if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
1615	    (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
1616		ifmr->ifm_active |= fiber_type | IFM_FDX;
1617	} else {
1618		switch (adapter->link_speed) {
1619		case 10:
1620			ifmr->ifm_active |= IFM_10_T;
1621			break;
1622		case 100:
1623			ifmr->ifm_active |= IFM_100_TX;
1624			break;
1625		case 1000:
1626			ifmr->ifm_active |= IFM_1000_T;
1627			break;
1628		}
1629		if (adapter->link_duplex == FULL_DUPLEX)
1630			ifmr->ifm_active |= IFM_FDX;
1631		else
1632			ifmr->ifm_active |= IFM_HDX;
1633	}
1634	EM_CORE_UNLOCK(adapter);
1635}
1636
1637/*********************************************************************
1638 *
1639 *  Media Ioctl callback
1640 *
1641 *  This routine is called when the user changes speed/duplex using
1642 *  media/mediopt option with ifconfig.
1643 *
1644 **********************************************************************/
1645static int
1646em_media_change(struct ifnet *ifp)
1647{
1648	struct adapter *adapter = ifp->if_softc;
1649	struct ifmedia  *ifm = &adapter->media;
1650
1651	INIT_DEBUGOUT("em_media_change: begin");
1652
1653	if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
1654		return (EINVAL);
1655
1656	EM_CORE_LOCK(adapter);
1657	switch (IFM_SUBTYPE(ifm->ifm_media)) {
1658	case IFM_AUTO:
1659		adapter->hw.mac.autoneg = DO_AUTO_NEG;
1660		adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
1661		break;
1662	case IFM_1000_LX:
1663	case IFM_1000_SX:
1664	case IFM_1000_T:
1665		adapter->hw.mac.autoneg = DO_AUTO_NEG;
1666		adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
1667		break;
1668	case IFM_100_TX:
1669		adapter->hw.mac.autoneg = FALSE;
1670		adapter->hw.phy.autoneg_advertised = 0;
1671		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1672			adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_FULL;
1673		else
1674			adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_HALF;
1675		break;
1676	case IFM_10_T:
1677		adapter->hw.mac.autoneg = FALSE;
1678		adapter->hw.phy.autoneg_advertised = 0;
1679		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1680			adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_FULL;
1681		else
1682			adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_HALF;
1683		break;
1684	default:
1685		device_printf(adapter->dev, "Unsupported media type\n");
1686	}
1687
1688	/* As the speed/duplex settings my have changed we need to
1689	 * reset the PHY.
1690	 */
1691	adapter->hw.phy.reset_disable = FALSE;
1692
1693	em_init_locked(adapter);
1694	EM_CORE_UNLOCK(adapter);
1695
1696	return (0);
1697}
1698
1699/*********************************************************************
1700 *
1701 *  This routine maps the mbufs to tx descriptors.
1702 *
1703 *  return 0 on success, positive on failure
1704 **********************************************************************/
1705
1706static int
1707em_xmit(struct tx_ring *txr, struct mbuf **m_headp)
1708{
1709	struct adapter		*adapter = txr->adapter;
1710	bus_dma_segment_t	segs[EM_MAX_SCATTER];
1711	bus_dmamap_t		map;
1712	struct em_buffer	*tx_buffer, *tx_buffer_mapped;
1713	struct e1000_tx_desc	*ctxd = NULL;
1714	struct mbuf		*m_head;
1715	u32			txd_upper, txd_lower, txd_used, txd_saved;
1716	int			nsegs, i, j, first, last = 0;
1717	int			error, do_tso, tso_desc = 0;
1718
1719	m_head = *m_headp;
1720	txd_upper = txd_lower = txd_used = txd_saved = 0;
1721	do_tso = ((m_head->m_pkthdr.csum_flags & CSUM_TSO) != 0);
1722
1723        /*
1724         * Force a cleanup if number of TX descriptors
1725         * available hits the threshold
1726         */
1727	if (txr->tx_avail <= EM_TX_CLEANUP_THRESHOLD)
1728		em_txeof(txr);
1729
1730	/*
1731	 * TSO workaround:
1732	 *  If an mbuf is only header we need
1733	 *     to pull 4 bytes of data into it.
1734	 */
1735	if (do_tso && (m_head->m_len <= M_TSO_LEN)) {
1736		m_head = m_pullup(m_head, M_TSO_LEN + 4);
1737		*m_headp = m_head;
1738		if (m_head == NULL)
1739			return (ENOBUFS);
1740	}
1741
1742	/*
1743	 * Map the packet for DMA
1744	 *
1745	 * Capture the first descriptor index,
1746	 * this descriptor will have the index
1747	 * of the EOP which is the only one that
1748	 * now gets a DONE bit writeback.
1749	 */
1750	first = txr->next_avail_desc;
1751	tx_buffer = &txr->tx_buffers[first];
1752	tx_buffer_mapped = tx_buffer;
1753	map = tx_buffer->map;
1754
1755	error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
1756	    *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
1757
1758	/*
1759	 * There are two types of errors we can (try) to handle:
1760	 * - EFBIG means the mbuf chain was too long and bus_dma ran
1761	 *   out of segments.  Defragment the mbuf chain and try again.
1762	 * - ENOMEM means bus_dma could not obtain enough bounce buffers
1763	 *   at this point in time.  Defer sending and try again later.
1764	 * All other errors, in particular EINVAL, are fatal and prevent the
1765	 * mbuf chain from ever going through.  Drop it and report error.
1766	 */
1767	if (error == EFBIG) {
1768		struct mbuf *m;
1769
1770		m = m_defrag(*m_headp, M_DONTWAIT);
1771		if (m == NULL) {
1772			adapter->mbuf_alloc_failed++;
1773			m_freem(*m_headp);
1774			*m_headp = NULL;
1775			return (ENOBUFS);
1776		}
1777		*m_headp = m;
1778
1779		/* Try it again */
1780		error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
1781		    *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
1782
1783		if (error) {
1784			adapter->no_tx_dma_setup++;
1785			m_freem(*m_headp);
1786			*m_headp = NULL;
1787			return (error);
1788		}
1789	} else if (error != 0) {
1790		adapter->no_tx_dma_setup++;
1791		return (error);
1792	}
1793
1794	/*
1795	 * TSO Hardware workaround, if this packet is not
1796	 * TSO, and is only a single descriptor long, and
1797	 * it follows a TSO burst, then we need to add a
1798	 * sentinel descriptor to prevent premature writeback.
1799	 */
1800	if ((do_tso == 0) && (txr->tx_tso == TRUE)) {
1801		if (nsegs == 1)
1802			tso_desc = TRUE;
1803		txr->tx_tso = FALSE;
1804	}
1805
1806        if (nsegs > (txr->tx_avail - 2)) {
1807                txr->no_desc_avail++;
1808		bus_dmamap_unload(txr->txtag, map);
1809		return (ENOBUFS);
1810        }
1811	m_head = *m_headp;
1812
1813	/* Do hardware assists */
1814#if __FreeBSD_version >= 700000
1815	if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
1816		error = em_tso_setup(txr, m_head, &txd_upper, &txd_lower);
1817		if (error != TRUE)
1818			return (ENXIO); /* something foobar */
1819		/* we need to make a final sentinel transmit desc */
1820		tso_desc = TRUE;
1821	} else
1822#endif
1823	if (m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD)
1824		em_transmit_checksum_setup(txr,  m_head,
1825		    &txd_upper, &txd_lower);
1826
1827	i = txr->next_avail_desc;
1828
1829	/* Set up our transmit descriptors */
1830	for (j = 0; j < nsegs; j++) {
1831		bus_size_t seg_len;
1832		bus_addr_t seg_addr;
1833
1834		tx_buffer = &txr->tx_buffers[i];
1835		ctxd = &txr->tx_base[i];
1836		seg_addr = segs[j].ds_addr;
1837		seg_len  = segs[j].ds_len;
1838		/*
1839		** TSO Workaround:
1840		** If this is the last descriptor, we want to
1841		** split it so we have a small final sentinel
1842		*/
1843		if (tso_desc && (j == (nsegs -1)) && (seg_len > 8)) {
1844			seg_len -= 4;
1845			ctxd->buffer_addr = htole64(seg_addr);
1846			ctxd->lower.data = htole32(
1847			adapter->txd_cmd | txd_lower | seg_len);
1848			ctxd->upper.data =
1849			    htole32(txd_upper);
1850			if (++i == adapter->num_tx_desc)
1851				i = 0;
1852			/* Now make the sentinel */
1853			++txd_used; /* using an extra txd */
1854			ctxd = &txr->tx_base[i];
1855			tx_buffer = &txr->tx_buffers[i];
1856			ctxd->buffer_addr =
1857			    htole64(seg_addr + seg_len);
1858			ctxd->lower.data = htole32(
1859			adapter->txd_cmd | txd_lower | 4);
1860			ctxd->upper.data =
1861			    htole32(txd_upper);
1862			last = i;
1863			if (++i == adapter->num_tx_desc)
1864				i = 0;
1865		} else {
1866			ctxd->buffer_addr = htole64(seg_addr);
1867			ctxd->lower.data = htole32(
1868			adapter->txd_cmd | txd_lower | seg_len);
1869			ctxd->upper.data =
1870			    htole32(txd_upper);
1871			last = i;
1872			if (++i == adapter->num_tx_desc)
1873				i = 0;
1874		}
1875		tx_buffer->m_head = NULL;
1876		tx_buffer->next_eop = -1;
1877	}
1878
1879	txr->next_avail_desc = i;
1880	txr->tx_avail -= nsegs;
1881	if (tso_desc) /* TSO used an extra for sentinel */
1882		txr->tx_avail -= txd_used;
1883
1884	if (m_head->m_flags & M_VLANTAG) {
1885		/* Set the vlan id. */
1886		ctxd->upper.fields.special =
1887		    htole16(m_head->m_pkthdr.ether_vtag);
1888                /* Tell hardware to add tag */
1889                ctxd->lower.data |= htole32(E1000_TXD_CMD_VLE);
1890        }
1891
1892        tx_buffer->m_head = m_head;
1893	tx_buffer_mapped->map = tx_buffer->map;
1894	tx_buffer->map = map;
1895        bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE);
1896
1897        /*
1898         * Last Descriptor of Packet
1899	 * needs End Of Packet (EOP)
1900	 * and Report Status (RS)
1901         */
1902        ctxd->lower.data |=
1903	    htole32(E1000_TXD_CMD_EOP | E1000_TXD_CMD_RS);
1904	/*
1905	 * Keep track in the first buffer which
1906	 * descriptor will be written back
1907	 */
1908	tx_buffer = &txr->tx_buffers[first];
1909	tx_buffer->next_eop = last;
1910
1911	/*
1912	 * Advance the Transmit Descriptor Tail (TDT), this tells the E1000
1913	 * that this frame is available to transmit.
1914	 */
1915	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
1916	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1917	E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me), i);
1918
1919	return (0);
1920}
1921
1922static void
1923em_set_promisc(struct adapter *adapter)
1924{
1925	struct ifnet	*ifp = adapter->ifp;
1926	u32		reg_rctl;
1927
1928	reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
1929
1930	if (ifp->if_flags & IFF_PROMISC) {
1931		reg_rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
1932		/* Turn this on if you want to see bad packets */
1933		if (em_debug_sbp)
1934			reg_rctl |= E1000_RCTL_SBP;
1935		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
1936	} else if (ifp->if_flags & IFF_ALLMULTI) {
1937		reg_rctl |= E1000_RCTL_MPE;
1938		reg_rctl &= ~E1000_RCTL_UPE;
1939		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
1940	}
1941}
1942
1943static void
1944em_disable_promisc(struct adapter *adapter)
1945{
1946	u32	reg_rctl;
1947
1948	reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
1949
1950	reg_rctl &=  (~E1000_RCTL_UPE);
1951	reg_rctl &=  (~E1000_RCTL_MPE);
1952	reg_rctl &=  (~E1000_RCTL_SBP);
1953	E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
1954}
1955
1956
1957/*********************************************************************
1958 *  Multicast Update
1959 *
1960 *  This routine is called whenever multicast address list is updated.
1961 *
1962 **********************************************************************/
1963
1964static void
1965em_set_multi(struct adapter *adapter)
1966{
1967	struct ifnet	*ifp = adapter->ifp;
1968	struct ifmultiaddr *ifma;
1969	u32 reg_rctl = 0;
1970	u8  *mta; /* Multicast array memory */
1971	int mcnt = 0;
1972
1973	IOCTL_DEBUGOUT("em_set_multi: begin");
1974
1975	if (adapter->hw.mac.type == e1000_82542 &&
1976	    adapter->hw.revision_id == E1000_REVISION_2) {
1977		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
1978		if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
1979			e1000_pci_clear_mwi(&adapter->hw);
1980		reg_rctl |= E1000_RCTL_RST;
1981		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
1982		msec_delay(5);
1983	}
1984
1985	/* Allocate temporary memory to setup array */
1986	mta = malloc(sizeof(u8) *
1987	    (ETH_ADDR_LEN * MAX_NUM_MULTICAST_ADDRESSES),
1988	    M_DEVBUF, M_NOWAIT | M_ZERO);
1989	if (mta == NULL)
1990		panic("em_set_multi memory failure\n");
1991
1992#if __FreeBSD_version < 800000
1993	IF_ADDR_LOCK(ifp);
1994#else
1995	if_maddr_rlock(ifp);
1996#endif
1997	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
1998		if (ifma->ifma_addr->sa_family != AF_LINK)
1999			continue;
2000
2001		if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
2002			break;
2003
2004		bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
2005		    &mta[mcnt * ETH_ADDR_LEN], ETH_ADDR_LEN);
2006		mcnt++;
2007	}
2008#if __FreeBSD_version < 800000
2009	IF_ADDR_UNLOCK(ifp);
2010#else
2011	if_maddr_runlock(ifp);
2012#endif
2013	if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES) {
2014		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2015		reg_rctl |= E1000_RCTL_MPE;
2016		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2017	} else
2018		e1000_update_mc_addr_list(&adapter->hw, mta, mcnt);
2019
2020	if (adapter->hw.mac.type == e1000_82542 &&
2021	    adapter->hw.revision_id == E1000_REVISION_2) {
2022		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2023		reg_rctl &= ~E1000_RCTL_RST;
2024		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2025		msec_delay(5);
2026		if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
2027			e1000_pci_set_mwi(&adapter->hw);
2028	}
2029	free(mta, M_DEVBUF);
2030}
2031
2032
2033/*********************************************************************
2034 *  Timer routine
2035 *
2036 *  This routine checks for link status and updates statistics.
2037 *
2038 **********************************************************************/
2039
2040static void
2041em_local_timer(void *arg)
2042{
2043	struct adapter	*adapter = arg;
2044	struct ifnet	*ifp = adapter->ifp;
2045	struct tx_ring	*txr = adapter->tx_rings;
2046
2047	EM_CORE_LOCK_ASSERT(adapter);
2048
2049	em_update_link_status(adapter);
2050	em_update_stats_counters(adapter);
2051
2052	/* Reset LAA into RAR[0] on 82571 */
2053	if (e1000_get_laa_state_82571(&adapter->hw) == TRUE)
2054		e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);
2055
2056	if (em_display_debug_stats && ifp->if_drv_flags & IFF_DRV_RUNNING)
2057		em_print_hw_stats(adapter);
2058
2059	/*
2060	** Check for time since any descriptor was cleaned
2061	*/
2062	for (int i = 0; i < adapter->num_queues; i++, txr++) {
2063		EM_TX_LOCK(txr);
2064		if (txr->watchdog_check == FALSE) {
2065			EM_TX_UNLOCK(txr);
2066			continue;
2067		}
2068		if ((ticks - txr->watchdog_time) > EM_WATCHDOG)
2069			goto hung;
2070		EM_TX_UNLOCK(txr);
2071	}
2072
2073	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
2074	return;
2075hung:
2076	device_printf(adapter->dev, "Watchdog timeout -- resetting\n");
2077	ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2078	adapter->watchdog_events++;
2079	EM_TX_UNLOCK(txr);
2080	em_init_locked(adapter);
2081}
2082
2083
2084static void
2085em_update_link_status(struct adapter *adapter)
2086{
2087	struct e1000_hw *hw = &adapter->hw;
2088	struct ifnet *ifp = adapter->ifp;
2089	device_t dev = adapter->dev;
2090	u32 link_check = 0;
2091
2092	/* Get the cached link value or read phy for real */
2093	switch (hw->phy.media_type) {
2094	case e1000_media_type_copper:
2095		if (hw->mac.get_link_status) {
2096			/* Do the work to read phy */
2097			e1000_check_for_link(hw);
2098			link_check = !hw->mac.get_link_status;
2099			if (link_check) /* ESB2 fix */
2100				e1000_cfg_on_link_up(hw);
2101		} else
2102			link_check = TRUE;
2103		break;
2104	case e1000_media_type_fiber:
2105		e1000_check_for_link(hw);
2106		link_check = (E1000_READ_REG(hw, E1000_STATUS) &
2107                                 E1000_STATUS_LU);
2108		break;
2109	case e1000_media_type_internal_serdes:
2110		e1000_check_for_link(hw);
2111		link_check = adapter->hw.mac.serdes_has_link;
2112		break;
2113	default:
2114	case e1000_media_type_unknown:
2115		break;
2116	}
2117
2118	/* Now check for a transition */
2119	if (link_check && (adapter->link_active == 0)) {
2120		e1000_get_speed_and_duplex(hw, &adapter->link_speed,
2121		    &adapter->link_duplex);
2122		/* Check if we must disable SPEED_MODE bit on PCI-E */
2123		if ((adapter->link_speed != SPEED_1000) &&
2124		    ((hw->mac.type == e1000_82571) ||
2125		    (hw->mac.type == e1000_82572))) {
2126			int tarc0;
2127			tarc0 = E1000_READ_REG(hw, E1000_TARC(0));
2128			tarc0 &= ~SPEED_MODE_BIT;
2129			E1000_WRITE_REG(hw, E1000_TARC(0), tarc0);
2130		}
2131		if (bootverbose)
2132			device_printf(dev, "Link is up %d Mbps %s\n",
2133			    adapter->link_speed,
2134			    ((adapter->link_duplex == FULL_DUPLEX) ?
2135			    "Full Duplex" : "Half Duplex"));
2136		adapter->link_active = 1;
2137		adapter->smartspeed = 0;
2138		ifp->if_baudrate = adapter->link_speed * 1000000;
2139		if_link_state_change(ifp, LINK_STATE_UP);
2140	} else if (!link_check && (adapter->link_active == 1)) {
2141		ifp->if_baudrate = adapter->link_speed = 0;
2142		adapter->link_duplex = 0;
2143		if (bootverbose)
2144			device_printf(dev, "Link is Down\n");
2145		adapter->link_active = 0;
2146		/* Link down, disable watchdog */
2147		// JFV change later
2148		//adapter->watchdog_check = FALSE;
2149		if_link_state_change(ifp, LINK_STATE_DOWN);
2150	}
2151}
2152
2153/*********************************************************************
2154 *
2155 *  This routine disables all traffic on the adapter by issuing a
2156 *  global reset on the MAC and deallocates TX/RX buffers.
2157 *
2158 *  This routine should always be called with BOTH the CORE
2159 *  and TX locks.
2160 **********************************************************************/
2161
2162static void
2163em_stop(void *arg)
2164{
2165	struct adapter	*adapter = arg;
2166	struct ifnet	*ifp = adapter->ifp;
2167	struct tx_ring	*txr = adapter->tx_rings;
2168
2169	EM_CORE_LOCK_ASSERT(adapter);
2170
2171	INIT_DEBUGOUT("em_stop: begin");
2172
2173	em_disable_intr(adapter);
2174	callout_stop(&adapter->timer);
2175
2176	/* Tell the stack that the interface is no longer active */
2177	ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
2178
2179        /* Unarm watchdog timer. */
2180	for (int i = 0; i < adapter->num_queues; i++, txr++) {
2181		EM_TX_LOCK(txr);
2182		txr->watchdog_check = FALSE;
2183		EM_TX_UNLOCK(txr);
2184	}
2185
2186	e1000_reset_hw(&adapter->hw);
2187	E1000_WRITE_REG(&adapter->hw, E1000_WUC, 0);
2188
2189	e1000_led_off(&adapter->hw);
2190	e1000_cleanup_led(&adapter->hw);
2191}
2192
2193
2194/*********************************************************************
2195 *
2196 *  Determine hardware revision.
2197 *
2198 **********************************************************************/
2199static void
2200em_identify_hardware(struct adapter *adapter)
2201{
2202	device_t dev = adapter->dev;
2203
2204	/* Make sure our PCI config space has the necessary stuff set */
2205	adapter->hw.bus.pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2);
2206	if (!((adapter->hw.bus.pci_cmd_word & PCIM_CMD_BUSMASTEREN) &&
2207	    (adapter->hw.bus.pci_cmd_word & PCIM_CMD_MEMEN))) {
2208		device_printf(dev, "Memory Access and/or Bus Master bits "
2209		    "were not set!\n");
2210		adapter->hw.bus.pci_cmd_word |=
2211		(PCIM_CMD_BUSMASTEREN | PCIM_CMD_MEMEN);
2212		pci_write_config(dev, PCIR_COMMAND,
2213		    adapter->hw.bus.pci_cmd_word, 2);
2214	}
2215
2216	/* Save off the information about this board */
2217	adapter->hw.vendor_id = pci_get_vendor(dev);
2218	adapter->hw.device_id = pci_get_device(dev);
2219	adapter->hw.revision_id = pci_read_config(dev, PCIR_REVID, 1);
2220	adapter->hw.subsystem_vendor_id =
2221	    pci_read_config(dev, PCIR_SUBVEND_0, 2);
2222	adapter->hw.subsystem_device_id =
2223	    pci_read_config(dev, PCIR_SUBDEV_0, 2);
2224
2225	/* Do Shared Code Init and Setup */
2226	if (e1000_set_mac_type(&adapter->hw)) {
2227		device_printf(dev, "Setup init failure\n");
2228		return;
2229	}
2230}
2231
2232static int
2233em_allocate_pci_resources(struct adapter *adapter)
2234{
2235	device_t	dev = adapter->dev;
2236	int		rid;
2237
2238	rid = PCIR_BAR(0);
2239	adapter->memory = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
2240	    &rid, RF_ACTIVE);
2241	if (adapter->memory == NULL) {
2242		device_printf(dev, "Unable to allocate bus resource: memory\n");
2243		return (ENXIO);
2244	}
2245	adapter->osdep.mem_bus_space_tag =
2246	    rman_get_bustag(adapter->memory);
2247	adapter->osdep.mem_bus_space_handle =
2248	    rman_get_bushandle(adapter->memory);
2249	adapter->hw.hw_addr = (u8 *)&adapter->osdep.mem_bus_space_handle;
2250
2251	/* Default to a single queue */
2252	adapter->num_queues = 1;
2253
2254	/*
2255	 * Setup MSI/X or MSI if PCI Express
2256	 */
2257	adapter->msix = em_setup_msix(adapter);
2258
2259	adapter->hw.back = &adapter->osdep;
2260
2261	return (0);
2262}
2263
2264/*********************************************************************
2265 *
2266 *  Setup the Legacy or MSI Interrupt handler
2267 *
2268 **********************************************************************/
2269int
2270em_allocate_legacy(struct adapter *adapter)
2271{
2272	device_t dev = adapter->dev;
2273	int error, rid = 0;
2274
2275	/* Manually turn off all interrupts */
2276	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2277
2278	if (adapter->msix == 1) /* using MSI */
2279		rid = 1;
2280	/* We allocate a single interrupt resource */
2281	adapter->res = bus_alloc_resource_any(dev,
2282	    SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2283	if (adapter->res == NULL) {
2284		device_printf(dev, "Unable to allocate bus resource: "
2285		    "interrupt\n");
2286		return (ENXIO);
2287	}
2288
2289	/*
2290	 * Allocate a fast interrupt and the associated
2291	 * deferred processing contexts.
2292	 */
2293	TASK_INIT(&adapter->que_task, 0, em_handle_que, adapter);
2294	TASK_INIT(&adapter->link_task, 0, em_handle_link, adapter);
2295	adapter->tq = taskqueue_create_fast("em_taskq", M_NOWAIT,
2296	    taskqueue_thread_enqueue, &adapter->tq);
2297	taskqueue_start_threads(&adapter->tq, 1, PI_NET, "%s taskq",
2298	    device_get_nameunit(adapter->dev));
2299	if ((error = bus_setup_intr(dev, adapter->res, INTR_TYPE_NET,
2300	    em_irq_fast, NULL, adapter, &adapter->tag)) != 0) {
2301		device_printf(dev, "Failed to register fast interrupt "
2302			    "handler: %d\n", error);
2303		taskqueue_free(adapter->tq);
2304		adapter->tq = NULL;
2305		return (error);
2306	}
2307
2308	return (0);
2309}
2310
2311/*********************************************************************
2312 *
2313 *  Setup the MSIX Interrupt handlers
2314 *   This is not really Multiqueue, rather
2315 *   its just multiple interrupt vectors.
2316 *
2317 **********************************************************************/
2318int
2319em_allocate_msix(struct adapter *adapter)
2320{
2321	device_t	dev = adapter->dev;
2322	struct		tx_ring *txr = adapter->tx_rings;
2323	struct		rx_ring *rxr = adapter->rx_rings;
2324	int		error, rid, vector = 0;
2325
2326
2327	/* Make sure all interrupts are disabled */
2328	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2329
2330	/* First set up ring resources */
2331	for (int i = 0; i < adapter->num_queues; i++, txr++, rxr++) {
2332
2333		/* RX ring */
2334		rid = vector + 1;
2335
2336		rxr->res = bus_alloc_resource_any(dev,
2337		    SYS_RES_IRQ, &rid, RF_ACTIVE);
2338		if (rxr->res == NULL) {
2339			device_printf(dev,
2340			    "Unable to allocate bus resource: "
2341			    "RX MSIX Interrupt %d\n", i);
2342			return (ENXIO);
2343		}
2344		if ((error = bus_setup_intr(dev, rxr->res,
2345		    INTR_TYPE_NET | INTR_MPSAFE, NULL, em_msix_rx,
2346		    rxr, &rxr->tag)) != 0) {
2347			device_printf(dev, "Failed to register RX handler");
2348			return (error);
2349		}
2350		rxr->msix = vector++; /* NOTE increment vector for TX */
2351		TASK_INIT(&rxr->rx_task, 0, em_handle_rx, rxr);
2352		rxr->tq = taskqueue_create_fast("em_rxq", M_NOWAIT,
2353		    taskqueue_thread_enqueue, &rxr->tq);
2354		taskqueue_start_threads(&rxr->tq, 1, PI_NET, "%s rxq",
2355		    device_get_nameunit(adapter->dev));
2356		/*
2357		** Set the bit to enable interrupt
2358		** in E1000_IMS -- bits 20 and 21
2359		** are for RX0 and RX1, note this has
2360		** NOTHING to do with the MSIX vector
2361		*/
2362		rxr->ims = 1 << (20 + i);
2363		adapter->ivars |= (8 | rxr->msix) << (i * 4);
2364
2365		/* TX ring */
2366		rid = vector + 1;
2367		txr->res = bus_alloc_resource_any(dev,
2368		    SYS_RES_IRQ, &rid, RF_ACTIVE);
2369		if (txr->res == NULL) {
2370			device_printf(dev,
2371			    "Unable to allocate bus resource: "
2372			    "TX MSIX Interrupt %d\n", i);
2373			return (ENXIO);
2374		}
2375		if ((error = bus_setup_intr(dev, txr->res,
2376		    INTR_TYPE_NET | INTR_MPSAFE, NULL, em_msix_tx,
2377		    txr, &txr->tag)) != 0) {
2378			device_printf(dev, "Failed to register TX handler");
2379			return (error);
2380		}
2381		txr->msix = vector++; /* Increment vector for next pass */
2382		TASK_INIT(&txr->tx_task, 0, em_handle_tx, txr);
2383		txr->tq = taskqueue_create_fast("em_txq", M_NOWAIT,
2384		    taskqueue_thread_enqueue, &txr->tq);
2385		taskqueue_start_threads(&txr->tq, 1, PI_NET, "%s txq",
2386		    device_get_nameunit(adapter->dev));
2387		/*
2388		** Set the bit to enable interrupt
2389		** in E1000_IMS -- bits 22 and 23
2390		** are for TX0 and TX1, note this has
2391		** NOTHING to do with the MSIX vector
2392		*/
2393		txr->ims = 1 << (22 + i);
2394		adapter->ivars |= (8 | txr->msix) << (8 + (i * 4));
2395	}
2396
2397	/* Link interrupt */
2398	++rid;
2399	adapter->res = bus_alloc_resource_any(dev,
2400	    SYS_RES_IRQ, &rid, RF_ACTIVE);
2401	if (!adapter->res) {
2402		device_printf(dev,"Unable to allocate "
2403		    "bus resource: Link interrupt [%d]\n", rid);
2404		return (ENXIO);
2405        }
2406	/* Set the link handler function */
2407	error = bus_setup_intr(dev, adapter->res,
2408	    INTR_TYPE_NET | INTR_MPSAFE, NULL,
2409	    em_msix_link, adapter, &adapter->tag);
2410	if (error) {
2411		adapter->res = NULL;
2412		device_printf(dev, "Failed to register LINK handler");
2413		return (error);
2414	}
2415	adapter->linkvec = vector;
2416	adapter->ivars |=  (8 | vector) << 16;
2417	adapter->ivars |= 0x80000000;
2418	TASK_INIT(&adapter->link_task, 0, em_handle_link, adapter);
2419	adapter->tq = taskqueue_create_fast("em_link", M_NOWAIT,
2420	    taskqueue_thread_enqueue, &adapter->tq);
2421	taskqueue_start_threads(&adapter->tq, 1, PI_NET, "%s linkq",
2422	    device_get_nameunit(adapter->dev));
2423
2424	return (0);
2425}
2426
2427
2428static void
2429em_free_pci_resources(struct adapter *adapter)
2430{
2431	device_t	dev = adapter->dev;
2432	struct tx_ring	*txr;
2433	struct rx_ring	*rxr;
2434	int		rid;
2435
2436
2437	/*
2438	** Release all the queue interrupt resources:
2439	*/
2440	for (int i = 0; i < adapter->num_queues; i++) {
2441		txr = &adapter->tx_rings[i];
2442		rxr = &adapter->rx_rings[i];
2443		rid = txr->msix +1;
2444		if (txr->tag != NULL) {
2445			bus_teardown_intr(dev, txr->res, txr->tag);
2446			txr->tag = NULL;
2447		}
2448		if (txr->res != NULL)
2449			bus_release_resource(dev, SYS_RES_IRQ,
2450			    rid, txr->res);
2451		rid = rxr->msix +1;
2452		if (rxr->tag != NULL) {
2453			bus_teardown_intr(dev, rxr->res, rxr->tag);
2454			rxr->tag = NULL;
2455		}
2456		if (rxr->res != NULL)
2457			bus_release_resource(dev, SYS_RES_IRQ,
2458			    rid, rxr->res);
2459	}
2460
2461        if (adapter->linkvec) /* we are doing MSIX */
2462                rid = adapter->linkvec + 1;
2463        else
2464                (adapter->msix != 0) ? (rid = 1):(rid = 0);
2465
2466	if (adapter->tag != NULL) {
2467		bus_teardown_intr(dev, adapter->res, adapter->tag);
2468		adapter->tag = NULL;
2469	}
2470
2471	if (adapter->res != NULL)
2472		bus_release_resource(dev, SYS_RES_IRQ, rid, adapter->res);
2473
2474
2475	if (adapter->msix)
2476		pci_release_msi(dev);
2477
2478	if (adapter->msix_mem != NULL)
2479		bus_release_resource(dev, SYS_RES_MEMORY,
2480		    PCIR_BAR(EM_MSIX_BAR), adapter->msix_mem);
2481
2482	if (adapter->memory != NULL)
2483		bus_release_resource(dev, SYS_RES_MEMORY,
2484		    PCIR_BAR(0), adapter->memory);
2485
2486	if (adapter->flash != NULL)
2487		bus_release_resource(dev, SYS_RES_MEMORY,
2488		    EM_FLASH, adapter->flash);
2489}
2490
2491/*
2492 * Setup MSI or MSI/X
2493 */
2494static int
2495em_setup_msix(struct adapter *adapter)
2496{
2497	device_t dev = adapter->dev;
2498	int val = 0;
2499
2500
2501	/* Setup MSI/X for Hartwell */
2502	if ((adapter->hw.mac.type == e1000_82574) &&
2503	    (em_enable_msix == TRUE)) {
2504		/* Map the MSIX BAR */
2505		int rid = PCIR_BAR(EM_MSIX_BAR);
2506		adapter->msix_mem = bus_alloc_resource_any(dev,
2507		    SYS_RES_MEMORY, &rid, RF_ACTIVE);
2508       		if (!adapter->msix_mem) {
2509			/* May not be enabled */
2510               		device_printf(adapter->dev,
2511			    "Unable to map MSIX table \n");
2512			goto msi;
2513       		}
2514		val = pci_msix_count(dev);
2515		if (val != 5) {
2516			bus_release_resource(dev, SYS_RES_MEMORY,
2517			    PCIR_BAR(EM_MSIX_BAR), adapter->msix_mem);
2518			adapter->msix_mem = NULL;
2519               		device_printf(adapter->dev,
2520			    "MSIX vectors wrong, using MSI \n");
2521			goto msi;
2522		}
2523		if (em_msix_queues == 2) {
2524			val = 5;
2525			adapter->num_queues = 2;
2526		} else {
2527			val = 3;
2528			adapter->num_queues = 1;
2529		}
2530		if (pci_alloc_msix(dev, &val) == 0) {
2531			device_printf(adapter->dev,
2532			    "Using MSIX interrupts "
2533			    "with %d vectors\n", val);
2534		}
2535
2536		return (val);
2537	}
2538msi:
2539       	val = pci_msi_count(dev);
2540       	if (val == 1 && pci_alloc_msi(dev, &val) == 0) {
2541               	adapter->msix = 1;
2542               	device_printf(adapter->dev,"Using MSI interrupt\n");
2543		return (val);
2544	}
2545	/* Should only happen due to manual invention */
2546	device_printf(adapter->dev,"Setup MSIX failure\n");
2547	return (0);
2548}
2549
2550
2551/*********************************************************************
2552 *
2553 *  Initialize the hardware to a configuration
2554 *  as specified by the adapter structure.
2555 *
2556 **********************************************************************/
2557static void
2558em_reset(struct adapter *adapter)
2559{
2560	device_t	dev = adapter->dev;
2561	struct e1000_hw	*hw = &adapter->hw;
2562	u16		rx_buffer_size;
2563
2564	INIT_DEBUGOUT("em_reset: begin");
2565
2566	/* Set up smart power down as default off on newer adapters. */
2567	if (!em_smart_pwr_down && (hw->mac.type == e1000_82571 ||
2568	    hw->mac.type == e1000_82572)) {
2569		u16 phy_tmp = 0;
2570
2571		/* Speed up time to link by disabling smart power down. */
2572		e1000_read_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, &phy_tmp);
2573		phy_tmp &= ~IGP02E1000_PM_SPD;
2574		e1000_write_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, phy_tmp);
2575	}
2576
2577	/*
2578	 * These parameters control the automatic generation (Tx) and
2579	 * response (Rx) to Ethernet PAUSE frames.
2580	 * - High water mark should allow for at least two frames to be
2581	 *   received after sending an XOFF.
2582	 * - Low water mark works best when it is very near the high water mark.
2583	 *   This allows the receiver to restart by sending XON when it has
2584	 *   drained a bit. Here we use an arbitary value of 1500 which will
2585	 *   restart after one full frame is pulled from the buffer. There
2586	 *   could be several smaller frames in the buffer and if so they will
2587	 *   not trigger the XON until their total number reduces the buffer
2588	 *   by 1500.
2589	 * - The pause time is fairly large at 1000 x 512ns = 512 usec.
2590	 */
2591	rx_buffer_size = ((E1000_READ_REG(hw, E1000_PBA) & 0xffff) << 10 );
2592
2593	hw->fc.high_water = rx_buffer_size -
2594	    roundup2(adapter->max_frame_size, 1024);
2595	hw->fc.low_water = hw->fc.high_water - 1500;
2596
2597	if (hw->mac.type == e1000_80003es2lan)
2598		hw->fc.pause_time = 0xFFFF;
2599	else
2600		hw->fc.pause_time = EM_FC_PAUSE_TIME;
2601
2602	hw->fc.send_xon = TRUE;
2603
2604        /* Set Flow control, use the tunable location if sane */
2605        if ((em_fc_setting >= 0) || (em_fc_setting < 4))
2606		hw->fc.requested_mode = em_fc_setting;
2607	else
2608		hw->fc.requested_mode = e1000_fc_none;
2609
2610	/* Override - workaround for PCHLAN issue */
2611	if (hw->mac.type == e1000_pchlan)
2612                hw->fc.requested_mode = e1000_fc_rx_pause;
2613
2614	/* Issue a global reset */
2615	e1000_reset_hw(hw);
2616	E1000_WRITE_REG(hw, E1000_WUC, 0);
2617
2618	if (e1000_init_hw(hw) < 0) {
2619		device_printf(dev, "Hardware Initialization Failed\n");
2620		return;
2621	}
2622
2623	E1000_WRITE_REG(hw, E1000_VET, ETHERTYPE_VLAN);
2624	e1000_get_phy_info(hw);
2625	e1000_check_for_link(hw);
2626	return;
2627}
2628
2629/*********************************************************************
2630 *
2631 *  Setup networking device structure and register an interface.
2632 *
2633 **********************************************************************/
2634static void
2635em_setup_interface(device_t dev, struct adapter *adapter)
2636{
2637	struct ifnet   *ifp;
2638
2639	INIT_DEBUGOUT("em_setup_interface: begin");
2640
2641	ifp = adapter->ifp = if_alloc(IFT_ETHER);
2642	if (ifp == NULL)
2643		panic("%s: can not if_alloc()", device_get_nameunit(dev));
2644	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
2645	ifp->if_mtu = ETHERMTU;
2646	ifp->if_init =  em_init;
2647	ifp->if_softc = adapter;
2648	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
2649	ifp->if_ioctl = em_ioctl;
2650	ifp->if_start = em_start;
2651	IFQ_SET_MAXLEN(&ifp->if_snd, adapter->num_tx_desc - 1);
2652	ifp->if_snd.ifq_drv_maxlen = adapter->num_tx_desc - 1;
2653	IFQ_SET_READY(&ifp->if_snd);
2654
2655	ether_ifattach(ifp, adapter->hw.mac.addr);
2656
2657	ifp->if_capabilities = ifp->if_capenable = 0;
2658
2659#if __FreeBSD_version >= 800000
2660	/* Multiqueue tx functions */
2661	ifp->if_transmit = em_mq_start;
2662	ifp->if_qflush = em_qflush;
2663#endif
2664
2665	ifp->if_capabilities |= IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM;
2666	ifp->if_capenable |= IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM;
2667
2668	/* Enable TSO by default, can disable with ifconfig */
2669	ifp->if_capabilities |= IFCAP_TSO4;
2670	ifp->if_capenable |= IFCAP_TSO4;
2671
2672	/*
2673	 * Tell the upper layer(s) we
2674	 * support full VLAN capability
2675	 */
2676	ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header);
2677	ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU;
2678	ifp->if_capenable |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU;
2679
2680	/*
2681	** Dont turn this on by default, if vlans are
2682	** created on another pseudo device (eg. lagg)
2683	** then vlan events are not passed thru, breaking
2684	** operation, but with HW FILTER off it works. If
2685	** using vlans directly on the em driver you can
2686	** enable this and get full hardware tag filtering.
2687	*/
2688	ifp->if_capabilities |= IFCAP_VLAN_HWFILTER;
2689
2690#ifdef DEVICE_POLLING
2691	ifp->if_capabilities |= IFCAP_POLLING;
2692#endif
2693
2694	/* Enable All WOL methods by default */
2695	if (adapter->wol) {
2696		ifp->if_capabilities |= IFCAP_WOL;
2697		ifp->if_capenable |= IFCAP_WOL;
2698	}
2699
2700	/*
2701	 * Specify the media types supported by this adapter and register
2702	 * callbacks to update media and link information
2703	 */
2704	ifmedia_init(&adapter->media, IFM_IMASK,
2705	    em_media_change, em_media_status);
2706	if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
2707	    (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
2708		u_char fiber_type = IFM_1000_SX;	/* default type */
2709
2710		ifmedia_add(&adapter->media, IFM_ETHER | fiber_type | IFM_FDX,
2711			    0, NULL);
2712		ifmedia_add(&adapter->media, IFM_ETHER | fiber_type, 0, NULL);
2713	} else {
2714		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T, 0, NULL);
2715		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T | IFM_FDX,
2716			    0, NULL);
2717		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX,
2718			    0, NULL);
2719		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX | IFM_FDX,
2720			    0, NULL);
2721		if (adapter->hw.phy.type != e1000_phy_ife) {
2722			ifmedia_add(&adapter->media,
2723				IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
2724			ifmedia_add(&adapter->media,
2725				IFM_ETHER | IFM_1000_T, 0, NULL);
2726		}
2727	}
2728	ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL);
2729	ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO);
2730}
2731
2732
2733/*
2734 * Manage DMA'able memory.
2735 */
2736static void
2737em_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
2738{
2739	if (error)
2740		return;
2741	*(bus_addr_t *) arg = segs[0].ds_addr;
2742}
2743
2744static int
2745em_dma_malloc(struct adapter *adapter, bus_size_t size,
2746        struct em_dma_alloc *dma, int mapflags)
2747{
2748	int error;
2749
2750	error = bus_dma_tag_create(bus_get_dma_tag(adapter->dev), /* parent */
2751				EM_DBA_ALIGN, 0,	/* alignment, bounds */
2752				BUS_SPACE_MAXADDR,	/* lowaddr */
2753				BUS_SPACE_MAXADDR,	/* highaddr */
2754				NULL, NULL,		/* filter, filterarg */
2755				size,			/* maxsize */
2756				1,			/* nsegments */
2757				size,			/* maxsegsize */
2758				0,			/* flags */
2759				NULL,			/* lockfunc */
2760				NULL,			/* lockarg */
2761				&dma->dma_tag);
2762	if (error) {
2763		device_printf(adapter->dev,
2764		    "%s: bus_dma_tag_create failed: %d\n",
2765		    __func__, error);
2766		goto fail_0;
2767	}
2768
2769	error = bus_dmamem_alloc(dma->dma_tag, (void**) &dma->dma_vaddr,
2770	    BUS_DMA_NOWAIT | BUS_DMA_COHERENT, &dma->dma_map);
2771	if (error) {
2772		device_printf(adapter->dev,
2773		    "%s: bus_dmamem_alloc(%ju) failed: %d\n",
2774		    __func__, (uintmax_t)size, error);
2775		goto fail_2;
2776	}
2777
2778	dma->dma_paddr = 0;
2779	error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
2780	    size, em_dmamap_cb, &dma->dma_paddr, mapflags | BUS_DMA_NOWAIT);
2781	if (error || dma->dma_paddr == 0) {
2782		device_printf(adapter->dev,
2783		    "%s: bus_dmamap_load failed: %d\n",
2784		    __func__, error);
2785		goto fail_3;
2786	}
2787
2788	return (0);
2789
2790fail_3:
2791	bus_dmamap_unload(dma->dma_tag, dma->dma_map);
2792fail_2:
2793	bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
2794	bus_dma_tag_destroy(dma->dma_tag);
2795fail_0:
2796	dma->dma_map = NULL;
2797	dma->dma_tag = NULL;
2798
2799	return (error);
2800}
2801
2802static void
2803em_dma_free(struct adapter *adapter, struct em_dma_alloc *dma)
2804{
2805	if (dma->dma_tag == NULL)
2806		return;
2807	if (dma->dma_map != NULL) {
2808		bus_dmamap_sync(dma->dma_tag, dma->dma_map,
2809		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
2810		bus_dmamap_unload(dma->dma_tag, dma->dma_map);
2811		bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
2812		dma->dma_map = NULL;
2813	}
2814	bus_dma_tag_destroy(dma->dma_tag);
2815	dma->dma_tag = NULL;
2816}
2817
2818
2819/*********************************************************************
2820 *
2821 *  Allocate memory for the transmit and receive rings, and then
2822 *  the descriptors associated with each, called only once at attach.
2823 *
2824 **********************************************************************/
2825static int
2826em_allocate_queues(struct adapter *adapter)
2827{
2828	device_t		dev = adapter->dev;
2829	struct tx_ring		*txr = NULL;
2830	struct rx_ring		*rxr = NULL;
2831	int rsize, tsize, error = E1000_SUCCESS;
2832	int txconf = 0, rxconf = 0;
2833
2834
2835	/* Allocate the TX ring struct memory */
2836	if (!(adapter->tx_rings =
2837	    (struct tx_ring *) malloc(sizeof(struct tx_ring) *
2838	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2839		device_printf(dev, "Unable to allocate TX ring memory\n");
2840		error = ENOMEM;
2841		goto fail;
2842	}
2843
2844	/* Now allocate the RX */
2845	if (!(adapter->rx_rings =
2846	    (struct rx_ring *) malloc(sizeof(struct rx_ring) *
2847	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2848		device_printf(dev, "Unable to allocate RX ring memory\n");
2849		error = ENOMEM;
2850		goto rx_fail;
2851	}
2852
2853	tsize = roundup2(adapter->num_tx_desc *
2854	    sizeof(struct e1000_tx_desc), EM_DBA_ALIGN);
2855	/*
2856	 * Now set up the TX queues, txconf is needed to handle the
2857	 * possibility that things fail midcourse and we need to
2858	 * undo memory gracefully
2859	 */
2860	for (int i = 0; i < adapter->num_queues; i++, txconf++) {
2861		/* Set up some basics */
2862		txr = &adapter->tx_rings[i];
2863		txr->adapter = adapter;
2864		txr->me = i;
2865
2866		/* Initialize the TX lock */
2867		snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
2868		    device_get_nameunit(dev), txr->me);
2869		mtx_init(&txr->tx_mtx, txr->mtx_name, NULL, MTX_DEF);
2870
2871		if (em_dma_malloc(adapter, tsize,
2872			&txr->txdma, BUS_DMA_NOWAIT)) {
2873			device_printf(dev,
2874			    "Unable to allocate TX Descriptor memory\n");
2875			error = ENOMEM;
2876			goto err_tx_desc;
2877		}
2878		txr->tx_base = (struct e1000_tx_desc *)txr->txdma.dma_vaddr;
2879		bzero((void *)txr->tx_base, tsize);
2880
2881        	if (em_allocate_transmit_buffers(txr)) {
2882			device_printf(dev,
2883			    "Critical Failure setting up transmit buffers\n");
2884			error = ENOMEM;
2885			goto err_tx_desc;
2886        	}
2887#if __FreeBSD_version >= 800000
2888		/* Allocate a buf ring */
2889		txr->br = buf_ring_alloc(4096, M_DEVBUF,
2890		    M_WAITOK, &txr->tx_mtx);
2891#endif
2892	}
2893
2894	/*
2895	 * Next the RX queues...
2896	 */
2897	rsize = roundup2(adapter->num_rx_desc *
2898	    sizeof(struct e1000_rx_desc), EM_DBA_ALIGN);
2899	for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
2900		rxr = &adapter->rx_rings[i];
2901		rxr->adapter = adapter;
2902		rxr->me = i;
2903
2904		/* Initialize the RX lock */
2905		snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
2906		    device_get_nameunit(dev), txr->me);
2907		mtx_init(&rxr->rx_mtx, rxr->mtx_name, NULL, MTX_DEF);
2908
2909		if (em_dma_malloc(adapter, rsize,
2910			&rxr->rxdma, BUS_DMA_NOWAIT)) {
2911			device_printf(dev,
2912			    "Unable to allocate RxDescriptor memory\n");
2913			error = ENOMEM;
2914			goto err_rx_desc;
2915		}
2916		rxr->rx_base = (struct e1000_rx_desc *)rxr->rxdma.dma_vaddr;
2917		bzero((void *)rxr->rx_base, rsize);
2918
2919        	/* Allocate receive buffers for the ring*/
2920		if (em_allocate_receive_buffers(rxr)) {
2921			device_printf(dev,
2922			    "Critical Failure setting up receive buffers\n");
2923			error = ENOMEM;
2924			goto err_rx_desc;
2925		}
2926	}
2927
2928	return (0);
2929
2930err_rx_desc:
2931	for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
2932		em_dma_free(adapter, &rxr->rxdma);
2933err_tx_desc:
2934	for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
2935		em_dma_free(adapter, &txr->txdma);
2936	free(adapter->rx_rings, M_DEVBUF);
2937rx_fail:
2938	buf_ring_free(txr->br, M_DEVBUF);
2939	free(adapter->tx_rings, M_DEVBUF);
2940fail:
2941	return (error);
2942}
2943
2944
2945/*********************************************************************
2946 *
2947 *  Allocate memory for tx_buffer structures. The tx_buffer stores all
2948 *  the information needed to transmit a packet on the wire. This is
2949 *  called only once at attach, setup is done every reset.
2950 *
2951 **********************************************************************/
2952static int
2953em_allocate_transmit_buffers(struct tx_ring *txr)
2954{
2955	struct adapter *adapter = txr->adapter;
2956	device_t dev = adapter->dev;
2957	struct em_buffer *txbuf;
2958	int error, i;
2959
2960	/*
2961	 * Setup DMA descriptor areas.
2962	 */
2963	if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
2964			       1, 0,			/* alignment, bounds */
2965			       BUS_SPACE_MAXADDR,	/* lowaddr */
2966			       BUS_SPACE_MAXADDR,	/* highaddr */
2967			       NULL, NULL,		/* filter, filterarg */
2968			       EM_TSO_SIZE,		/* maxsize */
2969			       EM_MAX_SCATTER,		/* nsegments */
2970			       PAGE_SIZE,		/* maxsegsize */
2971			       0,			/* flags */
2972			       NULL,			/* lockfunc */
2973			       NULL,			/* lockfuncarg */
2974			       &txr->txtag))) {
2975		device_printf(dev,"Unable to allocate TX DMA tag\n");
2976		goto fail;
2977	}
2978
2979	if (!(txr->tx_buffers =
2980	    (struct em_buffer *) malloc(sizeof(struct em_buffer) *
2981	    adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2982		device_printf(dev, "Unable to allocate tx_buffer memory\n");
2983		error = ENOMEM;
2984		goto fail;
2985	}
2986
2987        /* Create the descriptor buffer dma maps */
2988	txbuf = txr->tx_buffers;
2989	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
2990		error = bus_dmamap_create(txr->txtag, 0, &txbuf->map);
2991		if (error != 0) {
2992			device_printf(dev, "Unable to create TX DMA map\n");
2993			goto fail;
2994		}
2995	}
2996
2997	return 0;
2998fail:
2999	/* We free all, it handles case where we are in the middle */
3000	em_free_transmit_structures(adapter);
3001	return (error);
3002}
3003
3004/*********************************************************************
3005 *
3006 *  Initialize a transmit ring.
3007 *
3008 **********************************************************************/
3009static void
3010em_setup_transmit_ring(struct tx_ring *txr)
3011{
3012	struct adapter *adapter = txr->adapter;
3013	struct em_buffer *txbuf;
3014	int i;
3015
3016	/* Clear the old descriptor contents */
3017	EM_TX_LOCK(txr);
3018	bzero((void *)txr->tx_base,
3019	      (sizeof(struct e1000_tx_desc)) * adapter->num_tx_desc);
3020	/* Reset indices */
3021	txr->next_avail_desc = 0;
3022	txr->next_to_clean = 0;
3023
3024	/* Free any existing tx buffers. */
3025        txbuf = txr->tx_buffers;
3026	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3027		if (txbuf->m_head != NULL) {
3028			bus_dmamap_sync(txr->txtag, txbuf->map,
3029			    BUS_DMASYNC_POSTWRITE);
3030			bus_dmamap_unload(txr->txtag, txbuf->map);
3031			m_freem(txbuf->m_head);
3032			txbuf->m_head = NULL;
3033		}
3034		/* clear the watch index */
3035		txbuf->next_eop = -1;
3036        }
3037
3038	/* Set number of descriptors available */
3039	txr->tx_avail = adapter->num_tx_desc;
3040
3041	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3042	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3043	EM_TX_UNLOCK(txr);
3044}
3045
3046/*********************************************************************
3047 *
3048 *  Initialize all transmit rings.
3049 *
3050 **********************************************************************/
3051static void
3052em_setup_transmit_structures(struct adapter *adapter)
3053{
3054	struct tx_ring *txr = adapter->tx_rings;
3055
3056	for (int i = 0; i < adapter->num_queues; i++, txr++)
3057		em_setup_transmit_ring(txr);
3058
3059	return;
3060}
3061
3062/*********************************************************************
3063 *
3064 *  Enable transmit unit.
3065 *
3066 **********************************************************************/
3067static void
3068em_initialize_transmit_unit(struct adapter *adapter)
3069{
3070	struct tx_ring	*txr = adapter->tx_rings;
3071	struct e1000_hw	*hw = &adapter->hw;
3072	u32	tctl, tarc, tipg = 0;
3073
3074	 INIT_DEBUGOUT("em_initialize_transmit_unit: begin");
3075
3076	for (int i = 0; i < adapter->num_queues; i++, txr++) {
3077		u64 bus_addr = txr->txdma.dma_paddr;
3078		/* Base and Len of TX Ring */
3079		E1000_WRITE_REG(hw, E1000_TDLEN(i),
3080	    	    adapter->num_tx_desc * sizeof(struct e1000_tx_desc));
3081		E1000_WRITE_REG(hw, E1000_TDBAH(i),
3082	    	    (u32)(bus_addr >> 32));
3083		E1000_WRITE_REG(hw, E1000_TDBAL(i),
3084	    	    (u32)bus_addr);
3085		/* Init the HEAD/TAIL indices */
3086		E1000_WRITE_REG(hw, E1000_TDT(i), 0);
3087		E1000_WRITE_REG(hw, E1000_TDH(i), 0);
3088
3089		HW_DEBUGOUT2("Base = %x, Length = %x\n",
3090		    E1000_READ_REG(&adapter->hw, E1000_TDBAL(i)),
3091		    E1000_READ_REG(&adapter->hw, E1000_TDLEN(i)));
3092
3093		txr->watchdog_check = FALSE;
3094	}
3095
3096	/* Set the default values for the Tx Inter Packet Gap timer */
3097	switch (adapter->hw.mac.type) {
3098	case e1000_82542:
3099		tipg = DEFAULT_82542_TIPG_IPGT;
3100		tipg |= DEFAULT_82542_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT;
3101		tipg |= DEFAULT_82542_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT;
3102		break;
3103	case e1000_80003es2lan:
3104		tipg = DEFAULT_82543_TIPG_IPGR1;
3105		tipg |= DEFAULT_80003ES2LAN_TIPG_IPGR2 <<
3106		    E1000_TIPG_IPGR2_SHIFT;
3107		break;
3108	default:
3109		if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
3110		    (adapter->hw.phy.media_type ==
3111		    e1000_media_type_internal_serdes))
3112			tipg = DEFAULT_82543_TIPG_IPGT_FIBER;
3113		else
3114			tipg = DEFAULT_82543_TIPG_IPGT_COPPER;
3115		tipg |= DEFAULT_82543_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT;
3116		tipg |= DEFAULT_82543_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT;
3117	}
3118
3119	E1000_WRITE_REG(&adapter->hw, E1000_TIPG, tipg);
3120	E1000_WRITE_REG(&adapter->hw, E1000_TIDV, adapter->tx_int_delay.value);
3121
3122	if(adapter->hw.mac.type >= e1000_82540)
3123		E1000_WRITE_REG(&adapter->hw, E1000_TADV,
3124		    adapter->tx_abs_int_delay.value);
3125
3126	if ((adapter->hw.mac.type == e1000_82571) ||
3127	    (adapter->hw.mac.type == e1000_82572)) {
3128		tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
3129		tarc |= SPEED_MODE_BIT;
3130		E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3131	} else if (adapter->hw.mac.type == e1000_80003es2lan) {
3132		tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
3133		tarc |= 1;
3134		E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3135		tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(1));
3136		tarc |= 1;
3137		E1000_WRITE_REG(&adapter->hw, E1000_TARC(1), tarc);
3138	}
3139
3140	adapter->txd_cmd = E1000_TXD_CMD_IFCS;
3141	if (adapter->tx_int_delay.value > 0)
3142		adapter->txd_cmd |= E1000_TXD_CMD_IDE;
3143
3144	/* Program the Transmit Control Register */
3145	tctl = E1000_READ_REG(&adapter->hw, E1000_TCTL);
3146	tctl &= ~E1000_TCTL_CT;
3147	tctl |= (E1000_TCTL_PSP | E1000_TCTL_RTLC | E1000_TCTL_EN |
3148		   (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT));
3149
3150	if (adapter->hw.mac.type >= e1000_82571)
3151		tctl |= E1000_TCTL_MULR;
3152
3153	/* This write will effectively turn on the transmit unit. */
3154	E1000_WRITE_REG(&adapter->hw, E1000_TCTL, tctl);
3155
3156}
3157
3158
3159/*********************************************************************
3160 *
3161 *  Free all transmit rings.
3162 *
3163 **********************************************************************/
3164static void
3165em_free_transmit_structures(struct adapter *adapter)
3166{
3167	struct tx_ring *txr = adapter->tx_rings;
3168
3169	for (int i = 0; i < adapter->num_queues; i++, txr++) {
3170		EM_TX_LOCK(txr);
3171		em_free_transmit_buffers(txr);
3172		em_dma_free(adapter, &txr->txdma);
3173		EM_TX_UNLOCK(txr);
3174		EM_TX_LOCK_DESTROY(txr);
3175	}
3176
3177	free(adapter->tx_rings, M_DEVBUF);
3178}
3179
3180/*********************************************************************
3181 *
3182 *  Free transmit ring related data structures.
3183 *
3184 **********************************************************************/
3185static void
3186em_free_transmit_buffers(struct tx_ring *txr)
3187{
3188	struct adapter		*adapter = txr->adapter;
3189	struct em_buffer	*txbuf;
3190
3191	INIT_DEBUGOUT("free_transmit_ring: begin");
3192
3193	if (txr->tx_buffers == NULL)
3194		return;
3195
3196	for (int i = 0; i < adapter->num_tx_desc; i++) {
3197		txbuf = &txr->tx_buffers[i];
3198		if (txbuf->m_head != NULL) {
3199			bus_dmamap_sync(txr->txtag, txbuf->map,
3200			    BUS_DMASYNC_POSTWRITE);
3201			bus_dmamap_unload(txr->txtag,
3202			    txbuf->map);
3203			m_freem(txbuf->m_head);
3204			txbuf->m_head = NULL;
3205			if (txbuf->map != NULL) {
3206				bus_dmamap_destroy(txr->txtag,
3207				    txbuf->map);
3208				txbuf->map = NULL;
3209			}
3210		} else if (txbuf->map != NULL) {
3211			bus_dmamap_unload(txr->txtag,
3212			    txbuf->map);
3213			bus_dmamap_destroy(txr->txtag,
3214			    txbuf->map);
3215			txbuf->map = NULL;
3216		}
3217	}
3218#if __FreeBSD_version >= 800000
3219	if (txr->br != NULL)
3220		buf_ring_free(txr->br, M_DEVBUF);
3221#endif
3222	if (txr->tx_buffers != NULL) {
3223		free(txr->tx_buffers, M_DEVBUF);
3224		txr->tx_buffers = NULL;
3225	}
3226	if (txr->txtag != NULL) {
3227		bus_dma_tag_destroy(txr->txtag);
3228		txr->txtag = NULL;
3229	}
3230	return;
3231}
3232
3233
3234/*********************************************************************
3235 *
3236 *  The offload context needs to be set when we transfer the first
3237 *  packet of a particular protocol (TCP/UDP). This routine has been
3238 *  enhanced to deal with inserted VLAN headers, and IPV6 (not complete)
3239 *
3240 *  Added back the old method of keeping the current context type
3241 *  and not setting if unnecessary, as this is reported to be a
3242 *  big performance win.  -jfv
3243 **********************************************************************/
3244static void
3245em_transmit_checksum_setup(struct tx_ring *txr, struct mbuf *mp,
3246    u32 *txd_upper, u32 *txd_lower)
3247{
3248	struct adapter			*adapter = txr->adapter;
3249	struct e1000_context_desc	*TXD = NULL;
3250	struct em_buffer *tx_buffer;
3251	struct ether_vlan_header *eh;
3252	struct ip *ip = NULL;
3253	struct ip6_hdr *ip6;
3254	int cur, ehdrlen;
3255	u32 cmd, hdr_len, ip_hlen;
3256	u16 etype;
3257	u8 ipproto;
3258
3259
3260	cmd = hdr_len = ipproto = 0;
3261	cur = txr->next_avail_desc;
3262
3263	/*
3264	 * Determine where frame payload starts.
3265	 * Jump over vlan headers if already present,
3266	 * helpful for QinQ too.
3267	 */
3268	eh = mtod(mp, struct ether_vlan_header *);
3269	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
3270		etype = ntohs(eh->evl_proto);
3271		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3272	} else {
3273		etype = ntohs(eh->evl_encap_proto);
3274		ehdrlen = ETHER_HDR_LEN;
3275	}
3276
3277	/*
3278	 * We only support TCP/UDP for IPv4 and IPv6 for the moment.
3279	 * TODO: Support SCTP too when it hits the tree.
3280	 */
3281	switch (etype) {
3282	case ETHERTYPE_IP:
3283		ip = (struct ip *)(mp->m_data + ehdrlen);
3284		ip_hlen = ip->ip_hl << 2;
3285
3286		/* Setup of IP header checksum. */
3287		if (mp->m_pkthdr.csum_flags & CSUM_IP) {
3288			/*
3289			 * Start offset for header checksum calculation.
3290			 * End offset for header checksum calculation.
3291			 * Offset of place to put the checksum.
3292			 */
3293			TXD = (struct e1000_context_desc *)
3294			    &txr->tx_base[cur];
3295			TXD->lower_setup.ip_fields.ipcss = ehdrlen;
3296			TXD->lower_setup.ip_fields.ipcse =
3297			    htole16(ehdrlen + ip_hlen);
3298			TXD->lower_setup.ip_fields.ipcso =
3299			    ehdrlen + offsetof(struct ip, ip_sum);
3300			cmd |= E1000_TXD_CMD_IP;
3301			*txd_upper |= E1000_TXD_POPTS_IXSM << 8;
3302		}
3303
3304		if (mp->m_len < ehdrlen + ip_hlen)
3305			return;	/* failure */
3306
3307		hdr_len = ehdrlen + ip_hlen;
3308		ipproto = ip->ip_p;
3309
3310		break;
3311	case ETHERTYPE_IPV6:
3312		ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
3313		ip_hlen = sizeof(struct ip6_hdr); /* XXX: No header stacking. */
3314
3315		if (mp->m_len < ehdrlen + ip_hlen)
3316			return;	/* failure */
3317
3318		/* IPv6 doesn't have a header checksum. */
3319
3320		hdr_len = ehdrlen + ip_hlen;
3321		ipproto = ip6->ip6_nxt;
3322
3323		break;
3324	default:
3325		*txd_upper = 0;
3326		*txd_lower = 0;
3327		return;
3328	}
3329
3330	switch (ipproto) {
3331	case IPPROTO_TCP:
3332		if (mp->m_pkthdr.csum_flags & CSUM_TCP) {
3333			*txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
3334			*txd_upper |= E1000_TXD_POPTS_TXSM << 8;
3335			/* no need for context if already set */
3336			if (txr->last_hw_offload == CSUM_TCP)
3337				return;
3338			txr->last_hw_offload = CSUM_TCP;
3339			/*
3340			 * Start offset for payload checksum calculation.
3341			 * End offset for payload checksum calculation.
3342			 * Offset of place to put the checksum.
3343			 */
3344			TXD = (struct e1000_context_desc *)
3345			    &txr->tx_base[cur];
3346			TXD->upper_setup.tcp_fields.tucss = hdr_len;
3347			TXD->upper_setup.tcp_fields.tucse = htole16(0);
3348			TXD->upper_setup.tcp_fields.tucso =
3349			    hdr_len + offsetof(struct tcphdr, th_sum);
3350			cmd |= E1000_TXD_CMD_TCP;
3351		}
3352		break;
3353	case IPPROTO_UDP:
3354	{
3355		if (mp->m_pkthdr.csum_flags & CSUM_UDP) {
3356			*txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
3357			*txd_upper |= E1000_TXD_POPTS_TXSM << 8;
3358			/* no need for context if already set */
3359			if (txr->last_hw_offload == CSUM_UDP)
3360				return;
3361			txr->last_hw_offload = CSUM_UDP;
3362			/*
3363			 * Start offset for header checksum calculation.
3364			 * End offset for header checksum calculation.
3365			 * Offset of place to put the checksum.
3366			 */
3367			TXD = (struct e1000_context_desc *)
3368			    &txr->tx_base[cur];
3369			TXD->upper_setup.tcp_fields.tucss = hdr_len;
3370			TXD->upper_setup.tcp_fields.tucse = htole16(0);
3371			TXD->upper_setup.tcp_fields.tucso =
3372			    hdr_len + offsetof(struct udphdr, uh_sum);
3373		}
3374		/* Fall Thru */
3375	}
3376	default:
3377		break;
3378	}
3379
3380	TXD->tcp_seg_setup.data = htole32(0);
3381	TXD->cmd_and_length =
3382	    htole32(adapter->txd_cmd | E1000_TXD_CMD_DEXT | cmd);
3383	tx_buffer = &txr->tx_buffers[cur];
3384	tx_buffer->m_head = NULL;
3385	tx_buffer->next_eop = -1;
3386
3387	if (++cur == adapter->num_tx_desc)
3388		cur = 0;
3389
3390	txr->tx_avail--;
3391	txr->next_avail_desc = cur;
3392}
3393
3394
3395/**********************************************************************
3396 *
3397 *  Setup work for hardware segmentation offload (TSO)
3398 *
3399 **********************************************************************/
3400static bool
3401em_tso_setup(struct tx_ring *txr, struct mbuf *mp, u32 *txd_upper,
3402   u32 *txd_lower)
3403{
3404	struct adapter			*adapter = txr->adapter;
3405	struct e1000_context_desc	*TXD;
3406	struct em_buffer		*tx_buffer;
3407	struct ether_vlan_header	*eh;
3408	struct ip			*ip;
3409	struct ip6_hdr			*ip6;
3410	struct tcphdr			*th;
3411	int cur, ehdrlen, hdr_len, ip_hlen, isip6;
3412	u16 etype;
3413
3414	/*
3415	 * This function could/should be extended to support IP/IPv6
3416	 * fragmentation as well.  But as they say, one step at a time.
3417	 */
3418
3419	/*
3420	 * Determine where frame payload starts.
3421	 * Jump over vlan headers if already present,
3422	 * helpful for QinQ too.
3423	 */
3424	eh = mtod(mp, struct ether_vlan_header *);
3425	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
3426		etype = ntohs(eh->evl_proto);
3427		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3428	} else {
3429		etype = ntohs(eh->evl_encap_proto);
3430		ehdrlen = ETHER_HDR_LEN;
3431	}
3432
3433	/* Ensure we have at least the IP+TCP header in the first mbuf. */
3434	if (mp->m_len < ehdrlen + sizeof(struct ip) + sizeof(struct tcphdr))
3435		return FALSE;	/* -1 */
3436
3437	/*
3438	 * We only support TCP for IPv4 and IPv6 (notyet) for the moment.
3439	 * TODO: Support SCTP too when it hits the tree.
3440	 */
3441	switch (etype) {
3442	case ETHERTYPE_IP:
3443		isip6 = 0;
3444		ip = (struct ip *)(mp->m_data + ehdrlen);
3445		if (ip->ip_p != IPPROTO_TCP)
3446			return FALSE;	/* 0 */
3447		ip->ip_len = 0;
3448		ip->ip_sum = 0;
3449		ip_hlen = ip->ip_hl << 2;
3450		if (mp->m_len < ehdrlen + ip_hlen + sizeof(struct tcphdr))
3451			return FALSE;	/* -1 */
3452		th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
3453#if 1
3454		th->th_sum = in_pseudo(ip->ip_src.s_addr,
3455		    ip->ip_dst.s_addr, htons(IPPROTO_TCP));
3456#else
3457		th->th_sum = mp->m_pkthdr.csum_data;
3458#endif
3459		break;
3460	case ETHERTYPE_IPV6:
3461		isip6 = 1;
3462		return FALSE;			/* Not supported yet. */
3463		ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
3464		if (ip6->ip6_nxt != IPPROTO_TCP)
3465			return FALSE;	/* 0 */
3466		ip6->ip6_plen = 0;
3467		ip_hlen = sizeof(struct ip6_hdr); /* XXX: no header stacking. */
3468		if (mp->m_len < ehdrlen + ip_hlen + sizeof(struct tcphdr))
3469			return FALSE;	/* -1 */
3470		th = (struct tcphdr *)((caddr_t)ip6 + ip_hlen);
3471#if 0
3472		th->th_sum = in6_pseudo(ip6->ip6_src, ip->ip6_dst,
3473		    htons(IPPROTO_TCP));	/* XXX: function notyet. */
3474#else
3475		th->th_sum = mp->m_pkthdr.csum_data;
3476#endif
3477		break;
3478	default:
3479		return FALSE;
3480	}
3481	hdr_len = ehdrlen + ip_hlen + (th->th_off << 2);
3482
3483	*txd_lower = (E1000_TXD_CMD_DEXT |	/* Extended descr type */
3484		      E1000_TXD_DTYP_D |	/* Data descr type */
3485		      E1000_TXD_CMD_TSE);	/* Do TSE on this packet */
3486
3487	/* IP and/or TCP header checksum calculation and insertion. */
3488	*txd_upper = ((isip6 ? 0 : E1000_TXD_POPTS_IXSM) |
3489		      E1000_TXD_POPTS_TXSM) << 8;
3490
3491	cur = txr->next_avail_desc;
3492	tx_buffer = &txr->tx_buffers[cur];
3493	TXD = (struct e1000_context_desc *) &txr->tx_base[cur];
3494
3495	/* IPv6 doesn't have a header checksum. */
3496	if (!isip6) {
3497		/*
3498		 * Start offset for header checksum calculation.
3499		 * End offset for header checksum calculation.
3500		 * Offset of place put the checksum.
3501		 */
3502		TXD->lower_setup.ip_fields.ipcss = ehdrlen;
3503		TXD->lower_setup.ip_fields.ipcse =
3504		    htole16(ehdrlen + ip_hlen - 1);
3505		TXD->lower_setup.ip_fields.ipcso =
3506		    ehdrlen + offsetof(struct ip, ip_sum);
3507	}
3508	/*
3509	 * Start offset for payload checksum calculation.
3510	 * End offset for payload checksum calculation.
3511	 * Offset of place to put the checksum.
3512	 */
3513	TXD->upper_setup.tcp_fields.tucss =
3514	    ehdrlen + ip_hlen;
3515	TXD->upper_setup.tcp_fields.tucse = 0;
3516	TXD->upper_setup.tcp_fields.tucso =
3517	    ehdrlen + ip_hlen + offsetof(struct tcphdr, th_sum);
3518	/*
3519	 * Payload size per packet w/o any headers.
3520	 * Length of all headers up to payload.
3521	 */
3522	TXD->tcp_seg_setup.fields.mss = htole16(mp->m_pkthdr.tso_segsz);
3523	TXD->tcp_seg_setup.fields.hdr_len = hdr_len;
3524
3525	TXD->cmd_and_length = htole32(adapter->txd_cmd |
3526				E1000_TXD_CMD_DEXT |	/* Extended descr */
3527				E1000_TXD_CMD_TSE |	/* TSE context */
3528				(isip6 ? 0 : E1000_TXD_CMD_IP) |
3529				E1000_TXD_CMD_TCP |	/* Do TCP checksum */
3530				(mp->m_pkthdr.len - (hdr_len))); /* Total len */
3531
3532	tx_buffer->m_head = NULL;
3533	tx_buffer->next_eop = -1;
3534
3535	if (++cur == adapter->num_tx_desc)
3536		cur = 0;
3537
3538	txr->tx_avail--;
3539	txr->next_avail_desc = cur;
3540	txr->tx_tso = TRUE;
3541
3542	return TRUE;
3543}
3544
3545
3546/**********************************************************************
3547 *
3548 *  Examine each tx_buffer in the used queue. If the hardware is done
3549 *  processing the packet then free associated resources. The
3550 *  tx_buffer is put back on the free queue.
3551 *
3552 **********************************************************************/
3553static bool
3554em_txeof(struct tx_ring *txr)
3555{
3556	struct adapter	*adapter = txr->adapter;
3557        int first, last, done, num_avail;
3558        struct em_buffer *tx_buffer;
3559        struct e1000_tx_desc   *tx_desc, *eop_desc;
3560	struct ifnet   *ifp = adapter->ifp;
3561
3562	EM_TX_LOCK_ASSERT(txr);
3563
3564        if (txr->tx_avail == adapter->num_tx_desc)
3565                return (FALSE);
3566
3567        num_avail = txr->tx_avail;
3568        first = txr->next_to_clean;
3569        tx_desc = &txr->tx_base[first];
3570        tx_buffer = &txr->tx_buffers[first];
3571	last = tx_buffer->next_eop;
3572        eop_desc = &txr->tx_base[last];
3573
3574	/*
3575	 * What this does is get the index of the
3576	 * first descriptor AFTER the EOP of the
3577	 * first packet, that way we can do the
3578	 * simple comparison on the inner while loop.
3579	 */
3580	if (++last == adapter->num_tx_desc)
3581 		last = 0;
3582	done = last;
3583
3584        bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3585            BUS_DMASYNC_POSTREAD);
3586
3587        while (eop_desc->upper.fields.status & E1000_TXD_STAT_DD) {
3588		/* We clean the range of the packet */
3589		while (first != done) {
3590                	tx_desc->upper.data = 0;
3591                	tx_desc->lower.data = 0;
3592                	tx_desc->buffer_addr = 0;
3593                	++num_avail;
3594
3595			if (tx_buffer->m_head) {
3596				ifp->if_opackets++;
3597				bus_dmamap_sync(txr->txtag,
3598				    tx_buffer->map,
3599				    BUS_DMASYNC_POSTWRITE);
3600				bus_dmamap_unload(txr->txtag,
3601				    tx_buffer->map);
3602
3603                        	m_freem(tx_buffer->m_head);
3604                        	tx_buffer->m_head = NULL;
3605                	}
3606			tx_buffer->next_eop = -1;
3607			txr->watchdog_time = ticks;
3608
3609	                if (++first == adapter->num_tx_desc)
3610				first = 0;
3611
3612	                tx_buffer = &txr->tx_buffers[first];
3613			tx_desc = &txr->tx_base[first];
3614		}
3615		/* See if we can continue to the next packet */
3616		last = tx_buffer->next_eop;
3617		if (last != -1) {
3618        		eop_desc = &txr->tx_base[last];
3619			/* Get new done point */
3620			if (++last == adapter->num_tx_desc) last = 0;
3621			done = last;
3622		} else
3623			break;
3624        }
3625        bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3626            BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3627
3628        txr->next_to_clean = first;
3629
3630        /*
3631         * If we have enough room, clear IFF_DRV_OACTIVE to
3632         * tell the stack that it is OK to send packets.
3633         * If there are no pending descriptors, clear the watchdog.
3634         */
3635        if (num_avail > EM_TX_CLEANUP_THRESHOLD) {
3636                ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
3637                if (num_avail == adapter->num_tx_desc) {
3638			txr->watchdog_check = FALSE;
3639        		txr->tx_avail = num_avail;
3640			return (FALSE);
3641		}
3642        }
3643
3644        txr->tx_avail = num_avail;
3645	return (TRUE);
3646}
3647
3648
3649/*********************************************************************
3650 *
3651 *  Refresh RX descriptor mbufs from system mbuf buffer pool.
3652 *
3653 **********************************************************************/
3654static void
3655em_refresh_mbufs(struct rx_ring *rxr, int limit)
3656{
3657	struct adapter		*adapter = rxr->adapter;
3658	struct mbuf		*m;
3659	bus_dma_segment_t	segs[1];
3660	bus_dmamap_t		map;
3661	struct em_buffer	*rxbuf;
3662	int			i, error, nsegs, cleaned;
3663
3664	i = rxr->next_to_refresh;
3665	cleaned = -1;
3666	while (i != limit) {
3667		m = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR);
3668		if (m == NULL)
3669			goto update;
3670		m->m_len = m->m_pkthdr.len = MCLBYTES;
3671
3672		if (adapter->max_frame_size <= (MCLBYTES - ETHER_ALIGN))
3673			m_adj(m, ETHER_ALIGN);
3674
3675		/*
3676		 * Using memory from the mbuf cluster pool, invoke the
3677		 * bus_dma machinery to arrange the memory mapping.
3678		 */
3679		error = bus_dmamap_load_mbuf_sg(rxr->rxtag, rxr->rx_sparemap,
3680		    m, segs, &nsegs, BUS_DMA_NOWAIT);
3681		if (error != 0) {
3682			m_free(m);
3683			goto update;
3684		}
3685
3686		/* If nsegs is wrong then the stack is corrupt. */
3687		KASSERT(nsegs == 1, ("Too many segments returned!"));
3688
3689		rxbuf = &rxr->rx_buffers[i];
3690		if (rxbuf->m_head != NULL)
3691			bus_dmamap_unload(rxr->rxtag, rxbuf->map);
3692
3693		map = rxbuf->map;
3694		rxbuf->map = rxr->rx_sparemap;
3695		rxr->rx_sparemap = map;
3696		bus_dmamap_sync(rxr->rxtag,
3697		    rxbuf->map, BUS_DMASYNC_PREREAD);
3698		rxbuf->m_head = m;
3699		rxr->rx_base[i].buffer_addr = htole64(segs[0].ds_addr);
3700
3701		cleaned = i;
3702		/* Calculate next index */
3703		if (++i == adapter->num_rx_desc)
3704			i = 0;
3705		/* This is the work marker for refresh */
3706		rxr->next_to_refresh = i;
3707	}
3708update:
3709	if (cleaned != -1) /* Update tail index */
3710		E1000_WRITE_REG(&adapter->hw,
3711		    E1000_RDT(rxr->me), cleaned);
3712
3713	return;
3714}
3715
3716
3717/*********************************************************************
3718 *
3719 *  Allocate memory for rx_buffer structures. Since we use one
3720 *  rx_buffer per received packet, the maximum number of rx_buffer's
3721 *  that we'll need is equal to the number of receive descriptors
3722 *  that we've allocated.
3723 *
3724 **********************************************************************/
3725static int
3726em_allocate_receive_buffers(struct rx_ring *rxr)
3727{
3728	struct adapter		*adapter = rxr->adapter;
3729	device_t		dev = adapter->dev;
3730	struct em_buffer	*rxbuf;
3731	int			error;
3732
3733	rxr->rx_buffers = malloc(sizeof(struct em_buffer) *
3734	    adapter->num_rx_desc, M_DEVBUF, M_NOWAIT | M_ZERO);
3735	if (rxr->rx_buffers == NULL) {
3736		device_printf(dev, "Unable to allocate rx_buffer memory\n");
3737		return (ENOMEM);
3738	}
3739
3740	error = bus_dma_tag_create(bus_get_dma_tag(dev), /* parent */
3741				1, 0,			/* alignment, bounds */
3742				BUS_SPACE_MAXADDR,	/* lowaddr */
3743				BUS_SPACE_MAXADDR,	/* highaddr */
3744				NULL, NULL,		/* filter, filterarg */
3745				MCLBYTES,		/* maxsize */
3746				1,			/* nsegments */
3747				MCLBYTES,		/* maxsegsize */
3748				0,			/* flags */
3749				NULL,			/* lockfunc */
3750				NULL,			/* lockarg */
3751				&rxr->rxtag);
3752	if (error) {
3753		device_printf(dev, "%s: bus_dma_tag_create failed %d\n",
3754		    __func__, error);
3755		goto fail;
3756	}
3757
3758	/* Create the spare map (used by getbuf) */
3759	error = bus_dmamap_create(rxr->rxtag, BUS_DMA_NOWAIT,
3760	     &rxr->rx_sparemap);
3761	if (error) {
3762		device_printf(dev, "%s: bus_dmamap_create failed: %d\n",
3763		    __func__, error);
3764		goto fail;
3765	}
3766
3767	rxbuf = rxr->rx_buffers;
3768	for (int i = 0; i < adapter->num_rx_desc; i++, rxbuf++) {
3769		rxbuf = &rxr->rx_buffers[i];
3770		error = bus_dmamap_create(rxr->rxtag, BUS_DMA_NOWAIT,
3771		    &rxbuf->map);
3772		if (error) {
3773			device_printf(dev, "%s: bus_dmamap_create failed: %d\n",
3774			    __func__, error);
3775			goto fail;
3776		}
3777	}
3778
3779	return (0);
3780
3781fail:
3782	em_free_receive_structures(adapter);
3783	return (error);
3784}
3785
3786
3787/*********************************************************************
3788 *
3789 *  Initialize a receive ring and its buffers.
3790 *
3791 **********************************************************************/
3792static int
3793em_setup_receive_ring(struct rx_ring *rxr)
3794{
3795	struct	adapter 	*adapter = rxr->adapter;
3796	struct em_buffer	*rxbuf;
3797	bus_dma_segment_t	seg[1];
3798	int			rsize, nsegs, error;
3799
3800
3801	/* Clear the ring contents */
3802	EM_RX_LOCK(rxr);
3803	rsize = roundup2(adapter->num_rx_desc *
3804	    sizeof(struct e1000_rx_desc), EM_DBA_ALIGN);
3805	bzero((void *)rxr->rx_base, rsize);
3806
3807	/*
3808	** Free current RX buffer structs and their mbufs
3809	*/
3810	for (int i = 0; i < adapter->num_rx_desc; i++) {
3811		rxbuf = &rxr->rx_buffers[i];
3812		if (rxbuf->m_head != NULL) {
3813			bus_dmamap_sync(rxr->rxtag, rxbuf->map,
3814			    BUS_DMASYNC_POSTREAD);
3815			bus_dmamap_unload(rxr->rxtag, rxbuf->map);
3816			m_freem(rxbuf->m_head);
3817		}
3818	}
3819
3820	/* Now replenish the mbufs */
3821	for (int j = 0; j != adapter->num_rx_desc; ++j) {
3822
3823		rxbuf = &rxr->rx_buffers[j];
3824		rxbuf->m_head = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR);
3825		if (rxbuf->m_head == NULL)
3826			panic("RX ring hdr initialization failed!\n");
3827		rxbuf->m_head->m_len = MCLBYTES;
3828		rxbuf->m_head->m_flags &= ~M_HASFCS; /* we strip it */
3829		rxbuf->m_head->m_pkthdr.len = MCLBYTES;
3830
3831		/* Get the memory mapping */
3832		error = bus_dmamap_load_mbuf_sg(rxr->rxtag,
3833		    rxbuf->map, rxbuf->m_head, seg,
3834		    &nsegs, BUS_DMA_NOWAIT);
3835		if (error != 0)
3836			panic("RX ring dma initialization failed!\n");
3837		bus_dmamap_sync(rxr->rxtag,
3838		    rxbuf->map, BUS_DMASYNC_PREREAD);
3839
3840		/* Update descriptor */
3841		rxr->rx_base[j].buffer_addr = htole64(seg[0].ds_addr);
3842	}
3843
3844
3845	/* Setup our descriptor indices */
3846	rxr->next_to_check = 0;
3847	rxr->next_to_refresh = 0;
3848
3849	bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
3850	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3851
3852	EM_RX_UNLOCK(rxr);
3853	return (0);
3854}
3855
3856/*********************************************************************
3857 *
3858 *  Initialize all receive rings.
3859 *
3860 **********************************************************************/
3861static int
3862em_setup_receive_structures(struct adapter *adapter)
3863{
3864	struct rx_ring *rxr = adapter->rx_rings;
3865	int j;
3866
3867	for (j = 0; j < adapter->num_queues; j++, rxr++)
3868		if (em_setup_receive_ring(rxr))
3869			goto fail;
3870
3871	return (0);
3872fail:
3873	/*
3874	 * Free RX buffers allocated so far, we will only handle
3875	 * the rings that completed, the failing case will have
3876	 * cleaned up for itself. 'j' failed, so its the terminus.
3877	 */
3878	for (int i = 0; i < j; ++i) {
3879		rxr = &adapter->rx_rings[i];
3880		for (int n = 0; n < adapter->num_rx_desc; n++) {
3881			struct em_buffer *rxbuf;
3882			rxbuf = &rxr->rx_buffers[n];
3883			if (rxbuf->m_head != NULL) {
3884				bus_dmamap_sync(rxr->rxtag, rxbuf->map,
3885			  	  BUS_DMASYNC_POSTREAD);
3886				bus_dmamap_unload(rxr->rxtag, rxbuf->map);
3887				m_freem(rxbuf->m_head);
3888				rxbuf->m_head = NULL;
3889			}
3890		}
3891	}
3892
3893	return (ENOBUFS);
3894}
3895
3896/*********************************************************************
3897 *
3898 *  Free all receive rings.
3899 *
3900 **********************************************************************/
3901static void
3902em_free_receive_structures(struct adapter *adapter)
3903{
3904	struct rx_ring *rxr = adapter->rx_rings;
3905
3906	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
3907		em_free_receive_buffers(rxr);
3908		/* Free the ring memory as well */
3909		em_dma_free(adapter, &rxr->rxdma);
3910		EM_RX_LOCK_DESTROY(rxr);
3911	}
3912
3913	free(adapter->rx_rings, M_DEVBUF);
3914}
3915
3916
3917/*********************************************************************
3918 *
3919 *  Free receive ring data structures
3920 *
3921 **********************************************************************/
3922static void
3923em_free_receive_buffers(struct rx_ring *rxr)
3924{
3925	struct adapter		*adapter = rxr->adapter;
3926	struct em_buffer	*rxbuf = NULL;
3927
3928	INIT_DEBUGOUT("free_receive_buffers: begin");
3929
3930	if (rxr->rx_sparemap) {
3931		bus_dmamap_destroy(rxr->rxtag, rxr->rx_sparemap);
3932		rxr->rx_sparemap = NULL;
3933	}
3934
3935	if (rxr->rx_buffers != NULL) {
3936		for (int i = 0; i < adapter->num_rx_desc; i++) {
3937			rxbuf = &rxr->rx_buffers[i];
3938			if (rxbuf->map != NULL) {
3939				bus_dmamap_sync(rxr->rxtag, rxbuf->map,
3940				    BUS_DMASYNC_POSTREAD);
3941				bus_dmamap_unload(rxr->rxtag, rxbuf->map);
3942				bus_dmamap_destroy(rxr->rxtag, rxbuf->map);
3943			}
3944			if (rxbuf->m_head != NULL) {
3945				m_freem(rxbuf->m_head);
3946				rxbuf->m_head = NULL;
3947			}
3948		}
3949		free(rxr->rx_buffers, M_DEVBUF);
3950		rxr->rx_buffers = NULL;
3951	}
3952
3953	if (rxr->rxtag != NULL) {
3954		bus_dma_tag_destroy(rxr->rxtag);
3955		rxr->rxtag = NULL;
3956	}
3957
3958	return;
3959}
3960
3961
3962/*********************************************************************
3963 *
3964 *  Enable receive unit.
3965 *
3966 **********************************************************************/
3967#define MAX_INTS_PER_SEC	8000
3968#define DEFAULT_ITR	     1000000000/(MAX_INTS_PER_SEC * 256)
3969
3970static void
3971em_initialize_receive_unit(struct adapter *adapter)
3972{
3973	struct rx_ring	*rxr = adapter->rx_rings;
3974	struct ifnet	*ifp = adapter->ifp;
3975	struct e1000_hw	*hw = &adapter->hw;
3976	u64	bus_addr;
3977	u32	rctl, rxcsum;
3978
3979	INIT_DEBUGOUT("em_initialize_receive_units: begin");
3980
3981	/*
3982	 * Make sure receives are disabled while setting
3983	 * up the descriptor ring
3984	 */
3985	rctl = E1000_READ_REG(hw, E1000_RCTL);
3986	E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
3987
3988	E1000_WRITE_REG(&adapter->hw, E1000_RADV,
3989	    adapter->rx_abs_int_delay.value);
3990	/*
3991	 * Set the interrupt throttling rate. Value is calculated
3992	 * as DEFAULT_ITR = 1/(MAX_INTS_PER_SEC * 256ns)
3993	 */
3994	E1000_WRITE_REG(hw, E1000_ITR, DEFAULT_ITR);
3995
3996	/*
3997	** When using MSIX interrupts we need to throttle
3998	** using the EITR register (82574 only)
3999	*/
4000	if (adapter->msix)
4001		for (int i = 0; i < 4; i++)
4002			E1000_WRITE_REG(hw, E1000_EITR_82574(i),
4003			    DEFAULT_ITR);
4004
4005	/* Disable accelerated ackknowledge */
4006	if (adapter->hw.mac.type == e1000_82574)
4007		E1000_WRITE_REG(hw, E1000_RFCTL, E1000_RFCTL_ACK_DIS);
4008
4009	if (ifp->if_capenable & IFCAP_RXCSUM) {
4010		rxcsum = E1000_READ_REG(hw, E1000_RXCSUM);
4011		rxcsum |= (E1000_RXCSUM_IPOFL | E1000_RXCSUM_TUOFL);
4012		E1000_WRITE_REG(hw, E1000_RXCSUM, rxcsum);
4013	}
4014
4015	/*
4016	** XXX TEMPORARY WORKAROUND: on some systems with 82573
4017	** long latencies are observed, like Lenovo X60. This
4018	** change eliminates the problem, but since having positive
4019	** values in RDTR is a known source of problems on other
4020	** platforms another solution is being sought.
4021	*/
4022	if (hw->mac.type == e1000_82573)
4023		E1000_WRITE_REG(hw, E1000_RDTR, 0x20);
4024
4025	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4026		/* Setup the Base and Length of the Rx Descriptor Ring */
4027		bus_addr = rxr->rxdma.dma_paddr;
4028		E1000_WRITE_REG(hw, E1000_RDLEN(i),
4029		    adapter->num_rx_desc * sizeof(struct e1000_rx_desc));
4030		E1000_WRITE_REG(hw, E1000_RDBAH(i), (u32)(bus_addr >> 32));
4031		E1000_WRITE_REG(hw, E1000_RDBAL(i), (u32)bus_addr);
4032		/* Setup the Head and Tail Descriptor Pointers */
4033		E1000_WRITE_REG(hw, E1000_RDH(i), 0);
4034		E1000_WRITE_REG(hw, E1000_RDT(i), adapter->num_rx_desc - 1);
4035	}
4036
4037	/* Setup the Receive Control Register */
4038	rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
4039	rctl |= E1000_RCTL_EN | E1000_RCTL_BAM |
4040	    E1000_RCTL_LBM_NO | E1000_RCTL_RDMTS_HALF |
4041	    (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
4042
4043        /* Make sure VLAN Filters are off */
4044        rctl &= ~E1000_RCTL_VFE;
4045	rctl &= ~E1000_RCTL_SBP;
4046	rctl |= E1000_RCTL_SZ_2048;
4047	if (ifp->if_mtu > ETHERMTU)
4048		rctl |= E1000_RCTL_LPE;
4049	else
4050		rctl &= ~E1000_RCTL_LPE;
4051
4052	/* Write out the settings */
4053	E1000_WRITE_REG(hw, E1000_RCTL, rctl);
4054
4055	return;
4056}
4057
4058
4059/*********************************************************************
4060 *
4061 *  This routine executes in interrupt context. It replenishes
4062 *  the mbufs in the descriptor and sends data which has been
4063 *  dma'ed into host memory to upper layer.
4064 *
4065 *  We loop at most count times if count is > 0, or until done if
4066 *  count < 0.
4067 *
4068 *  For polling we also now return the number of cleaned packets
4069 *********************************************************************/
4070static int
4071em_rxeof(struct rx_ring *rxr, int count)
4072{
4073	struct adapter		*adapter = rxr->adapter;
4074	struct ifnet		*ifp = adapter->ifp;;
4075	struct mbuf		*mp, *sendmp;
4076	u8			status;
4077	u16 			len;
4078	int			i, processed, rxdone = 0;
4079	bool			eop;
4080	struct e1000_rx_desc	*cur;
4081
4082	EM_RX_LOCK(rxr);
4083
4084	for (i = rxr->next_to_check, processed = 0; count != 0;) {
4085
4086		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
4087			break;
4088
4089		bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4090		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
4091
4092		cur = &rxr->rx_base[i];
4093		status = cur->status;
4094		mp = sendmp = NULL;
4095
4096		if ((status & E1000_RXD_STAT_DD) == 0)
4097			break;
4098
4099		len = le16toh(cur->length);
4100		eop = (status & E1000_RXD_STAT_EOP) != 0;
4101		count--;
4102
4103		if ((cur->errors & E1000_RXD_ERR_FRAME_ERR_MASK) == 0) {
4104
4105			/* Assign correct length to the current fragment */
4106			mp = rxr->rx_buffers[i].m_head;
4107			mp->m_len = len;
4108
4109			if (rxr->fmp == NULL) {
4110				mp->m_pkthdr.len = len;
4111				rxr->fmp = mp; /* Store the first mbuf */
4112				rxr->lmp = mp;
4113			} else {
4114				/* Chain mbuf's together */
4115				mp->m_flags &= ~M_PKTHDR;
4116				rxr->lmp->m_next = mp;
4117				rxr->lmp = rxr->lmp->m_next;
4118				rxr->fmp->m_pkthdr.len += len;
4119			}
4120
4121			if (eop) {
4122				rxr->fmp->m_pkthdr.rcvif = ifp;
4123				ifp->if_ipackets++;
4124				em_receive_checksum(cur, rxr->fmp);
4125#ifndef __NO_STRICT_ALIGNMENT
4126				if (adapter->max_frame_size >
4127				    (MCLBYTES - ETHER_ALIGN) &&
4128				    em_fixup_rx(rxr) != 0)
4129					goto skip;
4130#endif
4131				if (status & E1000_RXD_STAT_VP) {
4132					rxr->fmp->m_pkthdr.ether_vtag =
4133					    (le16toh(cur->special) &
4134					    E1000_RXD_SPC_VLAN_MASK);
4135					rxr->fmp->m_flags |= M_VLANTAG;
4136				}
4137#ifndef __NO_STRICT_ALIGNMENT
4138skip:
4139#endif
4140				sendmp = rxr->fmp;
4141				rxr->fmp = NULL;
4142				rxr->lmp = NULL;
4143			}
4144		} else {
4145			ifp->if_ierrors++;
4146			/* Reuse loaded DMA map and just update mbuf chain */
4147			mp = rxr->rx_buffers[i].m_head;
4148			mp->m_len = mp->m_pkthdr.len = MCLBYTES;
4149			mp->m_data = mp->m_ext.ext_buf;
4150			mp->m_next = NULL;
4151			if (adapter->max_frame_size <=
4152			    (MCLBYTES - ETHER_ALIGN))
4153				m_adj(mp, ETHER_ALIGN);
4154			if (rxr->fmp != NULL) {
4155				m_freem(rxr->fmp);
4156				rxr->fmp = NULL;
4157				rxr->lmp = NULL;
4158			}
4159			sendmp = NULL;
4160		}
4161
4162		/* Zero out the receive descriptors status. */
4163		cur->status = 0;
4164		++rxdone;	/* cumulative for POLL */
4165		++processed;
4166
4167		/* Advance our pointers to the next descriptor. */
4168		if (++i == adapter->num_rx_desc)
4169			i = 0;
4170
4171		/* Send to the stack */
4172		if (sendmp != NULL)
4173			(*ifp->if_input)(ifp, sendmp);
4174
4175		/* Only refresh mbufs every 8 descriptors */
4176		if (processed == 8) {
4177			em_refresh_mbufs(rxr, i);
4178			processed = 0;
4179		}
4180	}
4181
4182	/* Catch any remaining refresh work */
4183	if (processed != 0) {
4184		em_refresh_mbufs(rxr, i);
4185		processed = 0;
4186	}
4187
4188	rxr->next_to_check = i;
4189
4190	EM_RX_UNLOCK(rxr);
4191	return (rxdone);
4192}
4193
4194#ifndef __NO_STRICT_ALIGNMENT
4195/*
4196 * When jumbo frames are enabled we should realign entire payload on
4197 * architecures with strict alignment. This is serious design mistake of 8254x
4198 * as it nullifies DMA operations. 8254x just allows RX buffer size to be
4199 * 2048/4096/8192/16384. What we really want is 2048 - ETHER_ALIGN to align its
4200 * payload. On architecures without strict alignment restrictions 8254x still
4201 * performs unaligned memory access which would reduce the performance too.
4202 * To avoid copying over an entire frame to align, we allocate a new mbuf and
4203 * copy ethernet header to the new mbuf. The new mbuf is prepended into the
4204 * existing mbuf chain.
4205 *
4206 * Be aware, best performance of the 8254x is achived only when jumbo frame is
4207 * not used at all on architectures with strict alignment.
4208 */
4209static int
4210em_fixup_rx(struct rx_ring *rxr)
4211{
4212	struct adapter *adapter = rxr->adapter;
4213	struct mbuf *m, *n;
4214	int error;
4215
4216	error = 0;
4217	m = rxr->fmp;
4218	if (m->m_len <= (MCLBYTES - ETHER_HDR_LEN)) {
4219		bcopy(m->m_data, m->m_data + ETHER_HDR_LEN, m->m_len);
4220		m->m_data += ETHER_HDR_LEN;
4221	} else {
4222		MGETHDR(n, M_DONTWAIT, MT_DATA);
4223		if (n != NULL) {
4224			bcopy(m->m_data, n->m_data, ETHER_HDR_LEN);
4225			m->m_data += ETHER_HDR_LEN;
4226			m->m_len -= ETHER_HDR_LEN;
4227			n->m_len = ETHER_HDR_LEN;
4228			M_MOVE_PKTHDR(n, m);
4229			n->m_next = m;
4230			rxr->fmp = n;
4231		} else {
4232			adapter->dropped_pkts++;
4233			m_freem(rxr->fmp);
4234			rxr->fmp = NULL;
4235			error = ENOMEM;
4236		}
4237	}
4238
4239	return (error);
4240}
4241#endif
4242
4243/*********************************************************************
4244 *
4245 *  Verify that the hardware indicated that the checksum is valid.
4246 *  Inform the stack about the status of checksum so that stack
4247 *  doesn't spend time verifying the checksum.
4248 *
4249 *********************************************************************/
4250static void
4251em_receive_checksum(struct e1000_rx_desc *rx_desc, struct mbuf *mp)
4252{
4253	/* Ignore Checksum bit is set */
4254	if (rx_desc->status & E1000_RXD_STAT_IXSM) {
4255		mp->m_pkthdr.csum_flags = 0;
4256		return;
4257	}
4258
4259	if (rx_desc->status & E1000_RXD_STAT_IPCS) {
4260		/* Did it pass? */
4261		if (!(rx_desc->errors & E1000_RXD_ERR_IPE)) {
4262			/* IP Checksum Good */
4263			mp->m_pkthdr.csum_flags = CSUM_IP_CHECKED;
4264			mp->m_pkthdr.csum_flags |= CSUM_IP_VALID;
4265
4266		} else {
4267			mp->m_pkthdr.csum_flags = 0;
4268		}
4269	}
4270
4271	if (rx_desc->status & E1000_RXD_STAT_TCPCS) {
4272		/* Did it pass? */
4273		if (!(rx_desc->errors & E1000_RXD_ERR_TCPE)) {
4274			mp->m_pkthdr.csum_flags |=
4275			(CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
4276			mp->m_pkthdr.csum_data = htons(0xffff);
4277		}
4278	}
4279}
4280
4281/*
4282 * This routine is run via an vlan
4283 * config EVENT
4284 */
4285static void
4286em_register_vlan(void *arg, struct ifnet *ifp, u16 vtag)
4287{
4288	struct adapter	*adapter = ifp->if_softc;
4289	u32		index, bit;
4290
4291	if (ifp->if_softc !=  arg)   /* Not our event */
4292		return;
4293
4294	if ((vtag == 0) || (vtag > 4095))       /* Invalid ID */
4295                return;
4296
4297	index = (vtag >> 5) & 0x7F;
4298	bit = vtag & 0x1F;
4299	em_shadow_vfta[index] |= (1 << bit);
4300	++adapter->num_vlans;
4301	/* Re-init to load the changes */
4302	em_init(adapter);
4303}
4304
4305/*
4306 * This routine is run via an vlan
4307 * unconfig EVENT
4308 */
4309static void
4310em_unregister_vlan(void *arg, struct ifnet *ifp, u16 vtag)
4311{
4312	struct adapter	*adapter = ifp->if_softc;
4313	u32		index, bit;
4314
4315	if (ifp->if_softc !=  arg)
4316		return;
4317
4318	if ((vtag == 0) || (vtag > 4095))       /* Invalid */
4319                return;
4320
4321	index = (vtag >> 5) & 0x7F;
4322	bit = vtag & 0x1F;
4323	em_shadow_vfta[index] &= ~(1 << bit);
4324	--adapter->num_vlans;
4325	/* Re-init to load the changes */
4326	em_init(adapter);
4327}
4328
4329static void
4330em_setup_vlan_hw_support(struct adapter *adapter)
4331{
4332	struct e1000_hw *hw = &adapter->hw;
4333	u32             reg;
4334
4335	/*
4336	** We get here thru init_locked, meaning
4337	** a soft reset, this has already cleared
4338	** the VFTA and other state, so if there
4339	** have been no vlan's registered do nothing.
4340	*/
4341	if (adapter->num_vlans == 0)
4342                return;
4343
4344	/*
4345	** A soft reset zero's out the VFTA, so
4346	** we need to repopulate it now.
4347	*/
4348	for (int i = 0; i < EM_VFTA_SIZE; i++)
4349                if (em_shadow_vfta[i] != 0)
4350			E1000_WRITE_REG_ARRAY(hw, E1000_VFTA,
4351                            i, em_shadow_vfta[i]);
4352
4353	reg = E1000_READ_REG(hw, E1000_CTRL);
4354	reg |= E1000_CTRL_VME;
4355	E1000_WRITE_REG(hw, E1000_CTRL, reg);
4356
4357	/* Enable the Filter Table */
4358	reg = E1000_READ_REG(hw, E1000_RCTL);
4359	reg &= ~E1000_RCTL_CFIEN;
4360	reg |= E1000_RCTL_VFE;
4361	E1000_WRITE_REG(hw, E1000_RCTL, reg);
4362
4363	/* Update the frame size */
4364	E1000_WRITE_REG(&adapter->hw, E1000_RLPML,
4365	    adapter->max_frame_size + VLAN_TAG_SIZE);
4366}
4367
4368static void
4369em_enable_intr(struct adapter *adapter)
4370{
4371	struct e1000_hw *hw = &adapter->hw;
4372	u32 ims_mask = IMS_ENABLE_MASK;
4373
4374	if (adapter->msix) {
4375		E1000_WRITE_REG(hw, EM_EIAC, EM_MSIX_MASK);
4376		ims_mask |= EM_MSIX_MASK;
4377	}
4378	E1000_WRITE_REG(hw, E1000_IMS, ims_mask);
4379}
4380
4381static void
4382em_disable_intr(struct adapter *adapter)
4383{
4384	struct e1000_hw *hw = &adapter->hw;
4385
4386	if (adapter->msix)
4387		E1000_WRITE_REG(hw, EM_EIAC, 0);
4388	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
4389}
4390
4391/*
4392 * Bit of a misnomer, what this really means is
4393 * to enable OS management of the system... aka
4394 * to disable special hardware management features
4395 */
4396static void
4397em_init_manageability(struct adapter *adapter)
4398{
4399	/* A shared code workaround */
4400#define E1000_82542_MANC2H E1000_MANC2H
4401	if (adapter->has_manage) {
4402		int manc2h = E1000_READ_REG(&adapter->hw, E1000_MANC2H);
4403		int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
4404
4405		/* disable hardware interception of ARP */
4406		manc &= ~(E1000_MANC_ARP_EN);
4407
4408                /* enable receiving management packets to the host */
4409		manc |= E1000_MANC_EN_MNG2HOST;
4410#define E1000_MNG2HOST_PORT_623 (1 << 5)
4411#define E1000_MNG2HOST_PORT_664 (1 << 6)
4412		manc2h |= E1000_MNG2HOST_PORT_623;
4413		manc2h |= E1000_MNG2HOST_PORT_664;
4414		E1000_WRITE_REG(&adapter->hw, E1000_MANC2H, manc2h);
4415		E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
4416	}
4417}
4418
4419/*
4420 * Give control back to hardware management
4421 * controller if there is one.
4422 */
4423static void
4424em_release_manageability(struct adapter *adapter)
4425{
4426	if (adapter->has_manage) {
4427		int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
4428
4429		/* re-enable hardware interception of ARP */
4430		manc |= E1000_MANC_ARP_EN;
4431		manc &= ~E1000_MANC_EN_MNG2HOST;
4432
4433		E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
4434	}
4435}
4436
4437/*
4438 * em_get_hw_control sets the {CTRL_EXT|FWSM}:DRV_LOAD bit.
4439 * For ASF and Pass Through versions of f/w this means
4440 * that the driver is loaded. For AMT version type f/w
4441 * this means that the network i/f is open.
4442 */
4443static void
4444em_get_hw_control(struct adapter *adapter)
4445{
4446	u32 ctrl_ext, swsm;
4447
4448	if (adapter->hw.mac.type == e1000_82573) {
4449		swsm = E1000_READ_REG(&adapter->hw, E1000_SWSM);
4450		E1000_WRITE_REG(&adapter->hw, E1000_SWSM,
4451		    swsm | E1000_SWSM_DRV_LOAD);
4452		return;
4453	}
4454	/* else */
4455	ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4456	E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
4457	    ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
4458	return;
4459}
4460
4461/*
4462 * em_release_hw_control resets {CTRL_EXT|FWSM}:DRV_LOAD bit.
4463 * For ASF and Pass Through versions of f/w this means that
4464 * the driver is no longer loaded. For AMT versions of the
4465 * f/w this means that the network i/f is closed.
4466 */
4467static void
4468em_release_hw_control(struct adapter *adapter)
4469{
4470	u32 ctrl_ext, swsm;
4471
4472	if (!adapter->has_manage)
4473		return;
4474
4475	if (adapter->hw.mac.type == e1000_82573) {
4476		swsm = E1000_READ_REG(&adapter->hw, E1000_SWSM);
4477		E1000_WRITE_REG(&adapter->hw, E1000_SWSM,
4478		    swsm & ~E1000_SWSM_DRV_LOAD);
4479		return;
4480	}
4481	/* else */
4482	ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4483	E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
4484	    ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
4485	return;
4486}
4487
4488static int
4489em_is_valid_ether_addr(u8 *addr)
4490{
4491	char zero_addr[6] = { 0, 0, 0, 0, 0, 0 };
4492
4493	if ((addr[0] & 1) || (!bcmp(addr, zero_addr, ETHER_ADDR_LEN))) {
4494		return (FALSE);
4495	}
4496
4497	return (TRUE);
4498}
4499
4500/*
4501** Parse the interface capabilities with regard
4502** to both system management and wake-on-lan for
4503** later use.
4504*/
4505static void
4506em_get_wakeup(device_t dev)
4507{
4508	struct adapter	*adapter = device_get_softc(dev);
4509	u16		eeprom_data = 0, device_id, apme_mask;
4510
4511	adapter->has_manage = e1000_enable_mng_pass_thru(&adapter->hw);
4512	apme_mask = EM_EEPROM_APME;
4513
4514	switch (adapter->hw.mac.type) {
4515	case e1000_82573:
4516	case e1000_82583:
4517		adapter->has_amt = TRUE;
4518		/* Falls thru */
4519	case e1000_82571:
4520	case e1000_82572:
4521	case e1000_80003es2lan:
4522		if (adapter->hw.bus.func == 1) {
4523			e1000_read_nvm(&adapter->hw,
4524			    NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data);
4525			break;
4526		} else
4527			e1000_read_nvm(&adapter->hw,
4528			    NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
4529		break;
4530	case e1000_ich8lan:
4531	case e1000_ich9lan:
4532	case e1000_ich10lan:
4533	case e1000_pchlan:
4534		apme_mask = E1000_WUC_APME;
4535		adapter->has_amt = TRUE;
4536		eeprom_data = E1000_READ_REG(&adapter->hw, E1000_WUC);
4537		break;
4538	default:
4539		e1000_read_nvm(&adapter->hw,
4540		    NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
4541		break;
4542	}
4543	if (eeprom_data & apme_mask)
4544		adapter->wol = (E1000_WUFC_MAG | E1000_WUFC_MC);
4545	/*
4546         * We have the eeprom settings, now apply the special cases
4547         * where the eeprom may be wrong or the board won't support
4548         * wake on lan on a particular port
4549	 */
4550	device_id = pci_get_device(dev);
4551        switch (device_id) {
4552	case E1000_DEV_ID_82571EB_FIBER:
4553		/* Wake events only supported on port A for dual fiber
4554		 * regardless of eeprom setting */
4555		if (E1000_READ_REG(&adapter->hw, E1000_STATUS) &
4556		    E1000_STATUS_FUNC_1)
4557			adapter->wol = 0;
4558		break;
4559	case E1000_DEV_ID_82571EB_QUAD_COPPER:
4560	case E1000_DEV_ID_82571EB_QUAD_FIBER:
4561	case E1000_DEV_ID_82571EB_QUAD_COPPER_LP:
4562                /* if quad port adapter, disable WoL on all but port A */
4563		if (global_quad_port_a != 0)
4564			adapter->wol = 0;
4565		/* Reset for multiple quad port adapters */
4566		if (++global_quad_port_a == 4)
4567			global_quad_port_a = 0;
4568                break;
4569	}
4570	return;
4571}
4572
4573
4574/*
4575 * Enable PCI Wake On Lan capability
4576 */
4577static void
4578em_enable_wakeup(device_t dev)
4579{
4580	struct adapter	*adapter = device_get_softc(dev);
4581	struct ifnet	*ifp = adapter->ifp;
4582	u32		pmc, ctrl, ctrl_ext, rctl;
4583	u16     	status;
4584
4585	if ((pci_find_extcap(dev, PCIY_PMG, &pmc) != 0))
4586		return;
4587
4588	/* Advertise the wakeup capability */
4589	ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL);
4590	ctrl |= (E1000_CTRL_SWDPIN2 | E1000_CTRL_SWDPIN3);
4591	E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl);
4592	E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
4593
4594	if ((adapter->hw.mac.type == e1000_ich8lan) ||
4595	    (adapter->hw.mac.type == e1000_pchlan) ||
4596	    (adapter->hw.mac.type == e1000_ich9lan) ||
4597	    (adapter->hw.mac.type == e1000_ich10lan)) {
4598		e1000_disable_gig_wol_ich8lan(&adapter->hw);
4599		e1000_hv_phy_powerdown_workaround_ich8lan(&adapter->hw);
4600	}
4601
4602	/* Keep the laser running on Fiber adapters */
4603	if (adapter->hw.phy.media_type == e1000_media_type_fiber ||
4604	    adapter->hw.phy.media_type == e1000_media_type_internal_serdes) {
4605		ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4606		ctrl_ext |= E1000_CTRL_EXT_SDP3_DATA;
4607		E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, ctrl_ext);
4608	}
4609
4610	/*
4611	** Determine type of Wakeup: note that wol
4612	** is set with all bits on by default.
4613	*/
4614	if ((ifp->if_capenable & IFCAP_WOL_MAGIC) == 0)
4615		adapter->wol &= ~E1000_WUFC_MAG;
4616
4617	if ((ifp->if_capenable & IFCAP_WOL_MCAST) == 0)
4618		adapter->wol &= ~E1000_WUFC_MC;
4619	else {
4620		rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
4621		rctl |= E1000_RCTL_MPE;
4622		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, rctl);
4623	}
4624
4625	if (adapter->hw.mac.type == e1000_pchlan) {
4626		if (em_enable_phy_wakeup(adapter))
4627			return;
4628	} else {
4629		E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
4630		E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
4631	}
4632
4633	if (adapter->hw.phy.type == e1000_phy_igp_3)
4634		e1000_igp3_phy_powerdown_workaround_ich8lan(&adapter->hw);
4635
4636        /* Request PME */
4637        status = pci_read_config(dev, pmc + PCIR_POWER_STATUS, 2);
4638	status &= ~(PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE);
4639	if (ifp->if_capenable & IFCAP_WOL)
4640		status |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE;
4641        pci_write_config(dev, pmc + PCIR_POWER_STATUS, status, 2);
4642
4643	return;
4644}
4645
4646/*
4647** WOL in the newer chipset interfaces (pchlan)
4648** require thing to be copied into the phy
4649*/
4650static int
4651em_enable_phy_wakeup(struct adapter *adapter)
4652{
4653	struct e1000_hw *hw = &adapter->hw;
4654	u32 mreg, ret = 0;
4655	u16 preg;
4656
4657	/* copy MAC RARs to PHY RARs */
4658	for (int i = 0; i < adapter->hw.mac.rar_entry_count; i++) {
4659		mreg = E1000_READ_REG(hw, E1000_RAL(i));
4660		e1000_write_phy_reg(hw, BM_RAR_L(i), (u16)(mreg & 0xFFFF));
4661		e1000_write_phy_reg(hw, BM_RAR_M(i),
4662		    (u16)((mreg >> 16) & 0xFFFF));
4663		mreg = E1000_READ_REG(hw, E1000_RAH(i));
4664		e1000_write_phy_reg(hw, BM_RAR_H(i), (u16)(mreg & 0xFFFF));
4665		e1000_write_phy_reg(hw, BM_RAR_CTRL(i),
4666		    (u16)((mreg >> 16) & 0xFFFF));
4667	}
4668
4669	/* copy MAC MTA to PHY MTA */
4670	for (int i = 0; i < adapter->hw.mac.mta_reg_count; i++) {
4671		mreg = E1000_READ_REG_ARRAY(hw, E1000_MTA, i);
4672		e1000_write_phy_reg(hw, BM_MTA(i), (u16)(mreg & 0xFFFF));
4673		e1000_write_phy_reg(hw, BM_MTA(i) + 1,
4674		    (u16)((mreg >> 16) & 0xFFFF));
4675	}
4676
4677	/* configure PHY Rx Control register */
4678	e1000_read_phy_reg(&adapter->hw, BM_RCTL, &preg);
4679	mreg = E1000_READ_REG(hw, E1000_RCTL);
4680	if (mreg & E1000_RCTL_UPE)
4681		preg |= BM_RCTL_UPE;
4682	if (mreg & E1000_RCTL_MPE)
4683		preg |= BM_RCTL_MPE;
4684	preg &= ~(BM_RCTL_MO_MASK);
4685	if (mreg & E1000_RCTL_MO_3)
4686		preg |= (((mreg & E1000_RCTL_MO_3) >> E1000_RCTL_MO_SHIFT)
4687				<< BM_RCTL_MO_SHIFT);
4688	if (mreg & E1000_RCTL_BAM)
4689		preg |= BM_RCTL_BAM;
4690	if (mreg & E1000_RCTL_PMCF)
4691		preg |= BM_RCTL_PMCF;
4692	mreg = E1000_READ_REG(hw, E1000_CTRL);
4693	if (mreg & E1000_CTRL_RFCE)
4694		preg |= BM_RCTL_RFCE;
4695	e1000_write_phy_reg(&adapter->hw, BM_RCTL, preg);
4696
4697	/* enable PHY wakeup in MAC register */
4698	E1000_WRITE_REG(hw, E1000_WUC,
4699	    E1000_WUC_PHY_WAKE | E1000_WUC_PME_EN);
4700	E1000_WRITE_REG(hw, E1000_WUFC, adapter->wol);
4701
4702	/* configure and enable PHY wakeup in PHY registers */
4703	e1000_write_phy_reg(&adapter->hw, BM_WUFC, adapter->wol);
4704	e1000_write_phy_reg(&adapter->hw, BM_WUC, E1000_WUC_PME_EN);
4705
4706	/* activate PHY wakeup */
4707	ret = hw->phy.ops.acquire(hw);
4708	if (ret) {
4709		printf("Could not acquire PHY\n");
4710		return ret;
4711	}
4712	e1000_write_phy_reg_mdic(hw, IGP01E1000_PHY_PAGE_SELECT,
4713	                         (BM_WUC_ENABLE_PAGE << IGP_PAGE_SHIFT));
4714	ret = e1000_read_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, &preg);
4715	if (ret) {
4716		printf("Could not read PHY page 769\n");
4717		goto out;
4718	}
4719	preg |= BM_WUC_ENABLE_BIT | BM_WUC_HOST_WU_BIT;
4720	ret = e1000_write_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, preg);
4721	if (ret)
4722		printf("Could not set PHY Host Wakeup bit\n");
4723out:
4724	hw->phy.ops.release(hw);
4725
4726	return ret;
4727}
4728
4729static void
4730em_led_func(void *arg, int onoff)
4731{
4732	struct adapter	*adapter = arg;
4733
4734	EM_CORE_LOCK(adapter);
4735	if (onoff) {
4736		e1000_setup_led(&adapter->hw);
4737		e1000_led_on(&adapter->hw);
4738	} else {
4739		e1000_led_off(&adapter->hw);
4740		e1000_cleanup_led(&adapter->hw);
4741	}
4742	EM_CORE_UNLOCK(adapter);
4743}
4744
4745/**********************************************************************
4746 *
4747 *  Update the board statistics counters.
4748 *
4749 **********************************************************************/
4750static void
4751em_update_stats_counters(struct adapter *adapter)
4752{
4753	struct ifnet   *ifp;
4754
4755	if(adapter->hw.phy.media_type == e1000_media_type_copper ||
4756	   (E1000_READ_REG(&adapter->hw, E1000_STATUS) & E1000_STATUS_LU)) {
4757		adapter->stats.symerrs += E1000_READ_REG(&adapter->hw, E1000_SYMERRS);
4758		adapter->stats.sec += E1000_READ_REG(&adapter->hw, E1000_SEC);
4759	}
4760	adapter->stats.crcerrs += E1000_READ_REG(&adapter->hw, E1000_CRCERRS);
4761	adapter->stats.mpc += E1000_READ_REG(&adapter->hw, E1000_MPC);
4762	adapter->stats.scc += E1000_READ_REG(&adapter->hw, E1000_SCC);
4763	adapter->stats.ecol += E1000_READ_REG(&adapter->hw, E1000_ECOL);
4764
4765	adapter->stats.mcc += E1000_READ_REG(&adapter->hw, E1000_MCC);
4766	adapter->stats.latecol += E1000_READ_REG(&adapter->hw, E1000_LATECOL);
4767	adapter->stats.colc += E1000_READ_REG(&adapter->hw, E1000_COLC);
4768	adapter->stats.dc += E1000_READ_REG(&adapter->hw, E1000_DC);
4769	adapter->stats.rlec += E1000_READ_REG(&adapter->hw, E1000_RLEC);
4770	adapter->stats.xonrxc += E1000_READ_REG(&adapter->hw, E1000_XONRXC);
4771	adapter->stats.xontxc += E1000_READ_REG(&adapter->hw, E1000_XONTXC);
4772	adapter->stats.xoffrxc += E1000_READ_REG(&adapter->hw, E1000_XOFFRXC);
4773	adapter->stats.xofftxc += E1000_READ_REG(&adapter->hw, E1000_XOFFTXC);
4774	adapter->stats.fcruc += E1000_READ_REG(&adapter->hw, E1000_FCRUC);
4775	adapter->stats.prc64 += E1000_READ_REG(&adapter->hw, E1000_PRC64);
4776	adapter->stats.prc127 += E1000_READ_REG(&adapter->hw, E1000_PRC127);
4777	adapter->stats.prc255 += E1000_READ_REG(&adapter->hw, E1000_PRC255);
4778	adapter->stats.prc511 += E1000_READ_REG(&adapter->hw, E1000_PRC511);
4779	adapter->stats.prc1023 += E1000_READ_REG(&adapter->hw, E1000_PRC1023);
4780	adapter->stats.prc1522 += E1000_READ_REG(&adapter->hw, E1000_PRC1522);
4781	adapter->stats.gprc += E1000_READ_REG(&adapter->hw, E1000_GPRC);
4782	adapter->stats.bprc += E1000_READ_REG(&adapter->hw, E1000_BPRC);
4783	adapter->stats.mprc += E1000_READ_REG(&adapter->hw, E1000_MPRC);
4784	adapter->stats.gptc += E1000_READ_REG(&adapter->hw, E1000_GPTC);
4785
4786	/* For the 64-bit byte counters the low dword must be read first. */
4787	/* Both registers clear on the read of the high dword */
4788
4789	adapter->stats.gorc += E1000_READ_REG(&adapter->hw, E1000_GORCH);
4790	adapter->stats.gotc += E1000_READ_REG(&adapter->hw, E1000_GOTCH);
4791
4792	adapter->stats.rnbc += E1000_READ_REG(&adapter->hw, E1000_RNBC);
4793	adapter->stats.ruc += E1000_READ_REG(&adapter->hw, E1000_RUC);
4794	adapter->stats.rfc += E1000_READ_REG(&adapter->hw, E1000_RFC);
4795	adapter->stats.roc += E1000_READ_REG(&adapter->hw, E1000_ROC);
4796	adapter->stats.rjc += E1000_READ_REG(&adapter->hw, E1000_RJC);
4797
4798	adapter->stats.tor += E1000_READ_REG(&adapter->hw, E1000_TORH);
4799	adapter->stats.tot += E1000_READ_REG(&adapter->hw, E1000_TOTH);
4800
4801	adapter->stats.tpr += E1000_READ_REG(&adapter->hw, E1000_TPR);
4802	adapter->stats.tpt += E1000_READ_REG(&adapter->hw, E1000_TPT);
4803	adapter->stats.ptc64 += E1000_READ_REG(&adapter->hw, E1000_PTC64);
4804	adapter->stats.ptc127 += E1000_READ_REG(&adapter->hw, E1000_PTC127);
4805	adapter->stats.ptc255 += E1000_READ_REG(&adapter->hw, E1000_PTC255);
4806	adapter->stats.ptc511 += E1000_READ_REG(&adapter->hw, E1000_PTC511);
4807	adapter->stats.ptc1023 += E1000_READ_REG(&adapter->hw, E1000_PTC1023);
4808	adapter->stats.ptc1522 += E1000_READ_REG(&adapter->hw, E1000_PTC1522);
4809	adapter->stats.mptc += E1000_READ_REG(&adapter->hw, E1000_MPTC);
4810	adapter->stats.bptc += E1000_READ_REG(&adapter->hw, E1000_BPTC);
4811
4812	if (adapter->hw.mac.type >= e1000_82543) {
4813		adapter->stats.algnerrc +=
4814		E1000_READ_REG(&adapter->hw, E1000_ALGNERRC);
4815		adapter->stats.rxerrc +=
4816		E1000_READ_REG(&adapter->hw, E1000_RXERRC);
4817		adapter->stats.tncrs +=
4818		E1000_READ_REG(&adapter->hw, E1000_TNCRS);
4819		adapter->stats.cexterr +=
4820		E1000_READ_REG(&adapter->hw, E1000_CEXTERR);
4821		adapter->stats.tsctc +=
4822		E1000_READ_REG(&adapter->hw, E1000_TSCTC);
4823		adapter->stats.tsctfc +=
4824		E1000_READ_REG(&adapter->hw, E1000_TSCTFC);
4825	}
4826	ifp = adapter->ifp;
4827
4828	ifp->if_collisions = adapter->stats.colc;
4829
4830	/* Rx Errors */
4831	ifp->if_ierrors = adapter->dropped_pkts + adapter->stats.rxerrc +
4832	    adapter->stats.crcerrs + adapter->stats.algnerrc +
4833	    adapter->stats.ruc + adapter->stats.roc +
4834	    adapter->stats.mpc + adapter->stats.cexterr;
4835
4836	/* Tx Errors */
4837	ifp->if_oerrors = adapter->stats.ecol +
4838	    adapter->stats.latecol + adapter->watchdog_events;
4839}
4840
4841
4842/**********************************************************************
4843 *
4844 *  This routine is called only when em_display_debug_stats is enabled.
4845 *  This routine provides a way to take a look at important statistics
4846 *  maintained by the driver and hardware.
4847 *
4848 **********************************************************************/
4849static void
4850em_print_debug_info(struct adapter *adapter)
4851{
4852	device_t dev = adapter->dev;
4853	u8 *hw_addr = adapter->hw.hw_addr;
4854	struct rx_ring *rxr = adapter->rx_rings;
4855	struct tx_ring *txr = adapter->tx_rings;
4856
4857	device_printf(dev, "Adapter hardware address = %p \n", hw_addr);
4858	device_printf(dev, "CTRL = 0x%x RCTL = 0x%x \n",
4859	    E1000_READ_REG(&adapter->hw, E1000_CTRL),
4860	    E1000_READ_REG(&adapter->hw, E1000_RCTL));
4861	device_printf(dev, "Packet buffer = Tx=%dk Rx=%dk \n",
4862	    ((E1000_READ_REG(&adapter->hw, E1000_PBA) & 0xffff0000) >> 16),\
4863	    (E1000_READ_REG(&adapter->hw, E1000_PBA) & 0xffff) );
4864	device_printf(dev, "Flow control watermarks high = %d low = %d\n",
4865	    adapter->hw.fc.high_water,
4866	    adapter->hw.fc.low_water);
4867	device_printf(dev, "tx_int_delay = %d, tx_abs_int_delay = %d\n",
4868	    E1000_READ_REG(&adapter->hw, E1000_TIDV),
4869	    E1000_READ_REG(&adapter->hw, E1000_TADV));
4870	device_printf(dev, "rx_int_delay = %d, rx_abs_int_delay = %d\n",
4871	    E1000_READ_REG(&adapter->hw, E1000_RDTR),
4872	    E1000_READ_REG(&adapter->hw, E1000_RADV));
4873
4874	for (int i = 0; i < adapter->num_queues; i++, txr++) {
4875		device_printf(dev, "Queue(%d) tdh = %d, tdt = %d\n", i,
4876		    E1000_READ_REG(&adapter->hw, E1000_TDH(i)),
4877		    E1000_READ_REG(&adapter->hw, E1000_TDT(i)));
4878		device_printf(dev, "TX(%d) no descriptors avail event = %ld\n",
4879		    txr->me, txr->no_desc_avail);
4880		device_printf(dev, "TX(%d) MSIX IRQ Handled = %ld\n",
4881		    txr->me, txr->tx_irq);
4882		device_printf(dev, "Num Tx descriptors avail = %d\n",
4883		    txr->tx_avail);
4884		device_printf(dev, "Tx Descriptors not avail1 = %ld\n",
4885		    txr->no_desc_avail);
4886	}
4887	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4888		device_printf(dev, "RX(%d) MSIX IRQ Handled = %ld\n",
4889		    rxr->me, rxr->rx_irq);
4890		device_printf(dev, "hw rdh = %d, hw rdt = %d\n",
4891		    E1000_READ_REG(&adapter->hw, E1000_RDH(i)),
4892		    E1000_READ_REG(&adapter->hw, E1000_RDT(i)));
4893	}
4894	device_printf(dev, "Std mbuf failed = %ld\n",
4895	    adapter->mbuf_alloc_failed);
4896	device_printf(dev, "Std mbuf cluster failed = %ld\n",
4897	    adapter->mbuf_cluster_failed);
4898	device_printf(dev, "Driver dropped packets = %ld\n",
4899	    adapter->dropped_pkts);
4900}
4901
4902static void
4903em_print_hw_stats(struct adapter *adapter)
4904{
4905	device_t dev = adapter->dev;
4906
4907	device_printf(dev, "Excessive collisions = %lld\n",
4908	    (long long)adapter->stats.ecol);
4909#if	(DEBUG_HW > 0)  /* Dont output these errors normally */
4910	device_printf(dev, "Symbol errors = %lld\n",
4911	    (long long)adapter->stats.symerrs);
4912#endif
4913	device_printf(dev, "Sequence errors = %lld\n",
4914	    (long long)adapter->stats.sec);
4915	device_printf(dev, "Defer count = %lld\n",
4916	    (long long)adapter->stats.dc);
4917	device_printf(dev, "Missed Packets = %lld\n",
4918	    (long long)adapter->stats.mpc);
4919	device_printf(dev, "Receive No Buffers = %lld\n",
4920	    (long long)adapter->stats.rnbc);
4921	/* RLEC is inaccurate on some hardware, calculate our own. */
4922	device_printf(dev, "Receive Length Errors = %lld\n",
4923	    ((long long)adapter->stats.roc + (long long)adapter->stats.ruc));
4924	device_printf(dev, "Receive errors = %lld\n",
4925	    (long long)adapter->stats.rxerrc);
4926	device_printf(dev, "Crc errors = %lld\n",
4927	    (long long)adapter->stats.crcerrs);
4928	device_printf(dev, "Alignment errors = %lld\n",
4929	    (long long)adapter->stats.algnerrc);
4930	device_printf(dev, "Collision/Carrier extension errors = %lld\n",
4931	    (long long)adapter->stats.cexterr);
4932	device_printf(dev, "watchdog timeouts = %ld\n",
4933	    adapter->watchdog_events);
4934	device_printf(dev, "XON Rcvd = %lld\n",
4935	    (long long)adapter->stats.xonrxc);
4936	device_printf(dev, "XON Xmtd = %lld\n",
4937	    (long long)adapter->stats.xontxc);
4938	device_printf(dev, "XOFF Rcvd = %lld\n",
4939	    (long long)adapter->stats.xoffrxc);
4940	device_printf(dev, "XOFF Xmtd = %lld\n",
4941	    (long long)adapter->stats.xofftxc);
4942	device_printf(dev, "Good Packets Rcvd = %lld\n",
4943	    (long long)adapter->stats.gprc);
4944	device_printf(dev, "Good Packets Xmtd = %lld\n",
4945	    (long long)adapter->stats.gptc);
4946	device_printf(dev, "TSO Contexts Xmtd = %lld\n",
4947	    (long long)adapter->stats.tsctc);
4948	device_printf(dev, "TSO Contexts Failed = %lld\n",
4949	    (long long)adapter->stats.tsctfc);
4950}
4951
4952/**********************************************************************
4953 *
4954 *  This routine provides a way to dump out the adapter eeprom,
4955 *  often a useful debug/service tool. This only dumps the first
4956 *  32 words, stuff that matters is in that extent.
4957 *
4958 **********************************************************************/
4959static void
4960em_print_nvm_info(struct adapter *adapter)
4961{
4962	u16	eeprom_data;
4963	int	i, j, row = 0;
4964
4965	/* Its a bit crude, but it gets the job done */
4966	printf("\nInterface EEPROM Dump:\n");
4967	printf("Offset\n0x0000  ");
4968	for (i = 0, j = 0; i < 32; i++, j++) {
4969		if (j == 8) { /* Make the offset block */
4970			j = 0; ++row;
4971			printf("\n0x00%x0  ",row);
4972		}
4973		e1000_read_nvm(&adapter->hw, i, 1, &eeprom_data);
4974		printf("%04x ", eeprom_data);
4975	}
4976	printf("\n");
4977}
4978
4979static int
4980em_sysctl_debug_info(SYSCTL_HANDLER_ARGS)
4981{
4982	struct adapter *adapter;
4983	int error;
4984	int result;
4985
4986	result = -1;
4987	error = sysctl_handle_int(oidp, &result, 0, req);
4988
4989	if (error || !req->newptr)
4990		return (error);
4991
4992	if (result == 1) {
4993		adapter = (struct adapter *)arg1;
4994		em_print_debug_info(adapter);
4995	}
4996	/*
4997	 * This value will cause a hex dump of the
4998	 * first 32 16-bit words of the EEPROM to
4999	 * the screen.
5000	 */
5001	if (result == 2) {
5002		adapter = (struct adapter *)arg1;
5003		em_print_nvm_info(adapter);
5004        }
5005
5006	return (error);
5007}
5008
5009
5010static int
5011em_sysctl_stats(SYSCTL_HANDLER_ARGS)
5012{
5013	struct adapter *adapter;
5014	int error;
5015	int result;
5016
5017	result = -1;
5018	error = sysctl_handle_int(oidp, &result, 0, req);
5019
5020	if (error || !req->newptr)
5021		return (error);
5022
5023	if (result == 1) {
5024		adapter = (struct adapter *)arg1;
5025		em_print_hw_stats(adapter);
5026	}
5027
5028	return (error);
5029}
5030
5031static int
5032em_sysctl_int_delay(SYSCTL_HANDLER_ARGS)
5033{
5034	struct em_int_delay_info *info;
5035	struct adapter *adapter;
5036	u32 regval;
5037	int error, usecs, ticks;
5038
5039	info = (struct em_int_delay_info *)arg1;
5040	usecs = info->value;
5041	error = sysctl_handle_int(oidp, &usecs, 0, req);
5042	if (error != 0 || req->newptr == NULL)
5043		return (error);
5044	if (usecs < 0 || usecs > EM_TICKS_TO_USECS(65535))
5045		return (EINVAL);
5046	info->value = usecs;
5047	ticks = EM_USECS_TO_TICKS(usecs);
5048
5049	adapter = info->adapter;
5050
5051	EM_CORE_LOCK(adapter);
5052	regval = E1000_READ_OFFSET(&adapter->hw, info->offset);
5053	regval = (regval & ~0xffff) | (ticks & 0xffff);
5054	/* Handle a few special cases. */
5055	switch (info->offset) {
5056	case E1000_RDTR:
5057		break;
5058	case E1000_TIDV:
5059		if (ticks == 0) {
5060			adapter->txd_cmd &= ~E1000_TXD_CMD_IDE;
5061			/* Don't write 0 into the TIDV register. */
5062			regval++;
5063		} else
5064			adapter->txd_cmd |= E1000_TXD_CMD_IDE;
5065		break;
5066	}
5067	E1000_WRITE_OFFSET(&adapter->hw, info->offset, regval);
5068	EM_CORE_UNLOCK(adapter);
5069	return (0);
5070}
5071
5072static void
5073em_add_int_delay_sysctl(struct adapter *adapter, const char *name,
5074	const char *description, struct em_int_delay_info *info,
5075	int offset, int value)
5076{
5077	info->adapter = adapter;
5078	info->offset = offset;
5079	info->value = value;
5080	SYSCTL_ADD_PROC(device_get_sysctl_ctx(adapter->dev),
5081	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
5082	    OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW,
5083	    info, 0, em_sysctl_int_delay, "I", description);
5084}
5085
5086static void
5087em_add_rx_process_limit(struct adapter *adapter, const char *name,
5088	const char *description, int *limit, int value)
5089{
5090	*limit = value;
5091	SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev),
5092	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
5093	    OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW, limit, value, description);
5094}
5095
5096
5097