/******************************************************************************

  Copyright (c) 2001-2010, Intel Corporation
  All rights reserved.

  Redistribution and use in source and binary forms, with or without
  modification, are permitted provided that the following conditions are met:

   1. Redistributions of source code must retain the above copyright notice,
      this list of conditions and the following disclaimer.

   2. Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.

   3. Neither the name of the Intel Corporation nor the names of its
      contributors may be used to endorse or promote products derived from
      this software without specific prior written permission.

  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  POSSIBILITY OF SUCH DAMAGE.

******************************************************************************/
/*$FreeBSD: head/sys/dev/e1000/if_em.c 206629 2010-04-14 20:55:33Z jfv $*/

#ifdef HAVE_KERNEL_OPTION_HEADERS
#include "opt_device_polling.h"
#include "opt_inet.h"
#endif

#include <sys/param.h>
#include <sys/systm.h>
#if __FreeBSD_version >= 800000
#include <sys/buf_ring.h>
#endif
#include <sys/bus.h>
#include <sys/endian.h>
#include <sys/kernel.h>
#include <sys/kthread.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/module.h>
#include <sys/rman.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/sysctl.h>
#include <sys/taskqueue.h>
#include <sys/eventhandler.h>
#include <machine/bus.h>
#include <machine/resource.h>

#include <net/bpf.h>
#include <net/ethernet.h>
#include <net/if.h>
#include <net/if_arp.h>
#include <net/if_dl.h>
#include <net/if_media.h>

#include <net/if_types.h>
#include <net/if_vlan_var.h>

#include <netinet/in_systm.h>
#include <netinet/in.h>
#include <netinet/if_ether.h>
#include <netinet/ip.h>
#include <netinet/ip6.h>
#include <netinet/tcp.h>
#include <netinet/udp.h>

#include <machine/in_cksum.h>
#include <dev/led/led.h>
#include <dev/pci/pcivar.h>
#include <dev/pci/pcireg.h>

#include "e1000_api.h"
#include "e1000_82571.h"
#include "if_em.h"

/*********************************************************************
 *  Set this to one to display debug statistics
 *********************************************************************/
int	em_display_debug_stats = 0;

/*********************************************************************
 *  Driver version:
 *********************************************************************/
char em_driver_version[] = "7.0.5";


/*********************************************************************
 *  PCI Device ID Table
 *
 *  Used by probe to select devices to load on
 *  Last field stores an index into e1000_strings
 *  Last entry must be all 0s
 *
 *  { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index }
 *********************************************************************/

static em_vendor_info_t em_vendor_info_array[] =
{
	/* Intel(R) PRO/1000 Network Connection */
	{ 0x8086, E1000_DEV_ID_82571EB_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_SERDES_DUAL,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_SERDES_QUAD,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER_LP,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_QUAD_FIBER,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571PT_QUAD_COPPER,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82572EI_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82572EI_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82572EI_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82572EI,		PCI_ANY_ID, PCI_ANY_ID, 0},

	{ 0x8086, E1000_DEV_ID_82573E,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82573E_IAMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82573L,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82583V,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_SPT,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_SPT,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_DPT,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_DPT,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IGP_M_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IGP_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IGP_C,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IFE,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IFE_GT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IFE_G,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IGP_M,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_82567V_3,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IGP_M_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IGP_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IGP_C,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IGP_M,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IGP_M_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IFE,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IFE_GT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IFE_G,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_BM,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82574L,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82574LA,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH10_R_BM_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH10_R_BM_LF,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH10_R_BM_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH10_D_BM_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH10_D_BM_LF,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_M_HV_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_M_HV_LC,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_D_HV_DM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_D_HV_DC,	PCI_ANY_ID, PCI_ANY_ID, 0},
	/* required last entry */
	{ 0, 0, 0, 0, 0}
};

/*********************************************************************
 *  Table of branding strings for all supported NICs.
 *********************************************************************/

static char *em_strings[] = {
	"Intel(R) PRO/1000 Network Connection"
};

/*********************************************************************
 *  Function prototypes
 *********************************************************************/
static int	em_probe(device_t);
static int	em_attach(device_t);
static int	em_detach(device_t);
static int	em_shutdown(device_t);
static int	em_suspend(device_t);
static int	em_resume(device_t);
static void	em_start(struct ifnet *);
static void	em_start_locked(struct ifnet *, struct tx_ring *);
#ifdef EM_MULTIQUEUE
static int	em_mq_start(struct ifnet *, struct mbuf *);
static int	em_mq_start_locked(struct ifnet *,
		    struct tx_ring *, struct mbuf *);
static void	em_qflush(struct ifnet *);
#endif
static int	em_ioctl(struct ifnet *, u_long, caddr_t);
static void	em_init(void *);
static void	em_init_locked(struct adapter *);
static void	em_stop(void *);
static void	em_media_status(struct ifnet *, struct ifmediareq *);
static int	em_media_change(struct ifnet *);
static void	em_identify_hardware(struct adapter *);
static int	em_allocate_pci_resources(struct adapter *);
static int	em_allocate_legacy(struct adapter *);
static int	em_allocate_msix(struct adapter *);
static int	em_allocate_queues(struct adapter *);
static int	em_setup_msix(struct adapter *);
static void	em_free_pci_resources(struct adapter *);
static void	em_local_timer(void *);
static void	em_reset(struct adapter *);
static void	em_setup_interface(device_t, struct adapter *);

static void	em_setup_transmit_structures(struct adapter *);
static void	em_initialize_transmit_unit(struct adapter *);
static int	em_allocate_transmit_buffers(struct tx_ring *);
static void	em_free_transmit_structures(struct adapter *);
static void	em_free_transmit_buffers(struct tx_ring *);

static int	em_setup_receive_structures(struct adapter *);
static int	em_allocate_receive_buffers(struct rx_ring *);
static void	em_initialize_receive_unit(struct adapter *);
static void	em_free_receive_structures(struct adapter *);
static void	em_free_receive_buffers(struct rx_ring *);

static void	em_enable_intr(struct adapter *);
static void	em_disable_intr(struct adapter *);
static void	em_update_stats_counters(struct adapter *);
static bool	em_txeof(struct tx_ring *);
static int	em_rxeof(struct rx_ring *, int);
#ifndef __NO_STRICT_ALIGNMENT
static int	em_fixup_rx(struct rx_ring *);
#endif
static void	em_receive_checksum(struct e1000_rx_desc *, struct mbuf *);
static void	em_transmit_checksum_setup(struct tx_ring *, struct mbuf *,
		    u32 *, u32 *);
static bool	em_tso_setup(struct tx_ring *, struct mbuf *, u32 *, u32 *);
static void	em_set_promisc(struct adapter *);
static void	em_disable_promisc(struct adapter *);
static void	em_set_multi(struct adapter *);
static void	em_print_hw_stats(struct adapter *);
static void	em_update_link_status(struct adapter *);
static void	em_refresh_mbufs(struct rx_ring *, int);
static void	em_register_vlan(void *, struct ifnet *, u16);
static void	em_unregister_vlan(void *, struct ifnet *, u16);
static void	em_setup_vlan_hw_support(struct adapter *);
static int	em_xmit(struct tx_ring *, struct mbuf **);
static int	em_dma_malloc(struct adapter *, bus_size_t,
		    struct em_dma_alloc *, int);
static void	em_dma_free(struct adapter *, struct em_dma_alloc *);
static void	em_print_debug_info(struct adapter *);
static void	em_print_nvm_info(struct adapter *);
static int	em_is_valid_ether_addr(u8 *);
static int	em_sysctl_stats(SYSCTL_HANDLER_ARGS);
static int	em_sysctl_debug_info(SYSCTL_HANDLER_ARGS);
static int	em_sysctl_int_delay(SYSCTL_HANDLER_ARGS);
static void	em_add_int_delay_sysctl(struct adapter *, const char *,
		    const char *, struct em_int_delay_info *, int, int);
/* Management and WOL Support */
static void	em_init_manageability(struct adapter *);
static void	em_release_manageability(struct adapter *);
static void	em_get_hw_control(struct adapter *);
static void	em_release_hw_control(struct adapter *);
static void	em_get_wakeup(device_t);
static void	em_enable_wakeup(device_t);
static int	em_enable_phy_wakeup(struct adapter *);
static void	em_led_func(void *, int);

static int	em_irq_fast(void *);

/* Legacy/MSI taskqueue and MSIX handlers */
static void	em_handle_que(void *context, int pending);
static void	em_msix_tx(void *);
static void	em_msix_rx(void *);
static void	em_msix_link(void *);
static void	em_handle_tx(void *context, int pending);
static void	em_handle_rx(void *context, int pending);
static void	em_handle_link(void *context, int pending);

static void	em_add_rx_process_limit(struct adapter *, const char *,
		    const char *, int *, int);

#ifdef DEVICE_POLLING
static poll_handler_t em_poll;
#endif /* DEVICE_POLLING */

/*********************************************************************
 *  FreeBSD Device Interface Entry Points
 *********************************************************************/

static device_method_t em_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe, em_probe),
	DEVMETHOD(device_attach, em_attach),
	DEVMETHOD(device_detach, em_detach),
	DEVMETHOD(device_shutdown, em_shutdown),
	DEVMETHOD(device_suspend, em_suspend),
	DEVMETHOD(device_resume, em_resume),
	{0, 0}
};

static driver_t em_driver = {
	"em", em_methods, sizeof(struct adapter),
};

devclass_t em_devclass;
DRIVER_MODULE(em, pci, em_driver, em_devclass, 0, 0);
MODULE_DEPEND(em, pci, 1, 1, 1);
MODULE_DEPEND(em, ether, 1, 1, 1);
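
/*
 * Example (informational): built as a module this driver ships as
 * if_em.ko, so it can be loaded at runtime with "kldload if_em" or at
 * boot by adding
 *	if_em_load="YES"
 * to /boot/loader.conf.  The MODULE_DEPEND() entries above ensure the
 * pci and ether code is in place before the module initializes.
 */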

/*********************************************************************
 *  Tunable default values.
 *********************************************************************/

#define EM_TICKS_TO_USECS(ticks)	((1024 * (ticks) + 500) / 1000)
#define EM_USECS_TO_TICKS(usecs)	((1000 * (usecs) + 512) / 1024)
#define M_TSO_LEN			66
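
/*
 * Worked example (informational): the hardware's interrupt delay
 * registers count in 1.024 usec units, so the macros above convert
 * between register ticks and microseconds with rounding, e.g.
 *	EM_TICKS_TO_USECS(64) = (1024 * 64 + 500) / 1000 = 66 usecs
 *	EM_USECS_TO_TICKS(66) = (1000 * 66 + 512) / 1024 = 64 ticks
 */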

/* Allow common code without TSO */
#ifndef CSUM_TSO
#define CSUM_TSO	0
#endif

static int em_tx_int_delay_dflt = EM_TICKS_TO_USECS(EM_TIDV);
static int em_rx_int_delay_dflt = EM_TICKS_TO_USECS(EM_RDTR);
TUNABLE_INT("hw.em.tx_int_delay", &em_tx_int_delay_dflt);
TUNABLE_INT("hw.em.rx_int_delay", &em_rx_int_delay_dflt);

static int em_tx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_TADV);
static int em_rx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_RADV);
TUNABLE_INT("hw.em.tx_abs_int_delay", &em_tx_abs_int_delay_dflt);
TUNABLE_INT("hw.em.rx_abs_int_delay", &em_rx_abs_int_delay_dflt);

static int em_rxd = EM_DEFAULT_RXD;
static int em_txd = EM_DEFAULT_TXD;
TUNABLE_INT("hw.em.rxd", &em_rxd);
TUNABLE_INT("hw.em.txd", &em_txd);

static int em_smart_pwr_down = FALSE;
TUNABLE_INT("hw.em.smart_pwr_down", &em_smart_pwr_down);

/* Controls whether promiscuous also shows bad packets */
static int em_debug_sbp = FALSE;
TUNABLE_INT("hw.em.sbp", &em_debug_sbp);

/* Local controls for MSI/MSIX */
static int em_enable_msix = TRUE;
static int em_msix_queues = 2; /* for 82574, can be 1 or 2 */
TUNABLE_INT("hw.em.enable_msix", &em_enable_msix);
TUNABLE_INT("hw.em.msix_queues", &em_msix_queues);

/* How many packets rxeof tries to clean at a time */
static int em_rx_process_limit = 100;
TUNABLE_INT("hw.em.rx_process_limit", &em_rx_process_limit);

/* Flow control setting - default to FULL */
static int em_fc_setting = e1000_fc_full;
TUNABLE_INT("hw.em.fc_setting", &em_fc_setting);
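
/*
 * Example (informational): the TUNABLE_INT() entries above are read
 * at boot from /boot/loader.conf, e.g.:
 *	hw.em.rxd="4096"
 *	hw.em.txd="4096"
 *	hw.em.rx_process_limit="200"
 * The descriptor counts are validated in em_attach() below and fall
 * back to the defaults if they are out of range.
 */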

/*
** Shadow VFTA table: the real VLAN filter table gets cleared during
** a soft reset, so the driver keeps this copy in order to be able
** to repopulate it.
*/
static u32 em_shadow_vfta[EM_VFTA_SIZE];

/* Global used in WOL setup with multiport cards */
static int global_quad_port_a = 0;

/*********************************************************************
 *  Device identification routine
 *
 *  em_probe determines if the driver should be loaded on an
 *  adapter, based on the PCI vendor/device ID of the adapter.
 *
 *  return BUS_PROBE_DEFAULT on success, positive on failure
 *********************************************************************/

static int
em_probe(device_t dev)
{
	char		adapter_name[60];
	u16		pci_vendor_id = 0;
	u16		pci_device_id = 0;
	u16		pci_subvendor_id = 0;
	u16		pci_subdevice_id = 0;
	em_vendor_info_t *ent;

	INIT_DEBUGOUT("em_probe: begin");

	pci_vendor_id = pci_get_vendor(dev);
	if (pci_vendor_id != EM_VENDOR_ID)
		return (ENXIO);

	pci_device_id = pci_get_device(dev);
	pci_subvendor_id = pci_get_subvendor(dev);
	pci_subdevice_id = pci_get_subdevice(dev);

	ent = em_vendor_info_array;
	while (ent->vendor_id != 0) {
		if ((pci_vendor_id == ent->vendor_id) &&
		    (pci_device_id == ent->device_id) &&
		    ((pci_subvendor_id == ent->subvendor_id) ||
		    (ent->subvendor_id == PCI_ANY_ID)) &&
		    ((pci_subdevice_id == ent->subdevice_id) ||
		    (ent->subdevice_id == PCI_ANY_ID))) {
			sprintf(adapter_name, "%s %s",
				em_strings[ent->index],
				em_driver_version);
			device_set_desc_copy(dev, adapter_name);
			return (BUS_PROBE_DEFAULT);
		}
		ent++;
	}

	return (ENXIO);
}

/*********************************************************************
 *  Device initialization routine
 *
 *  The attach entry point is called when the driver is being loaded.
 *  This routine identifies the type of hardware, allocates all resources
 *  and initializes the hardware.
 *
 *  return 0 on success, positive on failure
 *********************************************************************/

static int
em_attach(device_t dev)
{
	struct adapter	*adapter;
	int		error = 0;

	INIT_DEBUGOUT("em_attach: begin");

	adapter = device_get_softc(dev);
	adapter->dev = adapter->osdep.dev = dev;
	EM_CORE_LOCK_INIT(adapter, device_get_nameunit(dev));

	/* SYSCTL stuff */
	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
	    OID_AUTO, "debug", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
	    em_sysctl_debug_info, "I", "Debug Information");

	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
	    OID_AUTO, "stats", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
	    em_sysctl_stats, "I", "Statistics");
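
	/*
	 * Example (informational): the two handlers registered above
	 * appear as per-device sysctls, so on unit 0
	 *	sysctl dev.em.0.debug=1
	 *	sysctl dev.em.0.stats=1
	 * invoke em_sysctl_debug_info() and em_sysctl_stats(), which
	 * print their reports to the system console.
	 */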

	callout_init_mtx(&adapter->timer, &adapter->core_mtx, 0);

	/* Determine hardware and mac info */
	em_identify_hardware(adapter);

	/* Setup PCI resources */
	if (em_allocate_pci_resources(adapter)) {
		device_printf(dev, "Allocation of PCI resources failed\n");
		error = ENXIO;
		goto err_pci;
	}

	/*
	** For ICH8 and family we need to
	** map the flash memory, and this
	** must happen after the MAC is
	** identified
	*/
	if ((adapter->hw.mac.type == e1000_ich8lan) ||
	    (adapter->hw.mac.type == e1000_pchlan) ||
	    (adapter->hw.mac.type == e1000_ich9lan) ||
	    (adapter->hw.mac.type == e1000_ich10lan)) {
		int rid = EM_BAR_TYPE_FLASH;
		adapter->flash = bus_alloc_resource_any(dev,
		    SYS_RES_MEMORY, &rid, RF_ACTIVE);
		if (adapter->flash == NULL) {
			device_printf(dev, "Mapping of Flash failed\n");
			error = ENXIO;
			goto err_pci;
		}
		/* This is used in the shared code */
		adapter->hw.flash_address = (u8 *)adapter->flash;
		adapter->osdep.flash_bus_space_tag =
		    rman_get_bustag(adapter->flash);
		adapter->osdep.flash_bus_space_handle =
		    rman_get_bushandle(adapter->flash);
	}

	/* Do Shared Code initialization */
	if (e1000_setup_init_funcs(&adapter->hw, TRUE)) {
		device_printf(dev, "Setup of Shared code failed\n");
		error = ENXIO;
		goto err_pci;
	}

	e1000_get_bus_info(&adapter->hw);

	/* Set up some sysctls for the tunable interrupt delays */
	em_add_int_delay_sysctl(adapter, "rx_int_delay",
	    "receive interrupt delay in usecs", &adapter->rx_int_delay,
	    E1000_REGISTER(&adapter->hw, E1000_RDTR), em_rx_int_delay_dflt);
	em_add_int_delay_sysctl(adapter, "tx_int_delay",
	    "transmit interrupt delay in usecs", &adapter->tx_int_delay,
	    E1000_REGISTER(&adapter->hw, E1000_TIDV), em_tx_int_delay_dflt);
	em_add_int_delay_sysctl(adapter, "rx_abs_int_delay",
	    "receive interrupt delay limit in usecs",
	    &adapter->rx_abs_int_delay,
	    E1000_REGISTER(&adapter->hw, E1000_RADV),
	    em_rx_abs_int_delay_dflt);
	em_add_int_delay_sysctl(adapter, "tx_abs_int_delay",
	    "transmit interrupt delay limit in usecs",
	    &adapter->tx_abs_int_delay,
	    E1000_REGISTER(&adapter->hw, E1000_TADV),
	    em_tx_abs_int_delay_dflt);

	/* Sysctls for limiting the amount of work done in the taskqueue */
	em_add_rx_process_limit(adapter, "rx_processing_limit",
	    "max number of rx packets to process", &adapter->rx_process_limit,
	    em_rx_process_limit);

	/*
	 * Validate the number of transmit and receive descriptors. It
	 * must not exceed the hardware maximum, and the total ring size
	 * in bytes must be a multiple of EM_DBA_ALIGN.
	 */
	if (((em_txd * sizeof(struct e1000_tx_desc)) % EM_DBA_ALIGN) != 0 ||
	    (em_txd > EM_MAX_TXD) || (em_txd < EM_MIN_TXD)) {
		device_printf(dev, "Using %d TX descriptors instead of %d!\n",
		    EM_DEFAULT_TXD, em_txd);
		adapter->num_tx_desc = EM_DEFAULT_TXD;
	} else
		adapter->num_tx_desc = em_txd;

	if (((em_rxd * sizeof(struct e1000_rx_desc)) % EM_DBA_ALIGN) != 0 ||
	    (em_rxd > EM_MAX_RXD) || (em_rxd < EM_MIN_RXD)) {
		device_printf(dev, "Using %d RX descriptors instead of %d!\n",
		    EM_DEFAULT_RXD, em_rxd);
		adapter->num_rx_desc = EM_DEFAULT_RXD;
	} else
		adapter->num_rx_desc = em_rxd;
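
	/*
	 * Worked example (assuming sizeof(struct e1000_tx_desc) == 16
	 * and EM_DBA_ALIGN == 128, as in the e1000 headers): the modulo
	 * tests above only accept ring sizes that are a multiple of 8
	 * descriptors, so hw.em.txd="4096" is accepted while an odd
	 * value such as 4090 falls back to EM_DEFAULT_TXD.
	 */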

	adapter->hw.mac.autoneg = DO_AUTO_NEG;
	adapter->hw.phy.autoneg_wait_to_complete = FALSE;
	adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;

	/* Copper options */
	if (adapter->hw.phy.media_type == e1000_media_type_copper) {
		adapter->hw.phy.mdix = AUTO_ALL_MODES;
		adapter->hw.phy.disable_polarity_correction = FALSE;
		adapter->hw.phy.ms_type = EM_MASTER_SLAVE;
	}

	/*
	 * Set the frame limits assuming
	 * standard ethernet sized frames.
	 */
	adapter->max_frame_size = ETHERMTU + ETHER_HDR_LEN + ETHERNET_FCS_SIZE;
	adapter->min_frame_size = ETH_ZLEN + ETHERNET_FCS_SIZE;

	/*
	 * This controls when hardware reports transmit completion
	 * status.
	 */
	adapter->hw.mac.report_tx_early = 1;

	/*
	** Get queue/ring memory
	*/
	if (em_allocate_queues(adapter)) {
		error = ENOMEM;
		goto err_pci;
	}

	/*
	** Start from a known state: this is
	** important for reading the NVM and
	** MAC address from it.
	*/
	e1000_reset_hw(&adapter->hw);

	/* Make sure we have a good EEPROM before we read from it */
	if (e1000_validate_nvm_checksum(&adapter->hw) < 0) {
		/*
		** Some PCI-E parts fail the first check due to
		** the link being in sleep state; call it again,
		** and if it fails a second time it's a real issue.
		*/
		if (e1000_validate_nvm_checksum(&adapter->hw) < 0) {
			device_printf(dev,
			    "The EEPROM Checksum Is Not Valid\n");
			error = EIO;
			goto err_late;
		}
	}

	/* Copy the permanent MAC address out of the EEPROM */
	if (e1000_read_mac_addr(&adapter->hw) < 0) {
		device_printf(dev, "EEPROM read error while reading MAC"
		    " address\n");
		error = EIO;
		goto err_late;
	}

	if (!em_is_valid_ether_addr(adapter->hw.mac.addr)) {
		device_printf(dev, "Invalid MAC address\n");
		error = EIO;
		goto err_late;
	}

	/*
	**  Do interrupt configuration
	*/
	if (adapter->msix > 1) /* Do MSIX */
		error = em_allocate_msix(adapter);
	else  /* MSI or Legacy */
		error = em_allocate_legacy(adapter);
	if (error)
		goto err_late;

	/*
	 * Get Wake-on-Lan and Management info for later use
	 */
	em_get_wakeup(dev);

	/* Setup OS specific network interface */
	em_setup_interface(dev, adapter);

	em_reset(adapter);

	/* Initialize statistics */
	em_update_stats_counters(adapter);

	adapter->hw.mac.get_link_status = 1;
	em_update_link_status(adapter);

	/* Indicate SOL/IDER usage */
	if (e1000_check_reset_block(&adapter->hw))
		device_printf(dev,
		    "PHY reset is blocked due to SOL/IDER session.\n");

	/* Register for VLAN events */
	adapter->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
	    em_register_vlan, adapter, EVENTHANDLER_PRI_FIRST);
	adapter->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
	    em_unregister_vlan, adapter, EVENTHANDLER_PRI_FIRST);

	/* Non-AMT based hardware can now take control from firmware */
	if (adapter->has_manage && !adapter->has_amt)
		em_get_hw_control(adapter);

	/* Tell the stack that the interface is not active */
	adapter->ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);

	adapter->led_dev = led_create(em_led_func, adapter,
	    device_get_nameunit(dev));

	INIT_DEBUGOUT("em_attach: end");

	return (0);

err_late:
	em_free_transmit_structures(adapter);
	em_free_receive_structures(adapter);
	em_release_hw_control(adapter);
err_pci:
	em_free_pci_resources(adapter);
	EM_CORE_LOCK_DESTROY(adapter);

	return (error);
}

/*********************************************************************
 *  Device removal routine
 *
 *  The detach entry point is called when the driver is being removed.
 *  This routine stops the adapter and deallocates all the resources
 *  that were allocated for driver operation.
 *
 *  return 0 on success, positive on failure
 *********************************************************************/

static int
em_detach(device_t dev)
{
	struct adapter	*adapter = device_get_softc(dev);
	struct ifnet	*ifp = adapter->ifp;

	INIT_DEBUGOUT("em_detach: begin");

	/* Make sure VLANs are not using driver */
	if (adapter->ifp->if_vlantrunk != NULL) {
		device_printf(dev,"Vlan in use, detach first\n");
		return (EBUSY);
	}

#ifdef DEVICE_POLLING
	if (ifp->if_capenable & IFCAP_POLLING)
		ether_poll_deregister(ifp);
#endif

	/* Tear down the LED device created in attach */
	if (adapter->led_dev != NULL)
		led_destroy(adapter->led_dev);

	EM_CORE_LOCK(adapter);
	adapter->in_detach = 1;
	em_stop(adapter);
	EM_CORE_UNLOCK(adapter);
	EM_CORE_LOCK_DESTROY(adapter);

	e1000_phy_hw_reset(&adapter->hw);

	em_release_manageability(adapter);
	em_release_hw_control(adapter);

	/* Unregister VLAN events */
	if (adapter->vlan_attach != NULL)
		EVENTHANDLER_DEREGISTER(vlan_config, adapter->vlan_attach);
	if (adapter->vlan_detach != NULL)
		EVENTHANDLER_DEREGISTER(vlan_unconfig, adapter->vlan_detach);

	ether_ifdetach(adapter->ifp);
	callout_drain(&adapter->timer);

	em_free_pci_resources(adapter);
	bus_generic_detach(dev);
	if_free(ifp);

	em_free_transmit_structures(adapter);
	em_free_receive_structures(adapter);

	em_release_hw_control(adapter);

	return (0);
}

/*********************************************************************
 *
 *  Shutdown entry point
 *
 **********************************************************************/

static int
em_shutdown(device_t dev)
{
	return em_suspend(dev);
}

/*
 * Suspend/resume device methods.
 */
static int
em_suspend(device_t dev)
{
	struct adapter *adapter = device_get_softc(dev);

	EM_CORE_LOCK(adapter);

	em_release_manageability(adapter);
	em_release_hw_control(adapter);
	em_enable_wakeup(dev);

	EM_CORE_UNLOCK(adapter);

	return bus_generic_suspend(dev);
}

static int
em_resume(device_t dev)
{
	struct adapter *adapter = device_get_softc(dev);
	struct ifnet *ifp = adapter->ifp;

	EM_CORE_LOCK(adapter);
	em_init_locked(adapter);
	em_init_manageability(adapter);
	EM_CORE_UNLOCK(adapter);
	em_start(ifp);

	return bus_generic_resume(dev);
}


/*********************************************************************
 *  Transmit entry point
 *
 *  em_start is called by the stack to initiate a transmit.
 *  The driver will remain in this routine as long as there are
 *  packets to transmit and transmit resources are available.
 *  If resources are not available, the stack is notified and
 *  the packet is requeued.
 **********************************************************************/

#ifdef EM_MULTIQUEUE
static int
em_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr, struct mbuf *m)
{
	struct adapter	*adapter = txr->adapter;
	struct mbuf	*next;
	int		err = 0, enq = 0;

	if ((ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) !=
	    IFF_DRV_RUNNING || adapter->link_active == 0) {
		if (m != NULL)
			err = drbr_enqueue(ifp, txr->br, m);
		return (err);
	}

	/* Call cleanup if number of TX descriptors low */
	if (txr->tx_avail <= EM_TX_CLEANUP_THRESHOLD)
		em_txeof(txr);

	enq = 0;
	if (m == NULL) {
		next = drbr_dequeue(ifp, txr->br);
	} else if (drbr_needs_enqueue(ifp, txr->br)) {
		if ((err = drbr_enqueue(ifp, txr->br, m)) != 0)
			return (err);
		next = drbr_dequeue(ifp, txr->br);
	} else
		next = m;

	/* Process the queue */
	while (next != NULL) {
		if ((err = em_xmit(txr, &next)) != 0) {
			if (next != NULL)
				err = drbr_enqueue(ifp, txr->br, next);
			break;
		}
		enq++;
		drbr_stats_update(ifp, next->m_pkthdr.len, next->m_flags);
		ETHER_BPF_MTAP(ifp, next);
		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
			break;
		if (txr->tx_avail < EM_MAX_SCATTER) {
			ifp->if_drv_flags |= IFF_DRV_OACTIVE;
			break;
		}
		next = drbr_dequeue(ifp, txr->br);
	}

	if (enq > 0) {
		/* Set the watchdog */
		txr->watchdog_check = TRUE;
		txr->watchdog_time = ticks;
	}
	return (err);
}

/*
** Multiqueue capable stack interface, this is not
** yet truly multiqueue, but that is coming...
*/
static int
em_mq_start(struct ifnet *ifp, struct mbuf *m)
{
	struct adapter	*adapter = ifp->if_softc;
	struct tx_ring	*txr;
	int		i, error = 0;

	/* Which queue to use */
	if ((m->m_flags & M_FLOWID) != 0)
		i = m->m_pkthdr.flowid % adapter->num_queues;
	else
		i = curcpu % adapter->num_queues;

	txr = &adapter->tx_rings[i];

	if (EM_TX_TRYLOCK(txr)) {
		error = em_mq_start_locked(ifp, txr, m);
		EM_TX_UNLOCK(txr);
	} else
		error = drbr_enqueue(ifp, txr->br, m);

	return (error);
}

/*
** Flush all ring buffers
*/
static void
em_qflush(struct ifnet *ifp)
{
	struct adapter	*adapter = ifp->if_softc;
	struct tx_ring	*txr = adapter->tx_rings;
	struct mbuf	*m;

	for (int i = 0; i < adapter->num_queues; i++, txr++) {
		EM_TX_LOCK(txr);
		while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
			m_freem(m);
		EM_TX_UNLOCK(txr);
	}
	if_qflush(ifp);
}

#endif /* EM_MULTIQUEUE */

static void
em_start_locked(struct ifnet *ifp, struct tx_ring *txr)
{
	struct adapter	*adapter = ifp->if_softc;
	struct mbuf	*m_head;

	EM_TX_LOCK_ASSERT(txr);

	if ((ifp->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) !=
	    IFF_DRV_RUNNING)
		return;

	if (!adapter->link_active)
		return;

	/* Call cleanup if number of TX descriptors low */
	if (txr->tx_avail <= EM_TX_CLEANUP_THRESHOLD)
		em_txeof(txr);

	while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
		if (txr->tx_avail < EM_MAX_SCATTER) {
			ifp->if_drv_flags |= IFF_DRV_OACTIVE;
			break;
		}
		IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
		if (m_head == NULL)
			break;
		/*
		 *  Encapsulation can modify our pointer, and/or make it
		 *  NULL on failure.  In that event, we can't requeue.
		 */
		if (em_xmit(txr, &m_head)) {
			if (m_head == NULL)
				break;
			ifp->if_drv_flags |= IFF_DRV_OACTIVE;
			IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
			break;
		}

		/* Send a copy of the frame to the BPF listener */
		ETHER_BPF_MTAP(ifp, m_head);

		/* Set timeout in case hardware has problems transmitting. */
		txr->watchdog_time = ticks;
		txr->watchdog_check = TRUE;
	}

	return;
}

static void
em_start(struct ifnet *ifp)
{
	struct adapter	*adapter = ifp->if_softc;
	struct tx_ring	*txr = adapter->tx_rings;

	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
		EM_TX_LOCK(txr);
		em_start_locked(ifp, txr);
		EM_TX_UNLOCK(txr);
	}
	return;
}

/*********************************************************************
 *  Ioctl entry point
 *
 *  em_ioctl is called when the user wants to configure the
 *  interface.
 *
 *  return 0 on success, positive on failure
 **********************************************************************/

static int
em_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
{
	struct adapter	*adapter = ifp->if_softc;
	struct ifreq *ifr = (struct ifreq *)data;
#ifdef INET
	struct ifaddr *ifa = (struct ifaddr *)data;
#endif
	int error = 0;

	if (adapter->in_detach)
		return (error);

	switch (command) {
	case SIOCSIFADDR:
#ifdef INET
		if (ifa->ifa_addr->sa_family == AF_INET) {
			/*
			 * XXX
			 * Since resetting hardware takes a very long time
			 * and results in link renegotiation, we only
			 * initialize the hardware when it is absolutely
			 * required.
			 */
			ifp->if_flags |= IFF_UP;
			if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) {
				EM_CORE_LOCK(adapter);
				em_init_locked(adapter);
				EM_CORE_UNLOCK(adapter);
			}
			arp_ifinit(ifp, ifa);
		} else
#endif
			error = ether_ioctl(ifp, command, data);
		break;
	case SIOCSIFMTU:
	    {
		int max_frame_size;

		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFMTU (Set Interface MTU)");

		EM_CORE_LOCK(adapter);
		switch (adapter->hw.mac.type) {
		case e1000_82571:
		case e1000_82572:
		case e1000_ich9lan:
		case e1000_ich10lan:
		case e1000_82574:
		case e1000_80003es2lan:	/* 9K Jumbo Frame size */
			max_frame_size = 9234;
			break;
		case e1000_pchlan:
			max_frame_size = 4096;
			break;
		/* Adapters that do not support jumbo frames */
		case e1000_82583:
		case e1000_ich8lan:
			max_frame_size = ETHER_MAX_LEN;
			break;
		default:
			max_frame_size = MAX_JUMBO_FRAME_SIZE;
		}
		if (ifr->ifr_mtu > max_frame_size - ETHER_HDR_LEN -
		    ETHER_CRC_LEN) {
			EM_CORE_UNLOCK(adapter);
			error = EINVAL;
			break;
		}

		ifp->if_mtu = ifr->ifr_mtu;
		adapter->max_frame_size =
		    ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
		em_init_locked(adapter);
		EM_CORE_UNLOCK(adapter);
		break;
	    }
	case SIOCSIFFLAGS:
		IOCTL_DEBUGOUT("ioctl rcv'd:\
		    SIOCSIFFLAGS (Set Interface Flags)");
		EM_CORE_LOCK(adapter);
		if (ifp->if_flags & IFF_UP) {
			if ((ifp->if_drv_flags & IFF_DRV_RUNNING)) {
				if ((ifp->if_flags ^ adapter->if_flags) &
				    (IFF_PROMISC | IFF_ALLMULTI)) {
					em_disable_promisc(adapter);
					em_set_promisc(adapter);
				}
			} else
				em_init_locked(adapter);
		} else
			if (ifp->if_drv_flags & IFF_DRV_RUNNING)
				em_stop(adapter);
		adapter->if_flags = ifp->if_flags;
		EM_CORE_UNLOCK(adapter);
		break;
	case SIOCADDMULTI:
	case SIOCDELMULTI:
		IOCTL_DEBUGOUT("ioctl rcv'd: SIOC(ADD|DEL)MULTI");
		if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
			EM_CORE_LOCK(adapter);
			em_disable_intr(adapter);
			em_set_multi(adapter);
#ifdef DEVICE_POLLING
			if (!(ifp->if_capenable & IFCAP_POLLING))
#endif
				em_enable_intr(adapter);
			EM_CORE_UNLOCK(adapter);
		}
		break;
	case SIOCSIFMEDIA:
		/* Check SOL/IDER usage */
		EM_CORE_LOCK(adapter);
		if (e1000_check_reset_block(&adapter->hw)) {
			EM_CORE_UNLOCK(adapter);
			device_printf(adapter->dev, "Media change is"
			    " blocked due to SOL/IDER session.\n");
			break;
		}
		EM_CORE_UNLOCK(adapter);
	case SIOCGIFMEDIA:
		IOCTL_DEBUGOUT("ioctl rcv'd: \
		    SIOCxIFMEDIA (Get/Set Interface Media)");
		error = ifmedia_ioctl(ifp, ifr, &adapter->media, command);
		break;
	case SIOCSIFCAP:
	    {
		int mask, reinit;

		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFCAP (Set Capabilities)");
		reinit = 0;
		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
#ifdef DEVICE_POLLING
		if (mask & IFCAP_POLLING) {
			if (ifr->ifr_reqcap & IFCAP_POLLING) {
				error = ether_poll_register(em_poll, ifp);
				if (error)
					return (error);
				EM_CORE_LOCK(adapter);
				em_disable_intr(adapter);
				ifp->if_capenable |= IFCAP_POLLING;
				EM_CORE_UNLOCK(adapter);
			} else {
				error = ether_poll_deregister(ifp);
				/* Enable interrupt even in error case */
				EM_CORE_LOCK(adapter);
				em_enable_intr(adapter);
				ifp->if_capenable &= ~IFCAP_POLLING;
				EM_CORE_UNLOCK(adapter);
			}
		}
#endif
		if (mask & IFCAP_HWCSUM) {
			ifp->if_capenable ^= IFCAP_HWCSUM;
			reinit = 1;
		}
		if (mask & IFCAP_TSO4) {
			ifp->if_capenable ^= IFCAP_TSO4;
			reinit = 1;
		}
		if (mask & IFCAP_VLAN_HWTAGGING) {
			ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
			reinit = 1;
		}
		if (mask & IFCAP_VLAN_HWFILTER) {
			ifp->if_capenable ^= IFCAP_VLAN_HWFILTER;
			reinit = 1;
		}
		if ((mask & IFCAP_WOL) &&
		    (ifp->if_capabilities & IFCAP_WOL) != 0) {
			if (mask & IFCAP_WOL_MCAST)
				ifp->if_capenable ^= IFCAP_WOL_MCAST;
			if (mask & IFCAP_WOL_MAGIC)
				ifp->if_capenable ^= IFCAP_WOL_MAGIC;
		}
		if (reinit && (ifp->if_drv_flags & IFF_DRV_RUNNING))
			em_init(adapter);
		VLAN_CAPABILITIES(ifp);
		break;
	    }

	default:
		error = ether_ioctl(ifp, command, data);
		break;
	}

	return (error);
}

/*********************************************************************
 *  Init entry point
 *
 *  This routine is used in two ways. It is used by the stack as
 *  init entry point in network interface structure. It is also used
 *  by the driver as a hw/sw initialization routine to get to a
 *  consistent state.
 *
 *  return 0 on success, positive on failure
 **********************************************************************/

static void
em_init_locked(struct adapter *adapter)
{
	struct ifnet	*ifp = adapter->ifp;
	device_t	dev = adapter->dev;
	u32		pba;

	INIT_DEBUGOUT("em_init: begin");

	EM_CORE_LOCK_ASSERT(adapter);

	em_disable_intr(adapter);
	callout_stop(&adapter->timer);

	/*
	 * Packet Buffer Allocation (PBA)
	 * Writing PBA sets the receive portion of the buffer;
	 * the remainder is used for the transmit buffer.
	 */
	switch (adapter->hw.mac.type) {
	/* Total Packet Buffer on these is 48K */
	case e1000_82571:
	case e1000_82572:
	case e1000_80003es2lan:
		pba = E1000_PBA_32K; /* 32K for Rx, 16K for Tx */
		break;
	case e1000_82573: /* 82573: Total Packet Buffer is 32K */
		pba = E1000_PBA_12K; /* 12K for Rx, 20K for Tx */
		break;
	case e1000_82574:
	case e1000_82583:
		pba = E1000_PBA_20K; /* 20K for Rx, 20K for Tx */
		break;
	case e1000_ich9lan:
	case e1000_ich10lan:
	case e1000_pchlan:
		pba = E1000_PBA_10K;
		break;
	case e1000_ich8lan:
		pba = E1000_PBA_8K;
		break;
	default:
		if (adapter->max_frame_size > 8192)
			pba = E1000_PBA_40K; /* 40K for Rx, 24K for Tx */
		else
			pba = E1000_PBA_48K; /* 48K for Rx, 16K for Tx */
	}
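
	/*
	 * Worked example (informational): the PBA register takes the
	 * receive allocation in KB, e.g. E1000_PBA_32K (0x0020, i.e.
	 * 32KB) on a 48K part leaves 16K of packet buffer for the
	 * transmit side.
	 */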

	INIT_DEBUGOUT1("em_init: pba=%dK",pba);
	E1000_WRITE_REG(&adapter->hw, E1000_PBA, pba);

	/* Get the latest mac address, User can use a LAA */
	bcopy(IF_LLADDR(adapter->ifp), adapter->hw.mac.addr,
	    ETHER_ADDR_LEN);

	/* Put the address into the Receive Address Array */
	e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);

	/*
	 * With the 82571 adapter, RAR[0] may be overwritten
	 * when the other port is reset; we make a duplicate
	 * in RAR[14] for that eventuality, which assures
	 * the interface continues to function.
	 */
	if (adapter->hw.mac.type == e1000_82571) {
		e1000_set_laa_state_82571(&adapter->hw, TRUE);
		e1000_rar_set(&adapter->hw, adapter->hw.mac.addr,
		    E1000_RAR_ENTRIES - 1);
	}

	/* Initialize the hardware */
	em_reset(adapter);
	em_update_link_status(adapter);

	/* Setup VLAN support, basic and offload if available */
	E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);

	/* Use real VLAN Filter support? */
	if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) {
		if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
			/* Use real VLAN Filter support */
			em_setup_vlan_hw_support(adapter);
		else {
			u32 ctrl;
			ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL);
			ctrl |= E1000_CTRL_VME;
			E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl);
		}
	}

	/* Set hardware offload abilities */
	ifp->if_hwassist = 0;
	if (ifp->if_capenable & IFCAP_TXCSUM)
		ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP);
	if (ifp->if_capenable & IFCAP_TSO4)
		ifp->if_hwassist |= CSUM_TSO;

	/* Configure for OS presence */
	em_init_manageability(adapter);

	/* Prepare transmit descriptors and buffers */
	em_setup_transmit_structures(adapter);
	em_initialize_transmit_unit(adapter);

	/* Setup Multicast table */
	em_set_multi(adapter);

	/* Prepare receive descriptors and buffers */
	if (em_setup_receive_structures(adapter)) {
		device_printf(dev, "Could not setup receive structures\n");
		em_stop(adapter);
		return;
	}
	em_initialize_receive_unit(adapter);

	/* Don't lose promiscuous settings */
	em_set_promisc(adapter);

	ifp->if_drv_flags |= IFF_DRV_RUNNING;
	ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;

	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
	e1000_clear_hw_cntrs_base_generic(&adapter->hw);

	/* MSI/X configuration for 82574 */
	if (adapter->hw.mac.type == e1000_82574) {
		int tmp;
		tmp = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
		tmp |= E1000_CTRL_EXT_PBA_CLR;
		E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, tmp);
		/* Set the IVAR - interrupt vector routing. */
		E1000_WRITE_REG(&adapter->hw, E1000_IVAR, adapter->ivars);
	}

#ifdef DEVICE_POLLING
	/*
	 * Only enable interrupts if we are not polling, make sure
	 * they are off otherwise.
	 */
	if (ifp->if_capenable & IFCAP_POLLING)
		em_disable_intr(adapter);
	else
#endif /* DEVICE_POLLING */
		em_enable_intr(adapter);

	/* AMT based hardware can now take control from firmware */
	if (adapter->has_manage && adapter->has_amt)
		em_get_hw_control(adapter);

	/* Don't reset the phy next time init gets called */
	adapter->hw.phy.reset_disable = TRUE;
}

static void
em_init(void *arg)
{
	struct adapter *adapter = arg;

	EM_CORE_LOCK(adapter);
	em_init_locked(adapter);
	EM_CORE_UNLOCK(adapter);
}


#ifdef DEVICE_POLLING
/*********************************************************************
 *
 *  Legacy polling routine: note this only works with single queue
 *
 *********************************************************************/
static int
em_poll(struct ifnet *ifp, enum poll_cmd cmd, int count)
{
	struct adapter *adapter = ifp->if_softc;
	struct tx_ring	*txr = adapter->tx_rings;
	struct rx_ring	*rxr = adapter->rx_rings;
	u32		reg_icr, rx_done = 0;

	EM_CORE_LOCK(adapter);
	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
		EM_CORE_UNLOCK(adapter);
		return (rx_done);
	}

	if (cmd == POLL_AND_CHECK_STATUS) {
		reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
		if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
			callout_stop(&adapter->timer);
			adapter->hw.mac.get_link_status = 1;
			em_update_link_status(adapter);
			callout_reset(&adapter->timer, hz,
			    em_local_timer, adapter);
		}
	}
	EM_CORE_UNLOCK(adapter);

	EM_RX_LOCK(rxr);
	rx_done = em_rxeof(rxr, count);
	EM_RX_UNLOCK(rxr);

	EM_TX_LOCK(txr);
	em_txeof(txr);
#ifdef EM_MULTIQUEUE
	if (!drbr_empty(ifp, txr->br))
		em_mq_start_locked(ifp, txr, NULL);
#else
	if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
		em_start_locked(ifp, txr);
#endif
	EM_TX_UNLOCK(txr);

	return (rx_done);
}
#endif /* DEVICE_POLLING */


/*********************************************************************
 *
 *  Fast Legacy/MSI Combined Interrupt Service routine
 *
 *********************************************************************/
static int
em_irq_fast(void *arg)
{
	struct adapter	*adapter = arg;
	struct ifnet	*ifp;
	u32		reg_icr;

	ifp = adapter->ifp;

	reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);

	/* Hot eject?  */
	if (reg_icr == 0xffffffff)
		return FILTER_STRAY;

	/* Definitely not our interrupt.  */
	if (reg_icr == 0x0)
		return FILTER_STRAY;

	/*
	 * Starting with the 82571 chip, bit 31 should be used to
	 * determine whether the interrupt belongs to us.
	 */
	if (adapter->hw.mac.type >= e1000_82571 &&
	    (reg_icr & E1000_ICR_INT_ASSERTED) == 0)
		return FILTER_STRAY;

	em_disable_intr(adapter);
	taskqueue_enqueue(adapter->tq, &adapter->que_task);

	/* Link status change */
	if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
		adapter->hw.mac.get_link_status = 1;
		taskqueue_enqueue(taskqueue_fast, &adapter->link_task);
	}

	if (reg_icr & E1000_ICR_RXO)
		adapter->rx_overruns++;
	return FILTER_HANDLED;
}

/* Combined RX/TX handler, used by Legacy and MSI */
static void
em_handle_que(void *context, int pending)
{
	struct adapter	*adapter = context;
	struct ifnet	*ifp = adapter->ifp;
	struct tx_ring	*txr = adapter->tx_rings;
	struct rx_ring	*rxr = adapter->rx_rings;
	bool		more_rx;

	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
		EM_RX_LOCK(rxr);
		more_rx = em_rxeof(rxr, adapter->rx_process_limit);
		EM_RX_UNLOCK(rxr);

		EM_TX_LOCK(txr);
		em_txeof(txr);
#ifdef EM_MULTIQUEUE
		if (!drbr_empty(ifp, txr->br))
			em_mq_start_locked(ifp, txr, NULL);
#else
		if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
			em_start_locked(ifp, txr);
#endif
		EM_TX_UNLOCK(txr);
		if (more_rx) {
			taskqueue_enqueue(adapter->tq, &adapter->que_task);
			return;
		}
	}

	em_enable_intr(adapter);
	return;
}


/*********************************************************************
 *
 *  MSIX Interrupt Service Routines
 *
 **********************************************************************/
static void
em_msix_tx(void *arg)
{
	struct tx_ring *txr = arg;
	struct adapter *adapter = txr->adapter;
	bool		more;

	++txr->tx_irq;
	EM_TX_LOCK(txr);
	more = em_txeof(txr);
	EM_TX_UNLOCK(txr);
	if (more)
		taskqueue_enqueue(txr->tq, &txr->tx_task);
	else
		/* Reenable this interrupt */
		E1000_WRITE_REG(&adapter->hw, E1000_IMS, txr->ims);
	return;
}

/*********************************************************************
 *
 *  MSIX RX Interrupt Service routine
 *
 **********************************************************************/

static void
em_msix_rx(void *arg)
{
	struct rx_ring	*rxr = arg;
	struct adapter	*adapter = rxr->adapter;
	bool		more;

	EM_RX_LOCK(rxr);
	++rxr->rx_irq;
	more = em_rxeof(rxr, adapter->rx_process_limit);
	EM_RX_UNLOCK(rxr);
	if (more)
		taskqueue_enqueue(rxr->tq, &rxr->rx_task);
	else
		/* Reenable this interrupt */
		E1000_WRITE_REG(&adapter->hw, E1000_IMS, rxr->ims);
	return;
}

/*********************************************************************
 *
 *  MSIX Link Fast Interrupt Service routine
 *
 **********************************************************************/
static void
em_msix_link(void *arg)
{
	struct adapter	*adapter = arg;
	u32		reg_icr;

	++adapter->link_irq;
	reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);

	if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
		adapter->hw.mac.get_link_status = 1;
		taskqueue_enqueue(taskqueue_fast, &adapter->link_task);
	} else
		E1000_WRITE_REG(&adapter->hw, E1000_IMS,
		    EM_MSIX_LINK | E1000_IMS_LSC);
	return;
}

static void
em_handle_rx(void *context, int pending)
{
	struct rx_ring	*rxr = context;
	struct adapter	*adapter = rxr->adapter;
	bool		more;

	EM_RX_LOCK(rxr);
	more = em_rxeof(rxr, adapter->rx_process_limit);
	EM_RX_UNLOCK(rxr);
	if (more)
		taskqueue_enqueue(rxr->tq, &rxr->rx_task);
	else
		/* Reenable this interrupt */
		E1000_WRITE_REG(&adapter->hw, E1000_IMS, rxr->ims);
}

static void
em_handle_tx(void *context, int pending)
{
	struct tx_ring	*txr = context;
	struct adapter	*adapter = txr->adapter;
	struct ifnet	*ifp = adapter->ifp;

	if (!EM_TX_TRYLOCK(txr))
		return;

	em_txeof(txr);

#ifdef EM_MULTIQUEUE
	if (!drbr_empty(ifp, txr->br))
		em_mq_start_locked(ifp, txr, NULL);
#else
	if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
		em_start_locked(ifp, txr);
#endif
	E1000_WRITE_REG(&adapter->hw, E1000_IMS, txr->ims);
	EM_TX_UNLOCK(txr);
}

static void
em_handle_link(void *context, int pending)
{
	struct adapter	*adapter = context;
	struct ifnet *ifp = adapter->ifp;

	if (!(ifp->if_drv_flags & IFF_DRV_RUNNING))
		return;

	EM_CORE_LOCK(adapter);
	callout_stop(&adapter->timer);
	em_update_link_status(adapter);
	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
	E1000_WRITE_REG(&adapter->hw, E1000_IMS,
	    EM_MSIX_LINK | E1000_IMS_LSC);
	EM_CORE_UNLOCK(adapter);
}


/*********************************************************************
 *
 *  Media Ioctl callback
 *
 *  This routine is called whenever the user queries the status of
 *  the interface using ifconfig.
 *
 **********************************************************************/
static void
em_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
{
	struct adapter *adapter = ifp->if_softc;
	u_char fiber_type = IFM_1000_SX;

	INIT_DEBUGOUT("em_media_status: begin");

	EM_CORE_LOCK(adapter);
	em_update_link_status(adapter);

	ifmr->ifm_status = IFM_AVALID;
	ifmr->ifm_active = IFM_ETHER;

	if (!adapter->link_active) {
		EM_CORE_UNLOCK(adapter);
		return;
	}

	ifmr->ifm_status |= IFM_ACTIVE;

	if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
	    (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
		ifmr->ifm_active |= fiber_type | IFM_FDX;
	} else {
		switch (adapter->link_speed) {
		case 10:
			ifmr->ifm_active |= IFM_10_T;
			break;
		case 100:
			ifmr->ifm_active |= IFM_100_TX;
			break;
		case 1000:
			ifmr->ifm_active |= IFM_1000_T;
			break;
		}
		if (adapter->link_duplex == FULL_DUPLEX)
			ifmr->ifm_active |= IFM_FDX;
		else
			ifmr->ifm_active |= IFM_HDX;
	}
	EM_CORE_UNLOCK(adapter);
}

/*********************************************************************
 *
 *  Media Ioctl callback
 *
 *  This routine is called when the user changes speed/duplex using
 *  the media/mediaopt option with ifconfig.
 *
 **********************************************************************/
static int
em_media_change(struct ifnet *ifp)
{
	struct adapter *adapter = ifp->if_softc;
	struct ifmedia  *ifm = &adapter->media;

	INIT_DEBUGOUT("em_media_change: begin");

	if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
		return (EINVAL);

	EM_CORE_LOCK(adapter);
	switch (IFM_SUBTYPE(ifm->ifm_media)) {
	case IFM_AUTO:
		adapter->hw.mac.autoneg = DO_AUTO_NEG;
		adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
		break;
	case IFM_1000_LX:
	case IFM_1000_SX:
	case IFM_1000_T:
		adapter->hw.mac.autoneg = DO_AUTO_NEG;
		adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
		break;
	case IFM_100_TX:
		adapter->hw.mac.autoneg = FALSE;
		adapter->hw.phy.autoneg_advertised = 0;
		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
			adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_FULL;
		else
			adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_HALF;
		break;
	case IFM_10_T:
		adapter->hw.mac.autoneg = FALSE;
		adapter->hw.phy.autoneg_advertised = 0;
		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
			adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_FULL;
		else
			adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_HALF;
		break;
	default:
		device_printf(adapter->dev, "Unsupported media type\n");
	}

	/*
	 * As the speed/duplex settings may have changed we need to
	 * reset the PHY.
	 */
	adapter->hw.phy.reset_disable = FALSE;

	em_init_locked(adapter);
	EM_CORE_UNLOCK(adapter);

	return (0);
}
1721
1722/*********************************************************************
1723 *
1724 *  This routine maps the mbufs to tx descriptors.
1725 *
1726 *  return 0 on success, positive on failure
1727 **********************************************************************/
1728
1729static int
1730em_xmit(struct tx_ring *txr, struct mbuf **m_headp)
1731{
1732	struct adapter		*adapter = txr->adapter;
1733	bus_dma_segment_t	segs[EM_MAX_SCATTER];
1734	bus_dmamap_t		map;
1735	struct em_buffer	*tx_buffer, *tx_buffer_mapped;
1736	struct e1000_tx_desc	*ctxd = NULL;
1737	struct mbuf		*m_head;
1738	u32			txd_upper, txd_lower, txd_used, txd_saved;
1739	int			nsegs, i, j, first, last = 0;
1740	int			error, do_tso, tso_desc = 0;
1741
1742	m_head = *m_headp;
1743	txd_upper = txd_lower = txd_used = txd_saved = 0;
1744	do_tso = ((m_head->m_pkthdr.csum_flags & CSUM_TSO) != 0);
1745
1746	/*
1747	 * TSO workaround:
1748	 *  If an mbuf is only header we need
1749	 *     to pull 4 bytes of data into it.
1750	 */
1751	if (do_tso && (m_head->m_len <= M_TSO_LEN)) {
1752		m_head = m_pullup(m_head, M_TSO_LEN + 4);
1753		*m_headp = m_head;
1754		if (m_head == NULL)
1755			return (ENOBUFS);
1756	}
1757
1758	/*
1759	 * Map the packet for DMA
1760	 *
1761	 * Capture the first descriptor index,
1762	 * this descriptor will have the index
1763	 * of the EOP which is the only one that
1764	 * now gets a DONE bit writeback.
1765	 */
1766	first = txr->next_avail_desc;
1767	tx_buffer = &txr->tx_buffers[first];
1768	tx_buffer_mapped = tx_buffer;
1769	map = tx_buffer->map;
1770
1771	error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
1772	    *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
1773
1774	/*
1775	 * There are two types of errors we can (try) to handle:
1776	 * - EFBIG means the mbuf chain was too long and bus_dma ran
1777	 *   out of segments.  Defragment the mbuf chain and try again.
1778	 * - ENOMEM means bus_dma could not obtain enough bounce buffers
1779	 *   at this point in time.  Defer sending and try again later.
1780	 * All other errors, in particular EINVAL, are fatal and prevent the
1781	 * mbuf chain from ever going through.  Drop it and report error.
1782	 */
1783	if (error == EFBIG) {
1784		struct mbuf *m;
1785
1786		m = m_defrag(*m_headp, M_DONTWAIT);
1787		if (m == NULL) {
1788			adapter->mbuf_alloc_failed++;
1789			m_freem(*m_headp);
1790			*m_headp = NULL;
1791			return (ENOBUFS);
1792		}
1793		*m_headp = m;
1794
1795		/* Try it again */
1796		error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
1797		    *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
1798
1799		if (error) {
1800			adapter->no_tx_dma_setup++;
1801			m_freem(*m_headp);
1802			*m_headp = NULL;
1803			return (error);
1804		}
1805	} else if (error != 0) {
1806		adapter->no_tx_dma_setup++;
1807		return (error);
1808	}
1809
1810	/*
1811	 * TSO Hardware workaround, if this packet is not
1812	 * TSO, and is only a single descriptor long, and
1813	 * it follows a TSO burst, then we need to add a
1814	 * sentinel descriptor to prevent premature writeback.
1815	 */
1816	if ((do_tso == 0) && (txr->tx_tso == TRUE)) {
1817		if (nsegs == 1)
1818			tso_desc = TRUE;
1819		txr->tx_tso = FALSE;
1820	}
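	/*
	 * For example, a single-segment 64-byte ACK sent right after a
	 * TSO burst takes the sentinel path below, which splits its last
	 * 4 bytes into a separate final descriptor.
	 */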
1821
1822	if (nsegs > (txr->tx_avail - 2)) {
1823		txr->no_desc_avail++;
1824		bus_dmamap_unload(txr->txtag, map);
1825		return (ENOBUFS);
1826	}
1827	m_head = *m_headp;
1828
1829	/* Do hardware assists */
1830#if __FreeBSD_version >= 700000
1831	if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
1832		error = em_tso_setup(txr, m_head, &txd_upper, &txd_lower);
1833		if (error != TRUE)
1834			return (ENXIO); /* something foobar */
1835		/* we need to make a final sentinel transmit desc */
1836		tso_desc = TRUE;
1837	} else
1838#endif
1839	if (m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD)
1840		em_transmit_checksum_setup(txr,  m_head,
1841		    &txd_upper, &txd_lower);
1842
1843	i = txr->next_avail_desc;
1844
1845	/* Set up our transmit descriptors */
1846	for (j = 0; j < nsegs; j++) {
1847		bus_size_t seg_len;
1848		bus_addr_t seg_addr;
1849
1850		tx_buffer = &txr->tx_buffers[i];
1851		ctxd = &txr->tx_base[i];
1852		seg_addr = segs[j].ds_addr;
1853		seg_len  = segs[j].ds_len;
1854		/*
1855		** TSO Workaround:
1856		** If this is the last descriptor, we want to
1857		** split it so we have a small final sentinel
1858		*/
1859		if (tso_desc && (j == (nsegs -1)) && (seg_len > 8)) {
1860			seg_len -= 4;
1861			ctxd->buffer_addr = htole64(seg_addr);
1862			ctxd->lower.data = htole32(
1863			adapter->txd_cmd | txd_lower | seg_len);
1864			ctxd->upper.data =
1865			    htole32(txd_upper);
1866			if (++i == adapter->num_tx_desc)
1867				i = 0;
1868			/* Now make the sentinel */
1869			++txd_used; /* using an extra txd */
1870			ctxd = &txr->tx_base[i];
1871			tx_buffer = &txr->tx_buffers[i];
1872			ctxd->buffer_addr =
1873			    htole64(seg_addr + seg_len);
1874			ctxd->lower.data = htole32(
1875			adapter->txd_cmd | txd_lower | 4);
1876			ctxd->upper.data =
1877			    htole32(txd_upper);
1878			last = i;
1879			if (++i == adapter->num_tx_desc)
1880				i = 0;
1881		} else {
1882			ctxd->buffer_addr = htole64(seg_addr);
1883			ctxd->lower.data = htole32(
1884			adapter->txd_cmd | txd_lower | seg_len);
1885			ctxd->upper.data =
1886			    htole32(txd_upper);
1887			last = i;
1888			if (++i == adapter->num_tx_desc)
1889				i = 0;
1890		}
1891		tx_buffer->m_head = NULL;
1892		tx_buffer->next_eop = -1;
1893	}
1894
1895	txr->next_avail_desc = i;
1896	txr->tx_avail -= nsegs;
1897	if (tso_desc) /* TSO used an extra for sentinel */
1898		txr->tx_avail -= txd_used;
1899
1900	if (m_head->m_flags & M_VLANTAG) {
1901		/* Set the vlan id. */
1902		ctxd->upper.fields.special =
1903		    htole16(m_head->m_pkthdr.ether_vtag);
1904		/* Tell hardware to add tag */
1905		ctxd->lower.data |= htole32(E1000_TXD_CMD_VLE);
1906	}
1907
1908	tx_buffer->m_head = m_head;
1909	tx_buffer_mapped->map = tx_buffer->map;
1910	tx_buffer->map = map;
1911	bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE);
1912
1913	/*
1914	 * Last Descriptor of Packet
1915	 * needs End Of Packet (EOP)
1916	 * and Report Status (RS)
1917	 */
1918	ctxd->lower.data |=
1919	    htole32(E1000_TXD_CMD_EOP | E1000_TXD_CMD_RS);
1920	/*
1921	 * Keep track in the first buffer which
1922	 * descriptor will be written back
1923	 */
1924	tx_buffer = &txr->tx_buffers[first];
1925	tx_buffer->next_eop = last;
1926
1927	/*
1928	 * Advance the Transmit Descriptor Tail (TDT), this tells the E1000
1929	 * that this frame is available to transmit.
1930	 */
1931	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
1932	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1933	E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me), i);
1934
1935	return (0);
1936}
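
/*
 * Example (illustrative only, never compiled into the driver): em_xmit()
 * above advances through the descriptor array as a circular buffer,
 * wrapping with a compare instead of a modulo.  The pattern, extracted
 * into a hypothetical helper:
 */
#if 0
static inline int
em_ring_next(int i, int ring_size)
{
	/* Advance one slot, wrapping back to slot 0 past the end. */
	if (++i == ring_size)
		i = 0;
	return (i);
}
#endif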
1937
1938static void
1939em_set_promisc(struct adapter *adapter)
1940{
1941	struct ifnet	*ifp = adapter->ifp;
1942	u32		reg_rctl;
1943
1944	reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
1945
1946	if (ifp->if_flags & IFF_PROMISC) {
1947		reg_rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
1948		/* Turn this on if you want to see bad packets */
1949		if (em_debug_sbp)
1950			reg_rctl |= E1000_RCTL_SBP;
1951		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
1952	} else if (ifp->if_flags & IFF_ALLMULTI) {
1953		reg_rctl |= E1000_RCTL_MPE;
1954		reg_rctl &= ~E1000_RCTL_UPE;
1955		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
1956	}
1957}
1958
1959static void
1960em_disable_promisc(struct adapter *adapter)
1961{
1962	u32	reg_rctl;
1963
1964	reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
1965
1966	reg_rctl &=  (~E1000_RCTL_UPE);
1967	reg_rctl &=  (~E1000_RCTL_MPE);
1968	reg_rctl &=  (~E1000_RCTL_SBP);
1969	E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
1970}
1971
1972
1973/*********************************************************************
1974 *  Multicast Update
1975 *
1976 *  This routine is called whenever multicast address list is updated.
1977 *
1978 **********************************************************************/
1979
1980static void
1981em_set_multi(struct adapter *adapter)
1982{
1983	struct ifnet	*ifp = adapter->ifp;
1984	struct ifmultiaddr *ifma;
1985	u32 reg_rctl = 0;
1986	u8  *mta; /* Multicast array memory */
1987	int mcnt = 0;
1988
1989	IOCTL_DEBUGOUT("em_set_multi: begin");
1990
1991	if (adapter->hw.mac.type == e1000_82542 &&
1992	    adapter->hw.revision_id == E1000_REVISION_2) {
1993		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
1994		if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
1995			e1000_pci_clear_mwi(&adapter->hw);
1996		reg_rctl |= E1000_RCTL_RST;
1997		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
1998		msec_delay(5);
1999	}
2000
2001	/* Allocate temporary memory to setup array */
2002	mta = malloc(sizeof(u8) *
2003	    (ETH_ADDR_LEN * MAX_NUM_MULTICAST_ADDRESSES),
2004	    M_DEVBUF, M_NOWAIT | M_ZERO);
2005	if (mta == NULL)
2006		panic("em_set_multi memory failure\n");
2007
2008#if __FreeBSD_version < 800000
2009	IF_ADDR_LOCK(ifp);
2010#else
2011	if_maddr_rlock(ifp);
2012#endif
2013	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
2014		if (ifma->ifma_addr->sa_family != AF_LINK)
2015			continue;
2016
2017		if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
2018			break;
2019
2020		bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
2021		    &mta[mcnt * ETH_ADDR_LEN], ETH_ADDR_LEN);
2022		mcnt++;
2023	}
2024#if __FreeBSD_version < 800000
2025	IF_ADDR_UNLOCK(ifp);
2026#else
2027	if_maddr_runlock(ifp);
2028#endif
2029	if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES) {
2030		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2031		reg_rctl |= E1000_RCTL_MPE;
2032		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2033	} else
2034		e1000_update_mc_addr_list(&adapter->hw, mta, mcnt);
2035
2036	if (adapter->hw.mac.type == e1000_82542 &&
2037	    adapter->hw.revision_id == E1000_REVISION_2) {
2038		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2039		reg_rctl &= ~E1000_RCTL_RST;
2040		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2041		msec_delay(5);
2042		if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
2043			e1000_pci_set_mwi(&adapter->hw);
2044	}
2045	free(mta, M_DEVBUF);
2046}
2047
2048
2049/*********************************************************************
2050 *  Timer routine
2051 *
2052 *  This routine checks for link status and updates statistics.
2053 *
2054 **********************************************************************/
2055
2056static void
2057em_local_timer(void *arg)
2058{
2059	struct adapter	*adapter = arg;
2060	struct ifnet	*ifp = adapter->ifp;
2061	struct tx_ring	*txr = adapter->tx_rings;
2062
2063	EM_CORE_LOCK_ASSERT(adapter);
2064
2065	em_update_link_status(adapter);
2066	em_update_stats_counters(adapter);
2067
2068	/* Reset LAA into RAR[0] on 82571 */
2069	if (e1000_get_laa_state_82571(&adapter->hw) == TRUE)
2070		e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);
2071
2072	if (em_display_debug_stats && ifp->if_drv_flags & IFF_DRV_RUNNING)
2073		em_print_hw_stats(adapter);
2074
2075	/*
2076	** Check for time since any descriptor was cleaned
2077	*/
2078	for (int i = 0; i < adapter->num_queues; i++, txr++) {
2079		EM_TX_LOCK(txr);
2080		if (txr->watchdog_check == FALSE) {
2081			EM_TX_UNLOCK(txr);
2082			continue;
2083		}
2084		if ((ticks - txr->watchdog_time) > EM_WATCHDOG)
2085			goto hung;
2086		EM_TX_UNLOCK(txr);
2087	}
2088
2089	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
2090	return;
2091hung:
2092	device_printf(adapter->dev, "Watchdog timeout -- resetting\n");
2093	ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2094	adapter->watchdog_events++;
2095	EM_TX_UNLOCK(txr);
2096	em_init_locked(adapter);
2097}
2098
2099
2100static void
2101em_update_link_status(struct adapter *adapter)
2102{
2103	struct e1000_hw *hw = &adapter->hw;
2104	struct ifnet *ifp = adapter->ifp;
2105	device_t dev = adapter->dev;
2106	u32 link_check = 0;
2107
2108	/* Get the cached link value or read phy for real */
2109	switch (hw->phy.media_type) {
2110	case e1000_media_type_copper:
2111		if (hw->mac.get_link_status) {
2112			/* Do the work to read phy */
2113			e1000_check_for_link(hw);
2114			link_check = !hw->mac.get_link_status;
2115			if (link_check) /* ESB2 fix */
2116				e1000_cfg_on_link_up(hw);
2117		} else
2118			link_check = TRUE;
2119		break;
2120	case e1000_media_type_fiber:
2121		e1000_check_for_link(hw);
2122		link_check = (E1000_READ_REG(hw, E1000_STATUS) &
2123		    E1000_STATUS_LU);
2124		break;
2125	case e1000_media_type_internal_serdes:
2126		e1000_check_for_link(hw);
2127		link_check = adapter->hw.mac.serdes_has_link;
2128		break;
2129	default:
2130	case e1000_media_type_unknown:
2131		break;
2132	}
2133
2134	/* Now check for a transition */
2135	if (link_check && (adapter->link_active == 0)) {
2136		e1000_get_speed_and_duplex(hw, &adapter->link_speed,
2137		    &adapter->link_duplex);
2138		/* Check if we must disable SPEED_MODE bit on PCI-E */
2139		if ((adapter->link_speed != SPEED_1000) &&
2140		    ((hw->mac.type == e1000_82571) ||
2141		    (hw->mac.type == e1000_82572))) {
2142			int tarc0;
2143			tarc0 = E1000_READ_REG(hw, E1000_TARC(0));
2144			tarc0 &= ~SPEED_MODE_BIT;
2145			E1000_WRITE_REG(hw, E1000_TARC(0), tarc0);
2146		}
2147		if (bootverbose)
2148			device_printf(dev, "Link is up %d Mbps %s\n",
2149			    adapter->link_speed,
2150			    ((adapter->link_duplex == FULL_DUPLEX) ?
2151			    "Full Duplex" : "Half Duplex"));
2152		adapter->link_active = 1;
2153		adapter->smartspeed = 0;
2154		ifp->if_baudrate = adapter->link_speed * 1000000;
2155		if_link_state_change(ifp, LINK_STATE_UP);
2156	} else if (!link_check && (adapter->link_active == 1)) {
2157		ifp->if_baudrate = adapter->link_speed = 0;
2158		adapter->link_duplex = 0;
2159		if (bootverbose)
2160			device_printf(dev, "Link is Down\n");
2161		adapter->link_active = 0;
2162		/* Link down, disable watchdog */
2163		// JFV change later
2164		//adapter->watchdog_check = FALSE;
2165		if_link_state_change(ifp, LINK_STATE_DOWN);
2166	}
2167}
2168
2169/*********************************************************************
2170 *
2171 *  This routine disables all traffic on the adapter by issuing a
2172 *  global reset on the MAC and deallocates TX/RX buffers.
2173 *
2174 *  This routine should always be called with BOTH the CORE
2175 *  and TX locks.
2176 **********************************************************************/
2177
2178static void
2179em_stop(void *arg)
2180{
2181	struct adapter	*adapter = arg;
2182	struct ifnet	*ifp = adapter->ifp;
2183	struct tx_ring	*txr = adapter->tx_rings;
2184
2185	EM_CORE_LOCK_ASSERT(adapter);
2186
2187	INIT_DEBUGOUT("em_stop: begin");
2188
2189	em_disable_intr(adapter);
2190	callout_stop(&adapter->timer);
2191
2192	/* Tell the stack that the interface is no longer active */
2193	ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
2194
2195	/* Unarm watchdog timer. */
2196	for (int i = 0; i < adapter->num_queues; i++, txr++) {
2197		EM_TX_LOCK(txr);
2198		txr->watchdog_check = FALSE;
2199		EM_TX_UNLOCK(txr);
2200	}
2201
2202	e1000_reset_hw(&adapter->hw);
2203	E1000_WRITE_REG(&adapter->hw, E1000_WUC, 0);
2204
2205	e1000_led_off(&adapter->hw);
2206	e1000_cleanup_led(&adapter->hw);
2207}
2208
2209
2210/*********************************************************************
2211 *
2212 *  Determine hardware revision.
2213 *
2214 **********************************************************************/
2215static void
2216em_identify_hardware(struct adapter *adapter)
2217{
2218	device_t dev = adapter->dev;
2219
2220	/* Make sure our PCI config space has the necessary stuff set */
2221	adapter->hw.bus.pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2);
2222	if (!((adapter->hw.bus.pci_cmd_word & PCIM_CMD_BUSMASTEREN) &&
2223	    (adapter->hw.bus.pci_cmd_word & PCIM_CMD_MEMEN))) {
2224		device_printf(dev, "Memory Access and/or Bus Master bits "
2225		    "were not set!\n");
2226		adapter->hw.bus.pci_cmd_word |=
2227		(PCIM_CMD_BUSMASTEREN | PCIM_CMD_MEMEN);
2228		pci_write_config(dev, PCIR_COMMAND,
2229		    adapter->hw.bus.pci_cmd_word, 2);
2230	}
2231
2232	/* Save off the information about this board */
2233	adapter->hw.vendor_id = pci_get_vendor(dev);
2234	adapter->hw.device_id = pci_get_device(dev);
2235	adapter->hw.revision_id = pci_read_config(dev, PCIR_REVID, 1);
2236	adapter->hw.subsystem_vendor_id =
2237	    pci_read_config(dev, PCIR_SUBVEND_0, 2);
2238	adapter->hw.subsystem_device_id =
2239	    pci_read_config(dev, PCIR_SUBDEV_0, 2);
2240
2241	/* Do Shared Code Init and Setup */
2242	if (e1000_set_mac_type(&adapter->hw)) {
2243		device_printf(dev, "Setup init failure\n");
2244		return;
2245	}
2246}
2247
2248static int
2249em_allocate_pci_resources(struct adapter *adapter)
2250{
2251	device_t	dev = adapter->dev;
2252	int		rid;
2253
2254	rid = PCIR_BAR(0);
2255	adapter->memory = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
2256	    &rid, RF_ACTIVE);
2257	if (adapter->memory == NULL) {
2258		device_printf(dev, "Unable to allocate bus resource: memory\n");
2259		return (ENXIO);
2260	}
2261	adapter->osdep.mem_bus_space_tag =
2262	    rman_get_bustag(adapter->memory);
2263	adapter->osdep.mem_bus_space_handle =
2264	    rman_get_bushandle(adapter->memory);
2265	adapter->hw.hw_addr = (u8 *)&adapter->osdep.mem_bus_space_handle;
2266
2267	/* Default to a single queue */
2268	adapter->num_queues = 1;
2269
2270	/*
2271	 * Setup MSI/X or MSI if PCI Express
2272	 */
2273	adapter->msix = em_setup_msix(adapter);
2274
2275	adapter->hw.back = &adapter->osdep;
2276
2277	return (0);
2278}
2279
2280/*********************************************************************
2281 *
2282 *  Setup the Legacy or MSI Interrupt handler
2283 *
2284 **********************************************************************/
2285int
2286em_allocate_legacy(struct adapter *adapter)
2287{
2288	device_t dev = adapter->dev;
2289	int error, rid = 0;
2290
2291	/* Manually turn off all interrupts */
2292	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2293
2294	if (adapter->msix == 1) /* using MSI */
2295		rid = 1;
2296	/* We allocate a single interrupt resource */
2297	adapter->res = bus_alloc_resource_any(dev,
2298	    SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2299	if (adapter->res == NULL) {
2300		device_printf(dev, "Unable to allocate bus resource: "
2301		    "interrupt\n");
2302		return (ENXIO);
2303	}
2304
2305	/*
2306	 * Allocate a fast interrupt and the associated
2307	 * deferred processing contexts.
2308	 */
2309	TASK_INIT(&adapter->que_task, 0, em_handle_que, adapter);
2310	TASK_INIT(&adapter->link_task, 0, em_handle_link, adapter);
2311	adapter->tq = taskqueue_create_fast("em_taskq", M_NOWAIT,
2312	    taskqueue_thread_enqueue, &adapter->tq);
2313	taskqueue_start_threads(&adapter->tq, 1, PI_NET, "%s taskq",
2314	    device_get_nameunit(adapter->dev));
2315	if ((error = bus_setup_intr(dev, adapter->res, INTR_TYPE_NET,
2316	    em_irq_fast, NULL, adapter, &adapter->tag)) != 0) {
2317		device_printf(dev, "Failed to register fast interrupt "
2318			    "handler: %d\n", error);
2319		taskqueue_free(adapter->tq);
2320		adapter->tq = NULL;
2321		return (error);
2322	}
2323
2324	return (0);
2325}
2326
2327/*********************************************************************
2328 *
2329 *  Setup the MSIX Interrupt handlers
2330 *   This is not really Multiqueue, rather
2331 *   it's just multiple interrupt vectors.
2332 *
2333 **********************************************************************/
2334int
2335em_allocate_msix(struct adapter *adapter)
2336{
2337	device_t	dev = adapter->dev;
2338	struct		tx_ring *txr = adapter->tx_rings;
2339	struct		rx_ring *rxr = adapter->rx_rings;
2340	int		error, rid, vector = 0;
2341
2342
2343	/* Make sure all interrupts are disabled */
2344	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2345
2346	/* First set up ring resources */
2347	for (int i = 0; i < adapter->num_queues; i++, txr++, rxr++) {
2348
2349		/* RX ring */
2350		rid = vector + 1;
2351
2352		rxr->res = bus_alloc_resource_any(dev,
2353		    SYS_RES_IRQ, &rid, RF_ACTIVE);
2354		if (rxr->res == NULL) {
2355			device_printf(dev,
2356			    "Unable to allocate bus resource: "
2357			    "RX MSIX Interrupt %d\n", i);
2358			return (ENXIO);
2359		}
2360		if ((error = bus_setup_intr(dev, rxr->res,
2361		    INTR_TYPE_NET | INTR_MPSAFE, NULL, em_msix_rx,
2362		    rxr, &rxr->tag)) != 0) {
2363			device_printf(dev, "Failed to register RX handler\n");
2364			return (error);
2365		}
2366		rxr->msix = vector++; /* NOTE increment vector for TX */
2367		TASK_INIT(&rxr->rx_task, 0, em_handle_rx, rxr);
2368		rxr->tq = taskqueue_create_fast("em_rxq", M_NOWAIT,
2369		    taskqueue_thread_enqueue, &rxr->tq);
2370		taskqueue_start_threads(&rxr->tq, 1, PI_NET, "%s rxq",
2371		    device_get_nameunit(adapter->dev));
2372		/*
2373		** Set the bit to enable interrupt
2374		** in E1000_IMS -- bits 20 and 21
2375		** are for RX0 and RX1, note this has
2376		** NOTHING to do with the MSIX vector
2377		*/
2378		rxr->ims = 1 << (20 + i);
2379		adapter->ivars |= (8 | rxr->msix) << (i * 4);
2380
2381		/* TX ring */
2382		rid = vector + 1;
2383		txr->res = bus_alloc_resource_any(dev,
2384		    SYS_RES_IRQ, &rid, RF_ACTIVE);
2385		if (txr->res == NULL) {
2386			device_printf(dev,
2387			    "Unable to allocate bus resource: "
2388			    "TX MSIX Interrupt %d\n", i);
2389			return (ENXIO);
2390		}
2391		if ((error = bus_setup_intr(dev, txr->res,
2392		    INTR_TYPE_NET | INTR_MPSAFE, NULL, em_msix_tx,
2393		    txr, &txr->tag)) != 0) {
2394			device_printf(dev, "Failed to register TX handler\n");
2395			return (error);
2396		}
2397		txr->msix = vector++; /* Increment vector for next pass */
2398		TASK_INIT(&txr->tx_task, 0, em_handle_tx, txr);
2399		txr->tq = taskqueue_create_fast("em_txq", M_NOWAIT,
2400		    taskqueue_thread_enqueue, &txr->tq);
2401		taskqueue_start_threads(&txr->tq, 1, PI_NET, "%s txq",
2402		    device_get_nameunit(adapter->dev));
2403		/*
2404		** Set the bit to enable interrupt
2405		** in E1000_IMS -- bits 22 and 23
2406		** are for TX0 and TX1, note this has
2407		** NOTHING to do with the MSIX vector
2408		*/
2409		txr->ims = 1 << (22 + i);
2410		adapter->ivars |= (8 | txr->msix) << (8 + (i * 4));
2411	}
2412
2413	/* Link interrupt */
2414	++rid;
2415	adapter->res = bus_alloc_resource_any(dev,
2416	    SYS_RES_IRQ, &rid, RF_ACTIVE);
2417	if (!adapter->res) {
2418		device_printf(dev, "Unable to allocate "
2419		    "bus resource: Link interrupt [%d]\n", rid);
2420		return (ENXIO);
2421	}
2422	/* Set the link handler function */
2423	error = bus_setup_intr(dev, adapter->res,
2424	    INTR_TYPE_NET | INTR_MPSAFE, NULL,
2425	    em_msix_link, adapter, &adapter->tag);
2426	if (error) {
2427		adapter->res = NULL;
2428		device_printf(dev, "Failed to register LINK handler\n");
2429		return (error);
2430	}
2431	adapter->linkvec = vector;
2432	adapter->ivars |=  (8 | vector) << 16;
2433	adapter->ivars |= 0x80000000;
2434	TASK_INIT(&adapter->link_task, 0, em_handle_link, adapter);
2435	adapter->tq = taskqueue_create_fast("em_link", M_NOWAIT,
2436	    taskqueue_thread_enqueue, &adapter->tq);
2437	taskqueue_start_threads(&adapter->tq, 1, PI_NET, "%s linkq",
2438	    device_get_nameunit(adapter->dev));
2439
2440	return (0);
2441}
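
/*
 * Worked example (assuming the 82574 IVAR layout these shifts imply,
 * with bit 3 of each 4-bit field acting as the valid bit): one queue
 * pair gives RX0 vector 0, TX0 vector 1 and the link vector 2, so
 *
 *	ivars = (8 | 0) | ((8 | 1) << 8) | ((8 | 2) << 16) | 0x80000000
 *	      = 0x800a0908
 */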
2442
2443
2444static void
2445em_free_pci_resources(struct adapter *adapter)
2446{
2447	device_t	dev = adapter->dev;
2448	struct tx_ring	*txr;
2449	struct rx_ring	*rxr;
2450	int		rid;
2451
2452
2453	/*
2454	** Release all the queue interrupt resources:
2455	*/
2456	for (int i = 0; i < adapter->num_queues; i++) {
2457		txr = &adapter->tx_rings[i];
2458		rxr = &adapter->rx_rings[i];
2459		rid = txr->msix + 1;
2460		if (txr->tag != NULL) {
2461			bus_teardown_intr(dev, txr->res, txr->tag);
2462			txr->tag = NULL;
2463		}
2464		if (txr->res != NULL)
2465			bus_release_resource(dev, SYS_RES_IRQ,
2466			    rid, txr->res);
2467		rid = rxr->msix + 1;
2468		if (rxr->tag != NULL) {
2469			bus_teardown_intr(dev, rxr->res, rxr->tag);
2470			rxr->tag = NULL;
2471		}
2472		if (rxr->res != NULL)
2473			bus_release_resource(dev, SYS_RES_IRQ,
2474			    rid, rxr->res);
2475	}
2476
2477	if (adapter->linkvec) /* we are doing MSIX */
2478		rid = adapter->linkvec + 1;
2479	else
2480		rid = (adapter->msix != 0) ? 1 : 0;
2481
2482	if (adapter->tag != NULL) {
2483		bus_teardown_intr(dev, adapter->res, adapter->tag);
2484		adapter->tag = NULL;
2485	}
2486
2487	if (adapter->res != NULL)
2488		bus_release_resource(dev, SYS_RES_IRQ, rid, adapter->res);
2489
2490
2491	if (adapter->msix)
2492		pci_release_msi(dev);
2493
2494	if (adapter->msix_mem != NULL)
2495		bus_release_resource(dev, SYS_RES_MEMORY,
2496		    PCIR_BAR(EM_MSIX_BAR), adapter->msix_mem);
2497
2498	if (adapter->memory != NULL)
2499		bus_release_resource(dev, SYS_RES_MEMORY,
2500		    PCIR_BAR(0), adapter->memory);
2501
2502	if (adapter->flash != NULL)
2503		bus_release_resource(dev, SYS_RES_MEMORY,
2504		    EM_FLASH, adapter->flash);
2505}
2506
2507/*
2508 * Setup MSI or MSI/X
2509 */
2510static int
2511em_setup_msix(struct adapter *adapter)
2512{
2513	device_t dev = adapter->dev;
2514	int val = 0;
2515
2516
2517	/* Setup MSI/X for Hartwell */
2518	if ((adapter->hw.mac.type == e1000_82574) &&
2519	    (em_enable_msix == TRUE)) {
2520		/* Map the MSIX BAR */
2521		int rid = PCIR_BAR(EM_MSIX_BAR);
2522		adapter->msix_mem = bus_alloc_resource_any(dev,
2523		    SYS_RES_MEMORY, &rid, RF_ACTIVE);
2524		if (!adapter->msix_mem) {
2525			/* May not be enabled */
2526			device_printf(adapter->dev,
2527			    "Unable to map MSIX table\n");
2528			goto msi;
2529		}
2530		val = pci_msix_count(dev);
2531		if (val != 5) {
2532			bus_release_resource(dev, SYS_RES_MEMORY,
2533			    PCIR_BAR(EM_MSIX_BAR), adapter->msix_mem);
2534			adapter->msix_mem = NULL;
2535			device_printf(adapter->dev,
2536			    "MSIX vectors wrong, using MSI\n");
2537			goto msi;
2538		}
2539		if (em_msix_queues == 2) {
2540			val = 5;
2541			adapter->num_queues = 2;
2542		} else {
2543			val = 3;
2544			adapter->num_queues = 1;
2545		}
2546		if (pci_alloc_msix(dev, &val) == 0) {
2547			device_printf(adapter->dev,
2548			    "Using MSIX interrupts "
2549			    "with %d vectors\n", val);
2550		}
2551
2552		return (val);
2553	}
2554msi:
2555	val = pci_msi_count(dev);
2556	if (val == 1 && pci_alloc_msi(dev, &val) == 0) {
2557		adapter->msix = 1;
2558		device_printf(adapter->dev, "Using MSI interrupt\n");
2559		return (val);
2560	}
2561	/* Should only happen due to manual intervention */
2562	device_printf(adapter->dev, "Setup MSIX failure\n");
2563	return (0);
2564}
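
/*
 * Vector budget summary for the code above: the 82574 advertises five
 * MSIX vectors, so em_msix_queues == 2 uses all five (two RX, two TX,
 * one link) while the single-queue case asks for three (one RX, one TX,
 * one link); any other pci_msix_count() result falls back to MSI.
 */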
2565
2566
2567/*********************************************************************
2568 *
2569 *  Initialize the hardware to a configuration
2570 *  as specified by the adapter structure.
2571 *
2572 **********************************************************************/
2573static void
2574em_reset(struct adapter *adapter)
2575{
2576	device_t	dev = adapter->dev;
2577	struct e1000_hw	*hw = &adapter->hw;
2578	u16		rx_buffer_size;
2579
2580	INIT_DEBUGOUT("em_reset: begin");
2581
2582	/* Set up smart power down as default off on newer adapters. */
2583	if (!em_smart_pwr_down && (hw->mac.type == e1000_82571 ||
2584	    hw->mac.type == e1000_82572)) {
2585		u16 phy_tmp = 0;
2586
2587		/* Speed up time to link by disabling smart power down. */
2588		e1000_read_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, &phy_tmp);
2589		phy_tmp &= ~IGP02E1000_PM_SPD;
2590		e1000_write_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, phy_tmp);
2591	}
2592
2593	/*
2594	 * These parameters control the automatic generation (Tx) and
2595	 * response (Rx) to Ethernet PAUSE frames.
2596	 * - High water mark should allow for at least two frames to be
2597	 *   received after sending an XOFF.
2598	 * - Low water mark works best when it is very near the high water mark.
2599	 *   This allows the receiver to restart by sending XON when it has
2600	 *   drained a bit. Here we use an arbitrary value of 1500 which will
2601	 *   restart after one full frame is pulled from the buffer. There
2602	 *   could be several smaller frames in the buffer and if so they will
2603	 *   not trigger the XON until their total number reduces the buffer
2604	 *   by 1500.
2605	 * - The pause time is fairly large at 1000 x 512ns = 512 usec.
2606	 */
2607	rx_buffer_size = ((E1000_READ_REG(hw, E1000_PBA) & 0xffff) << 10 );
2608
2609	hw->fc.high_water = rx_buffer_size -
2610	    roundup2(adapter->max_frame_size, 1024);
2611	hw->fc.low_water = hw->fc.high_water - 1500;
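	/*
	 * Worked example (illustrative numbers): a part reporting 20KB of
	 * RX packet buffer gives rx_buffer_size = 20 << 10 = 20480 bytes;
	 * with a 1518-byte max frame, roundup2(1518, 1024) = 2048, so
	 * high_water = 18432 and low_water = 16932.
	 */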
2612
2613	if (hw->mac.type == e1000_80003es2lan)
2614		hw->fc.pause_time = 0xFFFF;
2615	else
2616		hw->fc.pause_time = EM_FC_PAUSE_TIME;
2617
2618	hw->fc.send_xon = TRUE;
2619
2620	/* Set Flow control, use the tunable location if sane */
2621	if ((em_fc_setting >= 0) && (em_fc_setting < 4))
2622		hw->fc.requested_mode = em_fc_setting;
2623	else
2624		hw->fc.requested_mode = e1000_fc_none;
2625
2626	/* Override - workaround for PCHLAN issue */
2627	if (hw->mac.type == e1000_pchlan)
2628		hw->fc.requested_mode = e1000_fc_rx_pause;
2629
2630	/* Issue a global reset */
2631	e1000_reset_hw(hw);
2632	E1000_WRITE_REG(hw, E1000_WUC, 0);
2633
2634	if (e1000_init_hw(hw) < 0) {
2635		device_printf(dev, "Hardware Initialization Failed\n");
2636		return;
2637	}
2638
2639	E1000_WRITE_REG(hw, E1000_VET, ETHERTYPE_VLAN);
2640	e1000_get_phy_info(hw);
2641	e1000_check_for_link(hw);
2642	return;
2643}
2644
2645/*********************************************************************
2646 *
2647 *  Setup networking device structure and register an interface.
2648 *
2649 **********************************************************************/
2650static void
2651em_setup_interface(device_t dev, struct adapter *adapter)
2652{
2653	struct ifnet   *ifp;
2654
2655	INIT_DEBUGOUT("em_setup_interface: begin");
2656
2657	ifp = adapter->ifp = if_alloc(IFT_ETHER);
2658	if (ifp == NULL)
2659		panic("%s: can not if_alloc()", device_get_nameunit(dev));
2660	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
2661	ifp->if_mtu = ETHERMTU;
2662	ifp->if_init =  em_init;
2663	ifp->if_softc = adapter;
2664	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
2665	ifp->if_ioctl = em_ioctl;
2666	ifp->if_start = em_start;
2667	IFQ_SET_MAXLEN(&ifp->if_snd, adapter->num_tx_desc - 1);
2668	ifp->if_snd.ifq_drv_maxlen = adapter->num_tx_desc - 1;
2669	IFQ_SET_READY(&ifp->if_snd);
2670
2671	ether_ifattach(ifp, adapter->hw.mac.addr);
2672
2673	ifp->if_capabilities = ifp->if_capenable = 0;
2674
2675#ifdef EM_MULTIQUEUE
2676	/* Multiqueue tx functions */
2677	ifp->if_transmit = em_mq_start;
2678	ifp->if_qflush = em_qflush;
2679#endif
2680
2681	ifp->if_capabilities |= IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM;
2682	ifp->if_capenable |= IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM;
2683
2684	/* Enable TSO by default, can disable with ifconfig */
2685	ifp->if_capabilities |= IFCAP_TSO4;
2686	ifp->if_capenable |= IFCAP_TSO4;
2687
2688	/*
2689	 * Tell the upper layer(s) we
2690	 * support full VLAN capability
2691	 */
2692	ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header);
2693	ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU;
2694	ifp->if_capenable |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU;
2695
2696	/*
2697	** Don't turn this on by default; if vlans are
2698	** created on another pseudo device (e.g. lagg)
2699	** then vlan events are not passed through, breaking
2700	** operation, but with HW FILTER off it works. If
2701	** using vlans directly on the em driver you can
2702	** enable this and get full hardware tag filtering.
2703	*/
2704	ifp->if_capabilities |= IFCAP_VLAN_HWFILTER;
2705
2706#ifdef DEVICE_POLLING
2707	ifp->if_capabilities |= IFCAP_POLLING;
2708#endif
2709
2710	/* Enable All WOL methods by default */
2711	if (adapter->wol) {
2712		ifp->if_capabilities |= IFCAP_WOL;
2713		ifp->if_capenable |= IFCAP_WOL;
2714	}
2715
2716	/*
2717	 * Specify the media types supported by this adapter and register
2718	 * callbacks to update media and link information
2719	 */
2720	ifmedia_init(&adapter->media, IFM_IMASK,
2721	    em_media_change, em_media_status);
2722	if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
2723	    (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
2724		u_char fiber_type = IFM_1000_SX;	/* default type */
2725
2726		ifmedia_add(&adapter->media, IFM_ETHER | fiber_type | IFM_FDX,
2727			    0, NULL);
2728		ifmedia_add(&adapter->media, IFM_ETHER | fiber_type, 0, NULL);
2729	} else {
2730		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T, 0, NULL);
2731		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T | IFM_FDX,
2732			    0, NULL);
2733		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX,
2734			    0, NULL);
2735		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX | IFM_FDX,
2736			    0, NULL);
2737		if (adapter->hw.phy.type != e1000_phy_ife) {
2738			ifmedia_add(&adapter->media,
2739				IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
2740			ifmedia_add(&adapter->media,
2741				IFM_ETHER | IFM_1000_T, 0, NULL);
2742		}
2743	}
2744	ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL);
2745	ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO);
2746}
2747
2748
2749/*
2750 * Manage DMA'able memory.
2751 */
2752static void
2753em_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
2754{
2755	if (error)
2756		return;
2757	*(bus_addr_t *) arg = segs[0].ds_addr;
2758}
2759
2760static int
2761em_dma_malloc(struct adapter *adapter, bus_size_t size,
2762        struct em_dma_alloc *dma, int mapflags)
2763{
2764	int error;
2765
2766	error = bus_dma_tag_create(bus_get_dma_tag(adapter->dev), /* parent */
2767				EM_DBA_ALIGN, 0,	/* alignment, bounds */
2768				BUS_SPACE_MAXADDR,	/* lowaddr */
2769				BUS_SPACE_MAXADDR,	/* highaddr */
2770				NULL, NULL,		/* filter, filterarg */
2771				size,			/* maxsize */
2772				1,			/* nsegments */
2773				size,			/* maxsegsize */
2774				0,			/* flags */
2775				NULL,			/* lockfunc */
2776				NULL,			/* lockarg */
2777				&dma->dma_tag);
2778	if (error) {
2779		device_printf(adapter->dev,
2780		    "%s: bus_dma_tag_create failed: %d\n",
2781		    __func__, error);
2782		goto fail_0;
2783	}
2784
2785	error = bus_dmamem_alloc(dma->dma_tag, (void**) &dma->dma_vaddr,
2786	    BUS_DMA_NOWAIT | BUS_DMA_COHERENT, &dma->dma_map);
2787	if (error) {
2788		device_printf(adapter->dev,
2789		    "%s: bus_dmamem_alloc(%ju) failed: %d\n",
2790		    __func__, (uintmax_t)size, error);
2791		goto fail_2;
2792	}
2793
2794	dma->dma_paddr = 0;
2795	error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
2796	    size, em_dmamap_cb, &dma->dma_paddr, mapflags | BUS_DMA_NOWAIT);
2797	if (error || dma->dma_paddr == 0) {
2798		device_printf(adapter->dev,
2799		    "%s: bus_dmamap_load failed: %d\n",
2800		    __func__, error);
2801		goto fail_3;
2802	}
2803
2804	return (0);
2805
2806fail_3:
2807	bus_dmamap_unload(dma->dma_tag, dma->dma_map);
2808fail_2:
2809	bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
2810	bus_dma_tag_destroy(dma->dma_tag);
2811fail_0:
2812	dma->dma_map = NULL;
2813	dma->dma_tag = NULL;
2814
2815	return (error);
2816}
2817
2818static void
2819em_dma_free(struct adapter *adapter, struct em_dma_alloc *dma)
2820{
2821	if (dma->dma_tag == NULL)
2822		return;
2823	if (dma->dma_map != NULL) {
2824		bus_dmamap_sync(dma->dma_tag, dma->dma_map,
2825		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
2826		bus_dmamap_unload(dma->dma_tag, dma->dma_map);
2827		bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
2828		dma->dma_map = NULL;
2829	}
2830	bus_dma_tag_destroy(dma->dma_tag);
2831	dma->dma_tag = NULL;
2832}
2833
2834
2835/*********************************************************************
2836 *
2837 *  Allocate memory for the transmit and receive rings, and then
2838 *  the descriptors associated with each, called only once at attach.
2839 *
2840 **********************************************************************/
2841static int
2842em_allocate_queues(struct adapter *adapter)
2843{
2844	device_t		dev = adapter->dev;
2845	struct tx_ring		*txr = NULL;
2846	struct rx_ring		*rxr = NULL;
2847	int rsize, tsize, error = E1000_SUCCESS;
2848	int txconf = 0, rxconf = 0;
2849
2850
2851	/* Allocate the TX ring struct memory */
2852	if (!(adapter->tx_rings =
2853	    (struct tx_ring *) malloc(sizeof(struct tx_ring) *
2854	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2855		device_printf(dev, "Unable to allocate TX ring memory\n");
2856		error = ENOMEM;
2857		goto fail;
2858	}
2859
2860	/* Now allocate the RX */
2861	if (!(adapter->rx_rings =
2862	    (struct rx_ring *) malloc(sizeof(struct rx_ring) *
2863	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2864		device_printf(dev, "Unable to allocate RX ring memory\n");
2865		error = ENOMEM;
2866		goto rx_fail;
2867	}
2868
2869	tsize = roundup2(adapter->num_tx_desc *
2870	    sizeof(struct e1000_tx_desc), EM_DBA_ALIGN);
2871	/*
2872	 * Now set up the TX queues, txconf is needed to handle the
2873	 * possibility that things fail midcourse and we need to
2874	 * undo memory gracefully
2875	 */
2876	for (int i = 0; i < adapter->num_queues; i++, txconf++) {
2877		/* Set up some basics */
2878		txr = &adapter->tx_rings[i];
2879		txr->adapter = adapter;
2880		txr->me = i;
2881
2882		/* Initialize the TX lock */
2883		snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
2884		    device_get_nameunit(dev), txr->me);
2885		mtx_init(&txr->tx_mtx, txr->mtx_name, NULL, MTX_DEF);
2886
2887		if (em_dma_malloc(adapter, tsize,
2888			&txr->txdma, BUS_DMA_NOWAIT)) {
2889			device_printf(dev,
2890			    "Unable to allocate TX Descriptor memory\n");
2891			error = ENOMEM;
2892			goto err_tx_desc;
2893		}
2894		txr->tx_base = (struct e1000_tx_desc *)txr->txdma.dma_vaddr;
2895		bzero((void *)txr->tx_base, tsize);
2896
2897		if (em_allocate_transmit_buffers(txr)) {
2898			device_printf(dev,
2899			    "Critical Failure setting up transmit buffers\n");
2900			error = ENOMEM;
2901			goto err_tx_desc;
2902		}
2903#if __FreeBSD_version >= 800000
2904		/* Allocate a buf ring */
2905		txr->br = buf_ring_alloc(4096, M_DEVBUF,
2906		    M_WAITOK, &txr->tx_mtx);
2907#endif
2908	}
2909
2910	/*
2911	 * Next the RX queues...
2912	 */
2913	rsize = roundup2(adapter->num_rx_desc *
2914	    sizeof(struct e1000_rx_desc), EM_DBA_ALIGN);
2915	for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
2916		rxr = &adapter->rx_rings[i];
2917		rxr->adapter = adapter;
2918		rxr->me = i;
2919
2920		/* Initialize the RX lock */
2921		snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
2922		    device_get_nameunit(dev), rxr->me);
2923		mtx_init(&rxr->rx_mtx, rxr->mtx_name, NULL, MTX_DEF);
2924
2925		if (em_dma_malloc(adapter, rsize,
2926			&rxr->rxdma, BUS_DMA_NOWAIT)) {
2927			device_printf(dev,
2928			    "Unable to allocate RX Descriptor memory\n");
2929			error = ENOMEM;
2930			goto err_rx_desc;
2931		}
2932		rxr->rx_base = (struct e1000_rx_desc *)rxr->rxdma.dma_vaddr;
2933		bzero((void *)rxr->rx_base, rsize);
2934
2935		/* Allocate receive buffers for the ring */
2936		if (em_allocate_receive_buffers(rxr)) {
2937			device_printf(dev,
2938			    "Critical Failure setting up receive buffers\n");
2939			error = ENOMEM;
2940			goto err_rx_desc;
2941		}
2942	}
2943
2944	return (0);
2945
2946err_rx_desc:
2947	for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
2948		em_dma_free(adapter, &rxr->rxdma);
2949err_tx_desc:
2950	for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
2951		em_dma_free(adapter, &txr->txdma);
2952	free(adapter->rx_rings, M_DEVBUF);
2953rx_fail:
#if __FreeBSD_version >= 800000
	/* The buf ring only exists on 8.x and may not be allocated yet */
	if (txr != NULL && txr->br != NULL)
		buf_ring_free(txr->br, M_DEVBUF);
#endif
2955	free(adapter->tx_rings, M_DEVBUF);
2956fail:
2957	return (error);
2958}
2959
2960
2961/*********************************************************************
2962 *
2963 *  Allocate memory for tx_buffer structures. The tx_buffer stores all
2964 *  the information needed to transmit a packet on the wire. This is
2965 *  called only once at attach, setup is done every reset.
2966 *
2967 **********************************************************************/
2968static int
2969em_allocate_transmit_buffers(struct tx_ring *txr)
2970{
2971	struct adapter *adapter = txr->adapter;
2972	device_t dev = adapter->dev;
2973	struct em_buffer *txbuf;
2974	int error, i;
2975
2976	/*
2977	 * Setup DMA descriptor areas.
2978	 */
2979	if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
2980			       1, 0,			/* alignment, bounds */
2981			       BUS_SPACE_MAXADDR,	/* lowaddr */
2982			       BUS_SPACE_MAXADDR,	/* highaddr */
2983			       NULL, NULL,		/* filter, filterarg */
2984			       EM_TSO_SIZE,		/* maxsize */
2985			       EM_MAX_SCATTER,		/* nsegments */
2986			       PAGE_SIZE,		/* maxsegsize */
2987			       0,			/* flags */
2988			       NULL,			/* lockfunc */
2989			       NULL,			/* lockfuncarg */
2990			       &txr->txtag))) {
2991		device_printf(dev,"Unable to allocate TX DMA tag\n");
2992		goto fail;
2993	}
2994
2995	if (!(txr->tx_buffers =
2996	    (struct em_buffer *) malloc(sizeof(struct em_buffer) *
2997	    adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2998		device_printf(dev, "Unable to allocate tx_buffer memory\n");
2999		error = ENOMEM;
3000		goto fail;
3001	}
3002
3003	/* Create the descriptor buffer dma maps */
3004	txbuf = txr->tx_buffers;
3005	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3006		error = bus_dmamap_create(txr->txtag, 0, &txbuf->map);
3007		if (error != 0) {
3008			device_printf(dev, "Unable to create TX DMA map\n");
3009			goto fail;
3010		}
3011	}
3012
3013	return 0;
3014fail:
3015	/* We free all, it handles case where we are in the middle */
3016	em_free_transmit_structures(adapter);
3017	return (error);
3018}
3019
3020/*********************************************************************
3021 *
3022 *  Initialize a transmit ring.
3023 *
3024 **********************************************************************/
3025static void
3026em_setup_transmit_ring(struct tx_ring *txr)
3027{
3028	struct adapter *adapter = txr->adapter;
3029	struct em_buffer *txbuf;
3030	int i;
3031
3032	/* Clear the old descriptor contents */
3033	EM_TX_LOCK(txr);
3034	bzero((void *)txr->tx_base,
3035	      (sizeof(struct e1000_tx_desc)) * adapter->num_tx_desc);
3036	/* Reset indices */
3037	txr->next_avail_desc = 0;
3038	txr->next_to_clean = 0;
3039
3040	/* Free any existing tx buffers. */
3041	txbuf = txr->tx_buffers;
3042	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3043		if (txbuf->m_head != NULL) {
3044			bus_dmamap_sync(txr->txtag, txbuf->map,
3045			    BUS_DMASYNC_POSTWRITE);
3046			bus_dmamap_unload(txr->txtag, txbuf->map);
3047			m_freem(txbuf->m_head);
3048			txbuf->m_head = NULL;
3049		}
3050		/* clear the watch index */
3051		txbuf->next_eop = -1;
3052	}
3053
3054	/* Set number of descriptors available */
3055	txr->tx_avail = adapter->num_tx_desc;
3056
3057	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3058	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3059	EM_TX_UNLOCK(txr);
3060}
3061
3062/*********************************************************************
3063 *
3064 *  Initialize all transmit rings.
3065 *
3066 **********************************************************************/
3067static void
3068em_setup_transmit_structures(struct adapter *adapter)
3069{
3070	struct tx_ring *txr = adapter->tx_rings;
3071
3072	for (int i = 0; i < adapter->num_queues; i++, txr++)
3073		em_setup_transmit_ring(txr);
3074
3075	return;
3076}
3077
3078/*********************************************************************
3079 *
3080 *  Enable transmit unit.
3081 *
3082 **********************************************************************/
3083static void
3084em_initialize_transmit_unit(struct adapter *adapter)
3085{
3086	struct tx_ring	*txr = adapter->tx_rings;
3087	struct e1000_hw	*hw = &adapter->hw;
3088	u32	tctl, tarc, tipg = 0;
3089
3090	INIT_DEBUGOUT("em_initialize_transmit_unit: begin");
3091
3092	for (int i = 0; i < adapter->num_queues; i++, txr++) {
3093		u64 bus_addr = txr->txdma.dma_paddr;
3094		/* Base and Len of TX Ring */
3095		E1000_WRITE_REG(hw, E1000_TDLEN(i),
3096	    	    adapter->num_tx_desc * sizeof(struct e1000_tx_desc));
3097		E1000_WRITE_REG(hw, E1000_TDBAH(i),
3098	    	    (u32)(bus_addr >> 32));
3099		E1000_WRITE_REG(hw, E1000_TDBAL(i),
3100	    	    (u32)bus_addr);
3101		/* Init the HEAD/TAIL indices */
3102		E1000_WRITE_REG(hw, E1000_TDT(i), 0);
3103		E1000_WRITE_REG(hw, E1000_TDH(i), 0);
3104
3105		HW_DEBUGOUT2("Base = %x, Length = %x\n",
3106		    E1000_READ_REG(&adapter->hw, E1000_TDBAL(i)),
3107		    E1000_READ_REG(&adapter->hw, E1000_TDLEN(i)));
3108
3109		txr->watchdog_check = FALSE;
3110	}
3111
3112	/* Set the default values for the Tx Inter Packet Gap timer */
3113	switch (adapter->hw.mac.type) {
3114	case e1000_82542:
3115		tipg = DEFAULT_82542_TIPG_IPGT;
3116		tipg |= DEFAULT_82542_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT;
3117		tipg |= DEFAULT_82542_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT;
3118		break;
3119	case e1000_80003es2lan:
3120		tipg = DEFAULT_82543_TIPG_IPGR1;
3121		tipg |= DEFAULT_80003ES2LAN_TIPG_IPGR2 <<
3122		    E1000_TIPG_IPGR2_SHIFT;
3123		break;
3124	default:
3125		if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
3126		    (adapter->hw.phy.media_type ==
3127		    e1000_media_type_internal_serdes))
3128			tipg = DEFAULT_82543_TIPG_IPGT_FIBER;
3129		else
3130			tipg = DEFAULT_82543_TIPG_IPGT_COPPER;
3131		tipg |= DEFAULT_82543_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT;
3132		tipg |= DEFAULT_82543_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT;
3133	}
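	/*
	 * The TIPG register packs all three gap fields: IPGT lives in the
	 * low bits, with IPGR1 and IPGR2 OR'd in above at their respective
	 * shifts, which is why each case builds tipg by shift-and-or.
	 */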
3134
3135	E1000_WRITE_REG(&adapter->hw, E1000_TIPG, tipg);
3136	E1000_WRITE_REG(&adapter->hw, E1000_TIDV, adapter->tx_int_delay.value);
3137
3138	if (adapter->hw.mac.type >= e1000_82540)
3139		E1000_WRITE_REG(&adapter->hw, E1000_TADV,
3140		    adapter->tx_abs_int_delay.value);
3141
3142	if ((adapter->hw.mac.type == e1000_82571) ||
3143	    (adapter->hw.mac.type == e1000_82572)) {
3144		tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
3145		tarc |= SPEED_MODE_BIT;
3146		E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3147	} else if (adapter->hw.mac.type == e1000_80003es2lan) {
3148		tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
3149		tarc |= 1;
3150		E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3151		tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(1));
3152		tarc |= 1;
3153		E1000_WRITE_REG(&adapter->hw, E1000_TARC(1), tarc);
3154	}
3155
3156	adapter->txd_cmd = E1000_TXD_CMD_IFCS;
3157	if (adapter->tx_int_delay.value > 0)
3158		adapter->txd_cmd |= E1000_TXD_CMD_IDE;
3159
3160	/* Program the Transmit Control Register */
3161	tctl = E1000_READ_REG(&adapter->hw, E1000_TCTL);
3162	tctl &= ~E1000_TCTL_CT;
3163	tctl |= (E1000_TCTL_PSP | E1000_TCTL_RTLC | E1000_TCTL_EN |
3164		   (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT));
3165
3166	if (adapter->hw.mac.type >= e1000_82571)
3167		tctl |= E1000_TCTL_MULR;
3168
3169	/* This write will effectively turn on the transmit unit. */
3170	E1000_WRITE_REG(&adapter->hw, E1000_TCTL, tctl);
3171
3172}
3173
3174
3175/*********************************************************************
3176 *
3177 *  Free all transmit rings.
3178 *
3179 **********************************************************************/
3180static void
3181em_free_transmit_structures(struct adapter *adapter)
3182{
3183	struct tx_ring *txr = adapter->tx_rings;
3184
3185	for (int i = 0; i < adapter->num_queues; i++, txr++) {
3186		EM_TX_LOCK(txr);
3187		em_free_transmit_buffers(txr);
3188		em_dma_free(adapter, &txr->txdma);
3189		EM_TX_UNLOCK(txr);
3190		EM_TX_LOCK_DESTROY(txr);
3191	}
3192
3193	free(adapter->tx_rings, M_DEVBUF);
3194}
3195
3196/*********************************************************************
3197 *
3198 *  Free transmit ring related data structures.
3199 *
3200 **********************************************************************/
3201static void
3202em_free_transmit_buffers(struct tx_ring *txr)
3203{
3204	struct adapter		*adapter = txr->adapter;
3205	struct em_buffer	*txbuf;
3206
3207	INIT_DEBUGOUT("free_transmit_ring: begin");
3208
3209	if (txr->tx_buffers == NULL)
3210		return;
3211
3212	for (int i = 0; i < adapter->num_tx_desc; i++) {
3213		txbuf = &txr->tx_buffers[i];
3214		if (txbuf->m_head != NULL) {
3215			bus_dmamap_sync(txr->txtag, txbuf->map,
3216			    BUS_DMASYNC_POSTWRITE);
3217			bus_dmamap_unload(txr->txtag,
3218			    txbuf->map);
3219			m_freem(txbuf->m_head);
3220			txbuf->m_head = NULL;
3221			if (txbuf->map != NULL) {
3222				bus_dmamap_destroy(txr->txtag,
3223				    txbuf->map);
3224				txbuf->map = NULL;
3225			}
3226		} else if (txbuf->map != NULL) {
3227			bus_dmamap_unload(txr->txtag,
3228			    txbuf->map);
3229			bus_dmamap_destroy(txr->txtag,
3230			    txbuf->map);
3231			txbuf->map = NULL;
3232		}
3233	}
3234#if __FreeBSD_version >= 800000
3235	if (txr->br != NULL)
3236		buf_ring_free(txr->br, M_DEVBUF);
3237#endif
3238	if (txr->tx_buffers != NULL) {
3239		free(txr->tx_buffers, M_DEVBUF);
3240		txr->tx_buffers = NULL;
3241	}
3242	if (txr->txtag != NULL) {
3243		bus_dma_tag_destroy(txr->txtag);
3244		txr->txtag = NULL;
3245	}
3246	return;
3247}
3248
3249
3250/*********************************************************************
3251 *
3252 *  The offload context needs to be set when we transfer the first
3253 *  packet of a particular protocol (TCP/UDP). This routine has been
3254 *  enhanced to deal with inserted VLAN headers, and IPV6 (not complete)
3255 *
3256 *  Added back the old method of keeping the current context type
3257 *  and not setting if unnecessary, as this is reported to be a
3258 *  big performance win.  -jfv
3259 **********************************************************************/
3260static void
3261em_transmit_checksum_setup(struct tx_ring *txr, struct mbuf *mp,
3262    u32 *txd_upper, u32 *txd_lower)
3263{
3264	struct adapter			*adapter = txr->adapter;
3265	struct e1000_context_desc	*TXD = NULL;
3266	struct em_buffer *tx_buffer;
3267	struct ether_vlan_header *eh;
3268	struct ip *ip = NULL;
3269	struct ip6_hdr *ip6;
3270	int cur, ehdrlen;
3271	u32 cmd, hdr_len, ip_hlen;
3272	u16 etype;
3273	u8 ipproto;
3274
3275
3276	cmd = hdr_len = ipproto = 0;
3277	cur = txr->next_avail_desc;
3278
3279	/*
3280	 * Determine where frame payload starts.
3281	 * Jump over vlan headers if already present,
3282	 * helpful for QinQ too.
3283	 */
3284	eh = mtod(mp, struct ether_vlan_header *);
3285	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
3286		etype = ntohs(eh->evl_proto);
3287		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3288	} else {
3289		etype = ntohs(eh->evl_encap_proto);
3290		ehdrlen = ETHER_HDR_LEN;
3291	}
3292
3293	/*
3294	 * We only support TCP/UDP for IPv4 and IPv6 for the moment.
3295	 * TODO: Support SCTP too when it hits the tree.
3296	 */
3297	switch (etype) {
3298	case ETHERTYPE_IP:
3299		ip = (struct ip *)(mp->m_data + ehdrlen);
3300		ip_hlen = ip->ip_hl << 2;
3301
3302		/* Setup of IP header checksum. */
3303		if (mp->m_pkthdr.csum_flags & CSUM_IP) {
3304			/*
3305			 * Start offset for header checksum calculation.
3306			 * End offset for header checksum calculation.
3307			 * Offset of place to put the checksum.
3308			 */
3309			TXD = (struct e1000_context_desc *)
3310			    &txr->tx_base[cur];
3311			TXD->lower_setup.ip_fields.ipcss = ehdrlen;
3312			TXD->lower_setup.ip_fields.ipcse =
3313			    htole16(ehdrlen + ip_hlen);
3314			TXD->lower_setup.ip_fields.ipcso =
3315			    ehdrlen + offsetof(struct ip, ip_sum);
3316			cmd |= E1000_TXD_CMD_IP;
3317			*txd_upper |= E1000_TXD_POPTS_IXSM << 8;
3318		}
3319
3320		if (mp->m_len < ehdrlen + ip_hlen)
3321			return;	/* failure */
3322
3323		hdr_len = ehdrlen + ip_hlen;
3324		ipproto = ip->ip_p;
3325
3326		break;
3327	case ETHERTYPE_IPV6:
3328		ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
3329		ip_hlen = sizeof(struct ip6_hdr); /* XXX: No header stacking. */
3330
3331		if (mp->m_len < ehdrlen + ip_hlen)
3332			return;	/* failure */
3333
3334		/* IPv6 doesn't have a header checksum. */
3335
3336		hdr_len = ehdrlen + ip_hlen;
3337		ipproto = ip6->ip6_nxt;
3338
3339		break;
3340	default:
3341		*txd_upper = 0;
3342		*txd_lower = 0;
3343		return;
3344	}
3345
3346	switch (ipproto) {
3347	case IPPROTO_TCP:
3348		if (mp->m_pkthdr.csum_flags & CSUM_TCP) {
3349			*txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
3350			*txd_upper |= E1000_TXD_POPTS_TXSM << 8;
3351			/* no need for context if already set */
3352			if (txr->last_hw_offload == CSUM_TCP)
3353				return;
3354			txr->last_hw_offload = CSUM_TCP;
3355			/*
3356			 * Start offset for payload checksum calculation.
3357			 * End offset for payload checksum calculation.
3358			 * Offset of place to put the checksum.
3359			 */
3360			TXD = (struct e1000_context_desc *)
3361			    &txr->tx_base[cur];
3362			TXD->upper_setup.tcp_fields.tucss = hdr_len;
3363			TXD->upper_setup.tcp_fields.tucse = htole16(0);
3364			TXD->upper_setup.tcp_fields.tucso =
3365			    hdr_len + offsetof(struct tcphdr, th_sum);
3366			cmd |= E1000_TXD_CMD_TCP;
3367		}
3368		break;
3369	case IPPROTO_UDP:
3370	{
3371		if (mp->m_pkthdr.csum_flags & CSUM_UDP) {
3372			*txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
3373			*txd_upper |= E1000_TXD_POPTS_TXSM << 8;
3374			/* no need for context if already set */
3375			if (txr->last_hw_offload == CSUM_UDP)
3376				return;
3377			txr->last_hw_offload = CSUM_UDP;
3378			/*
3379			 * Start offset for header checksum calculation.
3380			 * End offset for header checksum calculation.
3381			 * Offset of place to put the checksum.
3382			 */
3383			TXD = (struct e1000_context_desc *)
3384			    &txr->tx_base[cur];
3385			TXD->upper_setup.tcp_fields.tucss = hdr_len;
3386			TXD->upper_setup.tcp_fields.tucse = htole16(0);
3387			TXD->upper_setup.tcp_fields.tucso =
3388			    hdr_len + offsetof(struct udphdr, uh_sum);
3389		}
3390		/* Fall Thru */
3391	}
3392	default:
3393		break;
3394	}
3395
3396	TXD->tcp_seg_setup.data = htole32(0);
3397	TXD->cmd_and_length =
3398	    htole32(adapter->txd_cmd | E1000_TXD_CMD_DEXT | cmd);
3399	tx_buffer = &txr->tx_buffers[cur];
3400	tx_buffer->m_head = NULL;
3401	tx_buffer->next_eop = -1;
3402
3403	if (++cur == adapter->num_tx_desc)
3404		cur = 0;
3405
3406	txr->tx_avail--;
3407	txr->next_avail_desc = cur;
3408}
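
/*
 * Worked example for the offsets above (untagged IPv4/TCP, 20-byte IP
 * header): ehdrlen = 14 and ip_hlen = 20, so ipcss = 14,
 * ipcso = 14 + offsetof(struct ip, ip_sum) = 24, hdr_len = tucss = 34,
 * and tucso = 34 + offsetof(struct tcphdr, th_sum) = 50.
 */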
3409
3410
3411/**********************************************************************
3412 *
3413 *  Setup work for hardware segmentation offload (TSO)
3414 *
3415 **********************************************************************/
3416static bool
3417em_tso_setup(struct tx_ring *txr, struct mbuf *mp, u32 *txd_upper,
3418   u32 *txd_lower)
3419{
3420	struct adapter			*adapter = txr->adapter;
3421	struct e1000_context_desc	*TXD;
3422	struct em_buffer		*tx_buffer;
3423	struct ether_vlan_header	*eh;
3424	struct ip			*ip;
3425	struct ip6_hdr			*ip6;
3426	struct tcphdr			*th;
3427	int cur, ehdrlen, hdr_len, ip_hlen, isip6;
3428	u16 etype;
3429
3430	/*
3431	 * This function could/should be extended to support IP/IPv6
3432	 * fragmentation as well.  But as they say, one step at a time.
3433	 */
3434
3435	/*
3436	 * Determine where frame payload starts.
3437	 * Jump over vlan headers if already present,
3438	 * helpful for QinQ too.
3439	 */
3440	eh = mtod(mp, struct ether_vlan_header *);
3441	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
3442		etype = ntohs(eh->evl_proto);
3443		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3444	} else {
3445		etype = ntohs(eh->evl_encap_proto);
3446		ehdrlen = ETHER_HDR_LEN;
3447	}
3448
3449	/* Ensure we have at least the IP+TCP header in the first mbuf. */
3450	if (mp->m_len < ehdrlen + sizeof(struct ip) + sizeof(struct tcphdr))
3451		return FALSE;	/* -1 */
3452
3453	/*
3454	 * We only support TCP for IPv4 and IPv6 (notyet) for the moment.
3455	 * TODO: Support SCTP too when it hits the tree.
3456	 */
3457	switch (etype) {
3458	case ETHERTYPE_IP:
3459		isip6 = 0;
3460		ip = (struct ip *)(mp->m_data + ehdrlen);
3461		if (ip->ip_p != IPPROTO_TCP)
3462			return FALSE;	/* 0 */
3463		ip->ip_len = 0;
3464		ip->ip_sum = 0;
3465		ip_hlen = ip->ip_hl << 2;
3466		if (mp->m_len < ehdrlen + ip_hlen + sizeof(struct tcphdr))
3467			return FALSE;	/* -1 */
3468		th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
3469#if 1
3470		th->th_sum = in_pseudo(ip->ip_src.s_addr,
3471		    ip->ip_dst.s_addr, htons(IPPROTO_TCP));
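		/*
		 * Note that the pseudo-header checksum seeded here leaves
		 * out the TCP length: the hardware folds the per-segment
		 * payload length into each packet it carves from the burst.
		 */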
3472#else
3473		th->th_sum = mp->m_pkthdr.csum_data;
3474#endif
3475		break;
3476	case ETHERTYPE_IPV6:
3477		isip6 = 1;
3478		return FALSE;			/* Not supported yet. */
3479		ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
3480		if (ip6->ip6_nxt != IPPROTO_TCP)
3481			return FALSE;	/* 0 */
3482		ip6->ip6_plen = 0;
3483		ip_hlen = sizeof(struct ip6_hdr); /* XXX: no header stacking. */
3484		if (mp->m_len < ehdrlen + ip_hlen + sizeof(struct tcphdr))
3485			return FALSE;	/* -1 */
3486		th = (struct tcphdr *)((caddr_t)ip6 + ip_hlen);
3487#if 0
3488		th->th_sum = in6_pseudo(ip6->ip6_src, ip6->ip6_dst,
3489		    htons(IPPROTO_TCP));	/* XXX: function not yet available. */
3490#else
3491		th->th_sum = mp->m_pkthdr.csum_data;
3492#endif
3493		break;
3494	default:
3495		return FALSE;
3496	}
3497	hdr_len = ehdrlen + ip_hlen + (th->th_off << 2);
3498
3499	*txd_lower = (E1000_TXD_CMD_DEXT |	/* Extended descr type */
3500		      E1000_TXD_DTYP_D |	/* Data descr type */
3501		      E1000_TXD_CMD_TSE);	/* Do TSE on this packet */
3502
3503	/* IP and/or TCP header checksum calculation and insertion. */
3504	*txd_upper = ((isip6 ? 0 : E1000_TXD_POPTS_IXSM) |
3505		      E1000_TXD_POPTS_TXSM) << 8;
3506
3507	cur = txr->next_avail_desc;
3508	tx_buffer = &txr->tx_buffers[cur];
3509	TXD = (struct e1000_context_desc *) &txr->tx_base[cur];
3510
3511	/* IPv6 doesn't have a header checksum. */
3512	if (!isip6) {
3513		/*
3514		 * Start offset for header checksum calculation.
3515		 * End offset for header checksum calculation.
3516		 * Offset of place to put the checksum.
3517		 */
3518		TXD->lower_setup.ip_fields.ipcss = ehdrlen;
3519		TXD->lower_setup.ip_fields.ipcse =
3520		    htole16(ehdrlen + ip_hlen - 1);
3521		TXD->lower_setup.ip_fields.ipcso =
3522		    ehdrlen + offsetof(struct ip, ip_sum);
3523	}
3524	/*
3525	 * Start offset for payload checksum calculation.
3526	 * End offset for payload checksum calculation.
3527	 * Offset of place to put the checksum.
3528	 */
3529	TXD->upper_setup.tcp_fields.tucss =
3530	    ehdrlen + ip_hlen;
3531	TXD->upper_setup.tcp_fields.tucse = 0;
3532	TXD->upper_setup.tcp_fields.tucso =
3533	    ehdrlen + ip_hlen + offsetof(struct tcphdr, th_sum);
3534	/*
3535	 * Payload size per packet w/o any headers.
3536	 * Length of all headers up to payload.
3537	 */
3538	TXD->tcp_seg_setup.fields.mss = htole16(mp->m_pkthdr.tso_segsz);
3539	TXD->tcp_seg_setup.fields.hdr_len = hdr_len;
3540
3541	TXD->cmd_and_length = htole32(adapter->txd_cmd |
3542				E1000_TXD_CMD_DEXT |	/* Extended descr */
3543				E1000_TXD_CMD_TSE |	/* TSE context */
3544				(isip6 ? 0 : E1000_TXD_CMD_IP) |
3545				E1000_TXD_CMD_TCP |	/* Do TCP checksum */
3546				(mp->m_pkthdr.len - (hdr_len))); /* Total len */
3547
3548	tx_buffer->m_head = NULL;
3549	tx_buffer->next_eop = -1;
3550
3551	if (++cur == adapter->num_tx_desc)
3552		cur = 0;
3553
3554	txr->tx_avail--;
3555	txr->next_avail_desc = cur;
3556	txr->tx_tso = TRUE;
3557
3558	return TRUE;
3559}
3560
3561
3562/**********************************************************************
3563 *
3564 *  Examine each tx_buffer in the used queue. If the hardware is done
3565 *  processing the packet then free associated resources. The
3566 *  tx_buffer is put back on the free queue.
3567 *
3568 **********************************************************************/
3569static bool
3570em_txeof(struct tx_ring *txr)
3571{
3572	struct adapter	*adapter = txr->adapter;
3573        int first, last, done, num_avail;
3574        struct em_buffer *tx_buffer;
3575        struct e1000_tx_desc   *tx_desc, *eop_desc;
3576	struct ifnet   *ifp = adapter->ifp;
3577
3578	EM_TX_LOCK_ASSERT(txr);
3579
3580        if (txr->tx_avail == adapter->num_tx_desc)
3581                return (FALSE);
3582
3583        num_avail = txr->tx_avail;
3584        first = txr->next_to_clean;
3585        tx_desc = &txr->tx_base[first];
3586        tx_buffer = &txr->tx_buffers[first];
3587	last = tx_buffer->next_eop;
3588        eop_desc = &txr->tx_base[last];
3589
3590	/*
3591	 * Get the index of the first descriptor
3592	 * AFTER the EOP of the first packet, so
3593	 * that a simple comparison terminates the
3594	 * inner while loop.
3595	 */
3596	if (++last == adapter->num_tx_desc)
3597 		last = 0;
3598	done = last;
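	/*
	 * Example: with num_tx_desc = 1024 and the first packet's EOP at
	 * descriptor 1023, 'last' wraps to 0, so the inner loop below
	 * cleans 'first' up through 1023 and stops when it reaches 'done'.
	 */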
3599
3600        bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3601            BUS_DMASYNC_POSTREAD);
3602
3603        while (eop_desc->upper.fields.status & E1000_TXD_STAT_DD) {
3604		/* We clean the range of the packet */
3605		while (first != done) {
3606                	tx_desc->upper.data = 0;
3607                	tx_desc->lower.data = 0;
3608                	tx_desc->buffer_addr = 0;
3609                	++num_avail;
3610
3611			if (tx_buffer->m_head) {
3612				ifp->if_opackets++;
3613				bus_dmamap_sync(txr->txtag,
3614				    tx_buffer->map,
3615				    BUS_DMASYNC_POSTWRITE);
3616				bus_dmamap_unload(txr->txtag,
3617				    tx_buffer->map);
3618
3619                        	m_freem(tx_buffer->m_head);
3620                        	tx_buffer->m_head = NULL;
3621                	}
3622			tx_buffer->next_eop = -1;
3623			txr->watchdog_time = ticks;
3624
3625	                if (++first == adapter->num_tx_desc)
3626				first = 0;
3627
3628	                tx_buffer = &txr->tx_buffers[first];
3629			tx_desc = &txr->tx_base[first];
3630		}
3631		/* See if we can continue to the next packet */
3632		last = tx_buffer->next_eop;
3633		if (last != -1) {
3634        		eop_desc = &txr->tx_base[last];
3635			/* Get new done point */
3636			if (++last == adapter->num_tx_desc) last = 0;
3637			done = last;
3638		} else
3639			break;
3640        }
3641        bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3642            BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3643
3644        txr->next_to_clean = first;
3645
3646        /*
3647         * If we have enough room, clear IFF_DRV_OACTIVE to
3648         * tell the stack that it is OK to send packets.
3649         * If there are no pending descriptors, clear the watchdog.
3650         */
3651        if (num_avail > EM_TX_CLEANUP_THRESHOLD) {
3652                ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
3653                if (num_avail == adapter->num_tx_desc) {
3654			txr->watchdog_check = FALSE;
3655        		txr->tx_avail = num_avail;
3656			return (FALSE);
3657		}
3658        }
3659
3660        txr->tx_avail = num_avail;
3661	return (TRUE);
3662}
3663
3664
3665/*********************************************************************
3666 *
3667 *  Refresh RX descriptor mbufs from system mbuf buffer pool.
3668 *
3669 **********************************************************************/
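/*
 * A note on the protocol used here: next_to_refresh trails the ring
 * consumer (next_to_check), each iteration hands one freshly mapped
 * cluster back to the hardware, and the single RDT write at the end
 * publishes the whole batch, which is cheaper than bumping the tail
 * register once per descriptor.
 */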
3670static void
3671em_refresh_mbufs(struct rx_ring *rxr, int limit)
3672{
3673	struct adapter		*adapter = rxr->adapter;
3674	struct mbuf		*m;
3675	bus_dma_segment_t	segs[1];
3676	bus_dmamap_t		map;
3677	struct em_buffer	*rxbuf;
3678	int			i, error, nsegs, cleaned;
3679
3680	i = rxr->next_to_refresh;
3681	cleaned = -1;
3682	while (i != limit) {
3683		m = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR);
3684		if (m == NULL)
3685			goto update;
3686		m->m_len = m->m_pkthdr.len = MCLBYTES;
3687
3688		if (adapter->max_frame_size <= (MCLBYTES - ETHER_ALIGN))
3689			m_adj(m, ETHER_ALIGN);
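		/*
		 * ETHER_ALIGN is 2, so the m_adj() above puts the 14 byte
		 * Ethernet header at offset 2 and thus the IP header on a
		 * 4 byte boundary (2 + 14 = 16).  This is only safe while
		 * the frame fits in MCLBYTES - ETHER_ALIGN, hence the test.
		 */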
3690
3691		/*
3692		 * Using memory from the mbuf cluster pool, invoke the
3693		 * bus_dma machinery to arrange the memory mapping.
3694		 */
3695		error = bus_dmamap_load_mbuf_sg(rxr->rxtag, rxr->rx_sparemap,
3696		    m, segs, &nsegs, BUS_DMA_NOWAIT);
3697		if (error != 0) {
3698			m_free(m);
3699			goto update;
3700		}
3701
3702		/* If nsegs is wrong then the stack is corrupt. */
3703		KASSERT(nsegs == 1, ("Too many segments returned!"));
3704
3705		rxbuf = &rxr->rx_buffers[i];
3706		if (rxbuf->m_head != NULL)
3707			bus_dmamap_unload(rxr->rxtag, rxbuf->map);
3708
3709		map = rxbuf->map;
3710		rxbuf->map = rxr->rx_sparemap;
3711		rxr->rx_sparemap = map;
3712		bus_dmamap_sync(rxr->rxtag,
3713		    rxbuf->map, BUS_DMASYNC_PREREAD);
3714		rxbuf->m_head = m;
3715		rxr->rx_base[i].buffer_addr = htole64(segs[0].ds_addr);
3716
3717		cleaned = i;
3718		/* Calculate next index */
3719		if (++i == adapter->num_rx_desc)
3720			i = 0;
3721		/* This is the work marker for refresh */
3722		rxr->next_to_refresh = i;
3723	}
3724update:
3725	bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
3726	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3727	if (cleaned != -1) /* Update tail index */
3728		E1000_WRITE_REG(&adapter->hw,
3729		    E1000_RDT(rxr->me), cleaned);
3730
3731	return;
3732}
3733
3734
3735/*********************************************************************
3736 *
3737 *  Allocate memory for rx_buffer structures. Since we use one
3738 *  rx_buffer per received packet, the maximum number of rx_buffer's
3739 *  that we'll need is equal to the number of receive descriptors
3740 *  that we've allocated.
3741 *
3742 **********************************************************************/
3743static int
3744em_allocate_receive_buffers(struct rx_ring *rxr)
3745{
3746	struct adapter		*adapter = rxr->adapter;
3747	device_t		dev = adapter->dev;
3748	struct em_buffer	*rxbuf;
3749	int			error;
3750
3751	rxr->rx_buffers = malloc(sizeof(struct em_buffer) *
3752	    adapter->num_rx_desc, M_DEVBUF, M_NOWAIT | M_ZERO);
3753	if (rxr->rx_buffers == NULL) {
3754		device_printf(dev, "Unable to allocate rx_buffer memory\n");
3755		return (ENOMEM);
3756	}
3757
3758	error = bus_dma_tag_create(bus_get_dma_tag(dev), /* parent */
3759				1, 0,			/* alignment, bounds */
3760				BUS_SPACE_MAXADDR,	/* lowaddr */
3761				BUS_SPACE_MAXADDR,	/* highaddr */
3762				NULL, NULL,		/* filter, filterarg */
3763				MCLBYTES,		/* maxsize */
3764				1,			/* nsegments */
3765				MCLBYTES,		/* maxsegsize */
3766				0,			/* flags */
3767				NULL,			/* lockfunc */
3768				NULL,			/* lockarg */
3769				&rxr->rxtag);
3770	if (error) {
3771		device_printf(dev, "%s: bus_dma_tag_create failed %d\n",
3772		    __func__, error);
3773		goto fail;
3774	}
3775
3776	/* Create the spare map (used by getbuf) */
3777	error = bus_dmamap_create(rxr->rxtag, BUS_DMA_NOWAIT,
3778	     &rxr->rx_sparemap);
3779	if (error) {
3780		device_printf(dev, "%s: bus_dmamap_create failed: %d\n",
3781		    __func__, error);
3782		goto fail;
3783	}
3784
3785	/* Create a DMA map for each receive buffer. */
3786	for (int i = 0; i < adapter->num_rx_desc; i++) {
3787		rxbuf = &rxr->rx_buffers[i];
3788		error = bus_dmamap_create(rxr->rxtag, BUS_DMA_NOWAIT,
3789		    &rxbuf->map);
3790		if (error) {
3791			device_printf(dev, "%s: bus_dmamap_create failed: %d\n",
3792			    __func__, error);
3793			goto fail;
3794		}
3795	}
3796
3797	return (0);
3798
3799fail:
3800	em_free_receive_structures(adapter);
3801	return (error);
3802}
3803
3804
3805/*********************************************************************
3806 *
3807 *  Initialize a receive ring and its buffers.
3808 *
3809 **********************************************************************/
3810static int
3811em_setup_receive_ring(struct rx_ring *rxr)
3812{
3813	struct	adapter 	*adapter = rxr->adapter;
3814	struct em_buffer	*rxbuf;
3815	bus_dma_segment_t	seg[1];
3816	int			rsize, nsegs, error;
3817
3818
3819	/* Clear the ring contents */
3820	EM_RX_LOCK(rxr);
3821	rsize = roundup2(adapter->num_rx_desc *
3822	    sizeof(struct e1000_rx_desc), EM_DBA_ALIGN);
3823	bzero((void *)rxr->rx_base, rsize);
3824
3825	/*
3826	** Free current RX buffer structs and their mbufs
3827	*/
3828	for (int i = 0; i < adapter->num_rx_desc; i++) {
3829		rxbuf = &rxr->rx_buffers[i];
3830		if (rxbuf->m_head != NULL) {
3831			bus_dmamap_sync(rxr->rxtag, rxbuf->map,
3832			    BUS_DMASYNC_POSTREAD);
3833			bus_dmamap_unload(rxr->rxtag, rxbuf->map);
3834			m_freem(rxbuf->m_head);
3835		}
3836	}
3837
3838	/* Now replenish the mbufs */
3839	for (int j = 0; j != adapter->num_rx_desc; ++j) {
3840
3841		rxbuf = &rxr->rx_buffers[j];
3842		rxbuf->m_head = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR);
3843		if (rxbuf->m_head == NULL)
3844			panic("RX ring hdr initialization failed!\n");
3845		rxbuf->m_head->m_len = MCLBYTES;
3846		rxbuf->m_head->m_flags &= ~M_HASFCS; /* we strip it */
3847		rxbuf->m_head->m_pkthdr.len = MCLBYTES;
3848
3849		/* Get the memory mapping */
3850		error = bus_dmamap_load_mbuf_sg(rxr->rxtag,
3851		    rxbuf->map, rxbuf->m_head, seg,
3852		    &nsegs, BUS_DMA_NOWAIT);
3853		if (error != 0)
3854			panic("RX ring dma initialization failed!\n");
3855		bus_dmamap_sync(rxr->rxtag,
3856		    rxbuf->map, BUS_DMASYNC_PREREAD);
3857
3858		/* Update descriptor */
3859		rxr->rx_base[j].buffer_addr = htole64(seg[0].ds_addr);
3860	}
3861
3862
3863	/* Setup our descriptor indices */
3864	rxr->next_to_check = 0;
3865	rxr->next_to_refresh = 0;
3866
3867	bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
3868	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3869
3870	EM_RX_UNLOCK(rxr);
3871	return (0);
3872}
3873
3874/*********************************************************************
3875 *
3876 *  Initialize all receive rings.
3877 *
3878 **********************************************************************/
3879static int
3880em_setup_receive_structures(struct adapter *adapter)
3881{
3882	struct rx_ring *rxr = adapter->rx_rings;
3883	int j;
3884
3885	for (j = 0; j < adapter->num_queues; j++, rxr++)
3886		if (em_setup_receive_ring(rxr))
3887			goto fail;
3888
3889	return (0);
3890fail:
3891	/*
3892	 * Free the RX buffers allocated so far; we only handle
3893	 * the rings that completed, as the failing case will have
3894	 * cleaned up after itself. 'j' failed, so it is the terminus.
3895	 */
3896	for (int i = 0; i < j; ++i) {
3897		rxr = &adapter->rx_rings[i];
3898		for (int n = 0; n < adapter->num_rx_desc; n++) {
3899			struct em_buffer *rxbuf;
3900			rxbuf = &rxr->rx_buffers[n];
3901			if (rxbuf->m_head != NULL) {
3902				bus_dmamap_sync(rxr->rxtag, rxbuf->map,
3903			  	  BUS_DMASYNC_POSTREAD);
3904				bus_dmamap_unload(rxr->rxtag, rxbuf->map);
3905				m_freem(rxbuf->m_head);
3906				rxbuf->m_head = NULL;
3907			}
3908		}
3909	}
3910
3911	return (ENOBUFS);
3912}
3913
3914/*********************************************************************
3915 *
3916 *  Free all receive rings.
3917 *
3918 **********************************************************************/
3919static void
3920em_free_receive_structures(struct adapter *adapter)
3921{
3922	struct rx_ring *rxr = adapter->rx_rings;
3923
3924	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
3925		em_free_receive_buffers(rxr);
3926		/* Free the ring memory as well */
3927		em_dma_free(adapter, &rxr->rxdma);
3928		EM_RX_LOCK_DESTROY(rxr);
3929	}
3930
3931	free(adapter->rx_rings, M_DEVBUF);
3932}
3933
3934
3935/*********************************************************************
3936 *
3937 *  Free receive ring data structures
3938 *
3939 **********************************************************************/
3940static void
3941em_free_receive_buffers(struct rx_ring *rxr)
3942{
3943	struct adapter		*adapter = rxr->adapter;
3944	struct em_buffer	*rxbuf = NULL;
3945
3946	INIT_DEBUGOUT("free_receive_buffers: begin");
3947
3948	if (rxr->rx_sparemap) {
3949		bus_dmamap_destroy(rxr->rxtag, rxr->rx_sparemap);
3950		rxr->rx_sparemap = NULL;
3951	}
3952
3953	if (rxr->rx_buffers != NULL) {
3954		for (int i = 0; i < adapter->num_rx_desc; i++) {
3955			rxbuf = &rxr->rx_buffers[i];
3956			if (rxbuf->map != NULL) {
3957				bus_dmamap_sync(rxr->rxtag, rxbuf->map,
3958				    BUS_DMASYNC_POSTREAD);
3959				bus_dmamap_unload(rxr->rxtag, rxbuf->map);
3960				bus_dmamap_destroy(rxr->rxtag, rxbuf->map);
3961			}
3962			if (rxbuf->m_head != NULL) {
3963				m_freem(rxbuf->m_head);
3964				rxbuf->m_head = NULL;
3965			}
3966		}
3967		free(rxr->rx_buffers, M_DEVBUF);
3968		rxr->rx_buffers = NULL;
3969	}
3970
3971	if (rxr->rxtag != NULL) {
3972		bus_dma_tag_destroy(rxr->rxtag);
3973		rxr->rxtag = NULL;
3974	}
3975
3976	return;
3977}
3978
3979
3980/*********************************************************************
3981 *
3982 *  Enable receive unit.
3983 *
3984 **********************************************************************/
3985#define MAX_INTS_PER_SEC	8000
3986#define DEFAULT_ITR	     1000000000/(MAX_INTS_PER_SEC * 256)
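/*
 * Worked example: the ITR register counts in 256ns units, so
 * DEFAULT_ITR = 1000000000 / (8000 * 256) = 488 in integer math,
 * i.e. a minimum gap of 488 * 256ns ~= 125us between interrupts,
 * capping the rate at roughly 8000 interrupts per second.
 */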
3987
3988static void
3989em_initialize_receive_unit(struct adapter *adapter)
3990{
3991	struct rx_ring	*rxr = adapter->rx_rings;
3992	struct ifnet	*ifp = adapter->ifp;
3993	struct e1000_hw	*hw = &adapter->hw;
3994	u64	bus_addr;
3995	u32	rctl, rxcsum;
3996
3997	INIT_DEBUGOUT("em_initialize_receive_units: begin");
3998
3999	/*
4000	 * Make sure receives are disabled while setting
4001	 * up the descriptor ring
4002	 */
4003	rctl = E1000_READ_REG(hw, E1000_RCTL);
4004	E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
4005
4006	E1000_WRITE_REG(&adapter->hw, E1000_RADV,
4007	    adapter->rx_abs_int_delay.value);
4008	/*
4009	 * Set the interrupt throttling rate. Value is calculated
4010	 * as DEFAULT_ITR = 1s / (MAX_INTS_PER_SEC * 256ns).
4011	 */
4012	E1000_WRITE_REG(hw, E1000_ITR, DEFAULT_ITR);
4013
4014	/*
4015	** When using MSIX interrupts we need to throttle
4016	** using the EITR register (82574 only)
4017	*/
4018	if (hw->mac.type == e1000_82574)
4019		for (int i = 0; i < 4; i++)
4020			E1000_WRITE_REG(hw, E1000_EITR_82574(i),
4021			    DEFAULT_ITR);
4022
4023	/* Disable accelerated acknowledgement */
4024	if (adapter->hw.mac.type == e1000_82574)
4025		E1000_WRITE_REG(hw, E1000_RFCTL, E1000_RFCTL_ACK_DIS);
4026
4027	if (ifp->if_capenable & IFCAP_RXCSUM) {
4028		rxcsum = E1000_READ_REG(hw, E1000_RXCSUM);
4029		rxcsum |= (E1000_RXCSUM_IPOFL | E1000_RXCSUM_TUOFL);
4030		E1000_WRITE_REG(hw, E1000_RXCSUM, rxcsum);
4031	}
4032
4033	/*
4034	** XXX TEMPORARY WORKAROUND: on some systems with 82573
4035	** long latencies are observed, like Lenovo X60. This
4036	** change eliminates the problem, but since having positive
4037	** values in RDTR is a known source of problems on other
4038	** platforms another solution is being sought.
4039	*/
4040	if (hw->mac.type == e1000_82573)
4041		E1000_WRITE_REG(hw, E1000_RDTR, 0x20);
4042
4043	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4044		/* Setup the Base and Length of the Rx Descriptor Ring */
4045		bus_addr = rxr->rxdma.dma_paddr;
4046		E1000_WRITE_REG(hw, E1000_RDLEN(i),
4047		    adapter->num_rx_desc * sizeof(struct e1000_rx_desc));
4048		E1000_WRITE_REG(hw, E1000_RDBAH(i), (u32)(bus_addr >> 32));
4049		E1000_WRITE_REG(hw, E1000_RDBAL(i), (u32)bus_addr);
4050		/* Setup the Head and Tail Descriptor Pointers */
4051		E1000_WRITE_REG(hw, E1000_RDH(i), 0);
4052		E1000_WRITE_REG(hw, E1000_RDT(i), adapter->num_rx_desc - 1);
4053	}
4054
4055	/* Setup the Receive Control Register */
4056	rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
4057	rctl |= E1000_RCTL_EN | E1000_RCTL_BAM |
4058	    E1000_RCTL_LBM_NO | E1000_RCTL_RDMTS_HALF |
4059	    (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
4060
4061        /* Strip the CRC */
4062        rctl |= E1000_RCTL_SECRC;
4063
4064        /* Make sure VLAN Filters are off */
4065        rctl &= ~E1000_RCTL_VFE;
4066	rctl &= ~E1000_RCTL_SBP;
4067	rctl |= E1000_RCTL_SZ_2048;
4068	if (ifp->if_mtu > ETHERMTU)
4069		rctl |= E1000_RCTL_LPE;
4070	else
4071		rctl &= ~E1000_RCTL_LPE;
4072
4073	/* Write out the settings */
4074	E1000_WRITE_REG(hw, E1000_RCTL, rctl);
4075
4076	return;
4077}
4078
4079
4080/*********************************************************************
4081 *
4082 *  This routine executes in interrupt context. It replenishes
4083 *  the mbufs in the descriptor ring and sends data which has been
4084 *  DMA'ed into host memory to the upper layer.
4085 *
4086 *  We loop at most count times if count is > 0, or until done if
4087 *  count < 0.
4088 *
4089 *  For polling we also now return the number of cleaned packets
4090 *********************************************************************/
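/*
 * (The DEVICE_POLLING path passes its poll budget in as 'count';
 * the interrupt path typically passes the rx_process_limit sysctl
 * value, so 'count' also bounds how long the RX lock is held.)
 */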
4091static int
4092em_rxeof(struct rx_ring *rxr, int count)
4093{
4094	struct adapter		*adapter = rxr->adapter;
4095	struct ifnet		*ifp = adapter->ifp;
4096	struct mbuf		*mp, *sendmp;
4097	u8			status = 0;
4098	u16 			len;
4099	int			i, processed, rxdone = 0;
4100	bool			eop;
4101	struct e1000_rx_desc	*cur;
4102
4103	EM_RX_LOCK_ASSERT(rxr);
4104
4105	for (i = rxr->next_to_check, processed = 0; count != 0;) {
4106
4107		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
4108			break;
4109
4110		bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4111		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
4112
4113		cur = &rxr->rx_base[i];
4114		status = cur->status;
4115		mp = sendmp = NULL;
4116
4117		if ((status & E1000_RXD_STAT_DD) == 0)
4118			break;
4119
4120		len = le16toh(cur->length);
4121		eop = (status & E1000_RXD_STAT_EOP) != 0;
4122		count--;
4123
4124		if ((cur->errors & E1000_RXD_ERR_FRAME_ERR_MASK) == 0) {
4125
4126			/* Assign correct length to the current fragment */
4127			mp = rxr->rx_buffers[i].m_head;
4128			mp->m_len = len;
4129
4130			if (rxr->fmp == NULL) {
4131				mp->m_pkthdr.len = len;
4132				rxr->fmp = mp; /* Store the first mbuf */
4133				rxr->lmp = mp;
4134			} else {
4135				/* Chain mbuf's together */
4136				mp->m_flags &= ~M_PKTHDR;
4137				rxr->lmp->m_next = mp;
4138				rxr->lmp = rxr->lmp->m_next;
4139				rxr->fmp->m_pkthdr.len += len;
4140			}
4141
4142			if (eop) {
4143				rxr->fmp->m_pkthdr.rcvif = ifp;
4144				ifp->if_ipackets++;
4145				em_receive_checksum(cur, rxr->fmp);
4146#ifndef __NO_STRICT_ALIGNMENT
4147				if (adapter->max_frame_size >
4148				    (MCLBYTES - ETHER_ALIGN) &&
4149				    em_fixup_rx(rxr) != 0)
4150					goto skip;
4151#endif
4152				if (status & E1000_RXD_STAT_VP) {
4153					rxr->fmp->m_pkthdr.ether_vtag =
4154					    (le16toh(cur->special) &
4155					    E1000_RXD_SPC_VLAN_MASK);
4156					rxr->fmp->m_flags |= M_VLANTAG;
4157				}
4158#ifdef EM_MULTIQUEUE
4159				rxr->fmp->m_pkthdr.flowid = curcpu;
4160				rxr->fmp->m_flags |= M_FLOWID;
4161#endif
4162#ifndef __NO_STRICT_ALIGNMENT
4163skip:
4164#endif
4165				sendmp = rxr->fmp;
4166				rxr->fmp = NULL;
4167				rxr->lmp = NULL;
4168			}
4169		} else {
4170			ifp->if_ierrors++;
4171			/* Reuse loaded DMA map and just update mbuf chain */
4172			mp = rxr->rx_buffers[i].m_head;
4173			mp->m_len = mp->m_pkthdr.len = MCLBYTES;
4174			mp->m_data = mp->m_ext.ext_buf;
4175			mp->m_next = NULL;
4176			if (adapter->max_frame_size <=
4177			    (MCLBYTES - ETHER_ALIGN))
4178				m_adj(mp, ETHER_ALIGN);
4179			if (rxr->fmp != NULL) {
4180				m_freem(rxr->fmp);
4181				rxr->fmp = NULL;
4182				rxr->lmp = NULL;
4183			}
4184			sendmp = NULL;
4185		}
4186
4187		/* Zero out the receive descriptors status. */
4188		cur->status = 0;
4189		++rxdone;	/* cumulative for POLL */
4190		++processed;
4191
4192		/* Advance our pointers to the next descriptor. */
4193		if (++i == adapter->num_rx_desc)
4194			i = 0;
4195
4196		/* Send to the stack */
4197		if (sendmp != NULL)
4198			(*ifp->if_input)(ifp, sendmp);
4199
4200		/* Only refresh mbufs every 8 descriptors */
4201		if (processed == 8) {
4202			em_refresh_mbufs(rxr, i);
4203			processed = 0;
4204		}
4205	}
4206
4207	/* Catch any remaining refresh work */
4208	if (processed != 0) {
4209		em_refresh_mbufs(rxr, i);
4210		processed = 0;
4211	}
4212
4213	rxr->next_to_check = i;
4214
4215#ifdef DEVICE_POLLING
4216	return (rxdone);
4217#else
4218	return ((status & E1000_RXD_STAT_DD) ? TRUE : FALSE);
4219#endif
4220}
4221
4222#ifndef __NO_STRICT_ALIGNMENT
4223/*
4224 * When jumbo frames are enabled we should realign the entire payload on
4225 * architectures with strict alignment. This is a serious design mistake of
4226 * the 8254x, as it largely defeats the benefit of DMA: the chip only allows
4227 * RX buffer sizes of 2048/4096/8192/16384, while what we really want is
4228 * 2048 - ETHER_ALIGN, so that the payload ends up aligned. On architectures
4229 * without strict alignment restrictions the 8254x still performs unaligned
4230 * memory accesses, which reduces performance as well. To avoid copying an
4231 * entire frame just to realign it, we allocate a new mbuf, copy the
4232 * Ethernet header into it, and prepend the new mbuf to the existing chain.
4233 *
4234 * Be aware, the best performance of the 8254x is achieved only when jumbo
4235 * frames are not used at all on architectures with strict alignment.
4236 */
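/*
 * Illustration of the in-place branch below, assuming the cluster
 * starts on a 4 byte boundary: the received frame begins at offset 0,
 * leaving the IP header at the misaligned offset 14.  The bcopy()
 * slides the whole frame forward by ETHER_HDR_LEN, so the Ethernet
 * header lands at offset 14 and the IP header at offset 28, aligned.
 */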
4237static int
4238em_fixup_rx(struct rx_ring *rxr)
4239{
4240	struct adapter *adapter = rxr->adapter;
4241	struct mbuf *m, *n;
4242	int error;
4243
4244	error = 0;
4245	m = rxr->fmp;
4246	if (m->m_len <= (MCLBYTES - ETHER_HDR_LEN)) {
4247		bcopy(m->m_data, m->m_data + ETHER_HDR_LEN, m->m_len);
4248		m->m_data += ETHER_HDR_LEN;
4249	} else {
4250		MGETHDR(n, M_DONTWAIT, MT_DATA);
4251		if (n != NULL) {
4252			bcopy(m->m_data, n->m_data, ETHER_HDR_LEN);
4253			m->m_data += ETHER_HDR_LEN;
4254			m->m_len -= ETHER_HDR_LEN;
4255			n->m_len = ETHER_HDR_LEN;
4256			M_MOVE_PKTHDR(n, m);
4257			n->m_next = m;
4258			rxr->fmp = n;
4259		} else {
4260			adapter->dropped_pkts++;
4261			m_freem(rxr->fmp);
4262			rxr->fmp = NULL;
4263			error = ENOMEM;
4264		}
4265	}
4266
4267	return (error);
4268}
4269#endif
4270
4271/*********************************************************************
4272 *
4273 *  Verify that the hardware indicated that the checksum is valid.
4274 *  Inform the stack about the status of checksum so that stack
4275 *  doesn't spend time verifying the checksum.
4276 *
4277 *********************************************************************/
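/*
 * The convention used below: CSUM_DATA_VALID | CSUM_PSEUDO_HDR with
 * csum_data = 0xffff tells the TCP/UDP input path that the payload
 * checksum has already been verified, so the stack skips its own
 * in_cksum() pass over the data.
 */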
4278static void
4279em_receive_checksum(struct e1000_rx_desc *rx_desc, struct mbuf *mp)
4280{
4281	/* Ignore Checksum bit is set */
4282	if (rx_desc->status & E1000_RXD_STAT_IXSM) {
4283		mp->m_pkthdr.csum_flags = 0;
4284		return;
4285	}
4286
4287	if (rx_desc->status & E1000_RXD_STAT_IPCS) {
4288		/* Did it pass? */
4289		if (!(rx_desc->errors & E1000_RXD_ERR_IPE)) {
4290			/* IP Checksum Good */
4291			mp->m_pkthdr.csum_flags = CSUM_IP_CHECKED;
4292			mp->m_pkthdr.csum_flags |= CSUM_IP_VALID;
4293
4294		} else {
4295			mp->m_pkthdr.csum_flags = 0;
4296		}
4297	}
4298
4299	if (rx_desc->status & E1000_RXD_STAT_TCPCS) {
4300		/* Did it pass? */
4301		if (!(rx_desc->errors & E1000_RXD_ERR_TCPE)) {
4302			mp->m_pkthdr.csum_flags |=
4303			(CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
4304			mp->m_pkthdr.csum_data = htons(0xffff);
4305		}
4306	}
4307}
4308
4309/*
4310 * This routine is run via a vlan
4311 * config EVENT
4312 */
4313static void
4314em_register_vlan(void *arg, struct ifnet *ifp, u16 vtag)
4315{
4316	struct adapter	*adapter = ifp->if_softc;
4317	u32		index, bit;
4318
4319	if (ifp->if_softc !=  arg)   /* Not our event */
4320		return;
4321
4322	if ((vtag == 0) || (vtag > 4095))       /* Invalid ID */
4323                return;
4324
4325	index = (vtag >> 5) & 0x7F;
4326	bit = vtag & 0x1F;
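	/*
	 * For illustration: vtag 100 gives index = (100 >> 5) & 0x7F = 3
	 * and bit = 100 & 0x1F = 4, so VLAN 100 is tracked by bit 4 of
	 * shadow word 3; each of the 128 words covers 32 VLAN IDs.
	 */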
4327	em_shadow_vfta[index] |= (1 << bit);
4328	++adapter->num_vlans;
4329	/* Re-init to load the changes */
4330	em_init(adapter);
4331}
4332
4333/*
4334 * This routine is run via a vlan
4335 * unconfig EVENT
4336 */
4337static void
4338em_unregister_vlan(void *arg, struct ifnet *ifp, u16 vtag)
4339{
4340	struct adapter	*adapter = ifp->if_softc;
4341	u32		index, bit;
4342
4343	if (ifp->if_softc !=  arg)
4344		return;
4345
4346	if ((vtag == 0) || (vtag > 4095))       /* Invalid */
4347                return;
4348
4349	index = (vtag >> 5) & 0x7F;
4350	bit = vtag & 0x1F;
4351	em_shadow_vfta[index] &= ~(1 << bit);
4352	--adapter->num_vlans;
4353	/* Re-init to load the changes */
4354	em_init(adapter);
4355}
4356
4357static void
4358em_setup_vlan_hw_support(struct adapter *adapter)
4359{
4360	struct e1000_hw *hw = &adapter->hw;
4361	u32             reg;
4362
4363	/*
4364	** We get here through init_locked, meaning
4365	** a soft reset; that has already cleared
4366	** the VFTA and other state, so if no
4367	** vlans have been registered, do nothing.
4368	*/
4369	if (adapter->num_vlans == 0)
4370                return;
4371
4372	/*
4373	** A soft reset zeroes out the VFTA, so
4374	** we need to repopulate it now.
4375	*/
4376	for (int i = 0; i < EM_VFTA_SIZE; i++)
4377                if (em_shadow_vfta[i] != 0)
4378			E1000_WRITE_REG_ARRAY(hw, E1000_VFTA,
4379                            i, em_shadow_vfta[i]);
4380
4381	reg = E1000_READ_REG(hw, E1000_CTRL);
4382	reg |= E1000_CTRL_VME;
4383	E1000_WRITE_REG(hw, E1000_CTRL, reg);
4384
4385	/* Enable the Filter Table */
4386	reg = E1000_READ_REG(hw, E1000_RCTL);
4387	reg &= ~E1000_RCTL_CFIEN;
4388	reg |= E1000_RCTL_VFE;
4389	E1000_WRITE_REG(hw, E1000_RCTL, reg);
4390
4391	/* Update the frame size */
4392	E1000_WRITE_REG(&adapter->hw, E1000_RLPML,
4393	    adapter->max_frame_size + VLAN_TAG_SIZE);
4394}
4395
4396static void
4397em_enable_intr(struct adapter *adapter)
4398{
4399	struct e1000_hw *hw = &adapter->hw;
4400	u32 ims_mask = IMS_ENABLE_MASK;
4401
4402	if (hw->mac.type == e1000_82574) {
4403		E1000_WRITE_REG(hw, EM_EIAC, EM_MSIX_MASK);
4404		ims_mask |= EM_MSIX_MASK;
4405	}
4406	E1000_WRITE_REG(hw, E1000_IMS, ims_mask);
4407}
4408
4409static void
4410em_disable_intr(struct adapter *adapter)
4411{
4412	struct e1000_hw *hw = &adapter->hw;
4413
4414	if (hw->mac.type == e1000_82574)
4415		E1000_WRITE_REG(hw, EM_EIAC, 0);
4416	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
4417}
4418
4419/*
4420 * Bit of a misnomer: what this really means is
4421 * to enable OS management of the system, i.e.
4422 * to disable the special hardware management features.
4423 */
4424static void
4425em_init_manageability(struct adapter *adapter)
4426{
4427	/* A shared code workaround */
4428#define E1000_82542_MANC2H E1000_MANC2H
4429	if (adapter->has_manage) {
4430		int manc2h = E1000_READ_REG(&adapter->hw, E1000_MANC2H);
4431		int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
4432
4433		/* disable hardware interception of ARP */
4434		manc &= ~(E1000_MANC_ARP_EN);
4435
4436                /* enable receiving management packets to the host */
4437		manc |= E1000_MANC_EN_MNG2HOST;
4438#define E1000_MNG2HOST_PORT_623 (1 << 5)
4439#define E1000_MNG2HOST_PORT_664 (1 << 6)
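		/* 623 is the ASF/RMCP management port and 664 its secure
		 * variant; pass traffic on both through to the host. */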
4440		manc2h |= E1000_MNG2HOST_PORT_623;
4441		manc2h |= E1000_MNG2HOST_PORT_664;
4442		E1000_WRITE_REG(&adapter->hw, E1000_MANC2H, manc2h);
4443		E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
4444	}
4445}
4446
4447/*
4448 * Give control back to hardware management
4449 * controller if there is one.
4450 */
4451static void
4452em_release_manageability(struct adapter *adapter)
4453{
4454	if (adapter->has_manage) {
4455		int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
4456
4457		/* re-enable hardware interception of ARP */
4458		manc |= E1000_MANC_ARP_EN;
4459		manc &= ~E1000_MANC_EN_MNG2HOST;
4460
4461		E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
4462	}
4463}
4464
4465/*
4466 * em_get_hw_control sets the {CTRL_EXT|FWSM}:DRV_LOAD bit.
4467 * For ASF and Pass Through versions of f/w this means
4468 * that the driver is loaded. For AMT version type f/w
4469 * this means that the network i/f is open.
4470 */
4471static void
4472em_get_hw_control(struct adapter *adapter)
4473{
4474	u32 ctrl_ext, swsm;
4475
4476	if (adapter->hw.mac.type == e1000_82573) {
4477		swsm = E1000_READ_REG(&adapter->hw, E1000_SWSM);
4478		E1000_WRITE_REG(&adapter->hw, E1000_SWSM,
4479		    swsm | E1000_SWSM_DRV_LOAD);
4480		return;
4481	}
4482	/* else */
4483	ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4484	E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
4485	    ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
4486	return;
4487}
4488
4489/*
4490 * em_release_hw_control resets {CTRL_EXT|FWSM}:DRV_LOAD bit.
4491 * For ASF and Pass Through versions of f/w this means that
4492 * the driver is no longer loaded. For AMT versions of the
4493 * f/w this means that the network i/f is closed.
4494 */
4495static void
4496em_release_hw_control(struct adapter *adapter)
4497{
4498	u32 ctrl_ext, swsm;
4499
4500	if (!adapter->has_manage)
4501		return;
4502
4503	if (adapter->hw.mac.type == e1000_82573) {
4504		swsm = E1000_READ_REG(&adapter->hw, E1000_SWSM);
4505		E1000_WRITE_REG(&adapter->hw, E1000_SWSM,
4506		    swsm & ~E1000_SWSM_DRV_LOAD);
4507		return;
4508	}
4509	/* else */
4510	ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4511	E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
4512	    ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
4513	return;
4514}
4515
4516static int
4517em_is_valid_ether_addr(u8 *addr)
4518{
4519	char zero_addr[6] = { 0, 0, 0, 0, 0, 0 };
4520
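	/* addr[0] & 1 is the IEEE I/G (multicast) bit; a valid station
	 * address must have it clear and must not be all zeros. */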
4521	if ((addr[0] & 1) || (!bcmp(addr, zero_addr, ETHER_ADDR_LEN))) {
4522		return (FALSE);
4523	}
4524
4525	return (TRUE);
4526}
4527
4528/*
4529** Parse the interface capabilities with regard
4530** to both system management and wake-on-lan for
4531** later use.
4532*/
4533static void
4534em_get_wakeup(device_t dev)
4535{
4536	struct adapter	*adapter = device_get_softc(dev);
4537	u16		eeprom_data = 0, device_id, apme_mask;
4538
4539	adapter->has_manage = e1000_enable_mng_pass_thru(&adapter->hw);
4540	apme_mask = EM_EEPROM_APME;
4541
4542	switch (adapter->hw.mac.type) {
4543	case e1000_82573:
4544	case e1000_82583:
4545		adapter->has_amt = TRUE;
4546		/* Falls thru */
4547	case e1000_82571:
4548	case e1000_82572:
4549	case e1000_80003es2lan:
4550		if (adapter->hw.bus.func == 1) {
4551			e1000_read_nvm(&adapter->hw,
4552			    NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data);
4553			break;
4554		} else
4555			e1000_read_nvm(&adapter->hw,
4556			    NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
4557		break;
4558	case e1000_ich8lan:
4559	case e1000_ich9lan:
4560	case e1000_ich10lan:
4561	case e1000_pchlan:
4562		apme_mask = E1000_WUC_APME;
4563		adapter->has_amt = TRUE;
4564		eeprom_data = E1000_READ_REG(&adapter->hw, E1000_WUC);
4565		break;
4566	default:
4567		e1000_read_nvm(&adapter->hw,
4568		    NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
4569		break;
4570	}
4571	if (eeprom_data & apme_mask)
4572		adapter->wol = (E1000_WUFC_MAG | E1000_WUFC_MC);
4573	/*
4574         * We have the eeprom settings, now apply the special cases
4575         * where the eeprom may be wrong or the board won't support
4576         * wake on lan on a particular port
4577	 */
4578	device_id = pci_get_device(dev);
4579        switch (device_id) {
4580	case E1000_DEV_ID_82571EB_FIBER:
4581		/* Wake events only supported on port A for dual fiber
4582		 * regardless of eeprom setting */
4583		if (E1000_READ_REG(&adapter->hw, E1000_STATUS) &
4584		    E1000_STATUS_FUNC_1)
4585			adapter->wol = 0;
4586		break;
4587	case E1000_DEV_ID_82571EB_QUAD_COPPER:
4588	case E1000_DEV_ID_82571EB_QUAD_FIBER:
4589	case E1000_DEV_ID_82571EB_QUAD_COPPER_LP:
4590                /* if quad port adapter, disable WoL on all but port A */
4591		if (global_quad_port_a != 0)
4592			adapter->wol = 0;
4593		/* Reset for multiple quad port adapters */
4594		if (++global_quad_port_a == 4)
4595			global_quad_port_a = 0;
4596                break;
4597	}
4598	return;
4599}
4600
4601
4602/*
4603 * Enable PCI Wake On Lan capability
4604 */
4605static void
4606em_enable_wakeup(device_t dev)
4607{
4608	struct adapter	*adapter = device_get_softc(dev);
4609	struct ifnet	*ifp = adapter->ifp;
4610	u32		pmc, ctrl, ctrl_ext, rctl;
4611	u16     	status;
4612
4613	if ((pci_find_extcap(dev, PCIY_PMG, &pmc) != 0))
4614		return;
4615
4616	/* Advertise the wakeup capability */
4617	ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL);
4618	ctrl |= (E1000_CTRL_SWDPIN2 | E1000_CTRL_SWDPIN3);
4619	E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl);
4620	E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
4621
4622	if ((adapter->hw.mac.type == e1000_ich8lan) ||
4623	    (adapter->hw.mac.type == e1000_pchlan) ||
4624	    (adapter->hw.mac.type == e1000_ich9lan) ||
4625	    (adapter->hw.mac.type == e1000_ich10lan)) {
4626		e1000_disable_gig_wol_ich8lan(&adapter->hw);
4627		e1000_hv_phy_powerdown_workaround_ich8lan(&adapter->hw);
4628	}
4629
4630	/* Keep the laser running on Fiber adapters */
4631	if (adapter->hw.phy.media_type == e1000_media_type_fiber ||
4632	    adapter->hw.phy.media_type == e1000_media_type_internal_serdes) {
4633		ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4634		ctrl_ext |= E1000_CTRL_EXT_SDP3_DATA;
4635		E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, ctrl_ext);
4636	}
4637
4638	/*
4639	** Determine type of Wakeup: note that wol
4640	** is set with all bits on by default.
4641	*/
4642	if ((ifp->if_capenable & IFCAP_WOL_MAGIC) == 0)
4643		adapter->wol &= ~E1000_WUFC_MAG;
4644
4645	if ((ifp->if_capenable & IFCAP_WOL_MCAST) == 0)
4646		adapter->wol &= ~E1000_WUFC_MC;
4647	else {
4648		rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
4649		rctl |= E1000_RCTL_MPE;
4650		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, rctl);
4651	}
4652
4653	if (adapter->hw.mac.type == e1000_pchlan) {
4654		if (em_enable_phy_wakeup(adapter))
4655			return;
4656	} else {
4657		E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
4658		E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
4659	}
4660
4661	if (adapter->hw.phy.type == e1000_phy_igp_3)
4662		e1000_igp3_phy_powerdown_workaround_ich8lan(&adapter->hw);
4663
4664        /* Request PME */
4665        status = pci_read_config(dev, pmc + PCIR_POWER_STATUS, 2);
4666	status &= ~(PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE);
4667	if (ifp->if_capenable & IFCAP_WOL)
4668		status |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE;
4669        pci_write_config(dev, pmc + PCIR_POWER_STATUS, status, 2);
4670
4671	return;
4672}
4673
4674/*
4675** WOL in the newer chipset interfaces (pchlan)
4676** requires the wakeup state to be copied into the PHY.
4677*/
4678static int
4679em_enable_phy_wakeup(struct adapter *adapter)
4680{
4681	struct e1000_hw *hw = &adapter->hw;
4682	u32 mreg, ret = 0;
4683	u16 preg;
4684
4685	/* copy MAC RARs to PHY RARs */
4686	for (int i = 0; i < adapter->hw.mac.rar_entry_count; i++) {
4687		mreg = E1000_READ_REG(hw, E1000_RAL(i));
4688		e1000_write_phy_reg(hw, BM_RAR_L(i), (u16)(mreg & 0xFFFF));
4689		e1000_write_phy_reg(hw, BM_RAR_M(i),
4690		    (u16)((mreg >> 16) & 0xFFFF));
4691		mreg = E1000_READ_REG(hw, E1000_RAH(i));
4692		e1000_write_phy_reg(hw, BM_RAR_H(i), (u16)(mreg & 0xFFFF));
4693		e1000_write_phy_reg(hw, BM_RAR_CTRL(i),
4694		    (u16)((mreg >> 16) & 0xFFFF));
4695	}
4696
4697	/* copy MAC MTA to PHY MTA */
4698	for (int i = 0; i < adapter->hw.mac.mta_reg_count; i++) {
4699		mreg = E1000_READ_REG_ARRAY(hw, E1000_MTA, i);
4700		e1000_write_phy_reg(hw, BM_MTA(i), (u16)(mreg & 0xFFFF));
4701		e1000_write_phy_reg(hw, BM_MTA(i) + 1,
4702		    (u16)((mreg >> 16) & 0xFFFF));
4703	}
4704
4705	/* configure PHY Rx Control register */
4706	e1000_read_phy_reg(&adapter->hw, BM_RCTL, &preg);
4707	mreg = E1000_READ_REG(hw, E1000_RCTL);
4708	if (mreg & E1000_RCTL_UPE)
4709		preg |= BM_RCTL_UPE;
4710	if (mreg & E1000_RCTL_MPE)
4711		preg |= BM_RCTL_MPE;
4712	preg &= ~(BM_RCTL_MO_MASK);
4713	if (mreg & E1000_RCTL_MO_3)
4714		preg |= (((mreg & E1000_RCTL_MO_3) >> E1000_RCTL_MO_SHIFT)
4715				<< BM_RCTL_MO_SHIFT);
4716	if (mreg & E1000_RCTL_BAM)
4717		preg |= BM_RCTL_BAM;
4718	if (mreg & E1000_RCTL_PMCF)
4719		preg |= BM_RCTL_PMCF;
4720	mreg = E1000_READ_REG(hw, E1000_CTRL);
4721	if (mreg & E1000_CTRL_RFCE)
4722		preg |= BM_RCTL_RFCE;
4723	e1000_write_phy_reg(&adapter->hw, BM_RCTL, preg);
4724
4725	/* enable PHY wakeup in MAC register */
4726	E1000_WRITE_REG(hw, E1000_WUC,
4727	    E1000_WUC_PHY_WAKE | E1000_WUC_PME_EN);
4728	E1000_WRITE_REG(hw, E1000_WUFC, adapter->wol);
4729
4730	/* configure and enable PHY wakeup in PHY registers */
4731	e1000_write_phy_reg(&adapter->hw, BM_WUFC, adapter->wol);
4732	e1000_write_phy_reg(&adapter->hw, BM_WUC, E1000_WUC_PME_EN);
4733
4734	/* activate PHY wakeup */
4735	ret = hw->phy.ops.acquire(hw);
4736	if (ret) {
4737		printf("Could not acquire PHY\n");
4738		return ret;
4739	}
4740	e1000_write_phy_reg_mdic(hw, IGP01E1000_PHY_PAGE_SELECT,
4741	                         (BM_WUC_ENABLE_PAGE << IGP_PAGE_SHIFT));
4742	ret = e1000_read_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, &preg);
4743	if (ret) {
4744		printf("Could not read PHY page 769\n");
4745		goto out;
4746	}
4747	preg |= BM_WUC_ENABLE_BIT | BM_WUC_HOST_WU_BIT;
4748	ret = e1000_write_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, preg);
4749	if (ret)
4750		printf("Could not set PHY Host Wakeup bit\n");
4751out:
4752	hw->phy.ops.release(hw);
4753
4754	return ret;
4755}
4756
4757static void
4758em_led_func(void *arg, int onoff)
4759{
4760	struct adapter	*adapter = arg;
4761
4762	EM_CORE_LOCK(adapter);
4763	if (onoff) {
4764		e1000_setup_led(&adapter->hw);
4765		e1000_led_on(&adapter->hw);
4766	} else {
4767		e1000_led_off(&adapter->hw);
4768		e1000_cleanup_led(&adapter->hw);
4769	}
4770	EM_CORE_UNLOCK(adapter);
4771}
4772
4773/**********************************************************************
4774 *
4775 *  Update the board statistics counters.
4776 *
4777 **********************************************************************/
4778static void
4779em_update_stats_counters(struct adapter *adapter)
4780{
4781	struct ifnet   *ifp;
4782
4783	if(adapter->hw.phy.media_type == e1000_media_type_copper ||
4784	   (E1000_READ_REG(&adapter->hw, E1000_STATUS) & E1000_STATUS_LU)) {
4785		adapter->stats.symerrs += E1000_READ_REG(&adapter->hw, E1000_SYMERRS);
4786		adapter->stats.sec += E1000_READ_REG(&adapter->hw, E1000_SEC);
4787	}
4788	adapter->stats.crcerrs += E1000_READ_REG(&adapter->hw, E1000_CRCERRS);
4789	adapter->stats.mpc += E1000_READ_REG(&adapter->hw, E1000_MPC);
4790	adapter->stats.scc += E1000_READ_REG(&adapter->hw, E1000_SCC);
4791	adapter->stats.ecol += E1000_READ_REG(&adapter->hw, E1000_ECOL);
4792
4793	adapter->stats.mcc += E1000_READ_REG(&adapter->hw, E1000_MCC);
4794	adapter->stats.latecol += E1000_READ_REG(&adapter->hw, E1000_LATECOL);
4795	adapter->stats.colc += E1000_READ_REG(&adapter->hw, E1000_COLC);
4796	adapter->stats.dc += E1000_READ_REG(&adapter->hw, E1000_DC);
4797	adapter->stats.rlec += E1000_READ_REG(&adapter->hw, E1000_RLEC);
4798	adapter->stats.xonrxc += E1000_READ_REG(&adapter->hw, E1000_XONRXC);
4799	adapter->stats.xontxc += E1000_READ_REG(&adapter->hw, E1000_XONTXC);
4800	adapter->stats.xoffrxc += E1000_READ_REG(&adapter->hw, E1000_XOFFRXC);
4801	adapter->stats.xofftxc += E1000_READ_REG(&adapter->hw, E1000_XOFFTXC);
4802	adapter->stats.fcruc += E1000_READ_REG(&adapter->hw, E1000_FCRUC);
4803	adapter->stats.prc64 += E1000_READ_REG(&adapter->hw, E1000_PRC64);
4804	adapter->stats.prc127 += E1000_READ_REG(&adapter->hw, E1000_PRC127);
4805	adapter->stats.prc255 += E1000_READ_REG(&adapter->hw, E1000_PRC255);
4806	adapter->stats.prc511 += E1000_READ_REG(&adapter->hw, E1000_PRC511);
4807	adapter->stats.prc1023 += E1000_READ_REG(&adapter->hw, E1000_PRC1023);
4808	adapter->stats.prc1522 += E1000_READ_REG(&adapter->hw, E1000_PRC1522);
4809	adapter->stats.gprc += E1000_READ_REG(&adapter->hw, E1000_GPRC);
4810	adapter->stats.bprc += E1000_READ_REG(&adapter->hw, E1000_BPRC);
4811	adapter->stats.mprc += E1000_READ_REG(&adapter->hw, E1000_MPRC);
4812	adapter->stats.gptc += E1000_READ_REG(&adapter->hw, E1000_GPTC);
4813
4814	/* For the 64-bit byte counters the low dword must be read first, */
4815	/* as both halves clear on the read of the high dword. */
4816
4817	adapter->stats.gorc += E1000_READ_REG(&adapter->hw, E1000_GORCL) + ((u64)E1000_READ_REG(&adapter->hw, E1000_GORCH) << 32);
4818	adapter->stats.gotc += E1000_READ_REG(&adapter->hw, E1000_GOTCL) + ((u64)E1000_READ_REG(&adapter->hw, E1000_GOTCH) << 32);
4819
4820	adapter->stats.rnbc += E1000_READ_REG(&adapter->hw, E1000_RNBC);
4821	adapter->stats.ruc += E1000_READ_REG(&adapter->hw, E1000_RUC);
4822	adapter->stats.rfc += E1000_READ_REG(&adapter->hw, E1000_RFC);
4823	adapter->stats.roc += E1000_READ_REG(&adapter->hw, E1000_ROC);
4824	adapter->stats.rjc += E1000_READ_REG(&adapter->hw, E1000_RJC);
4825
4826	adapter->stats.tor += E1000_READ_REG(&adapter->hw, E1000_TORL) + ((u64)E1000_READ_REG(&adapter->hw, E1000_TORH) << 32);
4827	adapter->stats.tot += E1000_READ_REG(&adapter->hw, E1000_TOTL) + ((u64)E1000_READ_REG(&adapter->hw, E1000_TOTH) << 32);
4828
4829	adapter->stats.tpr += E1000_READ_REG(&adapter->hw, E1000_TPR);
4830	adapter->stats.tpt += E1000_READ_REG(&adapter->hw, E1000_TPT);
4831	adapter->stats.ptc64 += E1000_READ_REG(&adapter->hw, E1000_PTC64);
4832	adapter->stats.ptc127 += E1000_READ_REG(&adapter->hw, E1000_PTC127);
4833	adapter->stats.ptc255 += E1000_READ_REG(&adapter->hw, E1000_PTC255);
4834	adapter->stats.ptc511 += E1000_READ_REG(&adapter->hw, E1000_PTC511);
4835	adapter->stats.ptc1023 += E1000_READ_REG(&adapter->hw, E1000_PTC1023);
4836	adapter->stats.ptc1522 += E1000_READ_REG(&adapter->hw, E1000_PTC1522);
4837	adapter->stats.mptc += E1000_READ_REG(&adapter->hw, E1000_MPTC);
4838	adapter->stats.bptc += E1000_READ_REG(&adapter->hw, E1000_BPTC);
4839
4840	if (adapter->hw.mac.type >= e1000_82543) {
4841		adapter->stats.algnerrc +=
4842		E1000_READ_REG(&adapter->hw, E1000_ALGNERRC);
4843		adapter->stats.rxerrc +=
4844		E1000_READ_REG(&adapter->hw, E1000_RXERRC);
4845		adapter->stats.tncrs +=
4846		E1000_READ_REG(&adapter->hw, E1000_TNCRS);
4847		adapter->stats.cexterr +=
4848		E1000_READ_REG(&adapter->hw, E1000_CEXTERR);
4849		adapter->stats.tsctc +=
4850		E1000_READ_REG(&adapter->hw, E1000_TSCTC);
4851		adapter->stats.tsctfc +=
4852		E1000_READ_REG(&adapter->hw, E1000_TSCTFC);
4853	}
4854	ifp = adapter->ifp;
4855
4856	ifp->if_collisions = adapter->stats.colc;
4857
4858	/* Rx Errors */
4859	ifp->if_ierrors = adapter->dropped_pkts + adapter->stats.rxerrc +
4860	    adapter->stats.crcerrs + adapter->stats.algnerrc +
4861	    adapter->stats.ruc + adapter->stats.roc +
4862	    adapter->stats.mpc + adapter->stats.cexterr;
4863
4864	/* Tx Errors */
4865	ifp->if_oerrors = adapter->stats.ecol +
4866	    adapter->stats.latecol + adapter->watchdog_events;
4867}
4868
4869
4870/**********************************************************************
4871 *
4872 *  This routine is called only when em_display_debug_stats is enabled.
4873 *  This routine provides a way to take a look at important statistics
4874 *  maintained by the driver and hardware.
4875 *
4876 **********************************************************************/
4877static void
4878em_print_debug_info(struct adapter *adapter)
4879{
4880	device_t dev = adapter->dev;
4881	u8 *hw_addr = adapter->hw.hw_addr;
4882	struct rx_ring *rxr = adapter->rx_rings;
4883	struct tx_ring *txr = adapter->tx_rings;
4884
4885	device_printf(dev, "Adapter hardware address = %p \n", hw_addr);
4886	device_printf(dev, "CTRL = 0x%x RCTL = 0x%x \n",
4887	    E1000_READ_REG(&adapter->hw, E1000_CTRL),
4888	    E1000_READ_REG(&adapter->hw, E1000_RCTL));
4889	device_printf(dev, "Packet buffer = Tx=%dk Rx=%dk \n",
4890	    ((E1000_READ_REG(&adapter->hw, E1000_PBA) & 0xffff0000) >> 16),
4891	    (E1000_READ_REG(&adapter->hw, E1000_PBA) & 0xffff) );
4892	device_printf(dev, "Flow control watermarks high = %d low = %d\n",
4893	    adapter->hw.fc.high_water,
4894	    adapter->hw.fc.low_water);
4895	device_printf(dev, "tx_int_delay = %d, tx_abs_int_delay = %d\n",
4896	    E1000_READ_REG(&adapter->hw, E1000_TIDV),
4897	    E1000_READ_REG(&adapter->hw, E1000_TADV));
4898	device_printf(dev, "rx_int_delay = %d, rx_abs_int_delay = %d\n",
4899	    E1000_READ_REG(&adapter->hw, E1000_RDTR),
4900	    E1000_READ_REG(&adapter->hw, E1000_RADV));
4901
4902	for (int i = 0; i < adapter->num_queues; i++, txr++) {
4903		device_printf(dev, "Queue(%d) tdh = %d, tdt = %d\n", i,
4904		    E1000_READ_REG(&adapter->hw, E1000_TDH(i)),
4905		    E1000_READ_REG(&adapter->hw, E1000_TDT(i)));
4906		device_printf(dev, "TX(%d) no descriptors avail event = %ld\n",
4907		    txr->me, txr->no_desc_avail);
4908		device_printf(dev, "TX(%d) MSIX IRQ Handled = %ld\n",
4909		    txr->me, txr->tx_irq);
4910		device_printf(dev, "Num Tx descriptors avail = %d\n",
4911		    txr->tx_avail);
4912		device_printf(dev, "Tx Descriptors not avail1 = %ld\n",
4913		    txr->no_desc_avail);
4914	}
4915	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4916		device_printf(dev, "RX(%d) MSIX IRQ Handled = %ld\n",
4917		    rxr->me, rxr->rx_irq);
4918		device_printf(dev, "hw rdh = %d, hw rdt = %d\n",
4919		    E1000_READ_REG(&adapter->hw, E1000_RDH(i)),
4920		    E1000_READ_REG(&adapter->hw, E1000_RDT(i)));
4921	}
4922	device_printf(dev, "Std mbuf failed = %ld\n",
4923	    adapter->mbuf_alloc_failed);
4924	device_printf(dev, "Std mbuf cluster failed = %ld\n",
4925	    adapter->mbuf_cluster_failed);
4926	device_printf(dev, "Driver dropped packets = %ld\n",
4927	    adapter->dropped_pkts);
4928}
4929
4930static void
4931em_print_hw_stats(struct adapter *adapter)
4932{
4933	device_t dev = adapter->dev;
4934
4935	device_printf(dev, "Excessive collisions = %lld\n",
4936	    (long long)adapter->stats.ecol);
4937#if	(DEBUG_HW > 0)  /* Don't output these errors normally */
4938	device_printf(dev, "Symbol errors = %lld\n",
4939	    (long long)adapter->stats.symerrs);
4940#endif
4941	device_printf(dev, "Sequence errors = %lld\n",
4942	    (long long)adapter->stats.sec);
4943	device_printf(dev, "Defer count = %lld\n",
4944	    (long long)adapter->stats.dc);
4945	device_printf(dev, "Missed Packets = %lld\n",
4946	    (long long)adapter->stats.mpc);
4947	device_printf(dev, "Receive No Buffers = %lld\n",
4948	    (long long)adapter->stats.rnbc);
4949	/* RLEC is inaccurate on some hardware, calculate our own. */
4950	device_printf(dev, "Receive Length Errors = %lld\n",
4951	    ((long long)adapter->stats.roc + (long long)adapter->stats.ruc));
4952	device_printf(dev, "Receive errors = %lld\n",
4953	    (long long)adapter->stats.rxerrc);
4954	device_printf(dev, "Crc errors = %lld\n",
4955	    (long long)adapter->stats.crcerrs);
4956	device_printf(dev, "Alignment errors = %lld\n",
4957	    (long long)adapter->stats.algnerrc);
4958	device_printf(dev, "Collision/Carrier extension errors = %lld\n",
4959	    (long long)adapter->stats.cexterr);
4960	device_printf(dev, "watchdog timeouts = %ld\n",
4961	    adapter->watchdog_events);
4962	device_printf(dev, "XON Rcvd = %lld\n",
4963	    (long long)adapter->stats.xonrxc);
4964	device_printf(dev, "XON Xmtd = %lld\n",
4965	    (long long)adapter->stats.xontxc);
4966	device_printf(dev, "XOFF Rcvd = %lld\n",
4967	    (long long)adapter->stats.xoffrxc);
4968	device_printf(dev, "XOFF Xmtd = %lld\n",
4969	    (long long)adapter->stats.xofftxc);
4970	device_printf(dev, "Good Packets Rcvd = %lld\n",
4971	    (long long)adapter->stats.gprc);
4972	device_printf(dev, "Good Packets Xmtd = %lld\n",
4973	    (long long)adapter->stats.gptc);
4974	device_printf(dev, "TSO Contexts Xmtd = %lld\n",
4975	    (long long)adapter->stats.tsctc);
4976	device_printf(dev, "TSO Contexts Failed = %lld\n",
4977	    (long long)adapter->stats.tsctfc);
4978}
4979
4980/**********************************************************************
4981 *
4982 *  This routine provides a way to dump out the adapter eeprom,
4983 *  often a useful debug/service tool. This only dumps the first
4984 *  32 words; the stuff that matters is within that extent.
4985 *
4986 **********************************************************************/
4987static void
4988em_print_nvm_info(struct adapter *adapter)
4989{
4990	u16	eeprom_data;
4991	int	i, j, row = 0;
4992
4993	/* It's a bit crude, but it gets the job done */
4994	printf("\nInterface EEPROM Dump:\n");
4995	printf("Offset\n0x0000  ");
4996	for (i = 0, j = 0; i < 32; i++, j++) {
4997		if (j == 8) { /* Make the offset block */
4998			j = 0; ++row;
4999			printf("\n0x00%x0  ", row);
5000		}
5001		e1000_read_nvm(&adapter->hw, i, 1, &eeprom_data);
5002		printf("%04x ", eeprom_data);
5003	}
5004	printf("\n");
5005}
5006
5007static int
5008em_sysctl_debug_info(SYSCTL_HANDLER_ARGS)
5009{
5010	struct adapter *adapter;
5011	int error;
5012	int result;
5013
5014	result = -1;
5015	error = sysctl_handle_int(oidp, &result, 0, req);
5016
5017	if (error || !req->newptr)
5018		return (error);
5019
5020	if (result == 1) {
5021		adapter = (struct adapter *)arg1;
5022		em_print_debug_info(adapter);
5023	}
5024	/*
5025	 * This value will cause a hex dump of the
5026	 * first 32 16-bit words of the EEPROM to
5027	 * the screen.
5028	 */
5029	if (result == 2) {
5030		adapter = (struct adapter *)arg1;
5031		em_print_nvm_info(adapter);
5032        }
5033
5034	return (error);
5035}
5036
5037
5038static int
5039em_sysctl_stats(SYSCTL_HANDLER_ARGS)
5040{
5041	struct adapter *adapter;
5042	int error;
5043	int result;
5044
5045	result = -1;
5046	error = sysctl_handle_int(oidp, &result, 0, req);
5047
5048	if (error || !req->newptr)
5049		return (error);
5050
5051	if (result == 1) {
5052		adapter = (struct adapter *)arg1;
5053		em_print_hw_stats(adapter);
5054	}
5055
5056	return (error);
5057}
5058
5059static int
5060em_sysctl_int_delay(SYSCTL_HANDLER_ARGS)
5061{
5062	struct em_int_delay_info *info;
5063	struct adapter *adapter;
5064	u32 regval;
5065	int error, usecs, ticks;
5066
5067	info = (struct em_int_delay_info *)arg1;
5068	usecs = info->value;
5069	error = sysctl_handle_int(oidp, &usecs, 0, req);
5070	if (error != 0 || req->newptr == NULL)
5071		return (error);
5072	if (usecs < 0 || usecs > EM_TICKS_TO_USECS(65535))
5073		return (EINVAL);
5074	info->value = usecs;
5075	ticks = EM_USECS_TO_TICKS(usecs);
5076
5077	adapter = info->adapter;
5078
5079	EM_CORE_LOCK(adapter);
5080	regval = E1000_READ_OFFSET(&adapter->hw, info->offset);
5081	regval = (regval & ~0xffff) | (ticks & 0xffff);
5082	/* Handle a few special cases. */
5083	switch (info->offset) {
5084	case E1000_RDTR:
5085		break;
5086	case E1000_TIDV:
5087		if (ticks == 0) {
5088			adapter->txd_cmd &= ~E1000_TXD_CMD_IDE;
5089			/* Don't write 0 into the TIDV register. */
5090			regval++;
5091		} else
5092			adapter->txd_cmd |= E1000_TXD_CMD_IDE;
5093		break;
5094	}
5095	E1000_WRITE_OFFSET(&adapter->hw, info->offset, regval);
5096	EM_CORE_UNLOCK(adapter);
5097	return (0);
5098}
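/*
 * Usage sketch (the sysctl name here is illustrative): setting
 * dev.em.0.rx_int_delay=32 lands in the handler above with usecs = 32,
 * which EM_USECS_TO_TICKS() converts into the hardware's 1.024us tick
 * units before being merged into the low 16 bits of the delay register.
 */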
5099
5100static void
5101em_add_int_delay_sysctl(struct adapter *adapter, const char *name,
5102	const char *description, struct em_int_delay_info *info,
5103	int offset, int value)
5104{
5105	info->adapter = adapter;
5106	info->offset = offset;
5107	info->value = value;
5108	SYSCTL_ADD_PROC(device_get_sysctl_ctx(adapter->dev),
5109	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
5110	    OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW,
5111	    info, 0, em_sysctl_int_delay, "I", description);
5112}
5113
5114static void
5115em_add_rx_process_limit(struct adapter *adapter, const char *name,
5116	const char *description, int *limit, int value)
5117{
5118	*limit = value;
5119	SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev),
5120	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
5121	    OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW, limit, value, description);
5122}
5123
5124
5125