/******************************************************************************

  Copyright (c) 2001-2010, Intel Corporation
  All rights reserved.

  Redistribution and use in source and binary forms, with or without
  modification, are permitted provided that the following conditions are met:

   1. Redistributions of source code must retain the above copyright notice,
      this list of conditions and the following disclaimer.

   2. Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.

   3. Neither the name of the Intel Corporation nor the names of its
      contributors may be used to endorse or promote products derived from
      this software without specific prior written permission.

  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  POSSIBILITY OF SUCH DAMAGE.

******************************************************************************/
/*$FreeBSD: head/sys/dev/e1000/if_em.c 209238 2010-06-16 16:37:36Z jfv $*/

#ifdef HAVE_KERNEL_OPTION_HEADERS
#include "opt_device_polling.h"
#include "opt_inet.h"
#endif

#include <sys/param.h>
#include <sys/systm.h>
#if __FreeBSD_version >= 800000
#include <sys/buf_ring.h>
#endif
#include <sys/bus.h>
#include <sys/endian.h>
#include <sys/kernel.h>
#include <sys/kthread.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/module.h>
#include <sys/rman.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/sysctl.h>
#include <sys/taskqueue.h>
#include <sys/eventhandler.h>
#include <machine/bus.h>
#include <machine/resource.h>

#include <net/bpf.h>
#include <net/ethernet.h>
#include <net/if.h>
#include <net/if_arp.h>
#include <net/if_dl.h>
#include <net/if_media.h>

#include <net/if_types.h>
#include <net/if_vlan_var.h>

#include <netinet/in_systm.h>
#include <netinet/in.h>
#include <netinet/if_ether.h>
#include <netinet/ip.h>
#include <netinet/ip6.h>
#include <netinet/tcp.h>
#include <netinet/udp.h>

#include <machine/in_cksum.h>
#include <dev/led/led.h>
#include <dev/pci/pcivar.h>
#include <dev/pci/pcireg.h>

#include "e1000_api.h"
#include "e1000_82571.h"
#include "if_em.h"

/*********************************************************************
 *  Set this to one to display debug statistics
 *********************************************************************/
int	em_display_debug_stats = 0;

/*********************************************************************
 *  Driver version:
 *********************************************************************/
char em_driver_version[] = "7.0.5";

/*********************************************************************
 *  PCI Device ID Table
 *
 *  Used by probe to select the devices to load on
 *  The last field stores an index into em_strings
 *  The last entry must be all 0s
 *
 *  { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index }
 *********************************************************************/

static em_vendor_info_t em_vendor_info_array[] =
{
	/* Intel(R) PRO/1000 Network Connection */
	{ 0x8086, E1000_DEV_ID_82571EB_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_SERDES_DUAL,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_SERDES_QUAD,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER_LP,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_QUAD_FIBER,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571PT_QUAD_COPPER,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82572EI_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82572EI_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82572EI_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82572EI,		PCI_ANY_ID, PCI_ANY_ID, 0},

	{ 0x8086, E1000_DEV_ID_82573E,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82573E_IAMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82573L,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82583V,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_SPT,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_SPT,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_DPT,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_DPT,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IGP_M_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IGP_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IGP_C,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IFE,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IFE_GT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IFE_G,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IGP_M,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_82567V_3,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IGP_M_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IGP_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IGP_C,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IGP_M,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IGP_M_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IFE,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IFE_GT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IFE_G,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_BM,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82574L,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82574LA,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH10_R_BM_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH10_R_BM_LF,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH10_R_BM_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH10_D_BM_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH10_D_BM_LF,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_M_HV_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_M_HV_LC,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_D_HV_DM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_D_HV_DC,	PCI_ANY_ID, PCI_ANY_ID, 0},
	/* required last entry */
	{ 0, 0, 0, 0, 0}
};

/*********************************************************************
 *  Table of branding strings for all supported NICs.
 *********************************************************************/

static char *em_strings[] = {
	"Intel(R) PRO/1000 Network Connection"
};

/*********************************************************************
 *  Function prototypes
 *********************************************************************/
static int	em_probe(device_t);
static int	em_attach(device_t);
static int	em_detach(device_t);
static int	em_shutdown(device_t);
static int	em_suspend(device_t);
static int	em_resume(device_t);
static void	em_start(struct ifnet *);
static void	em_start_locked(struct ifnet *, struct tx_ring *);
#ifdef EM_MULTIQUEUE
static int	em_mq_start(struct ifnet *, struct mbuf *);
static int	em_mq_start_locked(struct ifnet *,
		    struct tx_ring *, struct mbuf *);
static void	em_qflush(struct ifnet *);
#endif
static int	em_ioctl(struct ifnet *, u_long, caddr_t);
static void	em_init(void *);
static void	em_init_locked(struct adapter *);
static void	em_stop(void *);
static void	em_media_status(struct ifnet *, struct ifmediareq *);
static int	em_media_change(struct ifnet *);
static void	em_identify_hardware(struct adapter *);
static int	em_allocate_pci_resources(struct adapter *);
static int	em_allocate_legacy(struct adapter *);
static int	em_allocate_msix(struct adapter *);
static int	em_allocate_queues(struct adapter *);
static int	em_setup_msix(struct adapter *);
static void	em_free_pci_resources(struct adapter *);
static void	em_local_timer(void *);
static void	em_reset(struct adapter *);
static void	em_setup_interface(device_t, struct adapter *);

static void	em_setup_transmit_structures(struct adapter *);
static void	em_initialize_transmit_unit(struct adapter *);
static int	em_allocate_transmit_buffers(struct tx_ring *);
static void	em_free_transmit_structures(struct adapter *);
static void	em_free_transmit_buffers(struct tx_ring *);

static int	em_setup_receive_structures(struct adapter *);
static int	em_allocate_receive_buffers(struct rx_ring *);
static void	em_initialize_receive_unit(struct adapter *);
static void	em_free_receive_structures(struct adapter *);
static void	em_free_receive_buffers(struct rx_ring *);

static void	em_enable_intr(struct adapter *);
static void	em_disable_intr(struct adapter *);
static void	em_update_stats_counters(struct adapter *);
static bool	em_txeof(struct tx_ring *);
static bool	em_rxeof(struct rx_ring *, int, int *);
#ifndef __NO_STRICT_ALIGNMENT
static int	em_fixup_rx(struct rx_ring *);
#endif
static void	em_receive_checksum(struct e1000_rx_desc *, struct mbuf *);
static void	em_transmit_checksum_setup(struct tx_ring *, struct mbuf *,
		    u32 *, u32 *);
static bool	em_tso_setup(struct tx_ring *, struct mbuf *, u32 *, u32 *);
static void	em_set_promisc(struct adapter *);
static void	em_disable_promisc(struct adapter *);
static void	em_set_multi(struct adapter *);
static void	em_print_hw_stats(struct adapter *);
static void	em_update_link_status(struct adapter *);
static void	em_refresh_mbufs(struct rx_ring *, int);
static void	em_register_vlan(void *, struct ifnet *, u16);
static void	em_unregister_vlan(void *, struct ifnet *, u16);
static void	em_setup_vlan_hw_support(struct adapter *);
static int	em_xmit(struct tx_ring *, struct mbuf **);
static int	em_dma_malloc(struct adapter *, bus_size_t,
		    struct em_dma_alloc *, int);
static void	em_dma_free(struct adapter *, struct em_dma_alloc *);
static void	em_print_debug_info(struct adapter *);
static void	em_print_nvm_info(struct adapter *);
static int	em_is_valid_ether_addr(u8 *);
static int	em_sysctl_stats(SYSCTL_HANDLER_ARGS);
static int	em_sysctl_debug_info(SYSCTL_HANDLER_ARGS);
static int	em_sysctl_int_delay(SYSCTL_HANDLER_ARGS);
static void	em_add_int_delay_sysctl(struct adapter *, const char *,
		    const char *, struct em_int_delay_info *, int, int);
/* Management and WOL Support */
static void	em_init_manageability(struct adapter *);
static void	em_release_manageability(struct adapter *);
static void	em_get_hw_control(struct adapter *);
static void	em_release_hw_control(struct adapter *);
static void	em_get_wakeup(device_t);
static void	em_enable_wakeup(device_t);
static int	em_enable_phy_wakeup(struct adapter *);
static void	em_led_func(void *, int);

static int	em_irq_fast(void *);

/* MSIX handlers */
static void	em_msix_tx(void *);
static void	em_msix_rx(void *);
static void	em_msix_link(void *);
static void	em_handle_tx(void *context, int pending);
static void	em_handle_rx(void *context, int pending);
static void	em_handle_link(void *context, int pending);

static void	em_add_rx_process_limit(struct adapter *, const char *,
		    const char *, int *, int);

#ifdef DEVICE_POLLING
static poll_handler_t em_poll;
#endif /* DEVICE_POLLING */

/*********************************************************************
 *  FreeBSD Device Interface Entry Points
 *********************************************************************/

static device_method_t em_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe, em_probe),
	DEVMETHOD(device_attach, em_attach),
	DEVMETHOD(device_detach, em_detach),
	DEVMETHOD(device_shutdown, em_shutdown),
	DEVMETHOD(device_suspend, em_suspend),
	DEVMETHOD(device_resume, em_resume),
	{0, 0}
};

static driver_t em_driver = {
	"em", em_methods, sizeof(struct adapter),
};

devclass_t em_devclass;
DRIVER_MODULE(em, pci, em_driver, em_devclass, 0, 0);
MODULE_DEPEND(em, pci, 1, 1, 1);
MODULE_DEPEND(em, ether, 1, 1, 1);

/*********************************************************************
 *  Tunable default values.
 *********************************************************************/

#define EM_TICKS_TO_USECS(ticks)	((1024 * (ticks) + 500) / 1000)
#define EM_USECS_TO_TICKS(usecs)	((1000 * (usecs) + 512) / 1024)
#define M_TSO_LEN			66
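
/*
 * Illustrative arithmetic (added note, not from the original source):
 * the e1000 interrupt delay registers count in units of 1.024 usecs,
 * which the macros above convert with round-to-nearest integer math.
 * For example, a value of 64 hardware ticks gives
 * EM_TICKS_TO_USECS(64) = (1024 * 64 + 500) / 1000 = 66 usecs, and
 * EM_USECS_TO_TICKS(66) = (1000 * 66 + 512) / 1024 = 64 ticks again.
 */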

/* Allow common code without TSO */
#ifndef CSUM_TSO
#define CSUM_TSO	0
#endif

static int em_tx_int_delay_dflt = EM_TICKS_TO_USECS(EM_TIDV);
static int em_rx_int_delay_dflt = EM_TICKS_TO_USECS(EM_RDTR);
TUNABLE_INT("hw.em.tx_int_delay", &em_tx_int_delay_dflt);
TUNABLE_INT("hw.em.rx_int_delay", &em_rx_int_delay_dflt);

static int em_tx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_TADV);
static int em_rx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_RADV);
TUNABLE_INT("hw.em.tx_abs_int_delay", &em_tx_abs_int_delay_dflt);
TUNABLE_INT("hw.em.rx_abs_int_delay", &em_rx_abs_int_delay_dflt);

static int em_rxd = EM_DEFAULT_RXD;
static int em_txd = EM_DEFAULT_TXD;
TUNABLE_INT("hw.em.rxd", &em_rxd);
TUNABLE_INT("hw.em.txd", &em_txd);

static int em_smart_pwr_down = FALSE;
TUNABLE_INT("hw.em.smart_pwr_down", &em_smart_pwr_down);

/* Controls whether promiscuous also shows bad packets */
static int em_debug_sbp = FALSE;
TUNABLE_INT("hw.em.sbp", &em_debug_sbp);

/* Local controls for MSI/MSIX */
#ifdef EM_MULTIQUEUE
static int em_enable_msix = TRUE;
static int em_msix_queues = 2; /* for 82574, can be 1 or 2 */
#else
static int em_enable_msix = FALSE;
static int em_msix_queues = 0; /* disable */
#endif
TUNABLE_INT("hw.em.enable_msix", &em_enable_msix);
TUNABLE_INT("hw.em.msix_queues", &em_msix_queues);

/* How many packets rxeof tries to clean at a time */
static int em_rx_process_limit = 100;
TUNABLE_INT("hw.em.rx_process_limit", &em_rx_process_limit);

/* Flow control setting - default to FULL */
static int em_fc_setting = e1000_fc_full;
TUNABLE_INT("hw.em.fc_setting", &em_fc_setting);
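
/*
 * A hypothetical usage example (added note, not in the original
 * driver): all of the knobs above are loader tunables, so they can
 * be overridden at boot from /boot/loader.conf, e.g.:
 *
 *	hw.em.rxd="2048"
 *	hw.em.txd="2048"
 *	hw.em.rx_process_limit="200"
 *
 * The values are picked up when the driver loads.
 */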

/*
** Shadow VFTA table: this is needed because
** the real VLAN filter table gets cleared during
** a soft reset and the driver needs to be able
** to repopulate it.
*/
static u32 em_shadow_vfta[EM_VFTA_SIZE];
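
/*
 * A minimal sketch of the table layout (assumed here, not this
 * driver's exact helper code): the VFTA is EM_VFTA_SIZE (128) 32-bit
 * words covering all 4096 VLAN IDs, so VLAN `vid` maps to bit
 * (vid & 0x1F) of word (vid >> 5).  Registering or unregistering a
 * VLAN would update the shadow copy with
 *
 *	em_shadow_vfta[vid >> 5] |=  (1 << (vid & 0x1F));
 *	em_shadow_vfta[vid >> 5] &= ~(1 << (vid & 0x1F));
 *
 * and after a soft reset the words can simply be written back to the
 * hardware VFTA registers.
 */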

/* Global used in WOL setup with multiport cards */
static int global_quad_port_a = 0;

/*********************************************************************
 *  Device identification routine
 *
 *  em_probe determines if the driver should be loaded on the
 *  adapter based on the PCI vendor/device ID of the adapter.
 *
 *  return BUS_PROBE_DEFAULT on success, positive on failure
 *********************************************************************/

static int
em_probe(device_t dev)
{
	char		adapter_name[60];
	u16		pci_vendor_id = 0;
	u16		pci_device_id = 0;
	u16		pci_subvendor_id = 0;
	u16		pci_subdevice_id = 0;
	em_vendor_info_t *ent;

	INIT_DEBUGOUT("em_probe: begin");

	pci_vendor_id = pci_get_vendor(dev);
	if (pci_vendor_id != EM_VENDOR_ID)
		return (ENXIO);

	pci_device_id = pci_get_device(dev);
	pci_subvendor_id = pci_get_subvendor(dev);
	pci_subdevice_id = pci_get_subdevice(dev);

	ent = em_vendor_info_array;
	while (ent->vendor_id != 0) {
		if ((pci_vendor_id == ent->vendor_id) &&
		    (pci_device_id == ent->device_id) &&

		    ((pci_subvendor_id == ent->subvendor_id) ||
		    (ent->subvendor_id == PCI_ANY_ID)) &&

		    ((pci_subdevice_id == ent->subdevice_id) ||
		    (ent->subdevice_id == PCI_ANY_ID))) {
			sprintf(adapter_name, "%s %s",
				em_strings[ent->index],
				em_driver_version);
			device_set_desc_copy(dev, adapter_name);
			return (BUS_PROBE_DEFAULT);
		}
		ent++;
	}

	return (ENXIO);
}

/*********************************************************************
 *  Device initialization routine
 *
 *  The attach entry point is called when the driver is being loaded.
 *  This routine identifies the type of hardware, allocates all resources
 *  and initializes the hardware.
 *
 *  return 0 on success, positive on failure
 *********************************************************************/

static int
em_attach(device_t dev)
{
	struct adapter	*adapter;
	int		error = 0;

	INIT_DEBUGOUT("em_attach: begin");

	adapter = device_get_softc(dev);
	adapter->dev = adapter->osdep.dev = dev;
	EM_CORE_LOCK_INIT(adapter, device_get_nameunit(dev));

	/* SYSCTL stuff */
	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
	    OID_AUTO, "debug", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
	    em_sysctl_debug_info, "I", "Debug Information");

	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
	    OID_AUTO, "stats", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
	    em_sysctl_stats, "I", "Statistics");

	callout_init_mtx(&adapter->timer, &adapter->core_mtx, 0);

	/* Determine hardware and mac info */
	em_identify_hardware(adapter);

	/* Setup PCI resources */
	if (em_allocate_pci_resources(adapter)) {
		device_printf(dev, "Allocation of PCI resources failed\n");
		error = ENXIO;
		goto err_pci;
	}

	/*
	** For ICH8 and family we need to
	** map the flash memory, and this
	** must happen after the MAC is
	** identified
	*/
	if ((adapter->hw.mac.type == e1000_ich8lan) ||
	    (adapter->hw.mac.type == e1000_pchlan) ||
	    (adapter->hw.mac.type == e1000_ich9lan) ||
	    (adapter->hw.mac.type == e1000_ich10lan)) {
		int rid = EM_BAR_TYPE_FLASH;
		adapter->flash = bus_alloc_resource_any(dev,
		    SYS_RES_MEMORY, &rid, RF_ACTIVE);
		if (adapter->flash == NULL) {
			device_printf(dev, "Mapping of Flash failed\n");
			error = ENXIO;
			goto err_pci;
		}
		/* This is used in the shared code */
		adapter->hw.flash_address = (u8 *)adapter->flash;
		adapter->osdep.flash_bus_space_tag =
		    rman_get_bustag(adapter->flash);
		adapter->osdep.flash_bus_space_handle =
		    rman_get_bushandle(adapter->flash);
	}

	/* Do Shared Code initialization */
	if (e1000_setup_init_funcs(&adapter->hw, TRUE)) {
		device_printf(dev, "Setup of Shared code failed\n");
		error = ENXIO;
		goto err_pci;
	}

	e1000_get_bus_info(&adapter->hw);

	/* Set up some sysctls for the tunable interrupt delays */
	em_add_int_delay_sysctl(adapter, "rx_int_delay",
	    "receive interrupt delay in usecs", &adapter->rx_int_delay,
	    E1000_REGISTER(&adapter->hw, E1000_RDTR), em_rx_int_delay_dflt);
	em_add_int_delay_sysctl(adapter, "tx_int_delay",
	    "transmit interrupt delay in usecs", &adapter->tx_int_delay,
	    E1000_REGISTER(&adapter->hw, E1000_TIDV), em_tx_int_delay_dflt);
	em_add_int_delay_sysctl(adapter, "rx_abs_int_delay",
	    "receive interrupt delay limit in usecs",
	    &adapter->rx_abs_int_delay,
	    E1000_REGISTER(&adapter->hw, E1000_RADV),
	    em_rx_abs_int_delay_dflt);
	em_add_int_delay_sysctl(adapter, "tx_abs_int_delay",
	    "transmit interrupt delay limit in usecs",
	    &adapter->tx_abs_int_delay,
	    E1000_REGISTER(&adapter->hw, E1000_TADV),
	    em_tx_abs_int_delay_dflt);

	/* Sysctls for limiting the amount of work done in the taskqueue */
	em_add_rx_process_limit(adapter, "rx_processing_limit",
	    "max number of rx packets to process", &adapter->rx_process_limit,
	    em_rx_process_limit);

	/*
	 * Validate the number of transmit and receive descriptors.  The
	 * count must not exceed the hardware maximum, and the ring size
	 * in bytes must be a multiple of EM_DBA_ALIGN.
	 */
	if (((em_txd * sizeof(struct e1000_tx_desc)) % EM_DBA_ALIGN) != 0 ||
	    (em_txd > EM_MAX_TXD) || (em_txd < EM_MIN_TXD)) {
		device_printf(dev, "Using %d TX descriptors instead of %d!\n",
		    EM_DEFAULT_TXD, em_txd);
		adapter->num_tx_desc = EM_DEFAULT_TXD;
	} else
		adapter->num_tx_desc = em_txd;

	if (((em_rxd * sizeof(struct e1000_rx_desc)) % EM_DBA_ALIGN) != 0 ||
	    (em_rxd > EM_MAX_RXD) || (em_rxd < EM_MIN_RXD)) {
		device_printf(dev, "Using %d RX descriptors instead of %d!\n",
		    EM_DEFAULT_RXD, em_rxd);
		adapter->num_rx_desc = EM_DEFAULT_RXD;
	} else
		adapter->num_rx_desc = em_rxd;
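
	/*
	 * Worked example of the check above (added note, assuming the
	 * 16-byte legacy descriptor and EM_DBA_ALIGN == 128): the ring
	 * size in bytes is 128-byte aligned exactly when the count is a
	 * multiple of 8, so e.g. hw.em.txd=1500 fails the modulo test
	 * (1500 * 16 % 128 == 64) and the driver falls back to
	 * EM_DEFAULT_TXD.
	 */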

	adapter->hw.mac.autoneg = DO_AUTO_NEG;
	adapter->hw.phy.autoneg_wait_to_complete = FALSE;
	adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;

	/* Copper options */
	if (adapter->hw.phy.media_type == e1000_media_type_copper) {
		adapter->hw.phy.mdix = AUTO_ALL_MODES;
		adapter->hw.phy.disable_polarity_correction = FALSE;
		adapter->hw.phy.ms_type = EM_MASTER_SLAVE;
	}

	/*
	 * Set the frame limits assuming
	 * standard ethernet sized frames.
	 */
	adapter->max_frame_size = ETHERMTU + ETHER_HDR_LEN + ETHERNET_FCS_SIZE;
	adapter->min_frame_size = ETH_ZLEN + ETHERNET_FCS_SIZE;

	/*
	 * This controls when hardware reports transmit completion
	 * status.
	 */
	adapter->hw.mac.report_tx_early = 1;

	/*
	** Get queue/ring memory
	*/
	if (em_allocate_queues(adapter)) {
		error = ENOMEM;
		goto err_pci;
	}

	/*
	** Start from a known state; this is
	** important for reading the NVM and
	** MAC address from it.
	*/
	e1000_reset_hw(&adapter->hw);

	/* Make sure we have a good EEPROM before we read from it */
	if (e1000_validate_nvm_checksum(&adapter->hw) < 0) {
		/*
		** Some PCI-E parts fail the first check due to
		** the link being in a sleep state; call it again,
		** and if it fails a second time it's a real issue.
		*/
		if (e1000_validate_nvm_checksum(&adapter->hw) < 0) {
			device_printf(dev,
			    "The EEPROM Checksum Is Not Valid\n");
			error = EIO;
			goto err_late;
		}
	}

	/* Copy the permanent MAC address out of the EEPROM */
	if (e1000_read_mac_addr(&adapter->hw) < 0) {
		device_printf(dev, "EEPROM read error while reading MAC"
		    " address\n");
		error = EIO;
		goto err_late;
	}

	if (!em_is_valid_ether_addr(adapter->hw.mac.addr)) {
		device_printf(dev, "Invalid MAC address\n");
		error = EIO;
		goto err_late;
	}

	/*
	**  Do interrupt configuration
	*/
	if (adapter->msix > 1) /* Do MSIX */
		error = em_allocate_msix(adapter);
	else  /* MSI or Legacy */
		error = em_allocate_legacy(adapter);
	if (error)
		goto err_late;

	/*
	 * Get Wake-on-Lan and Management info for later use
	 */
	em_get_wakeup(dev);

	/* Setup OS specific network interface */
	em_setup_interface(dev, adapter);

	em_reset(adapter);

	/* Initialize statistics */
	em_update_stats_counters(adapter);

	adapter->hw.mac.get_link_status = 1;
	em_update_link_status(adapter);

	/* Indicate SOL/IDER usage */
	if (e1000_check_reset_block(&adapter->hw))
		device_printf(dev,
		    "PHY reset is blocked due to SOL/IDER session.\n");

	/* Register for VLAN events */
	adapter->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
	    em_register_vlan, adapter, EVENTHANDLER_PRI_FIRST);
	adapter->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
	    em_unregister_vlan, adapter, EVENTHANDLER_PRI_FIRST);

	/* Non-AMT based hardware can now take control from firmware */
	if (adapter->has_manage && !adapter->has_amt)
		em_get_hw_control(adapter);

	/* Tell the stack that the interface is not active */
	adapter->ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);

	adapter->led_dev = led_create(em_led_func, adapter,
	    device_get_nameunit(dev));

	INIT_DEBUGOUT("em_attach: end");

	return (0);

err_late:
	em_free_transmit_structures(adapter);
	em_free_receive_structures(adapter);
	em_release_hw_control(adapter);
err_pci:
	em_free_pci_resources(adapter);
	EM_CORE_LOCK_DESTROY(adapter);

	return (error);
}

/*********************************************************************
 *  Device removal routine
 *
 *  The detach entry point is called when the driver is being removed.
 *  This routine stops the adapter and deallocates all the resources
 *  that were allocated for driver operation.
 *
 *  return 0 on success, positive on failure
 *********************************************************************/

static int
em_detach(device_t dev)
{
	struct adapter	*adapter = device_get_softc(dev);
	struct ifnet	*ifp = adapter->ifp;

	INIT_DEBUGOUT("em_detach: begin");

	/* Make sure VLANs are not using the driver */
	if (adapter->ifp->if_vlantrunk != NULL) {
		device_printf(dev, "Vlan in use, detach first\n");
		return (EBUSY);
	}

#ifdef DEVICE_POLLING
	if (ifp->if_capenable & IFCAP_POLLING)
		ether_poll_deregister(ifp);
#endif

	if (adapter->led_dev != NULL)
		led_destroy(adapter->led_dev);

	EM_CORE_LOCK(adapter);
	adapter->in_detach = 1;
	em_stop(adapter);
	EM_CORE_UNLOCK(adapter);
	EM_CORE_LOCK_DESTROY(adapter);

	e1000_phy_hw_reset(&adapter->hw);

	em_release_manageability(adapter);
	em_release_hw_control(adapter);

	/* Unregister VLAN events */
	if (adapter->vlan_attach != NULL)
		EVENTHANDLER_DEREGISTER(vlan_config, adapter->vlan_attach);
	if (adapter->vlan_detach != NULL)
		EVENTHANDLER_DEREGISTER(vlan_unconfig, adapter->vlan_detach);

	ether_ifdetach(adapter->ifp);
	callout_drain(&adapter->timer);

	em_free_pci_resources(adapter);
	bus_generic_detach(dev);
	if_free(ifp);

	em_free_transmit_structures(adapter);
	em_free_receive_structures(adapter);

	em_release_hw_control(adapter);

	return (0);
}

/*********************************************************************
 *
 *  Shutdown entry point
 *
 **********************************************************************/

static int
em_shutdown(device_t dev)
{
	return em_suspend(dev);
}

/*
 * Suspend/resume device methods.
 */
static int
em_suspend(device_t dev)
{
	struct adapter *adapter = device_get_softc(dev);

	EM_CORE_LOCK(adapter);

	em_release_manageability(adapter);
	em_release_hw_control(adapter);
	em_enable_wakeup(dev);

	EM_CORE_UNLOCK(adapter);

	return bus_generic_suspend(dev);
}

static int
em_resume(device_t dev)
{
	struct adapter *adapter = device_get_softc(dev);
	struct ifnet *ifp = adapter->ifp;

	EM_CORE_LOCK(adapter);
	em_init_locked(adapter);
	em_init_manageability(adapter);
	EM_CORE_UNLOCK(adapter);
	em_start(ifp);

	return bus_generic_resume(dev);
}


/*********************************************************************
 *  Transmit entry point
 *
 *  em_start is called by the stack to initiate a transmit.
 *  The driver will remain in this routine as long as there are
 *  packets to transmit and transmit resources are available.
 *  In case resources are not available, the stack is notified
 *  and the packet is requeued.
 **********************************************************************/

#ifdef EM_MULTIQUEUE
static int
em_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr, struct mbuf *m)
{
	struct adapter	*adapter = txr->adapter;
	struct mbuf	*next;
	int		err = 0, enq = 0;

	if ((ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) !=
	    IFF_DRV_RUNNING || adapter->link_active == 0) {
		if (m != NULL)
			err = drbr_enqueue(ifp, txr->br, m);
		return (err);
	}

	/* Call cleanup if the number of TX descriptors is low */
	if (txr->tx_avail <= EM_TX_CLEANUP_THRESHOLD)
		em_txeof(txr);

	enq = 0;
	if (m == NULL) {
		next = drbr_dequeue(ifp, txr->br);
	} else if (drbr_needs_enqueue(ifp, txr->br)) {
		if ((err = drbr_enqueue(ifp, txr->br, m)) != 0)
			return (err);
		next = drbr_dequeue(ifp, txr->br);
	} else
		next = m;

	/* Process the queue */
	while (next != NULL) {
		if ((err = em_xmit(txr, &next)) != 0) {
			if (next != NULL)
				err = drbr_enqueue(ifp, txr->br, next);
			break;
		}
		enq++;
		drbr_stats_update(ifp, next->m_pkthdr.len, next->m_flags);
		ETHER_BPF_MTAP(ifp, next);
		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
			break;
		if (txr->tx_avail < EM_MAX_SCATTER) {
			ifp->if_drv_flags |= IFF_DRV_OACTIVE;
			break;
		}
		next = drbr_dequeue(ifp, txr->br);
	}

	if (enq > 0) {
		/* Set the watchdog */
		txr->watchdog_check = TRUE;
		txr->watchdog_time = ticks;
	}
	return (err);
}

/*
** Multiqueue capable stack interface; this is
** not yet truly multiqueue, but that is coming...
*/
static int
em_mq_start(struct ifnet *ifp, struct mbuf *m)
{
	struct adapter	*adapter = ifp->if_softc;
	struct tx_ring	*txr;
	int		i, error = 0;

	/* Which queue to use */
	if ((m->m_flags & M_FLOWID) != 0)
		i = m->m_pkthdr.flowid % adapter->num_queues;
	else
		i = curcpu % adapter->num_queues;

	txr = &adapter->tx_rings[i];

	if (EM_TX_TRYLOCK(txr)) {
		error = em_mq_start_locked(ifp, txr, m);
		EM_TX_UNLOCK(txr);
	} else
		error = drbr_enqueue(ifp, txr->br, m);

	return (error);
}

/*
** Flush all ring buffers
*/
static void
em_qflush(struct ifnet *ifp)
{
	struct adapter	*adapter = ifp->if_softc;
	struct tx_ring	*txr = adapter->tx_rings;
	struct mbuf	*m;

	for (int i = 0; i < adapter->num_queues; i++, txr++) {
		EM_TX_LOCK(txr);
		while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
			m_freem(m);
		EM_TX_UNLOCK(txr);
	}
	if_qflush(ifp);
}

#endif /* EM_MULTIQUEUE */

static void
em_start_locked(struct ifnet *ifp, struct tx_ring *txr)
{
	struct adapter	*adapter = ifp->if_softc;
	struct mbuf	*m_head;

	EM_TX_LOCK_ASSERT(txr);

	if ((ifp->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) !=
	    IFF_DRV_RUNNING)
		return;

	if (!adapter->link_active)
		return;

	/* Call cleanup if the number of TX descriptors is low */
	if (txr->tx_avail <= EM_TX_CLEANUP_THRESHOLD)
		em_txeof(txr);

	while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
		if (txr->tx_avail < EM_MAX_SCATTER) {
			ifp->if_drv_flags |= IFF_DRV_OACTIVE;
			break;
		}
		IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
		if (m_head == NULL)
			break;
		/*
		 *  Encapsulation can modify our pointer, and/or make it
		 *  NULL on failure.  In that event, we can't requeue.
		 */
		if (em_xmit(txr, &m_head)) {
			if (m_head == NULL)
				break;
			ifp->if_drv_flags |= IFF_DRV_OACTIVE;
			IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
			break;
		}

		/* Send a copy of the frame to the BPF listener */
		ETHER_BPF_MTAP(ifp, m_head);

		/* Set timeout in case hardware has problems transmitting. */
		txr->watchdog_time = ticks;
		txr->watchdog_check = TRUE;
	}

	return;
}

static void
em_start(struct ifnet *ifp)
{
	struct adapter	*adapter = ifp->if_softc;
	struct tx_ring	*txr = adapter->tx_rings;

	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
		EM_TX_LOCK(txr);
		em_start_locked(ifp, txr);
		EM_TX_UNLOCK(txr);
	}
	return;
}

/*********************************************************************
 *  Ioctl entry point
 *
 *  em_ioctl is called when the user wants to configure the
 *  interface.
 *
 *  return 0 on success, positive on failure
 **********************************************************************/

static int
em_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
{
	struct adapter	*adapter = ifp->if_softc;
	struct ifreq *ifr = (struct ifreq *)data;
#ifdef INET
	struct ifaddr *ifa = (struct ifaddr *)data;
#endif
	int error = 0;

	if (adapter->in_detach)
		return (error);

	switch (command) {
	case SIOCSIFADDR:
#ifdef INET
		if (ifa->ifa_addr->sa_family == AF_INET) {
			/*
			 * XXX
			 * Since resetting hardware takes a very long time
			 * and results in link renegotiation, we only
			 * initialize the hardware when it is absolutely
			 * required.
			 */
			ifp->if_flags |= IFF_UP;
			if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) {
				EM_CORE_LOCK(adapter);
				em_init_locked(adapter);
				EM_CORE_UNLOCK(adapter);
			}
			arp_ifinit(ifp, ifa);
		} else
#endif
			error = ether_ioctl(ifp, command, data);
		break;
	case SIOCSIFMTU:
	    {
		int max_frame_size;

		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFMTU (Set Interface MTU)");

		EM_CORE_LOCK(adapter);
		switch (adapter->hw.mac.type) {
		case e1000_82571:
		case e1000_82572:
		case e1000_ich9lan:
		case e1000_ich10lan:
		case e1000_82574:
		case e1000_80003es2lan:	/* 9K Jumbo Frame size */
			max_frame_size = 9234;
			break;
		case e1000_pchlan:
			max_frame_size = 4096;
			break;
		/* Adapters that do not support jumbo frames */
		case e1000_82583:
		case e1000_ich8lan:
			max_frame_size = ETHER_MAX_LEN;
			break;
		default:
			max_frame_size = MAX_JUMBO_FRAME_SIZE;
		}
		if (ifr->ifr_mtu > max_frame_size - ETHER_HDR_LEN -
		    ETHER_CRC_LEN) {
			EM_CORE_UNLOCK(adapter);
			error = EINVAL;
			break;
		}

		ifp->if_mtu = ifr->ifr_mtu;
		adapter->max_frame_size =
		    ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
		em_init_locked(adapter);
		EM_CORE_UNLOCK(adapter);
		break;
	    }
	case SIOCSIFFLAGS:
		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFFLAGS (Set Interface Flags)");
		EM_CORE_LOCK(adapter);
		if (ifp->if_flags & IFF_UP) {
			if ((ifp->if_drv_flags & IFF_DRV_RUNNING)) {
				if ((ifp->if_flags ^ adapter->if_flags) &
				    (IFF_PROMISC | IFF_ALLMULTI)) {
					em_disable_promisc(adapter);
					em_set_promisc(adapter);
				}
			} else
				em_init_locked(adapter);
		} else
			if (ifp->if_drv_flags & IFF_DRV_RUNNING)
				em_stop(adapter);
		adapter->if_flags = ifp->if_flags;
		EM_CORE_UNLOCK(adapter);
		break;
	case SIOCADDMULTI:
	case SIOCDELMULTI:
		IOCTL_DEBUGOUT("ioctl rcv'd: SIOC(ADD|DEL)MULTI");
		if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
			EM_CORE_LOCK(adapter);
			em_disable_intr(adapter);
			em_set_multi(adapter);
#ifdef DEVICE_POLLING
			if (!(ifp->if_capenable & IFCAP_POLLING))
#endif
				em_enable_intr(adapter);
			EM_CORE_UNLOCK(adapter);
		}
		break;
	case SIOCSIFMEDIA:
		/* Check SOL/IDER usage */
		EM_CORE_LOCK(adapter);
		if (e1000_check_reset_block(&adapter->hw)) {
			EM_CORE_UNLOCK(adapter);
			device_printf(adapter->dev, "Media change is"
			    " blocked due to SOL/IDER session.\n");
			break;
		}
		EM_CORE_UNLOCK(adapter);
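		/* FALLTHROUGH */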
	case SIOCGIFMEDIA:
		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCxIFMEDIA (Get/Set Interface Media)");
		error = ifmedia_ioctl(ifp, ifr, &adapter->media, command);
		break;
	case SIOCSIFCAP:
	    {
		int mask, reinit;

		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFCAP (Set Capabilities)");
		reinit = 0;
		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
#ifdef DEVICE_POLLING
		if (mask & IFCAP_POLLING) {
			if (ifr->ifr_reqcap & IFCAP_POLLING) {
				error = ether_poll_register(em_poll, ifp);
				if (error)
					return (error);
				EM_CORE_LOCK(adapter);
				em_disable_intr(adapter);
				ifp->if_capenable |= IFCAP_POLLING;
				EM_CORE_UNLOCK(adapter);
			} else {
				error = ether_poll_deregister(ifp);
				/* Enable interrupt even in error case */
				EM_CORE_LOCK(adapter);
				em_enable_intr(adapter);
				ifp->if_capenable &= ~IFCAP_POLLING;
				EM_CORE_UNLOCK(adapter);
			}
		}
#endif
		if (mask & IFCAP_HWCSUM) {
			ifp->if_capenable ^= IFCAP_HWCSUM;
			reinit = 1;
		}
		if (mask & IFCAP_TSO4) {
			ifp->if_capenable ^= IFCAP_TSO4;
			reinit = 1;
		}
		if (mask & IFCAP_VLAN_HWTAGGING) {
			ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
			reinit = 1;
		}
		if (mask & IFCAP_VLAN_HWFILTER) {
			ifp->if_capenable ^= IFCAP_VLAN_HWFILTER;
			reinit = 1;
		}
		if ((mask & IFCAP_WOL) &&
		    (ifp->if_capabilities & IFCAP_WOL) != 0) {
			if (mask & IFCAP_WOL_MCAST)
				ifp->if_capenable ^= IFCAP_WOL_MCAST;
			if (mask & IFCAP_WOL_MAGIC)
				ifp->if_capenable ^= IFCAP_WOL_MAGIC;
		}
		if (reinit && (ifp->if_drv_flags & IFF_DRV_RUNNING))
			em_init(adapter);
		VLAN_CAPABILITIES(ifp);
		break;
	    }

	default:
		error = ether_ioctl(ifp, command, data);
		break;
	}

	return (error);
}


/*********************************************************************
 *  Init entry point
 *
 *  This routine is used in two ways.  It is used by the stack as
 *  an init entry point in the network interface structure.  It is
 *  also used by the driver as a hw/sw initialization routine to
 *  get to a consistent state.
 *
 **********************************************************************/

static void
em_init_locked(struct adapter *adapter)
{
	struct ifnet	*ifp = adapter->ifp;
	device_t	dev = adapter->dev;
	u32		pba;

	INIT_DEBUGOUT("em_init: begin");

	EM_CORE_LOCK_ASSERT(adapter);

	em_disable_intr(adapter);
	callout_stop(&adapter->timer);

	/*
	 * Packet Buffer Allocation (PBA)
	 * Writing PBA sets the receive portion of the buffer;
	 * the remainder is used for the transmit buffer.
	 */
	switch (adapter->hw.mac.type) {
	/* Total Packet Buffer on these is 48K */
	case e1000_82571:
	case e1000_82572:
	case e1000_80003es2lan:
		pba = E1000_PBA_32K; /* 32K for Rx, 16K for Tx */
		break;
	case e1000_82573: /* 82573: Total Packet Buffer is 32K */
		pba = E1000_PBA_12K; /* 12K for Rx, 20K for Tx */
		break;
	case e1000_82574:
	case e1000_82583:
		pba = E1000_PBA_20K; /* 20K for Rx, 20K for Tx */
		break;
	case e1000_ich9lan:
	case e1000_ich10lan:
	case e1000_pchlan:
		pba = E1000_PBA_10K;
		break;
	case e1000_ich8lan:
		pba = E1000_PBA_8K;
		break;
	default:
		if (adapter->max_frame_size > 8192)
			pba = E1000_PBA_40K; /* 40K for Rx, 24K for Tx */
		else
			pba = E1000_PBA_48K; /* 48K for Rx, 16K for Tx */
	}
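
	/*
	 * Illustrative arithmetic (added note; assumption: PBA is
	 * programmed in KB units): on an 82571/82572/80003es2lan part
	 * with 48K of total packet buffer, writing E1000_PBA_32K
	 * reserves 32K for receive and leaves 48 - 32 = 16K for
	 * transmit, matching the per-case comments above.
	 */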

	INIT_DEBUGOUT1("em_init: pba=%dK", pba);
	E1000_WRITE_REG(&adapter->hw, E1000_PBA, pba);

	/* Get the latest mac address; users can use a LAA */
	bcopy(IF_LLADDR(adapter->ifp), adapter->hw.mac.addr,
	    ETHER_ADDR_LEN);

	/* Put the address into the Receive Address Array */
	e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);

	/*
	 * With the 82571 adapter, RAR[0] may be overwritten
	 * when the other port is reset, so we make a duplicate
	 * in RAR[14] for that eventuality; this assures
	 * the interface continues to function.
	 */
	if (adapter->hw.mac.type == e1000_82571) {
		e1000_set_laa_state_82571(&adapter->hw, TRUE);
		e1000_rar_set(&adapter->hw, adapter->hw.mac.addr,
		    E1000_RAR_ENTRIES - 1);
	}

	/* Initialize the hardware */
	em_reset(adapter);
	em_update_link_status(adapter);

	/* Setup VLAN support, basic and offload if available */
	E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);

	/* Use real VLAN Filter support? */
	if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) {
		if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
			/* Use real VLAN Filter support */
			em_setup_vlan_hw_support(adapter);
		else {
			u32 ctrl;
			ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL);
			ctrl |= E1000_CTRL_VME;
			E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl);
		}
	}

	/* Set hardware offload abilities */
	ifp->if_hwassist = 0;
	if (ifp->if_capenable & IFCAP_TXCSUM)
		ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP);
	if (ifp->if_capenable & IFCAP_TSO4)
		ifp->if_hwassist |= CSUM_TSO;

	/* Configure for OS presence */
	em_init_manageability(adapter);

	/* Prepare transmit descriptors and buffers */
	em_setup_transmit_structures(adapter);
	em_initialize_transmit_unit(adapter);

	/* Setup Multicast table */
	em_set_multi(adapter);

	/* Prepare receive descriptors and buffers */
	if (em_setup_receive_structures(adapter)) {
		device_printf(dev, "Could not setup receive structures\n");
		em_stop(adapter);
		return;
	}
	em_initialize_receive_unit(adapter);

	/* Don't lose promiscuous settings */
	em_set_promisc(adapter);

	ifp->if_drv_flags |= IFF_DRV_RUNNING;
	ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;

	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
	e1000_clear_hw_cntrs_base_generic(&adapter->hw);

	/* MSI/X configuration for 82574 */
	if (adapter->hw.mac.type == e1000_82574) {
		int tmp;
		tmp = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
		tmp |= E1000_CTRL_EXT_PBA_CLR;
		E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, tmp);
		/* Set the IVAR - interrupt vector routing. */
		E1000_WRITE_REG(&adapter->hw, E1000_IVAR, adapter->ivars);
	}

#ifdef DEVICE_POLLING
	/*
	 * Only enable interrupts if we are not polling; make sure
	 * they are off otherwise.
	 */
	if (ifp->if_capenable & IFCAP_POLLING)
		em_disable_intr(adapter);
	else
#endif /* DEVICE_POLLING */
		em_enable_intr(adapter);

	/* AMT based hardware can now take control from firmware */
	if (adapter->has_manage && adapter->has_amt)
		em_get_hw_control(adapter);

	/* Don't reset the phy next time init gets called */
	adapter->hw.phy.reset_disable = TRUE;
}

static void
em_init(void *arg)
{
	struct adapter *adapter = arg;

	EM_CORE_LOCK(adapter);
	em_init_locked(adapter);
	EM_CORE_UNLOCK(adapter);
}


#ifdef DEVICE_POLLING
/*********************************************************************
 *
 *  Legacy polling routine: note this only works with a single queue
 *
 *********************************************************************/
static int
em_poll(struct ifnet *ifp, enum poll_cmd cmd, int count)
{
	struct adapter *adapter = ifp->if_softc;
	struct tx_ring	*txr = adapter->tx_rings;
	struct rx_ring	*rxr = adapter->rx_rings;
	u32		reg_icr;
	int		rx_done;

	EM_CORE_LOCK(adapter);
	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
		EM_CORE_UNLOCK(adapter);
		return (0);
	}

	if (cmd == POLL_AND_CHECK_STATUS) {
		reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
		if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
			callout_stop(&adapter->timer);
			adapter->hw.mac.get_link_status = 1;
			em_update_link_status(adapter);
			callout_reset(&adapter->timer, hz,
			    em_local_timer, adapter);
		}
	}
	EM_CORE_UNLOCK(adapter);

	em_rxeof(rxr, count, &rx_done);

	EM_TX_LOCK(txr);
	em_txeof(txr);
#ifdef EM_MULTIQUEUE
	if (!drbr_empty(ifp, txr->br))
		em_mq_start_locked(ifp, txr, NULL);
#else
	if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
		em_start_locked(ifp, txr);
#endif
	EM_TX_UNLOCK(txr);

	return (rx_done);
}
#endif /* DEVICE_POLLING */


/*********************************************************************
 *
 *  Fast Legacy/MSI Combined Interrupt Service routine
 *
 *********************************************************************/
static int
em_irq_fast(void *arg)
{
	struct adapter	*adapter = arg;
	struct ifnet	*ifp;
	u32		reg_icr;

	ifp = adapter->ifp;

	reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);

	/* Hot eject?  */
	if (reg_icr == 0xffffffff)
		return FILTER_STRAY;

	/* Definitely not our interrupt.  */
	if (reg_icr == 0x0)
		return FILTER_STRAY;

	/*
	 * Starting with the 82571 chip, bit 31 should be used to
	 * determine whether the interrupt belongs to us.
	 */
	if (adapter->hw.mac.type >= e1000_82571 &&
	    (reg_icr & E1000_ICR_INT_ASSERTED) == 0)
		return FILTER_STRAY;

	em_disable_intr(adapter);
	taskqueue_enqueue(adapter->tq, &adapter->que_task);

	/* Link status change */
	if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
		adapter->hw.mac.get_link_status = 1;
		taskqueue_enqueue(taskqueue_fast, &adapter->link_task);
	}

	if (reg_icr & E1000_ICR_RXO)
		adapter->rx_overruns++;
	return FILTER_HANDLED;
}

/* Combined RX/TX handler, used by Legacy and MSI */
static void
em_handle_que(void *context, int pending)
{
	struct adapter	*adapter = context;
	struct ifnet	*ifp = adapter->ifp;
	struct tx_ring	*txr = adapter->tx_rings;
	struct rx_ring	*rxr = adapter->rx_rings;
	bool		more;

	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
		more = em_rxeof(rxr, adapter->rx_process_limit, NULL);

		EM_TX_LOCK(txr);
		if (em_txeof(txr))
			more = TRUE;
#ifdef EM_MULTIQUEUE
		if (!drbr_empty(ifp, txr->br))
			em_mq_start_locked(ifp, txr, NULL);
#else
		if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
			em_start_locked(ifp, txr);
#endif
		EM_TX_UNLOCK(txr);
		if (more) {
			taskqueue_enqueue(adapter->tq, &adapter->que_task);
			return;
		}
	}

	em_enable_intr(adapter);
	return;
}


/*********************************************************************
 *
 *  MSIX Interrupt Service Routines
 *
 **********************************************************************/
static void
em_msix_tx(void *arg)
{
	struct tx_ring *txr = arg;
	struct adapter *adapter = txr->adapter;
	bool		more;

	++txr->tx_irq;
	EM_TX_LOCK(txr);
	more = em_txeof(txr);
	EM_TX_UNLOCK(txr);
	if (more)
		taskqueue_enqueue(txr->tq, &txr->tx_task);
	else
		/* Reenable this interrupt */
		E1000_WRITE_REG(&adapter->hw, E1000_IMS, txr->ims);
	return;
}

/*********************************************************************
 *
 *  MSIX RX Interrupt Service routine
 *
 **********************************************************************/

static void
em_msix_rx(void *arg)
{
	struct rx_ring	*rxr = arg;
	struct adapter	*adapter = rxr->adapter;
	bool		more;

	++rxr->rx_irq;
	more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
	if (more)
		taskqueue_enqueue(rxr->tq, &rxr->rx_task);
	else
		/* Reenable this interrupt */
		E1000_WRITE_REG(&adapter->hw, E1000_IMS, rxr->ims);
	return;
}

/*********************************************************************
 *
 *  MSIX Link Fast Interrupt Service routine
 *
 **********************************************************************/
static void
em_msix_link(void *arg)
{
	struct adapter	*adapter = arg;
	u32		reg_icr;

	++adapter->link_irq;
	reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);

	if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
		adapter->hw.mac.get_link_status = 1;
		em_handle_link(adapter, 0);
	} else
		E1000_WRITE_REG(&adapter->hw, E1000_IMS,
		    EM_MSIX_LINK | E1000_IMS_LSC);
	return;
}

static void
em_handle_rx(void *context, int pending)
{
	struct rx_ring	*rxr = context;
	struct adapter	*adapter = rxr->adapter;
	bool		more;

	more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
	if (more)
		taskqueue_enqueue(rxr->tq, &rxr->rx_task);
	else
		/* Reenable this interrupt */
		E1000_WRITE_REG(&adapter->hw, E1000_IMS, rxr->ims);
}

static void
em_handle_tx(void *context, int pending)
{
	struct tx_ring	*txr = context;
	struct adapter	*adapter = txr->adapter;
	struct ifnet	*ifp = adapter->ifp;

	if (!EM_TX_TRYLOCK(txr))
		return;

	em_txeof(txr);

#ifdef EM_MULTIQUEUE
	if (!drbr_empty(ifp, txr->br))
		em_mq_start_locked(ifp, txr, NULL);
#else
	if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
		em_start_locked(ifp, txr);
#endif
	E1000_WRITE_REG(&adapter->hw, E1000_IMS, txr->ims);
	EM_TX_UNLOCK(txr);
}

static void
em_handle_link(void *context, int pending)
{
	struct adapter	*adapter = context;
	struct ifnet *ifp = adapter->ifp;

	if (!(ifp->if_drv_flags & IFF_DRV_RUNNING))
		return;

	EM_CORE_LOCK(adapter);
	callout_stop(&adapter->timer);
	em_update_link_status(adapter);
	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
	E1000_WRITE_REG(&adapter->hw, E1000_IMS,
	    EM_MSIX_LINK | E1000_IMS_LSC);
	EM_CORE_UNLOCK(adapter);
}


/*********************************************************************
 *
 *  Media Ioctl callback
 *
 *  This routine is called whenever the user queries the status of
 *  the interface using ifconfig.
 *
 **********************************************************************/
static void
em_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
{
	struct adapter *adapter = ifp->if_softc;
	u_char fiber_type = IFM_1000_SX;

	INIT_DEBUGOUT("em_media_status: begin");

	EM_CORE_LOCK(adapter);
	em_update_link_status(adapter);

	ifmr->ifm_status = IFM_AVALID;
	ifmr->ifm_active = IFM_ETHER;

	if (!adapter->link_active) {
		EM_CORE_UNLOCK(adapter);
		return;
	}

	ifmr->ifm_status |= IFM_ACTIVE;

	if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
	    (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
		ifmr->ifm_active |= fiber_type | IFM_FDX;
	} else {
		switch (adapter->link_speed) {
		case 10:
			ifmr->ifm_active |= IFM_10_T;
			break;
		case 100:
			ifmr->ifm_active |= IFM_100_TX;
			break;
		case 1000:
			ifmr->ifm_active |= IFM_1000_T;
			break;
		}
		if (adapter->link_duplex == FULL_DUPLEX)
			ifmr->ifm_active |= IFM_FDX;
		else
			ifmr->ifm_active |= IFM_HDX;
	}
	EM_CORE_UNLOCK(adapter);
}

/*********************************************************************
 *
 *  Media Ioctl callback
 *
 *  This routine is called when the user changes speed/duplex using
 *  the media/mediaopt options with ifconfig.
 *
 **********************************************************************/
static int
em_media_change(struct ifnet *ifp)
{
	struct adapter *adapter = ifp->if_softc;
	struct ifmedia	*ifm = &adapter->media;

	INIT_DEBUGOUT("em_media_change: begin");

	if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
		return (EINVAL);

	EM_CORE_LOCK(adapter);
	switch (IFM_SUBTYPE(ifm->ifm_media)) {
	case IFM_AUTO:
		adapter->hw.mac.autoneg = DO_AUTO_NEG;
		adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
		break;
	case IFM_1000_LX:
	case IFM_1000_SX:
	case IFM_1000_T:
		adapter->hw.mac.autoneg = DO_AUTO_NEG;
		adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
		break;
	case IFM_100_TX:
		adapter->hw.mac.autoneg = FALSE;
		adapter->hw.phy.autoneg_advertised = 0;
		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
			adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_FULL;
		else
			adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_HALF;
		break;
	case IFM_10_T:
		adapter->hw.mac.autoneg = FALSE;
		adapter->hw.phy.autoneg_advertised = 0;
		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
			adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_FULL;
		else
			adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_HALF;
		break;
	default:
		device_printf(adapter->dev, "Unsupported media type\n");
	}

	/*
	 * As the speed/duplex settings may have changed we need to
	 * reset the PHY.
	 */
	adapter->hw.phy.reset_disable = FALSE;

	em_init_locked(adapter);
	EM_CORE_UNLOCK(adapter);

	return (0);
}

/*********************************************************************
 *
 *  This routine maps the mbufs to tx descriptors.
 *
 *  return 0 on success, positive on failure
 **********************************************************************/

static int
em_xmit(struct tx_ring *txr, struct mbuf **m_headp)
{
	struct adapter		*adapter = txr->adapter;
	bus_dma_segment_t	segs[EM_MAX_SCATTER];
	bus_dmamap_t		map;
	struct em_buffer	*tx_buffer, *tx_buffer_mapped;
	struct e1000_tx_desc	*ctxd = NULL;
	struct mbuf		*m_head;
	u32			txd_upper, txd_lower, txd_used, txd_saved;
	int			nsegs, i, j, first, last = 0;
	int			error, do_tso, tso_desc = 0;

	m_head = *m_headp;
	txd_upper = txd_lower = txd_used = txd_saved = 0;
	do_tso = ((m_head->m_pkthdr.csum_flags & CSUM_TSO) != 0);

	/*
	 * TSO workaround:
	 *  if an mbuf is only a header, we need
	 *  to pull 4 bytes of data into it.
	 */
	if (do_tso && (m_head->m_len <= M_TSO_LEN)) {
		m_head = m_pullup(m_head, M_TSO_LEN + 4);
		*m_headp = m_head;
		if (m_head == NULL)
			return (ENOBUFS);
	}

	/*
	 * Map the packet for DMA
	 *
	 * Capture the first descriptor index;
	 * this descriptor will have the index
	 * of the EOP, which is the only one
	 * that now gets a DONE bit writeback.
	 */
	first = txr->next_avail_desc;
	tx_buffer = &txr->tx_buffers[first];
	tx_buffer_mapped = tx_buffer;
	map = tx_buffer->map;

	error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
	    *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);

	/*
	 * There are two types of errors we can (try) to handle:
	 * - EFBIG means the mbuf chain was too long and bus_dma ran
	 *   out of segments.  Defragment the mbuf chain and try again.
	 * - ENOMEM means bus_dma could not obtain enough bounce buffers
	 *   at this point in time.  Defer sending and try again later.
	 * All other errors, in particular EINVAL, are fatal and prevent the
	 * mbuf chain from ever going through.  Drop it and report error.
	 */
	if (error == EFBIG) {
		struct mbuf *m;

		m = m_defrag(*m_headp, M_DONTWAIT);
		if (m == NULL) {
			adapter->mbuf_alloc_failed++;
			m_freem(*m_headp);
			*m_headp = NULL;
			return (ENOBUFS);
		}
		*m_headp = m;

		/* Try it again */
		error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
		    *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);

		if (error) {
			adapter->no_tx_dma_setup++;
			m_freem(*m_headp);
			*m_headp = NULL;
			return (error);
		}
	} else if (error != 0) {
		adapter->no_tx_dma_setup++;
		return (error);
	}
1808
1809	/*
1810	 * TSO Hardware workaround, if this packet is not
1811	 * TSO, and is only a single descriptor long, and
1812	 * it follows a TSO burst, then we need to add a
1813	 * sentinel descriptor to prevent premature writeback.
1814	 */
1815	if ((do_tso == 0) && (txr->tx_tso == TRUE)) {
1816		if (nsegs == 1)
1817			tso_desc = TRUE;
1818		txr->tx_tso = FALSE;
1819	}
1820
1821	if (nsegs > (txr->tx_avail - 2)) {
1822		txr->no_desc_avail++;
1823		bus_dmamap_unload(txr->txtag, map);
1824		return (ENOBUFS);
1825	}
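	/*
	 * Note: the two descriptors of headroom above leave room for a
	 * possible TSO sentinel split and an offload context descriptor;
	 * this is inferred from the code paths below, not from a
	 * documented hardware requirement.
	 */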
1826	m_head = *m_headp;
1827
1828	/* Do hardware assists */
1829#if __FreeBSD_version >= 700000
1830	if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
1831		error = em_tso_setup(txr, m_head, &txd_upper, &txd_lower);
1832		if (error != TRUE)
1833			return (ENXIO); /* TSO context setup failed */
1834		/* we need to make a final sentinel transmit desc */
1835		tso_desc = TRUE;
1836	} else
1837#endif
1838	if (m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD)
1839		em_transmit_checksum_setup(txr,  m_head,
1840		    &txd_upper, &txd_lower);
1841
1842	i = txr->next_avail_desc;
1843
1844	/* Set up our transmit descriptors */
1845	for (j = 0; j < nsegs; j++) {
1846		bus_size_t seg_len;
1847		bus_addr_t seg_addr;
1848
1849		tx_buffer = &txr->tx_buffers[i];
1850		ctxd = &txr->tx_base[i];
1851		seg_addr = segs[j].ds_addr;
1852		seg_len  = segs[j].ds_len;
1853		/*
1854		** TSO Workaround:
1855		** If this is the last descriptor, we want to
1856		** split it so we have a small final sentinel
1857		*/
1858		if (tso_desc && (j == (nsegs -1)) && (seg_len > 8)) {
1859			seg_len -= 4;
1860			ctxd->buffer_addr = htole64(seg_addr);
1861			ctxd->lower.data = htole32(
1862			adapter->txd_cmd | txd_lower | seg_len);
1863			ctxd->upper.data =
1864			    htole32(txd_upper);
1865			if (++i == adapter->num_tx_desc)
1866				i = 0;
1867			/* Now make the sentinel */
1868			++txd_used; /* using an extra txd */
1869			ctxd = &txr->tx_base[i];
1870			tx_buffer = &txr->tx_buffers[i];
1871			ctxd->buffer_addr =
1872			    htole64(seg_addr + seg_len);
1873			ctxd->lower.data = htole32(
1874			adapter->txd_cmd | txd_lower | 4);
1875			ctxd->upper.data =
1876			    htole32(txd_upper);
1877			last = i;
1878			if (++i == adapter->num_tx_desc)
1879				i = 0;
1880		} else {
1881			ctxd->buffer_addr = htole64(seg_addr);
1882			ctxd->lower.data = htole32(
1883			adapter->txd_cmd | txd_lower | seg_len);
1884			ctxd->upper.data =
1885			    htole32(txd_upper);
1886			last = i;
1887			if (++i == adapter->num_tx_desc)
1888				i = 0;
1889		}
1890		tx_buffer->m_head = NULL;
1891		tx_buffer->next_eop = -1;
1892	}
1893
1894	txr->next_avail_desc = i;
1895	txr->tx_avail -= nsegs;
1896	if (tso_desc) /* TSO used an extra for sentinel */
1897		txr->tx_avail -= txd_used;
1898
1899	if (m_head->m_flags & M_VLANTAG) {
1900		/* Set the vlan id. */
1901		ctxd->upper.fields.special =
1902		    htole16(m_head->m_pkthdr.ether_vtag);
1903		/* Tell hardware to add tag */
1904		ctxd->lower.data |= htole32(E1000_TXD_CMD_VLE);
1905	}
1906
1907	tx_buffer->m_head = m_head;
1908	tx_buffer_mapped->map = tx_buffer->map;
1909	tx_buffer->map = map;
1910	bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE);
1911
1912	/*
1913	 * Last Descriptor of Packet
1914	 * needs End Of Packet (EOP)
1915	 * and Report Status (RS)
1916	 */
1917	ctxd->lower.data |=
1918	    htole32(E1000_TXD_CMD_EOP | E1000_TXD_CMD_RS);
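	/*
	 * Only this EOP descriptor requests status writeback (RS), so
	 * em_txeof() can poll a single DD bit per packet instead of
	 * one per descriptor.
	 */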
1919	/*
1920	 * Keep track in the first buffer which
1921	 * descriptor will be written back
1922	 */
1923	tx_buffer = &txr->tx_buffers[first];
1924	tx_buffer->next_eop = last;
1925
1926	/*
1927	 * Advance the Transmit Descriptor Tail (TDT), this tells the E1000
1928	 * that this frame is available to transmit.
1929	 */
1930	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
1931	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1932	E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me), i);
1933
1934	return (0);
1935}
1936
1937static void
1938em_set_promisc(struct adapter *adapter)
1939{
1940	struct ifnet	*ifp = adapter->ifp;
1941	u32		reg_rctl;
1942
1943	reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
1944
1945	if (ifp->if_flags & IFF_PROMISC) {
1946		reg_rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
1947		/* Turn this on if you want to see bad packets */
1948		if (em_debug_sbp)
1949			reg_rctl |= E1000_RCTL_SBP;
1950		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
1951	} else if (ifp->if_flags & IFF_ALLMULTI) {
1952		reg_rctl |= E1000_RCTL_MPE;
1953		reg_rctl &= ~E1000_RCTL_UPE;
1954		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
1955	}
1956}
1957
1958static void
1959em_disable_promisc(struct adapter *adapter)
1960{
1961	u32	reg_rctl;
1962
1963	reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
1964
1965	reg_rctl &=  (~E1000_RCTL_UPE);
1966	reg_rctl &=  (~E1000_RCTL_MPE);
1967	reg_rctl &=  (~E1000_RCTL_SBP);
1968	E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
1969}
1970
1971
1972/*********************************************************************
1973 *  Multicast Update
1974 *
1975 *  This routine is called whenever multicast address list is updated.
1976 *
1977 **********************************************************************/
1978
1979static void
1980em_set_multi(struct adapter *adapter)
1981{
1982	struct ifnet	*ifp = adapter->ifp;
1983	struct ifmultiaddr *ifma;
1984	u32 reg_rctl = 0;
1985	u8  *mta; /* Multicast array memory */
1986	int mcnt = 0;
1987
1988	IOCTL_DEBUGOUT("em_set_multi: begin");
1989
1990	if (adapter->hw.mac.type == e1000_82542 &&
1991	    adapter->hw.revision_id == E1000_REVISION_2) {
1992		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
1993		if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
1994			e1000_pci_clear_mwi(&adapter->hw);
1995		reg_rctl |= E1000_RCTL_RST;
1996		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
1997		msec_delay(5);
1998	}
1999
2000	/* Allocate temporary memory to setup array */
2001	mta = malloc(sizeof(u8) *
2002	    (ETH_ADDR_LEN * MAX_NUM_MULTICAST_ADDRESSES),
2003	    M_DEVBUF, M_NOWAIT | M_ZERO);
2004	if (mta == NULL)
2005		panic("em_set_multi memory failure\n");
2006
2007#if __FreeBSD_version < 800000
2008	IF_ADDR_LOCK(ifp);
2009#else
2010	if_maddr_rlock(ifp);
2011#endif
2012	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
2013		if (ifma->ifma_addr->sa_family != AF_LINK)
2014			continue;
2015
2016		if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
2017			break;
2018
2019		bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
2020		    &mta[mcnt * ETH_ADDR_LEN], ETH_ADDR_LEN);
2021		mcnt++;
2022	}
2023#if __FreeBSD_version < 800000
2024	IF_ADDR_UNLOCK(ifp);
2025#else
2026	if_maddr_runlock(ifp);
2027#endif
2028	if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES) {
2029		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2030		reg_rctl |= E1000_RCTL_MPE;
2031		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2032	} else
2033		e1000_update_mc_addr_list(&adapter->hw, mta, mcnt);
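	/* (Past the filter limit, the MPE bit set above accepts all multicast.) */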
2034
2035	if (adapter->hw.mac.type == e1000_82542 &&
2036	    adapter->hw.revision_id == E1000_REVISION_2) {
2037		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2038		reg_rctl &= ~E1000_RCTL_RST;
2039		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2040		msec_delay(5);
2041		if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
2042			e1000_pci_set_mwi(&adapter->hw);
2043	}
2044	free(mta, M_DEVBUF);
2045}
2046
2047
2048/*********************************************************************
2049 *  Timer routine
2050 *
2051 *  This routine checks for link status and updates statistics.
2052 *
2053 **********************************************************************/
2054
2055static void
2056em_local_timer(void *arg)
2057{
2058	struct adapter	*adapter = arg;
2059	struct ifnet	*ifp = adapter->ifp;
2060	struct tx_ring	*txr = adapter->tx_rings;
2061
2062	EM_CORE_LOCK_ASSERT(adapter);
2063
2064	em_update_link_status(adapter);
2065	em_update_stats_counters(adapter);
2066
2067	/* Reset LAA into RAR[0] on 82571 */
2068	if (e1000_get_laa_state_82571(&adapter->hw) == TRUE)
2069		e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);
2070
2071	if (em_display_debug_stats && (ifp->if_drv_flags & IFF_DRV_RUNNING))
2072		em_print_hw_stats(adapter);
2073
2074	/*
2075	** Check for time since any descriptor was cleaned
2076	*/
2077	for (int i = 0; i < adapter->num_queues; i++, txr++) {
2078		EM_TX_LOCK(txr);
2079		if (txr->watchdog_check == FALSE) {
2080			EM_TX_UNLOCK(txr);
2081			continue;
2082		}
2083		if ((ticks - txr->watchdog_time) > EM_WATCHDOG)
2084			goto hung;
2085		EM_TX_UNLOCK(txr);
2086	}
2087
2088	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
2089	return;
2090hung:
2091	device_printf(adapter->dev, "Watchdog timeout -- resetting\n");
2092	ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2093	adapter->watchdog_events++;
2094	EM_TX_UNLOCK(txr);
2095	em_init_locked(adapter);
2096}
2097
2098
2099static void
2100em_update_link_status(struct adapter *adapter)
2101{
2102	struct e1000_hw *hw = &adapter->hw;
2103	struct ifnet *ifp = adapter->ifp;
2104	device_t dev = adapter->dev;
2105	u32 link_check = 0;
2106
2107	/* Get the cached link value or read phy for real */
2108	switch (hw->phy.media_type) {
2109	case e1000_media_type_copper:
2110		if (hw->mac.get_link_status) {
2111			/* Do the work to read phy */
2112			e1000_check_for_link(hw);
2113			link_check = !hw->mac.get_link_status;
2114			if (link_check) /* ESB2 fix */
2115				e1000_cfg_on_link_up(hw);
2116		} else
2117			link_check = TRUE;
2118		break;
2119	case e1000_media_type_fiber:
2120		e1000_check_for_link(hw);
2121		link_check = (E1000_READ_REG(hw, E1000_STATUS) &
2122                                 E1000_STATUS_LU);
2123		break;
2124	case e1000_media_type_internal_serdes:
2125		e1000_check_for_link(hw);
2126		link_check = adapter->hw.mac.serdes_has_link;
2127		break;
2128	default:
2129	case e1000_media_type_unknown:
2130		break;
2131	}
2132
2133	/* Now check for a transition */
2134	if (link_check && (adapter->link_active == 0)) {
2135		e1000_get_speed_and_duplex(hw, &adapter->link_speed,
2136		    &adapter->link_duplex);
2137		/* Check if we must disable SPEED_MODE bit on PCI-E */
2138		if ((adapter->link_speed != SPEED_1000) &&
2139		    ((hw->mac.type == e1000_82571) ||
2140		    (hw->mac.type == e1000_82572))) {
2141			int tarc0;
2142			tarc0 = E1000_READ_REG(hw, E1000_TARC(0));
2143			tarc0 &= ~SPEED_MODE_BIT;
2144			E1000_WRITE_REG(hw, E1000_TARC(0), tarc0);
2145		}
2146		if (bootverbose)
2147			device_printf(dev, "Link is up %d Mbps %s\n",
2148			    adapter->link_speed,
2149			    ((adapter->link_duplex == FULL_DUPLEX) ?
2150			    "Full Duplex" : "Half Duplex"));
2151		adapter->link_active = 1;
2152		adapter->smartspeed = 0;
2153		ifp->if_baudrate = adapter->link_speed * 1000000;
2154		if_link_state_change(ifp, LINK_STATE_UP);
2155	} else if (!link_check && (adapter->link_active == 1)) {
2156		ifp->if_baudrate = adapter->link_speed = 0;
2157		adapter->link_duplex = 0;
2158		if (bootverbose)
2159			device_printf(dev, "Link is Down\n");
2160		adapter->link_active = 0;
2161		/* Link down, disable watchdog */
2162		// JFV change later
2163		//adapter->watchdog_check = FALSE;
2164		if_link_state_change(ifp, LINK_STATE_DOWN);
2165	}
2166}
2167
2168/*********************************************************************
2169 *
2170 *  This routine disables all traffic on the adapter by issuing a
2171 *  global reset on the MAC and deallocates TX/RX buffers.
2172 *
2173 *  This routine should always be called with BOTH the CORE
2174 *  and TX locks.
2175 **********************************************************************/
2176
2177static void
2178em_stop(void *arg)
2179{
2180	struct adapter	*adapter = arg;
2181	struct ifnet	*ifp = adapter->ifp;
2182	struct tx_ring	*txr = adapter->tx_rings;
2183
2184	EM_CORE_LOCK_ASSERT(adapter);
2185
2186	INIT_DEBUGOUT("em_stop: begin");
2187
2188	em_disable_intr(adapter);
2189	callout_stop(&adapter->timer);
2190
2191	/* Tell the stack that the interface is no longer active */
2192	ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
2193
2194	/* Unarm watchdog timer. */
2195	for (int i = 0; i < adapter->num_queues; i++, txr++) {
2196		EM_TX_LOCK(txr);
2197		txr->watchdog_check = FALSE;
2198		EM_TX_UNLOCK(txr);
2199	}
2200
2201	e1000_reset_hw(&adapter->hw);
2202	E1000_WRITE_REG(&adapter->hw, E1000_WUC, 0);
2203
2204	e1000_led_off(&adapter->hw);
2205	e1000_cleanup_led(&adapter->hw);
2206}
2207
2208
2209/*********************************************************************
2210 *
2211 *  Determine hardware revision.
2212 *
2213 **********************************************************************/
2214static void
2215em_identify_hardware(struct adapter *adapter)
2216{
2217	device_t dev = adapter->dev;
2218
2219	/* Make sure our PCI config space has the necessary stuff set */
2220	adapter->hw.bus.pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2);
2221	if (!((adapter->hw.bus.pci_cmd_word & PCIM_CMD_BUSMASTEREN) &&
2222	    (adapter->hw.bus.pci_cmd_word & PCIM_CMD_MEMEN))) {
2223		device_printf(dev, "Memory Access and/or Bus Master bits "
2224		    "were not set!\n");
2225		adapter->hw.bus.pci_cmd_word |=
2226		(PCIM_CMD_BUSMASTEREN | PCIM_CMD_MEMEN);
2227		pci_write_config(dev, PCIR_COMMAND,
2228		    adapter->hw.bus.pci_cmd_word, 2);
2229	}
2230
2231	/* Save off the information about this board */
2232	adapter->hw.vendor_id = pci_get_vendor(dev);
2233	adapter->hw.device_id = pci_get_device(dev);
2234	adapter->hw.revision_id = pci_read_config(dev, PCIR_REVID, 1);
2235	adapter->hw.subsystem_vendor_id =
2236	    pci_read_config(dev, PCIR_SUBVEND_0, 2);
2237	adapter->hw.subsystem_device_id =
2238	    pci_read_config(dev, PCIR_SUBDEV_0, 2);
2239
2240	/* Do Shared Code Init and Setup */
2241	if (e1000_set_mac_type(&adapter->hw)) {
2242		device_printf(dev, "Setup init failure\n");
2243		return;
2244	}
2245}
2246
2247static int
2248em_allocate_pci_resources(struct adapter *adapter)
2249{
2250	device_t	dev = adapter->dev;
2251	int		rid;
2252
2253	rid = PCIR_BAR(0);
2254	adapter->memory = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
2255	    &rid, RF_ACTIVE);
2256	if (adapter->memory == NULL) {
2257		device_printf(dev, "Unable to allocate bus resource: memory\n");
2258		return (ENXIO);
2259	}
2260	adapter->osdep.mem_bus_space_tag =
2261	    rman_get_bustag(adapter->memory);
2262	adapter->osdep.mem_bus_space_handle =
2263	    rman_get_bushandle(adapter->memory);
2264	adapter->hw.hw_addr = (u8 *)&adapter->osdep.mem_bus_space_handle;
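	/*
	 * Note: hw_addr is not a directly usable CPU pointer here; the
	 * register access macros go through the bus_space tag/handle
	 * saved in osdep above.
	 */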
2265
2266	/* Default to a single queue */
2267	adapter->num_queues = 1;
2268
2269	/*
2270	 * Setup MSI/X or MSI if PCI Express
2271	 */
2272	adapter->msix = em_setup_msix(adapter);
2273
2274	adapter->hw.back = &adapter->osdep;
2275
2276	return (0);
2277}
2278
2279/*********************************************************************
2280 *
2281 *  Setup the Legacy or MSI Interrupt handler
2282 *
2283 **********************************************************************/
2284int
2285em_allocate_legacy(struct adapter *adapter)
2286{
2287	device_t dev = adapter->dev;
2288	int error, rid = 0;
2289
2290	/* Manually turn off all interrupts */
2291	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2292
2293	if (adapter->msix == 1) /* using MSI */
2294		rid = 1;
2295	/* We allocate a single interrupt resource */
2296	adapter->res = bus_alloc_resource_any(dev,
2297	    SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2298	if (adapter->res == NULL) {
2299		device_printf(dev, "Unable to allocate bus resource: "
2300		    "interrupt\n");
2301		return (ENXIO);
2302	}
2303
2304	/*
2305	 * Allocate a fast interrupt and the associated
2306	 * deferred processing contexts.
2307	 */
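	/*
	 * Note: em_irq_fast is registered below as a filter (the
	 * handler argument is NULL), so it runs in primary interrupt
	 * context and defers the real work to these taskqueues.
	 */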
2308	TASK_INIT(&adapter->que_task, 0, em_handle_que, adapter);
2309	TASK_INIT(&adapter->link_task, 0, em_handle_link, adapter);
2310	adapter->tq = taskqueue_create_fast("em_taskq", M_NOWAIT,
2311	    taskqueue_thread_enqueue, &adapter->tq);
2312	taskqueue_start_threads(&adapter->tq, 1, PI_NET, "%s taskq",
2313	    device_get_nameunit(adapter->dev));
2314	if ((error = bus_setup_intr(dev, adapter->res, INTR_TYPE_NET,
2315	    em_irq_fast, NULL, adapter, &adapter->tag)) != 0) {
2316		device_printf(dev, "Failed to register fast interrupt "
2317			    "handler: %d\n", error);
2318		taskqueue_free(adapter->tq);
2319		adapter->tq = NULL;
2320		return (error);
2321	}
2322
2323	return (0);
2324}
2325
2326/*********************************************************************
2327 *
2328 *  Setup the MSIX Interrupt handlers
2329 *   This is not really Multiqueue, rather
2330 *   it's just multiple interrupt vectors.
2331 *
2332 **********************************************************************/
2333int
2334em_allocate_msix(struct adapter *adapter)
2335{
2336	device_t	dev = adapter->dev;
2337	struct		tx_ring *txr = adapter->tx_rings;
2338	struct		rx_ring *rxr = adapter->rx_rings;
2339	int		error, rid, vector = 0;
2340
2341
2342	/* Make sure all interrupts are disabled */
2343	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2344
2345	/* First set up ring resources */
2346	for (int i = 0; i < adapter->num_queues; i++, txr++, rxr++) {
2347
2348		/* RX ring */
2349		rid = vector + 1;
2350
2351		rxr->res = bus_alloc_resource_any(dev,
2352		    SYS_RES_IRQ, &rid, RF_ACTIVE);
2353		if (rxr->res == NULL) {
2354			device_printf(dev,
2355			    "Unable to allocate bus resource: "
2356			    "RX MSIX Interrupt %d\n", i);
2357			return (ENXIO);
2358		}
2359		if ((error = bus_setup_intr(dev, rxr->res,
2360		    INTR_TYPE_NET | INTR_MPSAFE, NULL, em_msix_rx,
2361		    rxr, &rxr->tag)) != 0) {
2362			device_printf(dev, "Failed to register RX handler");
2363			return (error);
2364		}
2365		rxr->msix = vector++; /* NOTE increment vector for TX */
2366		TASK_INIT(&rxr->rx_task, 0, em_handle_rx, rxr);
2367		rxr->tq = taskqueue_create_fast("em_rxq", M_NOWAIT,
2368		    taskqueue_thread_enqueue, &rxr->tq);
2369		taskqueue_start_threads(&rxr->tq, 1, PI_NET, "%s rxq",
2370		    device_get_nameunit(adapter->dev));
2371		/*
2372		** Set the bit to enable interrupt
2373		** in E1000_IMS -- bits 20 and 21
2374		** are for RX0 and RX1, note this has
2375		** NOTHING to do with the MSIX vector
2376		*/
2377		rxr->ims = 1 << (20 + i);
2378		adapter->ivars |= (8 | rxr->msix) << (i * 4);
2379
2380		/* TX ring */
2381		rid = vector + 1;
2382		txr->res = bus_alloc_resource_any(dev,
2383		    SYS_RES_IRQ, &rid, RF_ACTIVE);
2384		if (txr->res == NULL) {
2385			device_printf(dev,
2386			    "Unable to allocate bus resource: "
2387			    "TX MSIX Interrupt %d\n", i);
2388			return (ENXIO);
2389		}
2390		if ((error = bus_setup_intr(dev, txr->res,
2391		    INTR_TYPE_NET | INTR_MPSAFE, NULL, em_msix_tx,
2392		    txr, &txr->tag)) != 0) {
2393			device_printf(dev, "Failed to register TX handler");
2394			return (error);
2395		}
2396		txr->msix = vector++; /* Increment vector for next pass */
2397		TASK_INIT(&txr->tx_task, 0, em_handle_tx, txr);
2398		txr->tq = taskqueue_create_fast("em_txq", M_NOWAIT,
2399		    taskqueue_thread_enqueue, &txr->tq);
2400		taskqueue_start_threads(&txr->tq, 1, PI_NET, "%s txq",
2401		    device_get_nameunit(adapter->dev));
2402		/*
2403		** Set the bit to enable interrupt
2404		** in E1000_IMS -- bits 22 and 23
2405		** are for TX0 and TX1, note this has
2406		** NOTHING to do with the MSIX vector
2407		*/
2408		txr->ims = 1 << (22 + i);
2409		adapter->ivars |= (8 | txr->msix) << (8 + (i * 4));
2410	}
2411
2412	/* Link interrupt */
2413	++rid;
2414	adapter->res = bus_alloc_resource_any(dev,
2415	    SYS_RES_IRQ, &rid, RF_ACTIVE);
2416	if (!adapter->res) {
2417		device_printf(dev, "Unable to allocate "
2418		    "bus resource: Link interrupt [%d]\n", rid);
2419		return (ENXIO);
2420	}
2421	/* Set the link handler function */
2422	error = bus_setup_intr(dev, adapter->res,
2423	    INTR_TYPE_NET | INTR_MPSAFE, NULL,
2424	    em_msix_link, adapter, &adapter->tag);
2425	if (error) {
2426		adapter->res = NULL;
2427		device_printf(dev, "Failed to register LINK handler");
2428		return (error);
2429	}
2430	adapter->linkvec = vector;
2431	adapter->ivars |=  (8 | vector) << 16;
2432	adapter->ivars |= 0x80000000;
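	/*
	 * Illustrative IVAR layout (inferred from the code above, not
	 * quoted from the datasheet): each interrupt cause gets a 4-bit
	 * field, bit 3 marking the entry valid and bits 2:0 selecting
	 * the MSIX vector; RX queue i sits at bits i*4+3:i*4, TX queue
	 * i at bits 8+i*4+3:8+i*4, link at 19:16, and bit 31 enables
	 * the mapping.  With one queue (rx=0, tx=1, link=2) this gives
	 * ivars = 0x80000000 | (8|2)<<16 | (8|1)<<8 | (8|0) = 0x800A0908.
	 */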
2433
2434	return (0);
2435}
2436
2437
2438static void
2439em_free_pci_resources(struct adapter *adapter)
2440{
2441	device_t	dev = adapter->dev;
2442	struct tx_ring	*txr;
2443	struct rx_ring	*rxr;
2444	int		rid;
2445
2446
2447	/*
2448	** Release all the queue interrupt resources:
2449	*/
2450	for (int i = 0; i < adapter->num_queues; i++) {
2451		txr = &adapter->tx_rings[i];
2452		rxr = &adapter->rx_rings[i];
2453		rid = txr->msix +1;
2454		if (txr->tag != NULL) {
2455			bus_teardown_intr(dev, txr->res, txr->tag);
2456			txr->tag = NULL;
2457		}
2458		if (txr->res != NULL)
2459			bus_release_resource(dev, SYS_RES_IRQ,
2460			    rid, txr->res);
2461		rid = rxr->msix +1;
2462		if (rxr->tag != NULL) {
2463			bus_teardown_intr(dev, rxr->res, rxr->tag);
2464			rxr->tag = NULL;
2465		}
2466		if (rxr->res != NULL)
2467			bus_release_resource(dev, SYS_RES_IRQ,
2468			    rid, rxr->res);
2469	}
2470
2471	if (adapter->linkvec) /* we are doing MSIX */
2472		rid = adapter->linkvec + 1;
2473	else
2474		rid = (adapter->msix != 0) ? 1 : 0;
2475
2476	if (adapter->tag != NULL) {
2477		bus_teardown_intr(dev, adapter->res, adapter->tag);
2478		adapter->tag = NULL;
2479	}
2480
2481	if (adapter->res != NULL)
2482		bus_release_resource(dev, SYS_RES_IRQ, rid, adapter->res);
2483
2484
2485	if (adapter->msix)
2486		pci_release_msi(dev);
2487
2488	if (adapter->msix_mem != NULL)
2489		bus_release_resource(dev, SYS_RES_MEMORY,
2490		    PCIR_BAR(EM_MSIX_BAR), adapter->msix_mem);
2491
2492	if (adapter->memory != NULL)
2493		bus_release_resource(dev, SYS_RES_MEMORY,
2494		    PCIR_BAR(0), adapter->memory);
2495
2496	if (adapter->flash != NULL)
2497		bus_release_resource(dev, SYS_RES_MEMORY,
2498		    EM_FLASH, adapter->flash);
2499}
2500
2501/*
2502 * Setup MSI or MSI/X
2503 */
2504static int
2505em_setup_msix(struct adapter *adapter)
2506{
2507	device_t dev = adapter->dev;
2508	int val = 0;
2509
2510
2511	/* Setup MSI/X for Hartwell */
2512	if ((adapter->hw.mac.type == e1000_82574) &&
2513	    (em_enable_msix == TRUE)) {
2514		/* Map the MSIX BAR */
2515		int rid = PCIR_BAR(EM_MSIX_BAR);
2516		adapter->msix_mem = bus_alloc_resource_any(dev,
2517		    SYS_RES_MEMORY, &rid, RF_ACTIVE);
2518		if (!adapter->msix_mem) {
2519			/* May not be enabled */
2520			device_printf(adapter->dev,
2521			    "Unable to map MSIX table\n");
2522			goto msi;
2523		}
2524		val = pci_msix_count(dev);
2525		if (val != 5) {
2526			bus_release_resource(dev, SYS_RES_MEMORY,
2527			    PCIR_BAR(EM_MSIX_BAR), adapter->msix_mem);
2528			adapter->msix_mem = NULL;
2529			device_printf(adapter->dev,
2530			    "MSIX vectors wrong, using MSI\n");
2531			goto msi;
2532		}
2533		if (em_msix_queues == 2) {
2534			val = 5;
2535			adapter->num_queues = 2;
2536		} else {
2537			val = 3;
2538			adapter->num_queues = 1;
2539		}
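		/*
		 * Vector budget: one RX and one TX vector per queue
		 * plus the shared link vector, hence 5 vectors for
		 * two queues and 3 for one.
		 */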
2540		if (pci_alloc_msix(dev, &val) == 0) {
2541			device_printf(adapter->dev,
2542			    "Using MSIX interrupts "
2543			    "with %d vectors\n", val);
2544		}
2545
2546		return (val);
2547	}
2548msi:
2549	val = pci_msi_count(dev);
2550	if (val == 1 && pci_alloc_msi(dev, &val) == 0) {
2551		adapter->msix = 1;
2552		device_printf(adapter->dev, "Using MSI interrupt\n");
2553		return (val);
2554	}
2555	/* Should only happen due to manual intervention */
2556	device_printf(adapter->dev, "Setup MSIX failure\n");
2557	return (0);
2558}
2559
2560
2561/*********************************************************************
2562 *
2563 *  Initialize the hardware to a configuration
2564 *  as specified by the adapter structure.
2565 *
2566 **********************************************************************/
2567static void
2568em_reset(struct adapter *adapter)
2569{
2570	device_t	dev = adapter->dev;
2571	struct e1000_hw	*hw = &adapter->hw;
2572	u16		rx_buffer_size;
2573
2574	INIT_DEBUGOUT("em_reset: begin");
2575
2576	/* Set up smart power down as default off on newer adapters. */
2577	if (!em_smart_pwr_down && (hw->mac.type == e1000_82571 ||
2578	    hw->mac.type == e1000_82572)) {
2579		u16 phy_tmp = 0;
2580
2581		/* Speed up time to link by disabling smart power down. */
2582		e1000_read_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, &phy_tmp);
2583		phy_tmp &= ~IGP02E1000_PM_SPD;
2584		e1000_write_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, phy_tmp);
2585	}
2586
2587	/*
2588	 * These parameters control the automatic generation (Tx) and
2589	 * response (Rx) to Ethernet PAUSE frames.
2590	 * - High water mark should allow for at least two frames to be
2591	 *   received after sending an XOFF.
2592	 * - Low water mark works best when it is very near the high water mark.
2593	 *   This allows the receiver to restart by sending XON when it has
2594	 *   drained a bit. Here we use an arbitrary value of 1500 which will
2595	 *   restart after one full frame is pulled from the buffer. There
2596	 *   could be several smaller frames in the buffer and if so they will
2597	 *   not trigger the XON until their total number reduces the buffer
2598	 *   by 1500.
2599	 * - The pause time is fairly large at 1000 x 512ns = 512 usec.
2600	 */
2601	rx_buffer_size = ((E1000_READ_REG(hw, E1000_PBA) & 0xffff) << 10);
2602
2603	hw->fc.high_water = rx_buffer_size -
2604	    roundup2(adapter->max_frame_size, 1024);
2605	hw->fc.low_water = hw->fc.high_water - 1500;
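	/*
	 * Worked example (illustrative values only): if the PBA field
	 * reads 32KB, rx_buffer_size = 32 << 10 = 32768.  With a
	 * 1518-byte max frame, roundup2(1518, 1024) = 2048, so
	 * high_water = 32768 - 2048 = 30720 and low_water =
	 * 30720 - 1500 = 29220.
	 */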
2606
2607	if (hw->mac.type == e1000_80003es2lan)
2608		hw->fc.pause_time = 0xFFFF;
2609	else
2610		hw->fc.pause_time = EM_FC_PAUSE_TIME;
2611
2612	hw->fc.send_xon = TRUE;
2613
2614	/* Set flow control, use the tunable value if sane */
2615	if ((em_fc_setting >= 0) && (em_fc_setting < 4))
2616		hw->fc.requested_mode = em_fc_setting;
2617	else
2618		hw->fc.requested_mode = e1000_fc_none;
2619
2620	/* Override - workaround for PCHLAN issue */
2621	if (hw->mac.type == e1000_pchlan)
2622		hw->fc.requested_mode = e1000_fc_rx_pause;
2623
2624	/* Issue a global reset */
2625	e1000_reset_hw(hw);
2626	E1000_WRITE_REG(hw, E1000_WUC, 0);
2627
2628	if (e1000_init_hw(hw) < 0) {
2629		device_printf(dev, "Hardware Initialization Failed\n");
2630		return;
2631	}
2632
2633	E1000_WRITE_REG(hw, E1000_VET, ETHERTYPE_VLAN);
2634	e1000_get_phy_info(hw);
2635	e1000_check_for_link(hw);
2636	return;
2637}
2638
2639/*********************************************************************
2640 *
2641 *  Setup networking device structure and register an interface.
2642 *
2643 **********************************************************************/
2644static void
2645em_setup_interface(device_t dev, struct adapter *adapter)
2646{
2647	struct ifnet   *ifp;
2648
2649	INIT_DEBUGOUT("em_setup_interface: begin");
2650
2651	ifp = adapter->ifp = if_alloc(IFT_ETHER);
2652	if (ifp == NULL)
2653		panic("%s: can not if_alloc()", device_get_nameunit(dev));
2654	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
2655	ifp->if_mtu = ETHERMTU;
2656	ifp->if_init =  em_init;
2657	ifp->if_softc = adapter;
2658	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
2659	ifp->if_ioctl = em_ioctl;
2660	ifp->if_start = em_start;
2661	IFQ_SET_MAXLEN(&ifp->if_snd, adapter->num_tx_desc - 1);
2662	ifp->if_snd.ifq_drv_maxlen = adapter->num_tx_desc - 1;
2663	IFQ_SET_READY(&ifp->if_snd);
2664
2665	ether_ifattach(ifp, adapter->hw.mac.addr);
2666
2667	ifp->if_capabilities = ifp->if_capenable = 0;
2668
2669#ifdef EM_MULTIQUEUE
2670	/* Multiqueue tx functions */
2671	ifp->if_transmit = em_mq_start;
2672	ifp->if_qflush = em_qflush;
2673#endif
2674
2675	ifp->if_capabilities |= IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM;
2676	ifp->if_capenable |= IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM;
2677
2678	/* Enable TSO by default, can disable with ifconfig */
2679	ifp->if_capabilities |= IFCAP_TSO4;
2680	ifp->if_capenable |= IFCAP_TSO4;
2681
2682	/*
2683	 * Tell the upper layer(s) we
2684	 * support full VLAN capability
2685	 */
2686	ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header);
2687	ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU;
2688	ifp->if_capenable |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU;
2689
2690	/*
2691	** Don't turn this on by default: if vlans are
2692	** created on another pseudo device (e.g. lagg),
2693	** vlan events are not passed through, breaking
2694	** operation, but with HW FILTER off it works. If
2695	** using vlans directly on the em driver you can
2696	** enable this and get full hardware tag filtering.
2697	*/
2698	ifp->if_capabilities |= IFCAP_VLAN_HWFILTER;
2699
2700#ifdef DEVICE_POLLING
2701	ifp->if_capabilities |= IFCAP_POLLING;
2702#endif
2703
2704	/* Enable only WOL MAGIC by default */
2705	if (adapter->wol) {
2706		ifp->if_capabilities |= IFCAP_WOL;
2707		ifp->if_capenable |= IFCAP_WOL_MAGIC;
2708	}
2709
2710	/*
2711	 * Specify the media types supported by this adapter and register
2712	 * callbacks to update media and link information
2713	 */
2714	ifmedia_init(&adapter->media, IFM_IMASK,
2715	    em_media_change, em_media_status);
2716	if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
2717	    (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
2718		u_char fiber_type = IFM_1000_SX;	/* default type */
2719
2720		ifmedia_add(&adapter->media, IFM_ETHER | fiber_type | IFM_FDX,
2721			    0, NULL);
2722		ifmedia_add(&adapter->media, IFM_ETHER | fiber_type, 0, NULL);
2723	} else {
2724		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T, 0, NULL);
2725		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T | IFM_FDX,
2726			    0, NULL);
2727		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX,
2728			    0, NULL);
2729		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX | IFM_FDX,
2730			    0, NULL);
2731		if (adapter->hw.phy.type != e1000_phy_ife) {
2732			ifmedia_add(&adapter->media,
2733				IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
2734			ifmedia_add(&adapter->media,
2735				IFM_ETHER | IFM_1000_T, 0, NULL);
2736		}
2737	}
2738	ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL);
2739	ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO);
2740}
2741
2742
2743/*
2744 * Manage DMA'able memory.
2745 */
2746static void
2747em_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
2748{
2749	if (error)
2750		return;
2751	*(bus_addr_t *) arg = segs[0].ds_addr;
2752}
2753
2754static int
2755em_dma_malloc(struct adapter *adapter, bus_size_t size,
2756        struct em_dma_alloc *dma, int mapflags)
2757{
2758	int error;
2759
2760	error = bus_dma_tag_create(bus_get_dma_tag(adapter->dev), /* parent */
2761				EM_DBA_ALIGN, 0,	/* alignment, bounds */
2762				BUS_SPACE_MAXADDR,	/* lowaddr */
2763				BUS_SPACE_MAXADDR,	/* highaddr */
2764				NULL, NULL,		/* filter, filterarg */
2765				size,			/* maxsize */
2766				1,			/* nsegments */
2767				size,			/* maxsegsize */
2768				0,			/* flags */
2769				NULL,			/* lockfunc */
2770				NULL,			/* lockarg */
2771				&dma->dma_tag);
2772	if (error) {
2773		device_printf(adapter->dev,
2774		    "%s: bus_dma_tag_create failed: %d\n",
2775		    __func__, error);
2776		goto fail_0;
2777	}
2778
2779	error = bus_dmamem_alloc(dma->dma_tag, (void**) &dma->dma_vaddr,
2780	    BUS_DMA_NOWAIT | BUS_DMA_COHERENT, &dma->dma_map);
2781	if (error) {
2782		device_printf(adapter->dev,
2783		    "%s: bus_dmamem_alloc(%ju) failed: %d\n",
2784		    __func__, (uintmax_t)size, error);
2785		goto fail_2;
2786	}
2787
2788	dma->dma_paddr = 0;
2789	error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
2790	    size, em_dmamap_cb, &dma->dma_paddr, mapflags | BUS_DMA_NOWAIT);
2791	if (error || dma->dma_paddr == 0) {
2792		device_printf(adapter->dev,
2793		    "%s: bus_dmamap_load failed: %d\n",
2794		    __func__, error);
2795		goto fail_3;
2796	}
2797
2798	return (0);
2799
2800fail_3:
2801	bus_dmamap_unload(dma->dma_tag, dma->dma_map);
2802fail_2:
2803	bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
2804	bus_dma_tag_destroy(dma->dma_tag);
2805fail_0:
2806	dma->dma_map = NULL;
2807	dma->dma_tag = NULL;
2808
2809	return (error);
2810}
2811
2812static void
2813em_dma_free(struct adapter *adapter, struct em_dma_alloc *dma)
2814{
2815	if (dma->dma_tag == NULL)
2816		return;
2817	if (dma->dma_map != NULL) {
2818		bus_dmamap_sync(dma->dma_tag, dma->dma_map,
2819		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
2820		bus_dmamap_unload(dma->dma_tag, dma->dma_map);
2821		bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
2822		dma->dma_map = NULL;
2823	}
2824	bus_dma_tag_destroy(dma->dma_tag);
2825	dma->dma_tag = NULL;
2826}
2827
2828
2829/*********************************************************************
2830 *
2831 *  Allocate memory for the transmit and receive rings, and then
2832 *  the descriptors associated with each, called only once at attach.
2833 *
2834 **********************************************************************/
2835static int
2836em_allocate_queues(struct adapter *adapter)
2837{
2838	device_t		dev = adapter->dev;
2839	struct tx_ring		*txr = NULL;
2840	struct rx_ring		*rxr = NULL;
2841	int rsize, tsize, error = E1000_SUCCESS;
2842	int txconf = 0, rxconf = 0;
2843
2844
2845	/* Allocate the TX ring struct memory */
2846	if (!(adapter->tx_rings =
2847	    (struct tx_ring *) malloc(sizeof(struct tx_ring) *
2848	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2849		device_printf(dev, "Unable to allocate TX ring memory\n");
2850		error = ENOMEM;
2851		goto fail;
2852	}
2853
2854	/* Now allocate the RX */
2855	if (!(adapter->rx_rings =
2856	    (struct rx_ring *) malloc(sizeof(struct rx_ring) *
2857	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2858		device_printf(dev, "Unable to allocate RX ring memory\n");
2859		error = ENOMEM;
2860		goto rx_fail;
2861	}
2862
2863	tsize = roundup2(adapter->num_tx_desc *
2864	    sizeof(struct e1000_tx_desc), EM_DBA_ALIGN);
2865	/*
2866	 * Now set up the TX queues, txconf is needed to handle the
2867	 * possibility that things fail midcourse and we need to
2868	 * undo memory gracefully
2869	 */
2870	for (int i = 0; i < adapter->num_queues; i++, txconf++) {
2871		/* Set up some basics */
2872		txr = &adapter->tx_rings[i];
2873		txr->adapter = adapter;
2874		txr->me = i;
2875
2876		/* Initialize the TX lock */
2877		snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
2878		    device_get_nameunit(dev), txr->me);
2879		mtx_init(&txr->tx_mtx, txr->mtx_name, NULL, MTX_DEF);
2880
2881		if (em_dma_malloc(adapter, tsize,
2882			&txr->txdma, BUS_DMA_NOWAIT)) {
2883			device_printf(dev,
2884			    "Unable to allocate TX Descriptor memory\n");
2885			error = ENOMEM;
2886			goto err_tx_desc;
2887		}
2888		txr->tx_base = (struct e1000_tx_desc *)txr->txdma.dma_vaddr;
2889		bzero((void *)txr->tx_base, tsize);
2890
2891		if (em_allocate_transmit_buffers(txr)) {
2892			device_printf(dev,
2893			    "Critical Failure setting up transmit buffers\n");
2894			error = ENOMEM;
2895			goto err_tx_desc;
2896		}
2897#if __FreeBSD_version >= 800000
2898		/* Allocate a buf ring */
2899		txr->br = buf_ring_alloc(4096, M_DEVBUF,
2900		    M_WAITOK, &txr->tx_mtx);
2901#endif
2902	}
2903
2904	/*
2905	 * Next the RX queues...
2906	 */
2907	rsize = roundup2(adapter->num_rx_desc *
2908	    sizeof(struct e1000_rx_desc), EM_DBA_ALIGN);
2909	for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
2910		rxr = &adapter->rx_rings[i];
2911		rxr->adapter = adapter;
2912		rxr->me = i;
2913
2914		/* Initialize the RX lock */
2915		snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
2916		    device_get_nameunit(dev), rxr->me);
2917		mtx_init(&rxr->rx_mtx, rxr->mtx_name, NULL, MTX_DEF);
2918
2919		if (em_dma_malloc(adapter, rsize,
2920			&rxr->rxdma, BUS_DMA_NOWAIT)) {
2921			device_printf(dev,
2922			    "Unable to allocate RxDescriptor memory\n");
2923			error = ENOMEM;
2924			goto err_rx_desc;
2925		}
2926		rxr->rx_base = (struct e1000_rx_desc *)rxr->rxdma.dma_vaddr;
2927		bzero((void *)rxr->rx_base, rsize);
2928
2929		/* Allocate receive buffers for the ring */
2930		if (em_allocate_receive_buffers(rxr)) {
2931			device_printf(dev,
2932			    "Critical Failure setting up receive buffers\n");
2933			error = ENOMEM;
2934			goto err_rx_desc;
2935		}
2936	}
2937
2938	return (0);
2939
2940err_rx_desc:
2941	for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
2942		em_dma_free(adapter, &rxr->rxdma);
2943err_tx_desc:
2944	for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
2945		em_dma_free(adapter, &txr->txdma);
2946	free(adapter->rx_rings, M_DEVBUF);
2947rx_fail:
2948#if __FreeBSD_version >= 800000
2949	buf_ring_free(txr->br, M_DEVBUF);
2950#endif
2951	free(adapter->tx_rings, M_DEVBUF);
2952fail:
2953	return (error);
2954}
2955
2956
2957/*********************************************************************
2958 *
2959 *  Allocate memory for tx_buffer structures. The tx_buffer stores all
2960 *  the information needed to transmit a packet on the wire. This is
2961 *  called only once at attach, setup is done every reset.
2962 *
2963 **********************************************************************/
2964static int
2965em_allocate_transmit_buffers(struct tx_ring *txr)
2966{
2967	struct adapter *adapter = txr->adapter;
2968	device_t dev = adapter->dev;
2969	struct em_buffer *txbuf;
2970	int error, i;
2971
2972	/*
2973	 * Setup DMA descriptor areas.
2974	 */
2975	if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
2976			       1, 0,			/* alignment, bounds */
2977			       BUS_SPACE_MAXADDR,	/* lowaddr */
2978			       BUS_SPACE_MAXADDR,	/* highaddr */
2979			       NULL, NULL,		/* filter, filterarg */
2980			       EM_TSO_SIZE,		/* maxsize */
2981			       EM_MAX_SCATTER,		/* nsegments */
2982			       PAGE_SIZE,		/* maxsegsize */
2983			       0,			/* flags */
2984			       NULL,			/* lockfunc */
2985			       NULL,			/* lockfuncarg */
2986			       &txr->txtag))) {
2987		device_printf(dev,"Unable to allocate TX DMA tag\n");
2988		goto fail;
2989	}
2990
2991	if (!(txr->tx_buffers =
2992	    (struct em_buffer *) malloc(sizeof(struct em_buffer) *
2993	    adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2994		device_printf(dev, "Unable to allocate tx_buffer memory\n");
2995		error = ENOMEM;
2996		goto fail;
2997	}
2998
2999	/* Create the descriptor buffer dma maps */
3000	txbuf = txr->tx_buffers;
3001	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3002		error = bus_dmamap_create(txr->txtag, 0, &txbuf->map);
3003		if (error != 0) {
3004			device_printf(dev, "Unable to create TX DMA map\n");
3005			goto fail;
3006		}
3007	}
3008
3009	return (0);
3010fail:
3011	/* We free all, it handles case where we are in the middle */
3012	em_free_transmit_structures(adapter);
3013	return (error);
3014}
3015
3016/*********************************************************************
3017 *
3018 *  Initialize a transmit ring.
3019 *
3020 **********************************************************************/
3021static void
3022em_setup_transmit_ring(struct tx_ring *txr)
3023{
3024	struct adapter *adapter = txr->adapter;
3025	struct em_buffer *txbuf;
3026	int i;
3027
3028	/* Clear the old descriptor contents */
3029	EM_TX_LOCK(txr);
3030	bzero((void *)txr->tx_base,
3031	      (sizeof(struct e1000_tx_desc)) * adapter->num_tx_desc);
3032	/* Reset indices */
3033	txr->next_avail_desc = 0;
3034	txr->next_to_clean = 0;
3035
3036	/* Free any existing tx buffers. */
3037	txbuf = txr->tx_buffers;
3038	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3039		if (txbuf->m_head != NULL) {
3040			bus_dmamap_sync(txr->txtag, txbuf->map,
3041			    BUS_DMASYNC_POSTWRITE);
3042			bus_dmamap_unload(txr->txtag, txbuf->map);
3043			m_freem(txbuf->m_head);
3044			txbuf->m_head = NULL;
3045		}
3046		/* clear the watch index */
3047		txbuf->next_eop = -1;
3048	}
3049
3050	/* Set number of descriptors available */
3051	txr->tx_avail = adapter->num_tx_desc;
3052
3053	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3054	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3055	EM_TX_UNLOCK(txr);
3056}
3057
3058/*********************************************************************
3059 *
3060 *  Initialize all transmit rings.
3061 *
3062 **********************************************************************/
3063static void
3064em_setup_transmit_structures(struct adapter *adapter)
3065{
3066	struct tx_ring *txr = adapter->tx_rings;
3067
3068	for (int i = 0; i < adapter->num_queues; i++, txr++)
3069		em_setup_transmit_ring(txr);
3070
3071	return;
3072}
3073
3074/*********************************************************************
3075 *
3076 *  Enable transmit unit.
3077 *
3078 **********************************************************************/
3079static void
3080em_initialize_transmit_unit(struct adapter *adapter)
3081{
3082	struct tx_ring	*txr = adapter->tx_rings;
3083	struct e1000_hw	*hw = &adapter->hw;
3084	u32	tctl, tarc, tipg = 0;
3085
3086	INIT_DEBUGOUT("em_initialize_transmit_unit: begin");
3087
3088	for (int i = 0; i < adapter->num_queues; i++, txr++) {
3089		u64 bus_addr = txr->txdma.dma_paddr;
3090		/* Base and Len of TX Ring */
3091		E1000_WRITE_REG(hw, E1000_TDLEN(i),
3092	    	    adapter->num_tx_desc * sizeof(struct e1000_tx_desc));
3093		E1000_WRITE_REG(hw, E1000_TDBAH(i),
3094	    	    (u32)(bus_addr >> 32));
3095		E1000_WRITE_REG(hw, E1000_TDBAL(i),
3096	    	    (u32)bus_addr);
3097		/* Init the HEAD/TAIL indices */
3098		E1000_WRITE_REG(hw, E1000_TDT(i), 0);
3099		E1000_WRITE_REG(hw, E1000_TDH(i), 0);
3100
3101		HW_DEBUGOUT2("Base = %x, Length = %x\n",
3102		    E1000_READ_REG(&adapter->hw, E1000_TDBAL(i)),
3103		    E1000_READ_REG(&adapter->hw, E1000_TDLEN(i)));
3104
3105		txr->watchdog_check = FALSE;
3106	}
3107
3108	/* Set the default values for the Tx Inter Packet Gap timer */
3109	switch (adapter->hw.mac.type) {
3110	case e1000_82542:
3111		tipg = DEFAULT_82542_TIPG_IPGT;
3112		tipg |= DEFAULT_82542_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT;
3113		tipg |= DEFAULT_82542_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT;
3114		break;
3115	case e1000_80003es2lan:
3116		tipg = DEFAULT_82543_TIPG_IPGR1;
3117		tipg |= DEFAULT_80003ES2LAN_TIPG_IPGR2 <<
3118		    E1000_TIPG_IPGR2_SHIFT;
3119		break;
3120	default:
3121		if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
3122		    (adapter->hw.phy.media_type ==
3123		    e1000_media_type_internal_serdes))
3124			tipg = DEFAULT_82543_TIPG_IPGT_FIBER;
3125		else
3126			tipg = DEFAULT_82543_TIPG_IPGT_COPPER;
3127		tipg |= DEFAULT_82543_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT;
3128		tipg |= DEFAULT_82543_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT;
3129	}
3130
3131	E1000_WRITE_REG(&adapter->hw, E1000_TIPG, tipg);
3132	E1000_WRITE_REG(&adapter->hw, E1000_TIDV, adapter->tx_int_delay.value);
3133
3134	if (adapter->hw.mac.type >= e1000_82540)
3135		E1000_WRITE_REG(&adapter->hw, E1000_TADV,
3136		    adapter->tx_abs_int_delay.value);
3137
3138	if ((adapter->hw.mac.type == e1000_82571) ||
3139	    (adapter->hw.mac.type == e1000_82572)) {
3140		tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
3141		tarc |= SPEED_MODE_BIT;
3142		E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3143	} else if (adapter->hw.mac.type == e1000_80003es2lan) {
3144		tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
3145		tarc |= 1;
3146		E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3147		tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(1));
3148		tarc |= 1;
3149		E1000_WRITE_REG(&adapter->hw, E1000_TARC(1), tarc);
3150	}
3151
3152	adapter->txd_cmd = E1000_TXD_CMD_IFCS;
3153	if (adapter->tx_int_delay.value > 0)
3154		adapter->txd_cmd |= E1000_TXD_CMD_IDE;
3155
3156	/* Program the Transmit Control Register */
3157	tctl = E1000_READ_REG(&adapter->hw, E1000_TCTL);
3158	tctl &= ~E1000_TCTL_CT;
3159	tctl |= (E1000_TCTL_PSP | E1000_TCTL_RTLC | E1000_TCTL_EN |
3160		   (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT));
3161
3162	if (adapter->hw.mac.type >= e1000_82571)
3163		tctl |= E1000_TCTL_MULR;
3164
3165	/* This write will effectively turn on the transmit unit. */
3166	E1000_WRITE_REG(&adapter->hw, E1000_TCTL, tctl);
3167
3168}
3169
3170
3171/*********************************************************************
3172 *
3173 *  Free all transmit rings.
3174 *
3175 **********************************************************************/
3176static void
3177em_free_transmit_structures(struct adapter *adapter)
3178{
3179	struct tx_ring *txr = adapter->tx_rings;
3180
3181	for (int i = 0; i < adapter->num_queues; i++, txr++) {
3182		EM_TX_LOCK(txr);
3183		em_free_transmit_buffers(txr);
3184		em_dma_free(adapter, &txr->txdma);
3185		EM_TX_UNLOCK(txr);
3186		EM_TX_LOCK_DESTROY(txr);
3187	}
3188
3189	free(adapter->tx_rings, M_DEVBUF);
3190}
3191
3192/*********************************************************************
3193 *
3194 *  Free transmit ring related data structures.
3195 *
3196 **********************************************************************/
3197static void
3198em_free_transmit_buffers(struct tx_ring *txr)
3199{
3200	struct adapter		*adapter = txr->adapter;
3201	struct em_buffer	*txbuf;
3202
3203	INIT_DEBUGOUT("free_transmit_ring: begin");
3204
3205	if (txr->tx_buffers == NULL)
3206		return;
3207
3208	for (int i = 0; i < adapter->num_tx_desc; i++) {
3209		txbuf = &txr->tx_buffers[i];
3210		if (txbuf->m_head != NULL) {
3211			bus_dmamap_sync(txr->txtag, txbuf->map,
3212			    BUS_DMASYNC_POSTWRITE);
3213			bus_dmamap_unload(txr->txtag,
3214			    txbuf->map);
3215			m_freem(txbuf->m_head);
3216			txbuf->m_head = NULL;
3217			if (txbuf->map != NULL) {
3218				bus_dmamap_destroy(txr->txtag,
3219				    txbuf->map);
3220				txbuf->map = NULL;
3221			}
3222		} else if (txbuf->map != NULL) {
3223			bus_dmamap_unload(txr->txtag,
3224			    txbuf->map);
3225			bus_dmamap_destroy(txr->txtag,
3226			    txbuf->map);
3227			txbuf->map = NULL;
3228		}
3229	}
3230#if __FreeBSD_version >= 800000
3231	if (txr->br != NULL)
3232		buf_ring_free(txr->br, M_DEVBUF);
3233#endif
3234	if (txr->tx_buffers != NULL) {
3235		free(txr->tx_buffers, M_DEVBUF);
3236		txr->tx_buffers = NULL;
3237	}
3238	if (txr->txtag != NULL) {
3239		bus_dma_tag_destroy(txr->txtag);
3240		txr->txtag = NULL;
3241	}
3242	return;
3243}
3244
3245
3246/*********************************************************************
3247 *
3248 *  The offload context needs to be set when we transfer the first
3249 *  packet of a particular protocol (TCP/UDP). This routine has been
3250 *  enhanced to deal with inserted VLAN headers, and IPV6 (not complete)
3251 *
3252 *  Added back the old method of keeping the current context type
3253 *  and not setting if unnecessary, as this is reported to be a
3254 *  big performance win.  -jfv
3255 **********************************************************************/
3256static void
3257em_transmit_checksum_setup(struct tx_ring *txr, struct mbuf *mp,
3258    u32 *txd_upper, u32 *txd_lower)
3259{
3260	struct adapter			*adapter = txr->adapter;
3261	struct e1000_context_desc	*TXD = NULL;
3262	struct em_buffer *tx_buffer;
3263	struct ether_vlan_header *eh;
3264	struct ip *ip = NULL;
3265	struct ip6_hdr *ip6;
3266	int cur, ehdrlen;
3267	u32 cmd, hdr_len, ip_hlen;
3268	u16 etype;
3269	u8 ipproto;
3270
3271
3272	cmd = hdr_len = ipproto = 0;
3273	cur = txr->next_avail_desc;
3274
3275	/*
3276	 * Determine where frame payload starts.
3277	 * Jump over vlan headers if already present,
3278	 * helpful for QinQ too.
3279	 */
3280	eh = mtod(mp, struct ether_vlan_header *);
3281	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
3282		etype = ntohs(eh->evl_proto);
3283		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3284	} else {
3285		etype = ntohs(eh->evl_encap_proto);
3286		ehdrlen = ETHER_HDR_LEN;
3287	}
3288
3289	/*
3290	 * We only support TCP/UDP for IPv4 and IPv6 for the moment.
3291	 * TODO: Support SCTP too when it hits the tree.
3292	 */
3293	switch (etype) {
3294	case ETHERTYPE_IP:
3295		ip = (struct ip *)(mp->m_data + ehdrlen);
3296		ip_hlen = ip->ip_hl << 2;
3297
3298		/* Setup of IP header checksum. */
3299		if (mp->m_pkthdr.csum_flags & CSUM_IP) {
3300			/*
3301			 * Start offset for header checksum calculation.
3302			 * End offset for header checksum calculation.
3303			 * Offset of place to put the checksum.
3304			 */
3305			TXD = (struct e1000_context_desc *)
3306			    &txr->tx_base[cur];
3307			TXD->lower_setup.ip_fields.ipcss = ehdrlen;
3308			TXD->lower_setup.ip_fields.ipcse =
3309			    htole16(ehdrlen + ip_hlen);
3310			TXD->lower_setup.ip_fields.ipcso =
3311			    ehdrlen + offsetof(struct ip, ip_sum);
3312			cmd |= E1000_TXD_CMD_IP;
3313			*txd_upper |= E1000_TXD_POPTS_IXSM << 8;
3314		}
3315
3316		if (mp->m_len < ehdrlen + ip_hlen)
3317			return;	/* failure */
3318
3319		hdr_len = ehdrlen + ip_hlen;
3320		ipproto = ip->ip_p;
3321
3322		break;
3323	case ETHERTYPE_IPV6:
3324		ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
3325		ip_hlen = sizeof(struct ip6_hdr); /* XXX: No header stacking. */
3326
3327		if (mp->m_len < ehdrlen + ip_hlen)
3328			return;	/* failure */
3329
3330		/* IPv6 doesn't have a header checksum. */
3331
3332		hdr_len = ehdrlen + ip_hlen;
3333		ipproto = ip6->ip6_nxt;
3334
3335		break;
3336	default:
3337		*txd_upper = 0;
3338		*txd_lower = 0;
3339		return;
3340	}
3341
3342	switch (ipproto) {
3343	case IPPROTO_TCP:
3344		if (mp->m_pkthdr.csum_flags & CSUM_TCP) {
3345			*txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
3346			*txd_upper |= E1000_TXD_POPTS_TXSM << 8;
3347			/* no need for context if already set */
3348			if (txr->last_hw_offload == CSUM_TCP)
3349				return;
3350			txr->last_hw_offload = CSUM_TCP;
3351			/*
3352			 * Start offset for payload checksum calculation.
3353			 * End offset for payload checksum calculation.
3354			 * Offset of place to put the checksum.
3355			 */
3356			TXD = (struct e1000_context_desc *)
3357			    &txr->tx_base[cur];
3358			TXD->upper_setup.tcp_fields.tucss = hdr_len;
3359			TXD->upper_setup.tcp_fields.tucse = htole16(0);
3360			TXD->upper_setup.tcp_fields.tucso =
3361			    hdr_len + offsetof(struct tcphdr, th_sum);
3362			cmd |= E1000_TXD_CMD_TCP;
3363		}
3364		break;
3365	case IPPROTO_UDP:
3366	{
3367		if (mp->m_pkthdr.csum_flags & CSUM_UDP) {
3368			*txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
3369			*txd_upper |= E1000_TXD_POPTS_TXSM << 8;
3370			/* no need for context if already set */
3371			if (txr->last_hw_offload == CSUM_UDP)
3372				return;
3373			txr->last_hw_offload = CSUM_UDP;
3374			/*
3375			 * Start offset for header checksum calculation.
3376			 * End offset for header checksum calculation.
3377			 * Offset of place to put the checksum.
3378			 */
3379			TXD = (struct e1000_context_desc *)
3380			    &txr->tx_base[cur];
3381			TXD->upper_setup.tcp_fields.tucss = hdr_len;
3382			TXD->upper_setup.tcp_fields.tucse = htole16(0);
3383			TXD->upper_setup.tcp_fields.tucso =
3384			    hdr_len + offsetof(struct udphdr, uh_sum);
3385		}
3386		/* Fall Thru */
3387	}
3388	default:
3389		break;
3390	}
3391
3392	TXD->tcp_seg_setup.data = htole32(0);
3393	TXD->cmd_and_length =
3394	    htole32(adapter->txd_cmd | E1000_TXD_CMD_DEXT | cmd);
3395	tx_buffer = &txr->tx_buffers[cur];
3396	tx_buffer->m_head = NULL;
3397	tx_buffer->next_eop = -1;
3398
3399	if (++cur == adapter->num_tx_desc)
3400		cur = 0;
3401
3402	txr->tx_avail--;
3403	txr->next_avail_desc = cur;
3404}
3405
3406
3407/**********************************************************************
3408 *
3409 *  Setup work for hardware segmentation offload (TSO)
3410 *
3411 **********************************************************************/
3412static bool
3413em_tso_setup(struct tx_ring *txr, struct mbuf *mp, u32 *txd_upper,
3414   u32 *txd_lower)
3415{
3416	struct adapter			*adapter = txr->adapter;
3417	struct e1000_context_desc	*TXD;
3418	struct em_buffer		*tx_buffer;
3419	struct ether_vlan_header	*eh;
3420	struct ip			*ip;
3421	struct ip6_hdr			*ip6;
3422	struct tcphdr			*th;
3423	int cur, ehdrlen, hdr_len, ip_hlen, isip6;
3424	u16 etype;
3425
3426	/*
3427	 * This function could/should be extended to support IP/IPv6
3428	 * fragmentation as well.  But as they say, one step at a time.
3429	 */
3430
3431	/*
3432	 * Determine where frame payload starts.
3433	 * Jump over vlan headers if already present,
3434	 * helpful for QinQ too.
3435	 */
3436	eh = mtod(mp, struct ether_vlan_header *);
3437	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
3438		etype = ntohs(eh->evl_proto);
3439		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3440	} else {
3441		etype = ntohs(eh->evl_encap_proto);
3442		ehdrlen = ETHER_HDR_LEN;
3443	}
3444
3445	/* Ensure we have at least the IP+TCP header in the first mbuf. */
3446	if (mp->m_len < ehdrlen + sizeof(struct ip) + sizeof(struct tcphdr))
3447		return FALSE;	/* -1 */
3448
3449	/*
3450	 * We only support TCP for IPv4 and IPv6 (notyet) for the moment.
3451	 * TODO: Support SCTP too when it hits the tree.
3452	 */
3453	switch (etype) {
3454	case ETHERTYPE_IP:
3455		isip6 = 0;
3456		ip = (struct ip *)(mp->m_data + ehdrlen);
3457		if (ip->ip_p != IPPROTO_TCP)
3458			return FALSE;	/* 0 */
3459		ip->ip_len = 0;
3460		ip->ip_sum = 0;
3461		ip_hlen = ip->ip_hl << 2;
3462		if (mp->m_len < ehdrlen + ip_hlen + sizeof(struct tcphdr))
3463			return FALSE;	/* -1 */
3464		th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
3465#if 1
3466		th->th_sum = in_pseudo(ip->ip_src.s_addr,
3467		    ip->ip_dst.s_addr, htons(IPPROTO_TCP));
3468#else
3469		th->th_sum = mp->m_pkthdr.csum_data;
3470#endif
3471		break;
3472	case ETHERTYPE_IPV6:
3473		isip6 = 1;
3474		return FALSE;			/* Not supported yet. */
3475		ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
3476		if (ip6->ip6_nxt != IPPROTO_TCP)
3477			return FALSE;	/* 0 */
3478		ip6->ip6_plen = 0;
3479		ip_hlen = sizeof(struct ip6_hdr); /* XXX: no header stacking. */
3480		if (mp->m_len < ehdrlen + ip_hlen + sizeof(struct tcphdr))
3481			return FALSE;	/* -1 */
3482		th = (struct tcphdr *)((caddr_t)ip6 + ip_hlen);
3483#if 0
3484		th->th_sum = in6_pseudo(ip6->ip6_src, ip6->ip6_dst,
3485		    htons(IPPROTO_TCP));	/* XXX: function notyet. */
3486#else
3487		th->th_sum = mp->m_pkthdr.csum_data;
3488#endif
3489		break;
3490	default:
3491		return FALSE;
3492	}
3493	hdr_len = ehdrlen + ip_hlen + (th->th_off << 2);
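	/*
	 * e.g. an untagged IPv4/TCP frame with no options:
	 * ehdrlen = 14, ip_hlen = 20, th_off = 5 (20 bytes),
	 * so hdr_len = 54.
	 */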
3494
3495	*txd_lower = (E1000_TXD_CMD_DEXT |	/* Extended descr type */
3496		      E1000_TXD_DTYP_D |	/* Data descr type */
3497		      E1000_TXD_CMD_TSE);	/* Do TSE on this packet */
3498
3499	/* IP and/or TCP header checksum calculation and insertion. */
3500	*txd_upper = ((isip6 ? 0 : E1000_TXD_POPTS_IXSM) |
3501		      E1000_TXD_POPTS_TXSM) << 8;
3502
3503	cur = txr->next_avail_desc;
3504	tx_buffer = &txr->tx_buffers[cur];
3505	TXD = (struct e1000_context_desc *) &txr->tx_base[cur];
3506
3507	/* IPv6 doesn't have a header checksum. */
3508	if (!isip6) {
3509		/*
3510		 * Start offset for header checksum calculation.
3511		 * End offset for header checksum calculation.
3512		 * Offset of place to put the checksum.
3513		 */
3514		TXD->lower_setup.ip_fields.ipcss = ehdrlen;
3515		TXD->lower_setup.ip_fields.ipcse =
3516		    htole16(ehdrlen + ip_hlen - 1);
3517		TXD->lower_setup.ip_fields.ipcso =
3518		    ehdrlen + offsetof(struct ip, ip_sum);
3519	}
3520	/*
3521	 * Start offset for payload checksum calculation.
3522	 * End offset for payload checksum calculation.
3523	 * Offset of place to put the checksum.
3524	 */
3525	TXD->upper_setup.tcp_fields.tucss =
3526	    ehdrlen + ip_hlen;
3527	TXD->upper_setup.tcp_fields.tucse = 0;
3528	TXD->upper_setup.tcp_fields.tucso =
3529	    ehdrlen + ip_hlen + offsetof(struct tcphdr, th_sum);
3530	/*
3531	 * Payload size per packet w/o any headers.
3532	 * Length of all headers up to payload.
3533	 */
3534	TXD->tcp_seg_setup.fields.mss = htole16(mp->m_pkthdr.tso_segsz);
3535	TXD->tcp_seg_setup.fields.hdr_len = hdr_len;
3536
3537	TXD->cmd_and_length = htole32(adapter->txd_cmd |
3538				E1000_TXD_CMD_DEXT |	/* Extended descr */
3539				E1000_TXD_CMD_TSE |	/* TSE context */
3540				(isip6 ? 0 : E1000_TXD_CMD_IP) |
3541				E1000_TXD_CMD_TCP |	/* Do TCP checksum */
3542				(mp->m_pkthdr.len - (hdr_len))); /* Total len */
3543
3544	tx_buffer->m_head = NULL;
3545	tx_buffer->next_eop = -1;
3546
3547	if (++cur == adapter->num_tx_desc)
3548		cur = 0;
3549
3550	txr->tx_avail--;
3551	txr->next_avail_desc = cur;
3552	txr->tx_tso = TRUE;
3553
3554	return TRUE;
3555}
3556
3557
3558/**********************************************************************
3559 *
3560 *  Examine each tx_buffer in the used queue. If the hardware is done
3561 *  processing the packet then free associated resources. The
3562 *  tx_buffer is put back on the free queue.
3563 *
3564 **********************************************************************/
3565static bool
3566em_txeof(struct tx_ring *txr)
3567{
3568	struct adapter	*adapter = txr->adapter;
3569        int first, last, done, num_avail;
3570        struct em_buffer *tx_buffer;
3571        struct e1000_tx_desc   *tx_desc, *eop_desc;
3572	struct ifnet   *ifp = adapter->ifp;
3573
3574	EM_TX_LOCK_ASSERT(txr);
3575
3576        if (txr->tx_avail == adapter->num_tx_desc)
3577                return (FALSE);
3578
3579        num_avail = txr->tx_avail;
3580        first = txr->next_to_clean;
3581        tx_desc = &txr->tx_base[first];
3582        tx_buffer = &txr->tx_buffers[first];
3583	last = tx_buffer->next_eop;
3584        eop_desc = &txr->tx_base[last];
3585
3586	/*
3587	 * Get the index of the first descriptor
3588	 * AFTER the EOP of the first packet, so
3589	 * that we can do the simple comparison
3590	 * in the inner while loop.
3591	 */
3592	if (++last == adapter->num_tx_desc)
3593 		last = 0;
3594	done = last;
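	/*
	 * e.g. if the EOP sits in the last slot of the ring, 'done'
	 * wraps to 0 and the inner loop below still terminates.
	 */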
3595
3596        bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3597            BUS_DMASYNC_POSTREAD);
3598
3599        while (eop_desc->upper.fields.status & E1000_TXD_STAT_DD) {
3600		/* We clean the range of the packet */
3601		while (first != done) {
3602                	tx_desc->upper.data = 0;
3603                	tx_desc->lower.data = 0;
3604                	tx_desc->buffer_addr = 0;
3605                	++num_avail;
3606
3607			if (tx_buffer->m_head) {
3608				ifp->if_opackets++;
3609				bus_dmamap_sync(txr->txtag,
3610				    tx_buffer->map,
3611				    BUS_DMASYNC_POSTWRITE);
3612				bus_dmamap_unload(txr->txtag,
3613				    tx_buffer->map);
3614
3615                        	m_freem(tx_buffer->m_head);
3616                        	tx_buffer->m_head = NULL;
3617                	}
3618			tx_buffer->next_eop = -1;
3619			txr->watchdog_time = ticks;
3620
3621	                if (++first == adapter->num_tx_desc)
3622				first = 0;
3623
3624	                tx_buffer = &txr->tx_buffers[first];
3625			tx_desc = &txr->tx_base[first];
3626		}
3627		/* See if we can continue to the next packet */
3628		last = tx_buffer->next_eop;
3629		if (last != -1) {
3630        		eop_desc = &txr->tx_base[last];
3631			/* Get new done point */
3632			if (++last == adapter->num_tx_desc) last = 0;
3633			done = last;
3634		} else
3635			break;
3636        }
3637        bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3638            BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3639
3640        txr->next_to_clean = first;
3641
3642        /*
3643         * If we have enough room, clear IFF_DRV_OACTIVE to
3644         * tell the stack that it is OK to send packets.
3645         * If there are no pending descriptors, clear the watchdog.
3646         */
3647        if (num_avail > EM_TX_CLEANUP_THRESHOLD) {
3648                ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
3649                if (num_avail == adapter->num_tx_desc) {
3650			txr->watchdog_check = FALSE;
3651        		txr->tx_avail = num_avail;
3652			return (FALSE);
3653		}
3654        }
3655
3656        txr->tx_avail = num_avail;
3657	return (TRUE);
3658}
3659
3660
3661/*********************************************************************
3662 *
3663 *  Refresh RX descriptor mbufs from system mbuf buffer pool.
3664 *
3665 **********************************************************************/
3666static void
3667em_refresh_mbufs(struct rx_ring *rxr, int limit)
3668{
3669	struct adapter		*adapter = rxr->adapter;
3670	struct mbuf		*m;
3671	bus_dma_segment_t	segs[1];
3672	bus_dmamap_t		map;
3673	struct em_buffer	*rxbuf;
3674	int			i, error, nsegs, cleaned;
3675
3676	i = rxr->next_to_refresh;
3677	cleaned = -1;
3678	while (i != limit) {
3679		m = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR);
3680		if (m == NULL)
3681			goto update;
3682		m->m_len = m->m_pkthdr.len = MCLBYTES;
3683
3684		if (adapter->max_frame_size <= (MCLBYTES - ETHER_ALIGN))
3685			m_adj(m, ETHER_ALIGN);
3686
3687		/*
3688		 * Using memory from the mbuf cluster pool, invoke the
3689		 * bus_dma machinery to arrange the memory mapping.
3690		 */
3691		error = bus_dmamap_load_mbuf_sg(rxr->rxtag, rxr->rx_sparemap,
3692		    m, segs, &nsegs, BUS_DMA_NOWAIT);
3693		if (error != 0) {
3694			m_free(m);
3695			goto update;
3696		}
3697
3698		/* If nsegs is wrong then the stack is corrupt. */
3699		KASSERT(nsegs == 1, ("Too many segments returned!"));
3700
3701		rxbuf = &rxr->rx_buffers[i];
3702		if (rxbuf->m_head != NULL)
3703			bus_dmamap_unload(rxr->rxtag, rxbuf->map);
3704
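		/*
		 * Swap the freshly loaded spare map into this slot; the
		 * slot's old map becomes the new spare.  A load failure
		 * above bails out before this point, so a slot is never
		 * left with an unloaded map.
		 */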
3705		map = rxbuf->map;
3706		rxbuf->map = rxr->rx_sparemap;
3707		rxr->rx_sparemap = map;
3708		bus_dmamap_sync(rxr->rxtag,
3709		    rxbuf->map, BUS_DMASYNC_PREREAD);
3710		rxbuf->m_head = m;
3711		rxr->rx_base[i].buffer_addr = htole64(segs[0].ds_addr);
3712
3713		cleaned = i;
3714		/* Calculate next index */
3715		if (++i == adapter->num_rx_desc)
3716			i = 0;
3717		/* This is the work marker for refresh */
3718		rxr->next_to_refresh = i;
3719	}
3720update:
3721	bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
3722	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3723	if (cleaned != -1) /* Update tail index */
3724		E1000_WRITE_REG(&adapter->hw,
3725		    E1000_RDT(rxr->me), cleaned);
3726
3727	return;
3728}
3729
3730
3731/*********************************************************************
3732 *
3733 *  Allocate memory for rx_buffer structures. Since we use one
3734	 *  rx_buffer per received packet, the maximum number of rx_buffers
3735 *  that we'll need is equal to the number of receive descriptors
3736 *  that we've allocated.
3737 *
3738 **********************************************************************/
3739static int
3740em_allocate_receive_buffers(struct rx_ring *rxr)
3741{
3742	struct adapter		*adapter = rxr->adapter;
3743	device_t		dev = adapter->dev;
3744	struct em_buffer	*rxbuf;
3745	int			error;
3746
3747	rxr->rx_buffers = malloc(sizeof(struct em_buffer) *
3748	    adapter->num_rx_desc, M_DEVBUF, M_NOWAIT | M_ZERO);
3749	if (rxr->rx_buffers == NULL) {
3750		device_printf(dev, "Unable to allocate rx_buffer memory\n");
3751		return (ENOMEM);
3752	}
3753
3754	error = bus_dma_tag_create(bus_get_dma_tag(dev), /* parent */
3755				1, 0,			/* alignment, bounds */
3756				BUS_SPACE_MAXADDR,	/* lowaddr */
3757				BUS_SPACE_MAXADDR,	/* highaddr */
3758				NULL, NULL,		/* filter, filterarg */
3759				MCLBYTES,		/* maxsize */
3760				1,			/* nsegments */
3761				MCLBYTES,		/* maxsegsize */
3762				0,			/* flags */
3763				NULL,			/* lockfunc */
3764				NULL,			/* lockarg */
3765				&rxr->rxtag);
3766	if (error) {
3767		device_printf(dev, "%s: bus_dma_tag_create failed %d\n",
3768		    __func__, error);
3769		goto fail;
3770	}
3771
3772	/* Create the spare map (used by getbuf) */
3773	error = bus_dmamap_create(rxr->rxtag, BUS_DMA_NOWAIT,
3774	     &rxr->rx_sparemap);
3775	if (error) {
3776		device_printf(dev, "%s: bus_dmamap_create failed: %d\n",
3777		    __func__, error);
3778		goto fail;
3779	}
3780
3781	rxbuf = rxr->rx_buffers;
3782	for (int i = 0; i < adapter->num_rx_desc; i++) {
3783		rxbuf = &rxr->rx_buffers[i];
3784		error = bus_dmamap_create(rxr->rxtag, BUS_DMA_NOWAIT,
3785		    &rxbuf->map);
3786		if (error) {
3787			device_printf(dev, "%s: bus_dmamap_create failed: %d\n",
3788			    __func__, error);
3789			goto fail;
3790		}
3791	}
3792
3793	return (0);
3794
3795fail:
3796	em_free_receive_structures(adapter);
3797	return (error);
3798}
3799
3800
3801/*********************************************************************
3802 *
3803 *  Initialize a receive ring and its buffers.
3804 *
3805 **********************************************************************/
3806static int
3807em_setup_receive_ring(struct rx_ring *rxr)
3808{
3809	struct	adapter 	*adapter = rxr->adapter;
3810	struct em_buffer	*rxbuf;
3811	bus_dma_segment_t	seg[1];
3812	int			rsize, nsegs, error;
3813
3814
3815	/* Clear the ring contents */
3816	EM_RX_LOCK(rxr);
3817	rsize = roundup2(adapter->num_rx_desc *
3818	    sizeof(struct e1000_rx_desc), EM_DBA_ALIGN);
3819	bzero((void *)rxr->rx_base, rsize);
3820
3821	/*
3822	** Free current RX buffer structs and their mbufs
3823	*/
3824	for (int i = 0; i < adapter->num_rx_desc; i++) {
3825		rxbuf = &rxr->rx_buffers[i];
3826		if (rxbuf->m_head != NULL) {
3827			bus_dmamap_sync(rxr->rxtag, rxbuf->map,
3828			    BUS_DMASYNC_POSTREAD);
3829			bus_dmamap_unload(rxr->rxtag, rxbuf->map);
3830			m_freem(rxbuf->m_head);
3831		}
3832	}
3833
3834	/* Now replenish the mbufs */
3835	for (int j = 0; j != adapter->num_rx_desc; ++j) {
3836
3837		rxbuf = &rxr->rx_buffers[j];
3838		rxbuf->m_head = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR);
3839		if (rxbuf->m_head == NULL)
3840			panic("RX ring hdr initialization failed!\n");
3841		rxbuf->m_head->m_len = MCLBYTES;
3842		rxbuf->m_head->m_flags &= ~M_HASFCS; /* we strip it */
3843		rxbuf->m_head->m_pkthdr.len = MCLBYTES;
3844
3845		/* Get the memory mapping */
3846		error = bus_dmamap_load_mbuf_sg(rxr->rxtag,
3847		    rxbuf->map, rxbuf->m_head, seg,
3848		    &nsegs, BUS_DMA_NOWAIT);
3849		if (error != 0)
3850			panic("RX ring dma initialization failed!\n");
3851		bus_dmamap_sync(rxr->rxtag,
3852		    rxbuf->map, BUS_DMASYNC_PREREAD);
3853
3854		/* Update descriptor */
3855		rxr->rx_base[j].buffer_addr = htole64(seg[0].ds_addr);
3856	}
3857
3858
3859	/* Setup our descriptor indices */
3860	rxr->next_to_check = 0;
3861	rxr->next_to_refresh = 0;
3862
3863	bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
3864	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3865
3866	EM_RX_UNLOCK(rxr);
3867	return (0);
3868}
3869
3870/*********************************************************************
3871 *
3872 *  Initialize all receive rings.
3873 *
3874 **********************************************************************/
3875static int
3876em_setup_receive_structures(struct adapter *adapter)
3877{
3878	struct rx_ring *rxr = adapter->rx_rings;
3879	int j;
3880
3881	for (j = 0; j < adapter->num_queues; j++, rxr++)
3882		if (em_setup_receive_ring(rxr))
3883			goto fail;
3884
3885	return (0);
3886fail:
3887	/*
3888	 * Free the RX buffers allocated so far; we only handle
3889	 * the rings that completed, since the failing case will
3890	 * have cleaned up after itself. 'j' failed, so it's the terminus.
3891	 */
3892	for (int i = 0; i < j; ++i) {
3893		rxr = &adapter->rx_rings[i];
3894		for (int n = 0; n < adapter->num_rx_desc; n++) {
3895			struct em_buffer *rxbuf;
3896			rxbuf = &rxr->rx_buffers[n];
3897			if (rxbuf->m_head != NULL) {
3898				bus_dmamap_sync(rxr->rxtag, rxbuf->map,
3899			  	  BUS_DMASYNC_POSTREAD);
3900				bus_dmamap_unload(rxr->rxtag, rxbuf->map);
3901				m_freem(rxbuf->m_head);
3902				rxbuf->m_head = NULL;
3903			}
3904		}
3905	}
3906
3907	return (ENOBUFS);
3908}
3909
3910/*********************************************************************
3911 *
3912 *  Free all receive rings.
3913 *
3914 **********************************************************************/
3915static void
3916em_free_receive_structures(struct adapter *adapter)
3917{
3918	struct rx_ring *rxr = adapter->rx_rings;
3919
3920	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
3921		em_free_receive_buffers(rxr);
3922		/* Free the ring memory as well */
3923		em_dma_free(adapter, &rxr->rxdma);
3924		EM_RX_LOCK_DESTROY(rxr);
3925	}
3926
3927	free(adapter->rx_rings, M_DEVBUF);
3928}
3929
3930
3931/*********************************************************************
3932 *
3933 *  Free receive ring data structures
3934 *
3935 **********************************************************************/
3936static void
3937em_free_receive_buffers(struct rx_ring *rxr)
3938{
3939	struct adapter		*adapter = rxr->adapter;
3940	struct em_buffer	*rxbuf = NULL;
3941
3942	INIT_DEBUGOUT("free_receive_buffers: begin");
3943
3944	if (rxr->rx_sparemap) {
3945		bus_dmamap_destroy(rxr->rxtag, rxr->rx_sparemap);
3946		rxr->rx_sparemap = NULL;
3947	}
3948
3949	if (rxr->rx_buffers != NULL) {
3950		for (int i = 0; i < adapter->num_rx_desc; i++) {
3951			rxbuf = &rxr->rx_buffers[i];
3952			if (rxbuf->map != NULL) {
3953				bus_dmamap_sync(rxr->rxtag, rxbuf->map,
3954				    BUS_DMASYNC_POSTREAD);
3955				bus_dmamap_unload(rxr->rxtag, rxbuf->map);
3956				bus_dmamap_destroy(rxr->rxtag, rxbuf->map);
3957			}
3958			if (rxbuf->m_head != NULL) {
3959				m_freem(rxbuf->m_head);
3960				rxbuf->m_head = NULL;
3961			}
3962		}
3963		free(rxr->rx_buffers, M_DEVBUF);
3964		rxr->rx_buffers = NULL;
3965	}
3966
3967	if (rxr->rxtag != NULL) {
3968		bus_dma_tag_destroy(rxr->rxtag);
3969		rxr->rxtag = NULL;
3970	}
3971
3972	return;
3973}
3974
3975
3976/*********************************************************************
3977 *
3978 *  Enable receive unit.
3979 *
3980 **********************************************************************/
3981#define MAX_INTS_PER_SEC	8000
3982#define DEFAULT_ITR	     1000000000/(MAX_INTS_PER_SEC * 256)
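/*
 * The ITR register counts in 256ns units, so this works out to
 * 10^9 / (8000 * 256) ~= 488, i.e. interrupts are spaced at least
 * ~125us (1/8000 s) apart.
 */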
3983
3984static void
3985em_initialize_receive_unit(struct adapter *adapter)
3986{
3987	struct rx_ring	*rxr = adapter->rx_rings;
3988	struct ifnet	*ifp = adapter->ifp;
3989	struct e1000_hw	*hw = &adapter->hw;
3990	u64	bus_addr;
3991	u32	rctl, rxcsum;
3992
3993	INIT_DEBUGOUT("em_initialize_receive_unit: begin");
3994
3995	/*
3996	 * Make sure receives are disabled while setting
3997	 * up the descriptor ring
3998	 */
3999	rctl = E1000_READ_REG(hw, E1000_RCTL);
4000	E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
4001
4002	E1000_WRITE_REG(&adapter->hw, E1000_RADV,
4003	    adapter->rx_abs_int_delay.value);
4004	/*
4005	 * Set the interrupt throttling rate. Value is calculated
4006	 * as DEFAULT_ITR = 1/(MAX_INTS_PER_SEC * 256ns)
4007	 */
4008	E1000_WRITE_REG(hw, E1000_ITR, DEFAULT_ITR);
4009
4010	/*
4011	** When using MSIX interrupts we need to throttle
4012	** using the EITR register (82574 only)
4013	*/
4014	if (hw->mac.type == e1000_82574)
4015		for (int i = 0; i < 4; i++)
4016			E1000_WRITE_REG(hw, E1000_EITR_82574(i),
4017			    DEFAULT_ITR);
4018
4019	/* Disable accelerated acknowledge */
4020	if (adapter->hw.mac.type == e1000_82574)
4021		E1000_WRITE_REG(hw, E1000_RFCTL, E1000_RFCTL_ACK_DIS);
4022
4023	if (ifp->if_capenable & IFCAP_RXCSUM) {
4024		rxcsum = E1000_READ_REG(hw, E1000_RXCSUM);
4025		rxcsum |= (E1000_RXCSUM_IPOFL | E1000_RXCSUM_TUOFL);
4026		E1000_WRITE_REG(hw, E1000_RXCSUM, rxcsum);
4027	}
4028
4029	/*
4030	** XXX TEMPORARY WORKAROUND: on some systems with 82573
4031	** long latencies are observed, like Lenovo X60. This
4032	** change eliminates the problem, but since having positive
4033	** values in RDTR is a known source of problems on other
4034	** platforms another solution is being sought.
4035	*/
4036	if (hw->mac.type == e1000_82573)
4037		E1000_WRITE_REG(hw, E1000_RDTR, 0x20);
4038
4039	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4040		/* Setup the Base and Length of the Rx Descriptor Ring */
4041		bus_addr = rxr->rxdma.dma_paddr;
4042		E1000_WRITE_REG(hw, E1000_RDLEN(i),
4043		    adapter->num_rx_desc * sizeof(struct e1000_rx_desc));
4044		E1000_WRITE_REG(hw, E1000_RDBAH(i), (u32)(bus_addr >> 32));
4045		E1000_WRITE_REG(hw, E1000_RDBAL(i), (u32)bus_addr);
4046		/* Setup the Head and Tail Descriptor Pointers */
4047		E1000_WRITE_REG(hw, E1000_RDH(i), 0);
4048		E1000_WRITE_REG(hw, E1000_RDT(i), adapter->num_rx_desc - 1);
4049	}
4050
4051	/* Setup the Receive Control Register */
4052	rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
4053	rctl |= E1000_RCTL_EN | E1000_RCTL_BAM |
4054	    E1000_RCTL_LBM_NO | E1000_RCTL_RDMTS_HALF |
4055	    (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
4056
4057        /* Strip the CRC */
4058        rctl |= E1000_RCTL_SECRC;
4059
4060        /* Make sure VLAN Filters are off */
4061        rctl &= ~E1000_RCTL_VFE;
4062	rctl &= ~E1000_RCTL_SBP;
4063	rctl |= E1000_RCTL_SZ_2048;
4064	if (ifp->if_mtu > ETHERMTU)
4065		rctl |= E1000_RCTL_LPE;
4066	else
4067		rctl &= ~E1000_RCTL_LPE;
4068
4069	/* Write out the settings */
4070	E1000_WRITE_REG(hw, E1000_RCTL, rctl);
4071
4072	return;
4073}
4074
4075
4076/*********************************************************************
4077 *
4078 *  This routine executes in interrupt context. It replenishes
4079 *  the mbufs in the descriptor and sends data which has been
4080	 *  the mbufs in the descriptor ring and passes data which has
4081	 *  been dma'ed into host memory up to the stack.
4082 *  We loop at most count times if count is > 0, or until done if
4083 *  count < 0.
4084 *
4085 *  For polling we also now return the number of cleaned packets
4086 *********************************************************************/
4087static bool
4088em_rxeof(struct rx_ring *rxr, int count, int *done)
4089{
4090	struct adapter		*adapter = rxr->adapter;
4091	struct ifnet		*ifp = adapter->ifp;
4092	struct mbuf		*mp, *sendmp;
4093	u8			status = 0;
4094	u16 			len;
4095	int			i, processed, rxdone = 0;
4096	bool			eop;
4097	struct e1000_rx_desc	*cur;
4098
4099	EM_RX_LOCK(rxr);
4100
4101	for (i = rxr->next_to_check, processed = 0; count != 0;) {
4102
4103		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
4104			break;
4105
4106		bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4107		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
4108
4109		cur = &rxr->rx_base[i];
4110		status = cur->status;
4111		mp = sendmp = NULL;
4112
4113		if ((status & E1000_RXD_STAT_DD) == 0)
4114			break;
4115
4116		len = le16toh(cur->length);
4117		eop = (status & E1000_RXD_STAT_EOP) != 0;
4118		count--;
4119
4120		if ((cur->errors & E1000_RXD_ERR_FRAME_ERR_MASK) == 0) {
4121
4122			/* Assign correct length to the current fragment */
4123			mp = rxr->rx_buffers[i].m_head;
4124			mp->m_len = len;
4125
4126			if (rxr->fmp == NULL) {
4127				mp->m_pkthdr.len = len;
4128				rxr->fmp = mp; /* Store the first mbuf */
4129				rxr->lmp = mp;
4130			} else {
4131				/* Chain mbuf's together */
4132				mp->m_flags &= ~M_PKTHDR;
4133				rxr->lmp->m_next = mp;
4134				rxr->lmp = rxr->lmp->m_next;
4135				rxr->fmp->m_pkthdr.len += len;
4136			}
4137
4138			if (eop) {
4139				rxr->fmp->m_pkthdr.rcvif = ifp;
4140				ifp->if_ipackets++;
4141				em_receive_checksum(cur, rxr->fmp);
4142#ifndef __NO_STRICT_ALIGNMENT
4143				if (adapter->max_frame_size >
4144				    (MCLBYTES - ETHER_ALIGN) &&
4145				    em_fixup_rx(rxr) != 0)
4146					goto skip;
4147#endif
4148				if (status & E1000_RXD_STAT_VP) {
4149					rxr->fmp->m_pkthdr.ether_vtag =
4150					    (le16toh(cur->special) &
4151					    E1000_RXD_SPC_VLAN_MASK);
4152					rxr->fmp->m_flags |= M_VLANTAG;
4153				}
4154#ifdef EM_MULTIQUEUE
4155				rxr->fmp->m_pkthdr.flowid = curcpu;
4156				rxr->fmp->m_flags |= M_FLOWID;
4157#endif
4158#ifndef __NO_STRICT_ALIGNMENT
4159skip:
4160#endif
4161				sendmp = rxr->fmp;
4162				rxr->fmp = NULL;
4163				rxr->lmp = NULL;
4164			}
4165		} else {
4166			ifp->if_ierrors++;
4167			/* Reuse loaded DMA map and just update mbuf chain */
4168			mp = rxr->rx_buffers[i].m_head;
4169			mp->m_len = mp->m_pkthdr.len = MCLBYTES;
4170			mp->m_data = mp->m_ext.ext_buf;
4171			mp->m_next = NULL;
4172			if (adapter->max_frame_size <=
4173			    (MCLBYTES - ETHER_ALIGN))
4174				m_adj(mp, ETHER_ALIGN);
4175			if (rxr->fmp != NULL) {
4176				m_freem(rxr->fmp);
4177				rxr->fmp = NULL;
4178				rxr->lmp = NULL;
4179			}
4180			sendmp = NULL;
4181		}
4182
4183		/* Zero out the receive descriptor's status. */
4184		cur->status = 0;
4185		++rxdone;	/* cumulative for POLL */
4186		++processed;
4187
4188		/* Advance our pointers to the next descriptor. */
4189		if (++i == adapter->num_rx_desc)
4190			i = 0;
4191
4192		/* Send to the stack */
4193		if (sendmp != NULL) {
4194			rxr->next_to_check = i;
4195			EM_RX_UNLOCK(rxr);
4196			(*ifp->if_input)(ifp, sendmp);
4197			EM_RX_LOCK(rxr);
4198			i = rxr->next_to_check;
4199		}
4200
4201		/* Only refresh mbufs every 8 descriptors */
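		/*
		 * Batching amortizes the RDT tail update performed by
		 * em_refresh_mbufs() to one register write per 8
		 * descriptors rather than one per received packet.
		 */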
4202		if (processed == 8) {
4203			em_refresh_mbufs(rxr, i);
4204			processed = 0;
4205		}
4206	}
4207
4208	/* Catch any remaining refresh work */
4209	if (processed != 0) {
4210		em_refresh_mbufs(rxr, i);
4211		processed = 0;
4212	}
4213
4214	rxr->next_to_check = i;
4215	if (done != NULL)
4216		*done = rxdone;
4217	EM_RX_UNLOCK(rxr);
4218
4219	return ((status & E1000_RXD_STAT_DD) ? TRUE : FALSE);
4220}
4221
4222#ifndef __NO_STRICT_ALIGNMENT
4223/*
4224	 * When jumbo frames are enabled we should realign the entire payload on
4225	 * architectures with strict alignment. This is a serious design mistake of
4226	 * the 8254x, as it nullifies the benefit of DMA. The 8254x only allows RX
4227	 * buffer sizes of 2048/4096/8192/16384; what we really want is
4228	 * 2048 - ETHER_ALIGN, so that the payload ends up aligned. On
4229	 * architectures without strict alignment restrictions the 8254x still
4230	 * performs unaligned memory accesses, which reduces performance as well.
4231	 * To avoid copying an entire frame just to align it, we allocate a new
4232	 * mbuf, copy only the ethernet header into it, and prepend the new mbuf
4233	 * to the existing mbuf chain.
4234	 *
4235	 * Be aware that the best performance of the 8254x is achieved only when
4236	 * jumbo frames are not used at all on architectures with strict alignment.
4236 */
4237static int
4238em_fixup_rx(struct rx_ring *rxr)
4239{
4240	struct adapter *adapter = rxr->adapter;
4241	struct mbuf *m, *n;
4242	int error;
4243
4244	error = 0;
4245	m = rxr->fmp;
4246	if (m->m_len <= (MCLBYTES - ETHER_HDR_LEN)) {
4247		bcopy(m->m_data, m->m_data + ETHER_HDR_LEN, m->m_len);
4248		m->m_data += ETHER_HDR_LEN;
4249	} else {
4250		MGETHDR(n, M_DONTWAIT, MT_DATA);
4251		if (n != NULL) {
4252			bcopy(m->m_data, n->m_data, ETHER_HDR_LEN);
4253			m->m_data += ETHER_HDR_LEN;
4254			m->m_len -= ETHER_HDR_LEN;
4255			n->m_len = ETHER_HDR_LEN;
4256			M_MOVE_PKTHDR(n, m);
4257			n->m_next = m;
4258			rxr->fmp = n;
4259		} else {
4260			adapter->dropped_pkts++;
4261			m_freem(rxr->fmp);
4262			rxr->fmp = NULL;
4263			error = ENOMEM;
4264		}
4265	}
4266
4267	return (error);
4268}
4269#endif
4270
4271/*********************************************************************
4272 *
4273 *  Verify that the hardware indicated that the checksum is valid.
4274 *  Inform the stack about the status of checksum so that stack
4275 *  doesn't spend time verifying the checksum.
4276 *
4277 *********************************************************************/
4278static void
4279em_receive_checksum(struct e1000_rx_desc *rx_desc, struct mbuf *mp)
4280{
4281	/* Ignore Checksum bit is set */
4282	/* If the Ignore Checksum bit is set, report nothing to the stack */
4283		mp->m_pkthdr.csum_flags = 0;
4284		return;
4285	}
4286
4287	if (rx_desc->status & E1000_RXD_STAT_IPCS) {
4288		/* Did it pass? */
4289		if (!(rx_desc->errors & E1000_RXD_ERR_IPE)) {
4290			/* IP Checksum Good */
4291			mp->m_pkthdr.csum_flags = CSUM_IP_CHECKED;
4292			mp->m_pkthdr.csum_flags |= CSUM_IP_VALID;
4293
4294		} else {
4295			mp->m_pkthdr.csum_flags = 0;
4296		}
4297	}
4298
4299	if (rx_desc->status & E1000_RXD_STAT_TCPCS) {
4300		/* Did it pass? */
4301		if (!(rx_desc->errors & E1000_RXD_ERR_TCPE)) {
4302			mp->m_pkthdr.csum_flags |=
4303			(CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
4304			mp->m_pkthdr.csum_data = htons(0xffff);
4305		}
4306	}
4307}
4308
4309/*
4310	 * This routine is run via a vlan
4311 * config EVENT
4312 */
4313static void
4314em_register_vlan(void *arg, struct ifnet *ifp, u16 vtag)
4315{
4316	struct adapter	*adapter = ifp->if_softc;
4317	u32		index, bit;
4318
4319	if (ifp->if_softc !=  arg)   /* Not our event */
4320		return;
4321
4322	if ((vtag == 0) || (vtag > 4095))       /* Invalid ID */
4323                return;
4324
4325	index = (vtag >> 5) & 0x7F;
4326	bit = vtag & 0x1F;
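	/*
	 * The shadow VFTA mirrors the hardware's 4096-bit table as 128
	 * 32-bit words, e.g. vtag 100 -> word index 3, bit 4.
	 */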
4327	em_shadow_vfta[index] |= (1 << bit);
4328	++adapter->num_vlans;
4329	/* Re-init to load the changes */
4330	em_init(adapter);
4331}
4332
4333/*
4334	 * This routine is run via a vlan
4335 * unconfig EVENT
4336 */
4337static void
4338em_unregister_vlan(void *arg, struct ifnet *ifp, u16 vtag)
4339{
4340	struct adapter	*adapter = ifp->if_softc;
4341	u32		index, bit;
4342
4343	if (ifp->if_softc !=  arg)
4344		return;
4345
4346	if ((vtag == 0) || (vtag > 4095))       /* Invalid */
4347                return;
4348
4349	index = (vtag >> 5) & 0x7F;
4350	bit = vtag & 0x1F;
4351	em_shadow_vfta[index] &= ~(1 << bit);
4352	--adapter->num_vlans;
4353	/* Re-init to load the changes */
4354	em_init(adapter);
4355}
4356
4357static void
4358em_setup_vlan_hw_support(struct adapter *adapter)
4359{
4360	struct e1000_hw *hw = &adapter->hw;
4361	u32             reg;
4362
4363	/*
4364	** We get here thru init_locked, meaning
4365	** a soft reset; that has already cleared
4366	** the VFTA and other state, so if no
4367	** vlans have been registered, do nothing.
4368	*/
4369	if (adapter->num_vlans == 0)
4370                return;
4371
4372	/*
4373	** A soft reset zeroes out the VFTA, so
4374	** we need to repopulate it now.
4375	*/
4376	for (int i = 0; i < EM_VFTA_SIZE; i++)
4377                if (em_shadow_vfta[i] != 0)
4378			E1000_WRITE_REG_ARRAY(hw, E1000_VFTA,
4379                            i, em_shadow_vfta[i]);
4380
4381	reg = E1000_READ_REG(hw, E1000_CTRL);
4382	reg |= E1000_CTRL_VME;
4383	E1000_WRITE_REG(hw, E1000_CTRL, reg);
4384
4385	/* Enable the Filter Table */
4386	reg = E1000_READ_REG(hw, E1000_RCTL);
4387	reg &= ~E1000_RCTL_CFIEN;
4388	reg |= E1000_RCTL_VFE;
4389	E1000_WRITE_REG(hw, E1000_RCTL, reg);
4390
4391	/* Update the frame size */
4392	E1000_WRITE_REG(&adapter->hw, E1000_RLPML,
4393	    adapter->max_frame_size + VLAN_TAG_SIZE);
4394}
4395
4396static void
4397em_enable_intr(struct adapter *adapter)
4398{
4399	struct e1000_hw *hw = &adapter->hw;
4400	u32 ims_mask = IMS_ENABLE_MASK;
4401
4402	if (hw->mac.type == e1000_82574) {
4403		E1000_WRITE_REG(hw, EM_EIAC, EM_MSIX_MASK);
4404		ims_mask |= EM_MSIX_MASK;
4405	}
4406	E1000_WRITE_REG(hw, E1000_IMS, ims_mask);
4407}
4408
4409static void
4410em_disable_intr(struct adapter *adapter)
4411{
4412	struct e1000_hw *hw = &adapter->hw;
4413
4414	if (hw->mac.type == e1000_82574)
4415		E1000_WRITE_REG(hw, EM_EIAC, 0);
4416	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
4417}
4418
4419/*
4420 * Bit of a misnomer, what this really means is
4421 * to enable OS management of the system... aka
4422 * to disable special hardware management features
4423 */
4424static void
4425em_init_manageability(struct adapter *adapter)
4426{
4427	/* A shared code workaround */
4428#define E1000_82542_MANC2H E1000_MANC2H
4429	if (adapter->has_manage) {
4430		int manc2h = E1000_READ_REG(&adapter->hw, E1000_MANC2H);
4431		int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
4432
4433		/* disable hardware interception of ARP */
4434		manc &= ~(E1000_MANC_ARP_EN);
4435
4436                /* enable receiving management packets to the host */
4437		manc |= E1000_MANC_EN_MNG2HOST;
4438#define E1000_MNG2HOST_PORT_623 (1 << 5)
4439#define E1000_MNG2HOST_PORT_664 (1 << 6)
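		/* 623 and 664 are the RMCP/ASF remote-management ports */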
4440		manc2h |= E1000_MNG2HOST_PORT_623;
4441		manc2h |= E1000_MNG2HOST_PORT_664;
4442		E1000_WRITE_REG(&adapter->hw, E1000_MANC2H, manc2h);
4443		E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
4444	}
4445}
4446
4447/*
4448 * Give control back to hardware management
4449 * controller if there is one.
4450 */
4451static void
4452em_release_manageability(struct adapter *adapter)
4453{
4454	if (adapter->has_manage) {
4455		int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
4456
4457		/* re-enable hardware interception of ARP */
4458		manc |= E1000_MANC_ARP_EN;
4459		manc &= ~E1000_MANC_EN_MNG2HOST;
4460
4461		E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
4462	}
4463}
4464
4465/*
4466 * em_get_hw_control sets the {CTRL_EXT|FWSM}:DRV_LOAD bit.
4467 * For ASF and Pass Through versions of f/w this means
4468 * that the driver is loaded. For AMT version type f/w
4469 * this means that the network i/f is open.
4470 */
4471static void
4472em_get_hw_control(struct adapter *adapter)
4473{
4474	u32 ctrl_ext, swsm;
4475
4476	if (adapter->hw.mac.type == e1000_82573) {
4477		swsm = E1000_READ_REG(&adapter->hw, E1000_SWSM);
4478		E1000_WRITE_REG(&adapter->hw, E1000_SWSM,
4479		    swsm | E1000_SWSM_DRV_LOAD);
4480		return;
4481	}
4482	/* else */
4483	ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4484	E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
4485	    ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
4486	return;
4487}
4488
4489/*
4490 * em_release_hw_control resets {CTRL_EXT|FWSM}:DRV_LOAD bit.
4491 * For ASF and Pass Through versions of f/w this means that
4492 * the driver is no longer loaded. For AMT versions of the
4493 * f/w this means that the network i/f is closed.
4494 */
4495static void
4496em_release_hw_control(struct adapter *adapter)
4497{
4498	u32 ctrl_ext, swsm;
4499
4500	if (!adapter->has_manage)
4501		return;
4502
4503	if (adapter->hw.mac.type == e1000_82573) {
4504		swsm = E1000_READ_REG(&adapter->hw, E1000_SWSM);
4505		E1000_WRITE_REG(&adapter->hw, E1000_SWSM,
4506		    swsm & ~E1000_SWSM_DRV_LOAD);
4507		return;
4508	}
4509	/* else */
4510	ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4511	E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
4512	    ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
4513	return;
4514}
4515
4516static int
4517em_is_valid_ether_addr(u8 *addr)
4518{
4519	char zero_addr[6] = { 0, 0, 0, 0, 0, 0 };
4520
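	/* Reject any address with the multicast (I/G) bit set in the
	 * first octet, and the all-zero address. */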
4521	if ((addr[0] & 1) || (!bcmp(addr, zero_addr, ETHER_ADDR_LEN))) {
4522		return (FALSE);
4523	}
4524
4525	return (TRUE);
4526}
4527
4528/*
4529** Parse the interface capabilities with regard
4530** to both system management and wake-on-lan for
4531** later use.
4532*/
4533static void
4534em_get_wakeup(device_t dev)
4535{
4536	struct adapter	*adapter = device_get_softc(dev);
4537	u16		eeprom_data = 0, device_id, apme_mask;
4538
4539	adapter->has_manage = e1000_enable_mng_pass_thru(&adapter->hw);
4540	apme_mask = EM_EEPROM_APME;
4541
4542	switch (adapter->hw.mac.type) {
4543	case e1000_82573:
4544	case e1000_82583:
4545		adapter->has_amt = TRUE;
4546		/* Falls thru */
4547	case e1000_82571:
4548	case e1000_82572:
4549	case e1000_80003es2lan:
4550		if (adapter->hw.bus.func == 1) {
4551			e1000_read_nvm(&adapter->hw,
4552			    NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data);
4553			break;
4554		} else
4555			e1000_read_nvm(&adapter->hw,
4556			    NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
4557		break;
4558	case e1000_ich8lan:
4559	case e1000_ich9lan:
4560	case e1000_ich10lan:
4561	case e1000_pchlan:
4562		apme_mask = E1000_WUC_APME;
4563		adapter->has_amt = TRUE;
4564		eeprom_data = E1000_READ_REG(&adapter->hw, E1000_WUC);
4565		break;
4566	default:
4567		e1000_read_nvm(&adapter->hw,
4568		    NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
4569		break;
4570	}
4571	if (eeprom_data & apme_mask)
4572		adapter->wol = (E1000_WUFC_MAG | E1000_WUFC_MC);
4573	/*
4574         * We have the eeprom settings, now apply the special cases
4575         * where the eeprom may be wrong or the board won't support
4576         * wake on lan on a particular port
4577	 */
4578	device_id = pci_get_device(dev);
4579        switch (device_id) {
4580	case E1000_DEV_ID_82571EB_FIBER:
4581		/* Wake events only supported on port A for dual fiber
4582		 * regardless of eeprom setting */
4583		if (E1000_READ_REG(&adapter->hw, E1000_STATUS) &
4584		    E1000_STATUS_FUNC_1)
4585			adapter->wol = 0;
4586		break;
4587	case E1000_DEV_ID_82571EB_QUAD_COPPER:
4588	case E1000_DEV_ID_82571EB_QUAD_FIBER:
4589	case E1000_DEV_ID_82571EB_QUAD_COPPER_LP:
4590                /* if quad port adapter, disable WoL on all but port A */
4591		if (global_quad_port_a != 0)
4592			adapter->wol = 0;
4593		/* Reset for multiple quad port adapters */
4594		if (++global_quad_port_a == 4)
4595			global_quad_port_a = 0;
4596                break;
4597	}
4598	return;
4599}
4600
4601
4602/*
4603 * Enable PCI Wake On Lan capability
4604 */
4605static void
4606em_enable_wakeup(device_t dev)
4607{
4608	struct adapter	*adapter = device_get_softc(dev);
4609	struct ifnet	*ifp = adapter->ifp;
4610	u32		pmc, ctrl, ctrl_ext, rctl;
4611	u16     	status;
4612
4613	if ((pci_find_extcap(dev, PCIY_PMG, &pmc) != 0))
4614		return;
4615
4616	/* Advertise the wakeup capability */
4617	ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL);
4618	ctrl |= (E1000_CTRL_SWDPIN2 | E1000_CTRL_SWDPIN3);
4619	E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl);
4620	E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
4621
4622	if ((adapter->hw.mac.type == e1000_ich8lan) ||
4623	    (adapter->hw.mac.type == e1000_pchlan) ||
4624	    (adapter->hw.mac.type == e1000_ich9lan) ||
4625	    (adapter->hw.mac.type == e1000_ich10lan)) {
4626		e1000_disable_gig_wol_ich8lan(&adapter->hw);
4627		e1000_hv_phy_powerdown_workaround_ich8lan(&adapter->hw);
4628	}
4629
4630	/* Keep the laser running on Fiber adapters */
4631	if (adapter->hw.phy.media_type == e1000_media_type_fiber ||
4632	    adapter->hw.phy.media_type == e1000_media_type_internal_serdes) {
4633		ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4634		ctrl_ext |= E1000_CTRL_EXT_SDP3_DATA;
4635		E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, ctrl_ext);
4636	}
4637
4638	/*
4639	** Determine type of Wakeup: note that wol
4640	** is set with all bits on by default.
4641	*/
4642	if ((ifp->if_capenable & IFCAP_WOL_MAGIC) == 0)
4643		adapter->wol &= ~E1000_WUFC_MAG;
4644
4645	if ((ifp->if_capenable & IFCAP_WOL_MCAST) == 0)
4646		adapter->wol &= ~E1000_WUFC_MC;
4647	else {
4648		rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
4649		rctl |= E1000_RCTL_MPE;
4650		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, rctl);
4651	}
4652
4653	if (adapter->hw.mac.type == e1000_pchlan) {
4654		if (em_enable_phy_wakeup(adapter))
4655			return;
4656	} else {
4657		E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
4658		E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
4659	}
4660
4661	if (adapter->hw.phy.type == e1000_phy_igp_3)
4662		e1000_igp3_phy_powerdown_workaround_ich8lan(&adapter->hw);
4663
4664        /* Request PME */
4665        status = pci_read_config(dev, pmc + PCIR_POWER_STATUS, 2);
4666	status &= ~(PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE);
4667	if (ifp->if_capenable & IFCAP_WOL)
4668		status |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE;
4669        pci_write_config(dev, pmc + PCIR_POWER_STATUS, status, 2);
4670
4671	return;
4672}
4673
4674/*
4675** WOL in the newer chipset interfaces (pchlan)
4676	** requires things to be copied into the PHY
4677*/
4678static int
4679em_enable_phy_wakeup(struct adapter *adapter)
4680{
4681	struct e1000_hw *hw = &adapter->hw;
4682	u32 mreg, ret = 0;
4683	u16 preg;
4684
4685	/* copy MAC RARs to PHY RARs */
4686	for (int i = 0; i < adapter->hw.mac.rar_entry_count; i++) {
4687		mreg = E1000_READ_REG(hw, E1000_RAL(i));
4688		e1000_write_phy_reg(hw, BM_RAR_L(i), (u16)(mreg & 0xFFFF));
4689		e1000_write_phy_reg(hw, BM_RAR_M(i),
4690		    (u16)((mreg >> 16) & 0xFFFF));
4691		mreg = E1000_READ_REG(hw, E1000_RAH(i));
4692		e1000_write_phy_reg(hw, BM_RAR_H(i), (u16)(mreg & 0xFFFF));
4693		e1000_write_phy_reg(hw, BM_RAR_CTRL(i),
4694		    (u16)((mreg >> 16) & 0xFFFF));
4695	}
4696
4697	/* copy MAC MTA to PHY MTA */
4698	for (int i = 0; i < adapter->hw.mac.mta_reg_count; i++) {
4699		mreg = E1000_READ_REG_ARRAY(hw, E1000_MTA, i);
4700		e1000_write_phy_reg(hw, BM_MTA(i), (u16)(mreg & 0xFFFF));
4701		e1000_write_phy_reg(hw, BM_MTA(i) + 1,
4702		    (u16)((mreg >> 16) & 0xFFFF));
4703	}
4704
4705	/* configure PHY Rx Control register */
4706	e1000_read_phy_reg(&adapter->hw, BM_RCTL, &preg);
4707	mreg = E1000_READ_REG(hw, E1000_RCTL);
4708	if (mreg & E1000_RCTL_UPE)
4709		preg |= BM_RCTL_UPE;
4710	if (mreg & E1000_RCTL_MPE)
4711		preg |= BM_RCTL_MPE;
4712	preg &= ~(BM_RCTL_MO_MASK);
4713	if (mreg & E1000_RCTL_MO_3)
4714		preg |= (((mreg & E1000_RCTL_MO_3) >> E1000_RCTL_MO_SHIFT)
4715				<< BM_RCTL_MO_SHIFT);
4716	if (mreg & E1000_RCTL_BAM)
4717		preg |= BM_RCTL_BAM;
4718	if (mreg & E1000_RCTL_PMCF)
4719		preg |= BM_RCTL_PMCF;
4720	mreg = E1000_READ_REG(hw, E1000_CTRL);
4721	if (mreg & E1000_CTRL_RFCE)
4722		preg |= BM_RCTL_RFCE;
4723	e1000_write_phy_reg(&adapter->hw, BM_RCTL, preg);
4724
4725	/* enable PHY wakeup in MAC register */
4726	E1000_WRITE_REG(hw, E1000_WUC,
4727	    E1000_WUC_PHY_WAKE | E1000_WUC_PME_EN);
4728	E1000_WRITE_REG(hw, E1000_WUFC, adapter->wol);
4729
4730	/* configure and enable PHY wakeup in PHY registers */
4731	e1000_write_phy_reg(&adapter->hw, BM_WUFC, adapter->wol);
4732	e1000_write_phy_reg(&adapter->hw, BM_WUC, E1000_WUC_PME_EN);
4733
4734	/* activate PHY wakeup */
4735	ret = hw->phy.ops.acquire(hw);
4736	if (ret) {
4737		printf("Could not acquire PHY\n");
4738		return ret;
4739	}
4740	e1000_write_phy_reg_mdic(hw, IGP01E1000_PHY_PAGE_SELECT,
4741	                         (BM_WUC_ENABLE_PAGE << IGP_PAGE_SHIFT));
4742	ret = e1000_read_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, &preg);
4743	if (ret) {
4744		printf("Could not read PHY page 769\n");
4745		goto out;
4746	}
4747	preg |= BM_WUC_ENABLE_BIT | BM_WUC_HOST_WU_BIT;
4748	ret = e1000_write_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, preg);
4749	if (ret)
4750		printf("Could not set PHY Host Wakeup bit\n");
4751out:
4752	hw->phy.ops.release(hw);
4753
4754	return ret;
4755}
4756
4757static void
4758em_led_func(void *arg, int onoff)
4759{
4760	struct adapter	*adapter = arg;
4761
4762	EM_CORE_LOCK(adapter);
4763	if (onoff) {
4764		e1000_setup_led(&adapter->hw);
4765		e1000_led_on(&adapter->hw);
4766	} else {
4767		e1000_led_off(&adapter->hw);
4768		e1000_cleanup_led(&adapter->hw);
4769	}
4770	EM_CORE_UNLOCK(adapter);
4771}
4772
4773/**********************************************************************
4774 *
4775 *  Update the board statistics counters.
4776 *
4777 **********************************************************************/
4778static void
4779em_update_stats_counters(struct adapter *adapter)
4780{
4781	struct ifnet   *ifp;
4782
4783	if (adapter->hw.phy.media_type == e1000_media_type_copper ||
4784	   (E1000_READ_REG(&adapter->hw, E1000_STATUS) & E1000_STATUS_LU)) {
4785		adapter->stats.symerrs += E1000_READ_REG(&adapter->hw, E1000_SYMERRS);
4786		adapter->stats.sec += E1000_READ_REG(&adapter->hw, E1000_SEC);
4787	}
4788	adapter->stats.crcerrs += E1000_READ_REG(&adapter->hw, E1000_CRCERRS);
4789	adapter->stats.mpc += E1000_READ_REG(&adapter->hw, E1000_MPC);
4790	adapter->stats.scc += E1000_READ_REG(&adapter->hw, E1000_SCC);
4791	adapter->stats.ecol += E1000_READ_REG(&adapter->hw, E1000_ECOL);
4792
4793	adapter->stats.mcc += E1000_READ_REG(&adapter->hw, E1000_MCC);
4794	adapter->stats.latecol += E1000_READ_REG(&adapter->hw, E1000_LATECOL);
4795	adapter->stats.colc += E1000_READ_REG(&adapter->hw, E1000_COLC);
4796	adapter->stats.dc += E1000_READ_REG(&adapter->hw, E1000_DC);
4797	adapter->stats.rlec += E1000_READ_REG(&adapter->hw, E1000_RLEC);
4798	adapter->stats.xonrxc += E1000_READ_REG(&adapter->hw, E1000_XONRXC);
4799	adapter->stats.xontxc += E1000_READ_REG(&adapter->hw, E1000_XONTXC);
4800	adapter->stats.xoffrxc += E1000_READ_REG(&adapter->hw, E1000_XOFFRXC);
4801	adapter->stats.xofftxc += E1000_READ_REG(&adapter->hw, E1000_XOFFTXC);
4802	adapter->stats.fcruc += E1000_READ_REG(&adapter->hw, E1000_FCRUC);
4803	adapter->stats.prc64 += E1000_READ_REG(&adapter->hw, E1000_PRC64);
4804	adapter->stats.prc127 += E1000_READ_REG(&adapter->hw, E1000_PRC127);
4805	adapter->stats.prc255 += E1000_READ_REG(&adapter->hw, E1000_PRC255);
4806	adapter->stats.prc511 += E1000_READ_REG(&adapter->hw, E1000_PRC511);
4807	adapter->stats.prc1023 += E1000_READ_REG(&adapter->hw, E1000_PRC1023);
4808	adapter->stats.prc1522 += E1000_READ_REG(&adapter->hw, E1000_PRC1522);
4809	adapter->stats.gprc += E1000_READ_REG(&adapter->hw, E1000_GPRC);
4810	adapter->stats.bprc += E1000_READ_REG(&adapter->hw, E1000_BPRC);
4811	adapter->stats.mprc += E1000_READ_REG(&adapter->hw, E1000_MPRC);
4812	adapter->stats.gptc += E1000_READ_REG(&adapter->hw, E1000_GPTC);
4813
4814	/* For the 64-bit byte counters the low dword must be read first. */
4815	/* Both registers clear on the read of the high dword */
4816
4817	adapter->stats.gorc += E1000_READ_REG(&adapter->hw, E1000_GORCL) +
	    ((u64)E1000_READ_REG(&adapter->hw, E1000_GORCH) << 32);
4818	adapter->stats.gotc += E1000_READ_REG(&adapter->hw, E1000_GOTCL) +
	    ((u64)E1000_READ_REG(&adapter->hw, E1000_GOTCH) << 32);
4819
4820	adapter->stats.rnbc += E1000_READ_REG(&adapter->hw, E1000_RNBC);
4821	adapter->stats.ruc += E1000_READ_REG(&adapter->hw, E1000_RUC);
4822	adapter->stats.rfc += E1000_READ_REG(&adapter->hw, E1000_RFC);
4823	adapter->stats.roc += E1000_READ_REG(&adapter->hw, E1000_ROC);
4824	adapter->stats.rjc += E1000_READ_REG(&adapter->hw, E1000_RJC);
4825
4826	adapter->stats.tor += E1000_READ_REG(&adapter->hw, E1000_TORL) +
	    ((u64)E1000_READ_REG(&adapter->hw, E1000_TORH) << 32);
4827	adapter->stats.tot += E1000_READ_REG(&adapter->hw, E1000_TOTL) +
	    ((u64)E1000_READ_REG(&adapter->hw, E1000_TOTH) << 32);
4828
4829	adapter->stats.tpr += E1000_READ_REG(&adapter->hw, E1000_TPR);
4830	adapter->stats.tpt += E1000_READ_REG(&adapter->hw, E1000_TPT);
4831	adapter->stats.ptc64 += E1000_READ_REG(&adapter->hw, E1000_PTC64);
4832	adapter->stats.ptc127 += E1000_READ_REG(&adapter->hw, E1000_PTC127);
4833	adapter->stats.ptc255 += E1000_READ_REG(&adapter->hw, E1000_PTC255);
4834	adapter->stats.ptc511 += E1000_READ_REG(&adapter->hw, E1000_PTC511);
4835	adapter->stats.ptc1023 += E1000_READ_REG(&adapter->hw, E1000_PTC1023);
4836	adapter->stats.ptc1522 += E1000_READ_REG(&adapter->hw, E1000_PTC1522);
4837	adapter->stats.mptc += E1000_READ_REG(&adapter->hw, E1000_MPTC);
4838	adapter->stats.bptc += E1000_READ_REG(&adapter->hw, E1000_BPTC);
4839
4840	if (adapter->hw.mac.type >= e1000_82543) {
4841		adapter->stats.algnerrc +=
4842		E1000_READ_REG(&adapter->hw, E1000_ALGNERRC);
4843		adapter->stats.rxerrc +=
4844		E1000_READ_REG(&adapter->hw, E1000_RXERRC);
4845		adapter->stats.tncrs +=
4846		E1000_READ_REG(&adapter->hw, E1000_TNCRS);
4847		adapter->stats.cexterr +=
4848		E1000_READ_REG(&adapter->hw, E1000_CEXTERR);
4849		adapter->stats.tsctc +=
4850		E1000_READ_REG(&adapter->hw, E1000_TSCTC);
4851		adapter->stats.tsctfc +=
4852		E1000_READ_REG(&adapter->hw, E1000_TSCTFC);
4853	}
4854	ifp = adapter->ifp;
4855
4856	ifp->if_collisions = adapter->stats.colc;
4857
4858	/* Rx Errors */
4859	ifp->if_ierrors = adapter->dropped_pkts + adapter->stats.rxerrc +
4860	    adapter->stats.crcerrs + adapter->stats.algnerrc +
4861	    adapter->stats.ruc + adapter->stats.roc +
4862	    adapter->stats.mpc + adapter->stats.cexterr;
4863
4864	/* Tx Errors */
4865	ifp->if_oerrors = adapter->stats.ecol +
4866	    adapter->stats.latecol + adapter->watchdog_events;
4867}
4868
4869
4870/**********************************************************************
4871 *
4872 *  This routine is called only when em_display_debug_stats is enabled.
4873 *  This routine provides a way to take a look at important statistics
4874 *  maintained by the driver and hardware.
4875 *
4876 **********************************************************************/
4877static void
4878em_print_debug_info(struct adapter *adapter)
4879{
4880	device_t dev = adapter->dev;
4881	u8 *hw_addr = adapter->hw.hw_addr;
4882	struct rx_ring *rxr = adapter->rx_rings;
4883	struct tx_ring *txr = adapter->tx_rings;
4884
4885	device_printf(dev, "Adapter hardware address = %p \n", hw_addr);
4886	device_printf(dev, "CTRL = 0x%x RCTL = 0x%x \n",
4887	    E1000_READ_REG(&adapter->hw, E1000_CTRL),
4888	    E1000_READ_REG(&adapter->hw, E1000_RCTL));
4889	device_printf(dev, "Packet buffer = Tx=%dk Rx=%dk \n",
4890	    ((E1000_READ_REG(&adapter->hw, E1000_PBA) & 0xffff0000) >> 16),
4891	    (E1000_READ_REG(&adapter->hw, E1000_PBA) & 0xffff));
4892	device_printf(dev, "Flow control watermarks high = %d low = %d\n",
4893	    adapter->hw.fc.high_water,
4894	    adapter->hw.fc.low_water);
4895	device_printf(dev, "tx_int_delay = %d, tx_abs_int_delay = %d\n",
4896	    E1000_READ_REG(&adapter->hw, E1000_TIDV),
4897	    E1000_READ_REG(&adapter->hw, E1000_TADV));
4898	device_printf(dev, "rx_int_delay = %d, rx_abs_int_delay = %d\n",
4899	    E1000_READ_REG(&adapter->hw, E1000_RDTR),
4900	    E1000_READ_REG(&adapter->hw, E1000_RADV));
4901
4902	for (int i = 0; i < adapter->num_queues; i++, txr++) {
4903		device_printf(dev, "Queue(%d) tdh = %d, tdt = %d\n", i,
4904		    E1000_READ_REG(&adapter->hw, E1000_TDH(i)),
4905		    E1000_READ_REG(&adapter->hw, E1000_TDT(i)));
4906		device_printf(dev, "TX(%d) no descriptors avail event = %ld\n",
4907		    txr->me, txr->no_desc_avail);
4908		device_printf(dev, "TX(%d) MSIX IRQ Handled = %ld\n",
4909		    txr->me, txr->tx_irq);
4910		device_printf(dev, "Num Tx descriptors avail = %d\n",
4911		    txr->tx_avail);
4912		device_printf(dev, "Tx Descriptors not avail = %ld\n",
4913		    txr->no_desc_avail);
4914	}
4915	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4916		device_printf(dev, "RX(%d) MSIX IRQ Handled = %ld\n",
4917		    rxr->me, rxr->rx_irq);
4918		device_printf(dev, "hw rdh = %d, hw rdt = %d\n",
4919		    E1000_READ_REG(&adapter->hw, E1000_RDH(i)),
4920		    E1000_READ_REG(&adapter->hw, E1000_RDT(i)));
4921	}
4922	device_printf(dev, "Std mbuf failed = %ld\n",
4923	    adapter->mbuf_alloc_failed);
4924	device_printf(dev, "Std mbuf cluster failed = %ld\n",
4925	    adapter->mbuf_cluster_failed);
4926	device_printf(dev, "Driver dropped packets = %ld\n",
4927	    adapter->dropped_pkts);
4928}
4929
4930static void
4931em_print_hw_stats(struct adapter *adapter)
4932{
4933	device_t dev = adapter->dev;
4934
4935	device_printf(dev, "Excessive collisions = %lld\n",
4936	    (long long)adapter->stats.ecol);
4937	#if	(DEBUG_HW > 0)  /* Don't output these errors normally */
4938	device_printf(dev, "Symbol errors = %lld\n",
4939	    (long long)adapter->stats.symerrs);
4940#endif
4941	device_printf(dev, "Sequence errors = %lld\n",
4942	    (long long)adapter->stats.sec);
4943	device_printf(dev, "Defer count = %lld\n",
4944	    (long long)adapter->stats.dc);
4945	device_printf(dev, "Missed Packets = %lld\n",
4946	    (long long)adapter->stats.mpc);
4947	device_printf(dev, "Receive No Buffers = %lld\n",
4948	    (long long)adapter->stats.rnbc);
4949	/* RLEC is inaccurate on some hardware, calculate our own. */
4950	device_printf(dev, "Receive Length Errors = %lld\n",
4951	    ((long long)adapter->stats.roc + (long long)adapter->stats.ruc));
4952	device_printf(dev, "Receive errors = %lld\n",
4953	    (long long)adapter->stats.rxerrc);
4954	device_printf(dev, "Crc errors = %lld\n",
4955	    (long long)adapter->stats.crcerrs);
4956	device_printf(dev, "Alignment errors = %lld\n",
4957	    (long long)adapter->stats.algnerrc);
4958	device_printf(dev, "Collision/Carrier extension errors = %lld\n",
4959	    (long long)adapter->stats.cexterr);
4960	device_printf(dev, "watchdog timeouts = %ld\n",
4961	    adapter->watchdog_events);
4962	device_printf(dev, "XON Rcvd = %lld\n",
4963	    (long long)adapter->stats.xonrxc);
4964	device_printf(dev, "XON Xmtd = %lld\n",
4965	    (long long)adapter->stats.xontxc);
4966	device_printf(dev, "XOFF Rcvd = %lld\n",
4967	    (long long)adapter->stats.xoffrxc);
4968	device_printf(dev, "XOFF Xmtd = %lld\n",
4969	    (long long)adapter->stats.xofftxc);
4970	device_printf(dev, "Good Packets Rcvd = %lld\n",
4971	    (long long)adapter->stats.gprc);
4972	device_printf(dev, "Good Packets Xmtd = %lld\n",
4973	    (long long)adapter->stats.gptc);
4974	device_printf(dev, "TSO Contexts Xmtd = %lld\n",
4975	    (long long)adapter->stats.tsctc);
4976	device_printf(dev, "TSO Contexts Failed = %lld\n",
4977	    (long long)adapter->stats.tsctfc);
4978}
4979
4980/**********************************************************************
4981 *
4982 *  This routine provides a way to dump out the adapter eeprom,
4983 *  often a useful debug/service tool. This only dumps the first
4984	 *  32 words; the data that matters is in that extent.
4985 *
4986 **********************************************************************/
4987static void
4988em_print_nvm_info(struct adapter *adapter)
4989{
4990	u16	eeprom_data;
4991	int	i, j, row = 0;
4992
4993	/* It's a bit crude, but it gets the job done */
4994	printf("\nInterface EEPROM Dump:\n");
4995	printf("Offset\n0x0000  ");
4996	for (i = 0, j = 0; i < 32; i++, j++) {
4997		if (j == 8) { /* Make the offset block */
4998			j = 0; ++row;
4999			printf("\n0x00%x0  ", row);
5000		}
5001		e1000_read_nvm(&adapter->hw, i, 1, &eeprom_data);
5002		printf("%04x ", eeprom_data);
5003	}
5004	printf("\n");
5005}
5006
5007static int
5008em_sysctl_debug_info(SYSCTL_HANDLER_ARGS)
5009{
5010	struct adapter *adapter;
5011	int error;
5012	int result;
5013
5014	result = -1;
5015	error = sysctl_handle_int(oidp, &result, 0, req);
5016
5017	if (error || !req->newptr)
5018		return (error);
5019
5020	if (result == 1) {
5021		adapter = (struct adapter *)arg1;
5022		em_print_debug_info(adapter);
5023	}
5024	/*
5025	 * This value will cause a hex dump of the
5026	 * first 32 16-bit words of the EEPROM to
5027	 * the screen.
5028	 */
5029	if (result == 2) {
5030		adapter = (struct adapter *)arg1;
5031		em_print_nvm_info(adapter);
5032        }
5033
5034	return (error);
5035}
5036
5037
5038static int
5039em_sysctl_stats(SYSCTL_HANDLER_ARGS)
5040{
5041	struct adapter *adapter;
5042	int error;
5043	int result;
5044
5045	result = -1;
5046	error = sysctl_handle_int(oidp, &result, 0, req);
5047
5048	if (error || !req->newptr)
5049		return (error);
5050
5051	if (result == 1) {
5052		adapter = (struct adapter *)arg1;
5053		em_print_hw_stats(adapter);
5054	}
5055
5056	return (error);
5057}
5058
5059static int
5060em_sysctl_int_delay(SYSCTL_HANDLER_ARGS)
5061{
5062	struct em_int_delay_info *info;
5063	struct adapter *adapter;
5064	u32 regval;
5065	int error, usecs, ticks;
5066
5067	info = (struct em_int_delay_info *)arg1;
5068	usecs = info->value;
5069	error = sysctl_handle_int(oidp, &usecs, 0, req);
5070	if (error != 0 || req->newptr == NULL)
5071		return (error);
5072	if (usecs < 0 || usecs > EM_TICKS_TO_USECS(65535))
5073		return (EINVAL);
5074	info->value = usecs;
5075	ticks = EM_USECS_TO_TICKS(usecs);
5076
5077	adapter = info->adapter;
5078
5079	EM_CORE_LOCK(adapter);
5080	regval = E1000_READ_OFFSET(&adapter->hw, info->offset);
5081	regval = (regval & ~0xffff) | (ticks & 0xffff);
5082	/* Handle a few special cases. */
5083	switch (info->offset) {
5084	case E1000_RDTR:
5085		break;
5086	case E1000_TIDV:
5087		if (ticks == 0) {
5088			adapter->txd_cmd &= ~E1000_TXD_CMD_IDE;
5089			/* Don't write 0 into the TIDV register. */
5090			regval++;
5091		} else
5092			adapter->txd_cmd |= E1000_TXD_CMD_IDE;
5093		break;
5094	}
5095	E1000_WRITE_OFFSET(&adapter->hw, info->offset, regval);
5096	EM_CORE_UNLOCK(adapter);
5097	return (0);
5098}
5099
5100static void
5101em_add_int_delay_sysctl(struct adapter *adapter, const char *name,
5102	const char *description, struct em_int_delay_info *info,
5103	int offset, int value)
5104{
5105	info->adapter = adapter;
5106	info->offset = offset;
5107	info->value = value;
5108	SYSCTL_ADD_PROC(device_get_sysctl_ctx(adapter->dev),
5109	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
5110	    OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW,
5111	    info, 0, em_sysctl_int_delay, "I", description);
5112}
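/*
 * Example (a sketch; the OID names are whatever the callers of
 * em_add_int_delay_sysctl() pass in, e.g. a hypothetical "rx_int_delay"):
 *
 *	# sysctl dev.em.0.rx_int_delay=32
 *
 * The value is taken in microseconds and converted to the hardware's
 * 1.024us delay-timer ticks by em_sysctl_int_delay() above.
 */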
5113
5114static void
5115em_add_rx_process_limit(struct adapter *adapter, const char *name,
5116	const char *description, int *limit, int value)
5117{
5118	*limit = value;
5119	SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev),
5120	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
5121	    OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW, limit, value, description);
5122}
5123
5124
5125