if_em.c revision 235527
1/******************************************************************************
2
3  Copyright (c) 2001-2011, Intel Corporation
4  All rights reserved.
5
6  Redistribution and use in source and binary forms, with or without
7  modification, are permitted provided that the following conditions are met:
8
9   1. Redistributions of source code must retain the above copyright notice,
10      this list of conditions and the following disclaimer.
11
12   2. Redistributions in binary form must reproduce the above copyright
13      notice, this list of conditions and the following disclaimer in the
14      documentation and/or other materials provided with the distribution.
15
16   3. Neither the name of the Intel Corporation nor the names of its
17      contributors may be used to endorse or promote products derived from
18      this software without specific prior written permission.
19
20  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
24  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30  POSSIBILITY OF SUCH DAMAGE.
31
32******************************************************************************/
33/*$FreeBSD: stable/9/sys/dev/e1000/if_em.c 235527 2012-05-16 22:22:52Z jfv $*/
34
35#ifdef HAVE_KERNEL_OPTION_HEADERS
36#include "opt_device_polling.h"
37#include "opt_inet.h"
38#include "opt_inet6.h"
39#endif
40
41#include <sys/param.h>
42#include <sys/systm.h>
43#if __FreeBSD_version >= 800000
44#include <sys/buf_ring.h>
45#endif
46#include <sys/bus.h>
47#include <sys/endian.h>
48#include <sys/kernel.h>
49#include <sys/kthread.h>
50#include <sys/malloc.h>
51#include <sys/mbuf.h>
52#include <sys/module.h>
53#include <sys/rman.h>
54#include <sys/socket.h>
55#include <sys/sockio.h>
56#include <sys/sysctl.h>
57#include <sys/taskqueue.h>
58#include <sys/eventhandler.h>
59#include <machine/bus.h>
60#include <machine/resource.h>
61
62#include <net/bpf.h>
63#include <net/ethernet.h>
64#include <net/if.h>
65#include <net/if_arp.h>
66#include <net/if_dl.h>
67#include <net/if_media.h>
68
69#include <net/if_types.h>
70#include <net/if_vlan_var.h>
71
72#include <netinet/in_systm.h>
73#include <netinet/in.h>
74#include <netinet/if_ether.h>
75#include <netinet/ip.h>
76#include <netinet/ip6.h>
77#include <netinet/tcp.h>
78#include <netinet/udp.h>
79
80#include <machine/in_cksum.h>
81#include <dev/led/led.h>
82#include <dev/pci/pcivar.h>
83#include <dev/pci/pcireg.h>
84
85#include "e1000_api.h"
86#include "e1000_82571.h"
87#include "if_em.h"
88
89/*********************************************************************
90 *  Set this to one to display debug statistics
91 *********************************************************************/
92int	em_display_debug_stats = 0;
93
94/*********************************************************************
95 *  Driver version:
96 *********************************************************************/
97char em_driver_version[] = "7.3.2";
98
99/*********************************************************************
100 *  PCI Device ID Table
101 *
102 *  Used by probe to select devices to load on
103 *  Last field stores an index into e1000_strings
104 *  Last entry must be all 0s
105 *
106 *  { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index }
107 *********************************************************************/
108
109static em_vendor_info_t em_vendor_info_array[] =
110{
111	/* Intel(R) PRO/1000 Network Connection */
112	{ 0x8086, E1000_DEV_ID_82571EB_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
113	{ 0x8086, E1000_DEV_ID_82571EB_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
114	{ 0x8086, E1000_DEV_ID_82571EB_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
115	{ 0x8086, E1000_DEV_ID_82571EB_SERDES_DUAL,
116						PCI_ANY_ID, PCI_ANY_ID, 0},
117	{ 0x8086, E1000_DEV_ID_82571EB_SERDES_QUAD,
118						PCI_ANY_ID, PCI_ANY_ID, 0},
119	{ 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER,
120						PCI_ANY_ID, PCI_ANY_ID, 0},
121	{ 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER_LP,
122						PCI_ANY_ID, PCI_ANY_ID, 0},
123	{ 0x8086, E1000_DEV_ID_82571EB_QUAD_FIBER,
124						PCI_ANY_ID, PCI_ANY_ID, 0},
125	{ 0x8086, E1000_DEV_ID_82571PT_QUAD_COPPER,
126						PCI_ANY_ID, PCI_ANY_ID, 0},
127	{ 0x8086, E1000_DEV_ID_82572EI_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
128	{ 0x8086, E1000_DEV_ID_82572EI_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
129	{ 0x8086, E1000_DEV_ID_82572EI_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
130	{ 0x8086, E1000_DEV_ID_82572EI,		PCI_ANY_ID, PCI_ANY_ID, 0},
131
132	{ 0x8086, E1000_DEV_ID_82573E,		PCI_ANY_ID, PCI_ANY_ID, 0},
133	{ 0x8086, E1000_DEV_ID_82573E_IAMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
134	{ 0x8086, E1000_DEV_ID_82573L,		PCI_ANY_ID, PCI_ANY_ID, 0},
135	{ 0x8086, E1000_DEV_ID_82583V,		PCI_ANY_ID, PCI_ANY_ID, 0},
136	{ 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_SPT,
137						PCI_ANY_ID, PCI_ANY_ID, 0},
138	{ 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_SPT,
139						PCI_ANY_ID, PCI_ANY_ID, 0},
140	{ 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_DPT,
141						PCI_ANY_ID, PCI_ANY_ID, 0},
142	{ 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_DPT,
143						PCI_ANY_ID, PCI_ANY_ID, 0},
144	{ 0x8086, E1000_DEV_ID_ICH8_IGP_M_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
145	{ 0x8086, E1000_DEV_ID_ICH8_IGP_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
146	{ 0x8086, E1000_DEV_ID_ICH8_IGP_C,	PCI_ANY_ID, PCI_ANY_ID, 0},
147	{ 0x8086, E1000_DEV_ID_ICH8_IFE,	PCI_ANY_ID, PCI_ANY_ID, 0},
148	{ 0x8086, E1000_DEV_ID_ICH8_IFE_GT,	PCI_ANY_ID, PCI_ANY_ID, 0},
149	{ 0x8086, E1000_DEV_ID_ICH8_IFE_G,	PCI_ANY_ID, PCI_ANY_ID, 0},
150	{ 0x8086, E1000_DEV_ID_ICH8_IGP_M,	PCI_ANY_ID, PCI_ANY_ID, 0},
151	{ 0x8086, E1000_DEV_ID_ICH8_82567V_3,	PCI_ANY_ID, PCI_ANY_ID, 0},
152	{ 0x8086, E1000_DEV_ID_ICH9_IGP_M_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
153	{ 0x8086, E1000_DEV_ID_ICH9_IGP_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
154	{ 0x8086, E1000_DEV_ID_ICH9_IGP_C,	PCI_ANY_ID, PCI_ANY_ID, 0},
155	{ 0x8086, E1000_DEV_ID_ICH9_IGP_M,	PCI_ANY_ID, PCI_ANY_ID, 0},
156	{ 0x8086, E1000_DEV_ID_ICH9_IGP_M_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
157	{ 0x8086, E1000_DEV_ID_ICH9_IFE,	PCI_ANY_ID, PCI_ANY_ID, 0},
158	{ 0x8086, E1000_DEV_ID_ICH9_IFE_GT,	PCI_ANY_ID, PCI_ANY_ID, 0},
159	{ 0x8086, E1000_DEV_ID_ICH9_IFE_G,	PCI_ANY_ID, PCI_ANY_ID, 0},
160	{ 0x8086, E1000_DEV_ID_ICH9_BM,		PCI_ANY_ID, PCI_ANY_ID, 0},
161	{ 0x8086, E1000_DEV_ID_82574L,		PCI_ANY_ID, PCI_ANY_ID, 0},
162	{ 0x8086, E1000_DEV_ID_82574LA,		PCI_ANY_ID, PCI_ANY_ID, 0},
163	{ 0x8086, E1000_DEV_ID_ICH10_R_BM_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
164	{ 0x8086, E1000_DEV_ID_ICH10_R_BM_LF,	PCI_ANY_ID, PCI_ANY_ID, 0},
165	{ 0x8086, E1000_DEV_ID_ICH10_R_BM_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
166	{ 0x8086, E1000_DEV_ID_ICH10_D_BM_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
167	{ 0x8086, E1000_DEV_ID_ICH10_D_BM_LF,	PCI_ANY_ID, PCI_ANY_ID, 0},
168	{ 0x8086, E1000_DEV_ID_ICH10_D_BM_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
169	{ 0x8086, E1000_DEV_ID_PCH_M_HV_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
170	{ 0x8086, E1000_DEV_ID_PCH_M_HV_LC,	PCI_ANY_ID, PCI_ANY_ID, 0},
171	{ 0x8086, E1000_DEV_ID_PCH_D_HV_DM,	PCI_ANY_ID, PCI_ANY_ID, 0},
172	{ 0x8086, E1000_DEV_ID_PCH_D_HV_DC,	PCI_ANY_ID, PCI_ANY_ID, 0},
173	{ 0x8086, E1000_DEV_ID_PCH2_LV_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
174	{ 0x8086, E1000_DEV_ID_PCH2_LV_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
175	/* required last entry */
176	{ 0, 0, 0, 0, 0}
177};
178
179/*********************************************************************
180 *  Table of branding strings for all supported NICs.
181 *********************************************************************/
182
183static char *em_strings[] = {
184	"Intel(R) PRO/1000 Network Connection"
185};
186
187/*********************************************************************
188 *  Function prototypes
189 *********************************************************************/
190static int	em_probe(device_t);
191static int	em_attach(device_t);
192static int	em_detach(device_t);
193static int	em_shutdown(device_t);
194static int	em_suspend(device_t);
195static int	em_resume(device_t);
196#ifdef EM_MULTIQUEUE
197static int	em_mq_start(struct ifnet *, struct mbuf *);
198static int	em_mq_start_locked(struct ifnet *,
199		    struct tx_ring *, struct mbuf *);
200static void	em_qflush(struct ifnet *);
201#else
202static void	em_start(struct ifnet *);
203static void	em_start_locked(struct ifnet *, struct tx_ring *);
204#endif
205static int	em_ioctl(struct ifnet *, u_long, caddr_t);
206static void	em_init(void *);
207static void	em_init_locked(struct adapter *);
208static void	em_stop(void *);
209static void	em_media_status(struct ifnet *, struct ifmediareq *);
210static int	em_media_change(struct ifnet *);
211static void	em_identify_hardware(struct adapter *);
212static int	em_allocate_pci_resources(struct adapter *);
213static int	em_allocate_legacy(struct adapter *);
214static int	em_allocate_msix(struct adapter *);
215static int	em_allocate_queues(struct adapter *);
216static int	em_setup_msix(struct adapter *);
217static void	em_free_pci_resources(struct adapter *);
218static void	em_local_timer(void *);
219static void	em_reset(struct adapter *);
220static int	em_setup_interface(device_t, struct adapter *);
221
222static void	em_setup_transmit_structures(struct adapter *);
223static void	em_initialize_transmit_unit(struct adapter *);
224static int	em_allocate_transmit_buffers(struct tx_ring *);
225static void	em_free_transmit_structures(struct adapter *);
226static void	em_free_transmit_buffers(struct tx_ring *);
227
228static int	em_setup_receive_structures(struct adapter *);
229static int	em_allocate_receive_buffers(struct rx_ring *);
230static void	em_initialize_receive_unit(struct adapter *);
231static void	em_free_receive_structures(struct adapter *);
232static void	em_free_receive_buffers(struct rx_ring *);
233
234static void	em_enable_intr(struct adapter *);
235static void	em_disable_intr(struct adapter *);
236static void	em_update_stats_counters(struct adapter *);
237static void	em_add_hw_stats(struct adapter *adapter);
238static void	em_txeof(struct tx_ring *);
239static bool	em_rxeof(struct rx_ring *, int, int *);
240#ifndef __NO_STRICT_ALIGNMENT
241static int	em_fixup_rx(struct rx_ring *);
242#endif
243static void	em_receive_checksum(struct e1000_rx_desc *, struct mbuf *);
244static void	em_transmit_checksum_setup(struct tx_ring *, struct mbuf *, int,
245		    struct ip *, u32 *, u32 *);
246static void	em_tso_setup(struct tx_ring *, struct mbuf *, int, struct ip *,
247		    struct tcphdr *, u32 *, u32 *);
248static void	em_set_promisc(struct adapter *);
249static void	em_disable_promisc(struct adapter *);
250static void	em_set_multi(struct adapter *);
251static void	em_update_link_status(struct adapter *);
252static void	em_refresh_mbufs(struct rx_ring *, int);
253static void	em_register_vlan(void *, struct ifnet *, u16);
254static void	em_unregister_vlan(void *, struct ifnet *, u16);
255static void	em_setup_vlan_hw_support(struct adapter *);
256static int	em_xmit(struct tx_ring *, struct mbuf **);
257static int	em_dma_malloc(struct adapter *, bus_size_t,
258		    struct em_dma_alloc *, int);
259static void	em_dma_free(struct adapter *, struct em_dma_alloc *);
260static int	em_sysctl_nvm_info(SYSCTL_HANDLER_ARGS);
261static void	em_print_nvm_info(struct adapter *);
262static int	em_sysctl_debug_info(SYSCTL_HANDLER_ARGS);
263static void	em_print_debug_info(struct adapter *);
264static int 	em_is_valid_ether_addr(u8 *);
265static int	em_sysctl_int_delay(SYSCTL_HANDLER_ARGS);
266static void	em_add_int_delay_sysctl(struct adapter *, const char *,
267		    const char *, struct em_int_delay_info *, int, int);
268/* Management and WOL Support */
269static void	em_init_manageability(struct adapter *);
270static void	em_release_manageability(struct adapter *);
271static void     em_get_hw_control(struct adapter *);
272static void     em_release_hw_control(struct adapter *);
273static void	em_get_wakeup(device_t);
274static void     em_enable_wakeup(device_t);
275static int	em_enable_phy_wakeup(struct adapter *);
276static void	em_led_func(void *, int);
277static void	em_disable_aspm(struct adapter *);
278
279static int	em_irq_fast(void *);
280
281/* MSIX handlers */
282static void	em_msix_tx(void *);
283static void	em_msix_rx(void *);
284static void	em_msix_link(void *);
285static void	em_handle_tx(void *context, int pending);
286static void	em_handle_rx(void *context, int pending);
287static void	em_handle_link(void *context, int pending);
288
289static void	em_set_sysctl_value(struct adapter *, const char *,
290		    const char *, int *, int);
291static int	em_set_flowcntl(SYSCTL_HANDLER_ARGS);
292
293static __inline void em_rx_discard(struct rx_ring *, int);
294
295#ifdef DEVICE_POLLING
296static poll_handler_t em_poll;
297#endif /* POLLING */
298
299/*********************************************************************
300 *  FreeBSD Device Interface Entry Points
301 *********************************************************************/
302
303static device_method_t em_methods[] = {
304	/* Device interface */
305	DEVMETHOD(device_probe, em_probe),
306	DEVMETHOD(device_attach, em_attach),
307	DEVMETHOD(device_detach, em_detach),
308	DEVMETHOD(device_shutdown, em_shutdown),
309	DEVMETHOD(device_suspend, em_suspend),
310	DEVMETHOD(device_resume, em_resume),
311	{0, 0}
312};
313
314static driver_t em_driver = {
315	"em", em_methods, sizeof(struct adapter),
316};
317
318devclass_t em_devclass;
319DRIVER_MODULE(em, pci, em_driver, em_devclass, 0, 0);
320MODULE_DEPEND(em, pci, 1, 1, 1);
321MODULE_DEPEND(em, ether, 1, 1, 1);
322
323/*********************************************************************
324 *  Tunable default values.
325 *********************************************************************/
326
327#define EM_TICKS_TO_USECS(ticks)	((1024 * (ticks) + 500) / 1000)
328#define EM_USECS_TO_TICKS(usecs)	((1000 * (usecs) + 512) / 1024)
329#define M_TSO_LEN			66
330
331/* Allow common code without TSO */
332#ifndef CSUM_TSO
333#define CSUM_TSO	0
334#endif
335
336static SYSCTL_NODE(_hw, OID_AUTO, em, CTLFLAG_RD, 0, "EM driver parameters");
337
338static int em_tx_int_delay_dflt = EM_TICKS_TO_USECS(EM_TIDV);
339static int em_rx_int_delay_dflt = EM_TICKS_TO_USECS(EM_RDTR);
340TUNABLE_INT("hw.em.tx_int_delay", &em_tx_int_delay_dflt);
341TUNABLE_INT("hw.em.rx_int_delay", &em_rx_int_delay_dflt);
342SYSCTL_INT(_hw_em, OID_AUTO, tx_int_delay, CTLFLAG_RDTUN, &em_tx_int_delay_dflt,
343    0, "Default transmit interrupt delay in usecs");
344SYSCTL_INT(_hw_em, OID_AUTO, rx_int_delay, CTLFLAG_RDTUN, &em_rx_int_delay_dflt,
345    0, "Default receive interrupt delay in usecs");
346
347static int em_tx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_TADV);
348static int em_rx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_RADV);
349TUNABLE_INT("hw.em.tx_abs_int_delay", &em_tx_abs_int_delay_dflt);
350TUNABLE_INT("hw.em.rx_abs_int_delay", &em_rx_abs_int_delay_dflt);
351SYSCTL_INT(_hw_em, OID_AUTO, tx_abs_int_delay, CTLFLAG_RDTUN,
352    &em_tx_abs_int_delay_dflt, 0,
353    "Default transmit interrupt delay limit in usecs");
354SYSCTL_INT(_hw_em, OID_AUTO, rx_abs_int_delay, CTLFLAG_RDTUN,
355    &em_rx_abs_int_delay_dflt, 0,
356    "Default receive interrupt delay limit in usecs");
357
358static int em_rxd = EM_DEFAULT_RXD;
359static int em_txd = EM_DEFAULT_TXD;
360TUNABLE_INT("hw.em.rxd", &em_rxd);
361TUNABLE_INT("hw.em.txd", &em_txd);
362SYSCTL_INT(_hw_em, OID_AUTO, rxd, CTLFLAG_RDTUN, &em_rxd, 0,
363    "Number of receive descriptors per queue");
364SYSCTL_INT(_hw_em, OID_AUTO, txd, CTLFLAG_RDTUN, &em_txd, 0,
365    "Number of transmit descriptors per queue");
366
367static int em_smart_pwr_down = FALSE;
368TUNABLE_INT("hw.em.smart_pwr_down", &em_smart_pwr_down);
369SYSCTL_INT(_hw_em, OID_AUTO, smart_pwr_down, CTLFLAG_RDTUN, &em_smart_pwr_down,
370    0, "Set to true to leave smart power down enabled on newer adapters");
371
372/* Controls whether promiscuous also shows bad packets */
373static int em_debug_sbp = FALSE;
374TUNABLE_INT("hw.em.sbp", &em_debug_sbp);
375SYSCTL_INT(_hw_em, OID_AUTO, sbp, CTLFLAG_RDTUN, &em_debug_sbp, 0,
376    "Show bad packets in promiscuous mode");
377
378static int em_enable_msix = TRUE;
379TUNABLE_INT("hw.em.enable_msix", &em_enable_msix);
380SYSCTL_INT(_hw_em, OID_AUTO, enable_msix, CTLFLAG_RDTUN, &em_enable_msix, 0,
381    "Enable MSI-X interrupts");
382
383/* How many packets rxeof tries to clean at a time */
384static int em_rx_process_limit = 100;
385TUNABLE_INT("hw.em.rx_process_limit", &em_rx_process_limit);
386SYSCTL_INT(_hw_em, OID_AUTO, rx_process_limit, CTLFLAG_RDTUN,
387    &em_rx_process_limit, 0,
388    "Maximum number of received packets to process "
389    "at a time, -1 means unlimited");
390
391/* Energy efficient ethernet - default to OFF */
392static int eee_setting = 0;
393TUNABLE_INT("hw.em.eee_setting", &eee_setting);
394SYSCTL_INT(_hw_em, OID_AUTO, eee_setting, CTLFLAG_RDTUN, &eee_setting, 0,
395    "Enable Energy Efficient Ethernet");
396
397/* Global used in WOL setup with multiport cards */
398static int global_quad_port_a = 0;
399
400#ifdef DEV_NETMAP	/* see ixgbe.c for details */
401#include <dev/netmap/if_em_netmap.h>
402#endif /* DEV_NETMAP */
403
404/*********************************************************************
405 *  Device identification routine
406 *
407 *  em_probe determines if the driver should be loaded on
408 *  adapter based on PCI vendor/device id of the adapter.
409 *
410 *  return BUS_PROBE_DEFAULT on success, positive on failure
411 *********************************************************************/
412
413static int
414em_probe(device_t dev)
415{
416	char		adapter_name[60];
417	u16		pci_vendor_id = 0;
418	u16		pci_device_id = 0;
419	u16		pci_subvendor_id = 0;
420	u16		pci_subdevice_id = 0;
421	em_vendor_info_t *ent;
422
423	INIT_DEBUGOUT("em_probe: begin");
424
425	pci_vendor_id = pci_get_vendor(dev);
426	if (pci_vendor_id != EM_VENDOR_ID)
427		return (ENXIO);
428
429	pci_device_id = pci_get_device(dev);
430	pci_subvendor_id = pci_get_subvendor(dev);
431	pci_subdevice_id = pci_get_subdevice(dev);
432
433	ent = em_vendor_info_array;
434	while (ent->vendor_id != 0) {
435		if ((pci_vendor_id == ent->vendor_id) &&
436		    (pci_device_id == ent->device_id) &&
437
438		    ((pci_subvendor_id == ent->subvendor_id) ||
439		    (ent->subvendor_id == PCI_ANY_ID)) &&
440
441		    ((pci_subdevice_id == ent->subdevice_id) ||
442		    (ent->subdevice_id == PCI_ANY_ID))) {
443			sprintf(adapter_name, "%s %s",
444				em_strings[ent->index],
445				em_driver_version);
446			device_set_desc_copy(dev, adapter_name);
447			return (BUS_PROBE_DEFAULT);
448		}
449		ent++;
450	}
451
452	return (ENXIO);
453}
454
455/*********************************************************************
456 *  Device initialization routine
457 *
458 *  The attach entry point is called when the driver is being loaded.
459 *  This routine identifies the type of hardware, allocates all resources
460 *  and initializes the hardware.
461 *
462 *  return 0 on success, positive on failure
463 *********************************************************************/
464
465static int
466em_attach(device_t dev)
467{
468	struct adapter	*adapter;
469	struct e1000_hw	*hw;
470	int		error = 0;
471
472	INIT_DEBUGOUT("em_attach: begin");
473
474	if (resource_disabled("em", device_get_unit(dev))) {
475		device_printf(dev, "Disabled by device hint\n");
476		return (ENXIO);
477	}
478
479	adapter = device_get_softc(dev);
480	adapter->dev = adapter->osdep.dev = dev;
481	hw = &adapter->hw;
482	EM_CORE_LOCK_INIT(adapter, device_get_nameunit(dev));
483
484	/* SYSCTL stuff */
485	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
486	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
487	    OID_AUTO, "nvm", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
488	    em_sysctl_nvm_info, "I", "NVM Information");
489
490	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
491	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
492	    OID_AUTO, "debug", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
493	    em_sysctl_debug_info, "I", "Debug Information");
494
495	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
496	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
497	    OID_AUTO, "fc", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
498	    em_set_flowcntl, "I", "Flow Control");
499
500	callout_init_mtx(&adapter->timer, &adapter->core_mtx, 0);
501
502	/* Determine hardware and mac info */
503	em_identify_hardware(adapter);
504
505	/* Setup PCI resources */
506	if (em_allocate_pci_resources(adapter)) {
507		device_printf(dev, "Allocation of PCI resources failed\n");
508		error = ENXIO;
509		goto err_pci;
510	}
511
512	/*
513	** For ICH8 and family we need to
514	** map the flash memory, and this
515	** must happen after the MAC is
516	** identified
517	*/
518	if ((hw->mac.type == e1000_ich8lan) ||
519	    (hw->mac.type == e1000_ich9lan) ||
520	    (hw->mac.type == e1000_ich10lan) ||
521	    (hw->mac.type == e1000_pchlan) ||
522	    (hw->mac.type == e1000_pch2lan)) {
523		int rid = EM_BAR_TYPE_FLASH;
524		adapter->flash = bus_alloc_resource_any(dev,
525		    SYS_RES_MEMORY, &rid, RF_ACTIVE);
526		if (adapter->flash == NULL) {
527			device_printf(dev, "Mapping of Flash failed\n");
528			error = ENXIO;
529			goto err_pci;
530		}
531		/* This is used in the shared code */
532		hw->flash_address = (u8 *)adapter->flash;
533		adapter->osdep.flash_bus_space_tag =
534		    rman_get_bustag(adapter->flash);
535		adapter->osdep.flash_bus_space_handle =
536		    rman_get_bushandle(adapter->flash);
537	}
538
539	/* Do Shared Code initialization */
540	if (e1000_setup_init_funcs(hw, TRUE)) {
541		device_printf(dev, "Setup of Shared code failed\n");
542		error = ENXIO;
543		goto err_pci;
544	}
545
546	e1000_get_bus_info(hw);
547
548	/* Set up some sysctls for the tunable interrupt delays */
549	em_add_int_delay_sysctl(adapter, "rx_int_delay",
550	    "receive interrupt delay in usecs", &adapter->rx_int_delay,
551	    E1000_REGISTER(hw, E1000_RDTR), em_rx_int_delay_dflt);
552	em_add_int_delay_sysctl(adapter, "tx_int_delay",
553	    "transmit interrupt delay in usecs", &adapter->tx_int_delay,
554	    E1000_REGISTER(hw, E1000_TIDV), em_tx_int_delay_dflt);
555	em_add_int_delay_sysctl(adapter, "rx_abs_int_delay",
556	    "receive interrupt delay limit in usecs",
557	    &adapter->rx_abs_int_delay,
558	    E1000_REGISTER(hw, E1000_RADV),
559	    em_rx_abs_int_delay_dflt);
560	em_add_int_delay_sysctl(adapter, "tx_abs_int_delay",
561	    "transmit interrupt delay limit in usecs",
562	    &adapter->tx_abs_int_delay,
563	    E1000_REGISTER(hw, E1000_TADV),
564	    em_tx_abs_int_delay_dflt);
565
566	/* Sysctl for limiting the amount of work done in the taskqueue */
567	em_set_sysctl_value(adapter, "rx_processing_limit",
568	    "max number of rx packets to process", &adapter->rx_process_limit,
569	    em_rx_process_limit);
570
571	/*
572	 * Validate number of transmit and receive descriptors. It
573	 * must not exceed hardware maximum, and must be multiple
574	 * of E1000_DBA_ALIGN.
575	 */
576	if (((em_txd * sizeof(struct e1000_tx_desc)) % EM_DBA_ALIGN) != 0 ||
577	    (em_txd > EM_MAX_TXD) || (em_txd < EM_MIN_TXD)) {
578		device_printf(dev, "Using %d TX descriptors instead of %d!\n",
579		    EM_DEFAULT_TXD, em_txd);
580		adapter->num_tx_desc = EM_DEFAULT_TXD;
581	} else
582		adapter->num_tx_desc = em_txd;
583
584	if (((em_rxd * sizeof(struct e1000_rx_desc)) % EM_DBA_ALIGN) != 0 ||
585	    (em_rxd > EM_MAX_RXD) || (em_rxd < EM_MIN_RXD)) {
586		device_printf(dev, "Using %d RX descriptors instead of %d!\n",
587		    EM_DEFAULT_RXD, em_rxd);
588		adapter->num_rx_desc = EM_DEFAULT_RXD;
589	} else
590		adapter->num_rx_desc = em_rxd;
591
592	hw->mac.autoneg = DO_AUTO_NEG;
593	hw->phy.autoneg_wait_to_complete = FALSE;
594	hw->phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
595
596	/* Copper options */
597	if (hw->phy.media_type == e1000_media_type_copper) {
598		hw->phy.mdix = AUTO_ALL_MODES;
599		hw->phy.disable_polarity_correction = FALSE;
600		hw->phy.ms_type = EM_MASTER_SLAVE;
601	}
602
603	/*
604	 * Set the frame limits assuming
605	 * standard ethernet sized frames.
606	 */
607	adapter->max_frame_size = ETHERMTU + ETHER_HDR_LEN + ETHERNET_FCS_SIZE;
608	adapter->min_frame_size = ETH_ZLEN + ETHERNET_FCS_SIZE;
609
610	/*
611	 * This controls when hardware reports transmit completion
612	 * status.
613	 */
614	hw->mac.report_tx_early = 1;
615
616	/*
617	** Get queue/ring memory
618	*/
619	if (em_allocate_queues(adapter)) {
620		error = ENOMEM;
621		goto err_pci;
622	}
623
624	/* Allocate multicast array memory. */
625	adapter->mta = malloc(sizeof(u8) * ETH_ADDR_LEN *
626	    MAX_NUM_MULTICAST_ADDRESSES, M_DEVBUF, M_NOWAIT);
627	if (adapter->mta == NULL) {
628		device_printf(dev, "Can not allocate multicast setup array\n");
629		error = ENOMEM;
630		goto err_late;
631	}
632
633	/* Check SOL/IDER usage */
634	if (e1000_check_reset_block(hw))
635		device_printf(dev, "PHY reset is blocked"
636		    " due to SOL/IDER session.\n");
637
638	/* Sysctl for setting Energy Efficient Ethernet */
639	em_set_sysctl_value(adapter, "eee_control",
640	    "enable Energy Efficient Ethernet",
641	    &hw->dev_spec.ich8lan.eee_disable, eee_setting);
642
643	/*
644	** Start from a known state, this is
645	** important in reading the nvm and
646	** mac from that.
647	*/
648	e1000_reset_hw(hw);
649
650
651	/* Make sure we have a good EEPROM before we read from it */
652	if (e1000_validate_nvm_checksum(hw) < 0) {
653		/*
654		** Some PCI-E parts fail the first check due to
655		** the link being in sleep state, call it again,
656		** if it fails a second time its a real issue.
657		*/
658		if (e1000_validate_nvm_checksum(hw) < 0) {
659			device_printf(dev,
660			    "The EEPROM Checksum Is Not Valid\n");
661			error = EIO;
662			goto err_late;
663		}
664	}
665
666	/* Copy the permanent MAC address out of the EEPROM */
667	if (e1000_read_mac_addr(hw) < 0) {
668		device_printf(dev, "EEPROM read error while reading MAC"
669		    " address\n");
670		error = EIO;
671		goto err_late;
672	}
673
674	if (!em_is_valid_ether_addr(hw->mac.addr)) {
675		device_printf(dev, "Invalid MAC address\n");
676		error = EIO;
677		goto err_late;
678	}
679
680	/*
681	**  Do interrupt configuration
682	*/
683	if (adapter->msix > 1) /* Do MSIX */
684		error = em_allocate_msix(adapter);
685	else  /* MSI or Legacy */
686		error = em_allocate_legacy(adapter);
687	if (error)
688		goto err_late;
689
690	/*
691	 * Get Wake-on-Lan and Management info for later use
692	 */
693	em_get_wakeup(dev);
694
695	/* Setup OS specific network interface */
696	if (em_setup_interface(dev, adapter) != 0)
697		goto err_late;
698
699	em_reset(adapter);
700
701	/* Initialize statistics */
702	em_update_stats_counters(adapter);
703
704	hw->mac.get_link_status = 1;
705	em_update_link_status(adapter);
706
707	/* Register for VLAN events */
708	adapter->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
709	    em_register_vlan, adapter, EVENTHANDLER_PRI_FIRST);
710	adapter->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
711	    em_unregister_vlan, adapter, EVENTHANDLER_PRI_FIRST);
712
713	em_add_hw_stats(adapter);
714
715	/* Non-AMT based hardware can now take control from firmware */
716	if (adapter->has_manage && !adapter->has_amt)
717		em_get_hw_control(adapter);
718
719	/* Tell the stack that the interface is not active */
720	adapter->ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
721	adapter->ifp->if_drv_flags |= IFF_DRV_OACTIVE;
722
723	adapter->led_dev = led_create(em_led_func, adapter,
724	    device_get_nameunit(dev));
725#ifdef DEV_NETMAP
726	em_netmap_attach(adapter);
727#endif /* DEV_NETMAP */
728
729	INIT_DEBUGOUT("em_attach: end");
730
731	return (0);
732
733err_late:
734	em_free_transmit_structures(adapter);
735	em_free_receive_structures(adapter);
736	em_release_hw_control(adapter);
737	if (adapter->ifp != NULL)
738		if_free(adapter->ifp);
739err_pci:
740	em_free_pci_resources(adapter);
741	free(adapter->mta, M_DEVBUF);
742	EM_CORE_LOCK_DESTROY(adapter);
743
744	return (error);
745}
746
747/*********************************************************************
748 *  Device removal routine
749 *
750 *  The detach entry point is called when the driver is being removed.
751 *  This routine stops the adapter and deallocates all the resources
752 *  that were allocated for driver operation.
753 *
754 *  return 0 on success, positive on failure
755 *********************************************************************/
756
757static int
758em_detach(device_t dev)
759{
760	struct adapter	*adapter = device_get_softc(dev);
761	struct ifnet	*ifp = adapter->ifp;
762
763	INIT_DEBUGOUT("em_detach: begin");
764
765	/* Make sure VLANS are not using driver */
766	if (adapter->ifp->if_vlantrunk != NULL) {
767		device_printf(dev,"Vlan in use, detach first\n");
768		return (EBUSY);
769	}
770
771#ifdef DEVICE_POLLING
772	if (ifp->if_capenable & IFCAP_POLLING)
773		ether_poll_deregister(ifp);
774#endif
775
776	if (adapter->led_dev != NULL)
777		led_destroy(adapter->led_dev);
778
779	EM_CORE_LOCK(adapter);
780	adapter->in_detach = 1;
781	em_stop(adapter);
782	EM_CORE_UNLOCK(adapter);
783	EM_CORE_LOCK_DESTROY(adapter);
784
785	e1000_phy_hw_reset(&adapter->hw);
786
787	em_release_manageability(adapter);
788	em_release_hw_control(adapter);
789
790	/* Unregister VLAN events */
791	if (adapter->vlan_attach != NULL)
792		EVENTHANDLER_DEREGISTER(vlan_config, adapter->vlan_attach);
793	if (adapter->vlan_detach != NULL)
794		EVENTHANDLER_DEREGISTER(vlan_unconfig, adapter->vlan_detach);
795
796	ether_ifdetach(adapter->ifp);
797	callout_drain(&adapter->timer);
798
799#ifdef DEV_NETMAP
800	netmap_detach(ifp);
801#endif /* DEV_NETMAP */
802
803	em_free_pci_resources(adapter);
804	bus_generic_detach(dev);
805	if_free(ifp);
806
807	em_free_transmit_structures(adapter);
808	em_free_receive_structures(adapter);
809
810	em_release_hw_control(adapter);
811	free(adapter->mta, M_DEVBUF);
812
813	return (0);
814}
815
816/*********************************************************************
817 *
818 *  Shutdown entry point
819 *
820 **********************************************************************/
821
822static int
823em_shutdown(device_t dev)
824{
825	return em_suspend(dev);
826}
827
828/*
829 * Suspend/resume device methods.
830 */
831static int
832em_suspend(device_t dev)
833{
834	struct adapter *adapter = device_get_softc(dev);
835
836	EM_CORE_LOCK(adapter);
837
838        em_release_manageability(adapter);
839	em_release_hw_control(adapter);
840	em_enable_wakeup(dev);
841
842	EM_CORE_UNLOCK(adapter);
843
844	return bus_generic_suspend(dev);
845}
846
847static int
848em_resume(device_t dev)
849{
850	struct adapter *adapter = device_get_softc(dev);
851	struct tx_ring	*txr = adapter->tx_rings;
852	struct ifnet *ifp = adapter->ifp;
853
854	EM_CORE_LOCK(adapter);
855	if (adapter->hw.mac.type == e1000_pch2lan)
856		e1000_resume_workarounds_pchlan(&adapter->hw);
857	em_init_locked(adapter);
858	em_init_manageability(adapter);
859
860	if ((ifp->if_flags & IFF_UP) &&
861	    (ifp->if_drv_flags & IFF_DRV_RUNNING) && adapter->link_active) {
862		for (int i = 0; i < adapter->num_queues; i++, txr++) {
863			EM_TX_LOCK(txr);
864#ifdef EM_MULTIQUEUE
865			if (!drbr_empty(ifp, txr->br))
866				em_mq_start_locked(ifp, txr, NULL);
867#else
868			if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
869				em_start_locked(ifp, txr);
870#endif
871			EM_TX_UNLOCK(txr);
872		}
873	}
874	EM_CORE_UNLOCK(adapter);
875
876	return bus_generic_resume(dev);
877}
878
879
880#ifdef EM_MULTIQUEUE
881/*********************************************************************
882 *  Multiqueue Transmit routines
883 *
884 *  em_mq_start is called by the stack to initiate a transmit.
885 *  however, if busy the driver can queue the request rather
886 *  than do an immediate send. It is this that is an advantage
887 *  in this driver, rather than also having multiple tx queues.
888 **********************************************************************/
889static int
890em_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr, struct mbuf *m)
891{
892	struct adapter  *adapter = txr->adapter;
893        struct mbuf     *next;
894        int             err = 0, enq = 0;
895
896	if ((ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) !=
897	    IFF_DRV_RUNNING || adapter->link_active == 0) {
898		if (m != NULL)
899			err = drbr_enqueue(ifp, txr->br, m);
900		return (err);
901	}
902
903	enq = 0;
904	if (m == NULL) {
905		next = drbr_dequeue(ifp, txr->br);
906	} else if (drbr_needs_enqueue(ifp, txr->br)) {
907		if ((err = drbr_enqueue(ifp, txr->br, m)) != 0)
908			return (err);
909		next = drbr_dequeue(ifp, txr->br);
910	} else
911		next = m;
912
913	/* Process the queue */
914	while (next != NULL) {
915		if ((err = em_xmit(txr, &next)) != 0) {
916                        if (next != NULL)
917                                err = drbr_enqueue(ifp, txr->br, next);
918                        break;
919		}
920		enq++;
921		drbr_stats_update(ifp, next->m_pkthdr.len, next->m_flags);
922		ETHER_BPF_MTAP(ifp, next);
923		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
924                        break;
925		next = drbr_dequeue(ifp, txr->br);
926	}
927
928	if (enq > 0) {
929                /* Set the watchdog */
930                txr->queue_status = EM_QUEUE_WORKING;
931		txr->watchdog_time = ticks;
932	}
933
934	if (txr->tx_avail < EM_MAX_SCATTER)
935		em_txeof(txr);
936	if (txr->tx_avail < EM_MAX_SCATTER)
937		ifp->if_drv_flags |= IFF_DRV_OACTIVE;
938	return (err);
939}
940
941/*
942** Multiqueue capable stack interface
943*/
944static int
945em_mq_start(struct ifnet *ifp, struct mbuf *m)
946{
947	struct adapter	*adapter = ifp->if_softc;
948	struct tx_ring	*txr = adapter->tx_rings;
949	int 		error;
950
951	if (EM_TX_TRYLOCK(txr)) {
952		error = em_mq_start_locked(ifp, txr, m);
953		EM_TX_UNLOCK(txr);
954	} else
955		error = drbr_enqueue(ifp, txr->br, m);
956
957	return (error);
958}
959
960/*
961** Flush all ring buffers
962*/
963static void
964em_qflush(struct ifnet *ifp)
965{
966	struct adapter  *adapter = ifp->if_softc;
967	struct tx_ring  *txr = adapter->tx_rings;
968	struct mbuf     *m;
969
970	for (int i = 0; i < adapter->num_queues; i++, txr++) {
971		EM_TX_LOCK(txr);
972		while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
973			m_freem(m);
974		EM_TX_UNLOCK(txr);
975	}
976	if_qflush(ifp);
977}
978#else  /* !EM_MULTIQUEUE */
979
980static void
981em_start_locked(struct ifnet *ifp, struct tx_ring *txr)
982{
983	struct adapter	*adapter = ifp->if_softc;
984	struct mbuf	*m_head;
985
986	EM_TX_LOCK_ASSERT(txr);
987
988	if ((ifp->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) !=
989	    IFF_DRV_RUNNING)
990		return;
991
992	if (!adapter->link_active)
993		return;
994
995	while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
996        	/* Call cleanup if number of TX descriptors low */
997		if (txr->tx_avail <= EM_TX_CLEANUP_THRESHOLD)
998			em_txeof(txr);
999		if (txr->tx_avail < EM_MAX_SCATTER) {
1000			ifp->if_drv_flags |= IFF_DRV_OACTIVE;
1001			break;
1002		}
1003                IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
1004		if (m_head == NULL)
1005			break;
1006		/*
1007		 *  Encapsulation can modify our pointer, and or make it
1008		 *  NULL on failure.  In that event, we can't requeue.
1009		 */
1010		if (em_xmit(txr, &m_head)) {
1011			if (m_head == NULL)
1012				break;
1013			IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
1014			break;
1015		}
1016
1017		/* Send a copy of the frame to the BPF listener */
1018		ETHER_BPF_MTAP(ifp, m_head);
1019
1020		/* Set timeout in case hardware has problems transmitting. */
1021		txr->watchdog_time = ticks;
1022                txr->queue_status = EM_QUEUE_WORKING;
1023	}
1024
1025	return;
1026}
1027
1028static void
1029em_start(struct ifnet *ifp)
1030{
1031	struct adapter	*adapter = ifp->if_softc;
1032	struct tx_ring	*txr = adapter->tx_rings;
1033
1034	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1035		EM_TX_LOCK(txr);
1036		em_start_locked(ifp, txr);
1037		EM_TX_UNLOCK(txr);
1038	}
1039	return;
1040}
1041#endif /* EM_MULTIQUEUE */
1042
1043/*********************************************************************
1044 *  Ioctl entry point
1045 *
1046 *  em_ioctl is called when the user wants to configure the
1047 *  interface.
1048 *
1049 *  return 0 on success, positive on failure
1050 **********************************************************************/
1051
1052static int
1053em_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
1054{
1055	struct adapter	*adapter = ifp->if_softc;
1056	struct ifreq	*ifr = (struct ifreq *)data;
1057#if defined(INET) || defined(INET6)
1058	struct ifaddr	*ifa = (struct ifaddr *)data;
1059#endif
1060	bool		avoid_reset = FALSE;
1061	int		error = 0;
1062
1063	if (adapter->in_detach)
1064		return (error);
1065
1066	switch (command) {
1067	case SIOCSIFADDR:
1068#ifdef INET
1069		if (ifa->ifa_addr->sa_family == AF_INET)
1070			avoid_reset = TRUE;
1071#endif
1072#ifdef INET6
1073		if (ifa->ifa_addr->sa_family == AF_INET6)
1074			avoid_reset = TRUE;
1075#endif
1076		/*
1077		** Calling init results in link renegotiation,
1078		** so we avoid doing it when possible.
1079		*/
1080		if (avoid_reset) {
1081			ifp->if_flags |= IFF_UP;
1082			if (!(ifp->if_drv_flags & IFF_DRV_RUNNING))
1083				em_init(adapter);
1084#ifdef INET
1085			if (!(ifp->if_flags & IFF_NOARP))
1086				arp_ifinit(ifp, ifa);
1087#endif
1088		} else
1089			error = ether_ioctl(ifp, command, data);
1090		break;
1091	case SIOCSIFMTU:
1092	    {
1093		int max_frame_size;
1094
1095		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFMTU (Set Interface MTU)");
1096
1097		EM_CORE_LOCK(adapter);
1098		switch (adapter->hw.mac.type) {
1099		case e1000_82571:
1100		case e1000_82572:
1101		case e1000_ich9lan:
1102		case e1000_ich10lan:
1103		case e1000_pch2lan:
1104		case e1000_82574:
1105		case e1000_82583:
1106		case e1000_80003es2lan:	/* 9K Jumbo Frame size */
1107			max_frame_size = 9234;
1108			break;
1109		case e1000_pchlan:
1110			max_frame_size = 4096;
1111			break;
1112			/* Adapters that do not support jumbo frames */
1113		case e1000_ich8lan:
1114			max_frame_size = ETHER_MAX_LEN;
1115			break;
1116		default:
1117			max_frame_size = MAX_JUMBO_FRAME_SIZE;
1118		}
1119		if (ifr->ifr_mtu > max_frame_size - ETHER_HDR_LEN -
1120		    ETHER_CRC_LEN) {
1121			EM_CORE_UNLOCK(adapter);
1122			error = EINVAL;
1123			break;
1124		}
1125
1126		ifp->if_mtu = ifr->ifr_mtu;
1127		adapter->max_frame_size =
1128		    ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
1129		em_init_locked(adapter);
1130		EM_CORE_UNLOCK(adapter);
1131		break;
1132	    }
1133	case SIOCSIFFLAGS:
1134		IOCTL_DEBUGOUT("ioctl rcv'd:\
1135		    SIOCSIFFLAGS (Set Interface Flags)");
1136		EM_CORE_LOCK(adapter);
1137		if (ifp->if_flags & IFF_UP) {
1138			if ((ifp->if_drv_flags & IFF_DRV_RUNNING)) {
1139				if ((ifp->if_flags ^ adapter->if_flags) &
1140				    (IFF_PROMISC | IFF_ALLMULTI)) {
1141					em_disable_promisc(adapter);
1142					em_set_promisc(adapter);
1143				}
1144			} else
1145				em_init_locked(adapter);
1146		} else
1147			if (ifp->if_drv_flags & IFF_DRV_RUNNING)
1148				em_stop(adapter);
1149		adapter->if_flags = ifp->if_flags;
1150		EM_CORE_UNLOCK(adapter);
1151		break;
1152	case SIOCADDMULTI:
1153	case SIOCDELMULTI:
1154		IOCTL_DEBUGOUT("ioctl rcv'd: SIOC(ADD|DEL)MULTI");
1155		if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1156			EM_CORE_LOCK(adapter);
1157			em_disable_intr(adapter);
1158			em_set_multi(adapter);
1159#ifdef DEVICE_POLLING
1160			if (!(ifp->if_capenable & IFCAP_POLLING))
1161#endif
1162				em_enable_intr(adapter);
1163			EM_CORE_UNLOCK(adapter);
1164		}
1165		break;
1166	case SIOCSIFMEDIA:
1167		/* Check SOL/IDER usage */
1168		EM_CORE_LOCK(adapter);
1169		if (e1000_check_reset_block(&adapter->hw)) {
1170			EM_CORE_UNLOCK(adapter);
1171			device_printf(adapter->dev, "Media change is"
1172			    " blocked due to SOL/IDER session.\n");
1173			break;
1174		}
1175		EM_CORE_UNLOCK(adapter);
1176		/* falls thru */
1177	case SIOCGIFMEDIA:
1178		IOCTL_DEBUGOUT("ioctl rcv'd: \
1179		    SIOCxIFMEDIA (Get/Set Interface Media)");
1180		error = ifmedia_ioctl(ifp, ifr, &adapter->media, command);
1181		break;
1182	case SIOCSIFCAP:
1183	    {
1184		int mask, reinit;
1185
1186		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFCAP (Set Capabilities)");
1187		reinit = 0;
1188		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
1189#ifdef DEVICE_POLLING
1190		if (mask & IFCAP_POLLING) {
1191			if (ifr->ifr_reqcap & IFCAP_POLLING) {
1192				error = ether_poll_register(em_poll, ifp);
1193				if (error)
1194					return (error);
1195				EM_CORE_LOCK(adapter);
1196				em_disable_intr(adapter);
1197				ifp->if_capenable |= IFCAP_POLLING;
1198				EM_CORE_UNLOCK(adapter);
1199			} else {
1200				error = ether_poll_deregister(ifp);
1201				/* Enable interrupt even in error case */
1202				EM_CORE_LOCK(adapter);
1203				em_enable_intr(adapter);
1204				ifp->if_capenable &= ~IFCAP_POLLING;
1205				EM_CORE_UNLOCK(adapter);
1206			}
1207		}
1208#endif
1209		if (mask & IFCAP_HWCSUM) {
1210			ifp->if_capenable ^= IFCAP_HWCSUM;
1211			reinit = 1;
1212		}
1213		if (mask & IFCAP_TSO4) {
1214			ifp->if_capenable ^= IFCAP_TSO4;
1215			reinit = 1;
1216		}
1217		if (mask & IFCAP_VLAN_HWTAGGING) {
1218			ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
1219			reinit = 1;
1220		}
1221		if (mask & IFCAP_VLAN_HWFILTER) {
1222			ifp->if_capenable ^= IFCAP_VLAN_HWFILTER;
1223			reinit = 1;
1224		}
1225		if (mask & IFCAP_VLAN_HWTSO) {
1226			ifp->if_capenable ^= IFCAP_VLAN_HWTSO;
1227			reinit = 1;
1228		}
1229		if ((mask & IFCAP_WOL) &&
1230		    (ifp->if_capabilities & IFCAP_WOL) != 0) {
1231			if (mask & IFCAP_WOL_MCAST)
1232				ifp->if_capenable ^= IFCAP_WOL_MCAST;
1233			if (mask & IFCAP_WOL_MAGIC)
1234				ifp->if_capenable ^= IFCAP_WOL_MAGIC;
1235		}
1236		if (reinit && (ifp->if_drv_flags & IFF_DRV_RUNNING))
1237			em_init(adapter);
1238		VLAN_CAPABILITIES(ifp);
1239		break;
1240	    }
1241
1242	default:
1243		error = ether_ioctl(ifp, command, data);
1244		break;
1245	}
1246
1247	return (error);
1248}
1249
1250
1251/*********************************************************************
1252 *  Init entry point
1253 *
1254 *  This routine is used in two ways. It is used by the stack as
1255 *  init entry point in network interface structure. It is also used
1256 *  by the driver as a hw/sw initialization routine to get to a
1257 *  consistent state.
1258 *
1259 *  return 0 on success, positive on failure
1260 **********************************************************************/
1261
1262static void
1263em_init_locked(struct adapter *adapter)
1264{
1265	struct ifnet	*ifp = adapter->ifp;
1266	device_t	dev = adapter->dev;
1267
1268	INIT_DEBUGOUT("em_init: begin");
1269
1270	EM_CORE_LOCK_ASSERT(adapter);
1271
1272	em_disable_intr(adapter);
1273	callout_stop(&adapter->timer);
1274
1275	/* Get the latest mac address, User can use a LAA */
1276        bcopy(IF_LLADDR(adapter->ifp), adapter->hw.mac.addr,
1277              ETHER_ADDR_LEN);
1278
1279	/* Put the address into the Receive Address Array */
1280	e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);
1281
1282	/*
1283	 * With the 82571 adapter, RAR[0] may be overwritten
1284	 * when the other port is reset, we make a duplicate
1285	 * in RAR[14] for that eventuality, this assures
1286	 * the interface continues to function.
1287	 */
1288	if (adapter->hw.mac.type == e1000_82571) {
1289		e1000_set_laa_state_82571(&adapter->hw, TRUE);
1290		e1000_rar_set(&adapter->hw, adapter->hw.mac.addr,
1291		    E1000_RAR_ENTRIES - 1);
1292	}
1293
1294	/* Initialize the hardware */
1295	em_reset(adapter);
1296	em_update_link_status(adapter);
1297
1298	/* Setup VLAN support, basic and offload if available */
1299	E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);
1300
1301	/* Set hardware offload abilities */
1302	ifp->if_hwassist = 0;
1303	if (ifp->if_capenable & IFCAP_TXCSUM)
1304		ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP);
1305	if (ifp->if_capenable & IFCAP_TSO4)
1306		ifp->if_hwassist |= CSUM_TSO;
1307
1308	/* Configure for OS presence */
1309	em_init_manageability(adapter);
1310
1311	/* Prepare transmit descriptors and buffers */
1312	em_setup_transmit_structures(adapter);
1313	em_initialize_transmit_unit(adapter);
1314
1315	/* Setup Multicast table */
1316	em_set_multi(adapter);
1317
1318	/*
1319	** Figure out the desired mbuf
1320	** pool for doing jumbos
1321	*/
1322	if (adapter->max_frame_size <= 2048)
1323		adapter->rx_mbuf_sz = MCLBYTES;
1324	else if (adapter->max_frame_size <= 4096)
1325		adapter->rx_mbuf_sz = MJUMPAGESIZE;
1326	else
1327		adapter->rx_mbuf_sz = MJUM9BYTES;
1328
1329	/* Prepare receive descriptors and buffers */
1330	if (em_setup_receive_structures(adapter)) {
1331		device_printf(dev, "Could not setup receive structures\n");
1332		em_stop(adapter);
1333		return;
1334	}
1335	em_initialize_receive_unit(adapter);
1336
1337	/* Use real VLAN Filter support? */
1338	if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) {
1339		if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
1340			/* Use real VLAN Filter support */
1341			em_setup_vlan_hw_support(adapter);
1342		else {
1343			u32 ctrl;
1344			ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL);
1345			ctrl |= E1000_CTRL_VME;
1346			E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl);
1347		}
1348	}
1349
1350	/* Don't lose promiscuous settings */
1351	em_set_promisc(adapter);
1352
1353	/* Set the interface as ACTIVE */
1354	ifp->if_drv_flags |= IFF_DRV_RUNNING;
1355	ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
1356
1357	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
1358	e1000_clear_hw_cntrs_base_generic(&adapter->hw);
1359
1360	/* MSI/X configuration for 82574 */
1361	if (adapter->hw.mac.type == e1000_82574) {
1362		int tmp;
1363		tmp = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
1364		tmp |= E1000_CTRL_EXT_PBA_CLR;
1365		E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, tmp);
1366		/* Set the IVAR - interrupt vector routing. */
1367		E1000_WRITE_REG(&adapter->hw, E1000_IVAR, adapter->ivars);
1368	}
1369
1370#ifdef DEVICE_POLLING
1371	/*
1372	 * Only enable interrupts if we are not polling, make sure
1373	 * they are off otherwise.
1374	 */
1375	if (ifp->if_capenable & IFCAP_POLLING)
1376		em_disable_intr(adapter);
1377	else
1378#endif /* DEVICE_POLLING */
1379		em_enable_intr(adapter);
1380
1381	/* AMT based hardware can now take control from firmware */
1382	if (adapter->has_manage && adapter->has_amt)
1383		em_get_hw_control(adapter);
1384}
1385
1386static void
1387em_init(void *arg)
1388{
1389	struct adapter *adapter = arg;
1390
1391	EM_CORE_LOCK(adapter);
1392	em_init_locked(adapter);
1393	EM_CORE_UNLOCK(adapter);
1394}
1395
1396
1397#ifdef DEVICE_POLLING
1398/*********************************************************************
1399 *
1400 *  Legacy polling routine: note this only works with single queue
1401 *
1402 *********************************************************************/
1403static int
1404em_poll(struct ifnet *ifp, enum poll_cmd cmd, int count)
1405{
1406	struct adapter *adapter = ifp->if_softc;
1407	struct tx_ring	*txr = adapter->tx_rings;
1408	struct rx_ring	*rxr = adapter->rx_rings;
1409	u32		reg_icr;
1410	int		rx_done;
1411
1412	EM_CORE_LOCK(adapter);
1413	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
1414		EM_CORE_UNLOCK(adapter);
1415		return (0);
1416	}
1417
1418	if (cmd == POLL_AND_CHECK_STATUS) {
1419		reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1420		if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
1421			callout_stop(&adapter->timer);
1422			adapter->hw.mac.get_link_status = 1;
1423			em_update_link_status(adapter);
1424			callout_reset(&adapter->timer, hz,
1425			    em_local_timer, adapter);
1426		}
1427	}
1428	EM_CORE_UNLOCK(adapter);
1429
1430	em_rxeof(rxr, count, &rx_done);
1431
1432	EM_TX_LOCK(txr);
1433	em_txeof(txr);
1434#ifdef EM_MULTIQUEUE
1435	if (!drbr_empty(ifp, txr->br))
1436		em_mq_start_locked(ifp, txr, NULL);
1437#else
1438	if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1439		em_start_locked(ifp, txr);
1440#endif
1441	EM_TX_UNLOCK(txr);
1442
1443	return (rx_done);
1444}
1445#endif /* DEVICE_POLLING */
1446
1447
1448/*********************************************************************
1449 *
1450 *  Fast Legacy/MSI Combined Interrupt Service routine
1451 *
1452 *********************************************************************/
1453static int
1454em_irq_fast(void *arg)
1455{
1456	struct adapter	*adapter = arg;
1457	struct ifnet	*ifp;
1458	u32		reg_icr;
1459
1460	ifp = adapter->ifp;
1461
1462	reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1463
1464	/* Hot eject?  */
1465	if (reg_icr == 0xffffffff)
1466		return FILTER_STRAY;
1467
1468	/* Definitely not our interrupt.  */
1469	if (reg_icr == 0x0)
1470		return FILTER_STRAY;
1471
1472	/*
1473	 * Starting with the 82571 chip, bit 31 should be used to
1474	 * determine whether the interrupt belongs to us.
1475	 */
1476	if (adapter->hw.mac.type >= e1000_82571 &&
1477	    (reg_icr & E1000_ICR_INT_ASSERTED) == 0)
1478		return FILTER_STRAY;
1479
1480	em_disable_intr(adapter);
1481	taskqueue_enqueue(adapter->tq, &adapter->que_task);
1482
1483	/* Link status change */
1484	if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
1485		adapter->hw.mac.get_link_status = 1;
1486		taskqueue_enqueue(taskqueue_fast, &adapter->link_task);
1487	}
1488
1489	if (reg_icr & E1000_ICR_RXO)
1490		adapter->rx_overruns++;
1491	return FILTER_HANDLED;
1492}
1493
1494/* Combined RX/TX handler, used by Legacy and MSI */
1495static void
1496em_handle_que(void *context, int pending)
1497{
1498	struct adapter	*adapter = context;
1499	struct ifnet	*ifp = adapter->ifp;
1500	struct tx_ring	*txr = adapter->tx_rings;
1501	struct rx_ring	*rxr = adapter->rx_rings;
1502
1503
1504	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1505		bool more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
1506		EM_TX_LOCK(txr);
1507		em_txeof(txr);
1508#ifdef EM_MULTIQUEUE
1509		if (!drbr_empty(ifp, txr->br))
1510			em_mq_start_locked(ifp, txr, NULL);
1511#else
1512		if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1513			em_start_locked(ifp, txr);
1514#endif
1515		EM_TX_UNLOCK(txr);
1516		if (more) {
1517			taskqueue_enqueue(adapter->tq, &adapter->que_task);
1518			return;
1519		}
1520	}
1521
1522	em_enable_intr(adapter);
1523	return;
1524}
1525
1526
1527/*********************************************************************
1528 *
1529 *  MSIX Interrupt Service Routines
1530 *
1531 **********************************************************************/
1532static void
1533em_msix_tx(void *arg)
1534{
1535	struct tx_ring *txr = arg;
1536	struct adapter *adapter = txr->adapter;
1537	struct ifnet	*ifp = adapter->ifp;
1538
1539	++txr->tx_irq;
1540	EM_TX_LOCK(txr);
1541	em_txeof(txr);
1542#ifdef EM_MULTIQUEUE
1543	if (!drbr_empty(ifp, txr->br))
1544		em_mq_start_locked(ifp, txr, NULL);
1545#else
1546	if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1547		em_start_locked(ifp, txr);
1548#endif
1549	/* Reenable this interrupt */
1550	E1000_WRITE_REG(&adapter->hw, E1000_IMS, txr->ims);
1551	EM_TX_UNLOCK(txr);
1552	return;
1553}
1554
1555/*********************************************************************
1556 *
1557 *  MSIX RX Interrupt Service routine
1558 *
1559 **********************************************************************/
1560
1561static void
1562em_msix_rx(void *arg)
1563{
1564	struct rx_ring	*rxr = arg;
1565	struct adapter	*adapter = rxr->adapter;
1566	bool		more;
1567
1568	++rxr->rx_irq;
1569	more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
1570	if (more)
1571		taskqueue_enqueue(rxr->tq, &rxr->rx_task);
1572	else
1573		/* Reenable this interrupt */
1574		E1000_WRITE_REG(&adapter->hw, E1000_IMS, rxr->ims);
1575	return;
1576}
1577
1578/*********************************************************************
1579 *
1580 *  MSIX Link Fast Interrupt Service routine
1581 *
1582 **********************************************************************/
1583static void
1584em_msix_link(void *arg)
1585{
1586	struct adapter	*adapter = arg;
1587	u32		reg_icr;
1588
1589	++adapter->link_irq;
1590	reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1591
1592	if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
1593		adapter->hw.mac.get_link_status = 1;
1594		em_handle_link(adapter, 0);
1595	} else
1596		E1000_WRITE_REG(&adapter->hw, E1000_IMS,
1597		    EM_MSIX_LINK | E1000_IMS_LSC);
1598	return;
1599}
1600
1601static void
1602em_handle_rx(void *context, int pending)
1603{
1604	struct rx_ring	*rxr = context;
1605	struct adapter	*adapter = rxr->adapter;
1606        bool            more;
1607
1608	more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
1609	if (more)
1610		taskqueue_enqueue(rxr->tq, &rxr->rx_task);
1611	else
1612		/* Reenable this interrupt */
1613		E1000_WRITE_REG(&adapter->hw, E1000_IMS, rxr->ims);
1614}
1615
1616static void
1617em_handle_tx(void *context, int pending)
1618{
1619	struct tx_ring	*txr = context;
1620	struct adapter	*adapter = txr->adapter;
1621	struct ifnet	*ifp = adapter->ifp;
1622
1623	EM_TX_LOCK(txr);
1624	em_txeof(txr);
1625#ifdef EM_MULTIQUEUE
1626	if (!drbr_empty(ifp, txr->br))
1627		em_mq_start_locked(ifp, txr, NULL);
1628#else
1629	if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1630		em_start_locked(ifp, txr);
1631#endif
1632	E1000_WRITE_REG(&adapter->hw, E1000_IMS, txr->ims);
1633	EM_TX_UNLOCK(txr);
1634}
1635
1636static void
1637em_handle_link(void *context, int pending)
1638{
1639	struct adapter	*adapter = context;
1640	struct tx_ring	*txr = adapter->tx_rings;
1641	struct ifnet *ifp = adapter->ifp;
1642
1643	if (!(ifp->if_drv_flags & IFF_DRV_RUNNING))
1644		return;
1645
1646	EM_CORE_LOCK(adapter);
1647	callout_stop(&adapter->timer);
1648	em_update_link_status(adapter);
1649	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
1650	E1000_WRITE_REG(&adapter->hw, E1000_IMS,
1651	    EM_MSIX_LINK | E1000_IMS_LSC);
1652	if (adapter->link_active) {
1653		for (int i = 0; i < adapter->num_queues; i++, txr++) {
1654			EM_TX_LOCK(txr);
1655#ifdef EM_MULTIQUEUE
1656			if (!drbr_empty(ifp, txr->br))
1657				em_mq_start_locked(ifp, txr, NULL);
1658#else
1659			if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1660				em_start_locked(ifp, txr);
1661#endif
1662			EM_TX_UNLOCK(txr);
1663		}
1664	}
1665	EM_CORE_UNLOCK(adapter);
1666}
1667
1668
1669/*********************************************************************
1670 *
1671 *  Media Ioctl callback
1672 *
1673 *  This routine is called whenever the user queries the status of
1674 *  the interface using ifconfig.
1675 *
1676 **********************************************************************/
1677static void
1678em_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
1679{
1680	struct adapter *adapter = ifp->if_softc;
1681	u_char fiber_type = IFM_1000_SX;
1682
1683	INIT_DEBUGOUT("em_media_status: begin");
1684
1685	EM_CORE_LOCK(adapter);
1686	em_update_link_status(adapter);
1687
1688	ifmr->ifm_status = IFM_AVALID;
1689	ifmr->ifm_active = IFM_ETHER;
1690
1691	if (!adapter->link_active) {
1692		EM_CORE_UNLOCK(adapter);
1693		return;
1694	}
1695
1696	ifmr->ifm_status |= IFM_ACTIVE;
1697
1698	if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
1699	    (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
1700		ifmr->ifm_active |= fiber_type | IFM_FDX;
1701	} else {
1702		switch (adapter->link_speed) {
1703		case 10:
1704			ifmr->ifm_active |= IFM_10_T;
1705			break;
1706		case 100:
1707			ifmr->ifm_active |= IFM_100_TX;
1708			break;
1709		case 1000:
1710			ifmr->ifm_active |= IFM_1000_T;
1711			break;
1712		}
1713		if (adapter->link_duplex == FULL_DUPLEX)
1714			ifmr->ifm_active |= IFM_FDX;
1715		else
1716			ifmr->ifm_active |= IFM_HDX;
1717	}
1718	EM_CORE_UNLOCK(adapter);
1719}
1720
1721/*********************************************************************
1722 *
1723 *  Media Ioctl callback
1724 *
1725 *  This routine is called when the user changes speed/duplex using
1726 *  media/mediopt option with ifconfig.
1727 *
1728 **********************************************************************/
1729static int
1730em_media_change(struct ifnet *ifp)
1731{
1732	struct adapter *adapter = ifp->if_softc;
1733	struct ifmedia  *ifm = &adapter->media;
1734
1735	INIT_DEBUGOUT("em_media_change: begin");
1736
1737	if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
1738		return (EINVAL);
1739
1740	EM_CORE_LOCK(adapter);
1741	switch (IFM_SUBTYPE(ifm->ifm_media)) {
1742	case IFM_AUTO:
1743		adapter->hw.mac.autoneg = DO_AUTO_NEG;
1744		adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
1745		break;
1746	case IFM_1000_LX:
1747	case IFM_1000_SX:
1748	case IFM_1000_T:
1749		adapter->hw.mac.autoneg = DO_AUTO_NEG;
1750		adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
1751		break;
1752	case IFM_100_TX:
1753		adapter->hw.mac.autoneg = FALSE;
1754		adapter->hw.phy.autoneg_advertised = 0;
1755		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1756			adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_FULL;
1757		else
1758			adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_HALF;
1759		break;
1760	case IFM_10_T:
1761		adapter->hw.mac.autoneg = FALSE;
1762		adapter->hw.phy.autoneg_advertised = 0;
1763		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1764			adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_FULL;
1765		else
1766			adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_HALF;
1767		break;
1768	default:
1769		device_printf(adapter->dev, "Unsupported media type\n");
1770	}
1771
1772	em_init_locked(adapter);
1773	EM_CORE_UNLOCK(adapter);
1774
1775	return (0);
1776}
1777
1778/*********************************************************************
1779 *
1780 *  This routine maps the mbufs to tx descriptors.
1781 *
1782 *  return 0 on success, positive on failure
1783 **********************************************************************/
1784
1785static int
1786em_xmit(struct tx_ring *txr, struct mbuf **m_headp)
1787{
1788	struct adapter		*adapter = txr->adapter;
1789	bus_dma_segment_t	segs[EM_MAX_SCATTER];
1790	bus_dmamap_t		map;
1791	struct em_buffer	*tx_buffer, *tx_buffer_mapped;
1792	struct e1000_tx_desc	*ctxd = NULL;
1793	struct mbuf		*m_head;
1794	struct ether_header	*eh;
1795	struct ip		*ip = NULL;
1796	struct tcphdr		*tp = NULL;
1797	u32			txd_upper, txd_lower, txd_used, txd_saved;
1798	int			ip_off, poff;
1799	int			nsegs, i, j, first, last = 0;
1800	int			error, do_tso, tso_desc = 0, remap = 1;
1801
1802retry:
1803	m_head = *m_headp;
1804	txd_upper = txd_lower = txd_used = txd_saved = 0;
1805	do_tso = ((m_head->m_pkthdr.csum_flags & CSUM_TSO) != 0);
1806	ip_off = poff = 0;
1807
1808	/*
1809	 * Intel recommends entire IP/TCP header length reside in a single
1810	 * buffer. If multiple descriptors are used to describe the IP and
1811	 * TCP header, each descriptor should describe one or more
1812	 * complete headers; descriptors referencing only parts of headers
1813	 * are not supported. If all layer headers are not coalesced into
1814	 * a single buffer, each buffer should not cross a 4KB boundary,
1815	 * or be larger than the maximum read request size.
1816	 * Controller also requires modifing IP/TCP header to make TSO work
1817	 * so we firstly get a writable mbuf chain then coalesce ethernet/
1818	 * IP/TCP header into a single buffer to meet the requirement of
1819	 * controller. This also simplifies IP/TCP/UDP checksum offloading
1820	 * which also has similiar restrictions.
1821	 */
1822	if (do_tso || m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD) {
1823		if (do_tso || (m_head->m_next != NULL &&
1824		    m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD)) {
1825			if (M_WRITABLE(*m_headp) == 0) {
1826				m_head = m_dup(*m_headp, M_DONTWAIT);
1827				m_freem(*m_headp);
1828				if (m_head == NULL) {
1829					*m_headp = NULL;
1830					return (ENOBUFS);
1831				}
1832				*m_headp = m_head;
1833			}
1834		}
1835		/*
1836		 * XXX
1837		 * Assume IPv4, we don't have TSO/checksum offload support
1838		 * for IPv6 yet.
1839		 */
1840		ip_off = sizeof(struct ether_header);
1841		m_head = m_pullup(m_head, ip_off);
1842		if (m_head == NULL) {
1843			*m_headp = NULL;
1844			return (ENOBUFS);
1845		}
1846		eh = mtod(m_head, struct ether_header *);
1847		if (eh->ether_type == htons(ETHERTYPE_VLAN)) {
1848			ip_off = sizeof(struct ether_vlan_header);
1849			m_head = m_pullup(m_head, ip_off);
1850			if (m_head == NULL) {
1851				*m_headp = NULL;
1852				return (ENOBUFS);
1853			}
1854		}
1855		m_head = m_pullup(m_head, ip_off + sizeof(struct ip));
1856		if (m_head == NULL) {
1857			*m_headp = NULL;
1858			return (ENOBUFS);
1859		}
1860		ip = (struct ip *)(mtod(m_head, char *) + ip_off);
1861		poff = ip_off + (ip->ip_hl << 2);
1862		if (do_tso) {
1863			m_head = m_pullup(m_head, poff + sizeof(struct tcphdr));
1864			if (m_head == NULL) {
1865				*m_headp = NULL;
1866				return (ENOBUFS);
1867			}
1868			tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
1869			/*
1870			 * TSO workaround:
1871			 *   pull 4 more bytes of data into it.
1872			 */
1873			m_head = m_pullup(m_head, poff + (tp->th_off << 2) + 4);
1874			if (m_head == NULL) {
1875				*m_headp = NULL;
1876				return (ENOBUFS);
1877			}
1878			ip = (struct ip *)(mtod(m_head, char *) + ip_off);
1879			ip->ip_len = 0;
1880			ip->ip_sum = 0;
1881			/*
1882			 * The pseudo TCP checksum does not include TCP payload
1883			 * length so driver should recompute the checksum here
1884			 * what hardware expect to see. This is adherence of
1885			 * Microsoft's Large Send specification.
1886			 */
1887			tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
1888			tp->th_sum = in_pseudo(ip->ip_src.s_addr,
1889			    ip->ip_dst.s_addr, htons(IPPROTO_TCP));
1890		} else if (m_head->m_pkthdr.csum_flags & CSUM_TCP) {
1891			m_head = m_pullup(m_head, poff + sizeof(struct tcphdr));
1892			if (m_head == NULL) {
1893				*m_headp = NULL;
1894				return (ENOBUFS);
1895			}
1896			tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
1897			m_head = m_pullup(m_head, poff + (tp->th_off << 2));
1898			if (m_head == NULL) {
1899				*m_headp = NULL;
1900				return (ENOBUFS);
1901			}
1902			ip = (struct ip *)(mtod(m_head, char *) + ip_off);
1903			tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
1904		} else if (m_head->m_pkthdr.csum_flags & CSUM_UDP) {
1905			m_head = m_pullup(m_head, poff + sizeof(struct udphdr));
1906			if (m_head == NULL) {
1907				*m_headp = NULL;
1908				return (ENOBUFS);
1909			}
1910			ip = (struct ip *)(mtod(m_head, char *) + ip_off);
1911		}
1912		*m_headp = m_head;
1913	}
1914
1915	/*
1916	 * Map the packet for DMA
1917	 *
1918	 * Capture the first descriptor index,
1919	 * this descriptor will have the index
1920	 * of the EOP which is the only one that
1921	 * now gets a DONE bit writeback.
1922	 */
1923	first = txr->next_avail_desc;
1924	tx_buffer = &txr->tx_buffers[first];
1925	tx_buffer_mapped = tx_buffer;
1926	map = tx_buffer->map;
1927
1928	error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
1929	    *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
1930
1931	/*
1932	 * There are two types of errors we can (try) to handle:
1933	 * - EFBIG means the mbuf chain was too long and bus_dma ran
1934	 *   out of segments.  Defragment the mbuf chain and try again.
1935	 * - ENOMEM means bus_dma could not obtain enough bounce buffers
1936	 *   at this point in time.  Defer sending and try again later.
1937	 * All other errors, in particular EINVAL, are fatal and prevent the
1938	 * mbuf chain from ever going through.  Drop it and report error.
1939	 */
1940	if (error == EFBIG && remap) {
1941		struct mbuf *m;
1942
1943		m = m_defrag(*m_headp, M_DONTWAIT);
1944		if (m == NULL) {
1945			adapter->mbuf_alloc_failed++;
1946			m_freem(*m_headp);
1947			*m_headp = NULL;
1948			return (ENOBUFS);
1949		}
1950		*m_headp = m;
1951
1952		/* Try it again, but only once */
1953		remap = 0;
1954		goto retry;
1955	} else if (error == ENOMEM) {
1956		adapter->no_tx_dma_setup++;
1957		return (error);
1958	} else if (error != 0) {
1959		adapter->no_tx_dma_setup++;
1960		m_freem(*m_headp);
1961		*m_headp = NULL;
1962		return (error);
1963	}
1964
1965	/*
1966	 * TSO Hardware workaround, if this packet is not
1967	 * TSO, and is only a single descriptor long, and
1968	 * it follows a TSO burst, then we need to add a
1969	 * sentinel descriptor to prevent premature writeback.
1970	 */
1971	if ((do_tso == 0) && (txr->tx_tso == TRUE)) {
1972		if (nsegs == 1)
1973			tso_desc = TRUE;
1974		txr->tx_tso = FALSE;
1975	}
1976
1977        if (nsegs > (txr->tx_avail - 2)) {
1978                txr->no_desc_avail++;
1979		bus_dmamap_unload(txr->txtag, map);
1980		return (ENOBUFS);
1981        }
1982	m_head = *m_headp;
1983
1984	/* Do hardware assists */
1985	if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
1986		em_tso_setup(txr, m_head, ip_off, ip, tp,
1987		    &txd_upper, &txd_lower);
1988		/* we need to make a final sentinel transmit desc */
1989		tso_desc = TRUE;
1990	} else if (m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD)
1991		em_transmit_checksum_setup(txr, m_head,
1992		    ip_off, ip, &txd_upper, &txd_lower);
1993
1994	if (m_head->m_flags & M_VLANTAG) {
1995		/* Set the vlan id. */
1996		txd_upper |=
1997		    (htole16(m_head->m_pkthdr.ether_vtag) << 16);
1998                /* Tell hardware to add tag */
1999                txd_lower |= htole32(E1000_TXD_CMD_VLE);
2000        }
2001
2002	i = txr->next_avail_desc;
2003
2004	/* Set up our transmit descriptors */
2005	for (j = 0; j < nsegs; j++) {
2006		bus_size_t seg_len;
2007		bus_addr_t seg_addr;
2008
2009		tx_buffer = &txr->tx_buffers[i];
2010		ctxd = &txr->tx_base[i];
2011		seg_addr = segs[j].ds_addr;
2012		seg_len  = segs[j].ds_len;
2013		/*
2014		** TSO Workaround:
2015		** If this is the last descriptor, we want to
2016		** split it so we have a small final sentinel
2017		*/
2018		if (tso_desc && (j == (nsegs -1)) && (seg_len > 8)) {
2019			seg_len -= 4;
2020			ctxd->buffer_addr = htole64(seg_addr);
2021			ctxd->lower.data = htole32(
2022			adapter->txd_cmd | txd_lower | seg_len);
2023			ctxd->upper.data =
2024			    htole32(txd_upper);
2025			if (++i == adapter->num_tx_desc)
2026				i = 0;
2027			/* Now make the sentinel */
2028			++txd_used; /* using an extra txd */
2029			ctxd = &txr->tx_base[i];
2030			tx_buffer = &txr->tx_buffers[i];
2031			ctxd->buffer_addr =
2032			    htole64(seg_addr + seg_len);
2033			ctxd->lower.data = htole32(
2034			adapter->txd_cmd | txd_lower | 4);
2035			ctxd->upper.data =
2036			    htole32(txd_upper);
2037			last = i;
2038			if (++i == adapter->num_tx_desc)
2039				i = 0;
2040		} else {
2041			ctxd->buffer_addr = htole64(seg_addr);
2042			ctxd->lower.data = htole32(
2043			adapter->txd_cmd | txd_lower | seg_len);
2044			ctxd->upper.data =
2045			    htole32(txd_upper);
2046			last = i;
2047			if (++i == adapter->num_tx_desc)
2048				i = 0;
2049		}
2050		tx_buffer->m_head = NULL;
2051		tx_buffer->next_eop = -1;
2052	}
2053
2054	txr->next_avail_desc = i;
2055	txr->tx_avail -= nsegs;
2056	if (tso_desc) /* TSO used an extra for sentinel */
2057		txr->tx_avail -= txd_used;
2058
2059        tx_buffer->m_head = m_head;
2060	/*
2061	** Here we swap the map so the last descriptor,
2062	** which gets the completion interrupt has the
2063	** real map, and the first descriptor gets the
2064	** unused map from this descriptor.
2065	*/
2066	tx_buffer_mapped->map = tx_buffer->map;
2067	tx_buffer->map = map;
2068        bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE);
2069
2070        /*
2071         * Last Descriptor of Packet
2072	 * needs End Of Packet (EOP)
2073	 * and Report Status (RS)
2074         */
2075        ctxd->lower.data |=
2076	    htole32(E1000_TXD_CMD_EOP | E1000_TXD_CMD_RS);
2077	/*
2078	 * Keep track in the first buffer which
2079	 * descriptor will be written back
2080	 */
2081	tx_buffer = &txr->tx_buffers[first];
2082	tx_buffer->next_eop = last;
2083	/* Update the watchdog time early and often */
2084	txr->watchdog_time = ticks;
2085
2086	/*
2087	 * Advance the Transmit Descriptor Tail (TDT), this tells the E1000
2088	 * that this frame is available to transmit.
2089	 */
2090	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
2091	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
2092	E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me), i);
2093
2094	return (0);
2095}
2096
2097static void
2098em_set_promisc(struct adapter *adapter)
2099{
2100	struct ifnet	*ifp = adapter->ifp;
2101	u32		reg_rctl;
2102
2103	reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2104
2105	if (ifp->if_flags & IFF_PROMISC) {
2106		reg_rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
2107		/* Turn this on if you want to see bad packets */
2108		if (em_debug_sbp)
2109			reg_rctl |= E1000_RCTL_SBP;
2110		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2111	} else if (ifp->if_flags & IFF_ALLMULTI) {
2112		reg_rctl |= E1000_RCTL_MPE;
2113		reg_rctl &= ~E1000_RCTL_UPE;
2114		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2115	}
2116}
2117
2118static void
2119em_disable_promisc(struct adapter *adapter)
2120{
2121	u32	reg_rctl;
2122
2123	reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2124
2125	reg_rctl &=  (~E1000_RCTL_UPE);
2126	reg_rctl &=  (~E1000_RCTL_MPE);
2127	reg_rctl &=  (~E1000_RCTL_SBP);
2128	E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2129}
2130
2131
2132/*********************************************************************
2133 *  Multicast Update
2134 *
2135 *  This routine is called whenever multicast address list is updated.
2136 *
2137 **********************************************************************/
2138
2139static void
2140em_set_multi(struct adapter *adapter)
2141{
2142	struct ifnet	*ifp = adapter->ifp;
2143	struct ifmultiaddr *ifma;
2144	u32 reg_rctl = 0;
2145	u8  *mta; /* Multicast array memory */
2146	int mcnt = 0;
2147
2148	IOCTL_DEBUGOUT("em_set_multi: begin");
2149
2150	mta = adapter->mta;
2151	bzero(mta, sizeof(u8) * ETH_ADDR_LEN * MAX_NUM_MULTICAST_ADDRESSES);
2152
2153	if (adapter->hw.mac.type == e1000_82542 &&
2154	    adapter->hw.revision_id == E1000_REVISION_2) {
2155		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2156		if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
2157			e1000_pci_clear_mwi(&adapter->hw);
2158		reg_rctl |= E1000_RCTL_RST;
2159		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2160		msec_delay(5);
2161	}
2162
2163#if __FreeBSD_version < 800000
2164	IF_ADDR_LOCK(ifp);
2165#else
2166	if_maddr_rlock(ifp);
2167#endif
2168	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
2169		if (ifma->ifma_addr->sa_family != AF_LINK)
2170			continue;
2171
2172		if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
2173			break;
2174
2175		bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
2176		    &mta[mcnt * ETH_ADDR_LEN], ETH_ADDR_LEN);
2177		mcnt++;
2178	}
2179#if __FreeBSD_version < 800000
2180	IF_ADDR_UNLOCK(ifp);
2181#else
2182	if_maddr_runlock(ifp);
2183#endif
2184	if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES) {
2185		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2186		reg_rctl |= E1000_RCTL_MPE;
2187		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2188	} else
2189		e1000_update_mc_addr_list(&adapter->hw, mta, mcnt);
2190
2191	if (adapter->hw.mac.type == e1000_82542 &&
2192	    adapter->hw.revision_id == E1000_REVISION_2) {
2193		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2194		reg_rctl &= ~E1000_RCTL_RST;
2195		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2196		msec_delay(5);
2197		if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
2198			e1000_pci_set_mwi(&adapter->hw);
2199	}
2200}
2201
2202
2203/*********************************************************************
2204 *  Timer routine
2205 *
2206 *  This routine checks for link status and updates statistics.
2207 *
2208 **********************************************************************/
2209
2210static void
2211em_local_timer(void *arg)
2212{
2213	struct adapter	*adapter = arg;
2214	struct ifnet	*ifp = adapter->ifp;
2215	struct tx_ring	*txr = adapter->tx_rings;
2216	struct rx_ring	*rxr = adapter->rx_rings;
2217	u32		trigger;
2218
2219	EM_CORE_LOCK_ASSERT(adapter);
2220
2221	em_update_link_status(adapter);
2222	em_update_stats_counters(adapter);
2223
2224	/* Reset LAA into RAR[0] on 82571 */
2225	if ((adapter->hw.mac.type == e1000_82571) &&
2226	    e1000_get_laa_state_82571(&adapter->hw))
2227		e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);
2228
2229	/* Mask to use in the irq trigger */
2230	if (adapter->msix_mem)
2231		trigger = rxr->ims; /* RX for 82574 */
2232	else
2233		trigger = E1000_ICS_RXDMT0;
2234
2235	/*
2236	** Check on the state of the TX queue(s), this
2237	** can be done without the lock because its RO
2238	** and the HUNG state will be static if set.
2239	*/
2240	for (int i = 0; i < adapter->num_queues; i++, txr++) {
2241		if ((txr->queue_status == EM_QUEUE_HUNG) &&
2242		    (adapter->pause_frames == 0))
2243			goto hung;
2244		/* Schedule a TX tasklet if needed */
2245		if (txr->tx_avail <= EM_MAX_SCATTER)
2246			taskqueue_enqueue(txr->tq, &txr->tx_task);
2247	}
2248
2249	adapter->pause_frames = 0;
2250	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
2251#ifndef DEVICE_POLLING
2252	/* Trigger an RX interrupt to guarantee mbuf refresh */
2253	E1000_WRITE_REG(&adapter->hw, E1000_ICS, trigger);
2254#endif
2255	return;
2256hung:
2257	/* Looks like we're hung */
2258	device_printf(adapter->dev, "Watchdog timeout -- resetting\n");
2259	device_printf(adapter->dev,
2260	    "Queue(%d) tdh = %d, hw tdt = %d\n", txr->me,
2261	    E1000_READ_REG(&adapter->hw, E1000_TDH(txr->me)),
2262	    E1000_READ_REG(&adapter->hw, E1000_TDT(txr->me)));
2263	device_printf(adapter->dev,"TX(%d) desc avail = %d,"
2264	    "Next TX to Clean = %d\n",
2265	    txr->me, txr->tx_avail, txr->next_to_clean);
2266	ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2267	adapter->watchdog_events++;
2268	adapter->pause_frames = 0;
2269	em_init_locked(adapter);
2270}
2271
2272
2273static void
2274em_update_link_status(struct adapter *adapter)
2275{
2276	struct e1000_hw *hw = &adapter->hw;
2277	struct ifnet *ifp = adapter->ifp;
2278	device_t dev = adapter->dev;
2279	struct tx_ring *txr = adapter->tx_rings;
2280	u32 link_check = 0;
2281
2282	/* Get the cached link value or read phy for real */
2283	switch (hw->phy.media_type) {
2284	case e1000_media_type_copper:
2285		if (hw->mac.get_link_status) {
2286			/* Do the work to read phy */
2287			e1000_check_for_link(hw);
2288			link_check = !hw->mac.get_link_status;
2289			if (link_check) /* ESB2 fix */
2290				e1000_cfg_on_link_up(hw);
2291		} else
2292			link_check = TRUE;
2293		break;
2294	case e1000_media_type_fiber:
2295		e1000_check_for_link(hw);
2296		link_check = (E1000_READ_REG(hw, E1000_STATUS) &
2297                                 E1000_STATUS_LU);
2298		break;
2299	case e1000_media_type_internal_serdes:
2300		e1000_check_for_link(hw);
2301		link_check = adapter->hw.mac.serdes_has_link;
2302		break;
2303	default:
2304	case e1000_media_type_unknown:
2305		break;
2306	}
2307
2308	/* Now check for a transition */
2309	if (link_check && (adapter->link_active == 0)) {
2310		e1000_get_speed_and_duplex(hw, &adapter->link_speed,
2311		    &adapter->link_duplex);
2312		/* Check if we must disable SPEED_MODE bit on PCI-E */
2313		if ((adapter->link_speed != SPEED_1000) &&
2314		    ((hw->mac.type == e1000_82571) ||
2315		    (hw->mac.type == e1000_82572))) {
2316			int tarc0;
2317			tarc0 = E1000_READ_REG(hw, E1000_TARC(0));
2318			tarc0 &= ~SPEED_MODE_BIT;
2319			E1000_WRITE_REG(hw, E1000_TARC(0), tarc0);
2320		}
2321		if (bootverbose)
2322			device_printf(dev, "Link is up %d Mbps %s\n",
2323			    adapter->link_speed,
2324			    ((adapter->link_duplex == FULL_DUPLEX) ?
2325			    "Full Duplex" : "Half Duplex"));
2326		adapter->link_active = 1;
2327		adapter->smartspeed = 0;
2328		ifp->if_baudrate = adapter->link_speed * 1000000;
2329		if_link_state_change(ifp, LINK_STATE_UP);
2330	} else if (!link_check && (adapter->link_active == 1)) {
2331		ifp->if_baudrate = adapter->link_speed = 0;
2332		adapter->link_duplex = 0;
2333		if (bootverbose)
2334			device_printf(dev, "Link is Down\n");
2335		adapter->link_active = 0;
2336		/* Link down, disable watchdog */
2337		for (int i = 0; i < adapter->num_queues; i++, txr++)
2338			txr->queue_status = EM_QUEUE_IDLE;
2339		if_link_state_change(ifp, LINK_STATE_DOWN);
2340	}
2341}
2342
2343/*********************************************************************
2344 *
2345 *  This routine disables all traffic on the adapter by issuing a
2346 *  global reset on the MAC and deallocates TX/RX buffers.
2347 *
2348 *  This routine should always be called with BOTH the CORE
2349 *  and TX locks.
2350 **********************************************************************/
2351
2352static void
2353em_stop(void *arg)
2354{
2355	struct adapter	*adapter = arg;
2356	struct ifnet	*ifp = adapter->ifp;
2357	struct tx_ring	*txr = adapter->tx_rings;
2358
2359	EM_CORE_LOCK_ASSERT(adapter);
2360
2361	INIT_DEBUGOUT("em_stop: begin");
2362
2363	em_disable_intr(adapter);
2364	callout_stop(&adapter->timer);
2365
2366	/* Tell the stack that the interface is no longer active */
2367	ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2368	ifp->if_drv_flags |= IFF_DRV_OACTIVE;
2369
2370        /* Unarm watchdog timer. */
2371	for (int i = 0; i < adapter->num_queues; i++, txr++) {
2372		EM_TX_LOCK(txr);
2373		txr->queue_status = EM_QUEUE_IDLE;
2374		EM_TX_UNLOCK(txr);
2375	}
2376
2377	e1000_reset_hw(&adapter->hw);
2378	E1000_WRITE_REG(&adapter->hw, E1000_WUC, 0);
2379
2380	e1000_led_off(&adapter->hw);
2381	e1000_cleanup_led(&adapter->hw);
2382}
2383
2384
2385/*********************************************************************
2386 *
2387 *  Determine hardware revision.
2388 *
2389 **********************************************************************/
2390static void
2391em_identify_hardware(struct adapter *adapter)
2392{
2393	device_t dev = adapter->dev;
2394
2395	/* Make sure our PCI config space has the necessary stuff set */
2396	adapter->hw.bus.pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2);
2397	if (!((adapter->hw.bus.pci_cmd_word & PCIM_CMD_BUSMASTEREN) &&
2398	    (adapter->hw.bus.pci_cmd_word & PCIM_CMD_MEMEN))) {
2399		device_printf(dev, "Memory Access and/or Bus Master bits "
2400		    "were not set!\n");
2401		adapter->hw.bus.pci_cmd_word |=
2402		(PCIM_CMD_BUSMASTEREN | PCIM_CMD_MEMEN);
2403		pci_write_config(dev, PCIR_COMMAND,
2404		    adapter->hw.bus.pci_cmd_word, 2);
2405	}
2406
2407	/* Save off the information about this board */
2408	adapter->hw.vendor_id = pci_get_vendor(dev);
2409	adapter->hw.device_id = pci_get_device(dev);
2410	adapter->hw.revision_id = pci_read_config(dev, PCIR_REVID, 1);
2411	adapter->hw.subsystem_vendor_id =
2412	    pci_read_config(dev, PCIR_SUBVEND_0, 2);
2413	adapter->hw.subsystem_device_id =
2414	    pci_read_config(dev, PCIR_SUBDEV_0, 2);
2415
2416	/* Do Shared Code Init and Setup */
2417	if (e1000_set_mac_type(&adapter->hw)) {
2418		device_printf(dev, "Setup init failure\n");
2419		return;
2420	}
2421}
2422
2423static int
2424em_allocate_pci_resources(struct adapter *adapter)
2425{
2426	device_t	dev = adapter->dev;
2427	int		rid;
2428
2429	rid = PCIR_BAR(0);
2430	adapter->memory = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
2431	    &rid, RF_ACTIVE);
2432	if (adapter->memory == NULL) {
2433		device_printf(dev, "Unable to allocate bus resource: memory\n");
2434		return (ENXIO);
2435	}
2436	adapter->osdep.mem_bus_space_tag =
2437	    rman_get_bustag(adapter->memory);
2438	adapter->osdep.mem_bus_space_handle =
2439	    rman_get_bushandle(adapter->memory);
2440	adapter->hw.hw_addr = (u8 *)&adapter->osdep.mem_bus_space_handle;
2441
2442	/* Default to a single queue */
2443	adapter->num_queues = 1;
2444
2445	/*
2446	 * Setup MSI/X or MSI if PCI Express
2447	 */
2448	adapter->msix = em_setup_msix(adapter);
2449
2450	adapter->hw.back = &adapter->osdep;
2451
2452	return (0);
2453}
2454
2455/*********************************************************************
2456 *
2457 *  Setup the Legacy or MSI Interrupt handler
2458 *
2459 **********************************************************************/
2460int
2461em_allocate_legacy(struct adapter *adapter)
2462{
2463	device_t dev = adapter->dev;
2464	struct tx_ring	*txr = adapter->tx_rings;
2465	int error, rid = 0;
2466
2467	/* Manually turn off all interrupts */
2468	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2469
2470	if (adapter->msix == 1) /* using MSI */
2471		rid = 1;
2472	/* We allocate a single interrupt resource */
2473	adapter->res = bus_alloc_resource_any(dev,
2474	    SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2475	if (adapter->res == NULL) {
2476		device_printf(dev, "Unable to allocate bus resource: "
2477		    "interrupt\n");
2478		return (ENXIO);
2479	}
2480
2481	/*
2482	 * Allocate a fast interrupt and the associated
2483	 * deferred processing contexts.
2484	 */
2485	TASK_INIT(&adapter->que_task, 0, em_handle_que, adapter);
2486	adapter->tq = taskqueue_create_fast("em_taskq", M_NOWAIT,
2487	    taskqueue_thread_enqueue, &adapter->tq);
2488	taskqueue_start_threads(&adapter->tq, 1, PI_NET, "%s que",
2489	    device_get_nameunit(adapter->dev));
2490	/* Use a TX only tasklet for local timer */
2491	TASK_INIT(&txr->tx_task, 0, em_handle_tx, txr);
2492	txr->tq = taskqueue_create_fast("em_txq", M_NOWAIT,
2493	    taskqueue_thread_enqueue, &txr->tq);
2494	taskqueue_start_threads(&txr->tq, 1, PI_NET, "%s txq",
2495	    device_get_nameunit(adapter->dev));
2496	TASK_INIT(&adapter->link_task, 0, em_handle_link, adapter);
2497	if ((error = bus_setup_intr(dev, adapter->res, INTR_TYPE_NET,
2498	    em_irq_fast, NULL, adapter, &adapter->tag)) != 0) {
2499		device_printf(dev, "Failed to register fast interrupt "
2500			    "handler: %d\n", error);
2501		taskqueue_free(adapter->tq);
2502		adapter->tq = NULL;
2503		return (error);
2504	}
2505
2506	return (0);
2507}
2508
2509/*********************************************************************
2510 *
2511 *  Setup the MSIX Interrupt handlers
2512 *   This is not really Multiqueue, rather
2513 *   its just seperate interrupt vectors
2514 *   for TX, RX, and Link.
2515 *
2516 **********************************************************************/
2517int
2518em_allocate_msix(struct adapter *adapter)
2519{
2520	device_t	dev = adapter->dev;
2521	struct		tx_ring *txr = adapter->tx_rings;
2522	struct		rx_ring *rxr = adapter->rx_rings;
2523	int		error, rid, vector = 0;
2524
2525
2526	/* Make sure all interrupts are disabled */
2527	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2528
2529	/* First set up ring resources */
2530	for (int i = 0; i < adapter->num_queues; i++, txr++, rxr++) {
2531
2532		/* RX ring */
2533		rid = vector + 1;
2534
2535		rxr->res = bus_alloc_resource_any(dev,
2536		    SYS_RES_IRQ, &rid, RF_ACTIVE);
2537		if (rxr->res == NULL) {
2538			device_printf(dev,
2539			    "Unable to allocate bus resource: "
2540			    "RX MSIX Interrupt %d\n", i);
2541			return (ENXIO);
2542		}
2543		if ((error = bus_setup_intr(dev, rxr->res,
2544		    INTR_TYPE_NET | INTR_MPSAFE, NULL, em_msix_rx,
2545		    rxr, &rxr->tag)) != 0) {
2546			device_printf(dev, "Failed to register RX handler");
2547			return (error);
2548		}
2549#if __FreeBSD_version >= 800504
2550		bus_describe_intr(dev, rxr->res, rxr->tag, "rx %d", i);
2551#endif
2552		rxr->msix = vector++; /* NOTE increment vector for TX */
2553		TASK_INIT(&rxr->rx_task, 0, em_handle_rx, rxr);
2554		rxr->tq = taskqueue_create_fast("em_rxq", M_NOWAIT,
2555		    taskqueue_thread_enqueue, &rxr->tq);
2556		taskqueue_start_threads(&rxr->tq, 1, PI_NET, "%s rxq",
2557		    device_get_nameunit(adapter->dev));
2558		/*
2559		** Set the bit to enable interrupt
2560		** in E1000_IMS -- bits 20 and 21
2561		** are for RX0 and RX1, note this has
2562		** NOTHING to do with the MSIX vector
2563		*/
2564		rxr->ims = 1 << (20 + i);
2565		adapter->ivars |= (8 | rxr->msix) << (i * 4);
2566
2567		/* TX ring */
2568		rid = vector + 1;
2569		txr->res = bus_alloc_resource_any(dev,
2570		    SYS_RES_IRQ, &rid, RF_ACTIVE);
2571		if (txr->res == NULL) {
2572			device_printf(dev,
2573			    "Unable to allocate bus resource: "
2574			    "TX MSIX Interrupt %d\n", i);
2575			return (ENXIO);
2576		}
2577		if ((error = bus_setup_intr(dev, txr->res,
2578		    INTR_TYPE_NET | INTR_MPSAFE, NULL, em_msix_tx,
2579		    txr, &txr->tag)) != 0) {
2580			device_printf(dev, "Failed to register TX handler");
2581			return (error);
2582		}
2583#if __FreeBSD_version >= 800504
2584		bus_describe_intr(dev, txr->res, txr->tag, "tx %d", i);
2585#endif
2586		txr->msix = vector++; /* Increment vector for next pass */
2587		TASK_INIT(&txr->tx_task, 0, em_handle_tx, txr);
2588		txr->tq = taskqueue_create_fast("em_txq", M_NOWAIT,
2589		    taskqueue_thread_enqueue, &txr->tq);
2590		taskqueue_start_threads(&txr->tq, 1, PI_NET, "%s txq",
2591		    device_get_nameunit(adapter->dev));
2592		/*
2593		** Set the bit to enable interrupt
2594		** in E1000_IMS -- bits 22 and 23
2595		** are for TX0 and TX1, note this has
2596		** NOTHING to do with the MSIX vector
2597		*/
2598		txr->ims = 1 << (22 + i);
2599		adapter->ivars |= (8 | txr->msix) << (8 + (i * 4));
2600	}
2601
2602	/* Link interrupt */
2603	++rid;
2604	adapter->res = bus_alloc_resource_any(dev,
2605	    SYS_RES_IRQ, &rid, RF_ACTIVE);
2606	if (!adapter->res) {
2607		device_printf(dev,"Unable to allocate "
2608		    "bus resource: Link interrupt [%d]\n", rid);
2609		return (ENXIO);
2610        }
2611	/* Set the link handler function */
2612	error = bus_setup_intr(dev, adapter->res,
2613	    INTR_TYPE_NET | INTR_MPSAFE, NULL,
2614	    em_msix_link, adapter, &adapter->tag);
2615	if (error) {
2616		adapter->res = NULL;
2617		device_printf(dev, "Failed to register LINK handler");
2618		return (error);
2619	}
2620#if __FreeBSD_version >= 800504
2621		bus_describe_intr(dev, adapter->res, adapter->tag, "link");
2622#endif
2623	adapter->linkvec = vector;
2624	adapter->ivars |=  (8 | vector) << 16;
2625	adapter->ivars |= 0x80000000;
2626
2627	return (0);
2628}
2629
2630
2631static void
2632em_free_pci_resources(struct adapter *adapter)
2633{
2634	device_t	dev = adapter->dev;
2635	struct tx_ring	*txr;
2636	struct rx_ring	*rxr;
2637	int		rid;
2638
2639
2640	/*
2641	** Release all the queue interrupt resources:
2642	*/
2643	for (int i = 0; i < adapter->num_queues; i++) {
2644		txr = &adapter->tx_rings[i];
2645		rxr = &adapter->rx_rings[i];
2646		/* an early abort? */
2647		if ((txr == NULL) || (rxr == NULL))
2648			break;
2649		rid = txr->msix +1;
2650		if (txr->tag != NULL) {
2651			bus_teardown_intr(dev, txr->res, txr->tag);
2652			txr->tag = NULL;
2653		}
2654		if (txr->res != NULL)
2655			bus_release_resource(dev, SYS_RES_IRQ,
2656			    rid, txr->res);
2657		rid = rxr->msix +1;
2658		if (rxr->tag != NULL) {
2659			bus_teardown_intr(dev, rxr->res, rxr->tag);
2660			rxr->tag = NULL;
2661		}
2662		if (rxr->res != NULL)
2663			bus_release_resource(dev, SYS_RES_IRQ,
2664			    rid, rxr->res);
2665	}
2666
2667        if (adapter->linkvec) /* we are doing MSIX */
2668                rid = adapter->linkvec + 1;
2669        else
2670                (adapter->msix != 0) ? (rid = 1):(rid = 0);
2671
2672	if (adapter->tag != NULL) {
2673		bus_teardown_intr(dev, adapter->res, adapter->tag);
2674		adapter->tag = NULL;
2675	}
2676
2677	if (adapter->res != NULL)
2678		bus_release_resource(dev, SYS_RES_IRQ, rid, adapter->res);
2679
2680
2681	if (adapter->msix)
2682		pci_release_msi(dev);
2683
2684	if (adapter->msix_mem != NULL)
2685		bus_release_resource(dev, SYS_RES_MEMORY,
2686		    PCIR_BAR(EM_MSIX_BAR), adapter->msix_mem);
2687
2688	if (adapter->memory != NULL)
2689		bus_release_resource(dev, SYS_RES_MEMORY,
2690		    PCIR_BAR(0), adapter->memory);
2691
2692	if (adapter->flash != NULL)
2693		bus_release_resource(dev, SYS_RES_MEMORY,
2694		    EM_FLASH, adapter->flash);
2695}
2696
2697/*
2698 * Setup MSI or MSI/X
2699 */
2700static int
2701em_setup_msix(struct adapter *adapter)
2702{
2703	device_t dev = adapter->dev;
2704	int val = 0;
2705
2706	/*
2707	** Setup MSI/X for Hartwell: tests have shown
2708	** use of two queues to be unstable, and to
2709	** provide no great gain anyway, so we simply
2710	** seperate the interrupts and use a single queue.
2711	*/
2712	if ((adapter->hw.mac.type == e1000_82574) &&
2713	    (em_enable_msix == TRUE)) {
2714		/* Map the MSIX BAR */
2715		int rid = PCIR_BAR(EM_MSIX_BAR);
2716		adapter->msix_mem = bus_alloc_resource_any(dev,
2717		    SYS_RES_MEMORY, &rid, RF_ACTIVE);
2718       		if (!adapter->msix_mem) {
2719			/* May not be enabled */
2720               		device_printf(adapter->dev,
2721			    "Unable to map MSIX table \n");
2722			goto msi;
2723       		}
2724		val = pci_msix_count(dev);
2725		/* We only need 3 vectors */
2726		if (val > 3)
2727			val = 3;
2728		if ((val != 3) && (val != 5)) {
2729			bus_release_resource(dev, SYS_RES_MEMORY,
2730			    PCIR_BAR(EM_MSIX_BAR), adapter->msix_mem);
2731			adapter->msix_mem = NULL;
2732               		device_printf(adapter->dev,
2733			    "MSIX: incorrect vectors, using MSI\n");
2734			goto msi;
2735		}
2736
2737		if (pci_alloc_msix(dev, &val) == 0) {
2738			device_printf(adapter->dev,
2739			    "Using MSIX interrupts "
2740			    "with %d vectors\n", val);
2741		}
2742
2743		return (val);
2744	}
2745msi:
2746       	val = pci_msi_count(dev);
2747       	if (val == 1 && pci_alloc_msi(dev, &val) == 0) {
2748               	adapter->msix = 1;
2749               	device_printf(adapter->dev,"Using an MSI interrupt\n");
2750		return (val);
2751	}
2752	/* Should only happen due to manual configuration */
2753	device_printf(adapter->dev,"No MSI/MSIX using a Legacy IRQ\n");
2754	return (0);
2755}
2756
2757
2758/*********************************************************************
2759 *
2760 *  Initialize the hardware to a configuration
2761 *  as specified by the adapter structure.
2762 *
2763 **********************************************************************/
2764static void
2765em_reset(struct adapter *adapter)
2766{
2767	device_t	dev = adapter->dev;
2768	struct ifnet	*ifp = adapter->ifp;
2769	struct e1000_hw	*hw = &adapter->hw;
2770	u16		rx_buffer_size;
2771	u32		pba;
2772
2773	INIT_DEBUGOUT("em_reset: begin");
2774
2775	/* Set up smart power down as default off on newer adapters. */
2776	if (!em_smart_pwr_down && (hw->mac.type == e1000_82571 ||
2777	    hw->mac.type == e1000_82572)) {
2778		u16 phy_tmp = 0;
2779
2780		/* Speed up time to link by disabling smart power down. */
2781		e1000_read_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, &phy_tmp);
2782		phy_tmp &= ~IGP02E1000_PM_SPD;
2783		e1000_write_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, phy_tmp);
2784	}
2785
2786	/*
2787	 * Packet Buffer Allocation (PBA)
2788	 * Writing PBA sets the receive portion of the buffer
2789	 * the remainder is used for the transmit buffer.
2790	 */
2791	switch (hw->mac.type) {
2792	/* Total Packet Buffer on these is 48K */
2793	case e1000_82571:
2794	case e1000_82572:
2795	case e1000_80003es2lan:
2796			pba = E1000_PBA_32K; /* 32K for Rx, 16K for Tx */
2797		break;
2798	case e1000_82573: /* 82573: Total Packet Buffer is 32K */
2799			pba = E1000_PBA_12K; /* 12K for Rx, 20K for Tx */
2800		break;
2801	case e1000_82574:
2802	case e1000_82583:
2803			pba = E1000_PBA_20K; /* 20K for Rx, 20K for Tx */
2804		break;
2805	case e1000_ich8lan:
2806		pba = E1000_PBA_8K;
2807		break;
2808	case e1000_ich9lan:
2809	case e1000_ich10lan:
2810		/* Boost Receive side for jumbo frames */
2811		if (adapter->max_frame_size > 4096)
2812			pba = E1000_PBA_14K;
2813		else
2814			pba = E1000_PBA_10K;
2815		break;
2816	case e1000_pchlan:
2817	case e1000_pch2lan:
2818		pba = E1000_PBA_26K;
2819		break;
2820	default:
2821		if (adapter->max_frame_size > 8192)
2822			pba = E1000_PBA_40K; /* 40K for Rx, 24K for Tx */
2823		else
2824			pba = E1000_PBA_48K; /* 48K for Rx, 16K for Tx */
2825	}
2826	E1000_WRITE_REG(&adapter->hw, E1000_PBA, pba);
2827
2828	/*
2829	 * These parameters control the automatic generation (Tx) and
2830	 * response (Rx) to Ethernet PAUSE frames.
2831	 * - High water mark should allow for at least two frames to be
2832	 *   received after sending an XOFF.
2833	 * - Low water mark works best when it is very near the high water mark.
2834	 *   This allows the receiver to restart by sending XON when it has
2835	 *   drained a bit. Here we use an arbitary value of 1500 which will
2836	 *   restart after one full frame is pulled from the buffer. There
2837	 *   could be several smaller frames in the buffer and if so they will
2838	 *   not trigger the XON until their total number reduces the buffer
2839	 *   by 1500.
2840	 * - The pause time is fairly large at 1000 x 512ns = 512 usec.
2841	 */
2842	rx_buffer_size = ((E1000_READ_REG(hw, E1000_PBA) & 0xffff) << 10 );
2843	hw->fc.high_water = rx_buffer_size -
2844	    roundup2(adapter->max_frame_size, 1024);
2845	hw->fc.low_water = hw->fc.high_water - 1500;
2846
2847	if (adapter->fc) /* locally set flow control value? */
2848		hw->fc.requested_mode = adapter->fc;
2849	else
2850		hw->fc.requested_mode = e1000_fc_full;
2851
2852	if (hw->mac.type == e1000_80003es2lan)
2853		hw->fc.pause_time = 0xFFFF;
2854	else
2855		hw->fc.pause_time = EM_FC_PAUSE_TIME;
2856
2857	hw->fc.send_xon = TRUE;
2858
2859	/* Device specific overrides/settings */
2860	switch (hw->mac.type) {
2861	case e1000_pchlan:
2862		/* Workaround: no TX flow ctrl for PCH */
2863                hw->fc.requested_mode = e1000_fc_rx_pause;
2864		hw->fc.pause_time = 0xFFFF; /* override */
2865		if (ifp->if_mtu > ETHERMTU) {
2866			hw->fc.high_water = 0x3500;
2867			hw->fc.low_water = 0x1500;
2868		} else {
2869			hw->fc.high_water = 0x5000;
2870			hw->fc.low_water = 0x3000;
2871		}
2872		hw->fc.refresh_time = 0x1000;
2873		break;
2874	case e1000_pch2lan:
2875		hw->fc.high_water = 0x5C20;
2876		hw->fc.low_water = 0x5048;
2877		hw->fc.pause_time = 0x0650;
2878		hw->fc.refresh_time = 0x0400;
2879		/* Jumbos need adjusted PBA */
2880		if (ifp->if_mtu > ETHERMTU)
2881			E1000_WRITE_REG(hw, E1000_PBA, 12);
2882		else
2883			E1000_WRITE_REG(hw, E1000_PBA, 26);
2884		break;
2885        case e1000_ich9lan:
2886        case e1000_ich10lan:
2887		if (ifp->if_mtu > ETHERMTU) {
2888			hw->fc.high_water = 0x2800;
2889			hw->fc.low_water = hw->fc.high_water - 8;
2890			break;
2891		}
2892		/* else fall thru */
2893	default:
2894		if (hw->mac.type == e1000_80003es2lan)
2895			hw->fc.pause_time = 0xFFFF;
2896		break;
2897	}
2898
2899	/* Issue a global reset */
2900	e1000_reset_hw(hw);
2901	E1000_WRITE_REG(hw, E1000_WUC, 0);
2902	em_disable_aspm(adapter);
2903	/* and a re-init */
2904	if (e1000_init_hw(hw) < 0) {
2905		device_printf(dev, "Hardware Initialization Failed\n");
2906		return;
2907	}
2908
2909	E1000_WRITE_REG(hw, E1000_VET, ETHERTYPE_VLAN);
2910	e1000_get_phy_info(hw);
2911	e1000_check_for_link(hw);
2912	return;
2913}
2914
2915/*********************************************************************
2916 *
2917 *  Setup networking device structure and register an interface.
2918 *
2919 **********************************************************************/
2920static int
2921em_setup_interface(device_t dev, struct adapter *adapter)
2922{
2923	struct ifnet   *ifp;
2924
2925	INIT_DEBUGOUT("em_setup_interface: begin");
2926
2927	ifp = adapter->ifp = if_alloc(IFT_ETHER);
2928	if (ifp == NULL) {
2929		device_printf(dev, "can not allocate ifnet structure\n");
2930		return (-1);
2931	}
2932	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
2933	ifp->if_init =  em_init;
2934	ifp->if_softc = adapter;
2935	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
2936	ifp->if_ioctl = em_ioctl;
2937#ifdef EM_MULTIQUEUE
2938	/* Multiqueue stack interface */
2939	ifp->if_transmit = em_mq_start;
2940	ifp->if_qflush = em_qflush;
2941#else
2942	ifp->if_start = em_start;
2943	IFQ_SET_MAXLEN(&ifp->if_snd, adapter->num_tx_desc - 1);
2944	ifp->if_snd.ifq_drv_maxlen = adapter->num_tx_desc - 1;
2945	IFQ_SET_READY(&ifp->if_snd);
2946#endif
2947
2948	ether_ifattach(ifp, adapter->hw.mac.addr);
2949
2950	ifp->if_capabilities = ifp->if_capenable = 0;
2951
2952
2953	ifp->if_capabilities |= IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM;
2954	ifp->if_capabilities |= IFCAP_TSO4;
2955	/*
2956	 * Tell the upper layer(s) we
2957	 * support full VLAN capability
2958	 */
2959	ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header);
2960	ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING
2961			     |  IFCAP_VLAN_HWTSO
2962			     |  IFCAP_VLAN_MTU;
2963	ifp->if_capenable = ifp->if_capabilities;
2964
2965	/*
2966	** Don't turn this on by default, if vlans are
2967	** created on another pseudo device (eg. lagg)
2968	** then vlan events are not passed thru, breaking
2969	** operation, but with HW FILTER off it works. If
2970	** using vlans directly on the em driver you can
2971	** enable this and get full hardware tag filtering.
2972	*/
2973	ifp->if_capabilities |= IFCAP_VLAN_HWFILTER;
2974
2975#ifdef DEVICE_POLLING
2976	ifp->if_capabilities |= IFCAP_POLLING;
2977#endif
2978
2979	/* Enable only WOL MAGIC by default */
2980	if (adapter->wol) {
2981		ifp->if_capabilities |= IFCAP_WOL;
2982		ifp->if_capenable |= IFCAP_WOL_MAGIC;
2983	}
2984
2985	/*
2986	 * Specify the media types supported by this adapter and register
2987	 * callbacks to update media and link information
2988	 */
2989	ifmedia_init(&adapter->media, IFM_IMASK,
2990	    em_media_change, em_media_status);
2991	if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
2992	    (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
2993		u_char fiber_type = IFM_1000_SX;	/* default type */
2994
2995		ifmedia_add(&adapter->media, IFM_ETHER | fiber_type | IFM_FDX,
2996			    0, NULL);
2997		ifmedia_add(&adapter->media, IFM_ETHER | fiber_type, 0, NULL);
2998	} else {
2999		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T, 0, NULL);
3000		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T | IFM_FDX,
3001			    0, NULL);
3002		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX,
3003			    0, NULL);
3004		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX | IFM_FDX,
3005			    0, NULL);
3006		if (adapter->hw.phy.type != e1000_phy_ife) {
3007			ifmedia_add(&adapter->media,
3008				IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
3009			ifmedia_add(&adapter->media,
3010				IFM_ETHER | IFM_1000_T, 0, NULL);
3011		}
3012	}
3013	ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL);
3014	ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO);
3015	return (0);
3016}
3017
3018
3019/*
3020 * Manage DMA'able memory.
3021 */
3022static void
3023em_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
3024{
3025	if (error)
3026		return;
3027	*(bus_addr_t *) arg = segs[0].ds_addr;
3028}
3029
3030static int
3031em_dma_malloc(struct adapter *adapter, bus_size_t size,
3032        struct em_dma_alloc *dma, int mapflags)
3033{
3034	int error;
3035
3036	error = bus_dma_tag_create(bus_get_dma_tag(adapter->dev), /* parent */
3037				EM_DBA_ALIGN, 0,	/* alignment, bounds */
3038				BUS_SPACE_MAXADDR,	/* lowaddr */
3039				BUS_SPACE_MAXADDR,	/* highaddr */
3040				NULL, NULL,		/* filter, filterarg */
3041				size,			/* maxsize */
3042				1,			/* nsegments */
3043				size,			/* maxsegsize */
3044				0,			/* flags */
3045				NULL,			/* lockfunc */
3046				NULL,			/* lockarg */
3047				&dma->dma_tag);
3048	if (error) {
3049		device_printf(adapter->dev,
3050		    "%s: bus_dma_tag_create failed: %d\n",
3051		    __func__, error);
3052		goto fail_0;
3053	}
3054
3055	error = bus_dmamem_alloc(dma->dma_tag, (void**) &dma->dma_vaddr,
3056	    BUS_DMA_NOWAIT | BUS_DMA_COHERENT, &dma->dma_map);
3057	if (error) {
3058		device_printf(adapter->dev,
3059		    "%s: bus_dmamem_alloc(%ju) failed: %d\n",
3060		    __func__, (uintmax_t)size, error);
3061		goto fail_2;
3062	}
3063
3064	dma->dma_paddr = 0;
3065	error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
3066	    size, em_dmamap_cb, &dma->dma_paddr, mapflags | BUS_DMA_NOWAIT);
3067	if (error || dma->dma_paddr == 0) {
3068		device_printf(adapter->dev,
3069		    "%s: bus_dmamap_load failed: %d\n",
3070		    __func__, error);
3071		goto fail_3;
3072	}
3073
3074	return (0);
3075
3076fail_3:
3077	bus_dmamap_unload(dma->dma_tag, dma->dma_map);
3078fail_2:
3079	bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
3080	bus_dma_tag_destroy(dma->dma_tag);
3081fail_0:
3082	dma->dma_map = NULL;
3083	dma->dma_tag = NULL;
3084
3085	return (error);
3086}
3087
3088static void
3089em_dma_free(struct adapter *adapter, struct em_dma_alloc *dma)
3090{
3091	if (dma->dma_tag == NULL)
3092		return;
3093	if (dma->dma_map != NULL) {
3094		bus_dmamap_sync(dma->dma_tag, dma->dma_map,
3095		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
3096		bus_dmamap_unload(dma->dma_tag, dma->dma_map);
3097		bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
3098		dma->dma_map = NULL;
3099	}
3100	bus_dma_tag_destroy(dma->dma_tag);
3101	dma->dma_tag = NULL;
3102}
3103
3104
3105/*********************************************************************
3106 *
3107 *  Allocate memory for the transmit and receive rings, and then
3108 *  the descriptors associated with each, called only once at attach.
3109 *
3110 **********************************************************************/
3111static int
3112em_allocate_queues(struct adapter *adapter)
3113{
3114	device_t		dev = adapter->dev;
3115	struct tx_ring		*txr = NULL;
3116	struct rx_ring		*rxr = NULL;
3117	int rsize, tsize, error = E1000_SUCCESS;
3118	int txconf = 0, rxconf = 0;
3119
3120
3121	/* Allocate the TX ring struct memory */
3122	if (!(adapter->tx_rings =
3123	    (struct tx_ring *) malloc(sizeof(struct tx_ring) *
3124	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3125		device_printf(dev, "Unable to allocate TX ring memory\n");
3126		error = ENOMEM;
3127		goto fail;
3128	}
3129
3130	/* Now allocate the RX */
3131	if (!(adapter->rx_rings =
3132	    (struct rx_ring *) malloc(sizeof(struct rx_ring) *
3133	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3134		device_printf(dev, "Unable to allocate RX ring memory\n");
3135		error = ENOMEM;
3136		goto rx_fail;
3137	}
3138
3139	tsize = roundup2(adapter->num_tx_desc *
3140	    sizeof(struct e1000_tx_desc), EM_DBA_ALIGN);
3141	/*
3142	 * Now set up the TX queues, txconf is needed to handle the
3143	 * possibility that things fail midcourse and we need to
3144	 * undo memory gracefully
3145	 */
3146	for (int i = 0; i < adapter->num_queues; i++, txconf++) {
3147		/* Set up some basics */
3148		txr = &adapter->tx_rings[i];
3149		txr->adapter = adapter;
3150		txr->me = i;
3151
3152		/* Initialize the TX lock */
3153		snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
3154		    device_get_nameunit(dev), txr->me);
3155		mtx_init(&txr->tx_mtx, txr->mtx_name, NULL, MTX_DEF);
3156
3157		if (em_dma_malloc(adapter, tsize,
3158			&txr->txdma, BUS_DMA_NOWAIT)) {
3159			device_printf(dev,
3160			    "Unable to allocate TX Descriptor memory\n");
3161			error = ENOMEM;
3162			goto err_tx_desc;
3163		}
3164		txr->tx_base = (struct e1000_tx_desc *)txr->txdma.dma_vaddr;
3165		bzero((void *)txr->tx_base, tsize);
3166
3167        	if (em_allocate_transmit_buffers(txr)) {
3168			device_printf(dev,
3169			    "Critical Failure setting up transmit buffers\n");
3170			error = ENOMEM;
3171			goto err_tx_desc;
3172        	}
3173#if __FreeBSD_version >= 800000
3174		/* Allocate a buf ring */
3175		txr->br = buf_ring_alloc(4096, M_DEVBUF,
3176		    M_WAITOK, &txr->tx_mtx);
3177#endif
3178	}
3179
3180	/*
3181	 * Next the RX queues...
3182	 */
3183	rsize = roundup2(adapter->num_rx_desc *
3184	    sizeof(struct e1000_rx_desc), EM_DBA_ALIGN);
3185	for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
3186		rxr = &adapter->rx_rings[i];
3187		rxr->adapter = adapter;
3188		rxr->me = i;
3189
3190		/* Initialize the RX lock */
3191		snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
3192		    device_get_nameunit(dev), txr->me);
3193		mtx_init(&rxr->rx_mtx, rxr->mtx_name, NULL, MTX_DEF);
3194
3195		if (em_dma_malloc(adapter, rsize,
3196			&rxr->rxdma, BUS_DMA_NOWAIT)) {
3197			device_printf(dev,
3198			    "Unable to allocate RxDescriptor memory\n");
3199			error = ENOMEM;
3200			goto err_rx_desc;
3201		}
3202		rxr->rx_base = (struct e1000_rx_desc *)rxr->rxdma.dma_vaddr;
3203		bzero((void *)rxr->rx_base, rsize);
3204
3205        	/* Allocate receive buffers for the ring*/
3206		if (em_allocate_receive_buffers(rxr)) {
3207			device_printf(dev,
3208			    "Critical Failure setting up receive buffers\n");
3209			error = ENOMEM;
3210			goto err_rx_desc;
3211		}
3212	}
3213
3214	return (0);
3215
3216err_rx_desc:
3217	for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
3218		em_dma_free(adapter, &rxr->rxdma);
3219err_tx_desc:
3220	for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
3221		em_dma_free(adapter, &txr->txdma);
3222	free(adapter->rx_rings, M_DEVBUF);
3223rx_fail:
3224#if __FreeBSD_version >= 800000
3225	buf_ring_free(txr->br, M_DEVBUF);
3226#endif
3227	free(adapter->tx_rings, M_DEVBUF);
3228fail:
3229	return (error);
3230}
3231
3232
3233/*********************************************************************
3234 *
3235 *  Allocate memory for tx_buffer structures. The tx_buffer stores all
3236 *  the information needed to transmit a packet on the wire. This is
3237 *  called only once at attach, setup is done every reset.
3238 *
3239 **********************************************************************/
3240static int
3241em_allocate_transmit_buffers(struct tx_ring *txr)
3242{
3243	struct adapter *adapter = txr->adapter;
3244	device_t dev = adapter->dev;
3245	struct em_buffer *txbuf;
3246	int error, i;
3247
3248	/*
3249	 * Setup DMA descriptor areas.
3250	 */
3251	if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
3252			       1, 0,			/* alignment, bounds */
3253			       BUS_SPACE_MAXADDR,	/* lowaddr */
3254			       BUS_SPACE_MAXADDR,	/* highaddr */
3255			       NULL, NULL,		/* filter, filterarg */
3256			       EM_TSO_SIZE,		/* maxsize */
3257			       EM_MAX_SCATTER,		/* nsegments */
3258			       PAGE_SIZE,		/* maxsegsize */
3259			       0,			/* flags */
3260			       NULL,			/* lockfunc */
3261			       NULL,			/* lockfuncarg */
3262			       &txr->txtag))) {
3263		device_printf(dev,"Unable to allocate TX DMA tag\n");
3264		goto fail;
3265	}
3266
3267	if (!(txr->tx_buffers =
3268	    (struct em_buffer *) malloc(sizeof(struct em_buffer) *
3269	    adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3270		device_printf(dev, "Unable to allocate tx_buffer memory\n");
3271		error = ENOMEM;
3272		goto fail;
3273	}
3274
3275        /* Create the descriptor buffer dma maps */
3276	txbuf = txr->tx_buffers;
3277	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3278		error = bus_dmamap_create(txr->txtag, 0, &txbuf->map);
3279		if (error != 0) {
3280			device_printf(dev, "Unable to create TX DMA map\n");
3281			goto fail;
3282		}
3283	}
3284
3285	return 0;
3286fail:
3287	/* We free all, it handles case where we are in the middle */
3288	em_free_transmit_structures(adapter);
3289	return (error);
3290}
3291
3292/*********************************************************************
3293 *
3294 *  Initialize a transmit ring.
3295 *
3296 **********************************************************************/
3297static void
3298em_setup_transmit_ring(struct tx_ring *txr)
3299{
3300	struct adapter *adapter = txr->adapter;
3301	struct em_buffer *txbuf;
3302	int i;
3303#ifdef DEV_NETMAP
3304	struct netmap_adapter *na = NA(adapter->ifp);
3305	struct netmap_slot *slot;
3306#endif /* DEV_NETMAP */
3307
3308	/* Clear the old descriptor contents */
3309	EM_TX_LOCK(txr);
3310#ifdef DEV_NETMAP
3311	slot = netmap_reset(na, NR_TX, txr->me, 0);
3312#endif /* DEV_NETMAP */
3313
3314	bzero((void *)txr->tx_base,
3315	      (sizeof(struct e1000_tx_desc)) * adapter->num_tx_desc);
3316	/* Reset indices */
3317	txr->next_avail_desc = 0;
3318	txr->next_to_clean = 0;
3319
3320	/* Free any existing tx buffers. */
3321        txbuf = txr->tx_buffers;
3322	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3323		if (txbuf->m_head != NULL) {
3324			bus_dmamap_sync(txr->txtag, txbuf->map,
3325			    BUS_DMASYNC_POSTWRITE);
3326			bus_dmamap_unload(txr->txtag, txbuf->map);
3327			m_freem(txbuf->m_head);
3328			txbuf->m_head = NULL;
3329		}
3330#ifdef DEV_NETMAP
3331		if (slot) {
3332			int si = netmap_idx_n2k(&na->tx_rings[txr->me], i);
3333			uint64_t paddr;
3334			void *addr;
3335
3336			addr = PNMB(slot + si, &paddr);
3337			txr->tx_base[i].buffer_addr = htole64(paddr);
3338			/* reload the map for netmap mode */
3339			netmap_load_map(txr->txtag, txbuf->map, addr);
3340		}
3341#endif /* DEV_NETMAP */
3342
3343		/* clear the watch index */
3344		txbuf->next_eop = -1;
3345        }
3346
3347	/* Set number of descriptors available */
3348	txr->tx_avail = adapter->num_tx_desc;
3349	txr->queue_status = EM_QUEUE_IDLE;
3350
3351	/* Clear checksum offload context. */
3352	txr->last_hw_offload = 0;
3353	txr->last_hw_ipcss = 0;
3354	txr->last_hw_ipcso = 0;
3355	txr->last_hw_tucss = 0;
3356	txr->last_hw_tucso = 0;
3357
3358	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3359	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3360	EM_TX_UNLOCK(txr);
3361}
3362
3363/*********************************************************************
3364 *
3365 *  Initialize all transmit rings.
3366 *
3367 **********************************************************************/
3368static void
3369em_setup_transmit_structures(struct adapter *adapter)
3370{
3371	struct tx_ring *txr = adapter->tx_rings;
3372
3373	for (int i = 0; i < adapter->num_queues; i++, txr++)
3374		em_setup_transmit_ring(txr);
3375
3376	return;
3377}
3378
3379/*********************************************************************
3380 *
3381 *  Enable transmit unit.
3382 *
3383 **********************************************************************/
3384static void
3385em_initialize_transmit_unit(struct adapter *adapter)
3386{
3387	struct tx_ring	*txr = adapter->tx_rings;
3388	struct e1000_hw	*hw = &adapter->hw;
3389	u32	tctl, tarc, tipg = 0;
3390
3391	 INIT_DEBUGOUT("em_initialize_transmit_unit: begin");
3392
3393	for (int i = 0; i < adapter->num_queues; i++, txr++) {
3394		u64 bus_addr = txr->txdma.dma_paddr;
3395		/* Base and Len of TX Ring */
3396		E1000_WRITE_REG(hw, E1000_TDLEN(i),
3397	    	    adapter->num_tx_desc * sizeof(struct e1000_tx_desc));
3398		E1000_WRITE_REG(hw, E1000_TDBAH(i),
3399	    	    (u32)(bus_addr >> 32));
3400		E1000_WRITE_REG(hw, E1000_TDBAL(i),
3401	    	    (u32)bus_addr);
3402		/* Init the HEAD/TAIL indices */
3403		E1000_WRITE_REG(hw, E1000_TDT(i), 0);
3404		E1000_WRITE_REG(hw, E1000_TDH(i), 0);
3405
3406		HW_DEBUGOUT2("Base = %x, Length = %x\n",
3407		    E1000_READ_REG(&adapter->hw, E1000_TDBAL(i)),
3408		    E1000_READ_REG(&adapter->hw, E1000_TDLEN(i)));
3409
3410		txr->queue_status = EM_QUEUE_IDLE;
3411	}
3412
3413	/* Set the default values for the Tx Inter Packet Gap timer */
3414	switch (adapter->hw.mac.type) {
3415	case e1000_80003es2lan:
3416		tipg = DEFAULT_82543_TIPG_IPGR1;
3417		tipg |= DEFAULT_80003ES2LAN_TIPG_IPGR2 <<
3418		    E1000_TIPG_IPGR2_SHIFT;
3419		break;
3420	default:
3421		if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
3422		    (adapter->hw.phy.media_type ==
3423		    e1000_media_type_internal_serdes))
3424			tipg = DEFAULT_82543_TIPG_IPGT_FIBER;
3425		else
3426			tipg = DEFAULT_82543_TIPG_IPGT_COPPER;
3427		tipg |= DEFAULT_82543_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT;
3428		tipg |= DEFAULT_82543_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT;
3429	}
3430
3431	E1000_WRITE_REG(&adapter->hw, E1000_TIPG, tipg);
3432	E1000_WRITE_REG(&adapter->hw, E1000_TIDV, adapter->tx_int_delay.value);
3433
3434	if(adapter->hw.mac.type >= e1000_82540)
3435		E1000_WRITE_REG(&adapter->hw, E1000_TADV,
3436		    adapter->tx_abs_int_delay.value);
3437
3438	if ((adapter->hw.mac.type == e1000_82571) ||
3439	    (adapter->hw.mac.type == e1000_82572)) {
3440		tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
3441		tarc |= SPEED_MODE_BIT;
3442		E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3443	} else if (adapter->hw.mac.type == e1000_80003es2lan) {
3444		tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
3445		tarc |= 1;
3446		E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3447		tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(1));
3448		tarc |= 1;
3449		E1000_WRITE_REG(&adapter->hw, E1000_TARC(1), tarc);
3450	}
3451
3452	adapter->txd_cmd = E1000_TXD_CMD_IFCS;
3453	if (adapter->tx_int_delay.value > 0)
3454		adapter->txd_cmd |= E1000_TXD_CMD_IDE;
3455
3456	/* Program the Transmit Control Register */
3457	tctl = E1000_READ_REG(&adapter->hw, E1000_TCTL);
3458	tctl &= ~E1000_TCTL_CT;
3459	tctl |= (E1000_TCTL_PSP | E1000_TCTL_RTLC | E1000_TCTL_EN |
3460		   (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT));
3461
3462	if (adapter->hw.mac.type >= e1000_82571)
3463		tctl |= E1000_TCTL_MULR;
3464
3465	/* This write will effectively turn on the transmit unit. */
3466	E1000_WRITE_REG(&adapter->hw, E1000_TCTL, tctl);
3467
3468}
3469
3470
3471/*********************************************************************
3472 *
3473 *  Free all transmit rings.
3474 *
3475 **********************************************************************/
3476static void
3477em_free_transmit_structures(struct adapter *adapter)
3478{
3479	struct tx_ring *txr = adapter->tx_rings;
3480
3481	for (int i = 0; i < adapter->num_queues; i++, txr++) {
3482		EM_TX_LOCK(txr);
3483		em_free_transmit_buffers(txr);
3484		em_dma_free(adapter, &txr->txdma);
3485		EM_TX_UNLOCK(txr);
3486		EM_TX_LOCK_DESTROY(txr);
3487	}
3488
3489	free(adapter->tx_rings, M_DEVBUF);
3490}
3491
3492/*********************************************************************
3493 *
3494 *  Free transmit ring related data structures.
3495 *
3496 **********************************************************************/
3497static void
3498em_free_transmit_buffers(struct tx_ring *txr)
3499{
3500	struct adapter		*adapter = txr->adapter;
3501	struct em_buffer	*txbuf;
3502
3503	INIT_DEBUGOUT("free_transmit_ring: begin");
3504
3505	if (txr->tx_buffers == NULL)
3506		return;
3507
3508	for (int i = 0; i < adapter->num_tx_desc; i++) {
3509		txbuf = &txr->tx_buffers[i];
3510		if (txbuf->m_head != NULL) {
3511			bus_dmamap_sync(txr->txtag, txbuf->map,
3512			    BUS_DMASYNC_POSTWRITE);
3513			bus_dmamap_unload(txr->txtag,
3514			    txbuf->map);
3515			m_freem(txbuf->m_head);
3516			txbuf->m_head = NULL;
3517			if (txbuf->map != NULL) {
3518				bus_dmamap_destroy(txr->txtag,
3519				    txbuf->map);
3520				txbuf->map = NULL;
3521			}
3522		} else if (txbuf->map != NULL) {
3523			bus_dmamap_unload(txr->txtag,
3524			    txbuf->map);
3525			bus_dmamap_destroy(txr->txtag,
3526			    txbuf->map);
3527			txbuf->map = NULL;
3528		}
3529	}
3530#if __FreeBSD_version >= 800000
3531	if (txr->br != NULL)
3532		buf_ring_free(txr->br, M_DEVBUF);
3533#endif
3534	if (txr->tx_buffers != NULL) {
3535		free(txr->tx_buffers, M_DEVBUF);
3536		txr->tx_buffers = NULL;
3537	}
3538	if (txr->txtag != NULL) {
3539		bus_dma_tag_destroy(txr->txtag);
3540		txr->txtag = NULL;
3541	}
3542	return;
3543}
3544
3545
3546/*********************************************************************
3547 *  The offload context is protocol specific (TCP/UDP) and thus
3548 *  only needs to be set when the protocol changes. The occasion
3549 *  of a context change can be a performance detriment, and
3550 *  might be better just disabled. The reason arises in the way
3551 *  in which the controller supports pipelined requests from the
3552 *  Tx data DMA. Up to four requests can be pipelined, and they may
3553 *  belong to the same packet or to multiple packets. However all
3554 *  requests for one packet are issued before a request is issued
3555 *  for a subsequent packet and if a request for the next packet
3556 *  requires a context change, that request will be stalled
3557 *  until the previous request completes. This means setting up
3558 *  a new context effectively disables pipelined Tx data DMA which
3559 *  in turn greatly slow down performance to send small sized
3560 *  frames.
3561 **********************************************************************/
3562static void
3563em_transmit_checksum_setup(struct tx_ring *txr, struct mbuf *mp, int ip_off,
3564    struct ip *ip, u32 *txd_upper, u32 *txd_lower)
3565{
3566	struct adapter			*adapter = txr->adapter;
3567	struct e1000_context_desc	*TXD = NULL;
3568	struct em_buffer		*tx_buffer;
3569	int				cur, hdr_len;
3570	u32				cmd = 0;
3571	u16				offload = 0;
3572	u8				ipcso, ipcss, tucso, tucss;
3573
3574	ipcss = ipcso = tucss = tucso = 0;
3575	hdr_len = ip_off + (ip->ip_hl << 2);
3576	cur = txr->next_avail_desc;
3577
3578	/* Setup of IP header checksum. */
3579	if (mp->m_pkthdr.csum_flags & CSUM_IP) {
3580		*txd_upper |= E1000_TXD_POPTS_IXSM << 8;
3581		offload |= CSUM_IP;
3582		ipcss = ip_off;
3583		ipcso = ip_off + offsetof(struct ip, ip_sum);
3584		/*
3585		 * Start offset for header checksum calculation.
3586		 * End offset for header checksum calculation.
3587		 * Offset of place to put the checksum.
3588		 */
3589		TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
3590		TXD->lower_setup.ip_fields.ipcss = ipcss;
3591		TXD->lower_setup.ip_fields.ipcse = htole16(hdr_len);
3592		TXD->lower_setup.ip_fields.ipcso = ipcso;
3593		cmd |= E1000_TXD_CMD_IP;
3594	}
3595
3596	if (mp->m_pkthdr.csum_flags & CSUM_TCP) {
3597 		*txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
3598 		*txd_upper |= E1000_TXD_POPTS_TXSM << 8;
3599 		offload |= CSUM_TCP;
3600 		tucss = hdr_len;
3601 		tucso = hdr_len + offsetof(struct tcphdr, th_sum);
3602 		/*
3603 		 * Setting up new checksum offload context for every frames
3604 		 * takes a lot of processing time for hardware. This also
3605 		 * reduces performance a lot for small sized frames so avoid
3606 		 * it if driver can use previously configured checksum
3607 		 * offload context.
3608 		 */
3609 		if (txr->last_hw_offload == offload) {
3610 			if (offload & CSUM_IP) {
3611 				if (txr->last_hw_ipcss == ipcss &&
3612 				    txr->last_hw_ipcso == ipcso &&
3613 				    txr->last_hw_tucss == tucss &&
3614 				    txr->last_hw_tucso == tucso)
3615 					return;
3616 			} else {
3617 				if (txr->last_hw_tucss == tucss &&
3618 				    txr->last_hw_tucso == tucso)
3619 					return;
3620 			}
3621  		}
3622 		txr->last_hw_offload = offload;
3623 		txr->last_hw_tucss = tucss;
3624 		txr->last_hw_tucso = tucso;
3625 		/*
3626 		 * Start offset for payload checksum calculation.
3627 		 * End offset for payload checksum calculation.
3628 		 * Offset of place to put the checksum.
3629 		 */
3630		TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
3631 		TXD->upper_setup.tcp_fields.tucss = hdr_len;
3632 		TXD->upper_setup.tcp_fields.tucse = htole16(0);
3633 		TXD->upper_setup.tcp_fields.tucso = tucso;
3634 		cmd |= E1000_TXD_CMD_TCP;
3635 	} else if (mp->m_pkthdr.csum_flags & CSUM_UDP) {
3636 		*txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
3637 		*txd_upper |= E1000_TXD_POPTS_TXSM << 8;
3638 		tucss = hdr_len;
3639 		tucso = hdr_len + offsetof(struct udphdr, uh_sum);
3640 		/*
3641 		 * Setting up new checksum offload context for every frames
3642 		 * takes a lot of processing time for hardware. This also
3643 		 * reduces performance a lot for small sized frames so avoid
3644 		 * it if driver can use previously configured checksum
3645 		 * offload context.
3646 		 */
3647 		if (txr->last_hw_offload == offload) {
3648 			if (offload & CSUM_IP) {
3649 				if (txr->last_hw_ipcss == ipcss &&
3650 				    txr->last_hw_ipcso == ipcso &&
3651 				    txr->last_hw_tucss == tucss &&
3652 				    txr->last_hw_tucso == tucso)
3653 					return;
3654 			} else {
3655 				if (txr->last_hw_tucss == tucss &&
3656 				    txr->last_hw_tucso == tucso)
3657 					return;
3658 			}
3659 		}
3660 		txr->last_hw_offload = offload;
3661 		txr->last_hw_tucss = tucss;
3662 		txr->last_hw_tucso = tucso;
3663 		/*
3664 		 * Start offset for header checksum calculation.
3665 		 * End offset for header checksum calculation.
3666 		 * Offset of place to put the checksum.
3667 		 */
3668		TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
3669 		TXD->upper_setup.tcp_fields.tucss = tucss;
3670 		TXD->upper_setup.tcp_fields.tucse = htole16(0);
3671 		TXD->upper_setup.tcp_fields.tucso = tucso;
3672  	}
3673
3674 	if (offload & CSUM_IP) {
3675 		txr->last_hw_ipcss = ipcss;
3676 		txr->last_hw_ipcso = ipcso;
3677  	}
3678
3679	TXD->tcp_seg_setup.data = htole32(0);
3680	TXD->cmd_and_length =
3681	    htole32(adapter->txd_cmd | E1000_TXD_CMD_DEXT | cmd);
3682	tx_buffer = &txr->tx_buffers[cur];
3683	tx_buffer->m_head = NULL;
3684	tx_buffer->next_eop = -1;
3685
3686	if (++cur == adapter->num_tx_desc)
3687		cur = 0;
3688
3689	txr->tx_avail--;
3690	txr->next_avail_desc = cur;
3691}
3692
3693
3694/**********************************************************************
3695 *
3696 *  Setup work for hardware segmentation offload (TSO)
3697 *
3698 **********************************************************************/
3699static void
3700em_tso_setup(struct tx_ring *txr, struct mbuf *mp, int ip_off,
3701    struct ip *ip, struct tcphdr *tp, u32 *txd_upper, u32 *txd_lower)
3702{
3703	struct adapter			*adapter = txr->adapter;
3704	struct e1000_context_desc	*TXD;
3705	struct em_buffer		*tx_buffer;
3706	int cur, hdr_len;
3707
3708	/*
3709	 * In theory we can use the same TSO context if and only if
3710	 * frame is the same type(IP/TCP) and the same MSS. However
3711	 * checking whether a frame has the same IP/TCP structure is
3712	 * hard thing so just ignore that and always restablish a
3713	 * new TSO context.
3714	 */
3715	hdr_len = ip_off + (ip->ip_hl << 2) + (tp->th_off << 2);
3716	*txd_lower = (E1000_TXD_CMD_DEXT |	/* Extended descr type */
3717		      E1000_TXD_DTYP_D |	/* Data descr type */
3718		      E1000_TXD_CMD_TSE);	/* Do TSE on this packet */
3719
3720	/* IP and/or TCP header checksum calculation and insertion. */
3721	*txd_upper = (E1000_TXD_POPTS_IXSM | E1000_TXD_POPTS_TXSM) << 8;
3722
3723	cur = txr->next_avail_desc;
3724	tx_buffer = &txr->tx_buffers[cur];
3725	TXD = (struct e1000_context_desc *) &txr->tx_base[cur];
3726
3727	/*
3728	 * Start offset for header checksum calculation.
3729	 * End offset for header checksum calculation.
3730	 * Offset of place put the checksum.
3731	 */
3732	TXD->lower_setup.ip_fields.ipcss = ip_off;
3733	TXD->lower_setup.ip_fields.ipcse =
3734	    htole16(ip_off + (ip->ip_hl << 2) - 1);
3735	TXD->lower_setup.ip_fields.ipcso = ip_off + offsetof(struct ip, ip_sum);
3736	/*
3737	 * Start offset for payload checksum calculation.
3738	 * End offset for payload checksum calculation.
3739	 * Offset of place to put the checksum.
3740	 */
3741	TXD->upper_setup.tcp_fields.tucss = ip_off + (ip->ip_hl << 2);
3742	TXD->upper_setup.tcp_fields.tucse = 0;
3743	TXD->upper_setup.tcp_fields.tucso =
3744	    ip_off + (ip->ip_hl << 2) + offsetof(struct tcphdr, th_sum);
3745	/*
3746	 * Payload size per packet w/o any headers.
3747	 * Length of all headers up to payload.
3748	 */
3749	TXD->tcp_seg_setup.fields.mss = htole16(mp->m_pkthdr.tso_segsz);
3750	TXD->tcp_seg_setup.fields.hdr_len = hdr_len;
3751
3752	TXD->cmd_and_length = htole32(adapter->txd_cmd |
3753				E1000_TXD_CMD_DEXT |	/* Extended descr */
3754				E1000_TXD_CMD_TSE |	/* TSE context */
3755				E1000_TXD_CMD_IP |	/* Do IP csum */
3756				E1000_TXD_CMD_TCP |	/* Do TCP checksum */
3757				(mp->m_pkthdr.len - (hdr_len))); /* Total len */
3758
3759	tx_buffer->m_head = NULL;
3760	tx_buffer->next_eop = -1;
3761
3762	if (++cur == adapter->num_tx_desc)
3763		cur = 0;
3764
3765	txr->tx_avail--;
3766	txr->next_avail_desc = cur;
3767	txr->tx_tso = TRUE;
3768}
3769
3770
3771/**********************************************************************
3772 *
3773 *  Examine each tx_buffer in the used queue. If the hardware is done
3774 *  processing the packet then free associated resources. The
3775 *  tx_buffer is put back on the free queue.
3776 *
3777 **********************************************************************/
3778static void
3779em_txeof(struct tx_ring *txr)
3780{
3781	struct adapter	*adapter = txr->adapter;
3782        int first, last, done, processed;
3783        struct em_buffer *tx_buffer;
3784        struct e1000_tx_desc   *tx_desc, *eop_desc;
3785	struct ifnet   *ifp = adapter->ifp;
3786
3787	EM_TX_LOCK_ASSERT(txr);
3788#ifdef DEV_NETMAP
3789	if (ifp->if_capenable & IFCAP_NETMAP) {
3790		struct netmap_adapter *na = NA(ifp);
3791
3792		selwakeuppri(&na->tx_rings[txr->me].si, PI_NET);
3793		EM_TX_UNLOCK(txr);
3794		EM_CORE_LOCK(adapter);
3795		selwakeuppri(&na->tx_si, PI_NET);
3796		EM_CORE_UNLOCK(adapter);
3797		EM_TX_LOCK(txr);
3798		return;
3799	}
3800#endif /* DEV_NETMAP */
3801
3802	/* No work, make sure watchdog is off */
3803        if (txr->tx_avail == adapter->num_tx_desc) {
3804		txr->queue_status = EM_QUEUE_IDLE;
3805                return;
3806	}
3807
3808	processed = 0;
3809        first = txr->next_to_clean;
3810        tx_desc = &txr->tx_base[first];
3811        tx_buffer = &txr->tx_buffers[first];
3812	last = tx_buffer->next_eop;
3813        eop_desc = &txr->tx_base[last];
3814
3815	/*
3816	 * What this does is get the index of the
3817	 * first descriptor AFTER the EOP of the
3818	 * first packet, that way we can do the
3819	 * simple comparison on the inner while loop.
3820	 */
3821	if (++last == adapter->num_tx_desc)
3822 		last = 0;
3823	done = last;
3824
3825        bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3826            BUS_DMASYNC_POSTREAD);
3827
3828        while (eop_desc->upper.fields.status & E1000_TXD_STAT_DD) {
3829		/* We clean the range of the packet */
3830		while (first != done) {
3831                	tx_desc->upper.data = 0;
3832                	tx_desc->lower.data = 0;
3833                	tx_desc->buffer_addr = 0;
3834                	++txr->tx_avail;
3835			++processed;
3836
3837			if (tx_buffer->m_head) {
3838				bus_dmamap_sync(txr->txtag,
3839				    tx_buffer->map,
3840				    BUS_DMASYNC_POSTWRITE);
3841				bus_dmamap_unload(txr->txtag,
3842				    tx_buffer->map);
3843                        	m_freem(tx_buffer->m_head);
3844                        	tx_buffer->m_head = NULL;
3845                	}
3846			tx_buffer->next_eop = -1;
3847			txr->watchdog_time = ticks;
3848
3849	                if (++first == adapter->num_tx_desc)
3850				first = 0;
3851
3852	                tx_buffer = &txr->tx_buffers[first];
3853			tx_desc = &txr->tx_base[first];
3854		}
3855		++ifp->if_opackets;
3856		/* See if we can continue to the next packet */
3857		last = tx_buffer->next_eop;
3858		if (last != -1) {
3859        		eop_desc = &txr->tx_base[last];
3860			/* Get new done point */
3861			if (++last == adapter->num_tx_desc) last = 0;
3862			done = last;
3863		} else
3864			break;
3865        }
3866        bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3867            BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3868
3869        txr->next_to_clean = first;
3870
3871	/*
3872	** Watchdog calculation, we know there's
3873	** work outstanding or the first return
3874	** would have been taken, so none processed
3875	** for too long indicates a hang. local timer
3876	** will examine this and do a reset if needed.
3877	*/
3878	if ((!processed) && ((ticks - txr->watchdog_time) > EM_WATCHDOG))
3879		txr->queue_status = EM_QUEUE_HUNG;
3880
3881        /*
3882         * If we have a minimum free, clear IFF_DRV_OACTIVE
3883         * to tell the stack that it is OK to send packets.
3884	 * Notice that all writes of OACTIVE happen under the
3885	 * TX lock which, with a single queue, guarantees
3886	 * sanity.
3887         */
3888        if (txr->tx_avail >= EM_MAX_SCATTER)
3889		ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
3890
3891	/* Disable watchdog if all clean */
3892	if (txr->tx_avail == adapter->num_tx_desc) {
3893		txr->queue_status = EM_QUEUE_IDLE;
3894	}
3895}
3896
3897
3898/*********************************************************************
3899 *
3900 *  Refresh RX descriptor mbufs from system mbuf buffer pool.
3901 *
3902 **********************************************************************/
3903static void
3904em_refresh_mbufs(struct rx_ring *rxr, int limit)
3905{
3906	struct adapter		*adapter = rxr->adapter;
3907	struct mbuf		*m;
3908	bus_dma_segment_t	segs[1];
3909	struct em_buffer	*rxbuf;
3910	int			i, j, error, nsegs;
3911	bool			cleaned = FALSE;
3912
3913	i = j = rxr->next_to_refresh;
3914	/*
3915	** Get one descriptor beyond
3916	** our work mark to control
3917	** the loop.
3918	*/
3919	if (++j == adapter->num_rx_desc)
3920		j = 0;
3921
3922	while (j != limit) {
3923		rxbuf = &rxr->rx_buffers[i];
3924		if (rxbuf->m_head == NULL) {
3925			m = m_getjcl(M_DONTWAIT, MT_DATA,
3926			    M_PKTHDR, adapter->rx_mbuf_sz);
3927			/*
3928			** If we have a temporary resource shortage
3929			** that causes a failure, just abort refresh
3930			** for now, we will return to this point when
3931			** reinvoked from em_rxeof.
3932			*/
3933			if (m == NULL)
3934				goto update;
3935		} else
3936			m = rxbuf->m_head;
3937
3938		m->m_len = m->m_pkthdr.len = adapter->rx_mbuf_sz;
3939		m->m_flags |= M_PKTHDR;
3940		m->m_data = m->m_ext.ext_buf;
3941
3942		/* Use bus_dma machinery to setup the memory mapping  */
3943		error = bus_dmamap_load_mbuf_sg(rxr->rxtag, rxbuf->map,
3944		    m, segs, &nsegs, BUS_DMA_NOWAIT);
3945		if (error != 0) {
3946			printf("Refresh mbufs: hdr dmamap load"
3947			    " failure - %d\n", error);
3948			m_free(m);
3949			rxbuf->m_head = NULL;
3950			goto update;
3951		}
3952		rxbuf->m_head = m;
3953		bus_dmamap_sync(rxr->rxtag,
3954		    rxbuf->map, BUS_DMASYNC_PREREAD);
3955		rxr->rx_base[i].buffer_addr = htole64(segs[0].ds_addr);
3956		cleaned = TRUE;
3957
3958		i = j; /* Next is precalulated for us */
3959		rxr->next_to_refresh = i;
3960		/* Calculate next controlling index */
3961		if (++j == adapter->num_rx_desc)
3962			j = 0;
3963	}
3964update:
3965	/*
3966	** Update the tail pointer only if,
3967	** and as far as we have refreshed.
3968	*/
3969	if (cleaned)
3970		E1000_WRITE_REG(&adapter->hw,
3971		    E1000_RDT(rxr->me), rxr->next_to_refresh);
3972
3973	return;
3974}
3975
3976
3977/*********************************************************************
3978 *
3979 *  Allocate memory for rx_buffer structures. Since we use one
3980 *  rx_buffer per received packet, the maximum number of rx_buffer's
3981 *  that we'll need is equal to the number of receive descriptors
3982 *  that we've allocated.
3983 *
3984 **********************************************************************/
3985static int
3986em_allocate_receive_buffers(struct rx_ring *rxr)
3987{
3988	struct adapter		*adapter = rxr->adapter;
3989	device_t		dev = adapter->dev;
3990	struct em_buffer	*rxbuf;
3991	int			error;
3992
3993	rxr->rx_buffers = malloc(sizeof(struct em_buffer) *
3994	    adapter->num_rx_desc, M_DEVBUF, M_NOWAIT | M_ZERO);
3995	if (rxr->rx_buffers == NULL) {
3996		device_printf(dev, "Unable to allocate rx_buffer memory\n");
3997		return (ENOMEM);
3998	}
3999
4000	error = bus_dma_tag_create(bus_get_dma_tag(dev), /* parent */
4001				1, 0,			/* alignment, bounds */
4002				BUS_SPACE_MAXADDR,	/* lowaddr */
4003				BUS_SPACE_MAXADDR,	/* highaddr */
4004				NULL, NULL,		/* filter, filterarg */
4005				MJUM9BYTES,		/* maxsize */
4006				1,			/* nsegments */
4007				MJUM9BYTES,		/* maxsegsize */
4008				0,			/* flags */
4009				NULL,			/* lockfunc */
4010				NULL,			/* lockarg */
4011				&rxr->rxtag);
4012	if (error) {
4013		device_printf(dev, "%s: bus_dma_tag_create failed %d\n",
4014		    __func__, error);
4015		goto fail;
4016	}
4017
4018	rxbuf = rxr->rx_buffers;
4019	for (int i = 0; i < adapter->num_rx_desc; i++, rxbuf++) {
4020		rxbuf = &rxr->rx_buffers[i];
4021		error = bus_dmamap_create(rxr->rxtag, BUS_DMA_NOWAIT,
4022		    &rxbuf->map);
4023		if (error) {
4024			device_printf(dev, "%s: bus_dmamap_create failed: %d\n",
4025			    __func__, error);
4026			goto fail;
4027		}
4028	}
4029
4030	return (0);
4031
4032fail:
4033	em_free_receive_structures(adapter);
4034	return (error);
4035}
4036
4037
4038/*********************************************************************
4039 *
4040 *  Initialize a receive ring and its buffers.
4041 *
4042 **********************************************************************/
4043static int
4044em_setup_receive_ring(struct rx_ring *rxr)
4045{
4046	struct	adapter 	*adapter = rxr->adapter;
4047	struct em_buffer	*rxbuf;
4048	bus_dma_segment_t	seg[1];
4049	int			rsize, nsegs, error = 0;
4050#ifdef DEV_NETMAP
4051	struct netmap_adapter *na = NA(adapter->ifp);
4052	struct netmap_slot *slot;
4053#endif
4054
4055
4056	/* Clear the ring contents */
4057	EM_RX_LOCK(rxr);
4058	rsize = roundup2(adapter->num_rx_desc *
4059	    sizeof(struct e1000_rx_desc), EM_DBA_ALIGN);
4060	bzero((void *)rxr->rx_base, rsize);
4061#ifdef DEV_NETMAP
4062	slot = netmap_reset(na, NR_RX, 0, 0);
4063#endif
4064
4065	/*
4066	** Free current RX buffer structs and their mbufs
4067	*/
4068	for (int i = 0; i < adapter->num_rx_desc; i++) {
4069		rxbuf = &rxr->rx_buffers[i];
4070		if (rxbuf->m_head != NULL) {
4071			bus_dmamap_sync(rxr->rxtag, rxbuf->map,
4072			    BUS_DMASYNC_POSTREAD);
4073			bus_dmamap_unload(rxr->rxtag, rxbuf->map);
4074			m_freem(rxbuf->m_head);
4075			rxbuf->m_head = NULL; /* mark as freed */
4076		}
4077	}
4078
4079	/* Now replenish the mbufs */
4080        for (int j = 0; j != adapter->num_rx_desc; ++j) {
4081		rxbuf = &rxr->rx_buffers[j];
4082#ifdef DEV_NETMAP
4083		if (slot) {
4084			int si = netmap_idx_n2k(&na->rx_rings[rxr->me], j);
4085			uint64_t paddr;
4086			void *addr;
4087
4088			addr = PNMB(slot + si, &paddr);
4089			netmap_load_map(rxr->rxtag, rxbuf->map, addr);
4090			/* Update descriptor */
4091			rxr->rx_base[j].buffer_addr = htole64(paddr);
4092			continue;
4093		}
4094#endif /* DEV_NETMAP */
4095		rxbuf->m_head = m_getjcl(M_DONTWAIT, MT_DATA,
4096		    M_PKTHDR, adapter->rx_mbuf_sz);
4097		if (rxbuf->m_head == NULL) {
4098			error = ENOBUFS;
4099			goto fail;
4100		}
4101		rxbuf->m_head->m_len = adapter->rx_mbuf_sz;
4102		rxbuf->m_head->m_flags &= ~M_HASFCS; /* we strip it */
4103		rxbuf->m_head->m_pkthdr.len = adapter->rx_mbuf_sz;
4104
4105		/* Get the memory mapping */
4106		error = bus_dmamap_load_mbuf_sg(rxr->rxtag,
4107		    rxbuf->map, rxbuf->m_head, seg,
4108		    &nsegs, BUS_DMA_NOWAIT);
4109		if (error != 0) {
4110			m_freem(rxbuf->m_head);
4111			rxbuf->m_head = NULL;
4112			goto fail;
4113		}
4114		bus_dmamap_sync(rxr->rxtag,
4115		    rxbuf->map, BUS_DMASYNC_PREREAD);
4116
4117		/* Update descriptor */
4118		rxr->rx_base[j].buffer_addr = htole64(seg[0].ds_addr);
4119	}
4120	rxr->next_to_check = 0;
4121	rxr->next_to_refresh = 0;
4122	bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4123	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4124
4125fail:
4126	EM_RX_UNLOCK(rxr);
4127	return (error);
4128}
4129
4130/*********************************************************************
4131 *
4132 *  Initialize all receive rings.
4133 *
4134 **********************************************************************/
4135static int
4136em_setup_receive_structures(struct adapter *adapter)
4137{
4138	struct rx_ring *rxr = adapter->rx_rings;
4139	int q;
4140
4141	for (q = 0; q < adapter->num_queues; q++, rxr++)
4142		if (em_setup_receive_ring(rxr))
4143			goto fail;
4144
4145	return (0);
4146fail:
4147	/*
4148	 * Free RX buffers allocated so far, we will only handle
4149	 * the rings that completed, the failing case will have
4150	 * cleaned up for itself. 'q' failed, so its the terminus.
4151	 */
4152	for (int i = 0; i < q; ++i) {
4153		rxr = &adapter->rx_rings[i];
4154		for (int n = 0; n < adapter->num_rx_desc; n++) {
4155			struct em_buffer *rxbuf;
4156			rxbuf = &rxr->rx_buffers[n];
4157			if (rxbuf->m_head != NULL) {
4158				bus_dmamap_sync(rxr->rxtag, rxbuf->map,
4159			  	  BUS_DMASYNC_POSTREAD);
4160				bus_dmamap_unload(rxr->rxtag, rxbuf->map);
4161				m_freem(rxbuf->m_head);
4162				rxbuf->m_head = NULL;
4163			}
4164		}
4165		rxr->next_to_check = 0;
4166		rxr->next_to_refresh = 0;
4167	}
4168
4169	return (ENOBUFS);
4170}
4171
4172/*********************************************************************
4173 *
4174 *  Free all receive rings.
4175 *
4176 **********************************************************************/
4177static void
4178em_free_receive_structures(struct adapter *adapter)
4179{
4180	struct rx_ring *rxr = adapter->rx_rings;
4181
4182	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4183		em_free_receive_buffers(rxr);
4184		/* Free the ring memory as well */
4185		em_dma_free(adapter, &rxr->rxdma);
4186		EM_RX_LOCK_DESTROY(rxr);
4187	}
4188
4189	free(adapter->rx_rings, M_DEVBUF);
4190}
4191
4192
4193/*********************************************************************
4194 *
4195 *  Free receive ring data structures
4196 *
4197 **********************************************************************/
4198static void
4199em_free_receive_buffers(struct rx_ring *rxr)
4200{
4201	struct adapter		*adapter = rxr->adapter;
4202	struct em_buffer	*rxbuf = NULL;
4203
4204	INIT_DEBUGOUT("free_receive_buffers: begin");
4205
4206	if (rxr->rx_buffers != NULL) {
4207		for (int i = 0; i < adapter->num_rx_desc; i++) {
4208			rxbuf = &rxr->rx_buffers[i];
4209			if (rxbuf->map != NULL) {
4210				bus_dmamap_sync(rxr->rxtag, rxbuf->map,
4211				    BUS_DMASYNC_POSTREAD);
4212				bus_dmamap_unload(rxr->rxtag, rxbuf->map);
4213				bus_dmamap_destroy(rxr->rxtag, rxbuf->map);
4214			}
4215			if (rxbuf->m_head != NULL) {
4216				m_freem(rxbuf->m_head);
4217				rxbuf->m_head = NULL;
4218			}
4219		}
4220		free(rxr->rx_buffers, M_DEVBUF);
4221		rxr->rx_buffers = NULL;
4222		rxr->next_to_check = 0;
4223		rxr->next_to_refresh = 0;
4224	}
4225
4226	if (rxr->rxtag != NULL) {
4227		bus_dma_tag_destroy(rxr->rxtag);
4228		rxr->rxtag = NULL;
4229	}
4230
4231	return;
4232}
4233
4234
4235/*********************************************************************
4236 *
4237 *  Enable receive unit.
4238 *
4239 **********************************************************************/
4240#define MAX_INTS_PER_SEC	8000
4241#define DEFAULT_ITR	     1000000000/(MAX_INTS_PER_SEC * 256)
4242
4243static void
4244em_initialize_receive_unit(struct adapter *adapter)
4245{
4246	struct rx_ring	*rxr = adapter->rx_rings;
4247	struct ifnet	*ifp = adapter->ifp;
4248	struct e1000_hw	*hw = &adapter->hw;
4249	u64	bus_addr;
4250	u32	rctl, rxcsum;
4251
4252	INIT_DEBUGOUT("em_initialize_receive_units: begin");
4253
4254	/*
4255	 * Make sure receives are disabled while setting
4256	 * up the descriptor ring
4257	 */
4258	rctl = E1000_READ_REG(hw, E1000_RCTL);
4259	/* Do not disable if ever enabled on this hardware */
4260	if ((hw->mac.type != e1000_82574) && (hw->mac.type != e1000_82583))
4261		E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
4262
4263	E1000_WRITE_REG(&adapter->hw, E1000_RADV,
4264	    adapter->rx_abs_int_delay.value);
4265	/*
4266	 * Set the interrupt throttling rate. Value is calculated
4267	 * as DEFAULT_ITR = 1/(MAX_INTS_PER_SEC * 256ns)
4268	 */
4269	E1000_WRITE_REG(hw, E1000_ITR, DEFAULT_ITR);
4270
4271	/*
4272	** When using MSIX interrupts we need to throttle
4273	** using the EITR register (82574 only)
4274	*/
4275	if (hw->mac.type == e1000_82574) {
4276		for (int i = 0; i < 4; i++)
4277			E1000_WRITE_REG(hw, E1000_EITR_82574(i),
4278			    DEFAULT_ITR);
4279		/* Disable accelerated acknowledge */
4280		E1000_WRITE_REG(hw, E1000_RFCTL, E1000_RFCTL_ACK_DIS);
4281	}
4282
4283	if (ifp->if_capenable & IFCAP_RXCSUM) {
4284		rxcsum = E1000_READ_REG(hw, E1000_RXCSUM);
4285		rxcsum |= (E1000_RXCSUM_IPOFL | E1000_RXCSUM_TUOFL);
4286		E1000_WRITE_REG(hw, E1000_RXCSUM, rxcsum);
4287	}
4288
4289	/*
4290	** XXX TEMPORARY WORKAROUND: on some systems with 82573
4291	** long latencies are observed, like Lenovo X60. This
4292	** change eliminates the problem, but since having positive
4293	** values in RDTR is a known source of problems on other
4294	** platforms another solution is being sought.
4295	*/
4296	if (hw->mac.type == e1000_82573)
4297		E1000_WRITE_REG(hw, E1000_RDTR, 0x20);
4298
4299	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4300		/* Setup the Base and Length of the Rx Descriptor Ring */
4301		bus_addr = rxr->rxdma.dma_paddr;
4302		E1000_WRITE_REG(hw, E1000_RDLEN(i),
4303		    adapter->num_rx_desc * sizeof(struct e1000_rx_desc));
4304		E1000_WRITE_REG(hw, E1000_RDBAH(i), (u32)(bus_addr >> 32));
4305		E1000_WRITE_REG(hw, E1000_RDBAL(i), (u32)bus_addr);
4306		/* Setup the Head and Tail Descriptor Pointers */
4307		E1000_WRITE_REG(hw, E1000_RDH(i), 0);
4308#ifdef DEV_NETMAP
4309		/*
4310		 * an init() while a netmap client is active must
4311		 * preserve the rx buffers passed to userspace.
4312		 * In this driver it means we adjust RDT to
4313		 * something different from na->num_rx_desc - 1.
4314		 */
4315		if (ifp->if_capenable & IFCAP_NETMAP) {
4316			struct netmap_adapter *na = NA(adapter->ifp);
4317			struct netmap_kring *kring = &na->rx_rings[i];
4318			int t = na->num_rx_desc - 1 - kring->nr_hwavail;
4319
4320			E1000_WRITE_REG(hw, E1000_RDT(i), t);
4321		} else
4322#endif /* DEV_NETMAP */
4323		E1000_WRITE_REG(hw, E1000_RDT(i), adapter->num_rx_desc - 1);
4324	}
4325
4326	/* Set PTHRESH for improved jumbo performance */
4327	if (((adapter->hw.mac.type == e1000_ich9lan) ||
4328	    (adapter->hw.mac.type == e1000_pch2lan) ||
4329	    (adapter->hw.mac.type == e1000_ich10lan)) &&
4330	    (ifp->if_mtu > ETHERMTU)) {
4331		u32 rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(0));
4332		E1000_WRITE_REG(hw, E1000_RXDCTL(0), rxdctl | 3);
4333	}
4334
4335	if (adapter->hw.mac.type == e1000_pch2lan) {
4336		if (ifp->if_mtu > ETHERMTU)
4337			e1000_lv_jumbo_workaround_ich8lan(hw, TRUE);
4338		else
4339			e1000_lv_jumbo_workaround_ich8lan(hw, FALSE);
4340	}
4341
4342	/* Setup the Receive Control Register */
4343	rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
4344	rctl |= E1000_RCTL_EN | E1000_RCTL_BAM |
4345	    E1000_RCTL_LBM_NO | E1000_RCTL_RDMTS_HALF |
4346	    (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
4347
4348        /* Strip the CRC */
4349        rctl |= E1000_RCTL_SECRC;
4350
4351        /* Make sure VLAN Filters are off */
4352        rctl &= ~E1000_RCTL_VFE;
4353	rctl &= ~E1000_RCTL_SBP;
4354
4355	if (adapter->rx_mbuf_sz == MCLBYTES)
4356		rctl |= E1000_RCTL_SZ_2048;
4357	else if (adapter->rx_mbuf_sz == MJUMPAGESIZE)
4358		rctl |= E1000_RCTL_SZ_4096 | E1000_RCTL_BSEX;
4359	else if (adapter->rx_mbuf_sz > MJUMPAGESIZE)
4360		rctl |= E1000_RCTL_SZ_8192 | E1000_RCTL_BSEX;
4361
4362	if (ifp->if_mtu > ETHERMTU)
4363		rctl |= E1000_RCTL_LPE;
4364	else
4365		rctl &= ~E1000_RCTL_LPE;
4366
4367	/* Write out the settings */
4368	E1000_WRITE_REG(hw, E1000_RCTL, rctl);
4369
4370	return;
4371}
4372
4373
4374/*********************************************************************
4375 *
4376 *  This routine executes in interrupt context. It replenishes
4377 *  the mbufs in the descriptor and sends data which has been
4378 *  dma'ed into host memory to upper layer.
4379 *
4380 *  We loop at most count times if count is > 0, or until done if
4381 *  count < 0.
4382 *
4383 *  For polling we also now return the number of cleaned packets
4384 *********************************************************************/
4385static bool
4386em_rxeof(struct rx_ring *rxr, int count, int *done)
4387{
4388	struct adapter		*adapter = rxr->adapter;
4389	struct ifnet		*ifp = adapter->ifp;
4390	struct mbuf		*mp, *sendmp;
4391	u8			status = 0;
4392	u16 			len;
4393	int			i, processed, rxdone = 0;
4394	bool			eop;
4395	struct e1000_rx_desc	*cur;
4396
4397	EM_RX_LOCK(rxr);
4398
4399#ifdef DEV_NETMAP
4400	if (ifp->if_capenable & IFCAP_NETMAP) {
4401		struct netmap_adapter *na = NA(ifp);
4402
4403		na->rx_rings[rxr->me].nr_kflags |= NKR_PENDINTR;
4404		selwakeuppri(&na->rx_rings[rxr->me].si, PI_NET);
4405		EM_RX_UNLOCK(rxr);
4406		EM_CORE_LOCK(adapter);
4407		selwakeuppri(&na->rx_si, PI_NET);
4408		EM_CORE_UNLOCK(adapter);
4409		return (0);
4410	}
4411#endif /* DEV_NETMAP */
4412
4413	for (i = rxr->next_to_check, processed = 0; count != 0;) {
4414
4415		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
4416			break;
4417
4418		bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4419		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
4420
4421		cur = &rxr->rx_base[i];
4422		status = cur->status;
4423		mp = sendmp = NULL;
4424
4425		if ((status & E1000_RXD_STAT_DD) == 0)
4426			break;
4427
4428		len = le16toh(cur->length);
4429		eop = (status & E1000_RXD_STAT_EOP) != 0;
4430
4431		if ((cur->errors & E1000_RXD_ERR_FRAME_ERR_MASK) ||
4432		    (rxr->discard == TRUE)) {
4433			ifp->if_ierrors++;
4434			++rxr->rx_discarded;
4435			if (!eop) /* Catch subsequent segs */
4436				rxr->discard = TRUE;
4437			else
4438				rxr->discard = FALSE;
4439			em_rx_discard(rxr, i);
4440			goto next_desc;
4441		}
4442
4443		/* Assign correct length to the current fragment */
4444		mp = rxr->rx_buffers[i].m_head;
4445		mp->m_len = len;
4446
4447		/* Trigger for refresh */
4448		rxr->rx_buffers[i].m_head = NULL;
4449
4450		/* First segment? */
4451		if (rxr->fmp == NULL) {
4452			mp->m_pkthdr.len = len;
4453			rxr->fmp = rxr->lmp = mp;
4454		} else {
4455			/* Chain mbuf's together */
4456			mp->m_flags &= ~M_PKTHDR;
4457			rxr->lmp->m_next = mp;
4458			rxr->lmp = mp;
4459			rxr->fmp->m_pkthdr.len += len;
4460		}
4461
4462		if (eop) {
4463			--count;
4464			sendmp = rxr->fmp;
4465			sendmp->m_pkthdr.rcvif = ifp;
4466			ifp->if_ipackets++;
4467			em_receive_checksum(cur, sendmp);
4468#ifndef __NO_STRICT_ALIGNMENT
4469			if (adapter->max_frame_size >
4470			    (MCLBYTES - ETHER_ALIGN) &&
4471			    em_fixup_rx(rxr) != 0)
4472				goto skip;
4473#endif
4474			if (status & E1000_RXD_STAT_VP) {
4475				sendmp->m_pkthdr.ether_vtag =
4476				    le16toh(cur->special);
4477				sendmp->m_flags |= M_VLANTAG;
4478			}
4479#ifndef __NO_STRICT_ALIGNMENT
4480skip:
4481#endif
4482			rxr->fmp = rxr->lmp = NULL;
4483		}
4484next_desc:
4485		/* Zero out the receive descriptors status. */
4486		cur->status = 0;
4487		++rxdone;	/* cumulative for POLL */
4488		++processed;
4489
4490		/* Advance our pointers to the next descriptor. */
4491		if (++i == adapter->num_rx_desc)
4492			i = 0;
4493
4494		/* Send to the stack */
4495		if (sendmp != NULL) {
4496			rxr->next_to_check = i;
4497			EM_RX_UNLOCK(rxr);
4498			(*ifp->if_input)(ifp, sendmp);
4499			EM_RX_LOCK(rxr);
4500			i = rxr->next_to_check;
4501		}
4502
4503		/* Only refresh mbufs every 8 descriptors */
4504		if (processed == 8) {
4505			em_refresh_mbufs(rxr, i);
4506			processed = 0;
4507		}
4508	}
4509
4510	/* Catch any remaining refresh work */
4511	if (e1000_rx_unrefreshed(rxr))
4512		em_refresh_mbufs(rxr, i);
4513
4514	rxr->next_to_check = i;
4515	if (done != NULL)
4516		*done = rxdone;
4517	EM_RX_UNLOCK(rxr);
4518
4519	return ((status & E1000_RXD_STAT_DD) ? TRUE : FALSE);
4520}
4521
4522static __inline void
4523em_rx_discard(struct rx_ring *rxr, int i)
4524{
4525	struct em_buffer	*rbuf;
4526
4527	rbuf = &rxr->rx_buffers[i];
4528	/* Free any previous pieces */
4529	if (rxr->fmp != NULL) {
4530		rxr->fmp->m_flags |= M_PKTHDR;
4531		m_freem(rxr->fmp);
4532		rxr->fmp = NULL;
4533		rxr->lmp = NULL;
4534	}
4535	/*
4536	** Free buffer and allow em_refresh_mbufs()
4537	** to clean up and recharge buffer.
4538	*/
4539	if (rbuf->m_head) {
4540		m_free(rbuf->m_head);
4541		rbuf->m_head = NULL;
4542	}
4543	return;
4544}
4545
4546#ifndef __NO_STRICT_ALIGNMENT
4547/*
4548 * When jumbo frames are enabled we should realign entire payload on
4549 * architecures with strict alignment. This is serious design mistake of 8254x
4550 * as it nullifies DMA operations. 8254x just allows RX buffer size to be
4551 * 2048/4096/8192/16384. What we really want is 2048 - ETHER_ALIGN to align its
4552 * payload. On architecures without strict alignment restrictions 8254x still
4553 * performs unaligned memory access which would reduce the performance too.
4554 * To avoid copying over an entire frame to align, we allocate a new mbuf and
4555 * copy ethernet header to the new mbuf. The new mbuf is prepended into the
4556 * existing mbuf chain.
4557 *
4558 * Be aware, best performance of the 8254x is achived only when jumbo frame is
4559 * not used at all on architectures with strict alignment.
4560 */
4561static int
4562em_fixup_rx(struct rx_ring *rxr)
4563{
4564	struct adapter *adapter = rxr->adapter;
4565	struct mbuf *m, *n;
4566	int error;
4567
4568	error = 0;
4569	m = rxr->fmp;
4570	if (m->m_len <= (MCLBYTES - ETHER_HDR_LEN)) {
4571		bcopy(m->m_data, m->m_data + ETHER_HDR_LEN, m->m_len);
4572		m->m_data += ETHER_HDR_LEN;
4573	} else {
4574		MGETHDR(n, M_DONTWAIT, MT_DATA);
4575		if (n != NULL) {
4576			bcopy(m->m_data, n->m_data, ETHER_HDR_LEN);
4577			m->m_data += ETHER_HDR_LEN;
4578			m->m_len -= ETHER_HDR_LEN;
4579			n->m_len = ETHER_HDR_LEN;
4580			M_MOVE_PKTHDR(n, m);
4581			n->m_next = m;
4582			rxr->fmp = n;
4583		} else {
4584			adapter->dropped_pkts++;
4585			m_freem(rxr->fmp);
4586			rxr->fmp = NULL;
4587			error = ENOMEM;
4588		}
4589	}
4590
4591	return (error);
4592}
4593#endif
4594
4595/*********************************************************************
4596 *
4597 *  Verify that the hardware indicated that the checksum is valid.
4598 *  Inform the stack about the status of checksum so that stack
4599 *  doesn't spend time verifying the checksum.
4600 *
4601 *********************************************************************/
4602static void
4603em_receive_checksum(struct e1000_rx_desc *rx_desc, struct mbuf *mp)
4604{
4605	/* Ignore Checksum bit is set */
4606	if (rx_desc->status & E1000_RXD_STAT_IXSM) {
4607		mp->m_pkthdr.csum_flags = 0;
4608		return;
4609	}
4610
4611	if (rx_desc->status & E1000_RXD_STAT_IPCS) {
4612		/* Did it pass? */
4613		if (!(rx_desc->errors & E1000_RXD_ERR_IPE)) {
4614			/* IP Checksum Good */
4615			mp->m_pkthdr.csum_flags = CSUM_IP_CHECKED;
4616			mp->m_pkthdr.csum_flags |= CSUM_IP_VALID;
4617
4618		} else {
4619			mp->m_pkthdr.csum_flags = 0;
4620		}
4621	}
4622
4623	if (rx_desc->status & E1000_RXD_STAT_TCPCS) {
4624		/* Did it pass? */
4625		if (!(rx_desc->errors & E1000_RXD_ERR_TCPE)) {
4626			mp->m_pkthdr.csum_flags |=
4627			(CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
4628			mp->m_pkthdr.csum_data = htons(0xffff);
4629		}
4630	}
4631}
4632
4633/*
4634 * This routine is run via an vlan
4635 * config EVENT
4636 */
4637static void
4638em_register_vlan(void *arg, struct ifnet *ifp, u16 vtag)
4639{
4640	struct adapter	*adapter = ifp->if_softc;
4641	u32		index, bit;
4642
4643	if (ifp->if_softc !=  arg)   /* Not our event */
4644		return;
4645
4646	if ((vtag == 0) || (vtag > 4095))       /* Invalid ID */
4647                return;
4648
4649	EM_CORE_LOCK(adapter);
4650	index = (vtag >> 5) & 0x7F;
4651	bit = vtag & 0x1F;
4652	adapter->shadow_vfta[index] |= (1 << bit);
4653	++adapter->num_vlans;
4654	/* Re-init to load the changes */
4655	if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
4656		em_init_locked(adapter);
4657	EM_CORE_UNLOCK(adapter);
4658}
4659
4660/*
4661 * This routine is run via an vlan
4662 * unconfig EVENT
4663 */
4664static void
4665em_unregister_vlan(void *arg, struct ifnet *ifp, u16 vtag)
4666{
4667	struct adapter	*adapter = ifp->if_softc;
4668	u32		index, bit;
4669
4670	if (ifp->if_softc !=  arg)
4671		return;
4672
4673	if ((vtag == 0) || (vtag > 4095))       /* Invalid */
4674                return;
4675
4676	EM_CORE_LOCK(adapter);
4677	index = (vtag >> 5) & 0x7F;
4678	bit = vtag & 0x1F;
4679	adapter->shadow_vfta[index] &= ~(1 << bit);
4680	--adapter->num_vlans;
4681	/* Re-init to load the changes */
4682	if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
4683		em_init_locked(adapter);
4684	EM_CORE_UNLOCK(adapter);
4685}
4686
4687static void
4688em_setup_vlan_hw_support(struct adapter *adapter)
4689{
4690	struct e1000_hw *hw = &adapter->hw;
4691	u32             reg;
4692
4693	/*
4694	** We get here thru init_locked, meaning
4695	** a soft reset, this has already cleared
4696	** the VFTA and other state, so if there
4697	** have been no vlan's registered do nothing.
4698	*/
4699	if (adapter->num_vlans == 0)
4700                return;
4701
4702	/*
4703	** A soft reset zero's out the VFTA, so
4704	** we need to repopulate it now.
4705	*/
4706	for (int i = 0; i < EM_VFTA_SIZE; i++)
4707                if (adapter->shadow_vfta[i] != 0)
4708			E1000_WRITE_REG_ARRAY(hw, E1000_VFTA,
4709                            i, adapter->shadow_vfta[i]);
4710
4711	reg = E1000_READ_REG(hw, E1000_CTRL);
4712	reg |= E1000_CTRL_VME;
4713	E1000_WRITE_REG(hw, E1000_CTRL, reg);
4714
4715	/* Enable the Filter Table */
4716	reg = E1000_READ_REG(hw, E1000_RCTL);
4717	reg &= ~E1000_RCTL_CFIEN;
4718	reg |= E1000_RCTL_VFE;
4719	E1000_WRITE_REG(hw, E1000_RCTL, reg);
4720}
4721
4722static void
4723em_enable_intr(struct adapter *adapter)
4724{
4725	struct e1000_hw *hw = &adapter->hw;
4726	u32 ims_mask = IMS_ENABLE_MASK;
4727
4728	if (hw->mac.type == e1000_82574) {
4729		E1000_WRITE_REG(hw, EM_EIAC, EM_MSIX_MASK);
4730		ims_mask |= EM_MSIX_MASK;
4731	}
4732	E1000_WRITE_REG(hw, E1000_IMS, ims_mask);
4733}
4734
4735static void
4736em_disable_intr(struct adapter *adapter)
4737{
4738	struct e1000_hw *hw = &adapter->hw;
4739
4740	if (hw->mac.type == e1000_82574)
4741		E1000_WRITE_REG(hw, EM_EIAC, 0);
4742	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
4743}
4744
4745/*
4746 * Bit of a misnomer, what this really means is
4747 * to enable OS management of the system... aka
4748 * to disable special hardware management features
4749 */
4750static void
4751em_init_manageability(struct adapter *adapter)
4752{
4753	/* A shared code workaround */
4754#define E1000_82542_MANC2H E1000_MANC2H
4755	if (adapter->has_manage) {
4756		int manc2h = E1000_READ_REG(&adapter->hw, E1000_MANC2H);
4757		int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
4758
4759		/* disable hardware interception of ARP */
4760		manc &= ~(E1000_MANC_ARP_EN);
4761
4762                /* enable receiving management packets to the host */
4763		manc |= E1000_MANC_EN_MNG2HOST;
4764#define E1000_MNG2HOST_PORT_623 (1 << 5)
4765#define E1000_MNG2HOST_PORT_664 (1 << 6)
4766		manc2h |= E1000_MNG2HOST_PORT_623;
4767		manc2h |= E1000_MNG2HOST_PORT_664;
4768		E1000_WRITE_REG(&adapter->hw, E1000_MANC2H, manc2h);
4769		E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
4770	}
4771}
4772
4773/*
4774 * Give control back to hardware management
4775 * controller if there is one.
4776 */
4777static void
4778em_release_manageability(struct adapter *adapter)
4779{
4780	if (adapter->has_manage) {
4781		int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
4782
4783		/* re-enable hardware interception of ARP */
4784		manc |= E1000_MANC_ARP_EN;
4785		manc &= ~E1000_MANC_EN_MNG2HOST;
4786
4787		E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
4788	}
4789}
4790
4791/*
4792 * em_get_hw_control sets the {CTRL_EXT|FWSM}:DRV_LOAD bit.
4793 * For ASF and Pass Through versions of f/w this means
4794 * that the driver is loaded. For AMT version type f/w
4795 * this means that the network i/f is open.
4796 */
4797static void
4798em_get_hw_control(struct adapter *adapter)
4799{
4800	u32 ctrl_ext, swsm;
4801
4802	if (adapter->hw.mac.type == e1000_82573) {
4803		swsm = E1000_READ_REG(&adapter->hw, E1000_SWSM);
4804		E1000_WRITE_REG(&adapter->hw, E1000_SWSM,
4805		    swsm | E1000_SWSM_DRV_LOAD);
4806		return;
4807	}
4808	/* else */
4809	ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4810	E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
4811	    ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
4812	return;
4813}
4814
4815/*
4816 * em_release_hw_control resets {CTRL_EXT|FWSM}:DRV_LOAD bit.
4817 * For ASF and Pass Through versions of f/w this means that
4818 * the driver is no longer loaded. For AMT versions of the
4819 * f/w this means that the network i/f is closed.
4820 */
4821static void
4822em_release_hw_control(struct adapter *adapter)
4823{
4824	u32 ctrl_ext, swsm;
4825
4826	if (!adapter->has_manage)
4827		return;
4828
4829	if (adapter->hw.mac.type == e1000_82573) {
4830		swsm = E1000_READ_REG(&adapter->hw, E1000_SWSM);
4831		E1000_WRITE_REG(&adapter->hw, E1000_SWSM,
4832		    swsm & ~E1000_SWSM_DRV_LOAD);
4833		return;
4834	}
4835	/* else */
4836	ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4837	E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
4838	    ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
4839	return;
4840}
4841
4842static int
4843em_is_valid_ether_addr(u8 *addr)
4844{
4845	char zero_addr[6] = { 0, 0, 0, 0, 0, 0 };
4846
4847	if ((addr[0] & 1) || (!bcmp(addr, zero_addr, ETHER_ADDR_LEN))) {
4848		return (FALSE);
4849	}
4850
4851	return (TRUE);
4852}
4853
4854/*
4855** Parse the interface capabilities with regard
4856** to both system management and wake-on-lan for
4857** later use.
4858*/
4859static void
4860em_get_wakeup(device_t dev)
4861{
4862	struct adapter	*adapter = device_get_softc(dev);
4863	u16		eeprom_data = 0, device_id, apme_mask;
4864
4865	adapter->has_manage = e1000_enable_mng_pass_thru(&adapter->hw);
4866	apme_mask = EM_EEPROM_APME;
4867
4868	switch (adapter->hw.mac.type) {
4869	case e1000_82573:
4870	case e1000_82583:
4871		adapter->has_amt = TRUE;
4872		/* Falls thru */
4873	case e1000_82571:
4874	case e1000_82572:
4875	case e1000_80003es2lan:
4876		if (adapter->hw.bus.func == 1) {
4877			e1000_read_nvm(&adapter->hw,
4878			    NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data);
4879			break;
4880		} else
4881			e1000_read_nvm(&adapter->hw,
4882			    NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
4883		break;
4884	case e1000_ich8lan:
4885	case e1000_ich9lan:
4886	case e1000_ich10lan:
4887	case e1000_pchlan:
4888	case e1000_pch2lan:
4889		apme_mask = E1000_WUC_APME;
4890		adapter->has_amt = TRUE;
4891		eeprom_data = E1000_READ_REG(&adapter->hw, E1000_WUC);
4892		break;
4893	default:
4894		e1000_read_nvm(&adapter->hw,
4895		    NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
4896		break;
4897	}
4898	if (eeprom_data & apme_mask)
4899		adapter->wol = (E1000_WUFC_MAG | E1000_WUFC_MC);
4900	/*
4901         * We have the eeprom settings, now apply the special cases
4902         * where the eeprom may be wrong or the board won't support
4903         * wake on lan on a particular port
4904	 */
4905	device_id = pci_get_device(dev);
4906        switch (device_id) {
4907	case E1000_DEV_ID_82571EB_FIBER:
4908		/* Wake events only supported on port A for dual fiber
4909		 * regardless of eeprom setting */
4910		if (E1000_READ_REG(&adapter->hw, E1000_STATUS) &
4911		    E1000_STATUS_FUNC_1)
4912			adapter->wol = 0;
4913		break;
4914	case E1000_DEV_ID_82571EB_QUAD_COPPER:
4915	case E1000_DEV_ID_82571EB_QUAD_FIBER:
4916	case E1000_DEV_ID_82571EB_QUAD_COPPER_LP:
4917                /* if quad port adapter, disable WoL on all but port A */
4918		if (global_quad_port_a != 0)
4919			adapter->wol = 0;
4920		/* Reset for multiple quad port adapters */
4921		if (++global_quad_port_a == 4)
4922			global_quad_port_a = 0;
4923                break;
4924	}
4925	return;
4926}
4927
4928
4929/*
4930 * Enable PCI Wake On Lan capability
4931 */
4932static void
4933em_enable_wakeup(device_t dev)
4934{
4935	struct adapter	*adapter = device_get_softc(dev);
4936	struct ifnet	*ifp = adapter->ifp;
4937	u32		pmc, ctrl, ctrl_ext, rctl;
4938	u16     	status;
4939
4940	if ((pci_find_cap(dev, PCIY_PMG, &pmc) != 0))
4941		return;
4942
4943	/* Advertise the wakeup capability */
4944	ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL);
4945	ctrl |= (E1000_CTRL_SWDPIN2 | E1000_CTRL_SWDPIN3);
4946	E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl);
4947	E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
4948
4949	if ((adapter->hw.mac.type == e1000_ich8lan) ||
4950	    (adapter->hw.mac.type == e1000_pchlan) ||
4951	    (adapter->hw.mac.type == e1000_ich9lan) ||
4952	    (adapter->hw.mac.type == e1000_ich10lan))
4953		e1000_suspend_workarounds_ich8lan(&adapter->hw);
4954
4955	/* Keep the laser running on Fiber adapters */
4956	if (adapter->hw.phy.media_type == e1000_media_type_fiber ||
4957	    adapter->hw.phy.media_type == e1000_media_type_internal_serdes) {
4958		ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4959		ctrl_ext |= E1000_CTRL_EXT_SDP3_DATA;
4960		E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, ctrl_ext);
4961	}
4962
4963	/*
4964	** Determine type of Wakeup: note that wol
4965	** is set with all bits on by default.
4966	*/
4967	if ((ifp->if_capenable & IFCAP_WOL_MAGIC) == 0)
4968		adapter->wol &= ~E1000_WUFC_MAG;
4969
4970	if ((ifp->if_capenable & IFCAP_WOL_MCAST) == 0)
4971		adapter->wol &= ~E1000_WUFC_MC;
4972	else {
4973		rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
4974		rctl |= E1000_RCTL_MPE;
4975		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, rctl);
4976	}
4977
4978	if ((adapter->hw.mac.type == e1000_pchlan) ||
4979	    (adapter->hw.mac.type == e1000_pch2lan)) {
4980		if (em_enable_phy_wakeup(adapter))
4981			return;
4982	} else {
4983		E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
4984		E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
4985	}
4986
4987	if (adapter->hw.phy.type == e1000_phy_igp_3)
4988		e1000_igp3_phy_powerdown_workaround_ich8lan(&adapter->hw);
4989
4990        /* Request PME */
4991        status = pci_read_config(dev, pmc + PCIR_POWER_STATUS, 2);
4992	status &= ~(PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE);
4993	if (ifp->if_capenable & IFCAP_WOL)
4994		status |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE;
4995        pci_write_config(dev, pmc + PCIR_POWER_STATUS, status, 2);
4996
4997	return;
4998}
4999
5000/*
5001** WOL in the newer chipset interfaces (pchlan)
5002** require thing to be copied into the phy
5003*/
5004static int
5005em_enable_phy_wakeup(struct adapter *adapter)
5006{
5007	struct e1000_hw *hw = &adapter->hw;
5008	u32 mreg, ret = 0;
5009	u16 preg;
5010
5011	/* copy MAC RARs to PHY RARs */
5012	e1000_copy_rx_addrs_to_phy_ich8lan(hw);
5013
5014	/* copy MAC MTA to PHY MTA */
5015	for (int i = 0; i < adapter->hw.mac.mta_reg_count; i++) {
5016		mreg = E1000_READ_REG_ARRAY(hw, E1000_MTA, i);
5017		e1000_write_phy_reg(hw, BM_MTA(i), (u16)(mreg & 0xFFFF));
5018		e1000_write_phy_reg(hw, BM_MTA(i) + 1,
5019		    (u16)((mreg >> 16) & 0xFFFF));
5020	}
5021
5022	/* configure PHY Rx Control register */
5023	e1000_read_phy_reg(&adapter->hw, BM_RCTL, &preg);
5024	mreg = E1000_READ_REG(hw, E1000_RCTL);
5025	if (mreg & E1000_RCTL_UPE)
5026		preg |= BM_RCTL_UPE;
5027	if (mreg & E1000_RCTL_MPE)
5028		preg |= BM_RCTL_MPE;
5029	preg &= ~(BM_RCTL_MO_MASK);
5030	if (mreg & E1000_RCTL_MO_3)
5031		preg |= (((mreg & E1000_RCTL_MO_3) >> E1000_RCTL_MO_SHIFT)
5032				<< BM_RCTL_MO_SHIFT);
5033	if (mreg & E1000_RCTL_BAM)
5034		preg |= BM_RCTL_BAM;
5035	if (mreg & E1000_RCTL_PMCF)
5036		preg |= BM_RCTL_PMCF;
5037	mreg = E1000_READ_REG(hw, E1000_CTRL);
5038	if (mreg & E1000_CTRL_RFCE)
5039		preg |= BM_RCTL_RFCE;
5040	e1000_write_phy_reg(&adapter->hw, BM_RCTL, preg);
5041
5042	/* enable PHY wakeup in MAC register */
5043	E1000_WRITE_REG(hw, E1000_WUC,
5044	    E1000_WUC_PHY_WAKE | E1000_WUC_PME_EN);
5045	E1000_WRITE_REG(hw, E1000_WUFC, adapter->wol);
5046
5047	/* configure and enable PHY wakeup in PHY registers */
5048	e1000_write_phy_reg(&adapter->hw, BM_WUFC, adapter->wol);
5049	e1000_write_phy_reg(&adapter->hw, BM_WUC, E1000_WUC_PME_EN);
5050
5051	/* activate PHY wakeup */
5052	ret = hw->phy.ops.acquire(hw);
5053	if (ret) {
5054		printf("Could not acquire PHY\n");
5055		return ret;
5056	}
5057	e1000_write_phy_reg_mdic(hw, IGP01E1000_PHY_PAGE_SELECT,
5058	                         (BM_WUC_ENABLE_PAGE << IGP_PAGE_SHIFT));
5059	ret = e1000_read_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, &preg);
5060	if (ret) {
5061		printf("Could not read PHY page 769\n");
5062		goto out;
5063	}
5064	preg |= BM_WUC_ENABLE_BIT | BM_WUC_HOST_WU_BIT;
5065	ret = e1000_write_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, preg);
5066	if (ret)
5067		printf("Could not set PHY Host Wakeup bit\n");
5068out:
5069	hw->phy.ops.release(hw);
5070
5071	return ret;
5072}
5073
5074static void
5075em_led_func(void *arg, int onoff)
5076{
5077	struct adapter	*adapter = arg;
5078
5079	EM_CORE_LOCK(adapter);
5080	if (onoff) {
5081		e1000_setup_led(&adapter->hw);
5082		e1000_led_on(&adapter->hw);
5083	} else {
5084		e1000_led_off(&adapter->hw);
5085		e1000_cleanup_led(&adapter->hw);
5086	}
5087	EM_CORE_UNLOCK(adapter);
5088}
5089
5090/*
5091** Disable the L0S and L1 LINK states
5092*/
5093static void
5094em_disable_aspm(struct adapter *adapter)
5095{
5096	int		base, reg;
5097	u16		link_cap,link_ctrl;
5098	device_t	dev = adapter->dev;
5099
5100	switch (adapter->hw.mac.type) {
5101		case e1000_82573:
5102		case e1000_82574:
5103		case e1000_82583:
5104			break;
5105		default:
5106			return;
5107	}
5108	if (pci_find_cap(dev, PCIY_EXPRESS, &base) != 0)
5109		return;
5110	reg = base + PCIR_EXPRESS_LINK_CAP;
5111	link_cap = pci_read_config(dev, reg, 2);
5112	if ((link_cap & PCIM_LINK_CAP_ASPM) == 0)
5113		return;
5114	reg = base + PCIR_EXPRESS_LINK_CTL;
5115	link_ctrl = pci_read_config(dev, reg, 2);
5116	link_ctrl &= 0xFFFC; /* turn off bit 1 and 2 */
5117	pci_write_config(dev, reg, link_ctrl, 2);
5118	return;
5119}
5120
5121/**********************************************************************
5122 *
5123 *  Update the board statistics counters.
5124 *
5125 **********************************************************************/
5126static void
5127em_update_stats_counters(struct adapter *adapter)
5128{
5129	struct ifnet   *ifp;
5130
5131	if(adapter->hw.phy.media_type == e1000_media_type_copper ||
5132	   (E1000_READ_REG(&adapter->hw, E1000_STATUS) & E1000_STATUS_LU)) {
5133		adapter->stats.symerrs += E1000_READ_REG(&adapter->hw, E1000_SYMERRS);
5134		adapter->stats.sec += E1000_READ_REG(&adapter->hw, E1000_SEC);
5135	}
5136	adapter->stats.crcerrs += E1000_READ_REG(&adapter->hw, E1000_CRCERRS);
5137	adapter->stats.mpc += E1000_READ_REG(&adapter->hw, E1000_MPC);
5138	adapter->stats.scc += E1000_READ_REG(&adapter->hw, E1000_SCC);
5139	adapter->stats.ecol += E1000_READ_REG(&adapter->hw, E1000_ECOL);
5140
5141	adapter->stats.mcc += E1000_READ_REG(&adapter->hw, E1000_MCC);
5142	adapter->stats.latecol += E1000_READ_REG(&adapter->hw, E1000_LATECOL);
5143	adapter->stats.colc += E1000_READ_REG(&adapter->hw, E1000_COLC);
5144	adapter->stats.dc += E1000_READ_REG(&adapter->hw, E1000_DC);
5145	adapter->stats.rlec += E1000_READ_REG(&adapter->hw, E1000_RLEC);
5146	adapter->stats.xonrxc += E1000_READ_REG(&adapter->hw, E1000_XONRXC);
5147	adapter->stats.xontxc += E1000_READ_REG(&adapter->hw, E1000_XONTXC);
5148	/*
5149	** For watchdog management we need to know if we have been
5150	** paused during the last interval, so capture that here.
5151	*/
5152	adapter->pause_frames = E1000_READ_REG(&adapter->hw, E1000_XOFFRXC);
5153	adapter->stats.xoffrxc += adapter->pause_frames;
5154	adapter->stats.xofftxc += E1000_READ_REG(&adapter->hw, E1000_XOFFTXC);
5155	adapter->stats.fcruc += E1000_READ_REG(&adapter->hw, E1000_FCRUC);
5156	adapter->stats.prc64 += E1000_READ_REG(&adapter->hw, E1000_PRC64);
5157	adapter->stats.prc127 += E1000_READ_REG(&adapter->hw, E1000_PRC127);
5158	adapter->stats.prc255 += E1000_READ_REG(&adapter->hw, E1000_PRC255);
5159	adapter->stats.prc511 += E1000_READ_REG(&adapter->hw, E1000_PRC511);
5160	adapter->stats.prc1023 += E1000_READ_REG(&adapter->hw, E1000_PRC1023);
5161	adapter->stats.prc1522 += E1000_READ_REG(&adapter->hw, E1000_PRC1522);
5162	adapter->stats.gprc += E1000_READ_REG(&adapter->hw, E1000_GPRC);
5163	adapter->stats.bprc += E1000_READ_REG(&adapter->hw, E1000_BPRC);
5164	adapter->stats.mprc += E1000_READ_REG(&adapter->hw, E1000_MPRC);
5165	adapter->stats.gptc += E1000_READ_REG(&adapter->hw, E1000_GPTC);
5166
5167	/* For the 64-bit byte counters the low dword must be read first. */
5168	/* Both registers clear on the read of the high dword */
5169
5170	adapter->stats.gorc += E1000_READ_REG(&adapter->hw, E1000_GORCL) +
5171	    ((u64)E1000_READ_REG(&adapter->hw, E1000_GORCH) << 32);
5172	adapter->stats.gotc += E1000_READ_REG(&adapter->hw, E1000_GOTCL) +
5173	    ((u64)E1000_READ_REG(&adapter->hw, E1000_GOTCH) << 32);
5174
5175	adapter->stats.rnbc += E1000_READ_REG(&adapter->hw, E1000_RNBC);
5176	adapter->stats.ruc += E1000_READ_REG(&adapter->hw, E1000_RUC);
5177	adapter->stats.rfc += E1000_READ_REG(&adapter->hw, E1000_RFC);
5178	adapter->stats.roc += E1000_READ_REG(&adapter->hw, E1000_ROC);
5179	adapter->stats.rjc += E1000_READ_REG(&adapter->hw, E1000_RJC);
5180
5181	adapter->stats.tor += E1000_READ_REG(&adapter->hw, E1000_TORH);
5182	adapter->stats.tot += E1000_READ_REG(&adapter->hw, E1000_TOTH);
5183
5184	adapter->stats.tpr += E1000_READ_REG(&adapter->hw, E1000_TPR);
5185	adapter->stats.tpt += E1000_READ_REG(&adapter->hw, E1000_TPT);
5186	adapter->stats.ptc64 += E1000_READ_REG(&adapter->hw, E1000_PTC64);
5187	adapter->stats.ptc127 += E1000_READ_REG(&adapter->hw, E1000_PTC127);
5188	adapter->stats.ptc255 += E1000_READ_REG(&adapter->hw, E1000_PTC255);
5189	adapter->stats.ptc511 += E1000_READ_REG(&adapter->hw, E1000_PTC511);
5190	adapter->stats.ptc1023 += E1000_READ_REG(&adapter->hw, E1000_PTC1023);
5191	adapter->stats.ptc1522 += E1000_READ_REG(&adapter->hw, E1000_PTC1522);
5192	adapter->stats.mptc += E1000_READ_REG(&adapter->hw, E1000_MPTC);
5193	adapter->stats.bptc += E1000_READ_REG(&adapter->hw, E1000_BPTC);
5194
5195	/* Interrupt Counts */
5196
5197	adapter->stats.iac += E1000_READ_REG(&adapter->hw, E1000_IAC);
5198	adapter->stats.icrxptc += E1000_READ_REG(&adapter->hw, E1000_ICRXPTC);
5199	adapter->stats.icrxatc += E1000_READ_REG(&adapter->hw, E1000_ICRXATC);
5200	adapter->stats.ictxptc += E1000_READ_REG(&adapter->hw, E1000_ICTXPTC);
5201	adapter->stats.ictxatc += E1000_READ_REG(&adapter->hw, E1000_ICTXATC);
5202	adapter->stats.ictxqec += E1000_READ_REG(&adapter->hw, E1000_ICTXQEC);
5203	adapter->stats.ictxqmtc += E1000_READ_REG(&adapter->hw, E1000_ICTXQMTC);
5204	adapter->stats.icrxdmtc += E1000_READ_REG(&adapter->hw, E1000_ICRXDMTC);
5205	adapter->stats.icrxoc += E1000_READ_REG(&adapter->hw, E1000_ICRXOC);
5206
5207	if (adapter->hw.mac.type >= e1000_82543) {
5208		adapter->stats.algnerrc +=
5209		E1000_READ_REG(&adapter->hw, E1000_ALGNERRC);
5210		adapter->stats.rxerrc +=
5211		E1000_READ_REG(&adapter->hw, E1000_RXERRC);
5212		adapter->stats.tncrs +=
5213		E1000_READ_REG(&adapter->hw, E1000_TNCRS);
5214		adapter->stats.cexterr +=
5215		E1000_READ_REG(&adapter->hw, E1000_CEXTERR);
5216		adapter->stats.tsctc +=
5217		E1000_READ_REG(&adapter->hw, E1000_TSCTC);
5218		adapter->stats.tsctfc +=
5219		E1000_READ_REG(&adapter->hw, E1000_TSCTFC);
5220	}
5221	ifp = adapter->ifp;
5222
5223	ifp->if_collisions = adapter->stats.colc;
5224
5225	/* Rx Errors */
5226	ifp->if_ierrors = adapter->dropped_pkts + adapter->stats.rxerrc +
5227	    adapter->stats.crcerrs + adapter->stats.algnerrc +
5228	    adapter->stats.ruc + adapter->stats.roc +
5229	    adapter->stats.mpc + adapter->stats.cexterr;
5230
5231	/* Tx Errors */
5232	ifp->if_oerrors = adapter->stats.ecol +
5233	    adapter->stats.latecol + adapter->watchdog_events;
5234}
5235
5236/* Export a single 32-bit register via a read-only sysctl. */
5237static int
5238em_sysctl_reg_handler(SYSCTL_HANDLER_ARGS)
5239{
5240	struct adapter *adapter;
5241	u_int val;
5242
5243	adapter = oidp->oid_arg1;
5244	val = E1000_READ_REG(&adapter->hw, oidp->oid_arg2);
5245	return (sysctl_handle_int(oidp, &val, 0, req));
5246}
5247
5248/*
5249 * Add sysctl variables, one per statistic, to the system.
5250 */
5251static void
5252em_add_hw_stats(struct adapter *adapter)
5253{
5254	device_t dev = adapter->dev;
5255
5256	struct tx_ring *txr = adapter->tx_rings;
5257	struct rx_ring *rxr = adapter->rx_rings;
5258
5259	struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev);
5260	struct sysctl_oid *tree = device_get_sysctl_tree(dev);
5261	struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree);
5262	struct e1000_hw_stats *stats = &adapter->stats;
5263
5264	struct sysctl_oid *stat_node, *queue_node, *int_node;
5265	struct sysctl_oid_list *stat_list, *queue_list, *int_list;
5266
5267#define QUEUE_NAME_LEN 32
5268	char namebuf[QUEUE_NAME_LEN];
5269
5270	/* Driver Statistics */
5271	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "link_irq",
5272			CTLFLAG_RD, &adapter->link_irq,
5273			"Link MSIX IRQ Handled");
5274	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "mbuf_alloc_fail",
5275			 CTLFLAG_RD, &adapter->mbuf_alloc_failed,
5276			 "Std mbuf failed");
5277	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "cluster_alloc_fail",
5278			 CTLFLAG_RD, &adapter->mbuf_cluster_failed,
5279			 "Std mbuf cluster failed");
5280	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "dropped",
5281			CTLFLAG_RD, &adapter->dropped_pkts,
5282			"Driver dropped packets");
5283	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_dma_fail",
5284			CTLFLAG_RD, &adapter->no_tx_dma_setup,
5285			"Driver tx dma failure in xmit");
5286	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_overruns",
5287			CTLFLAG_RD, &adapter->rx_overruns,
5288			"RX overruns");
5289	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "watchdog_timeouts",
5290			CTLFLAG_RD, &adapter->watchdog_events,
5291			"Watchdog timeouts");
5292
5293	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "device_control",
5294			CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_CTRL,
5295			em_sysctl_reg_handler, "IU",
5296			"Device Control Register");
5297	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rx_control",
5298			CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_RCTL,
5299			em_sysctl_reg_handler, "IU",
5300			"Receiver Control Register");
5301	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_high_water",
5302			CTLFLAG_RD, &adapter->hw.fc.high_water, 0,
5303			"Flow Control High Watermark");
5304	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_low_water",
5305			CTLFLAG_RD, &adapter->hw.fc.low_water, 0,
5306			"Flow Control Low Watermark");
5307
5308	for (int i = 0; i < adapter->num_queues; i++, rxr++, txr++) {
5309		snprintf(namebuf, QUEUE_NAME_LEN, "queue%d", i);
5310		queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
5311					    CTLFLAG_RD, NULL, "Queue Name");
5312		queue_list = SYSCTL_CHILDREN(queue_node);
5313
5314		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_head",
5315				CTLTYPE_UINT | CTLFLAG_RD, adapter,
5316				E1000_TDH(txr->me),
5317				em_sysctl_reg_handler, "IU",
5318 				"Transmit Descriptor Head");
5319		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_tail",
5320				CTLTYPE_UINT | CTLFLAG_RD, adapter,
5321				E1000_TDT(txr->me),
5322				em_sysctl_reg_handler, "IU",
5323 				"Transmit Descriptor Tail");
5324		SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "tx_irq",
5325				CTLFLAG_RD, &txr->tx_irq,
5326				"Queue MSI-X Transmit Interrupts");
5327		SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "no_desc_avail",
5328				CTLFLAG_RD, &txr->no_desc_avail,
5329				"Queue No Descriptor Available");
5330
5331		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_head",
5332				CTLTYPE_UINT | CTLFLAG_RD, adapter,
5333				E1000_RDH(rxr->me),
5334				em_sysctl_reg_handler, "IU",
5335				"Receive Descriptor Head");
5336		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_tail",
5337				CTLTYPE_UINT | CTLFLAG_RD, adapter,
5338				E1000_RDT(rxr->me),
5339				em_sysctl_reg_handler, "IU",
5340				"Receive Descriptor Tail");
5341		SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "rx_irq",
5342				CTLFLAG_RD, &rxr->rx_irq,
5343				"Queue MSI-X Receive Interrupts");
5344	}
5345
5346	/* MAC stats get their own sub node */
5347
5348	stat_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "mac_stats",
5349				    CTLFLAG_RD, NULL, "Statistics");
5350	stat_list = SYSCTL_CHILDREN(stat_node);
5351
5352	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "excess_coll",
5353			CTLFLAG_RD, &stats->ecol,
5354			"Excessive collisions");
5355	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "single_coll",
5356			CTLFLAG_RD, &stats->scc,
5357			"Single collisions");
5358	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "multiple_coll",
5359			CTLFLAG_RD, &stats->mcc,
5360			"Multiple collisions");
5361	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "late_coll",
5362			CTLFLAG_RD, &stats->latecol,
5363			"Late collisions");
5364	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "collision_count",
5365			CTLFLAG_RD, &stats->colc,
5366			"Collision Count");
5367	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "symbol_errors",
5368			CTLFLAG_RD, &adapter->stats.symerrs,
5369			"Symbol Errors");
5370	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "sequence_errors",
5371			CTLFLAG_RD, &adapter->stats.sec,
5372			"Sequence Errors");
5373	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "defer_count",
5374			CTLFLAG_RD, &adapter->stats.dc,
5375			"Defer Count");
5376	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "missed_packets",
5377			CTLFLAG_RD, &adapter->stats.mpc,
5378			"Missed Packets");
5379	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_no_buff",
5380			CTLFLAG_RD, &adapter->stats.rnbc,
5381			"Receive No Buffers");
5382	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_undersize",
5383			CTLFLAG_RD, &adapter->stats.ruc,
5384			"Receive Undersize");
5385	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_fragmented",
5386			CTLFLAG_RD, &adapter->stats.rfc,
5387			"Fragmented Packets Received ");
5388	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_oversize",
5389			CTLFLAG_RD, &adapter->stats.roc,
5390			"Oversized Packets Received");
5391	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_jabber",
5392			CTLFLAG_RD, &adapter->stats.rjc,
5393			"Recevied Jabber");
5394	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_errs",
5395			CTLFLAG_RD, &adapter->stats.rxerrc,
5396			"Receive Errors");
5397	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "crc_errs",
5398			CTLFLAG_RD, &adapter->stats.crcerrs,
5399			"CRC errors");
5400	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "alignment_errs",
5401			CTLFLAG_RD, &adapter->stats.algnerrc,
5402			"Alignment Errors");
5403	/* On 82575 these are collision counts */
5404	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "coll_ext_errs",
5405			CTLFLAG_RD, &adapter->stats.cexterr,
5406			"Collision/Carrier extension errors");
5407	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_recvd",
5408			CTLFLAG_RD, &adapter->stats.xonrxc,
5409			"XON Received");
5410	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_txd",
5411			CTLFLAG_RD, &adapter->stats.xontxc,
5412			"XON Transmitted");
5413	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_recvd",
5414			CTLFLAG_RD, &adapter->stats.xoffrxc,
5415			"XOFF Received");
5416	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_txd",
5417			CTLFLAG_RD, &adapter->stats.xofftxc,
5418			"XOFF Transmitted");
5419
5420	/* Packet Reception Stats */
5421	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_recvd",
5422			CTLFLAG_RD, &adapter->stats.tpr,
5423			"Total Packets Received ");
5424	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_recvd",
5425			CTLFLAG_RD, &adapter->stats.gprc,
5426			"Good Packets Received");
5427	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_recvd",
5428			CTLFLAG_RD, &adapter->stats.bprc,
5429			"Broadcast Packets Received");
5430	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_recvd",
5431			CTLFLAG_RD, &adapter->stats.mprc,
5432			"Multicast Packets Received");
5433	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_64",
5434			CTLFLAG_RD, &adapter->stats.prc64,
5435			"64 byte frames received ");
5436	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_65_127",
5437			CTLFLAG_RD, &adapter->stats.prc127,
5438			"65-127 byte frames received");
5439	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_128_255",
5440			CTLFLAG_RD, &adapter->stats.prc255,
5441			"128-255 byte frames received");
5442	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_256_511",
5443			CTLFLAG_RD, &adapter->stats.prc511,
5444			"256-511 byte frames received");
5445	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_512_1023",
5446			CTLFLAG_RD, &adapter->stats.prc1023,
5447			"512-1023 byte frames received");
5448	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_1024_1522",
5449			CTLFLAG_RD, &adapter->stats.prc1522,
5450			"1023-1522 byte frames received");
5451 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_recvd",
5452 			CTLFLAG_RD, &adapter->stats.gorc,
5453 			"Good Octets Received");
5454
5455	/* Packet Transmission Stats */
5456 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_txd",
5457 			CTLFLAG_RD, &adapter->stats.gotc,
5458 			"Good Octets Transmitted");
5459	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_txd",
5460			CTLFLAG_RD, &adapter->stats.tpt,
5461			"Total Packets Transmitted");
5462	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd",
5463			CTLFLAG_RD, &adapter->stats.gptc,
5464			"Good Packets Transmitted");
5465	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_txd",
5466			CTLFLAG_RD, &adapter->stats.bptc,
5467			"Broadcast Packets Transmitted");
5468	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_txd",
5469			CTLFLAG_RD, &adapter->stats.mptc,
5470			"Multicast Packets Transmitted");
5471	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_64",
5472			CTLFLAG_RD, &adapter->stats.ptc64,
5473			"64 byte frames transmitted ");
5474	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_65_127",
5475			CTLFLAG_RD, &adapter->stats.ptc127,
5476			"65-127 byte frames transmitted");
5477	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_128_255",
5478			CTLFLAG_RD, &adapter->stats.ptc255,
5479			"128-255 byte frames transmitted");
5480	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_256_511",
5481			CTLFLAG_RD, &adapter->stats.ptc511,
5482			"256-511 byte frames transmitted");
5483	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_512_1023",
5484			CTLFLAG_RD, &adapter->stats.ptc1023,
5485			"512-1023 byte frames transmitted");
5486	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_1024_1522",
5487			CTLFLAG_RD, &adapter->stats.ptc1522,
5488			"1024-1522 byte frames transmitted");
5489	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tso_txd",
5490			CTLFLAG_RD, &adapter->stats.tsctc,
5491			"TSO Contexts Transmitted");
5492	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tso_ctx_fail",
5493			CTLFLAG_RD, &adapter->stats.tsctfc,
5494			"TSO Contexts Failed");
5495
5496
5497	/* Interrupt Stats */
5498
5499	int_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "interrupts",
5500				    CTLFLAG_RD, NULL, "Interrupt Statistics");
5501	int_list = SYSCTL_CHILDREN(int_node);
5502
5503	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "asserts",
5504			CTLFLAG_RD, &adapter->stats.iac,
5505			"Interrupt Assertion Count");
5506
5507	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_pkt_timer",
5508			CTLFLAG_RD, &adapter->stats.icrxptc,
5509			"Interrupt Cause Rx Pkt Timer Expire Count");
5510
5511	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_abs_timer",
5512			CTLFLAG_RD, &adapter->stats.icrxatc,
5513			"Interrupt Cause Rx Abs Timer Expire Count");
5514
5515	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_pkt_timer",
5516			CTLFLAG_RD, &adapter->stats.ictxptc,
5517			"Interrupt Cause Tx Pkt Timer Expire Count");
5518
5519	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_abs_timer",
5520			CTLFLAG_RD, &adapter->stats.ictxatc,
5521			"Interrupt Cause Tx Abs Timer Expire Count");
5522
5523	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_queue_empty",
5524			CTLFLAG_RD, &adapter->stats.ictxqec,
5525			"Interrupt Cause Tx Queue Empty Count");
5526
5527	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_queue_min_thresh",
5528			CTLFLAG_RD, &adapter->stats.ictxqmtc,
5529			"Interrupt Cause Tx Queue Min Thresh Count");
5530
5531	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_desc_min_thresh",
5532			CTLFLAG_RD, &adapter->stats.icrxdmtc,
5533			"Interrupt Cause Rx Desc Min Thresh Count");
5534
5535	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_overrun",
5536			CTLFLAG_RD, &adapter->stats.icrxoc,
5537			"Interrupt Cause Receiver Overrun Count");
5538}
5539
5540/**********************************************************************
5541 *
5542 *  This routine provides a way to dump out the adapter eeprom,
5543 *  often a useful debug/service tool. This only dumps the first
5544 *  32 words, stuff that matters is in that extent.
5545 *
5546 **********************************************************************/
5547static int
5548em_sysctl_nvm_info(SYSCTL_HANDLER_ARGS)
5549{
5550	struct adapter *adapter = (struct adapter *)arg1;
5551	int error;
5552	int result;
5553
5554	result = -1;
5555	error = sysctl_handle_int(oidp, &result, 0, req);
5556
5557	if (error || !req->newptr)
5558		return (error);
5559
5560	/*
5561	 * This value will cause a hex dump of the
5562	 * first 32 16-bit words of the EEPROM to
5563	 * the screen.
5564	 */
5565	if (result == 1)
5566		em_print_nvm_info(adapter);
5567
5568	return (error);
5569}
5570
5571static void
5572em_print_nvm_info(struct adapter *adapter)
5573{
5574	u16	eeprom_data;
5575	int	i, j, row = 0;
5576
5577	/* Its a bit crude, but it gets the job done */
5578	printf("\nInterface EEPROM Dump:\n");
5579	printf("Offset\n0x0000  ");
5580	for (i = 0, j = 0; i < 32; i++, j++) {
5581		if (j == 8) { /* Make the offset block */
5582			j = 0; ++row;
5583			printf("\n0x00%x0  ",row);
5584		}
5585		e1000_read_nvm(&adapter->hw, i, 1, &eeprom_data);
5586		printf("%04x ", eeprom_data);
5587	}
5588	printf("\n");
5589}
5590
5591static int
5592em_sysctl_int_delay(SYSCTL_HANDLER_ARGS)
5593{
5594	struct em_int_delay_info *info;
5595	struct adapter *adapter;
5596	u32 regval;
5597	int error, usecs, ticks;
5598
5599	info = (struct em_int_delay_info *)arg1;
5600	usecs = info->value;
5601	error = sysctl_handle_int(oidp, &usecs, 0, req);
5602	if (error != 0 || req->newptr == NULL)
5603		return (error);
5604	if (usecs < 0 || usecs > EM_TICKS_TO_USECS(65535))
5605		return (EINVAL);
5606	info->value = usecs;
5607	ticks = EM_USECS_TO_TICKS(usecs);
5608
5609	adapter = info->adapter;
5610
5611	EM_CORE_LOCK(adapter);
5612	regval = E1000_READ_OFFSET(&adapter->hw, info->offset);
5613	regval = (regval & ~0xffff) | (ticks & 0xffff);
5614	/* Handle a few special cases. */
5615	switch (info->offset) {
5616	case E1000_RDTR:
5617		break;
5618	case E1000_TIDV:
5619		if (ticks == 0) {
5620			adapter->txd_cmd &= ~E1000_TXD_CMD_IDE;
5621			/* Don't write 0 into the TIDV register. */
5622			regval++;
5623		} else
5624			adapter->txd_cmd |= E1000_TXD_CMD_IDE;
5625		break;
5626	}
5627	E1000_WRITE_OFFSET(&adapter->hw, info->offset, regval);
5628	EM_CORE_UNLOCK(adapter);
5629	return (0);
5630}
5631
5632static void
5633em_add_int_delay_sysctl(struct adapter *adapter, const char *name,
5634	const char *description, struct em_int_delay_info *info,
5635	int offset, int value)
5636{
5637	info->adapter = adapter;
5638	info->offset = offset;
5639	info->value = value;
5640	SYSCTL_ADD_PROC(device_get_sysctl_ctx(adapter->dev),
5641	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
5642	    OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW,
5643	    info, 0, em_sysctl_int_delay, "I", description);
5644}
5645
5646static void
5647em_set_sysctl_value(struct adapter *adapter, const char *name,
5648	const char *description, int *limit, int value)
5649{
5650	*limit = value;
5651	SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev),
5652	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
5653	    OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW, limit, value, description);
5654}
5655
5656
5657/*
5658** Set flow control using sysctl:
5659** Flow control values:
5660**      0 - off
5661**      1 - rx pause
5662**      2 - tx pause
5663**      3 - full
5664*/
5665static int
5666em_set_flowcntl(SYSCTL_HANDLER_ARGS)
5667{
5668        int		error;
5669	static int	input = 3; /* default is full */
5670        struct adapter	*adapter = (struct adapter *) arg1;
5671
5672        error = sysctl_handle_int(oidp, &input, 0, req);
5673
5674        if ((error) || (req->newptr == NULL))
5675                return (error);
5676
5677	if (input == adapter->fc) /* no change? */
5678		return (error);
5679
5680        switch (input) {
5681                case e1000_fc_rx_pause:
5682                case e1000_fc_tx_pause:
5683                case e1000_fc_full:
5684                case e1000_fc_none:
5685                        adapter->hw.fc.requested_mode = input;
5686			adapter->fc = input;
5687                        break;
5688                default:
5689			/* Do nothing */
5690			return (error);
5691        }
5692
5693        adapter->hw.fc.current_mode = adapter->hw.fc.requested_mode;
5694        e1000_force_mac_fc(&adapter->hw);
5695        return (error);
5696}
5697
5698
5699static int
5700em_sysctl_debug_info(SYSCTL_HANDLER_ARGS)
5701{
5702	struct adapter *adapter;
5703	int error;
5704	int result;
5705
5706	result = -1;
5707	error = sysctl_handle_int(oidp, &result, 0, req);
5708
5709	if (error || !req->newptr)
5710		return (error);
5711
5712	if (result == 1) {
5713		adapter = (struct adapter *)arg1;
5714		em_print_debug_info(adapter);
5715        }
5716
5717	return (error);
5718}
5719
5720/*
5721** This routine is meant to be fluid, add whatever is
5722** needed for debugging a problem.  -jfv
5723*/
5724static void
5725em_print_debug_info(struct adapter *adapter)
5726{
5727	device_t dev = adapter->dev;
5728	struct tx_ring *txr = adapter->tx_rings;
5729	struct rx_ring *rxr = adapter->rx_rings;
5730
5731	if (adapter->ifp->if_drv_flags & IFF_DRV_RUNNING)
5732		printf("Interface is RUNNING ");
5733	else
5734		printf("Interface is NOT RUNNING\n");
5735
5736	if (adapter->ifp->if_drv_flags & IFF_DRV_OACTIVE)
5737		printf("and INACTIVE\n");
5738	else
5739		printf("and ACTIVE\n");
5740
5741	device_printf(dev, "hw tdh = %d, hw tdt = %d\n",
5742	    E1000_READ_REG(&adapter->hw, E1000_TDH(0)),
5743	    E1000_READ_REG(&adapter->hw, E1000_TDT(0)));
5744	device_printf(dev, "hw rdh = %d, hw rdt = %d\n",
5745	    E1000_READ_REG(&adapter->hw, E1000_RDH(0)),
5746	    E1000_READ_REG(&adapter->hw, E1000_RDT(0)));
5747	device_printf(dev, "Tx Queue Status = %d\n", txr->queue_status);
5748	device_printf(dev, "TX descriptors avail = %d\n",
5749	    txr->tx_avail);
5750	device_printf(dev, "Tx Descriptors avail failure = %ld\n",
5751	    txr->no_desc_avail);
5752	device_printf(dev, "RX discarded packets = %ld\n",
5753	    rxr->rx_discarded);
5754	device_printf(dev, "RX Next to Check = %d\n", rxr->next_to_check);
5755	device_printf(dev, "RX Next to Refresh = %d\n", rxr->next_to_refresh);
5756}
5757