/******************************************************************************

  Copyright (c) 2001-2013, Intel Corporation
  All rights reserved.

  Redistribution and use in source and binary forms, with or without
  modification, are permitted provided that the following conditions are met:

   1. Redistributions of source code must retain the above copyright notice,
      this list of conditions and the following disclaimer.

   2. Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.

   3. Neither the name of the Intel Corporation nor the names of its
      contributors may be used to endorse or promote products derived from
      this software without specific prior written permission.

  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  POSSIBILITY OF SUCH DAMAGE.

******************************************************************************/
/*$FreeBSD: stable/9/sys/dev/e1000/if_em.c 257768 2013-11-06 22:36:36Z luigi $*/

#include "opt_inet.h"
#include "opt_inet6.h"

#ifdef HAVE_KERNEL_OPTION_HEADERS
#include "opt_device_polling.h"
#endif

#include <sys/param.h>
#include <sys/systm.h>
#if __FreeBSD_version >= 800000
#include <sys/buf_ring.h>
#endif
#include <sys/bus.h>
#include <sys/endian.h>
#include <sys/kernel.h>
#include <sys/kthread.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/module.h>
#include <sys/rman.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/sysctl.h>
#include <sys/taskqueue.h>
#include <sys/eventhandler.h>
#include <machine/bus.h>
#include <machine/resource.h>

#include <net/bpf.h>
#include <net/ethernet.h>
#include <net/if.h>
#include <net/if_arp.h>
#include <net/if_dl.h>
#include <net/if_media.h>

#include <net/if_types.h>
#include <net/if_vlan_var.h>

#include <netinet/in_systm.h>
#include <netinet/in.h>
#include <netinet/if_ether.h>
#include <netinet/ip.h>
#include <netinet/ip6.h>
#include <netinet/tcp.h>
#include <netinet/udp.h>

#include <machine/in_cksum.h>
#include <dev/led/led.h>
#include <dev/pci/pcivar.h>
#include <dev/pci/pcireg.h>

#include "e1000_api.h"
#include "e1000_82571.h"
#include "if_em.h"

/*********************************************************************
 *  Set this to one to display debug statistics
 *********************************************************************/
int	em_display_debug_stats = 0;

/*********************************************************************
 *  Driver version:
 *********************************************************************/
char em_driver_version[] = "7.3.8";

/*********************************************************************
 *  PCI Device ID Table
 *
 *  Used by probe to select the devices to attach to.
 *  The last field stores an index into e1000_strings.
 *  The last entry must be all 0s.
 *
 *  { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index }
 *********************************************************************/

static em_vendor_info_t em_vendor_info_array[] =
{
	/* Intel(R) PRO/1000 Network Connection */
	{ 0x8086, E1000_DEV_ID_82571EB_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_SERDES_DUAL,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_SERDES_QUAD,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER_LP,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_QUAD_FIBER,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571PT_QUAD_COPPER,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82572EI_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82572EI_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82572EI_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82572EI,		PCI_ANY_ID, PCI_ANY_ID, 0},

	{ 0x8086, E1000_DEV_ID_82573E,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82573E_IAMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82573L,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82583V,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_SPT,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_SPT,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_DPT,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_DPT,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IGP_M_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IGP_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IGP_C,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IFE,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IFE_GT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IFE_G,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IGP_M,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_82567V_3,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IGP_M_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IGP_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IGP_C,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IGP_M,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IGP_M_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IFE,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IFE_GT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IFE_G,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_BM,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82574L,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82574LA,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH10_R_BM_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH10_R_BM_LF,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH10_R_BM_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH10_D_BM_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH10_D_BM_LF,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH10_D_BM_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_M_HV_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_M_HV_LC,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_D_HV_DM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_D_HV_DC,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH2_LV_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH2_LV_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_LPT_I217_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_LPT_I217_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_LPTLP_I218_LM,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_LPTLP_I218_V,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	/* required last entry */
	{ 0, 0, 0, 0, 0}
};

/*********************************************************************
 *  Table of branding strings for all supported NICs.
 *********************************************************************/

static char *em_strings[] = {
	"Intel(R) PRO/1000 Network Connection"
};

/*********************************************************************
 *  Function prototypes
 *********************************************************************/
static int	em_probe(device_t);
static int	em_attach(device_t);
static int	em_detach(device_t);
static int	em_shutdown(device_t);
static int	em_suspend(device_t);
static int	em_resume(device_t);
#ifdef EM_MULTIQUEUE
static int	em_mq_start(struct ifnet *, struct mbuf *);
static int	em_mq_start_locked(struct ifnet *,
		    struct tx_ring *, struct mbuf *);
static void	em_qflush(struct ifnet *);
#else
static void	em_start(struct ifnet *);
static void	em_start_locked(struct ifnet *, struct tx_ring *);
#endif
static int	em_ioctl(struct ifnet *, u_long, caddr_t);
static void	em_init(void *);
static void	em_init_locked(struct adapter *);
static void	em_stop(void *);
static void	em_media_status(struct ifnet *, struct ifmediareq *);
static int	em_media_change(struct ifnet *);
static void	em_identify_hardware(struct adapter *);
static int	em_allocate_pci_resources(struct adapter *);
static int	em_allocate_legacy(struct adapter *);
static int	em_allocate_msix(struct adapter *);
static int	em_allocate_queues(struct adapter *);
static int	em_setup_msix(struct adapter *);
static void	em_free_pci_resources(struct adapter *);
static void	em_local_timer(void *);
static void	em_reset(struct adapter *);
static int	em_setup_interface(device_t, struct adapter *);

static void	em_setup_transmit_structures(struct adapter *);
static void	em_initialize_transmit_unit(struct adapter *);
static int	em_allocate_transmit_buffers(struct tx_ring *);
static void	em_free_transmit_structures(struct adapter *);
static void	em_free_transmit_buffers(struct tx_ring *);

static int	em_setup_receive_structures(struct adapter *);
static int	em_allocate_receive_buffers(struct rx_ring *);
static void	em_initialize_receive_unit(struct adapter *);
static void	em_free_receive_structures(struct adapter *);
static void	em_free_receive_buffers(struct rx_ring *);

static void	em_enable_intr(struct adapter *);
static void	em_disable_intr(struct adapter *);
static void	em_update_stats_counters(struct adapter *);
static void	em_add_hw_stats(struct adapter *adapter);
static void	em_txeof(struct tx_ring *);
static bool	em_rxeof(struct rx_ring *, int, int *);
#ifndef __NO_STRICT_ALIGNMENT
static int	em_fixup_rx(struct rx_ring *);
#endif
static void	em_receive_checksum(struct e1000_rx_desc *, struct mbuf *);
static void	em_transmit_checksum_setup(struct tx_ring *, struct mbuf *, int,
		    struct ip *, u32 *, u32 *);
static void	em_tso_setup(struct tx_ring *, struct mbuf *, int, struct ip *,
		    struct tcphdr *, u32 *, u32 *);
static void	em_set_promisc(struct adapter *);
static void	em_disable_promisc(struct adapter *);
static void	em_set_multi(struct adapter *);
static void	em_update_link_status(struct adapter *);
static void	em_refresh_mbufs(struct rx_ring *, int);
static void	em_register_vlan(void *, struct ifnet *, u16);
static void	em_unregister_vlan(void *, struct ifnet *, u16);
static void	em_setup_vlan_hw_support(struct adapter *);
static int	em_xmit(struct tx_ring *, struct mbuf **);
static int	em_dma_malloc(struct adapter *, bus_size_t,
		    struct em_dma_alloc *, int);
static void	em_dma_free(struct adapter *, struct em_dma_alloc *);
static int	em_sysctl_nvm_info(SYSCTL_HANDLER_ARGS);
static void	em_print_nvm_info(struct adapter *);
static int	em_sysctl_debug_info(SYSCTL_HANDLER_ARGS);
static void	em_print_debug_info(struct adapter *);
static int	em_is_valid_ether_addr(u8 *);
static int	em_sysctl_int_delay(SYSCTL_HANDLER_ARGS);
static void	em_add_int_delay_sysctl(struct adapter *, const char *,
		    const char *, struct em_int_delay_info *, int, int);
/* Management and WOL Support */
static void	em_init_manageability(struct adapter *);
static void	em_release_manageability(struct adapter *);
static void	em_get_hw_control(struct adapter *);
static void	em_release_hw_control(struct adapter *);
static void	em_get_wakeup(device_t);
static void	em_enable_wakeup(device_t);
static int	em_enable_phy_wakeup(struct adapter *);
static void	em_led_func(void *, int);
static void	em_disable_aspm(struct adapter *);

static int	em_irq_fast(void *);

/* MSIX handlers */
static void	em_msix_tx(void *);
static void	em_msix_rx(void *);
static void	em_msix_link(void *);
static void	em_handle_tx(void *context, int pending);
static void	em_handle_rx(void *context, int pending);
static void	em_handle_link(void *context, int pending);

static void	em_set_sysctl_value(struct adapter *, const char *,
		    const char *, int *, int);
static int	em_set_flowcntl(SYSCTL_HANDLER_ARGS);
static int	em_sysctl_eee(SYSCTL_HANDLER_ARGS);

static __inline void em_rx_discard(struct rx_ring *, int);

#ifdef DEVICE_POLLING
static poll_handler_t em_poll;
#endif /* DEVICE_POLLING */

/*********************************************************************
 *  FreeBSD Device Interface Entry Points
 *********************************************************************/

static device_method_t em_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe, em_probe),
	DEVMETHOD(device_attach, em_attach),
	DEVMETHOD(device_detach, em_detach),
	DEVMETHOD(device_shutdown, em_shutdown),
	DEVMETHOD(device_suspend, em_suspend),
	DEVMETHOD(device_resume, em_resume),
	DEVMETHOD_END
};

static driver_t em_driver = {
	"em", em_methods, sizeof(struct adapter),
};

devclass_t em_devclass;
DRIVER_MODULE(em, pci, em_driver, em_devclass, 0, 0);
MODULE_DEPEND(em, pci, 1, 1, 1);
MODULE_DEPEND(em, ether, 1, 1, 1);

/*********************************************************************
 *  Tunable default values.
 *********************************************************************/

#define EM_TICKS_TO_USECS(ticks)	((1024 * (ticks) + 500) / 1000)
#define EM_USECS_TO_TICKS(usecs)	((1000 * (usecs) + 512) / 1024)
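/*
 * The hardware delay registers these defaults feed (TIDV, RDTR, etc.)
 * count in ~1.024 us ticks, hence the paired rounding conversions: e.g.
 * EM_USECS_TO_TICKS(32) = (1000 * 32 + 512) / 1024 = 31 ticks, and
 * EM_TICKS_TO_USECS(31) rounds back to 32 usecs.
 */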
#define M_TSO_LEN			66

#define MAX_INTS_PER_SEC	8000
#define DEFAULT_ITR		(1000000000/(MAX_INTS_PER_SEC * 256))
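/*
 * The ITR register counts in 256 ns units, so with MAX_INTS_PER_SEC of
 * 8000 this works out to 1000000000 / (8000 * 256) = 488; 488 * 256 ns
 * is ~125 us between interrupts, i.e. ~8000 interrupts per second.
 */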

/* Allow common code without TSO */
#ifndef CSUM_TSO
#define CSUM_TSO	0
#endif

static SYSCTL_NODE(_hw, OID_AUTO, em, CTLFLAG_RD, 0, "EM driver parameters");

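/*
 * The CTLFLAG_RDTUN knobs below may also be set as boot-time tunables,
 * e.g. in /boot/loader.conf (illustrative values, not recommendations):
 *
 *	hw.em.rx_int_delay="0"
 *	hw.em.rxd="2048"
 */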
static int em_tx_int_delay_dflt = EM_TICKS_TO_USECS(EM_TIDV);
static int em_rx_int_delay_dflt = EM_TICKS_TO_USECS(EM_RDTR);
TUNABLE_INT("hw.em.tx_int_delay", &em_tx_int_delay_dflt);
TUNABLE_INT("hw.em.rx_int_delay", &em_rx_int_delay_dflt);
SYSCTL_INT(_hw_em, OID_AUTO, tx_int_delay, CTLFLAG_RDTUN, &em_tx_int_delay_dflt,
    0, "Default transmit interrupt delay in usecs");
SYSCTL_INT(_hw_em, OID_AUTO, rx_int_delay, CTLFLAG_RDTUN, &em_rx_int_delay_dflt,
    0, "Default receive interrupt delay in usecs");

static int em_tx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_TADV);
static int em_rx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_RADV);
TUNABLE_INT("hw.em.tx_abs_int_delay", &em_tx_abs_int_delay_dflt);
TUNABLE_INT("hw.em.rx_abs_int_delay", &em_rx_abs_int_delay_dflt);
SYSCTL_INT(_hw_em, OID_AUTO, tx_abs_int_delay, CTLFLAG_RDTUN,
    &em_tx_abs_int_delay_dflt, 0,
    "Default transmit interrupt delay limit in usecs");
SYSCTL_INT(_hw_em, OID_AUTO, rx_abs_int_delay, CTLFLAG_RDTUN,
    &em_rx_abs_int_delay_dflt, 0,
    "Default receive interrupt delay limit in usecs");

static int em_rxd = EM_DEFAULT_RXD;
static int em_txd = EM_DEFAULT_TXD;
TUNABLE_INT("hw.em.rxd", &em_rxd);
TUNABLE_INT("hw.em.txd", &em_txd);
SYSCTL_INT(_hw_em, OID_AUTO, rxd, CTLFLAG_RDTUN, &em_rxd, 0,
    "Number of receive descriptors per queue");
SYSCTL_INT(_hw_em, OID_AUTO, txd, CTLFLAG_RDTUN, &em_txd, 0,
    "Number of transmit descriptors per queue");

static int em_smart_pwr_down = FALSE;
TUNABLE_INT("hw.em.smart_pwr_down", &em_smart_pwr_down);
SYSCTL_INT(_hw_em, OID_AUTO, smart_pwr_down, CTLFLAG_RDTUN, &em_smart_pwr_down,
    0, "Set to true to leave smart power down enabled on newer adapters");

/* Controls whether promiscuous also shows bad packets */
static int em_debug_sbp = FALSE;
TUNABLE_INT("hw.em.sbp", &em_debug_sbp);
SYSCTL_INT(_hw_em, OID_AUTO, sbp, CTLFLAG_RDTUN, &em_debug_sbp, 0,
    "Show bad packets in promiscuous mode");

static int em_enable_msix = TRUE;
TUNABLE_INT("hw.em.enable_msix", &em_enable_msix);
SYSCTL_INT(_hw_em, OID_AUTO, enable_msix, CTLFLAG_RDTUN, &em_enable_msix, 0,
    "Enable MSI-X interrupts");

/* How many packets rxeof tries to clean at a time */
static int em_rx_process_limit = 100;
TUNABLE_INT("hw.em.rx_process_limit", &em_rx_process_limit);
SYSCTL_INT(_hw_em, OID_AUTO, rx_process_limit, CTLFLAG_RDTUN,
    &em_rx_process_limit, 0,
    "Maximum number of received packets to process "
    "at a time, -1 means unlimited");

/* Energy Efficient Ethernet - default to OFF */
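/*
 * Note: this value seeds hw->dev_spec.ich8lan.eee_disable in em_attach()
 * below, so the default of 1 leaves EEE disabled and 0 enables it.
 */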
static int eee_setting = 1;
TUNABLE_INT("hw.em.eee_setting", &eee_setting);
SYSCTL_INT(_hw_em, OID_AUTO, eee_setting, CTLFLAG_RDTUN, &eee_setting, 0,
    "Enable Energy Efficient Ethernet");

/* Global used in WOL setup with multiport cards */
static int global_quad_port_a = 0;

#ifdef DEV_NETMAP	/* see ixgbe.c for details */
#include <dev/netmap/if_em_netmap.h>
#endif /* DEV_NETMAP */

/*********************************************************************
 *  Device identification routine
 *
 *  em_probe determines whether the driver should be loaded on an
 *  adapter, based on the adapter's PCI vendor/device ID.
 *
 *  return BUS_PROBE_DEFAULT on success, positive on failure
 *********************************************************************/

static int
em_probe(device_t dev)
{
	char		adapter_name[60];
	u16		pci_vendor_id = 0;
	u16		pci_device_id = 0;
	u16		pci_subvendor_id = 0;
	u16		pci_subdevice_id = 0;
	em_vendor_info_t *ent;

	INIT_DEBUGOUT("em_probe: begin");

	pci_vendor_id = pci_get_vendor(dev);
	if (pci_vendor_id != EM_VENDOR_ID)
		return (ENXIO);

	pci_device_id = pci_get_device(dev);
	pci_subvendor_id = pci_get_subvendor(dev);
	pci_subdevice_id = pci_get_subdevice(dev);

	ent = em_vendor_info_array;
	while (ent->vendor_id != 0) {
		if ((pci_vendor_id == ent->vendor_id) &&
		    (pci_device_id == ent->device_id) &&

		    ((pci_subvendor_id == ent->subvendor_id) ||
		    (ent->subvendor_id == PCI_ANY_ID)) &&

		    ((pci_subdevice_id == ent->subdevice_id) ||
		    (ent->subdevice_id == PCI_ANY_ID))) {
			sprintf(adapter_name, "%s %s",
				em_strings[ent->index],
				em_driver_version);
			device_set_desc_copy(dev, adapter_name);
			return (BUS_PROBE_DEFAULT);
		}
		ent++;
	}

	return (ENXIO);
}

/*********************************************************************
 *  Device initialization routine
 *
 *  The attach entry point is called when the driver is being loaded.
 *  This routine identifies the type of hardware, allocates all resources
 *  and initializes the hardware.
 *
 *  return 0 on success, positive on failure
 *********************************************************************/

static int
em_attach(device_t dev)
{
	struct adapter	*adapter;
	struct e1000_hw	*hw;
	int		error = 0;

	INIT_DEBUGOUT("em_attach: begin");

	if (resource_disabled("em", device_get_unit(dev))) {
		device_printf(dev, "Disabled by device hint\n");
		return (ENXIO);
	}

	adapter = device_get_softc(dev);
	adapter->dev = adapter->osdep.dev = dev;
	hw = &adapter->hw;
	EM_CORE_LOCK_INIT(adapter, device_get_nameunit(dev));

	/* SYSCTL stuff */
	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
	    OID_AUTO, "nvm", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
	    em_sysctl_nvm_info, "I", "NVM Information");

	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
	    OID_AUTO, "debug", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
	    em_sysctl_debug_info, "I", "Debug Information");

	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
	    OID_AUTO, "fc", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
	    em_set_flowcntl, "I", "Flow Control");

	callout_init_mtx(&adapter->timer, &adapter->core_mtx, 0);

	/* Determine hardware and mac info */
	em_identify_hardware(adapter);

	/* Setup PCI resources */
	if (em_allocate_pci_resources(adapter)) {
		device_printf(dev, "Allocation of PCI resources failed\n");
		error = ENXIO;
		goto err_pci;
	}

	/*
	** For ICH8 and family we need to
	** map the flash memory, and this
	** must happen after the MAC is
	** identified
	*/
	if ((hw->mac.type == e1000_ich8lan) ||
	    (hw->mac.type == e1000_ich9lan) ||
	    (hw->mac.type == e1000_ich10lan) ||
	    (hw->mac.type == e1000_pchlan) ||
	    (hw->mac.type == e1000_pch2lan) ||
	    (hw->mac.type == e1000_pch_lpt)) {
		int rid = EM_BAR_TYPE_FLASH;
		adapter->flash = bus_alloc_resource_any(dev,
		    SYS_RES_MEMORY, &rid, RF_ACTIVE);
		if (adapter->flash == NULL) {
			device_printf(dev, "Mapping of Flash failed\n");
			error = ENXIO;
			goto err_pci;
		}
		/* This is used in the shared code */
		hw->flash_address = (u8 *)adapter->flash;
		adapter->osdep.flash_bus_space_tag =
		    rman_get_bustag(adapter->flash);
		adapter->osdep.flash_bus_space_handle =
		    rman_get_bushandle(adapter->flash);
	}

	/* Do Shared Code initialization */
	if (e1000_setup_init_funcs(hw, TRUE)) {
		device_printf(dev, "Setup of Shared code failed\n");
		error = ENXIO;
		goto err_pci;
	}

	e1000_get_bus_info(hw);

	/* Set up some sysctls for the tunable interrupt delays */
	em_add_int_delay_sysctl(adapter, "rx_int_delay",
	    "receive interrupt delay in usecs", &adapter->rx_int_delay,
	    E1000_REGISTER(hw, E1000_RDTR), em_rx_int_delay_dflt);
	em_add_int_delay_sysctl(adapter, "tx_int_delay",
	    "transmit interrupt delay in usecs", &adapter->tx_int_delay,
	    E1000_REGISTER(hw, E1000_TIDV), em_tx_int_delay_dflt);
	em_add_int_delay_sysctl(adapter, "rx_abs_int_delay",
	    "receive interrupt delay limit in usecs",
	    &adapter->rx_abs_int_delay,
	    E1000_REGISTER(hw, E1000_RADV),
	    em_rx_abs_int_delay_dflt);
	em_add_int_delay_sysctl(adapter, "tx_abs_int_delay",
	    "transmit interrupt delay limit in usecs",
	    &adapter->tx_abs_int_delay,
	    E1000_REGISTER(hw, E1000_TADV),
	    em_tx_abs_int_delay_dflt);
	em_add_int_delay_sysctl(adapter, "itr",
	    "interrupt delay limit in usecs/4",
	    &adapter->tx_itr,
	    E1000_REGISTER(hw, E1000_ITR),
	    DEFAULT_ITR);

	/* Sysctl for limiting the amount of work done in the taskqueue */
	em_set_sysctl_value(adapter, "rx_processing_limit",
	    "max number of rx packets to process", &adapter->rx_process_limit,
	    em_rx_process_limit);

	/*
	 * Validate the number of transmit and receive descriptors. They
	 * must not exceed the hardware maximum, and must be a multiple
	 * of EM_DBA_ALIGN.
	 */
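	/*
	 * For example, the 1024-descriptor default at 16 bytes per
	 * legacy descriptor occupies 16384 bytes, a multiple of the
	 * 128-byte EM_DBA_ALIGN defined in if_em.h.
	 */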
	if (((em_txd * sizeof(struct e1000_tx_desc)) % EM_DBA_ALIGN) != 0 ||
	    (em_txd > EM_MAX_TXD) || (em_txd < EM_MIN_TXD)) {
		device_printf(dev, "Using %d TX descriptors instead of %d!\n",
		    EM_DEFAULT_TXD, em_txd);
		adapter->num_tx_desc = EM_DEFAULT_TXD;
	} else
		adapter->num_tx_desc = em_txd;

	if (((em_rxd * sizeof(struct e1000_rx_desc)) % EM_DBA_ALIGN) != 0 ||
	    (em_rxd > EM_MAX_RXD) || (em_rxd < EM_MIN_RXD)) {
		device_printf(dev, "Using %d RX descriptors instead of %d!\n",
		    EM_DEFAULT_RXD, em_rxd);
		adapter->num_rx_desc = EM_DEFAULT_RXD;
	} else
		adapter->num_rx_desc = em_rxd;

	hw->mac.autoneg = DO_AUTO_NEG;
	hw->phy.autoneg_wait_to_complete = FALSE;
	hw->phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;

	/* Copper options */
	if (hw->phy.media_type == e1000_media_type_copper) {
		hw->phy.mdix = AUTO_ALL_MODES;
		hw->phy.disable_polarity_correction = FALSE;
		hw->phy.ms_type = EM_MASTER_SLAVE;
	}

	/*
	 * Set the frame limits assuming
	 * standard ethernet sized frames.
	 */
	adapter->hw.mac.max_frame_size =
	    ETHERMTU + ETHER_HDR_LEN + ETHERNET_FCS_SIZE;

	/*
	 * This controls when hardware reports transmit completion
	 * status.
	 */
	hw->mac.report_tx_early = 1;

	/*
	** Get queue/ring memory
	*/
	if (em_allocate_queues(adapter)) {
		error = ENOMEM;
		goto err_pci;
	}

	/* Allocate multicast array memory. */
	adapter->mta = malloc(sizeof(u8) * ETH_ADDR_LEN *
	    MAX_NUM_MULTICAST_ADDRESSES, M_DEVBUF, M_NOWAIT);
	if (adapter->mta == NULL) {
		device_printf(dev, "Can not allocate multicast setup array\n");
		error = ENOMEM;
		goto err_late;
	}

	/* Check SOL/IDER usage */
	if (e1000_check_reset_block(hw))
		device_printf(dev, "PHY reset is blocked"
		    " due to SOL/IDER session.\n");

	/* Sysctl for setting Energy Efficient Ethernet */
	hw->dev_spec.ich8lan.eee_disable = eee_setting;
	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
	    OID_AUTO, "eee_control", CTLTYPE_INT|CTLFLAG_RW,
	    adapter, 0, em_sysctl_eee, "I",
	    "Disable Energy Efficient Ethernet");

	/*
	** Start from a known state; this is
	** important for reading the NVM and
	** MAC address from it.
	*/
	e1000_reset_hw(hw);


	/* Make sure we have a good EEPROM before we read from it */
	if (e1000_validate_nvm_checksum(hw) < 0) {
		/*
		** Some PCI-E parts fail the first check due to
		** the link being in a sleep state; call it again.
		** If it fails a second time, it's a real issue.
		*/
		if (e1000_validate_nvm_checksum(hw) < 0) {
			device_printf(dev,
			    "The EEPROM Checksum Is Not Valid\n");
			error = EIO;
			goto err_late;
		}
	}

	/* Copy the permanent MAC address out of the EEPROM */
	if (e1000_read_mac_addr(hw) < 0) {
		device_printf(dev, "EEPROM read error while reading MAC"
		    " address\n");
		error = EIO;
		goto err_late;
	}

	if (!em_is_valid_ether_addr(hw->mac.addr)) {
		device_printf(dev, "Invalid MAC address\n");
		error = EIO;
		goto err_late;
	}

	/*
	**  Do interrupt configuration
	*/
	if (adapter->msix > 1) /* Do MSIX */
		error = em_allocate_msix(adapter);
	else  /* MSI or Legacy */
		error = em_allocate_legacy(adapter);
	if (error)
		goto err_late;

	/*
	 * Get Wake-on-Lan and Management info for later use
	 */
	em_get_wakeup(dev);

	/* Setup OS specific network interface */
	if (em_setup_interface(dev, adapter) != 0)
		goto err_late;

	em_reset(adapter);

	/* Initialize statistics */
	em_update_stats_counters(adapter);

	hw->mac.get_link_status = 1;
	em_update_link_status(adapter);

	/* Register for VLAN events */
	adapter->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
	    em_register_vlan, adapter, EVENTHANDLER_PRI_FIRST);
	adapter->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
	    em_unregister_vlan, adapter, EVENTHANDLER_PRI_FIRST);

	em_add_hw_stats(adapter);

	/* Non-AMT based hardware can now take control from firmware */
	if (adapter->has_manage && !adapter->has_amt)
		em_get_hw_control(adapter);

	/* Tell the stack that the interface is not active */
	adapter->ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
	adapter->ifp->if_drv_flags |= IFF_DRV_OACTIVE;

	adapter->led_dev = led_create(em_led_func, adapter,
	    device_get_nameunit(dev));
#ifdef DEV_NETMAP
	em_netmap_attach(adapter);
#endif /* DEV_NETMAP */

	INIT_DEBUGOUT("em_attach: end");

	return (0);

err_late:
	em_free_transmit_structures(adapter);
	em_free_receive_structures(adapter);
	em_release_hw_control(adapter);
	if (adapter->ifp != NULL)
		if_free(adapter->ifp);
err_pci:
	em_free_pci_resources(adapter);
	free(adapter->mta, M_DEVBUF);
	EM_CORE_LOCK_DESTROY(adapter);

	return (error);
}

/*********************************************************************
 *  Device removal routine
 *
 *  The detach entry point is called when the driver is being removed.
 *  This routine stops the adapter and deallocates all the resources
 *  that were allocated for driver operation.
 *
 *  return 0 on success, positive on failure
 *********************************************************************/

static int
em_detach(device_t dev)
{
	struct adapter	*adapter = device_get_softc(dev);
	struct ifnet	*ifp = adapter->ifp;

	INIT_DEBUGOUT("em_detach: begin");

	/* Make sure VLANS are not using driver */
	if (adapter->ifp->if_vlantrunk != NULL) {
		device_printf(dev,"Vlan in use, detach first\n");
		return (EBUSY);
	}

#ifdef DEVICE_POLLING
	if (ifp->if_capenable & IFCAP_POLLING)
		ether_poll_deregister(ifp);
#endif

	if (adapter->led_dev != NULL)
		led_destroy(adapter->led_dev);

	EM_CORE_LOCK(adapter);
	adapter->in_detach = 1;
	em_stop(adapter);
	EM_CORE_UNLOCK(adapter);
	EM_CORE_LOCK_DESTROY(adapter);

	e1000_phy_hw_reset(&adapter->hw);

	em_release_manageability(adapter);
	em_release_hw_control(adapter);

	/* Unregister VLAN events */
	if (adapter->vlan_attach != NULL)
		EVENTHANDLER_DEREGISTER(vlan_config, adapter->vlan_attach);
	if (adapter->vlan_detach != NULL)
		EVENTHANDLER_DEREGISTER(vlan_unconfig, adapter->vlan_detach);

	ether_ifdetach(adapter->ifp);
	callout_drain(&adapter->timer);

#ifdef DEV_NETMAP
	netmap_detach(ifp);
#endif /* DEV_NETMAP */

	em_free_pci_resources(adapter);
	bus_generic_detach(dev);
	if_free(ifp);

	em_free_transmit_structures(adapter);
	em_free_receive_structures(adapter);

	em_release_hw_control(adapter);
	free(adapter->mta, M_DEVBUF);

	return (0);
}

/*********************************************************************
 *
 *  Shutdown entry point
 *
 **********************************************************************/

static int
em_shutdown(device_t dev)
{
	return em_suspend(dev);
}

/*
 * Suspend/resume device methods.
 */
static int
em_suspend(device_t dev)
{
	struct adapter *adapter = device_get_softc(dev);

	EM_CORE_LOCK(adapter);

	em_release_manageability(adapter);
	em_release_hw_control(adapter);
	em_enable_wakeup(dev);

	EM_CORE_UNLOCK(adapter);

	return bus_generic_suspend(dev);
}

static int
em_resume(device_t dev)
{
	struct adapter *adapter = device_get_softc(dev);
	struct tx_ring	*txr = adapter->tx_rings;
	struct ifnet *ifp = adapter->ifp;

	EM_CORE_LOCK(adapter);
	if (adapter->hw.mac.type == e1000_pch2lan)
		e1000_resume_workarounds_pchlan(&adapter->hw);
	em_init_locked(adapter);
	em_init_manageability(adapter);

	if ((ifp->if_flags & IFF_UP) &&
	    (ifp->if_drv_flags & IFF_DRV_RUNNING) && adapter->link_active) {
		for (int i = 0; i < adapter->num_queues; i++, txr++) {
			EM_TX_LOCK(txr);
#ifdef EM_MULTIQUEUE
			if (!drbr_empty(ifp, txr->br))
				em_mq_start_locked(ifp, txr, NULL);
#else
			if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
				em_start_locked(ifp, txr);
#endif
			EM_TX_UNLOCK(txr);
		}
	}
	EM_CORE_UNLOCK(adapter);

	return bus_generic_resume(dev);
}


#ifdef EM_MULTIQUEUE
/*********************************************************************
 *  Multiqueue Transmit routines
 *
 *  em_mq_start is called by the stack to initiate a transmit.
 *  However, if the hardware is busy the driver can queue the request
 *  rather than do an immediate send. It is this queueing, rather than
 *  having multiple tx queues, that is the advantage in this driver.
 **********************************************************************/
static int
em_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr, struct mbuf *m)
{
	struct adapter	*adapter = txr->adapter;
	struct mbuf	*next;
	int		err = 0, enq = 0;

	if ((ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) !=
	    IFF_DRV_RUNNING || adapter->link_active == 0) {
		if (m != NULL)
			err = drbr_enqueue(ifp, txr->br, m);
		return (err);
	}

	enq = 0;
	if (m != NULL) {
		err = drbr_enqueue(ifp, txr->br, m);
		if (err)
			return (err);
	}

	/* Process the queue */
	while ((next = drbr_peek(ifp, txr->br)) != NULL) {
		if ((err = em_xmit(txr, &next)) != 0) {
			if (next == NULL)
				drbr_advance(ifp, txr->br);
			else
				drbr_putback(ifp, txr->br, next);
			break;
		}
		drbr_advance(ifp, txr->br);
		enq++;
		ifp->if_obytes += next->m_pkthdr.len;
		if (next->m_flags & M_MCAST)
			ifp->if_omcasts++;
		ETHER_BPF_MTAP(ifp, next);
		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
			break;
	}

	if (enq > 0) {
		/* Set the watchdog */
		txr->queue_status = EM_QUEUE_WORKING;
		txr->watchdog_time = ticks;
	}

	if (txr->tx_avail < EM_MAX_SCATTER)
		em_txeof(txr);
	if (txr->tx_avail < EM_MAX_SCATTER)
		ifp->if_drv_flags |= IFF_DRV_OACTIVE;
	return (err);
}

/*
** Multiqueue capable stack interface
*/
static int
em_mq_start(struct ifnet *ifp, struct mbuf *m)
{
	struct adapter	*adapter = ifp->if_softc;
	struct tx_ring	*txr = adapter->tx_rings;
	int		error;

	if (EM_TX_TRYLOCK(txr)) {
		error = em_mq_start_locked(ifp, txr, m);
		EM_TX_UNLOCK(txr);
	} else
		error = drbr_enqueue(ifp, txr->br, m);

	return (error);
}

/*
** Flush all ring buffers
*/
static void
em_qflush(struct ifnet *ifp)
{
	struct adapter	*adapter = ifp->if_softc;
	struct tx_ring	*txr = adapter->tx_rings;
	struct mbuf	*m;

	for (int i = 0; i < adapter->num_queues; i++, txr++) {
		EM_TX_LOCK(txr);
		while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
			m_freem(m);
		EM_TX_UNLOCK(txr);
	}
	if_qflush(ifp);
}
#else  /* !EM_MULTIQUEUE */

static void
em_start_locked(struct ifnet *ifp, struct tx_ring *txr)
{
	struct adapter	*adapter = ifp->if_softc;
	struct mbuf	*m_head;

	EM_TX_LOCK_ASSERT(txr);

	if ((ifp->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) !=
	    IFF_DRV_RUNNING)
		return;

	if (!adapter->link_active)
		return;

	while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
		/* Call cleanup if number of TX descriptors low */
		if (txr->tx_avail <= EM_TX_CLEANUP_THRESHOLD)
			em_txeof(txr);
		if (txr->tx_avail < EM_MAX_SCATTER) {
			ifp->if_drv_flags |= IFF_DRV_OACTIVE;
			break;
		}
		IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
		if (m_head == NULL)
			break;
		/*
		 *  Encapsulation can modify our pointer, and/or make it
		 *  NULL on failure.  In that event, we can't requeue.
		 */
		if (em_xmit(txr, &m_head)) {
			if (m_head == NULL)
				break;
			IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
			break;
		}

		/* Send a copy of the frame to the BPF listener */
		ETHER_BPF_MTAP(ifp, m_head);

		/* Set timeout in case hardware has problems transmitting. */
		txr->watchdog_time = ticks;
		txr->queue_status = EM_QUEUE_WORKING;
	}

	return;
}

static void
em_start(struct ifnet *ifp)
{
	struct adapter	*adapter = ifp->if_softc;
	struct tx_ring	*txr = adapter->tx_rings;

	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
		EM_TX_LOCK(txr);
		em_start_locked(ifp, txr);
		EM_TX_UNLOCK(txr);
	}
	return;
}
#endif /* EM_MULTIQUEUE */

/*********************************************************************
 *  Ioctl entry point
 *
 *  em_ioctl is called when the user wants to configure the
 *  interface.
 *
 *  return 0 on success, positive on failure
 **********************************************************************/

static int
em_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
{
	struct adapter	*adapter = ifp->if_softc;
	struct ifreq	*ifr = (struct ifreq *)data;
#if defined(INET) || defined(INET6)
	struct ifaddr	*ifa = (struct ifaddr *)data;
#endif
	bool		avoid_reset = FALSE;
	int		error = 0;

	if (adapter->in_detach)
		return (error);

	switch (command) {
	case SIOCSIFADDR:
#ifdef INET
		if (ifa->ifa_addr->sa_family == AF_INET)
			avoid_reset = TRUE;
#endif
#ifdef INET6
		if (ifa->ifa_addr->sa_family == AF_INET6)
			avoid_reset = TRUE;
#endif
		/*
		** Calling init results in link renegotiation,
		** so we avoid doing it when possible.
		*/
		if (avoid_reset) {
			ifp->if_flags |= IFF_UP;
			if (!(ifp->if_drv_flags & IFF_DRV_RUNNING))
				em_init(adapter);
#ifdef INET
			if (!(ifp->if_flags & IFF_NOARP))
				arp_ifinit(ifp, ifa);
#endif
		} else
			error = ether_ioctl(ifp, command, data);
		break;
	case SIOCSIFMTU:
	    {
		int max_frame_size;

		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFMTU (Set Interface MTU)");

		EM_CORE_LOCK(adapter);
		switch (adapter->hw.mac.type) {
		case e1000_82571:
		case e1000_82572:
		case e1000_ich9lan:
		case e1000_ich10lan:
		case e1000_pch2lan:
		case e1000_pch_lpt:
		case e1000_82574:
		case e1000_82583:
		case e1000_80003es2lan:	/* 9K Jumbo Frame size */
			max_frame_size = 9234;
			break;
		case e1000_pchlan:
			max_frame_size = 4096;
			break;
			/* Adapters that do not support jumbo frames */
		case e1000_ich8lan:
			max_frame_size = ETHER_MAX_LEN;
			break;
		default:
			max_frame_size = MAX_JUMBO_FRAME_SIZE;
		}
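		/*
		 * e.g. with the 9234-byte limit above, the largest MTU
		 * accepted below is 9234 - ETHER_HDR_LEN (14) -
		 * ETHER_CRC_LEN (4) = 9216.
		 */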
		if (ifr->ifr_mtu > max_frame_size - ETHER_HDR_LEN -
		    ETHER_CRC_LEN) {
			EM_CORE_UNLOCK(adapter);
			error = EINVAL;
			break;
		}

		ifp->if_mtu = ifr->ifr_mtu;
		adapter->hw.mac.max_frame_size =
		    ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
		em_init_locked(adapter);
		EM_CORE_UNLOCK(adapter);
		break;
	    }
	case SIOCSIFFLAGS:
		IOCTL_DEBUGOUT("ioctl rcv'd:\
		    SIOCSIFFLAGS (Set Interface Flags)");
		EM_CORE_LOCK(adapter);
		if (ifp->if_flags & IFF_UP) {
			if ((ifp->if_drv_flags & IFF_DRV_RUNNING)) {
				if ((ifp->if_flags ^ adapter->if_flags) &
				    (IFF_PROMISC | IFF_ALLMULTI)) {
					em_disable_promisc(adapter);
					em_set_promisc(adapter);
				}
			} else
				em_init_locked(adapter);
		} else
			if (ifp->if_drv_flags & IFF_DRV_RUNNING)
				em_stop(adapter);
		adapter->if_flags = ifp->if_flags;
		EM_CORE_UNLOCK(adapter);
		break;
	case SIOCADDMULTI:
	case SIOCDELMULTI:
		IOCTL_DEBUGOUT("ioctl rcv'd: SIOC(ADD|DEL)MULTI");
		if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
			EM_CORE_LOCK(adapter);
			em_disable_intr(adapter);
			em_set_multi(adapter);
#ifdef DEVICE_POLLING
			if (!(ifp->if_capenable & IFCAP_POLLING))
#endif
				em_enable_intr(adapter);
			EM_CORE_UNLOCK(adapter);
		}
		break;
	case SIOCSIFMEDIA:
		/* Check SOL/IDER usage */
		EM_CORE_LOCK(adapter);
		if (e1000_check_reset_block(&adapter->hw)) {
			EM_CORE_UNLOCK(adapter);
			device_printf(adapter->dev, "Media change is"
			    " blocked due to SOL/IDER session.\n");
			break;
		}
		EM_CORE_UNLOCK(adapter);
		/* falls thru */
	case SIOCGIFMEDIA:
		IOCTL_DEBUGOUT("ioctl rcv'd: \
		    SIOCxIFMEDIA (Get/Set Interface Media)");
		error = ifmedia_ioctl(ifp, ifr, &adapter->media, command);
		break;
	case SIOCSIFCAP:
	    {
		int mask, reinit;

		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFCAP (Set Capabilities)");
		reinit = 0;
		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
#ifdef DEVICE_POLLING
		if (mask & IFCAP_POLLING) {
			if (ifr->ifr_reqcap & IFCAP_POLLING) {
				error = ether_poll_register(em_poll, ifp);
				if (error)
					return (error);
				EM_CORE_LOCK(adapter);
				em_disable_intr(adapter);
				ifp->if_capenable |= IFCAP_POLLING;
				EM_CORE_UNLOCK(adapter);
			} else {
				error = ether_poll_deregister(ifp);
				/* Enable interrupt even in error case */
				EM_CORE_LOCK(adapter);
				em_enable_intr(adapter);
				ifp->if_capenable &= ~IFCAP_POLLING;
				EM_CORE_UNLOCK(adapter);
			}
		}
#endif
		if (mask & IFCAP_HWCSUM) {
			ifp->if_capenable ^= IFCAP_HWCSUM;
			reinit = 1;
		}
		if (mask & IFCAP_TSO4) {
			ifp->if_capenable ^= IFCAP_TSO4;
			reinit = 1;
		}
		if (mask & IFCAP_VLAN_HWTAGGING) {
			ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
			reinit = 1;
		}
		if (mask & IFCAP_VLAN_HWFILTER) {
			ifp->if_capenable ^= IFCAP_VLAN_HWFILTER;
			reinit = 1;
		}
		if (mask & IFCAP_VLAN_HWTSO) {
			ifp->if_capenable ^= IFCAP_VLAN_HWTSO;
			reinit = 1;
		}
		if ((mask & IFCAP_WOL) &&
		    (ifp->if_capabilities & IFCAP_WOL) != 0) {
			if (mask & IFCAP_WOL_MCAST)
				ifp->if_capenable ^= IFCAP_WOL_MCAST;
			if (mask & IFCAP_WOL_MAGIC)
				ifp->if_capenable ^= IFCAP_WOL_MAGIC;
		}
		if (reinit && (ifp->if_drv_flags & IFF_DRV_RUNNING))
			em_init(adapter);
		VLAN_CAPABILITIES(ifp);
		break;
	    }

	default:
		error = ether_ioctl(ifp, command, data);
		break;
	}

	return (error);
}


/*********************************************************************
 *  Init entry point
 *
 *  This routine is used in two ways. It is used by the stack as the
 *  init entry point in the network interface structure. It is also
 *  used by the driver as a hw/sw initialization routine to get the
 *  hardware to a consistent state.
 *
 *  return 0 on success, positive on failure
 **********************************************************************/

static void
em_init_locked(struct adapter *adapter)
{
	struct ifnet	*ifp = adapter->ifp;
	device_t	dev = adapter->dev;

	INIT_DEBUGOUT("em_init: begin");

	EM_CORE_LOCK_ASSERT(adapter);

	em_disable_intr(adapter);
	callout_stop(&adapter->timer);

	/* Get the latest mac address, User can use a LAA */
	bcopy(IF_LLADDR(adapter->ifp), adapter->hw.mac.addr,
	    ETHER_ADDR_LEN);

	/* Put the address into the Receive Address Array */
	e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);

	/*
	 * With the 82571 adapter, RAR[0] may be overwritten
	 * when the other port is reset, so we keep a duplicate
	 * in RAR[14] for that eventuality; this assures
	 * the interface continues to function.
	 */
	if (adapter->hw.mac.type == e1000_82571) {
		e1000_set_laa_state_82571(&adapter->hw, TRUE);
		e1000_rar_set(&adapter->hw, adapter->hw.mac.addr,
		    E1000_RAR_ENTRIES - 1);
	}

	/* Initialize the hardware */
	em_reset(adapter);
	em_update_link_status(adapter);

	/* Setup VLAN support, basic and offload if available */
	E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);

	/* Set hardware offload abilities */
	ifp->if_hwassist = 0;
	if (ifp->if_capenable & IFCAP_TXCSUM)
		ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP);
	if (ifp->if_capenable & IFCAP_TSO4)
		ifp->if_hwassist |= CSUM_TSO;

	/* Configure for OS presence */
	em_init_manageability(adapter);

	/* Prepare transmit descriptors and buffers */
	em_setup_transmit_structures(adapter);
	em_initialize_transmit_unit(adapter);

	/* Setup Multicast table */
	em_set_multi(adapter);

	/*
	** Figure out the desired mbuf
	** pool for doing jumbos
	*/
	if (adapter->hw.mac.max_frame_size <= 2048)
		adapter->rx_mbuf_sz = MCLBYTES;
	else if (adapter->hw.mac.max_frame_size <= 4096)
		adapter->rx_mbuf_sz = MJUMPAGESIZE;
	else
		adapter->rx_mbuf_sz = MJUM9BYTES;
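	/*
	 * For instance, a 9000-byte MTU gives a 9018-byte max frame,
	 * which lands in 9k MJUM9BYTES clusters, while a standard
	 * 1518-byte frame fits the 2k MCLBYTES pool.
	 */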
1350
1351	/* Prepare receive descriptors and buffers */
1352	if (em_setup_receive_structures(adapter)) {
1353		device_printf(dev, "Could not setup receive structures\n");
1354		em_stop(adapter);
1355		return;
1356	}
1357	em_initialize_receive_unit(adapter);
1358
1359	/* Use real VLAN Filter support? */
1360	if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) {
1361		if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
1362			/* Use real VLAN Filter support */
1363			em_setup_vlan_hw_support(adapter);
1364		else {
1365			u32 ctrl;
1366			ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL);
1367			ctrl |= E1000_CTRL_VME;
1368			E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl);
1369		}
1370	}
1371
1372	/* Don't lose promiscuous settings */
1373	em_set_promisc(adapter);
1374
1375	/* Set the interface as ACTIVE */
1376	ifp->if_drv_flags |= IFF_DRV_RUNNING;
1377	ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
1378
1379	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
1380	e1000_clear_hw_cntrs_base_generic(&adapter->hw);
1381
1382	/* MSI/X configuration for 82574 */
1383	if (adapter->hw.mac.type == e1000_82574) {
1384		int tmp;
1385		tmp = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
1386		tmp |= E1000_CTRL_EXT_PBA_CLR;
1387		E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, tmp);
1388		/* Set the IVAR - interrupt vector routing. */
1389		E1000_WRITE_REG(&adapter->hw, E1000_IVAR, adapter->ivars);
1390	}
1391
1392#ifdef DEVICE_POLLING
1393	/*
1394	 * Only enable interrupts if we are not polling, make sure
1395	 * they are off otherwise.
1396	 */
1397	if (ifp->if_capenable & IFCAP_POLLING)
1398		em_disable_intr(adapter);
1399	else
1400#endif /* DEVICE_POLLING */
1401		em_enable_intr(adapter);
1402
1403	/* AMT based hardware can now take control from firmware */
1404	if (adapter->has_manage && adapter->has_amt)
1405		em_get_hw_control(adapter);
1406}
1407
1408static void
1409em_init(void *arg)
1410{
1411	struct adapter *adapter = arg;
1412
1413	EM_CORE_LOCK(adapter);
1414	em_init_locked(adapter);
1415	EM_CORE_UNLOCK(adapter);
1416}
1417
1418
1419#ifdef DEVICE_POLLING
1420/*********************************************************************
1421 *
1422 *  Legacy polling routine: note this only works with single queue
1423 *
1424 *********************************************************************/
1425static int
1426em_poll(struct ifnet *ifp, enum poll_cmd cmd, int count)
1427{
1428	struct adapter *adapter = ifp->if_softc;
1429	struct tx_ring	*txr = adapter->tx_rings;
1430	struct rx_ring	*rxr = adapter->rx_rings;
1431	u32		reg_icr;
1432	int		rx_done;
1433
1434	EM_CORE_LOCK(adapter);
1435	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
1436		EM_CORE_UNLOCK(adapter);
1437		return (0);
1438	}
1439
1440	if (cmd == POLL_AND_CHECK_STATUS) {
1441		reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1442		if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
1443			callout_stop(&adapter->timer);
1444			adapter->hw.mac.get_link_status = 1;
1445			em_update_link_status(adapter);
1446			callout_reset(&adapter->timer, hz,
1447			    em_local_timer, adapter);
1448		}
1449	}
1450	EM_CORE_UNLOCK(adapter);
1451
1452	em_rxeof(rxr, count, &rx_done);
1453
1454	EM_TX_LOCK(txr);
1455	em_txeof(txr);
1456#ifdef EM_MULTIQUEUE
1457	if (!drbr_empty(ifp, txr->br))
1458		em_mq_start_locked(ifp, txr, NULL);
1459#else
1460	if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1461		em_start_locked(ifp, txr);
1462#endif
1463	EM_TX_UNLOCK(txr);
1464
1465	return (rx_done);
1466}
1467#endif /* DEVICE_POLLING */
1468
1469
1470/*********************************************************************
1471 *
1472 *  Fast Legacy/MSI Combined Interrupt Service routine
1473 *
1474 *********************************************************************/
1475static int
1476em_irq_fast(void *arg)
1477{
1478	struct adapter	*adapter = arg;
1479	struct ifnet	*ifp;
1480	u32		reg_icr;
1481
1482	ifp = adapter->ifp;
1483
1484	reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1485
1486	/* Hot eject?  */
1487	if (reg_icr == 0xffffffff)
1488		return FILTER_STRAY;
1489
1490	/* Definitely not our interrupt.  */
1491	if (reg_icr == 0x0)
1492		return FILTER_STRAY;
1493
1494	/*
1495	 * Starting with the 82571 chip, bit 31 should be used to
1496	 * determine whether the interrupt belongs to us.
1497	 */
1498	if (adapter->hw.mac.type >= e1000_82571 &&
1499	    (reg_icr & E1000_ICR_INT_ASSERTED) == 0)
1500		return FILTER_STRAY;
1501
1502	em_disable_intr(adapter);
1503	taskqueue_enqueue(adapter->tq, &adapter->que_task);
1504
1505	/* Link status change */
1506	if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
1507		adapter->hw.mac.get_link_status = 1;
1508		taskqueue_enqueue(taskqueue_fast, &adapter->link_task);
1509	}
1510
1511	if (reg_icr & E1000_ICR_RXO)
1512		adapter->rx_overruns++;
1513	return FILTER_HANDLED;
1514}
1515
1516/* Combined RX/TX handler, used by Legacy and MSI */
1517static void
1518em_handle_que(void *context, int pending)
1519{
1520	struct adapter	*adapter = context;
1521	struct ifnet	*ifp = adapter->ifp;
1522	struct tx_ring	*txr = adapter->tx_rings;
1523	struct rx_ring	*rxr = adapter->rx_rings;
1524
1525
1526	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1527		bool more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
1528		EM_TX_LOCK(txr);
1529		em_txeof(txr);
1530#ifdef EM_MULTIQUEUE
1531		if (!drbr_empty(ifp, txr->br))
1532			em_mq_start_locked(ifp, txr, NULL);
1533#else
1534		if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1535			em_start_locked(ifp, txr);
1536#endif
1537		EM_TX_UNLOCK(txr);
1538		if (more) {
1539			taskqueue_enqueue(adapter->tq, &adapter->que_task);
1540			return;
1541		}
1542	}
1543
1544	em_enable_intr(adapter);
1545	return;
1546}
1547
1548
1549/*********************************************************************
1550 *
1551 *  MSIX Interrupt Service Routines
1552 *
1553 **********************************************************************/
1554static void
1555em_msix_tx(void *arg)
1556{
1557	struct tx_ring *txr = arg;
1558	struct adapter *adapter = txr->adapter;
1559	struct ifnet	*ifp = adapter->ifp;
1560
1561	++txr->tx_irq;
1562	EM_TX_LOCK(txr);
1563	em_txeof(txr);
1564#ifdef EM_MULTIQUEUE
1565	if (!drbr_empty(ifp, txr->br))
1566		em_mq_start_locked(ifp, txr, NULL);
1567#else
1568	if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1569		em_start_locked(ifp, txr);
1570#endif
1571	/* Reenable this interrupt */
1572	E1000_WRITE_REG(&adapter->hw, E1000_IMS, txr->ims);
1573	EM_TX_UNLOCK(txr);
1574	return;
1575}
1576
1577/*********************************************************************
1578 *
1579 *  MSIX RX Interrupt Service routine
1580 *
1581 **********************************************************************/
1582
1583static void
1584em_msix_rx(void *arg)
1585{
1586	struct rx_ring	*rxr = arg;
1587	struct adapter	*adapter = rxr->adapter;
1588	bool		more;
1589
1590	++rxr->rx_irq;
1591	if (!(adapter->ifp->if_drv_flags & IFF_DRV_RUNNING))
1592		return;
1593	more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
1594	if (more)
1595		taskqueue_enqueue(rxr->tq, &rxr->rx_task);
1596	else
1597		/* Reenable this interrupt */
1598		E1000_WRITE_REG(&adapter->hw, E1000_IMS, rxr->ims);
1599	return;
1600}
1601
1602/*********************************************************************
1603 *
1604 *  MSIX Link Fast Interrupt Service routine
1605 *
1606 **********************************************************************/
1607static void
1608em_msix_link(void *arg)
1609{
1610	struct adapter	*adapter = arg;
1611	u32		reg_icr;
1612
1613	++adapter->link_irq;
1614	reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1615
1616	if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
1617		adapter->hw.mac.get_link_status = 1;
1618		em_handle_link(adapter, 0);
1619	} else
1620		E1000_WRITE_REG(&adapter->hw, E1000_IMS,
1621		    EM_MSIX_LINK | E1000_IMS_LSC);
1622	return;
1623}
1624
1625static void
1626em_handle_rx(void *context, int pending)
1627{
1628	struct rx_ring	*rxr = context;
1629	struct adapter	*adapter = rxr->adapter;
1630	bool		more;
1631
1632	more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
1633	if (more)
1634		taskqueue_enqueue(rxr->tq, &rxr->rx_task);
1635	else
1636		/* Reenable this interrupt */
1637		E1000_WRITE_REG(&adapter->hw, E1000_IMS, rxr->ims);
1638}
1639
1640static void
1641em_handle_tx(void *context, int pending)
1642{
1643	struct tx_ring	*txr = context;
1644	struct adapter	*adapter = txr->adapter;
1645	struct ifnet	*ifp = adapter->ifp;
1646
1647	EM_TX_LOCK(txr);
1648	em_txeof(txr);
1649#ifdef EM_MULTIQUEUE
1650	if (!drbr_empty(ifp, txr->br))
1651		em_mq_start_locked(ifp, txr, NULL);
1652#else
1653	if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1654		em_start_locked(ifp, txr);
1655#endif
1656	E1000_WRITE_REG(&adapter->hw, E1000_IMS, txr->ims);
1657	EM_TX_UNLOCK(txr);
1658}
1659
1660static void
1661em_handle_link(void *context, int pending)
1662{
1663	struct adapter	*adapter = context;
1664	struct tx_ring	*txr = adapter->tx_rings;
1665	struct ifnet *ifp = adapter->ifp;
1666
1667	if (!(ifp->if_drv_flags & IFF_DRV_RUNNING))
1668		return;
1669
1670	EM_CORE_LOCK(adapter);
1671	callout_stop(&adapter->timer);
1672	em_update_link_status(adapter);
1673	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
1674	E1000_WRITE_REG(&adapter->hw, E1000_IMS,
1675	    EM_MSIX_LINK | E1000_IMS_LSC);
1676	if (adapter->link_active) {
1677		for (int i = 0; i < adapter->num_queues; i++, txr++) {
1678			EM_TX_LOCK(txr);
1679#ifdef EM_MULTIQUEUE
1680			if (!drbr_empty(ifp, txr->br))
1681				em_mq_start_locked(ifp, txr, NULL);
1682#else
1683			if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1684				em_start_locked(ifp, txr);
1685#endif
1686			EM_TX_UNLOCK(txr);
1687		}
1688	}
1689	EM_CORE_UNLOCK(adapter);
1690}
1691
1692
1693/*********************************************************************
1694 *
1695 *  Media Ioctl callback
1696 *
1697 *  This routine is called whenever the user queries the status of
1698 *  the interface using ifconfig.
1699 *
1700 **********************************************************************/
1701static void
1702em_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
1703{
1704	struct adapter *adapter = ifp->if_softc;
1705	u_char fiber_type = IFM_1000_SX;
1706
1707	INIT_DEBUGOUT("em_media_status: begin");
1708
1709	EM_CORE_LOCK(adapter);
1710	em_update_link_status(adapter);
1711
1712	ifmr->ifm_status = IFM_AVALID;
1713	ifmr->ifm_active = IFM_ETHER;
1714
1715	if (!adapter->link_active) {
1716		EM_CORE_UNLOCK(adapter);
1717		return;
1718	}
1719
1720	ifmr->ifm_status |= IFM_ACTIVE;
1721
1722	if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
1723	    (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
1724		ifmr->ifm_active |= fiber_type | IFM_FDX;
1725	} else {
1726		switch (adapter->link_speed) {
1727		case 10:
1728			ifmr->ifm_active |= IFM_10_T;
1729			break;
1730		case 100:
1731			ifmr->ifm_active |= IFM_100_TX;
1732			break;
1733		case 1000:
1734			ifmr->ifm_active |= IFM_1000_T;
1735			break;
1736		}
1737		if (adapter->link_duplex == FULL_DUPLEX)
1738			ifmr->ifm_active |= IFM_FDX;
1739		else
1740			ifmr->ifm_active |= IFM_HDX;
1741	}
1742	EM_CORE_UNLOCK(adapter);
1743}
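/*
 * Example (annotation, illustrative only): with an active copper link
 * at 1000 Mbps full duplex, the fields set above surface in userland
 * roughly as:
 *
 *	$ ifconfig em0
 *	...
 *	media: Ethernet autoselect (1000baseT <full-duplex>)
 *	status: active
 */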
1744
1745/*********************************************************************
1746 *
1747 *  Media Ioctl callback
1748 *
1749 *  This routine is called when the user changes speed/duplex using
1750	 *  media/mediaopt options with ifconfig.
1751 *
1752 **********************************************************************/
1753static int
1754em_media_change(struct ifnet *ifp)
1755{
1756	struct adapter *adapter = ifp->if_softc;
1757	struct ifmedia  *ifm = &adapter->media;
1758
1759	INIT_DEBUGOUT("em_media_change: begin");
1760
1761	if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
1762		return (EINVAL);
1763
1764	EM_CORE_LOCK(adapter);
1765	switch (IFM_SUBTYPE(ifm->ifm_media)) {
1766	case IFM_AUTO:
1767		adapter->hw.mac.autoneg = DO_AUTO_NEG;
1768		adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
1769		break;
1770	case IFM_1000_LX:
1771	case IFM_1000_SX:
1772	case IFM_1000_T:
1773		adapter->hw.mac.autoneg = DO_AUTO_NEG;
1774		adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
1775		break;
1776	case IFM_100_TX:
1777		adapter->hw.mac.autoneg = FALSE;
1778		adapter->hw.phy.autoneg_advertised = 0;
1779		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1780			adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_FULL;
1781		else
1782			adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_HALF;
1783		break;
1784	case IFM_10_T:
1785		adapter->hw.mac.autoneg = FALSE;
1786		adapter->hw.phy.autoneg_advertised = 0;
1787		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1788			adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_FULL;
1789		else
1790			adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_HALF;
1791		break;
1792	default:
1793		device_printf(adapter->dev, "Unsupported media type\n");
1794	}
1795
1796	em_init_locked(adapter);
1797	EM_CORE_UNLOCK(adapter);
1798
1799	return (0);
1800}
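/*
 * Usage sketch (annotation, illustrative only): this routine runs for
 * administrative media changes such as:
 *
 *	ifconfig em0 media 100baseTX mediaopt full-duplex
 *	ifconfig em0 media autoselect
 *
 * The first forces 100/full via forced_speed_duplex above; the second
 * restores autonegotiation.
 */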
1801
1802/*********************************************************************
1803 *
1804 *  This routine maps the mbufs to tx descriptors.
1805 *
1806 *  return 0 on success, positive on failure
1807 **********************************************************************/
1808
1809static int
1810em_xmit(struct tx_ring *txr, struct mbuf **m_headp)
1811{
1812	struct adapter		*adapter = txr->adapter;
1813	bus_dma_segment_t	segs[EM_MAX_SCATTER];
1814	bus_dmamap_t		map;
1815	struct em_buffer	*tx_buffer, *tx_buffer_mapped;
1816	struct e1000_tx_desc	*ctxd = NULL;
1817	struct mbuf		*m_head;
1818	struct ether_header	*eh;
1819	struct ip		*ip = NULL;
1820	struct tcphdr		*tp = NULL;
1821	u32			txd_upper, txd_lower, txd_used, txd_saved;
1822	int			ip_off, poff;
1823	int			nsegs, i, j, first, last = 0;
1824	int			error, do_tso, tso_desc = 0, remap = 1;
1825
1826retry:
1827	m_head = *m_headp;
1828	txd_upper = txd_lower = txd_used = txd_saved = 0;
1829	do_tso = ((m_head->m_pkthdr.csum_flags & CSUM_TSO) != 0);
1830	ip_off = poff = 0;
1831
1832	/*
1833	 * Intel recommends entire IP/TCP header length reside in a single
1834	 * buffer. If multiple descriptors are used to describe the IP and
1835	 * TCP header, each descriptor should describe one or more
1836	 * complete headers; descriptors referencing only parts of headers
1837	 * are not supported. If all layer headers are not coalesced into
1838	 * a single buffer, each buffer should not cross a 4KB boundary,
1839	 * or be larger than the maximum read request size.
1840	 * The controller also requires modifying the IP/TCP header to make
1841	 * TSO work, so we first get a writable mbuf chain, then coalesce the
1842	 * ethernet/IP/TCP headers into a single buffer to meet the
1843	 * controller's requirement. This also simplifies IP/TCP/UDP checksum
1844	 * offloading, which has similar restrictions.
1845	 */
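	/*
	 * Illustrative layout (annotation): after the pullups below, a
	 * plain IPv4/TCP frame has its whole header stack in the first
	 * mbuf:
	 *
	 *	[ether 14][ip 20][tcp 20+options]
	 *
	 * giving ip_off = 14 and poff = 34 for option-less headers, so
	 * a single descriptor can cover the complete set of headers.
	 */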
1846	if (do_tso || m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD) {
1847		if (do_tso || (m_head->m_next != NULL &&
1848		    m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD)) {
1849			if (M_WRITABLE(*m_headp) == 0) {
1850				m_head = m_dup(*m_headp, M_NOWAIT);
1851				m_freem(*m_headp);
1852				if (m_head == NULL) {
1853					*m_headp = NULL;
1854					return (ENOBUFS);
1855				}
1856				*m_headp = m_head;
1857			}
1858		}
1859		/*
1860		 * XXX
1861		 * Assume IPv4, we don't have TSO/checksum offload support
1862		 * for IPv6 yet.
1863		 */
1864		ip_off = sizeof(struct ether_header);
1865		m_head = m_pullup(m_head, ip_off);
1866		if (m_head == NULL) {
1867			*m_headp = NULL;
1868			return (ENOBUFS);
1869		}
1870		eh = mtod(m_head, struct ether_header *);
1871		if (eh->ether_type == htons(ETHERTYPE_VLAN)) {
1872			ip_off = sizeof(struct ether_vlan_header);
1873			m_head = m_pullup(m_head, ip_off);
1874			if (m_head == NULL) {
1875				*m_headp = NULL;
1876				return (ENOBUFS);
1877			}
1878		}
1879		m_head = m_pullup(m_head, ip_off + sizeof(struct ip));
1880		if (m_head == NULL) {
1881			*m_headp = NULL;
1882			return (ENOBUFS);
1883		}
1884		ip = (struct ip *)(mtod(m_head, char *) + ip_off);
1885		poff = ip_off + (ip->ip_hl << 2);
1886		if (do_tso) {
1887			m_head = m_pullup(m_head, poff + sizeof(struct tcphdr));
1888			if (m_head == NULL) {
1889				*m_headp = NULL;
1890				return (ENOBUFS);
1891			}
1892			tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
1893			/*
1894			 * TSO workaround:
1895			 *   pull 4 more bytes of payload into the header mbuf.
1896			 */
1897			m_head = m_pullup(m_head, poff + (tp->th_off << 2) + 4);
1898			if (m_head == NULL) {
1899				*m_headp = NULL;
1900				return (ENOBUFS);
1901			}
1902			ip = (struct ip *)(mtod(m_head, char *) + ip_off);
1903			ip->ip_len = 0;
1904			ip->ip_sum = 0;
1905			/*
1906			 * The pseudo TCP checksum does not include the TCP
1907			 * payload length, so the driver must recompute the
1908			 * checksum the hardware expects to see. This adheres
1909			 * to Microsoft's Large Send specification.
1910			 */
1911			tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
1912			tp->th_sum = in_pseudo(ip->ip_src.s_addr,
1913			    ip->ip_dst.s_addr, htons(IPPROTO_TCP));
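			/*
			 * Note (annotation): in_pseudo() here folds only
			 * src/dst/proto, i.e. a zero-length pseudo header;
			 * the hardware adds the per-segment TCP length
			 * while segmenting.
			 */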
1914		} else if (m_head->m_pkthdr.csum_flags & CSUM_TCP) {
1915			m_head = m_pullup(m_head, poff + sizeof(struct tcphdr));
1916			if (m_head == NULL) {
1917				*m_headp = NULL;
1918				return (ENOBUFS);
1919			}
1920			tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
1921			m_head = m_pullup(m_head, poff + (tp->th_off << 2));
1922			if (m_head == NULL) {
1923				*m_headp = NULL;
1924				return (ENOBUFS);
1925			}
1926			ip = (struct ip *)(mtod(m_head, char *) + ip_off);
1927			tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
1928		} else if (m_head->m_pkthdr.csum_flags & CSUM_UDP) {
1929			m_head = m_pullup(m_head, poff + sizeof(struct udphdr));
1930			if (m_head == NULL) {
1931				*m_headp = NULL;
1932				return (ENOBUFS);
1933			}
1934			ip = (struct ip *)(mtod(m_head, char *) + ip_off);
1935		}
1936		*m_headp = m_head;
1937	}
1938
1939	/*
1940	 * Map the packet for DMA
1941	 *
1942	 * Capture the first descriptor index,
1943	 * this descriptor will have the index
1944	 * of the EOP which is the only one that
1945	 * now gets a DONE bit writeback.
1946	 */
1947	first = txr->next_avail_desc;
1948	tx_buffer = &txr->tx_buffers[first];
1949	tx_buffer_mapped = tx_buffer;
1950	map = tx_buffer->map;
1951
1952	error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
1953	    *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
1954
1955	/*
1956	 * There are two types of errors we can (try) to handle:
1957	 * - EFBIG means the mbuf chain was too long and bus_dma ran
1958	 *   out of segments.  Defragment the mbuf chain and try again.
1959	 * - ENOMEM means bus_dma could not obtain enough bounce buffers
1960	 *   at this point in time.  Defer sending and try again later.
1961	 * All other errors, in particular EINVAL, are fatal and prevent the
1962	 * mbuf chain from ever going through.  Drop it and report error.
1963	 */
1964	if (error == EFBIG && remap) {
1965		struct mbuf *m;
1966
1967		m = m_defrag(*m_headp, M_NOWAIT);
1968		if (m == NULL) {
1969			adapter->mbuf_alloc_failed++;
1970			m_freem(*m_headp);
1971			*m_headp = NULL;
1972			return (ENOBUFS);
1973		}
1974		*m_headp = m;
1975
1976		/* Try it again, but only once */
1977		remap = 0;
1978		goto retry;
1979	} else if (error == ENOMEM) {
1980		adapter->no_tx_dma_setup++;
1981		return (error);
1982	} else if (error != 0) {
1983		adapter->no_tx_dma_setup++;
1984		m_freem(*m_headp);
1985		*m_headp = NULL;
1986		return (error);
1987	}
1988
1989	/*
1990	 * TSO Hardware workaround, if this packet is not
1991	 * TSO, and is only a single descriptor long, and
1992	 * it follows a TSO burst, then we need to add a
1993	 * sentinel descriptor to prevent premature writeback.
1994	 */
1995	if ((do_tso == 0) && (txr->tx_tso == TRUE)) {
1996		if (nsegs == 1)
1997			tso_desc = TRUE;
1998		txr->tx_tso = FALSE;
1999	}
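	/*
	 * Worked example (annotation, hypothetical sizes): a
	 * single-segment 64-byte packet that follows a TSO burst is
	 * emitted below as a 60-byte descriptor plus a 4-byte sentinel
	 * descriptor, so the DD writeback cannot fire before the whole
	 * frame has been fetched.
	 */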
2000
2001	if (nsegs > (txr->tx_avail - 2)) {
2002		txr->no_desc_avail++;
2003		bus_dmamap_unload(txr->txtag, map);
2004		return (ENOBUFS);
2005	}
2006	m_head = *m_headp;
2007
2008	/* Do hardware assists */
2009	if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
2010		em_tso_setup(txr, m_head, ip_off, ip, tp,
2011		    &txd_upper, &txd_lower);
2012		/* we need to make a final sentinel transmit desc */
2013		tso_desc = TRUE;
2014	} else if (m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD)
2015		em_transmit_checksum_setup(txr, m_head,
2016		    ip_off, ip, &txd_upper, &txd_lower);
2017
2018	if (m_head->m_flags & M_VLANTAG) {
2019		/* Set the vlan id. */
2020		txd_upper |=
2021		    (htole16(m_head->m_pkthdr.ether_vtag) << 16);
2022		/* Tell hardware to add tag */
2023		txd_lower |= htole32(E1000_TXD_CMD_VLE);
2024	}
2025
2026	i = txr->next_avail_desc;
2027
2028	/* Set up our transmit descriptors */
2029	for (j = 0; j < nsegs; j++) {
2030		bus_size_t seg_len;
2031		bus_addr_t seg_addr;
2032
2033		tx_buffer = &txr->tx_buffers[i];
2034		ctxd = &txr->tx_base[i];
2035		seg_addr = segs[j].ds_addr;
2036		seg_len  = segs[j].ds_len;
2037		/*
2038		** TSO Workaround:
2039		** If this is the last descriptor, we want to
2040		** split it so we have a small final sentinel
2041		*/
2042		if (tso_desc && (j == (nsegs - 1)) && (seg_len > 8)) {
2043			seg_len -= 4;
2044			ctxd->buffer_addr = htole64(seg_addr);
2045			ctxd->lower.data = htole32(
2046			adapter->txd_cmd | txd_lower | seg_len);
2047			ctxd->upper.data =
2048			    htole32(txd_upper);
2049			if (++i == adapter->num_tx_desc)
2050				i = 0;
2051			/* Now make the sentinel */
2052			++txd_used; /* using an extra txd */
2053			ctxd = &txr->tx_base[i];
2054			tx_buffer = &txr->tx_buffers[i];
2055			ctxd->buffer_addr =
2056			    htole64(seg_addr + seg_len);
2057			ctxd->lower.data = htole32(
2058			adapter->txd_cmd | txd_lower | 4);
2059			ctxd->upper.data =
2060			    htole32(txd_upper);
2061			last = i;
2062			if (++i == adapter->num_tx_desc)
2063				i = 0;
2064		} else {
2065			ctxd->buffer_addr = htole64(seg_addr);
2066			ctxd->lower.data = htole32(
2067			adapter->txd_cmd | txd_lower | seg_len);
2068			ctxd->upper.data =
2069			    htole32(txd_upper);
2070			last = i;
2071			if (++i == adapter->num_tx_desc)
2072				i = 0;
2073		}
2074		tx_buffer->m_head = NULL;
2075		tx_buffer->next_eop = -1;
2076	}
2077
2078	txr->next_avail_desc = i;
2079	txr->tx_avail -= nsegs;
2080	if (tso_desc) /* TSO used an extra for sentinel */
2081		txr->tx_avail -= txd_used;
2082
2083	tx_buffer->m_head = m_head;
2084	/*
2085	** Here we swap the maps so the last descriptor,
2086	** which gets the completion interrupt, has the
2087	** real map, and the first descriptor gets the
2088	** unused map from this descriptor.
2089	*/
2090	tx_buffer_mapped->map = tx_buffer->map;
2091	tx_buffer->map = map;
2092	bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE);
2093
2094	/*
2095	 * The last descriptor of the packet
2096	 * needs End Of Packet (EOP)
2097	 * and Report Status (RS).
2098	 */
2099	ctxd->lower.data |=
2100	    htole32(E1000_TXD_CMD_EOP | E1000_TXD_CMD_RS);
2101	/*
2102	 * Keep track in the first buffer which
2103	 * descriptor will be written back
2104	 */
2105	tx_buffer = &txr->tx_buffers[first];
2106	tx_buffer->next_eop = last;
2107	/* Update the watchdog time early and often */
2108	txr->watchdog_time = ticks;
2109
2110	/*
2111	 * Advance the Transmit Descriptor Tail (TDT), this tells the E1000
2112	 * that this frame is available to transmit.
2113	 */
2114	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
2115	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
2116	E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me), i);
2117
2118	return (0);
2119}
2120
2121static void
2122em_set_promisc(struct adapter *adapter)
2123{
2124	struct ifnet	*ifp = adapter->ifp;
2125	u32		reg_rctl;
2126
2127	reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2128
2129	if (ifp->if_flags & IFF_PROMISC) {
2130		reg_rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
2131		/* Turn this on if you want to see bad packets */
2132		if (em_debug_sbp)
2133			reg_rctl |= E1000_RCTL_SBP;
2134		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2135	} else if (ifp->if_flags & IFF_ALLMULTI) {
2136		reg_rctl |= E1000_RCTL_MPE;
2137		reg_rctl &= ~E1000_RCTL_UPE;
2138		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2139	}
2140}
2141
2142static void
2143em_disable_promisc(struct adapter *adapter)
2144{
2145	struct ifnet	*ifp = adapter->ifp;
2146	u32		reg_rctl;
2147	int		mcnt = 0;
2148
2149	reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2150	reg_rctl &=  (~E1000_RCTL_UPE);
2151	if (ifp->if_flags & IFF_ALLMULTI)
2152		mcnt = MAX_NUM_MULTICAST_ADDRESSES;
2153	else {
2154		struct  ifmultiaddr *ifma;
2155#if __FreeBSD_version < 800000
2156		IF_ADDR_LOCK(ifp);
2157#else
2158		if_maddr_rlock(ifp);
2159#endif
2160		TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
2161			if (ifma->ifma_addr->sa_family != AF_LINK)
2162				continue;
2163			if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
2164				break;
2165			mcnt++;
2166		}
2167#if __FreeBSD_version < 800000
2168		IF_ADDR_UNLOCK(ifp);
2169#else
2170		if_maddr_runlock(ifp);
2171#endif
2172	}
2173	/* Don't disable if in MAX groups */
2174	if (mcnt < MAX_NUM_MULTICAST_ADDRESSES)
2175		reg_rctl &=  (~E1000_RCTL_MPE);
2176	reg_rctl &=  (~E1000_RCTL_SBP);
2177	E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2178}
2179
2180
2181/*********************************************************************
2182 *  Multicast Update
2183 *
2184 *  This routine is called whenever multicast address list is updated.
2185 *
2186 **********************************************************************/
2187
2188static void
2189em_set_multi(struct adapter *adapter)
2190{
2191	struct ifnet	*ifp = adapter->ifp;
2192	struct ifmultiaddr *ifma;
2193	u32 reg_rctl = 0;
2194	u8  *mta; /* Multicast array memory */
2195	int mcnt = 0;
2196
2197	IOCTL_DEBUGOUT("em_set_multi: begin");
2198
2199	mta = adapter->mta;
2200	bzero(mta, sizeof(u8) * ETH_ADDR_LEN * MAX_NUM_MULTICAST_ADDRESSES);
2201
2202	if (adapter->hw.mac.type == e1000_82542 &&
2203	    adapter->hw.revision_id == E1000_REVISION_2) {
2204		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2205		if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
2206			e1000_pci_clear_mwi(&adapter->hw);
2207		reg_rctl |= E1000_RCTL_RST;
2208		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2209		msec_delay(5);
2210	}
2211
2212#if __FreeBSD_version < 800000
2213	IF_ADDR_LOCK(ifp);
2214#else
2215	if_maddr_rlock(ifp);
2216#endif
2217	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
2218		if (ifma->ifma_addr->sa_family != AF_LINK)
2219			continue;
2220
2221		if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
2222			break;
2223
2224		bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
2225		    &mta[mcnt * ETH_ADDR_LEN], ETH_ADDR_LEN);
2226		mcnt++;
2227	}
2228#if __FreeBSD_version < 800000
2229	IF_ADDR_UNLOCK(ifp);
2230#else
2231	if_maddr_runlock(ifp);
2232#endif
2233	if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES) {
2234		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2235		reg_rctl |= E1000_RCTL_MPE;
2236		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2237	} else
2238		e1000_update_mc_addr_list(&adapter->hw, mta, mcnt);
2239
2240	if (adapter->hw.mac.type == e1000_82542 &&
2241	    adapter->hw.revision_id == E1000_REVISION_2) {
2242		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2243		reg_rctl &= ~E1000_RCTL_RST;
2244		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2245		msec_delay(5);
2246		if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
2247			e1000_pci_set_mwi(&adapter->hw);
2248	}
2249}
2250
2251
2252/*********************************************************************
2253 *  Timer routine
2254 *
2255 *  This routine checks for link status and updates statistics.
2256 *
2257 **********************************************************************/
2258
2259static void
2260em_local_timer(void *arg)
2261{
2262	struct adapter	*adapter = arg;
2263	struct ifnet	*ifp = adapter->ifp;
2264	struct tx_ring	*txr = adapter->tx_rings;
2265	struct rx_ring	*rxr = adapter->rx_rings;
2266	u32		trigger;
2267
2268	EM_CORE_LOCK_ASSERT(adapter);
2269
2270	em_update_link_status(adapter);
2271	em_update_stats_counters(adapter);
2272
2273	/* Reset LAA into RAR[0] on 82571 */
2274	if ((adapter->hw.mac.type == e1000_82571) &&
2275	    e1000_get_laa_state_82571(&adapter->hw))
2276		e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);
2277
2278	/* Mask to use in the irq trigger */
2279	if (adapter->msix_mem)
2280		trigger = rxr->ims;
2281	else
2282		trigger = E1000_ICS_RXDMT0;
2283
2284	/*
2285	** Check on the state of the TX queue(s); this
2286	** can be done without the lock because it's RO,
2287	** and the HUNG state will be static if set.
2288	*/
2289	for (int i = 0; i < adapter->num_queues; i++, txr++) {
2290		if ((txr->queue_status == EM_QUEUE_HUNG) &&
2291		    (adapter->pause_frames == 0))
2292			goto hung;
2293		/* Schedule a TX tasklet if needed */
2294		if (txr->tx_avail <= EM_MAX_SCATTER)
2295			taskqueue_enqueue(txr->tq, &txr->tx_task);
2296	}
2297
2298	adapter->pause_frames = 0;
2299	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
2300#ifndef DEVICE_POLLING
2301	/* Trigger an RX interrupt to guarantee mbuf refresh */
2302	E1000_WRITE_REG(&adapter->hw, E1000_ICS, trigger);
2303#endif
2304	return;
2305hung:
2306	/* Looks like we're hung */
2307	device_printf(adapter->dev, "Watchdog timeout -- resetting\n");
2308	device_printf(adapter->dev,
2309	    "Queue(%d) tdh = %d, hw tdt = %d\n", txr->me,
2310	    E1000_READ_REG(&adapter->hw, E1000_TDH(txr->me)),
2311	    E1000_READ_REG(&adapter->hw, E1000_TDT(txr->me)));
2312	device_printf(adapter->dev, "TX(%d) desc avail = %d, "
2313	    "Next TX to Clean = %d\n",
2314	    txr->me, txr->tx_avail, txr->next_to_clean);
2315	ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2316	adapter->watchdog_events++;
2317	adapter->pause_frames = 0;
2318	em_init_locked(adapter);
2319}
2320
2321
2322static void
2323em_update_link_status(struct adapter *adapter)
2324{
2325	struct e1000_hw *hw = &adapter->hw;
2326	struct ifnet *ifp = adapter->ifp;
2327	device_t dev = adapter->dev;
2328	struct tx_ring *txr = adapter->tx_rings;
2329	u32 link_check = 0;
2330
2331	/* Get the cached link value or read phy for real */
2332	switch (hw->phy.media_type) {
2333	case e1000_media_type_copper:
2334		if (hw->mac.get_link_status) {
2335			/* Do the work to read phy */
2336			e1000_check_for_link(hw);
2337			link_check = !hw->mac.get_link_status;
2338			if (link_check) /* ESB2 fix */
2339				e1000_cfg_on_link_up(hw);
2340		} else
2341			link_check = TRUE;
2342		break;
2343	case e1000_media_type_fiber:
2344		e1000_check_for_link(hw);
2345		link_check = (E1000_READ_REG(hw, E1000_STATUS) &
2346                                 E1000_STATUS_LU);
2347		break;
2348	case e1000_media_type_internal_serdes:
2349		e1000_check_for_link(hw);
2350		link_check = adapter->hw.mac.serdes_has_link;
2351		break;
2352	default:
2353	case e1000_media_type_unknown:
2354		break;
2355	}
2356
2357	/* Now check for a transition */
2358	if (link_check && (adapter->link_active == 0)) {
2359		e1000_get_speed_and_duplex(hw, &adapter->link_speed,
2360		    &adapter->link_duplex);
2361		/* Check if we must disable SPEED_MODE bit on PCI-E */
2362		if ((adapter->link_speed != SPEED_1000) &&
2363		    ((hw->mac.type == e1000_82571) ||
2364		    (hw->mac.type == e1000_82572))) {
2365			int tarc0;
2366			tarc0 = E1000_READ_REG(hw, E1000_TARC(0));
2367			tarc0 &= ~SPEED_MODE_BIT;
2368			E1000_WRITE_REG(hw, E1000_TARC(0), tarc0);
2369		}
2370		if (bootverbose)
2371			device_printf(dev, "Link is up %d Mbps %s\n",
2372			    adapter->link_speed,
2373			    ((adapter->link_duplex == FULL_DUPLEX) ?
2374			    "Full Duplex" : "Half Duplex"));
2375		adapter->link_active = 1;
2376		adapter->smartspeed = 0;
2377		ifp->if_baudrate = adapter->link_speed * 1000000;
2378		if_link_state_change(ifp, LINK_STATE_UP);
2379	} else if (!link_check && (adapter->link_active == 1)) {
2380		ifp->if_baudrate = adapter->link_speed = 0;
2381		adapter->link_duplex = 0;
2382		if (bootverbose)
2383			device_printf(dev, "Link is Down\n");
2384		adapter->link_active = 0;
2385		/* Link down, disable watchdog */
2386		for (int i = 0; i < adapter->num_queues; i++, txr++)
2387			txr->queue_status = EM_QUEUE_IDLE;
2388		if_link_state_change(ifp, LINK_STATE_DOWN);
2389	}
2390}
2391
2392/*********************************************************************
2393 *
2394 *  This routine disables all traffic on the adapter by issuing a
2395 *  global reset on the MAC and deallocates TX/RX buffers.
2396 *
2397 *  This routine should always be called with BOTH the CORE
2398 *  and TX locks.
2399 **********************************************************************/
2400
2401static void
2402em_stop(void *arg)
2403{
2404	struct adapter	*adapter = arg;
2405	struct ifnet	*ifp = adapter->ifp;
2406	struct tx_ring	*txr = adapter->tx_rings;
2407
2408	EM_CORE_LOCK_ASSERT(adapter);
2409
2410	INIT_DEBUGOUT("em_stop: begin");
2411
2412	em_disable_intr(adapter);
2413	callout_stop(&adapter->timer);
2414
2415	/* Tell the stack that the interface is no longer active */
2416	ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2417	ifp->if_drv_flags |= IFF_DRV_OACTIVE;
2418
2419	/* Unarm watchdog timer. */
2420	for (int i = 0; i < adapter->num_queues; i++, txr++) {
2421		EM_TX_LOCK(txr);
2422		txr->queue_status = EM_QUEUE_IDLE;
2423		EM_TX_UNLOCK(txr);
2424	}
2425
2426	e1000_reset_hw(&adapter->hw);
2427	E1000_WRITE_REG(&adapter->hw, E1000_WUC, 0);
2428
2429	e1000_led_off(&adapter->hw);
2430	e1000_cleanup_led(&adapter->hw);
2431}
2432
2433
2434/*********************************************************************
2435 *
2436 *  Determine hardware revision.
2437 *
2438 **********************************************************************/
2439static void
2440em_identify_hardware(struct adapter *adapter)
2441{
2442	device_t dev = adapter->dev;
2443
2444	/* Make sure our PCI config space has the necessary stuff set */
2445	pci_enable_busmaster(dev);
2446	adapter->hw.bus.pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2);
2447
2448	/* Save off the information about this board */
2449	adapter->hw.vendor_id = pci_get_vendor(dev);
2450	adapter->hw.device_id = pci_get_device(dev);
2451	adapter->hw.revision_id = pci_read_config(dev, PCIR_REVID, 1);
2452	adapter->hw.subsystem_vendor_id =
2453	    pci_read_config(dev, PCIR_SUBVEND_0, 2);
2454	adapter->hw.subsystem_device_id =
2455	    pci_read_config(dev, PCIR_SUBDEV_0, 2);
2456
2457	/* Do Shared Code Init and Setup */
2458	if (e1000_set_mac_type(&adapter->hw)) {
2459		device_printf(dev, "Setup init failure\n");
2460		return;
2461	}
2462}
2463
2464static int
2465em_allocate_pci_resources(struct adapter *adapter)
2466{
2467	device_t	dev = adapter->dev;
2468	int		rid;
2469
2470	rid = PCIR_BAR(0);
2471	adapter->memory = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
2472	    &rid, RF_ACTIVE);
2473	if (adapter->memory == NULL) {
2474		device_printf(dev, "Unable to allocate bus resource: memory\n");
2475		return (ENXIO);
2476	}
2477	adapter->osdep.mem_bus_space_tag =
2478	    rman_get_bustag(adapter->memory);
2479	adapter->osdep.mem_bus_space_handle =
2480	    rman_get_bushandle(adapter->memory);
2481	adapter->hw.hw_addr = (u8 *)&adapter->osdep.mem_bus_space_handle;
2482
2483	/* Default to a single queue */
2484	adapter->num_queues = 1;
2485
2486	/*
2487	 * Setup MSI/X or MSI if PCI Express
2488	 */
2489	adapter->msix = em_setup_msix(adapter);
2490
2491	adapter->hw.back = &adapter->osdep;
2492
2493	return (0);
2494}
2495
2496/*********************************************************************
2497 *
2498 *  Setup the Legacy or MSI Interrupt handler
2499 *
2500 **********************************************************************/
2501int
2502em_allocate_legacy(struct adapter *adapter)
2503{
2504	device_t dev = adapter->dev;
2505	struct tx_ring	*txr = adapter->tx_rings;
2506	int error, rid = 0;
2507
2508	/* Manually turn off all interrupts */
2509	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2510
2511	if (adapter->msix == 1) /* using MSI */
2512		rid = 1;
2513	/* We allocate a single interrupt resource */
2514	adapter->res = bus_alloc_resource_any(dev,
2515	    SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2516	if (adapter->res == NULL) {
2517		device_printf(dev, "Unable to allocate bus resource: "
2518		    "interrupt\n");
2519		return (ENXIO);
2520	}
2521
2522	/*
2523	 * Allocate a fast interrupt and the associated
2524	 * deferred processing contexts.
2525	 */
2526	TASK_INIT(&adapter->que_task, 0, em_handle_que, adapter);
2527	adapter->tq = taskqueue_create_fast("em_taskq", M_NOWAIT,
2528	    taskqueue_thread_enqueue, &adapter->tq);
2529	taskqueue_start_threads(&adapter->tq, 1, PI_NET, "%s que",
2530	    device_get_nameunit(adapter->dev));
2531	/* Use a TX only tasklet for local timer */
2532	TASK_INIT(&txr->tx_task, 0, em_handle_tx, txr);
2533	txr->tq = taskqueue_create_fast("em_txq", M_NOWAIT,
2534	    taskqueue_thread_enqueue, &txr->tq);
2535	taskqueue_start_threads(&txr->tq, 1, PI_NET, "%s txq",
2536	    device_get_nameunit(adapter->dev));
2537	TASK_INIT(&adapter->link_task, 0, em_handle_link, adapter);
2538	if ((error = bus_setup_intr(dev, adapter->res, INTR_TYPE_NET,
2539	    em_irq_fast, NULL, adapter, &adapter->tag)) != 0) {
2540		device_printf(dev, "Failed to register fast interrupt "
2541			    "handler: %d\n", error);
2542		taskqueue_free(adapter->tq);
2543		adapter->tq = NULL;
2544		return (error);
2545	}
2546
2547	return (0);
2548}
2549
2550/*********************************************************************
2551 *
2552 *  Setup the MSIX Interrupt handlers
2553 *   This is not really Multiqueue, rather
2554 *   it's just separate interrupt vectors
2555 *   for TX, RX, and Link.
2556 *
2557 **********************************************************************/
2558int
2559em_allocate_msix(struct adapter *adapter)
2560{
2561	device_t	dev = adapter->dev;
2562	struct		tx_ring *txr = adapter->tx_rings;
2563	struct		rx_ring *rxr = adapter->rx_rings;
2564	int		error, rid, vector = 0;
2565
2566
2567	/* Make sure all interrupts are disabled */
2568	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2569
2570	/* First set up ring resources */
2571	for (int i = 0; i < adapter->num_queues; i++, txr++, rxr++) {
2572
2573		/* RX ring */
2574		rid = vector + 1;
2575
2576		rxr->res = bus_alloc_resource_any(dev,
2577		    SYS_RES_IRQ, &rid, RF_ACTIVE);
2578		if (rxr->res == NULL) {
2579			device_printf(dev,
2580			    "Unable to allocate bus resource: "
2581			    "RX MSIX Interrupt %d\n", i);
2582			return (ENXIO);
2583		}
2584		if ((error = bus_setup_intr(dev, rxr->res,
2585		    INTR_TYPE_NET | INTR_MPSAFE, NULL, em_msix_rx,
2586		    rxr, &rxr->tag)) != 0) {
2587			device_printf(dev, "Failed to register RX handler");
2588			return (error);
2589		}
2590#if __FreeBSD_version >= 800504
2591		bus_describe_intr(dev, rxr->res, rxr->tag, "rx %d", i);
2592#endif
2593		rxr->msix = vector++; /* NOTE increment vector for TX */
2594		TASK_INIT(&rxr->rx_task, 0, em_handle_rx, rxr);
2595		rxr->tq = taskqueue_create_fast("em_rxq", M_NOWAIT,
2596		    taskqueue_thread_enqueue, &rxr->tq);
2597		taskqueue_start_threads(&rxr->tq, 1, PI_NET, "%s rxq",
2598		    device_get_nameunit(adapter->dev));
2599		/*
2600		** Set the bit to enable interrupt
2601		** in E1000_IMS -- bits 20 and 21
2602		** are for RX0 and RX1, note this has
2603		** NOTHING to do with the MSIX vector
2604		*/
2605		rxr->ims = 1 << (20 + i);
2606		adapter->ivars |= (8 | rxr->msix) << (i * 4);
2607
2608		/* TX ring */
2609		rid = vector + 1;
2610		txr->res = bus_alloc_resource_any(dev,
2611		    SYS_RES_IRQ, &rid, RF_ACTIVE);
2612		if (txr->res == NULL) {
2613			device_printf(dev,
2614			    "Unable to allocate bus resource: "
2615			    "TX MSIX Interrupt %d\n", i);
2616			return (ENXIO);
2617		}
2618		if ((error = bus_setup_intr(dev, txr->res,
2619		    INTR_TYPE_NET | INTR_MPSAFE, NULL, em_msix_tx,
2620		    txr, &txr->tag)) != 0) {
2621			device_printf(dev, "Failed to register TX handler");
2622			return (error);
2623		}
2624#if __FreeBSD_version >= 800504
2625		bus_describe_intr(dev, txr->res, txr->tag, "tx %d", i);
2626#endif
2627		txr->msix = vector++; /* Increment vector for next pass */
2628		TASK_INIT(&txr->tx_task, 0, em_handle_tx, txr);
2629		txr->tq = taskqueue_create_fast("em_txq", M_NOWAIT,
2630		    taskqueue_thread_enqueue, &txr->tq);
2631		taskqueue_start_threads(&txr->tq, 1, PI_NET, "%s txq",
2632		    device_get_nameunit(adapter->dev));
2633		/*
2634		** Set the bit to enable interrupt
2635		** in E1000_IMS -- bits 22 and 23
2636		** are for TX0 and TX1, note this has
2637		** NOTHING to do with the MSIX vector
2638		*/
2639		txr->ims = 1 << (22 + i);
2640		adapter->ivars |= (8 | txr->msix) << (8 + (i * 4));
2641	}
2642
2643	/* Link interrupt */
2644	++rid;
2645	adapter->res = bus_alloc_resource_any(dev,
2646	    SYS_RES_IRQ, &rid, RF_ACTIVE);
2647	if (!adapter->res) {
2648		device_printf(dev, "Unable to allocate "
2649		    "bus resource: Link interrupt [%d]\n", rid);
2650		return (ENXIO);
2651	}
2652	/* Set the link handler function */
2653	error = bus_setup_intr(dev, adapter->res,
2654	    INTR_TYPE_NET | INTR_MPSAFE, NULL,
2655	    em_msix_link, adapter, &adapter->tag);
2656	if (error) {
2657		adapter->res = NULL;
2658		device_printf(dev, "Failed to register LINK handler");
2659		return (error);
2660	}
2661#if __FreeBSD_version >= 800504
2662	bus_describe_intr(dev, adapter->res, adapter->tag, "link");
2663#endif
2664	adapter->linkvec = vector;
2665	adapter->ivars |=  (8 | vector) << 16;
2666	adapter->ivars |= 0x80000000;
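	/*
	 * Worked example (annotation): with the single queue used here,
	 * rxr->msix = 0, txr->msix = 1 and linkvec = 2, so the 82574
	 * IVAR image assembled above is
	 *
	 *	(8 | 0) << 0   = 0x00000008   RX0  -> vector 0
	 *	(8 | 1) << 8   = 0x00000900   TX0  -> vector 1
	 *	(8 | 2) << 16  = 0x000a0000   link -> vector 2
	 *	             | 0x80000000   control bit set above
	 *	             = 0x800a0908
	 */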
2667
2668	return (0);
2669}
2670
2671
2672static void
2673em_free_pci_resources(struct adapter *adapter)
2674{
2675	device_t	dev = adapter->dev;
2676	struct tx_ring	*txr;
2677	struct rx_ring	*rxr;
2678	int		rid;
2679
2680
2681	/*
2682	** Release all the queue interrupt resources:
2683	*/
2684	for (int i = 0; i < adapter->num_queues; i++) {
2685		txr = &adapter->tx_rings[i];
2686		rxr = &adapter->rx_rings[i];
2687		/* an early abort? */
2688		if ((txr == NULL) || (rxr == NULL))
2689			break;
2690		rid = txr->msix + 1;
2691		if (txr->tag != NULL) {
2692			bus_teardown_intr(dev, txr->res, txr->tag);
2693			txr->tag = NULL;
2694		}
2695		if (txr->res != NULL)
2696			bus_release_resource(dev, SYS_RES_IRQ,
2697			    rid, txr->res);
2698		rid = rxr->msix + 1;
2699		if (rxr->tag != NULL) {
2700			bus_teardown_intr(dev, rxr->res, rxr->tag);
2701			rxr->tag = NULL;
2702		}
2703		if (rxr->res != NULL)
2704			bus_release_resource(dev, SYS_RES_IRQ,
2705			    rid, rxr->res);
2706	}
2707
2708	if (adapter->linkvec) /* we are doing MSIX */
2709		rid = adapter->linkvec + 1;
2710	else
2711		rid = (adapter->msix != 0) ? 1 : 0;
2712
2713	if (adapter->tag != NULL) {
2714		bus_teardown_intr(dev, adapter->res, adapter->tag);
2715		adapter->tag = NULL;
2716	}
2717
2718	if (adapter->res != NULL)
2719		bus_release_resource(dev, SYS_RES_IRQ, rid, adapter->res);
2720
2721
2722	if (adapter->msix)
2723		pci_release_msi(dev);
2724
2725	if (adapter->msix_mem != NULL)
2726		bus_release_resource(dev, SYS_RES_MEMORY,
2727		    PCIR_BAR(EM_MSIX_BAR), adapter->msix_mem);
2728
2729	if (adapter->memory != NULL)
2730		bus_release_resource(dev, SYS_RES_MEMORY,
2731		    PCIR_BAR(0), adapter->memory);
2732
2733	if (adapter->flash != NULL)
2734		bus_release_resource(dev, SYS_RES_MEMORY,
2735		    EM_FLASH, adapter->flash);
2736}
2737
2738/*
2739 * Setup MSI or MSI/X
2740 */
2741static int
2742em_setup_msix(struct adapter *adapter)
2743{
2744	device_t dev = adapter->dev;
2745	int val;
2746
2747	/*
2748	** Setup MSI/X for Hartwell: tests have shown
2749	** use of two queues to be unstable, and to
2750	** provide no great gain anyway, so we simply
2751	** separate the interrupts and use a single queue.
2752	*/
2753	if ((adapter->hw.mac.type == e1000_82574) &&
2754	    (em_enable_msix == TRUE)) {
2755		/* Map the MSIX BAR */
2756		int rid = PCIR_BAR(EM_MSIX_BAR);
2757		adapter->msix_mem = bus_alloc_resource_any(dev,
2758		    SYS_RES_MEMORY, &rid, RF_ACTIVE);
2759		if (adapter->msix_mem == NULL) {
2760			/* May not be enabled */
2761			device_printf(adapter->dev,
2762			    "Unable to map MSIX table\n");
2763			goto msi;
2764		}
2765		val = pci_msix_count(dev);
2766		/* We only need/want 3 vectors */
2767		if (val >= 3)
2768			val = 3;
2769		else {
2770			device_printf(adapter->dev,
2771			    "MSIX: insufficient vectors, using MSI\n");
2772			goto msi;
2773		}
2774
2775		if ((pci_alloc_msix(dev, &val) == 0) && (val == 3)) {
2776			device_printf(adapter->dev,
2777			    "Using MSIX interrupts "
2778			    "with %d vectors\n", val);
2779			return (val);
2780		}
2781
2782		/*
2783		** If MSIX alloc failed or provided us with
2784		** less than needed, free and fall through to MSI
2785		*/
2786		pci_release_msi(dev);
2787	}
2788msi:
2789	if (adapter->msix_mem != NULL) {
2790		bus_release_resource(dev, SYS_RES_MEMORY,
2791		    PCIR_BAR(EM_MSIX_BAR), adapter->msix_mem);
2792		adapter->msix_mem = NULL;
2793	}
2794	val = 1;
2795	if (pci_alloc_msi(dev, &val) == 0) {
2796		device_printf(adapter->dev, "Using an MSI interrupt\n");
2797		return (val);
2798	}
2799	/* Should only happen due to manual configuration */
2800	device_printf(adapter->dev, "No MSI/MSIX, using a Legacy IRQ\n");
2801	return (0);
2802}
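/*
 * Usage note (annotation): the fallback order above is MSIX (82574
 * only, 3 vectors) -> MSI -> legacy INTx.  If the loader tunable that
 * gates em_enable_msix is cleared, e.g. in /boot/loader.conf
 * (assuming the usual hw.em.enable_msix name):
 *
 *	hw.em.enable_msix=0
 *
 * the 82574 branch is skipped and the driver tries plain MSI first.
 */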
2803
2804
2805/*********************************************************************
2806 *
2807 *  Initialize the hardware to a configuration
2808 *  as specified by the adapter structure.
2809 *
2810 **********************************************************************/
2811static void
2812em_reset(struct adapter *adapter)
2813{
2814	device_t	dev = adapter->dev;
2815	struct ifnet	*ifp = adapter->ifp;
2816	struct e1000_hw	*hw = &adapter->hw;
2817	u16		rx_buffer_size;
2818	u32		pba;
2819
2820	INIT_DEBUGOUT("em_reset: begin");
2821
2822	/* Set up smart power down as default off on newer adapters. */
2823	if (!em_smart_pwr_down && (hw->mac.type == e1000_82571 ||
2824	    hw->mac.type == e1000_82572)) {
2825		u16 phy_tmp = 0;
2826
2827		/* Speed up time to link by disabling smart power down. */
2828		e1000_read_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, &phy_tmp);
2829		phy_tmp &= ~IGP02E1000_PM_SPD;
2830		e1000_write_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, phy_tmp);
2831	}
2832
2833	/*
2834	 * Packet Buffer Allocation (PBA)
2835	 * Writing PBA sets the receive portion of the buffer
2836	 * the remainder is used for the transmit buffer.
2837	 */
2838	switch (hw->mac.type) {
2839	/* Total Packet Buffer on these is 48K */
2840	case e1000_82571:
2841	case e1000_82572:
2842	case e1000_80003es2lan:
2843		pba = E1000_PBA_32K; /* 32K for Rx, 16K for Tx */
2844		break;
2845	case e1000_82573: /* 82573: Total Packet Buffer is 32K */
2846		pba = E1000_PBA_12K; /* 12K for Rx, 20K for Tx */
2847		break;
2848	case e1000_82574:
2849	case e1000_82583:
2850		pba = E1000_PBA_20K; /* 20K for Rx, 20K for Tx */
2851		break;
2852	case e1000_ich8lan:
2853		pba = E1000_PBA_8K;
2854		break;
2855	case e1000_ich9lan:
2856	case e1000_ich10lan:
2857		/* Boost Receive side for jumbo frames */
2858		if (adapter->hw.mac.max_frame_size > 4096)
2859			pba = E1000_PBA_14K;
2860		else
2861			pba = E1000_PBA_10K;
2862		break;
2863	case e1000_pchlan:
2864	case e1000_pch2lan:
2865	case e1000_pch_lpt:
2866		pba = E1000_PBA_26K;
2867		break;
2868	default:
2869		if (adapter->hw.mac.max_frame_size > 8192)
2870			pba = E1000_PBA_40K; /* 40K for Rx, 24K for Tx */
2871		else
2872			pba = E1000_PBA_48K; /* 48K for Rx, 16K for Tx */
2873	}
2874	E1000_WRITE_REG(&adapter->hw, E1000_PBA, pba);
2875
2876	/*
2877	 * These parameters control the automatic generation (Tx) and
2878	 * response (Rx) to Ethernet PAUSE frames.
2879	 * - High water mark should allow for at least two frames to be
2880	 *   received after sending an XOFF.
2881	 * - Low water mark works best when it is very near the high water mark.
2882	 *   This allows the receiver to restart by sending XON when it has
2883	 *   drained a bit. Here we use an arbitrary value of 1500, which will
2884	 *   restart after one full frame is pulled from the buffer. There
2885	 *   could be several smaller frames in the buffer and if so they will
2886	 *   not trigger the XON until their total number reduces the buffer
2887	 *   by 1500.
2888	 * - The pause time is fairly large at 1000 x 512ns = 512 usec.
2889	 */
2890	rx_buffer_size = ((E1000_READ_REG(hw, E1000_PBA) & 0xffff) << 10 );
2891	hw->fc.high_water = rx_buffer_size -
2892	    roundup2(adapter->hw.mac.max_frame_size, 1024);
2893	hw->fc.low_water = hw->fc.high_water - 1500;
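	/*
	 * Worked example (annotation, assuming an 82571 with
	 * E1000_PBA_32K and a 1518-byte max frame):
	 *
	 *	rx_buffer_size = 32 * 1024                  = 32768
	 *	high_water     = 32768 - roundup2(1518, 1K) = 30720
	 *	low_water      = 30720 - 1500               = 29220
	 */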
2894
2895	if (adapter->fc) /* locally set flow control value? */
2896		hw->fc.requested_mode = adapter->fc;
2897	else
2898		hw->fc.requested_mode = e1000_fc_full;
2899
2900	if (hw->mac.type == e1000_80003es2lan)
2901		hw->fc.pause_time = 0xFFFF;
2902	else
2903		hw->fc.pause_time = EM_FC_PAUSE_TIME;
2904
2905	hw->fc.send_xon = TRUE;
2906
2907	/* Device specific overrides/settings */
2908	switch (hw->mac.type) {
2909	case e1000_pchlan:
2910		/* Workaround: no TX flow ctrl for PCH */
2911		hw->fc.requested_mode = e1000_fc_rx_pause;
2912		hw->fc.pause_time = 0xFFFF; /* override */
2913		if (ifp->if_mtu > ETHERMTU) {
2914			hw->fc.high_water = 0x3500;
2915			hw->fc.low_water = 0x1500;
2916		} else {
2917			hw->fc.high_water = 0x5000;
2918			hw->fc.low_water = 0x3000;
2919		}
2920		hw->fc.refresh_time = 0x1000;
2921		break;
2922	case e1000_pch2lan:
2923	case e1000_pch_lpt:
2924		hw->fc.high_water = 0x5C20;
2925		hw->fc.low_water = 0x5048;
2926		hw->fc.pause_time = 0x0650;
2927		hw->fc.refresh_time = 0x0400;
2928		/* Jumbos need adjusted PBA */
2929		if (ifp->if_mtu > ETHERMTU)
2930			E1000_WRITE_REG(hw, E1000_PBA, 12);
2931		else
2932			E1000_WRITE_REG(hw, E1000_PBA, 26);
2933		break;
2934	case e1000_ich9lan:
2935	case e1000_ich10lan:
2936		if (ifp->if_mtu > ETHERMTU) {
2937			hw->fc.high_water = 0x2800;
2938			hw->fc.low_water = hw->fc.high_water - 8;
2939			break;
2940		}
2941		/* else fall thru */
2942	default:
2943		if (hw->mac.type == e1000_80003es2lan)
2944			hw->fc.pause_time = 0xFFFF;
2945		break;
2946	}
2947
2948	/* Issue a global reset */
2949	e1000_reset_hw(hw);
2950	E1000_WRITE_REG(hw, E1000_WUC, 0);
2951	em_disable_aspm(adapter);
2952	/* and a re-init */
2953	if (e1000_init_hw(hw) < 0) {
2954		device_printf(dev, "Hardware Initialization Failed\n");
2955		return;
2956	}
2957
2958	E1000_WRITE_REG(hw, E1000_VET, ETHERTYPE_VLAN);
2959	e1000_get_phy_info(hw);
2960	e1000_check_for_link(hw);
2961	return;
2962}
2963
2964/*********************************************************************
2965 *
2966 *  Setup networking device structure and register an interface.
2967 *
2968 **********************************************************************/
2969static int
2970em_setup_interface(device_t dev, struct adapter *adapter)
2971{
2972	struct ifnet   *ifp;
2973
2974	INIT_DEBUGOUT("em_setup_interface: begin");
2975
2976	ifp = adapter->ifp = if_alloc(IFT_ETHER);
2977	if (ifp == NULL) {
2978		device_printf(dev, "can not allocate ifnet structure\n");
2979		return (-1);
2980	}
2981	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
2982	ifp->if_init =  em_init;
2983	ifp->if_softc = adapter;
2984	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
2985	ifp->if_ioctl = em_ioctl;
2986#ifdef EM_MULTIQUEUE
2987	/* Multiqueue stack interface */
2988	ifp->if_transmit = em_mq_start;
2989	ifp->if_qflush = em_qflush;
2990#else
2991	ifp->if_start = em_start;
2992	IFQ_SET_MAXLEN(&ifp->if_snd, adapter->num_tx_desc - 1);
2993	ifp->if_snd.ifq_drv_maxlen = adapter->num_tx_desc - 1;
2994	IFQ_SET_READY(&ifp->if_snd);
2995#endif
2996
2997	ether_ifattach(ifp, adapter->hw.mac.addr);
2998
2999	ifp->if_capabilities = ifp->if_capenable = 0;
3000
3001
3002	ifp->if_capabilities |= IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM;
3003	ifp->if_capabilities |= IFCAP_TSO4;
3004	/*
3005	 * Tell the upper layer(s) we
3006	 * support full VLAN capability
3007	 */
3008	ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header);
3009	ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING
3010			     |  IFCAP_VLAN_HWTSO
3011			     |  IFCAP_VLAN_MTU;
3012	ifp->if_capenable = ifp->if_capabilities;
3013
3014	/*
3015	** Don't turn this on by default: if vlans are
3016	** created on another pseudo device (e.g. lagg),
3017	** then vlan events are not passed through, breaking
3018	** operation, but with HW FILTER off it works. If
3019	** using vlans directly on the em driver you can
3020	** enable this and get full hardware tag filtering.
3021	*/
3022	ifp->if_capabilities |= IFCAP_VLAN_HWFILTER;
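	/*
	 * Example (annotation, illustrative only): when vlans hang
	 * directly off the port, the filter can be enabled at runtime:
	 *
	 *	ifconfig em0 vlanhwfilter
	 *	ifconfig em0.100 create
	 */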
3023
3024#ifdef DEVICE_POLLING
3025	ifp->if_capabilities |= IFCAP_POLLING;
3026#endif
3027
3028	/* Enable only WOL MAGIC by default */
3029	if (adapter->wol) {
3030		ifp->if_capabilities |= IFCAP_WOL;
3031		ifp->if_capenable |= IFCAP_WOL_MAGIC;
3032	}
3033
3034	/*
3035	 * Specify the media types supported by this adapter and register
3036	 * callbacks to update media and link information
3037	 */
3038	ifmedia_init(&adapter->media, IFM_IMASK,
3039	    em_media_change, em_media_status);
3040	if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
3041	    (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
3042		u_char fiber_type = IFM_1000_SX;	/* default type */
3043
3044		ifmedia_add(&adapter->media, IFM_ETHER | fiber_type | IFM_FDX,
3045			    0, NULL);
3046		ifmedia_add(&adapter->media, IFM_ETHER | fiber_type, 0, NULL);
3047	} else {
3048		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T, 0, NULL);
3049		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T | IFM_FDX,
3050			    0, NULL);
3051		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX,
3052			    0, NULL);
3053		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX | IFM_FDX,
3054			    0, NULL);
3055		if (adapter->hw.phy.type != e1000_phy_ife) {
3056			ifmedia_add(&adapter->media,
3057				IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
3058			ifmedia_add(&adapter->media,
3059				IFM_ETHER | IFM_1000_T, 0, NULL);
3060		}
3061	}
3062	ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL);
3063	ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO);
3064	return (0);
3065}
3066
3067
3068/*
3069 * Manage DMA'able memory.
3070 */
3071static void
3072em_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
3073{
3074	if (error)
3075		return;
3076	*(bus_addr_t *) arg = segs[0].ds_addr;
3077}
3078
3079static int
3080em_dma_malloc(struct adapter *adapter, bus_size_t size,
3081        struct em_dma_alloc *dma, int mapflags)
3082{
3083	int error;
3084
3085	error = bus_dma_tag_create(bus_get_dma_tag(adapter->dev), /* parent */
3086				EM_DBA_ALIGN, 0,	/* alignment, bounds */
3087				BUS_SPACE_MAXADDR,	/* lowaddr */
3088				BUS_SPACE_MAXADDR,	/* highaddr */
3089				NULL, NULL,		/* filter, filterarg */
3090				size,			/* maxsize */
3091				1,			/* nsegments */
3092				size,			/* maxsegsize */
3093				0,			/* flags */
3094				NULL,			/* lockfunc */
3095				NULL,			/* lockarg */
3096				&dma->dma_tag);
3097	if (error) {
3098		device_printf(adapter->dev,
3099		    "%s: bus_dma_tag_create failed: %d\n",
3100		    __func__, error);
3101		goto fail_0;
3102	}
3103
3104	error = bus_dmamem_alloc(dma->dma_tag, (void**) &dma->dma_vaddr,
3105	    BUS_DMA_NOWAIT | BUS_DMA_COHERENT, &dma->dma_map);
3106	if (error) {
3107		device_printf(adapter->dev,
3108		    "%s: bus_dmamem_alloc(%ju) failed: %d\n",
3109		    __func__, (uintmax_t)size, error);
3110		goto fail_2;
3111	}
3112
3113	dma->dma_paddr = 0;
3114	error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
3115	    size, em_dmamap_cb, &dma->dma_paddr, mapflags | BUS_DMA_NOWAIT);
3116	if (error || dma->dma_paddr == 0) {
3117		device_printf(adapter->dev,
3118		    "%s: bus_dmamap_load failed: %d\n",
3119		    __func__, error);
3120		goto fail_3;
3121	}
3122
3123	return (0);
3124
3125fail_3:
3126	bus_dmamap_unload(dma->dma_tag, dma->dma_map);
3127fail_2:
3128	bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
3129	bus_dma_tag_destroy(dma->dma_tag);
3130fail_0:
3131	dma->dma_map = NULL;
3132	dma->dma_tag = NULL;
3133
3134	return (error);
3135}
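/*
 * Usage sketch (annotation): callers size the area for a descriptor
 * ring and keep both the kernel and bus addresses, mirroring
 * em_allocate_queues() below:
 *
 *	if (em_dma_malloc(adapter, tsize, &txr->txdma, BUS_DMA_NOWAIT))
 *		return (ENOMEM);
 *	txr->tx_base = (struct e1000_tx_desc *)txr->txdma.dma_vaddr;
 */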
3136
3137static void
3138em_dma_free(struct adapter *adapter, struct em_dma_alloc *dma)
3139{
3140	if (dma->dma_tag == NULL)
3141		return;
3142	if (dma->dma_map != NULL) {
3143		bus_dmamap_sync(dma->dma_tag, dma->dma_map,
3144		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
3145		bus_dmamap_unload(dma->dma_tag, dma->dma_map);
3146		bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
3147		dma->dma_map = NULL;
3148	}
3149	bus_dma_tag_destroy(dma->dma_tag);
3150	dma->dma_tag = NULL;
3151}
3152
3153
3154/*********************************************************************
3155 *
3156 *  Allocate memory for the transmit and receive rings, and then
3157 *  the descriptors associated with each, called only once at attach.
3158 *
3159 **********************************************************************/
3160static int
3161em_allocate_queues(struct adapter *adapter)
3162{
3163	device_t		dev = adapter->dev;
3164	struct tx_ring		*txr = NULL;
3165	struct rx_ring		*rxr = NULL;
3166	int rsize, tsize, error = E1000_SUCCESS;
3167	int txconf = 0, rxconf = 0;
3168
3169
3170	/* Allocate the TX ring struct memory */
3171	if (!(adapter->tx_rings =
3172	    (struct tx_ring *) malloc(sizeof(struct tx_ring) *
3173	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3174		device_printf(dev, "Unable to allocate TX ring memory\n");
3175		error = ENOMEM;
3176		goto fail;
3177	}
3178
3179	/* Now allocate the RX */
3180	if (!(adapter->rx_rings =
3181	    (struct rx_ring *) malloc(sizeof(struct rx_ring) *
3182	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3183		device_printf(dev, "Unable to allocate RX ring memory\n");
3184		error = ENOMEM;
3185		goto rx_fail;
3186	}
3187
3188	tsize = roundup2(adapter->num_tx_desc *
3189	    sizeof(struct e1000_tx_desc), EM_DBA_ALIGN);
3190	/*
3191	 * Now set up the TX queues, txconf is needed to handle the
3192	 * possibility that things fail midcourse and we need to
3193	 * undo memory gracefully
3194	 */
3195	for (int i = 0; i < adapter->num_queues; i++, txconf++) {
3196		/* Set up some basics */
3197		txr = &adapter->tx_rings[i];
3198		txr->adapter = adapter;
3199		txr->me = i;
3200
3201		/* Initialize the TX lock */
3202		snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
3203		    device_get_nameunit(dev), txr->me);
3204		mtx_init(&txr->tx_mtx, txr->mtx_name, NULL, MTX_DEF);
3205
3206		if (em_dma_malloc(adapter, tsize,
3207			&txr->txdma, BUS_DMA_NOWAIT)) {
3208			device_printf(dev,
3209			    "Unable to allocate TX Descriptor memory\n");
3210			error = ENOMEM;
3211			goto err_tx_desc;
3212		}
3213		txr->tx_base = (struct e1000_tx_desc *)txr->txdma.dma_vaddr;
3214		bzero((void *)txr->tx_base, tsize);
3215
3216		if (em_allocate_transmit_buffers(txr)) {
3217			device_printf(dev,
3218			    "Critical Failure setting up transmit buffers\n");
3219			error = ENOMEM;
3220			goto err_tx_desc;
3221		}
3222#if __FreeBSD_version >= 800000
3223		/* Allocate a buf ring */
3224		txr->br = buf_ring_alloc(4096, M_DEVBUF,
3225		    M_WAITOK, &txr->tx_mtx);
3226#endif
3227	}
3228
3229	/*
3230	 * Next the RX queues...
3231	 */
3232	rsize = roundup2(adapter->num_rx_desc *
3233	    sizeof(struct e1000_rx_desc), EM_DBA_ALIGN);
3234	for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
3235		rxr = &adapter->rx_rings[i];
3236		rxr->adapter = adapter;
3237		rxr->me = i;
3238
3239		/* Initialize the RX lock */
3240		snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
3241		    device_get_nameunit(dev), rxr->me);
3242		mtx_init(&rxr->rx_mtx, rxr->mtx_name, NULL, MTX_DEF);
3243
3244		if (em_dma_malloc(adapter, rsize,
3245			&rxr->rxdma, BUS_DMA_NOWAIT)) {
3246			device_printf(dev,
3247			    "Unable to allocate RxDescriptor memory\n");
3248			error = ENOMEM;
3249			goto err_rx_desc;
3250		}
3251		rxr->rx_base = (struct e1000_rx_desc *)rxr->rxdma.dma_vaddr;
3252		bzero((void *)rxr->rx_base, rsize);
3253
3254		/* Allocate receive buffers for the ring */
3255		if (em_allocate_receive_buffers(rxr)) {
3256			device_printf(dev,
3257			    "Critical Failure setting up receive buffers\n");
3258			error = ENOMEM;
3259			goto err_rx_desc;
3260		}
3261	}
3262
3263	return (0);
3264
3265err_rx_desc:
3266	for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
3267		em_dma_free(adapter, &rxr->rxdma);
3268err_tx_desc:
3269	for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
3270		em_dma_free(adapter, &txr->txdma);
3271	free(adapter->rx_rings, M_DEVBUF);
3272rx_fail:
3273#if __FreeBSD_version >= 800000
3274	buf_ring_free(txr->br, M_DEVBUF);
3275#endif
3276	free(adapter->tx_rings, M_DEVBUF);
3277fail:
3278	return (error);
3279}
3280
3281
3282/*********************************************************************
3283 *
3284 *  Allocate memory for tx_buffer structures. The tx_buffer stores all
3285 *  the information needed to transmit a packet on the wire. This is
3286 *  called only once at attach, setup is done every reset.
3287 *
3288 **********************************************************************/
3289static int
3290em_allocate_transmit_buffers(struct tx_ring *txr)
3291{
3292	struct adapter *adapter = txr->adapter;
3293	device_t dev = adapter->dev;
3294	struct em_buffer *txbuf;
3295	int error, i;
3296
3297	/*
3298	 * Setup DMA descriptor areas.
3299	 */
3300	if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
3301			       1, 0,			/* alignment, bounds */
3302			       BUS_SPACE_MAXADDR,	/* lowaddr */
3303			       BUS_SPACE_MAXADDR,	/* highaddr */
3304			       NULL, NULL,		/* filter, filterarg */
3305			       EM_TSO_SIZE,		/* maxsize */
3306			       EM_MAX_SCATTER,		/* nsegments */
3307			       PAGE_SIZE,		/* maxsegsize */
3308			       0,			/* flags */
3309			       NULL,			/* lockfunc */
3310			       NULL,			/* lockfuncarg */
3311			       &txr->txtag))) {
3312		device_printf(dev,"Unable to allocate TX DMA tag\n");
3313		goto fail;
3314	}
3315
3316	if (!(txr->tx_buffers =
3317	    (struct em_buffer *) malloc(sizeof(struct em_buffer) *
3318	    adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3319		device_printf(dev, "Unable to allocate tx_buffer memory\n");
3320		error = ENOMEM;
3321		goto fail;
3322	}
3323
3324	/* Create the descriptor buffer dma maps */
3325	txbuf = txr->tx_buffers;
3326	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3327		error = bus_dmamap_create(txr->txtag, 0, &txbuf->map);
3328		if (error != 0) {
3329			device_printf(dev, "Unable to create TX DMA map\n");
3330			goto fail;
3331		}
3332	}
3333
3334	return (0);
3335fail:
3336	/* We free all, it handles case where we are in the middle */
3337	em_free_transmit_structures(adapter);
3338	return (error);
3339}
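/*
 * Sizing note (annotation, assuming PAGE_SIZE = 4096 on amd64): the
 * TX DMA tag above allows one mbuf chain to map to at most
 * EM_MAX_SCATTER segments of at most a page each, with EM_TSO_SIZE
 * capping the total, so a maximal TSO payload always fits a single
 * bus_dmamap_load_mbuf_sg() call in em_xmit().
 */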
3340
3341/*********************************************************************
3342 *
3343 *  Initialize a transmit ring.
3344 *
3345 **********************************************************************/
3346static void
3347em_setup_transmit_ring(struct tx_ring *txr)
3348{
3349	struct adapter *adapter = txr->adapter;
3350	struct em_buffer *txbuf;
3351	int i;
3352#ifdef DEV_NETMAP
3353	struct netmap_adapter *na = NA(adapter->ifp);
3354	struct netmap_slot *slot;
3355#endif /* DEV_NETMAP */
3356
3357	/* Clear the old descriptor contents */
3358	EM_TX_LOCK(txr);
3359#ifdef DEV_NETMAP
3360	slot = netmap_reset(na, NR_TX, txr->me, 0);
3361#endif /* DEV_NETMAP */
3362
3363	bzero((void *)txr->tx_base,
3364	      (sizeof(struct e1000_tx_desc)) * adapter->num_tx_desc);
3365	/* Reset indices */
3366	txr->next_avail_desc = 0;
3367	txr->next_to_clean = 0;
3368
3369	/* Free any existing tx buffers. */
3370	txbuf = txr->tx_buffers;
3371	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3372		if (txbuf->m_head != NULL) {
3373			bus_dmamap_sync(txr->txtag, txbuf->map,
3374			    BUS_DMASYNC_POSTWRITE);
3375			bus_dmamap_unload(txr->txtag, txbuf->map);
3376			m_freem(txbuf->m_head);
3377			txbuf->m_head = NULL;
3378		}
3379#ifdef DEV_NETMAP
3380		if (slot) {
3381			int si = netmap_idx_n2k(&na->tx_rings[txr->me], i);
3382			uint64_t paddr;
3383			void *addr;
3384
3385			addr = PNMB(slot + si, &paddr);
3386			txr->tx_base[i].buffer_addr = htole64(paddr);
3387			/* reload the map for netmap mode */
3388			netmap_load_map(txr->txtag, txbuf->map, addr);
3389		}
3390#endif /* DEV_NETMAP */
3391
3392		/* clear the watch index */
3393		txbuf->next_eop = -1;
	}
3395
3396	/* Set number of descriptors available */
3397	txr->tx_avail = adapter->num_tx_desc;
3398	txr->queue_status = EM_QUEUE_IDLE;
3399
3400	/* Clear checksum offload context. */
3401	txr->last_hw_offload = 0;
3402	txr->last_hw_ipcss = 0;
3403	txr->last_hw_ipcso = 0;
3404	txr->last_hw_tucss = 0;
3405	txr->last_hw_tucso = 0;
3406
3407	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3408	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3409	EM_TX_UNLOCK(txr);
3410}
3411
3412/*********************************************************************
3413 *
3414 *  Initialize all transmit rings.
3415 *
3416 **********************************************************************/
3417static void
3418em_setup_transmit_structures(struct adapter *adapter)
3419{
3420	struct tx_ring *txr = adapter->tx_rings;
3421
3422	for (int i = 0; i < adapter->num_queues; i++, txr++)
3423		em_setup_transmit_ring(txr);
3424
3425	return;
3426}
3427
3428/*********************************************************************
3429 *
3430 *  Enable transmit unit.
3431 *
3432 **********************************************************************/
3433static void
3434em_initialize_transmit_unit(struct adapter *adapter)
3435{
3436	struct tx_ring	*txr = adapter->tx_rings;
3437	struct e1000_hw	*hw = &adapter->hw;
3438	u32	tctl, tarc, tipg = 0;
3439
	INIT_DEBUGOUT("em_initialize_transmit_unit: begin");
3441
3442	for (int i = 0; i < adapter->num_queues; i++, txr++) {
3443		u64 bus_addr = txr->txdma.dma_paddr;
3444		/* Base and Len of TX Ring */
3445		E1000_WRITE_REG(hw, E1000_TDLEN(i),
3446	    	    adapter->num_tx_desc * sizeof(struct e1000_tx_desc));
3447		E1000_WRITE_REG(hw, E1000_TDBAH(i),
3448	    	    (u32)(bus_addr >> 32));
3449		E1000_WRITE_REG(hw, E1000_TDBAL(i),
3450	    	    (u32)bus_addr);
3451		/* Init the HEAD/TAIL indices */
3452		E1000_WRITE_REG(hw, E1000_TDT(i), 0);
3453		E1000_WRITE_REG(hw, E1000_TDH(i), 0);
3454
3455		HW_DEBUGOUT2("Base = %x, Length = %x\n",
3456		    E1000_READ_REG(&adapter->hw, E1000_TDBAL(i)),
3457		    E1000_READ_REG(&adapter->hw, E1000_TDLEN(i)));
3458
3459		txr->queue_status = EM_QUEUE_IDLE;
3460	}
3461
3462	/* Set the default values for the Tx Inter Packet Gap timer */
3463	switch (adapter->hw.mac.type) {
3464	case e1000_80003es2lan:
3465		tipg = DEFAULT_82543_TIPG_IPGR1;
3466		tipg |= DEFAULT_80003ES2LAN_TIPG_IPGR2 <<
3467		    E1000_TIPG_IPGR2_SHIFT;
3468		break;
3469	default:
3470		if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
3471		    (adapter->hw.phy.media_type ==
3472		    e1000_media_type_internal_serdes))
3473			tipg = DEFAULT_82543_TIPG_IPGT_FIBER;
3474		else
3475			tipg = DEFAULT_82543_TIPG_IPGT_COPPER;
3476		tipg |= DEFAULT_82543_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT;
3477		tipg |= DEFAULT_82543_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT;
3478	}
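	/*
	 * TIPG packs three fields: IPGT in the low bits and IPGR1/IPGR2
	 * at E1000_TIPG_IPGR1_SHIFT and E1000_TIPG_IPGR2_SHIFT (10 and
	 * 20 in the shared e1000 headers), so the OR/shift sequence
	 * above assembles the whole register in one value.
	 */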
3479
3480	E1000_WRITE_REG(&adapter->hw, E1000_TIPG, tipg);
3481	E1000_WRITE_REG(&adapter->hw, E1000_TIDV, adapter->tx_int_delay.value);
3482
	if (adapter->hw.mac.type >= e1000_82540)
3484		E1000_WRITE_REG(&adapter->hw, E1000_TADV,
3485		    adapter->tx_abs_int_delay.value);
3486
3487	if ((adapter->hw.mac.type == e1000_82571) ||
3488	    (adapter->hw.mac.type == e1000_82572)) {
3489		tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
3490		tarc |= SPEED_MODE_BIT;
3491		E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3492	} else if (adapter->hw.mac.type == e1000_80003es2lan) {
3493		tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
3494		tarc |= 1;
3495		E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3496		tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(1));
3497		tarc |= 1;
3498		E1000_WRITE_REG(&adapter->hw, E1000_TARC(1), tarc);
3499	}
3500
3501	adapter->txd_cmd = E1000_TXD_CMD_IFCS;
3502	if (adapter->tx_int_delay.value > 0)
3503		adapter->txd_cmd |= E1000_TXD_CMD_IDE;
3504
3505	/* Program the Transmit Control Register */
3506	tctl = E1000_READ_REG(&adapter->hw, E1000_TCTL);
3507	tctl &= ~E1000_TCTL_CT;
3508	tctl |= (E1000_TCTL_PSP | E1000_TCTL_RTLC | E1000_TCTL_EN |
3509		   (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT));
3510
3511	if (adapter->hw.mac.type >= e1000_82571)
3512		tctl |= E1000_TCTL_MULR;
3513
3514	/* This write will effectively turn on the transmit unit. */
3515	E1000_WRITE_REG(&adapter->hw, E1000_TCTL, tctl);
3516
3517}
3518
3519
3520/*********************************************************************
3521 *
3522 *  Free all transmit rings.
3523 *
3524 **********************************************************************/
3525static void
3526em_free_transmit_structures(struct adapter *adapter)
3527{
3528	struct tx_ring *txr = adapter->tx_rings;
3529
3530	for (int i = 0; i < adapter->num_queues; i++, txr++) {
3531		EM_TX_LOCK(txr);
3532		em_free_transmit_buffers(txr);
3533		em_dma_free(adapter, &txr->txdma);
3534		EM_TX_UNLOCK(txr);
3535		EM_TX_LOCK_DESTROY(txr);
3536	}
3537
3538	free(adapter->tx_rings, M_DEVBUF);
3539}
3540
3541/*********************************************************************
3542 *
3543 *  Free transmit ring related data structures.
3544 *
3545 **********************************************************************/
3546static void
3547em_free_transmit_buffers(struct tx_ring *txr)
3548{
3549	struct adapter		*adapter = txr->adapter;
3550	struct em_buffer	*txbuf;
3551
3552	INIT_DEBUGOUT("free_transmit_ring: begin");
3553
3554	if (txr->tx_buffers == NULL)
3555		return;
3556
3557	for (int i = 0; i < adapter->num_tx_desc; i++) {
3558		txbuf = &txr->tx_buffers[i];
3559		if (txbuf->m_head != NULL) {
3560			bus_dmamap_sync(txr->txtag, txbuf->map,
3561			    BUS_DMASYNC_POSTWRITE);
3562			bus_dmamap_unload(txr->txtag,
3563			    txbuf->map);
3564			m_freem(txbuf->m_head);
3565			txbuf->m_head = NULL;
3566			if (txbuf->map != NULL) {
3567				bus_dmamap_destroy(txr->txtag,
3568				    txbuf->map);
3569				txbuf->map = NULL;
3570			}
3571		} else if (txbuf->map != NULL) {
3572			bus_dmamap_unload(txr->txtag,
3573			    txbuf->map);
3574			bus_dmamap_destroy(txr->txtag,
3575			    txbuf->map);
3576			txbuf->map = NULL;
3577		}
3578	}
3579#if __FreeBSD_version >= 800000
3580	if (txr->br != NULL)
3581		buf_ring_free(txr->br, M_DEVBUF);
3582#endif
3583	if (txr->tx_buffers != NULL) {
3584		free(txr->tx_buffers, M_DEVBUF);
3585		txr->tx_buffers = NULL;
3586	}
3587	if (txr->txtag != NULL) {
3588		bus_dma_tag_destroy(txr->txtag);
3589		txr->txtag = NULL;
3590	}
3591	return;
3592}
3593
3594
3595/*********************************************************************
 *  The offload context is protocol specific (TCP/UDP) and thus
 *  only needs to be set when the protocol changes. The occasion
 *  of a context change can be a performance detriment, and
 *  might be better just disabled. The reason arises in the way
 *  in which the controller supports pipelined requests from the
 *  Tx data DMA. Up to four requests can be pipelined, and they may
 *  belong to the same packet or to multiple packets. However all
 *  requests for one packet are issued before a request is issued
 *  for a subsequent packet, and if a request for the next packet
 *  requires a context change that request will be stalled
 *  until the previous request completes. This means setting up
 *  a new context effectively disables pipelined Tx data DMA,
 *  which in turn greatly slows down performance when sending
 *  small frames.
3610 **********************************************************************/
3611static void
3612em_transmit_checksum_setup(struct tx_ring *txr, struct mbuf *mp, int ip_off,
3613    struct ip *ip, u32 *txd_upper, u32 *txd_lower)
3614{
3615	struct adapter			*adapter = txr->adapter;
3616	struct e1000_context_desc	*TXD = NULL;
3617	struct em_buffer		*tx_buffer;
3618	int				cur, hdr_len;
3619	u32				cmd = 0;
3620	u16				offload = 0;
3621	u8				ipcso, ipcss, tucso, tucss;
3622
3623	ipcss = ipcso = tucss = tucso = 0;
3624	hdr_len = ip_off + (ip->ip_hl << 2);
3625	cur = txr->next_avail_desc;
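	/*
	 * Worked example, assuming an untagged Ethernet frame: ip_off
	 * is 14, so a standard 20-byte IP header (ip_hl == 5) gives
	 * hdr_len == 34; ipcso then lands on byte 24 (14 plus the
	 * 10-byte offset of ip_sum) and, for TCP, tucso on byte 50
	 * (34 plus the 16-byte offset of th_sum).
	 */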
3626
3627	/* Setup of IP header checksum. */
3628	if (mp->m_pkthdr.csum_flags & CSUM_IP) {
3629		*txd_upper |= E1000_TXD_POPTS_IXSM << 8;
3630		offload |= CSUM_IP;
3631		ipcss = ip_off;
3632		ipcso = ip_off + offsetof(struct ip, ip_sum);
3633		/*
3634		 * Start offset for header checksum calculation.
3635		 * End offset for header checksum calculation.
3636		 * Offset of place to put the checksum.
3637		 */
3638		TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
3639		TXD->lower_setup.ip_fields.ipcss = ipcss;
3640		TXD->lower_setup.ip_fields.ipcse = htole16(hdr_len);
3641		TXD->lower_setup.ip_fields.ipcso = ipcso;
3642		cmd |= E1000_TXD_CMD_IP;
3643	}
3644
3645	if (mp->m_pkthdr.csum_flags & CSUM_TCP) {
3646 		*txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
3647 		*txd_upper |= E1000_TXD_POPTS_TXSM << 8;
3648 		offload |= CSUM_TCP;
3649 		tucss = hdr_len;
3650 		tucso = hdr_len + offsetof(struct tcphdr, th_sum);
		/*
		 * Setting up a new checksum offload context for every
		 * frame takes a lot of processing time for the hardware.
		 * This also greatly reduces performance for small frames,
		 * so avoid it if the driver can reuse the previously
		 * configured checksum offload context.
		 */
3658 		if (txr->last_hw_offload == offload) {
3659 			if (offload & CSUM_IP) {
3660 				if (txr->last_hw_ipcss == ipcss &&
3661 				    txr->last_hw_ipcso == ipcso &&
3662 				    txr->last_hw_tucss == tucss &&
3663 				    txr->last_hw_tucso == tucso)
3664 					return;
3665 			} else {
3666 				if (txr->last_hw_tucss == tucss &&
3667 				    txr->last_hw_tucso == tucso)
3668 					return;
3669 			}
3670  		}
3671 		txr->last_hw_offload = offload;
3672 		txr->last_hw_tucss = tucss;
3673 		txr->last_hw_tucso = tucso;
3674 		/*
3675 		 * Start offset for payload checksum calculation.
3676 		 * End offset for payload checksum calculation.
3677 		 * Offset of place to put the checksum.
3678 		 */
3679		TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
		TXD->upper_setup.tcp_fields.tucss = tucss;
3681 		TXD->upper_setup.tcp_fields.tucse = htole16(0);
3682 		TXD->upper_setup.tcp_fields.tucso = tucso;
3683 		cmd |= E1000_TXD_CMD_TCP;
3684 	} else if (mp->m_pkthdr.csum_flags & CSUM_UDP) {
3685 		*txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
3686 		*txd_upper |= E1000_TXD_POPTS_TXSM << 8;
3687 		tucss = hdr_len;
3688 		tucso = hdr_len + offsetof(struct udphdr, uh_sum);
		/*
		 * Setting up a new checksum offload context for every
		 * frame takes a lot of processing time for the hardware.
		 * This also greatly reduces performance for small frames,
		 * so avoid it if the driver can reuse the previously
		 * configured checksum offload context.
		 */
3696 		if (txr->last_hw_offload == offload) {
3697 			if (offload & CSUM_IP) {
3698 				if (txr->last_hw_ipcss == ipcss &&
3699 				    txr->last_hw_ipcso == ipcso &&
3700 				    txr->last_hw_tucss == tucss &&
3701 				    txr->last_hw_tucso == tucso)
3702 					return;
3703 			} else {
3704 				if (txr->last_hw_tucss == tucss &&
3705 				    txr->last_hw_tucso == tucso)
3706 					return;
3707 			}
3708 		}
3709 		txr->last_hw_offload = offload;
3710 		txr->last_hw_tucss = tucss;
3711 		txr->last_hw_tucso = tucso;
3712 		/*
3713 		 * Start offset for header checksum calculation.
3714 		 * End offset for header checksum calculation.
3715 		 * Offset of place to put the checksum.
3716 		 */
3717		TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
3718 		TXD->upper_setup.tcp_fields.tucss = tucss;
3719 		TXD->upper_setup.tcp_fields.tucse = htole16(0);
3720 		TXD->upper_setup.tcp_fields.tucso = tucso;
3721  	}
3722
3723 	if (offload & CSUM_IP) {
3724 		txr->last_hw_ipcss = ipcss;
3725 		txr->last_hw_ipcso = ipcso;
3726  	}
3727
3728	TXD->tcp_seg_setup.data = htole32(0);
3729	TXD->cmd_and_length =
3730	    htole32(adapter->txd_cmd | E1000_TXD_CMD_DEXT | cmd);
3731	tx_buffer = &txr->tx_buffers[cur];
3732	tx_buffer->m_head = NULL;
3733	tx_buffer->next_eop = -1;
3734
3735	if (++cur == adapter->num_tx_desc)
3736		cur = 0;
3737
3738	txr->tx_avail--;
3739	txr->next_avail_desc = cur;
3740}
3741
3742
3743/**********************************************************************
3744 *
3745 *  Setup work for hardware segmentation offload (TSO)
3746 *
3747 **********************************************************************/
3748static void
3749em_tso_setup(struct tx_ring *txr, struct mbuf *mp, int ip_off,
3750    struct ip *ip, struct tcphdr *tp, u32 *txd_upper, u32 *txd_lower)
3751{
3752	struct adapter			*adapter = txr->adapter;
3753	struct e1000_context_desc	*TXD;
3754	struct em_buffer		*tx_buffer;
3755	int cur, hdr_len;
3756
3757	/*
	 * In theory we can reuse the same TSO context if and only if
	 * the frame is of the same type (IP/TCP) and has the same MSS.
	 * However, checking whether a frame has the same IP/TCP
	 * structure is a hard thing, so just ignore that and always
	 * establish a new TSO context.
3763	 */
3764	hdr_len = ip_off + (ip->ip_hl << 2) + (tp->th_off << 2);
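	/*
	 * E.g. a 14-byte Ethernet header with no IP or TCP options
	 * (ip_hl == 5, th_off == 5) yields hdr_len == 14 + 20 + 20 = 54;
	 * everything past that is segmented payload.
	 */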
3765	*txd_lower = (E1000_TXD_CMD_DEXT |	/* Extended descr type */
3766		      E1000_TXD_DTYP_D |	/* Data descr type */
3767		      E1000_TXD_CMD_TSE);	/* Do TSE on this packet */
3768
3769	/* IP and/or TCP header checksum calculation and insertion. */
3770	*txd_upper = (E1000_TXD_POPTS_IXSM | E1000_TXD_POPTS_TXSM) << 8;
3771
3772	cur = txr->next_avail_desc;
3773	tx_buffer = &txr->tx_buffers[cur];
3774	TXD = (struct e1000_context_desc *) &txr->tx_base[cur];
3775
3776	/*
3777	 * Start offset for header checksum calculation.
3778	 * End offset for header checksum calculation.
	 * Offset of place to put the checksum.
3780	 */
3781	TXD->lower_setup.ip_fields.ipcss = ip_off;
3782	TXD->lower_setup.ip_fields.ipcse =
3783	    htole16(ip_off + (ip->ip_hl << 2) - 1);
3784	TXD->lower_setup.ip_fields.ipcso = ip_off + offsetof(struct ip, ip_sum);
3785	/*
3786	 * Start offset for payload checksum calculation.
3787	 * End offset for payload checksum calculation.
3788	 * Offset of place to put the checksum.
3789	 */
3790	TXD->upper_setup.tcp_fields.tucss = ip_off + (ip->ip_hl << 2);
3791	TXD->upper_setup.tcp_fields.tucse = 0;
3792	TXD->upper_setup.tcp_fields.tucso =
3793	    ip_off + (ip->ip_hl << 2) + offsetof(struct tcphdr, th_sum);
3794	/*
3795	 * Payload size per packet w/o any headers.
3796	 * Length of all headers up to payload.
3797	 */
3798	TXD->tcp_seg_setup.fields.mss = htole16(mp->m_pkthdr.tso_segsz);
3799	TXD->tcp_seg_setup.fields.hdr_len = hdr_len;
3800
3801	TXD->cmd_and_length = htole32(adapter->txd_cmd |
3802				E1000_TXD_CMD_DEXT |	/* Extended descr */
3803				E1000_TXD_CMD_TSE |	/* TSE context */
3804				E1000_TXD_CMD_IP |	/* Do IP csum */
3805				E1000_TXD_CMD_TCP |	/* Do TCP checksum */
3806				(mp->m_pkthdr.len - (hdr_len))); /* Total len */
3807
3808	tx_buffer->m_head = NULL;
3809	tx_buffer->next_eop = -1;
3810
3811	if (++cur == adapter->num_tx_desc)
3812		cur = 0;
3813
3814	txr->tx_avail--;
3815	txr->next_avail_desc = cur;
3816	txr->tx_tso = TRUE;
3817}
3818
3819
3820/**********************************************************************
3821 *
3822 *  Examine each tx_buffer in the used queue. If the hardware is done
3823 *  processing the packet then free associated resources. The
3824 *  tx_buffer is put back on the free queue.
3825 *
3826 **********************************************************************/
3827static void
3828em_txeof(struct tx_ring *txr)
3829{
3830	struct adapter	*adapter = txr->adapter;
	int first, last, done, processed;
	struct em_buffer *tx_buffer;
	struct e1000_tx_desc   *tx_desc, *eop_desc;
3834	struct ifnet   *ifp = adapter->ifp;
3835
3836	EM_TX_LOCK_ASSERT(txr);
3837#ifdef DEV_NETMAP
3838	if (netmap_tx_irq(ifp, txr->me))
3839		return;
3840#endif /* DEV_NETMAP */
3841
3842	/* No work, make sure watchdog is off */
	if (txr->tx_avail == adapter->num_tx_desc) {
		txr->queue_status = EM_QUEUE_IDLE;
		return;
	}
3847
3848	processed = 0;
	first = txr->next_to_clean;
	tx_desc = &txr->tx_base[first];
	tx_buffer = &txr->tx_buffers[first];
	last = tx_buffer->next_eop;
	eop_desc = &txr->tx_base[last];
3854
3855	/*
	 * Get the index of the first descriptor AFTER
	 * the EOP of the first packet, so that the
	 * inner while loop can use a simple comparison.
3860	 */
3861	if (++last == adapter->num_tx_desc)
3862 		last = 0;
3863	done = last;
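	/*
	 * Example: if first == 5 and the EOP sits at descriptor 7,
	 * done becomes 8, so the inner loop below cleans 5, 6 and 7
	 * and stops once first catches up with done.
	 */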
3864
	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
	    BUS_DMASYNC_POSTREAD);

	while (eop_desc->upper.fields.status & E1000_TXD_STAT_DD) {
		/* We clean the range of the packet */
		while (first != done) {
			tx_desc->upper.data = 0;
			tx_desc->lower.data = 0;
			tx_desc->buffer_addr = 0;
			++txr->tx_avail;
			++processed;

			if (tx_buffer->m_head) {
				bus_dmamap_sync(txr->txtag,
				    tx_buffer->map,
				    BUS_DMASYNC_POSTWRITE);
				bus_dmamap_unload(txr->txtag,
				    tx_buffer->map);
				m_freem(tx_buffer->m_head);
				tx_buffer->m_head = NULL;
			}
			tx_buffer->next_eop = -1;
			txr->watchdog_time = ticks;

			if (++first == adapter->num_tx_desc)
				first = 0;

			tx_buffer = &txr->tx_buffers[first];
			tx_desc = &txr->tx_base[first];
		}
		++ifp->if_opackets;
		/* See if we can continue to the next packet */
		last = tx_buffer->next_eop;
		if (last != -1) {
			eop_desc = &txr->tx_base[last];
			/* Get new done point */
			if (++last == adapter->num_tx_desc)
				last = 0;
			done = last;
		} else
			break;
	}
	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);

	txr->next_to_clean = first;
3910
3911	/*
	** Watchdog calculation: we know there is work
	** outstanding or the first return above would
	** have been taken, so nothing processed for too
	** long indicates a hang. The local timer will
	** examine this and do a reset if needed.
3917	*/
3918	if ((!processed) && ((ticks - txr->watchdog_time) > EM_WATCHDOG))
3919		txr->queue_status = EM_QUEUE_HUNG;
3920
	/*
	 * If we have a minimum free, clear IFF_DRV_OACTIVE
	 * to tell the stack that it is OK to send packets.
	 * Notice that all writes of OACTIVE happen under the
	 * TX lock which, with a single queue, guarantees
	 * sanity.
	 */
	if (txr->tx_avail >= EM_MAX_SCATTER)
3929		ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
3930
3931	/* Disable watchdog if all clean */
3932	if (txr->tx_avail == adapter->num_tx_desc) {
3933		txr->queue_status = EM_QUEUE_IDLE;
3934	}
3935}
3936
3937
3938/*********************************************************************
3939 *
3940 *  Refresh RX descriptor mbufs from system mbuf buffer pool.
3941 *
3942 **********************************************************************/
3943static void
3944em_refresh_mbufs(struct rx_ring *rxr, int limit)
3945{
3946	struct adapter		*adapter = rxr->adapter;
3947	struct mbuf		*m;
3948	bus_dma_segment_t	segs[1];
3949	struct em_buffer	*rxbuf;
3950	int			i, j, error, nsegs;
3951	bool			cleaned = FALSE;
3952
3953	i = j = rxr->next_to_refresh;
3954	/*
3955	** Get one descriptor beyond
3956	** our work mark to control
3957	** the loop.
3958	*/
3959	if (++j == adapter->num_rx_desc)
3960		j = 0;
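	/*
	 * Example: with next_to_refresh == 255 in a 256-descriptor
	 * ring, i starts at 255 while j wraps to 0; descriptors are
	 * refreshed until j reaches limit, so the slot at limit
	 * itself is never touched.
	 */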
3961
3962	while (j != limit) {
3963		rxbuf = &rxr->rx_buffers[i];
3964		if (rxbuf->m_head == NULL) {
3965			m = m_getjcl(M_NOWAIT, MT_DATA,
3966			    M_PKTHDR, adapter->rx_mbuf_sz);
3967			/*
			** If we have a temporary resource shortage
			** that causes a failure, just abort the
			** refresh for now; we will return to this
			** point when reinvoked from em_rxeof.
3972			*/
3973			if (m == NULL)
3974				goto update;
3975		} else
3976			m = rxbuf->m_head;
3977
3978		m->m_len = m->m_pkthdr.len = adapter->rx_mbuf_sz;
3979		m->m_flags |= M_PKTHDR;
3980		m->m_data = m->m_ext.ext_buf;
3981
3982		/* Use bus_dma machinery to setup the memory mapping  */
3983		error = bus_dmamap_load_mbuf_sg(rxr->rxtag, rxbuf->map,
3984		    m, segs, &nsegs, BUS_DMA_NOWAIT);
3985		if (error != 0) {
3986			printf("Refresh mbufs: hdr dmamap load"
3987			    " failure - %d\n", error);
3988			m_free(m);
3989			rxbuf->m_head = NULL;
3990			goto update;
3991		}
3992		rxbuf->m_head = m;
3993		bus_dmamap_sync(rxr->rxtag,
3994		    rxbuf->map, BUS_DMASYNC_PREREAD);
3995		rxr->rx_base[i].buffer_addr = htole64(segs[0].ds_addr);
3996		cleaned = TRUE;
3997
		i = j; /* Next is precalculated for us */
3999		rxr->next_to_refresh = i;
4000		/* Calculate next controlling index */
4001		if (++j == adapter->num_rx_desc)
4002			j = 0;
4003	}
4004update:
4005	/*
	** Update the tail pointer only if,
	** and only as far as, we have refreshed.
4008	*/
4009	if (cleaned)
4010		E1000_WRITE_REG(&adapter->hw,
4011		    E1000_RDT(rxr->me), rxr->next_to_refresh);
4012
4013	return;
4014}
4015
4016
4017/*********************************************************************
4018 *
4019 *  Allocate memory for rx_buffer structures. Since we use one
4020 *  rx_buffer per received packet, the maximum number of rx_buffer's
4021 *  that we'll need is equal to the number of receive descriptors
4022 *  that we've allocated.
4023 *
4024 **********************************************************************/
4025static int
4026em_allocate_receive_buffers(struct rx_ring *rxr)
4027{
4028	struct adapter		*adapter = rxr->adapter;
4029	device_t		dev = adapter->dev;
4030	struct em_buffer	*rxbuf;
4031	int			error;
4032
4033	rxr->rx_buffers = malloc(sizeof(struct em_buffer) *
4034	    adapter->num_rx_desc, M_DEVBUF, M_NOWAIT | M_ZERO);
4035	if (rxr->rx_buffers == NULL) {
4036		device_printf(dev, "Unable to allocate rx_buffer memory\n");
4037		return (ENOMEM);
4038	}
4039
4040	error = bus_dma_tag_create(bus_get_dma_tag(dev), /* parent */
4041				1, 0,			/* alignment, bounds */
4042				BUS_SPACE_MAXADDR,	/* lowaddr */
4043				BUS_SPACE_MAXADDR,	/* highaddr */
4044				NULL, NULL,		/* filter, filterarg */
4045				MJUM9BYTES,		/* maxsize */
4046				1,			/* nsegments */
4047				MJUM9BYTES,		/* maxsegsize */
4048				0,			/* flags */
4049				NULL,			/* lockfunc */
4050				NULL,			/* lockarg */
4051				&rxr->rxtag);
4052	if (error) {
4053		device_printf(dev, "%s: bus_dma_tag_create failed %d\n",
4054		    __func__, error);
4055		goto fail;
4056	}
4057
	rxbuf = rxr->rx_buffers;
	for (int i = 0; i < adapter->num_rx_desc; i++, rxbuf++) {
4061		error = bus_dmamap_create(rxr->rxtag, BUS_DMA_NOWAIT,
4062		    &rxbuf->map);
4063		if (error) {
4064			device_printf(dev, "%s: bus_dmamap_create failed: %d\n",
4065			    __func__, error);
4066			goto fail;
4067		}
4068	}
4069
4070	return (0);
4071
4072fail:
4073	em_free_receive_structures(adapter);
4074	return (error);
4075}
4076
4077
4078/*********************************************************************
4079 *
4080 *  Initialize a receive ring and its buffers.
4081 *
4082 **********************************************************************/
4083static int
4084em_setup_receive_ring(struct rx_ring *rxr)
4085{
4086	struct	adapter 	*adapter = rxr->adapter;
4087	struct em_buffer	*rxbuf;
4088	bus_dma_segment_t	seg[1];
4089	int			rsize, nsegs, error = 0;
4090#ifdef DEV_NETMAP
4091	struct netmap_adapter *na = NA(adapter->ifp);
4092	struct netmap_slot *slot;
4093#endif
4094
4095
4096	/* Clear the ring contents */
4097	EM_RX_LOCK(rxr);
4098	rsize = roundup2(adapter->num_rx_desc *
4099	    sizeof(struct e1000_rx_desc), EM_DBA_ALIGN);
4100	bzero((void *)rxr->rx_base, rsize);
4101#ifdef DEV_NETMAP
4102	slot = netmap_reset(na, NR_RX, 0, 0);
4103#endif
4104
4105	/*
4106	** Free current RX buffer structs and their mbufs
4107	*/
4108	for (int i = 0; i < adapter->num_rx_desc; i++) {
4109		rxbuf = &rxr->rx_buffers[i];
4110		if (rxbuf->m_head != NULL) {
4111			bus_dmamap_sync(rxr->rxtag, rxbuf->map,
4112			    BUS_DMASYNC_POSTREAD);
4113			bus_dmamap_unload(rxr->rxtag, rxbuf->map);
4114			m_freem(rxbuf->m_head);
4115			rxbuf->m_head = NULL; /* mark as freed */
4116		}
4117	}
4118
4119	/* Now replenish the mbufs */
	for (int j = 0; j != adapter->num_rx_desc; ++j) {
4121		rxbuf = &rxr->rx_buffers[j];
4122#ifdef DEV_NETMAP
4123		if (slot) {
4124			int si = netmap_idx_n2k(&na->rx_rings[rxr->me], j);
4125			uint64_t paddr;
4126			void *addr;
4127
4128			addr = PNMB(slot + si, &paddr);
4129			netmap_load_map(rxr->rxtag, rxbuf->map, addr);
4130			/* Update descriptor */
4131			rxr->rx_base[j].buffer_addr = htole64(paddr);
4132			continue;
4133		}
4134#endif /* DEV_NETMAP */
4135		rxbuf->m_head = m_getjcl(M_NOWAIT, MT_DATA,
4136		    M_PKTHDR, adapter->rx_mbuf_sz);
4137		if (rxbuf->m_head == NULL) {
4138			error = ENOBUFS;
4139			goto fail;
4140		}
4141		rxbuf->m_head->m_len = adapter->rx_mbuf_sz;
4142		rxbuf->m_head->m_flags &= ~M_HASFCS; /* we strip it */
4143		rxbuf->m_head->m_pkthdr.len = adapter->rx_mbuf_sz;
4144
4145		/* Get the memory mapping */
4146		error = bus_dmamap_load_mbuf_sg(rxr->rxtag,
4147		    rxbuf->map, rxbuf->m_head, seg,
4148		    &nsegs, BUS_DMA_NOWAIT);
4149		if (error != 0) {
4150			m_freem(rxbuf->m_head);
4151			rxbuf->m_head = NULL;
4152			goto fail;
4153		}
4154		bus_dmamap_sync(rxr->rxtag,
4155		    rxbuf->map, BUS_DMASYNC_PREREAD);
4156
4157		/* Update descriptor */
4158		rxr->rx_base[j].buffer_addr = htole64(seg[0].ds_addr);
4159	}
4160	rxr->next_to_check = 0;
4161	rxr->next_to_refresh = 0;
4162	bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4163	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4164
4165fail:
4166	EM_RX_UNLOCK(rxr);
4167	return (error);
4168}
4169
4170/*********************************************************************
4171 *
4172 *  Initialize all receive rings.
4173 *
4174 **********************************************************************/
4175static int
4176em_setup_receive_structures(struct adapter *adapter)
4177{
4178	struct rx_ring *rxr = adapter->rx_rings;
4179	int q;
4180
4181	for (q = 0; q < adapter->num_queues; q++, rxr++)
4182		if (em_setup_receive_ring(rxr))
4183			goto fail;
4184
4185	return (0);
4186fail:
4187	/*
	 * Free RX buffers allocated so far; we will only handle
	 * the rings that completed, since the failing case will have
	 * cleaned up for itself. 'q' failed, so it is the terminus.
4191	 */
4192	for (int i = 0; i < q; ++i) {
4193		rxr = &adapter->rx_rings[i];
4194		for (int n = 0; n < adapter->num_rx_desc; n++) {
4195			struct em_buffer *rxbuf;
4196			rxbuf = &rxr->rx_buffers[n];
4197			if (rxbuf->m_head != NULL) {
4198				bus_dmamap_sync(rxr->rxtag, rxbuf->map,
4199			  	  BUS_DMASYNC_POSTREAD);
4200				bus_dmamap_unload(rxr->rxtag, rxbuf->map);
4201				m_freem(rxbuf->m_head);
4202				rxbuf->m_head = NULL;
4203			}
4204		}
4205		rxr->next_to_check = 0;
4206		rxr->next_to_refresh = 0;
4207	}
4208
4209	return (ENOBUFS);
4210}
4211
4212/*********************************************************************
4213 *
4214 *  Free all receive rings.
4215 *
4216 **********************************************************************/
4217static void
4218em_free_receive_structures(struct adapter *adapter)
4219{
4220	struct rx_ring *rxr = adapter->rx_rings;
4221
4222	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4223		em_free_receive_buffers(rxr);
4224		/* Free the ring memory as well */
4225		em_dma_free(adapter, &rxr->rxdma);
4226		EM_RX_LOCK_DESTROY(rxr);
4227	}
4228
4229	free(adapter->rx_rings, M_DEVBUF);
4230}
4231
4232
4233/*********************************************************************
4234 *
4235 *  Free receive ring data structures
4236 *
4237 **********************************************************************/
4238static void
4239em_free_receive_buffers(struct rx_ring *rxr)
4240{
4241	struct adapter		*adapter = rxr->adapter;
4242	struct em_buffer	*rxbuf = NULL;
4243
4244	INIT_DEBUGOUT("free_receive_buffers: begin");
4245
4246	if (rxr->rx_buffers != NULL) {
4247		for (int i = 0; i < adapter->num_rx_desc; i++) {
4248			rxbuf = &rxr->rx_buffers[i];
4249			if (rxbuf->map != NULL) {
4250				bus_dmamap_sync(rxr->rxtag, rxbuf->map,
4251				    BUS_DMASYNC_POSTREAD);
4252				bus_dmamap_unload(rxr->rxtag, rxbuf->map);
4253				bus_dmamap_destroy(rxr->rxtag, rxbuf->map);
4254			}
4255			if (rxbuf->m_head != NULL) {
4256				m_freem(rxbuf->m_head);
4257				rxbuf->m_head = NULL;
4258			}
4259		}
4260		free(rxr->rx_buffers, M_DEVBUF);
4261		rxr->rx_buffers = NULL;
4262		rxr->next_to_check = 0;
4263		rxr->next_to_refresh = 0;
4264	}
4265
4266	if (rxr->rxtag != NULL) {
4267		bus_dma_tag_destroy(rxr->rxtag);
4268		rxr->rxtag = NULL;
4269	}
4270
4271	return;
4272}
4273
4274
4275/*********************************************************************
4276 *
4277 *  Enable receive unit.
4278 *
4279 **********************************************************************/
4280
4281static void
4282em_initialize_receive_unit(struct adapter *adapter)
4283{
4284	struct rx_ring	*rxr = adapter->rx_rings;
4285	struct ifnet	*ifp = adapter->ifp;
4286	struct e1000_hw	*hw = &adapter->hw;
4287	u64	bus_addr;
4288	u32	rctl, rxcsum;
4289
4290	INIT_DEBUGOUT("em_initialize_receive_units: begin");
4291
4292	/*
4293	 * Make sure receives are disabled while setting
4294	 * up the descriptor ring
4295	 */
4296	rctl = E1000_READ_REG(hw, E1000_RCTL);
4297	/* Do not disable if ever enabled on this hardware */
4298	if ((hw->mac.type != e1000_82574) && (hw->mac.type != e1000_82583))
4299		E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
4300
4301	E1000_WRITE_REG(&adapter->hw, E1000_RADV,
4302	    adapter->rx_abs_int_delay.value);
4303	/*
4304	 * Set the interrupt throttling rate. Value is calculated
4305	 * as DEFAULT_ITR = 1/(MAX_INTS_PER_SEC * 256ns)
4306	 */
4307	E1000_WRITE_REG(hw, E1000_ITR, DEFAULT_ITR);
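	/*
	 * E.g. assuming MAX_INTS_PER_SEC is 8000 (as in if_em.h),
	 * DEFAULT_ITR is 1/(8000 * 256ns), roughly 488 in the
	 * register's 256ns units.
	 */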
4308
4309	/*
4310	** When using MSIX interrupts we need to throttle
4311	** using the EITR register (82574 only)
4312	*/
4313	if (hw->mac.type == e1000_82574) {
4314		for (int i = 0; i < 4; i++)
4315			E1000_WRITE_REG(hw, E1000_EITR_82574(i),
4316			    DEFAULT_ITR);
4317		/* Disable accelerated acknowledge */
4318		E1000_WRITE_REG(hw, E1000_RFCTL, E1000_RFCTL_ACK_DIS);
4319	}
4320
4321	rxcsum = E1000_READ_REG(hw, E1000_RXCSUM);
4322	if (ifp->if_capenable & IFCAP_RXCSUM)
4323		rxcsum |= E1000_RXCSUM_TUOFL;
4324	else
4325		rxcsum &= ~E1000_RXCSUM_TUOFL;
4326	E1000_WRITE_REG(hw, E1000_RXCSUM, rxcsum);
4327
4328	/*
4329	** XXX TEMPORARY WORKAROUND: on some systems with 82573
4330	** long latencies are observed, like Lenovo X60. This
4331	** change eliminates the problem, but since having positive
4332	** values in RDTR is a known source of problems on other
4333	** platforms another solution is being sought.
4334	*/
4335	if (hw->mac.type == e1000_82573)
4336		E1000_WRITE_REG(hw, E1000_RDTR, 0x20);
4337
4338	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4339		/* Setup the Base and Length of the Rx Descriptor Ring */
4340		u32 rdt = adapter->num_rx_desc - 1; /* default */
4341
4342		bus_addr = rxr->rxdma.dma_paddr;
4343		E1000_WRITE_REG(hw, E1000_RDLEN(i),
4344		    adapter->num_rx_desc * sizeof(struct e1000_rx_desc));
4345		E1000_WRITE_REG(hw, E1000_RDBAH(i), (u32)(bus_addr >> 32));
4346		E1000_WRITE_REG(hw, E1000_RDBAL(i), (u32)bus_addr);
4347		/* Setup the Head and Tail Descriptor Pointers */
4348		E1000_WRITE_REG(hw, E1000_RDH(i), 0);
4349#ifdef DEV_NETMAP
4350		/*
4351		 * an init() while a netmap client is active must
4352		 * preserve the rx buffers passed to userspace.
4353		 */
4354		if (ifp->if_capenable & IFCAP_NETMAP)
4355			rdt -= NA(adapter->ifp)->rx_rings[i].nr_hwavail;
4356#endif /* DEV_NETMAP */
4357		E1000_WRITE_REG(hw, E1000_RDT(i), rdt);
4358	}
4359
4360	/* Set PTHRESH for improved jumbo performance */
4361	if (((adapter->hw.mac.type == e1000_ich9lan) ||
4362	    (adapter->hw.mac.type == e1000_pch2lan) ||
4363	    (adapter->hw.mac.type == e1000_ich10lan)) &&
4364	    (ifp->if_mtu > ETHERMTU)) {
4365		u32 rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(0));
4366		E1000_WRITE_REG(hw, E1000_RXDCTL(0), rxdctl | 3);
4367	}
4368
4369	if (adapter->hw.mac.type >= e1000_pch2lan) {
4370		if (ifp->if_mtu > ETHERMTU)
4371			e1000_lv_jumbo_workaround_ich8lan(hw, TRUE);
4372		else
4373			e1000_lv_jumbo_workaround_ich8lan(hw, FALSE);
4374	}
4375
4376	/* Setup the Receive Control Register */
4377	rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
4378	rctl |= E1000_RCTL_EN | E1000_RCTL_BAM |
4379	    E1000_RCTL_LBM_NO | E1000_RCTL_RDMTS_HALF |
4380	    (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
4381
	/* Strip the CRC */
	rctl |= E1000_RCTL_SECRC;

	/* Make sure VLAN Filters are off */
	rctl &= ~E1000_RCTL_VFE;
	rctl &= ~E1000_RCTL_SBP;
4388
4389	if (adapter->rx_mbuf_sz == MCLBYTES)
4390		rctl |= E1000_RCTL_SZ_2048;
4391	else if (adapter->rx_mbuf_sz == MJUMPAGESIZE)
4392		rctl |= E1000_RCTL_SZ_4096 | E1000_RCTL_BSEX;
4393	else if (adapter->rx_mbuf_sz > MJUMPAGESIZE)
4394		rctl |= E1000_RCTL_SZ_8192 | E1000_RCTL_BSEX;
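	/*
	 * BSEX scales the size encoding by 16, which is how the 4096
	 * and 8192 byte settings above are expressed; the plain 2048
	 * encoding needs no extension.
	 */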
4395
4396	if (ifp->if_mtu > ETHERMTU)
4397		rctl |= E1000_RCTL_LPE;
4398	else
4399		rctl &= ~E1000_RCTL_LPE;
4400
4401	/* Write out the settings */
4402	E1000_WRITE_REG(hw, E1000_RCTL, rctl);
4403
4404	return;
4405}
4406
4407
4408/*********************************************************************
4409 *
 *  This routine executes in interrupt context. It replenishes
 *  the mbufs in the descriptor ring and sends data which has
 *  been dma'ed into host memory to the upper layer.
 *
 *  We loop at most count times if count is > 0, or until done if
 *  count < 0.
 *
 *  For polling we also now return the number of cleaned packets.
 *********************************************************************/
4419static bool
4420em_rxeof(struct rx_ring *rxr, int count, int *done)
4421{
4422	struct adapter		*adapter = rxr->adapter;
4423	struct ifnet		*ifp = adapter->ifp;
4424	struct mbuf		*mp, *sendmp;
4425	u8			status = 0;
4426	u16 			len;
4427	int			i, processed, rxdone = 0;
4428	bool			eop;
4429	struct e1000_rx_desc	*cur;
4430
4431	EM_RX_LOCK(rxr);
4432
4433#ifdef DEV_NETMAP
4434	if (netmap_rx_irq(ifp, rxr->me, &processed)) {
4435		EM_RX_UNLOCK(rxr);
4436		return (FALSE);
4437	}
4438#endif /* DEV_NETMAP */
4439
4440	for (i = rxr->next_to_check, processed = 0; count != 0;) {
4441
4442		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
4443			break;
4444
4445		bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4446		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
4447
4448		cur = &rxr->rx_base[i];
4449		status = cur->status;
4450		mp = sendmp = NULL;
4451
4452		if ((status & E1000_RXD_STAT_DD) == 0)
4453			break;
4454
4455		len = le16toh(cur->length);
4456		eop = (status & E1000_RXD_STAT_EOP) != 0;
4457
4458		if ((cur->errors & E1000_RXD_ERR_FRAME_ERR_MASK) ||
4459		    (rxr->discard == TRUE)) {
4460			adapter->dropped_pkts++;
4461			++rxr->rx_discarded;
4462			if (!eop) /* Catch subsequent segs */
4463				rxr->discard = TRUE;
4464			else
4465				rxr->discard = FALSE;
4466			em_rx_discard(rxr, i);
4467			goto next_desc;
4468		}
4469
4470		/* Assign correct length to the current fragment */
4471		mp = rxr->rx_buffers[i].m_head;
4472		mp->m_len = len;
4473
4474		/* Trigger for refresh */
4475		rxr->rx_buffers[i].m_head = NULL;
4476
4477		/* First segment? */
4478		if (rxr->fmp == NULL) {
4479			mp->m_pkthdr.len = len;
4480			rxr->fmp = rxr->lmp = mp;
4481		} else {
4482			/* Chain mbuf's together */
4483			mp->m_flags &= ~M_PKTHDR;
4484			rxr->lmp->m_next = mp;
4485			rxr->lmp = mp;
4486			rxr->fmp->m_pkthdr.len += len;
4487		}
4488
4489		if (eop) {
4490			--count;
4491			sendmp = rxr->fmp;
4492			sendmp->m_pkthdr.rcvif = ifp;
4493			ifp->if_ipackets++;
4494			em_receive_checksum(cur, sendmp);
4495#ifndef __NO_STRICT_ALIGNMENT
4496			if (adapter->hw.mac.max_frame_size >
4497			    (MCLBYTES - ETHER_ALIGN) &&
4498			    em_fixup_rx(rxr) != 0)
4499				goto skip;
4500#endif
4501			if (status & E1000_RXD_STAT_VP) {
4502				sendmp->m_pkthdr.ether_vtag =
4503				    le16toh(cur->special);
4504				sendmp->m_flags |= M_VLANTAG;
4505			}
4506#ifndef __NO_STRICT_ALIGNMENT
4507skip:
4508#endif
4509			rxr->fmp = rxr->lmp = NULL;
4510		}
4511next_desc:
4512		/* Zero out the receive descriptors status. */
4513		cur->status = 0;
4514		++rxdone;	/* cumulative for POLL */
4515		++processed;
4516
4517		/* Advance our pointers to the next descriptor. */
4518		if (++i == adapter->num_rx_desc)
4519			i = 0;
4520
4521		/* Send to the stack */
4522		if (sendmp != NULL) {
4523			rxr->next_to_check = i;
4524			EM_RX_UNLOCK(rxr);
4525			(*ifp->if_input)(ifp, sendmp);
4526			EM_RX_LOCK(rxr);
4527			i = rxr->next_to_check;
4528		}
4529
4530		/* Only refresh mbufs every 8 descriptors */
4531		if (processed == 8) {
4532			em_refresh_mbufs(rxr, i);
4533			processed = 0;
4534		}
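		/*
		 * Batching the refresh amortizes both the mbuf
		 * allocations and the RDT tail write performed in
		 * em_refresh_mbufs().
		 */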
4535	}
4536
4537	/* Catch any remaining refresh work */
4538	if (e1000_rx_unrefreshed(rxr))
4539		em_refresh_mbufs(rxr, i);
4540
4541	rxr->next_to_check = i;
4542	if (done != NULL)
4543		*done = rxdone;
4544	EM_RX_UNLOCK(rxr);
4545
4546	return ((status & E1000_RXD_STAT_DD) ? TRUE : FALSE);
4547}
4548
4549static __inline void
4550em_rx_discard(struct rx_ring *rxr, int i)
4551{
4552	struct em_buffer	*rbuf;
4553
4554	rbuf = &rxr->rx_buffers[i];
4555	/* Free any previous pieces */
4556	if (rxr->fmp != NULL) {
4557		rxr->fmp->m_flags |= M_PKTHDR;
4558		m_freem(rxr->fmp);
4559		rxr->fmp = NULL;
4560		rxr->lmp = NULL;
4561	}
4562	/*
4563	** Free buffer and allow em_refresh_mbufs()
4564	** to clean up and recharge buffer.
4565	*/
4566	if (rbuf->m_head) {
4567		m_free(rbuf->m_head);
4568		rbuf->m_head = NULL;
4569	}
4570	return;
4571}
4572
4573#ifndef __NO_STRICT_ALIGNMENT
4574/*
 * When jumbo frames are enabled we should realign the entire payload on
 * architectures with strict alignment. This is a serious design mistake of
 * the 8254x as it nullifies DMA operations. The 8254x just allows the RX
 * buffer size to be 2048/4096/8192/16384. What we really want is
 * 2048 - ETHER_ALIGN to align its payload. On architectures without strict
 * alignment restrictions the 8254x still performs unaligned memory accesses,
 * which reduce performance too.
 * To avoid copying over an entire frame to align, we allocate a new mbuf and
 * copy the ethernet header to the new mbuf. The new mbuf is prepended into
 * the existing mbuf chain.
 *
 * Be aware, the best performance of the 8254x is achieved only when jumbo
 * frames are not used at all on architectures with strict alignment.
4587 */
4588static int
4589em_fixup_rx(struct rx_ring *rxr)
4590{
4591	struct adapter *adapter = rxr->adapter;
4592	struct mbuf *m, *n;
4593	int error;
4594
4595	error = 0;
4596	m = rxr->fmp;
4597	if (m->m_len <= (MCLBYTES - ETHER_HDR_LEN)) {
4598		bcopy(m->m_data, m->m_data + ETHER_HDR_LEN, m->m_len);
4599		m->m_data += ETHER_HDR_LEN;
4600	} else {
4601		MGETHDR(n, M_NOWAIT, MT_DATA);
4602		if (n != NULL) {
4603			bcopy(m->m_data, n->m_data, ETHER_HDR_LEN);
4604			m->m_data += ETHER_HDR_LEN;
4605			m->m_len -= ETHER_HDR_LEN;
4606			n->m_len = ETHER_HDR_LEN;
4607			M_MOVE_PKTHDR(n, m);
4608			n->m_next = m;
4609			rxr->fmp = n;
4610		} else {
4611			adapter->dropped_pkts++;
4612			m_freem(rxr->fmp);
4613			rxr->fmp = NULL;
4614			error = ENOMEM;
4615		}
4616	}
4617
4618	return (error);
4619}
4620#endif
4621
4622/*********************************************************************
4623 *
4624 *  Verify that the hardware indicated that the checksum is valid.
4625 *  Inform the stack about the status of checksum so that stack
4626 *  doesn't spend time verifying the checksum.
4627 *
4628 *********************************************************************/
4629static void
4630em_receive_checksum(struct e1000_rx_desc *rx_desc, struct mbuf *mp)
4631{
4632	mp->m_pkthdr.csum_flags = 0;
4633
4634	/* Ignore Checksum bit is set */
4635	if (rx_desc->status & E1000_RXD_STAT_IXSM)
4636		return;
4637
4638	if (rx_desc->errors & (E1000_RXD_ERR_TCPE | E1000_RXD_ERR_IPE))
4639		return;
4640
4641	/* IP Checksum Good? */
4642	if (rx_desc->status & E1000_RXD_STAT_IPCS)
4643		mp->m_pkthdr.csum_flags = (CSUM_IP_CHECKED | CSUM_IP_VALID);
4644
4645	/* TCP or UDP checksum */
4646	if (rx_desc->status & (E1000_RXD_STAT_TCPCS | E1000_RXD_STAT_UDPCS)) {
4647		mp->m_pkthdr.csum_flags |= (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
4648		mp->m_pkthdr.csum_data = htons(0xffff);
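		/*
		 * 0xffff together with CSUM_PSEUDO_HDR tells the stack
		 * the full checksum, pseudo-header included, has been
		 * verified, so no software pass is needed.
		 */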
4649	}
4650}
4651
4652/*
 * This routine is run via a vlan
 * config EVENT.
4655 */
4656static void
4657em_register_vlan(void *arg, struct ifnet *ifp, u16 vtag)
4658{
4659	struct adapter	*adapter = ifp->if_softc;
4660	u32		index, bit;
4661
4662	if (ifp->if_softc !=  arg)   /* Not our event */
4663		return;
4664
4665	if ((vtag == 0) || (vtag > 4095))       /* Invalid ID */
		return;
4667
4668	EM_CORE_LOCK(adapter);
4669	index = (vtag >> 5) & 0x7F;
4670	bit = vtag & 0x1F;
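	/* E.g. vtag 100 selects VFTA word 3, bit 4 (100 == 3 * 32 + 4). */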
4671	adapter->shadow_vfta[index] |= (1 << bit);
4672	++adapter->num_vlans;
4673	/* Re-init to load the changes */
4674	if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
4675		em_init_locked(adapter);
4676	EM_CORE_UNLOCK(adapter);
4677}
4678
4679/*
 * This routine is run via a vlan
 * unconfig EVENT.
4682 */
4683static void
4684em_unregister_vlan(void *arg, struct ifnet *ifp, u16 vtag)
4685{
4686	struct adapter	*adapter = ifp->if_softc;
4687	u32		index, bit;
4688
4689	if (ifp->if_softc !=  arg)
4690		return;
4691
4692	if ((vtag == 0) || (vtag > 4095))       /* Invalid */
		return;
4694
4695	EM_CORE_LOCK(adapter);
4696	index = (vtag >> 5) & 0x7F;
4697	bit = vtag & 0x1F;
4698	adapter->shadow_vfta[index] &= ~(1 << bit);
4699	--adapter->num_vlans;
4700	/* Re-init to load the changes */
4701	if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
4702		em_init_locked(adapter);
4703	EM_CORE_UNLOCK(adapter);
4704}
4705
4706static void
4707em_setup_vlan_hw_support(struct adapter *adapter)
4708{
4709	struct e1000_hw *hw = &adapter->hw;
4710	u32             reg;
4711
4712	/*
	** We get here thru init_locked, meaning
	** a soft reset, which has already cleared
	** the VFTA and other state; so if no
	** vlans have been registered, do nothing.
4717	*/
4718	if (adapter->num_vlans == 0)
		return;
4720
4721	/*
	** A soft reset zeroes out the VFTA, so
4723	** we need to repopulate it now.
4724	*/
	for (int i = 0; i < EM_VFTA_SIZE; i++)
		if (adapter->shadow_vfta[i] != 0)
			E1000_WRITE_REG_ARRAY(hw, E1000_VFTA,
			    i, adapter->shadow_vfta[i]);
4729
4730	reg = E1000_READ_REG(hw, E1000_CTRL);
4731	reg |= E1000_CTRL_VME;
4732	E1000_WRITE_REG(hw, E1000_CTRL, reg);
4733
4734	/* Enable the Filter Table */
4735	reg = E1000_READ_REG(hw, E1000_RCTL);
4736	reg &= ~E1000_RCTL_CFIEN;
4737	reg |= E1000_RCTL_VFE;
4738	E1000_WRITE_REG(hw, E1000_RCTL, reg);
4739}
4740
4741static void
4742em_enable_intr(struct adapter *adapter)
4743{
4744	struct e1000_hw *hw = &adapter->hw;
4745	u32 ims_mask = IMS_ENABLE_MASK;
4746
4747	if (hw->mac.type == e1000_82574) {
4748		E1000_WRITE_REG(hw, EM_EIAC, EM_MSIX_MASK);
4749		ims_mask |= EM_MSIX_MASK;
4750	}
4751	E1000_WRITE_REG(hw, E1000_IMS, ims_mask);
4752}
4753
4754static void
4755em_disable_intr(struct adapter *adapter)
4756{
4757	struct e1000_hw *hw = &adapter->hw;
4758
4759	if (hw->mac.type == e1000_82574)
4760		E1000_WRITE_REG(hw, EM_EIAC, 0);
4761	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
4762}
4763
4764/*
4765 * Bit of a misnomer, what this really means is
4766 * to enable OS management of the system... aka
4767 * to disable special hardware management features
4768 */
4769static void
4770em_init_manageability(struct adapter *adapter)
4771{
4772	/* A shared code workaround */
4773#define E1000_82542_MANC2H E1000_MANC2H
4774	if (adapter->has_manage) {
4775		int manc2h = E1000_READ_REG(&adapter->hw, E1000_MANC2H);
4776		int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
4777
4778		/* disable hardware interception of ARP */
4779		manc &= ~(E1000_MANC_ARP_EN);
4780
4781                /* enable receiving management packets to the host */
4782		manc |= E1000_MANC_EN_MNG2HOST;
4783#define E1000_MNG2HOST_PORT_623 (1 << 5)
4784#define E1000_MNG2HOST_PORT_664 (1 << 6)
4785		manc2h |= E1000_MNG2HOST_PORT_623;
4786		manc2h |= E1000_MNG2HOST_PORT_664;
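		/*
		 * Ports 623 and 664 are the standard RMCP/ASF
		 * management ports; forwarding them keeps host-side
		 * management traffic flowing.
		 */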
4787		E1000_WRITE_REG(&adapter->hw, E1000_MANC2H, manc2h);
4788		E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
4789	}
4790}
4791
4792/*
4793 * Give control back to hardware management
4794 * controller if there is one.
4795 */
4796static void
4797em_release_manageability(struct adapter *adapter)
4798{
4799	if (adapter->has_manage) {
4800		int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
4801
4802		/* re-enable hardware interception of ARP */
4803		manc |= E1000_MANC_ARP_EN;
4804		manc &= ~E1000_MANC_EN_MNG2HOST;
4805
4806		E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
4807	}
4808}
4809
4810/*
4811 * em_get_hw_control sets the {CTRL_EXT|FWSM}:DRV_LOAD bit.
4812 * For ASF and Pass Through versions of f/w this means
4813 * that the driver is loaded. For AMT version type f/w
4814 * this means that the network i/f is open.
4815 */
4816static void
4817em_get_hw_control(struct adapter *adapter)
4818{
4819	u32 ctrl_ext, swsm;
4820
4821	if (adapter->hw.mac.type == e1000_82573) {
4822		swsm = E1000_READ_REG(&adapter->hw, E1000_SWSM);
4823		E1000_WRITE_REG(&adapter->hw, E1000_SWSM,
4824		    swsm | E1000_SWSM_DRV_LOAD);
4825		return;
4826	}
4827	/* else */
4828	ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4829	E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
4830	    ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
4831	return;
4832}
4833
4834/*
4835 * em_release_hw_control resets {CTRL_EXT|FWSM}:DRV_LOAD bit.
4836 * For ASF and Pass Through versions of f/w this means that
4837 * the driver is no longer loaded. For AMT versions of the
4838 * f/w this means that the network i/f is closed.
4839 */
4840static void
4841em_release_hw_control(struct adapter *adapter)
4842{
4843	u32 ctrl_ext, swsm;
4844
4845	if (!adapter->has_manage)
4846		return;
4847
4848	if (adapter->hw.mac.type == e1000_82573) {
4849		swsm = E1000_READ_REG(&adapter->hw, E1000_SWSM);
4850		E1000_WRITE_REG(&adapter->hw, E1000_SWSM,
4851		    swsm & ~E1000_SWSM_DRV_LOAD);
4852		return;
4853	}
4854	/* else */
4855	ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4856	E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
4857	    ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
4858	return;
4859}
4860
4861static int
4862em_is_valid_ether_addr(u8 *addr)
4863{
4864	char zero_addr[6] = { 0, 0, 0, 0, 0, 0 };
4865
4866	if ((addr[0] & 1) || (!bcmp(addr, zero_addr, ETHER_ADDR_LEN))) {
4867		return (FALSE);
4868	}
4869
4870	return (TRUE);
4871}
4872
4873/*
4874** Parse the interface capabilities with regard
4875** to both system management and wake-on-lan for
4876** later use.
4877*/
4878static void
4879em_get_wakeup(device_t dev)
4880{
4881	struct adapter	*adapter = device_get_softc(dev);
4882	u16		eeprom_data = 0, device_id, apme_mask;
4883
4884	adapter->has_manage = e1000_enable_mng_pass_thru(&adapter->hw);
4885	apme_mask = EM_EEPROM_APME;
4886
4887	switch (adapter->hw.mac.type) {
4888	case e1000_82573:
4889	case e1000_82583:
4890		adapter->has_amt = TRUE;
4891		/* Falls thru */
4892	case e1000_82571:
4893	case e1000_82572:
4894	case e1000_80003es2lan:
4895		if (adapter->hw.bus.func == 1) {
4896			e1000_read_nvm(&adapter->hw,
4897			    NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data);
4898			break;
4899		} else
4900			e1000_read_nvm(&adapter->hw,
4901			    NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
4902		break;
4903	case e1000_ich8lan:
4904	case e1000_ich9lan:
4905	case e1000_ich10lan:
4906	case e1000_pchlan:
4907	case e1000_pch2lan:
4908		apme_mask = E1000_WUC_APME;
4909		adapter->has_amt = TRUE;
4910		eeprom_data = E1000_READ_REG(&adapter->hw, E1000_WUC);
4911		break;
4912	default:
4913		e1000_read_nvm(&adapter->hw,
4914		    NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
4915		break;
4916	}
4917	if (eeprom_data & apme_mask)
4918		adapter->wol = (E1000_WUFC_MAG | E1000_WUFC_MC);
4919	/*
	 * We have the eeprom settings, now apply the special cases
	 * where the eeprom may be wrong or the board won't support
	 * wake on lan on a particular port.
4923	 */
4924	device_id = pci_get_device(dev);
	switch (device_id) {
4926	case E1000_DEV_ID_82571EB_FIBER:
4927		/* Wake events only supported on port A for dual fiber
4928		 * regardless of eeprom setting */
4929		if (E1000_READ_REG(&adapter->hw, E1000_STATUS) &
4930		    E1000_STATUS_FUNC_1)
4931			adapter->wol = 0;
4932		break;
4933	case E1000_DEV_ID_82571EB_QUAD_COPPER:
4934	case E1000_DEV_ID_82571EB_QUAD_FIBER:
4935	case E1000_DEV_ID_82571EB_QUAD_COPPER_LP:
		/* if quad port adapter, disable WoL on all but port A */
		if (global_quad_port_a != 0)
			adapter->wol = 0;
		/* Reset for multiple quad port adapters */
		if (++global_quad_port_a == 4)
			global_quad_port_a = 0;
		break;
4943	}
4944	return;
4945}
4946
4947
4948/*
4949 * Enable PCI Wake On Lan capability
4950 */
4951static void
4952em_enable_wakeup(device_t dev)
4953{
4954	struct adapter	*adapter = device_get_softc(dev);
4955	struct ifnet	*ifp = adapter->ifp;
4956	u32		pmc, ctrl, ctrl_ext, rctl;
4957	u16     	status;
4958
4959	if ((pci_find_cap(dev, PCIY_PMG, &pmc) != 0))
4960		return;
4961
4962	/* Advertise the wakeup capability */
4963	ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL);
4964	ctrl |= (E1000_CTRL_SWDPIN2 | E1000_CTRL_SWDPIN3);
4965	E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl);
4966	E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
4967
4968	if ((adapter->hw.mac.type == e1000_ich8lan) ||
4969	    (adapter->hw.mac.type == e1000_pchlan) ||
4970	    (adapter->hw.mac.type == e1000_ich9lan) ||
4971	    (adapter->hw.mac.type == e1000_ich10lan))
4972		e1000_suspend_workarounds_ich8lan(&adapter->hw);
4973
4974	/* Keep the laser running on Fiber adapters */
4975	if (adapter->hw.phy.media_type == e1000_media_type_fiber ||
4976	    adapter->hw.phy.media_type == e1000_media_type_internal_serdes) {
4977		ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4978		ctrl_ext |= E1000_CTRL_EXT_SDP3_DATA;
4979		E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, ctrl_ext);
4980	}
4981
4982	/*
4983	** Determine type of Wakeup: note that wol
4984	** is set with all bits on by default.
4985	*/
4986	if ((ifp->if_capenable & IFCAP_WOL_MAGIC) == 0)
4987		adapter->wol &= ~E1000_WUFC_MAG;
4988
4989	if ((ifp->if_capenable & IFCAP_WOL_MCAST) == 0)
4990		adapter->wol &= ~E1000_WUFC_MC;
4991	else {
4992		rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
4993		rctl |= E1000_RCTL_MPE;
4994		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, rctl);
4995	}
4996
4997	if ((adapter->hw.mac.type == e1000_pchlan) ||
4998	    (adapter->hw.mac.type == e1000_pch2lan)) {
4999		if (em_enable_phy_wakeup(adapter))
5000			return;
5001	} else {
5002		E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
5003		E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
5004	}
5005
5006	if (adapter->hw.phy.type == e1000_phy_igp_3)
5007		e1000_igp3_phy_powerdown_workaround_ich8lan(&adapter->hw);
5008
	/* Request PME */
	status = pci_read_config(dev, pmc + PCIR_POWER_STATUS, 2);
	status &= ~(PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE);
	if (ifp->if_capenable & IFCAP_WOL)
		status |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE;
	pci_write_config(dev, pmc + PCIR_POWER_STATUS, status, 2);
5015
5016	return;
5017}
5018
5019/*
** WOL in the newer chipset interfaces (pchlan)
** requires things to be copied into the PHY.
5022*/
5023static int
5024em_enable_phy_wakeup(struct adapter *adapter)
5025{
5026	struct e1000_hw *hw = &adapter->hw;
5027	u32 mreg, ret = 0;
5028	u16 preg;
5029
5030	/* copy MAC RARs to PHY RARs */
5031	e1000_copy_rx_addrs_to_phy_ich8lan(hw);
5032
5033	/* copy MAC MTA to PHY MTA */
5034	for (int i = 0; i < adapter->hw.mac.mta_reg_count; i++) {
5035		mreg = E1000_READ_REG_ARRAY(hw, E1000_MTA, i);
5036		e1000_write_phy_reg(hw, BM_MTA(i), (u16)(mreg & 0xFFFF));
5037		e1000_write_phy_reg(hw, BM_MTA(i) + 1,
5038		    (u16)((mreg >> 16) & 0xFFFF));
5039	}
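	/*
	 * Each 32-bit MTA entry is split into two 16-bit writes above
	 * because the BM PHY wakeup registers are only 16 bits wide.
	 */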
5040
5041	/* configure PHY Rx Control register */
5042	e1000_read_phy_reg(&adapter->hw, BM_RCTL, &preg);
5043	mreg = E1000_READ_REG(hw, E1000_RCTL);
5044	if (mreg & E1000_RCTL_UPE)
5045		preg |= BM_RCTL_UPE;
5046	if (mreg & E1000_RCTL_MPE)
5047		preg |= BM_RCTL_MPE;
5048	preg &= ~(BM_RCTL_MO_MASK);
5049	if (mreg & E1000_RCTL_MO_3)
5050		preg |= (((mreg & E1000_RCTL_MO_3) >> E1000_RCTL_MO_SHIFT)
5051				<< BM_RCTL_MO_SHIFT);
5052	if (mreg & E1000_RCTL_BAM)
5053		preg |= BM_RCTL_BAM;
5054	if (mreg & E1000_RCTL_PMCF)
5055		preg |= BM_RCTL_PMCF;
5056	mreg = E1000_READ_REG(hw, E1000_CTRL);
5057	if (mreg & E1000_CTRL_RFCE)
5058		preg |= BM_RCTL_RFCE;
5059	e1000_write_phy_reg(&adapter->hw, BM_RCTL, preg);
5060
5061	/* enable PHY wakeup in MAC register */
5062	E1000_WRITE_REG(hw, E1000_WUC,
5063	    E1000_WUC_PHY_WAKE | E1000_WUC_PME_EN);
5064	E1000_WRITE_REG(hw, E1000_WUFC, adapter->wol);
5065
5066	/* configure and enable PHY wakeup in PHY registers */
5067	e1000_write_phy_reg(&adapter->hw, BM_WUFC, adapter->wol);
5068	e1000_write_phy_reg(&adapter->hw, BM_WUC, E1000_WUC_PME_EN);
5069
5070	/* activate PHY wakeup */
5071	ret = hw->phy.ops.acquire(hw);
5072	if (ret) {
5073		printf("Could not acquire PHY\n");
5074		return ret;
5075	}
5076	e1000_write_phy_reg_mdic(hw, IGP01E1000_PHY_PAGE_SELECT,
5077	                         (BM_WUC_ENABLE_PAGE << IGP_PAGE_SHIFT));
5078	ret = e1000_read_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, &preg);
5079	if (ret) {
5080		printf("Could not read PHY page 769\n");
5081		goto out;
5082	}
5083	preg |= BM_WUC_ENABLE_BIT | BM_WUC_HOST_WU_BIT;
5084	ret = e1000_write_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, preg);
5085	if (ret)
5086		printf("Could not set PHY Host Wakeup bit\n");
5087out:
5088	hw->phy.ops.release(hw);
5089
5090	return ret;
5091}
5092
5093static void
5094em_led_func(void *arg, int onoff)
5095{
5096	struct adapter	*adapter = arg;
5097
5098	EM_CORE_LOCK(adapter);
5099	if (onoff) {
5100		e1000_setup_led(&adapter->hw);
5101		e1000_led_on(&adapter->hw);
5102	} else {
5103		e1000_led_off(&adapter->hw);
5104		e1000_cleanup_led(&adapter->hw);
5105	}
5106	EM_CORE_UNLOCK(adapter);
5107}
5108
5109/*
5110** Disable the L0S and L1 LINK states
5111*/
5112static void
5113em_disable_aspm(struct adapter *adapter)
5114{
5115	int		base, reg;
	u16		link_cap, link_ctrl;
5117	device_t	dev = adapter->dev;
5118
5119	switch (adapter->hw.mac.type) {
5120		case e1000_82573:
5121		case e1000_82574:
5122		case e1000_82583:
5123			break;
5124		default:
5125			return;
5126	}
5127	if (pci_find_cap(dev, PCIY_EXPRESS, &base) != 0)
5128		return;
5129	reg = base + PCIER_LINK_CAP;
5130	link_cap = pci_read_config(dev, reg, 2);
5131	if ((link_cap & PCIEM_LINK_CAP_ASPM) == 0)
5132		return;
5133	reg = base + PCIER_LINK_CTL;
5134	link_ctrl = pci_read_config(dev, reg, 2);
5135	link_ctrl &= ~PCIEM_LINK_CTL_ASPMC;
5136	pci_write_config(dev, reg, link_ctrl, 2);
5137	return;
5138}
5139
5140/**********************************************************************
5141 *
5142 *  Update the board statistics counters.
5143 *
5144 **********************************************************************/
5145static void
5146em_update_stats_counters(struct adapter *adapter)
5147{
5148	struct ifnet   *ifp;
5149
	if (adapter->hw.phy.media_type == e1000_media_type_copper ||
5151	   (E1000_READ_REG(&adapter->hw, E1000_STATUS) & E1000_STATUS_LU)) {
5152		adapter->stats.symerrs += E1000_READ_REG(&adapter->hw, E1000_SYMERRS);
5153		adapter->stats.sec += E1000_READ_REG(&adapter->hw, E1000_SEC);
5154	}
5155	adapter->stats.crcerrs += E1000_READ_REG(&adapter->hw, E1000_CRCERRS);
5156	adapter->stats.mpc += E1000_READ_REG(&adapter->hw, E1000_MPC);
5157	adapter->stats.scc += E1000_READ_REG(&adapter->hw, E1000_SCC);
5158	adapter->stats.ecol += E1000_READ_REG(&adapter->hw, E1000_ECOL);
5159
5160	adapter->stats.mcc += E1000_READ_REG(&adapter->hw, E1000_MCC);
5161	adapter->stats.latecol += E1000_READ_REG(&adapter->hw, E1000_LATECOL);
5162	adapter->stats.colc += E1000_READ_REG(&adapter->hw, E1000_COLC);
5163	adapter->stats.dc += E1000_READ_REG(&adapter->hw, E1000_DC);
5164	adapter->stats.rlec += E1000_READ_REG(&adapter->hw, E1000_RLEC);
5165	adapter->stats.xonrxc += E1000_READ_REG(&adapter->hw, E1000_XONRXC);
5166	adapter->stats.xontxc += E1000_READ_REG(&adapter->hw, E1000_XONTXC);
5167	/*
5168	** For watchdog management we need to know if we have been
5169	** paused during the last interval, so capture that here.
5170	*/
5171	adapter->pause_frames = E1000_READ_REG(&adapter->hw, E1000_XOFFRXC);
5172	adapter->stats.xoffrxc += adapter->pause_frames;
5173	adapter->stats.xofftxc += E1000_READ_REG(&adapter->hw, E1000_XOFFTXC);
5174	adapter->stats.fcruc += E1000_READ_REG(&adapter->hw, E1000_FCRUC);
5175	adapter->stats.prc64 += E1000_READ_REG(&adapter->hw, E1000_PRC64);
5176	adapter->stats.prc127 += E1000_READ_REG(&adapter->hw, E1000_PRC127);
5177	adapter->stats.prc255 += E1000_READ_REG(&adapter->hw, E1000_PRC255);
5178	adapter->stats.prc511 += E1000_READ_REG(&adapter->hw, E1000_PRC511);
5179	adapter->stats.prc1023 += E1000_READ_REG(&adapter->hw, E1000_PRC1023);
5180	adapter->stats.prc1522 += E1000_READ_REG(&adapter->hw, E1000_PRC1522);
5181	adapter->stats.gprc += E1000_READ_REG(&adapter->hw, E1000_GPRC);
5182	adapter->stats.bprc += E1000_READ_REG(&adapter->hw, E1000_BPRC);
5183	adapter->stats.mprc += E1000_READ_REG(&adapter->hw, E1000_MPRC);
5184	adapter->stats.gptc += E1000_READ_REG(&adapter->hw, E1000_GPTC);
5185
5186	/* For the 64-bit byte counters the low dword must be read first. */
5187	/* Both registers clear on the read of the high dword */
5188
5189	adapter->stats.gorc += E1000_READ_REG(&adapter->hw, E1000_GORCL) +
5190	    ((u64)E1000_READ_REG(&adapter->hw, E1000_GORCH) << 32);
5191	adapter->stats.gotc += E1000_READ_REG(&adapter->hw, E1000_GOTCL) +
5192	    ((u64)E1000_READ_REG(&adapter->hw, E1000_GOTCH) << 32);
5193
5194	adapter->stats.rnbc += E1000_READ_REG(&adapter->hw, E1000_RNBC);
5195	adapter->stats.ruc += E1000_READ_REG(&adapter->hw, E1000_RUC);
5196	adapter->stats.rfc += E1000_READ_REG(&adapter->hw, E1000_RFC);
5197	adapter->stats.roc += E1000_READ_REG(&adapter->hw, E1000_ROC);
5198	adapter->stats.rjc += E1000_READ_REG(&adapter->hw, E1000_RJC);
5199
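	/*
	** Note: only the high dwords of the total-octets counters are
	** accumulated here; the TORL/TOTL halves are not folded in.
	*/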
	adapter->stats.tor += E1000_READ_REG(&adapter->hw, E1000_TORH);
	adapter->stats.tot += E1000_READ_REG(&adapter->hw, E1000_TOTH);

	adapter->stats.tpr += E1000_READ_REG(&adapter->hw, E1000_TPR);
	adapter->stats.tpt += E1000_READ_REG(&adapter->hw, E1000_TPT);
	adapter->stats.ptc64 += E1000_READ_REG(&adapter->hw, E1000_PTC64);
	adapter->stats.ptc127 += E1000_READ_REG(&adapter->hw, E1000_PTC127);
	adapter->stats.ptc255 += E1000_READ_REG(&adapter->hw, E1000_PTC255);
	adapter->stats.ptc511 += E1000_READ_REG(&adapter->hw, E1000_PTC511);
	adapter->stats.ptc1023 += E1000_READ_REG(&adapter->hw, E1000_PTC1023);
	adapter->stats.ptc1522 += E1000_READ_REG(&adapter->hw, E1000_PTC1522);
	adapter->stats.mptc += E1000_READ_REG(&adapter->hw, E1000_MPTC);
	adapter->stats.bptc += E1000_READ_REG(&adapter->hw, E1000_BPTC);

	/* Interrupt Counts */

	adapter->stats.iac += E1000_READ_REG(&adapter->hw, E1000_IAC);
	adapter->stats.icrxptc += E1000_READ_REG(&adapter->hw, E1000_ICRXPTC);
	adapter->stats.icrxatc += E1000_READ_REG(&adapter->hw, E1000_ICRXATC);
	adapter->stats.ictxptc += E1000_READ_REG(&adapter->hw, E1000_ICTXPTC);
	adapter->stats.ictxatc += E1000_READ_REG(&adapter->hw, E1000_ICTXATC);
	adapter->stats.ictxqec += E1000_READ_REG(&adapter->hw, E1000_ICTXQEC);
	adapter->stats.ictxqmtc += E1000_READ_REG(&adapter->hw, E1000_ICTXQMTC);
	adapter->stats.icrxdmtc += E1000_READ_REG(&adapter->hw, E1000_ICRXDMTC);
	adapter->stats.icrxoc += E1000_READ_REG(&adapter->hw, E1000_ICRXOC);

	if (adapter->hw.mac.type >= e1000_82543) {
		adapter->stats.algnerrc +=
		    E1000_READ_REG(&adapter->hw, E1000_ALGNERRC);
		adapter->stats.rxerrc +=
		    E1000_READ_REG(&adapter->hw, E1000_RXERRC);
		adapter->stats.tncrs +=
		    E1000_READ_REG(&adapter->hw, E1000_TNCRS);
		adapter->stats.cexterr +=
		    E1000_READ_REG(&adapter->hw, E1000_CEXTERR);
		adapter->stats.tsctc +=
		    E1000_READ_REG(&adapter->hw, E1000_TSCTC);
		adapter->stats.tsctfc +=
		    E1000_READ_REG(&adapter->hw, E1000_TSCTFC);
	}
	ifp = adapter->ifp;

	ifp->if_collisions = adapter->stats.colc;

	/* Rx Errors */
	ifp->if_ierrors = adapter->dropped_pkts + adapter->stats.rxerrc +
	    adapter->stats.crcerrs + adapter->stats.algnerrc +
	    adapter->stats.ruc + adapter->stats.roc +
	    adapter->stats.mpc + adapter->stats.cexterr;

	/* Tx Errors */
	ifp->if_oerrors = adapter->stats.ecol +
	    adapter->stats.latecol + adapter->watchdog_events;
}

/* Export a single 32-bit register via a read-only sysctl. */
static int
em_sysctl_reg_handler(SYSCTL_HANDLER_ARGS)
{
	struct adapter *adapter;
	u_int val;

	adapter = oidp->oid_arg1;
	val = E1000_READ_REG(&adapter->hw, oidp->oid_arg2);
	return (sysctl_handle_int(oidp, &val, 0, req));
}
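
/*
** Instances of this handler are registered below in em_add_hw_stats();
** for example the "device_control" node reads E1000_CTRL, so (assuming
** unit 0) "sysctl dev.em.0.device_control" returns the live register.
*/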

/*
 * Add sysctl variables, one per statistic, to the system.
 */
static void
em_add_hw_stats(struct adapter *adapter)
{
	device_t dev = adapter->dev;

	struct tx_ring *txr = adapter->tx_rings;
	struct rx_ring *rxr = adapter->rx_rings;

	struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev);
	struct sysctl_oid *tree = device_get_sysctl_tree(dev);
	struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree);
	struct e1000_hw_stats *stats = &adapter->stats;

	struct sysctl_oid *stat_node, *queue_node, *int_node;
	struct sysctl_oid_list *stat_list, *queue_list, *int_list;

#define QUEUE_NAME_LEN 32
	char namebuf[QUEUE_NAME_LEN];

	/* Driver Statistics */
	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "link_irq",
			CTLFLAG_RD, &adapter->link_irq,
			"Link MSI-X IRQ Handled");
	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "mbuf_alloc_fail",
			CTLFLAG_RD, &adapter->mbuf_alloc_failed,
			"Std mbuf failed");
	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "cluster_alloc_fail",
			CTLFLAG_RD, &adapter->mbuf_cluster_failed,
			"Std mbuf cluster failed");
	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "dropped",
			CTLFLAG_RD, &adapter->dropped_pkts,
			"Driver dropped packets");
	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_dma_fail",
			CTLFLAG_RD, &adapter->no_tx_dma_setup,
			"Driver tx dma failure in xmit");
	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_overruns",
			CTLFLAG_RD, &adapter->rx_overruns,
			"RX overruns");
	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "watchdog_timeouts",
			CTLFLAG_RD, &adapter->watchdog_events,
			"Watchdog timeouts");

	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "device_control",
			CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_CTRL,
			em_sysctl_reg_handler, "IU",
			"Device Control Register");
	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rx_control",
			CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_RCTL,
			em_sysctl_reg_handler, "IU",
			"Receiver Control Register");
	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_high_water",
			CTLFLAG_RD, &adapter->hw.fc.high_water, 0,
			"Flow Control High Watermark");
	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_low_water",
			CTLFLAG_RD, &adapter->hw.fc.low_water, 0,
			"Flow Control Low Watermark");

	for (int i = 0; i < adapter->num_queues; i++, rxr++, txr++) {
		snprintf(namebuf, QUEUE_NAME_LEN, "queue%d", i);
		queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
					    CTLFLAG_RD, NULL, "Queue Name");
		queue_list = SYSCTL_CHILDREN(queue_node);

		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_head",
				CTLTYPE_UINT | CTLFLAG_RD, adapter,
				E1000_TDH(txr->me),
				em_sysctl_reg_handler, "IU",
				"Transmit Descriptor Head");
		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_tail",
				CTLTYPE_UINT | CTLFLAG_RD, adapter,
				E1000_TDT(txr->me),
				em_sysctl_reg_handler, "IU",
				"Transmit Descriptor Tail");
		SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "tx_irq",
				CTLFLAG_RD, &txr->tx_irq,
				"Queue MSI-X Transmit Interrupts");
		SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "no_desc_avail",
				CTLFLAG_RD, &txr->no_desc_avail,
				"Queue No Descriptor Available");

		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_head",
				CTLTYPE_UINT | CTLFLAG_RD, adapter,
				E1000_RDH(rxr->me),
				em_sysctl_reg_handler, "IU",
				"Receive Descriptor Head");
		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_tail",
				CTLTYPE_UINT | CTLFLAG_RD, adapter,
				E1000_RDT(rxr->me),
				em_sysctl_reg_handler, "IU",
				"Receive Descriptor Tail");
		SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "rx_irq",
				CTLFLAG_RD, &rxr->rx_irq,
				"Queue MSI-X Receive Interrupts");
	}

	/* MAC stats get their own sub node */

	stat_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "mac_stats",
				    CTLFLAG_RD, NULL, "Statistics");
	stat_list = SYSCTL_CHILDREN(stat_node);

	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "excess_coll",
			CTLFLAG_RD, &stats->ecol,
			"Excessive collisions");
	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "single_coll",
			CTLFLAG_RD, &stats->scc,
			"Single collisions");
	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "multiple_coll",
			CTLFLAG_RD, &stats->mcc,
			"Multiple collisions");
	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "late_coll",
			CTLFLAG_RD, &stats->latecol,
			"Late collisions");
	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "collision_count",
			CTLFLAG_RD, &stats->colc,
			"Collision Count");
	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "symbol_errors",
			CTLFLAG_RD, &adapter->stats.symerrs,
			"Symbol Errors");
	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "sequence_errors",
			CTLFLAG_RD, &adapter->stats.sec,
			"Sequence Errors");
	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "defer_count",
			CTLFLAG_RD, &adapter->stats.dc,
			"Defer Count");
	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "missed_packets",
			CTLFLAG_RD, &adapter->stats.mpc,
			"Missed Packets");
	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_no_buff",
			CTLFLAG_RD, &adapter->stats.rnbc,
			"Receive No Buffers");
	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_undersize",
			CTLFLAG_RD, &adapter->stats.ruc,
			"Receive Undersize");
	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_fragmented",
			CTLFLAG_RD, &adapter->stats.rfc,
			"Fragmented Packets Received");
	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_oversize",
			CTLFLAG_RD, &adapter->stats.roc,
			"Oversized Packets Received");
	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_jabber",
			CTLFLAG_RD, &adapter->stats.rjc,
			"Received Jabber");
	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_errs",
			CTLFLAG_RD, &adapter->stats.rxerrc,
			"Receive Errors");
	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "crc_errs",
			CTLFLAG_RD, &adapter->stats.crcerrs,
			"CRC errors");
	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "alignment_errs",
			CTLFLAG_RD, &adapter->stats.algnerrc,
			"Alignment Errors");
	/* On 82575 these are collision counts */
	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "coll_ext_errs",
			CTLFLAG_RD, &adapter->stats.cexterr,
			"Collision/Carrier extension errors");
	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_recvd",
			CTLFLAG_RD, &adapter->stats.xonrxc,
			"XON Received");
	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_txd",
			CTLFLAG_RD, &adapter->stats.xontxc,
			"XON Transmitted");
	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_recvd",
			CTLFLAG_RD, &adapter->stats.xoffrxc,
			"XOFF Received");
	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_txd",
			CTLFLAG_RD, &adapter->stats.xofftxc,
			"XOFF Transmitted");

	/* Packet Reception Stats */
	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_recvd",
			CTLFLAG_RD, &adapter->stats.tpr,
			"Total Packets Received");
	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_recvd",
			CTLFLAG_RD, &adapter->stats.gprc,
			"Good Packets Received");
	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_recvd",
			CTLFLAG_RD, &adapter->stats.bprc,
			"Broadcast Packets Received");
	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_recvd",
			CTLFLAG_RD, &adapter->stats.mprc,
			"Multicast Packets Received");
	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_64",
			CTLFLAG_RD, &adapter->stats.prc64,
			"64 byte frames received");
	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_65_127",
			CTLFLAG_RD, &adapter->stats.prc127,
			"65-127 byte frames received");
	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_128_255",
			CTLFLAG_RD, &adapter->stats.prc255,
			"128-255 byte frames received");
	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_256_511",
			CTLFLAG_RD, &adapter->stats.prc511,
			"256-511 byte frames received");
	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_512_1023",
			CTLFLAG_RD, &adapter->stats.prc1023,
			"512-1023 byte frames received");
	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_1024_1522",
			CTLFLAG_RD, &adapter->stats.prc1522,
			"1024-1522 byte frames received");
	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_recvd",
			CTLFLAG_RD, &adapter->stats.gorc,
			"Good Octets Received");

	/* Packet Transmission Stats */
	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_txd",
			CTLFLAG_RD, &adapter->stats.gotc,
			"Good Octets Transmitted");
	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_txd",
			CTLFLAG_RD, &adapter->stats.tpt,
			"Total Packets Transmitted");
	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd",
			CTLFLAG_RD, &adapter->stats.gptc,
			"Good Packets Transmitted");
	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_txd",
			CTLFLAG_RD, &adapter->stats.bptc,
			"Broadcast Packets Transmitted");
	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_txd",
			CTLFLAG_RD, &adapter->stats.mptc,
			"Multicast Packets Transmitted");
	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_64",
			CTLFLAG_RD, &adapter->stats.ptc64,
			"64 byte frames transmitted");
	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_65_127",
			CTLFLAG_RD, &adapter->stats.ptc127,
			"65-127 byte frames transmitted");
	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_128_255",
			CTLFLAG_RD, &adapter->stats.ptc255,
			"128-255 byte frames transmitted");
	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_256_511",
			CTLFLAG_RD, &adapter->stats.ptc511,
			"256-511 byte frames transmitted");
	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_512_1023",
			CTLFLAG_RD, &adapter->stats.ptc1023,
			"512-1023 byte frames transmitted");
	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_1024_1522",
			CTLFLAG_RD, &adapter->stats.ptc1522,
			"1024-1522 byte frames transmitted");
	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tso_txd",
			CTLFLAG_RD, &adapter->stats.tsctc,
			"TSO Contexts Transmitted");
	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tso_ctx_fail",
			CTLFLAG_RD, &adapter->stats.tsctfc,
			"TSO Contexts Failed");

	/* Interrupt Stats */

	int_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "interrupts",
				    CTLFLAG_RD, NULL, "Interrupt Statistics");
	int_list = SYSCTL_CHILDREN(int_node);

	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "asserts",
			CTLFLAG_RD, &adapter->stats.iac,
			"Interrupt Assertion Count");

	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_pkt_timer",
			CTLFLAG_RD, &adapter->stats.icrxptc,
			"Interrupt Cause Rx Pkt Timer Expire Count");

	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_abs_timer",
			CTLFLAG_RD, &adapter->stats.icrxatc,
			"Interrupt Cause Rx Abs Timer Expire Count");

	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_pkt_timer",
			CTLFLAG_RD, &adapter->stats.ictxptc,
			"Interrupt Cause Tx Pkt Timer Expire Count");

	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_abs_timer",
			CTLFLAG_RD, &adapter->stats.ictxatc,
			"Interrupt Cause Tx Abs Timer Expire Count");

	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_queue_empty",
			CTLFLAG_RD, &adapter->stats.ictxqec,
			"Interrupt Cause Tx Queue Empty Count");

	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_queue_min_thresh",
			CTLFLAG_RD, &adapter->stats.ictxqmtc,
			"Interrupt Cause Tx Queue Min Thresh Count");

	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_desc_min_thresh",
			CTLFLAG_RD, &adapter->stats.icrxdmtc,
			"Interrupt Cause Rx Desc Min Thresh Count");

	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_overrun",
			CTLFLAG_RD, &adapter->stats.icrxoc,
			"Interrupt Cause Receiver Overrun Count");
}
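
/*
** Example (assuming unit 0): "sysctl dev.em.0.mac_stats" lists the MAC
** counters added above, and "sysctl dev.em.0.queue0" shows the per-queue
** descriptor heads/tails and interrupt counts.
*/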

/**********************************************************************
 *
 *  This routine provides a way to dump out the adapter eeprom,
 *  often a useful debug/service tool. It dumps only the first
 *  32 words, where the data that matters resides.
 *
 **********************************************************************/
static int
em_sysctl_nvm_info(SYSCTL_HANDLER_ARGS)
{
	struct adapter *adapter = (struct adapter *)arg1;
	int error;
	int result;

	result = -1;
	error = sysctl_handle_int(oidp, &result, 0, req);

	if (error || !req->newptr)
		return (error);

	/*
	 * This value will cause a hex dump of the
	 * first 32 16-bit words of the EEPROM to
	 * the screen.
	 */
	if (result == 1)
		em_print_nvm_info(adapter);

	return (error);
}
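
/*
** Usage sketch (node name assumed; the handler is attached elsewhere
** in the driver): writing 1, e.g. "sysctl dev.em.0.nvm=1", dumps the
** first 32 EEPROM words to the console.
*/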

static void
em_print_nvm_info(struct adapter *adapter)
{
	u16	eeprom_data;
	int	i, j, row = 0;

	/* It's a bit crude, but it gets the job done */
	printf("\nInterface EEPROM Dump:\n");
	printf("Offset\n0x0000  ");
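	/* Eight 16-bit words per row; the row labels are byte offsets. */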
	for (i = 0, j = 0; i < 32; i++, j++) {
		if (j == 8) { /* Make the offset block */
			j = 0; ++row;
			printf("\n0x00%x0  ", row);
		}
		e1000_read_nvm(&adapter->hw, i, 1, &eeprom_data);
		printf("%04x ", eeprom_data);
	}
	printf("\n");
}

static int
em_sysctl_int_delay(SYSCTL_HANDLER_ARGS)
{
	struct em_int_delay_info *info;
	struct adapter *adapter;
	u32 regval;
	int error, usecs, ticks;

	info = (struct em_int_delay_info *)arg1;
	usecs = info->value;
	error = sysctl_handle_int(oidp, &usecs, 0, req);
	if (error != 0 || req->newptr == NULL)
		return (error);
	if (usecs < 0 || usecs > EM_TICKS_TO_USECS(65535))
		return (EINVAL);
	info->value = usecs;
	ticks = EM_USECS_TO_TICKS(usecs);
	if (info->offset == E1000_ITR)	/* units are 256ns here */
		ticks *= 4;
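	/*
	** The other delay registers count in ~1.024 us ticks; ITR counts
	** 256 ns units, so one tick corresponds to four ITR units.
	*/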

	adapter = info->adapter;

	EM_CORE_LOCK(adapter);
	regval = E1000_READ_OFFSET(&adapter->hw, info->offset);
	regval = (regval & ~0xffff) | (ticks & 0xffff);
	/* Handle a few special cases. */
	switch (info->offset) {
	case E1000_RDTR:
		break;
	case E1000_TIDV:
		if (ticks == 0) {
			adapter->txd_cmd &= ~E1000_TXD_CMD_IDE;
			/* Don't write 0 into the TIDV register. */
			regval++;
		} else
			adapter->txd_cmd |= E1000_TXD_CMD_IDE;
		break;
	}
	E1000_WRITE_OFFSET(&adapter->hw, info->offset, regval);
	EM_CORE_UNLOCK(adapter);
	return (0);
}

static void
em_add_int_delay_sysctl(struct adapter *adapter, const char *name,
	const char *description, struct em_int_delay_info *info,
	int offset, int value)
{
	info->adapter = adapter;
	info->offset = offset;
	info->value = value;
	SYSCTL_ADD_PROC(device_get_sysctl_ctx(adapter->dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
	    OID_AUTO, name, CTLTYPE_INT | CTLFLAG_RW,
	    info, 0, em_sysctl_int_delay, "I", description);
}

static void
em_set_sysctl_value(struct adapter *adapter, const char *name,
	const char *description, int *limit, int value)
{
	*limit = value;
	SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
	    OID_AUTO, name, CTLTYPE_INT | CTLFLAG_RW, limit, value, description);
}

/*
** Set flow control using sysctl:
** Flow control values:
**      0 - off
**      1 - rx pause
**      2 - tx pause
**      3 - full
*/
static int
em_set_flowcntl(SYSCTL_HANDLER_ARGS)
{
	int		error;
	static int	input = 3; /* default is full */
	struct adapter	*adapter = (struct adapter *) arg1;

	error = sysctl_handle_int(oidp, &input, 0, req);

	if ((error) || (req->newptr == NULL))
		return (error);

	if (input == adapter->fc) /* no change? */
		return (error);

	switch (input) {
	case e1000_fc_rx_pause:
	case e1000_fc_tx_pause:
	case e1000_fc_full:
	case e1000_fc_none:
		adapter->hw.fc.requested_mode = input;
		adapter->fc = input;
		break;
	default:
		/* Do nothing */
		return (error);
	}

	adapter->hw.fc.current_mode = adapter->hw.fc.requested_mode;
	e1000_force_mac_fc(&adapter->hw);
	return (error);
}
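
/*
** Usage sketch (node name assumed; the handler is attached elsewhere in
** the driver): e.g. "sysctl dev.em.0.fc=3" requests full flow control;
** the value maps onto the e1000_fc_* enumeration checked above.
*/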

/*
** Manage Energy Efficient Ethernet:
** Control values:
**     0 - enabled
**     1 - disabled
*/
static int
em_sysctl_eee(SYSCTL_HANDLER_ARGS)
{
	struct adapter	*adapter = (struct adapter *) arg1;
	int		error, value;

	value = adapter->hw.dev_spec.ich8lan.eee_disable;
	error = sysctl_handle_int(oidp, &value, 0, req);
	if (error || req->newptr == NULL)
		return (error);
	EM_CORE_LOCK(adapter);
	adapter->hw.dev_spec.ich8lan.eee_disable = (value != 0);
	em_init_locked(adapter);
	EM_CORE_UNLOCK(adapter);
	return (0);
}
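
/*
** Usage sketch (node name assumed; the handler is attached elsewhere in
** the driver): e.g. "sysctl dev.em.0.eee_control=1" sets eee_disable and
** reinitializes the interface, turning EEE off.
*/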

static int
em_sysctl_debug_info(SYSCTL_HANDLER_ARGS)
{
	struct adapter *adapter;
	int error;
	int result;

	result = -1;
	error = sysctl_handle_int(oidp, &result, 0, req);

	if (error || !req->newptr)
		return (error);

	if (result == 1) {
		adapter = (struct adapter *)arg1;
		em_print_debug_info(adapter);
	}

	return (error);
}
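
/*
** As with the NVM dump, writing 1 to the sysctl node backed by this
** handler (name assumed, e.g. "dev.em.0.debug") prints the state below.
*/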

/*
** This routine is meant to be fluid, add whatever is
** needed for debugging a problem.  -jfv
*/
static void
em_print_debug_info(struct adapter *adapter)
{
	device_t dev = adapter->dev;
	struct tx_ring *txr = adapter->tx_rings;
	struct rx_ring *rxr = adapter->rx_rings;

	if (adapter->ifp->if_drv_flags & IFF_DRV_RUNNING)
		printf("Interface is RUNNING ");
	else
		printf("Interface is NOT RUNNING ");

	if (adapter->ifp->if_drv_flags & IFF_DRV_OACTIVE)
		printf("and INACTIVE\n");
	else
		printf("and ACTIVE\n");

	device_printf(dev, "hw tdh = %d, hw tdt = %d\n",
	    E1000_READ_REG(&adapter->hw, E1000_TDH(0)),
	    E1000_READ_REG(&adapter->hw, E1000_TDT(0)));
	device_printf(dev, "hw rdh = %d, hw rdt = %d\n",
	    E1000_READ_REG(&adapter->hw, E1000_RDH(0)),
	    E1000_READ_REG(&adapter->hw, E1000_RDT(0)));
	device_printf(dev, "Tx Queue Status = %d\n", txr->queue_status);
	device_printf(dev, "TX descriptors avail = %d\n",
	    txr->tx_avail);
	device_printf(dev, "Tx Descriptors avail failure = %lu\n",
	    txr->no_desc_avail);
	device_printf(dev, "RX discarded packets = %lu\n",
	    rxr->rx_discarded);
	device_printf(dev, "RX Next to Check = %d\n", rxr->next_to_check);
	device_printf(dev, "RX Next to Refresh = %d\n", rxr->next_to_refresh);
}
5799