if_em.c revision 254382
1139804Simp/******************************************************************************
2885Swollman
3885Swollman  Copyright (c) 2001-2013, Intel Corporation
4885Swollman  All rights reserved.
5885Swollman
6885Swollman  Redistribution and use in source and binary forms, with or without
7885Swollman  modification, are permitted provided that the following conditions are met:
8885Swollman
9885Swollman   1. Redistributions of source code must retain the above copyright notice,
10885Swollman      this list of conditions and the following disclaimer.
11885Swollman
12885Swollman   2. Redistributions in binary form must reproduce the above copyright
13885Swollman      notice, this list of conditions and the following disclaimer in the
14885Swollman      documentation and/or other materials provided with the distribution.
15885Swollman
16885Swollman   3. Neither the name of the Intel Corporation nor the names of its
1710625Sdg      contributors may be used to endorse or promote products derived from
18885Swollman      this software without specific prior written permission.
19885Swollman
20885Swollman  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21885Swollman  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22885Swollman  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23885Swollman  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
24885Swollman  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25885Swollman  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26885Swollman  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27116182Sobrien  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28116182Sobrien  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29116182Sobrien  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
302056Swollman  POSSIBILITY OF SUCH DAMAGE.
31142448Ssobomax
32142448Ssobomax******************************************************************************/
33182191Skib/*$FreeBSD: stable/9/sys/dev/e1000/if_em.c 254382 2013-08-15 20:33:17Z jfv $*/
3440435Speter
3511332Sswallace#include "opt_inet.h"
361549Srgrimes#include "opt_inet6.h"
372056Swollman
382056Swollman#ifdef HAVE_KERNEL_OPTION_HEADERS
39885Swollman#include "opt_device_polling.h"
40886Swollman#endif
41885Swollman
42886Swollman#include <sys/param.h>
43886Swollman#include <sys/systm.h>
44886Swollman#if __FreeBSD_version >= 800000
45886Swollman#include <sys/buf_ring.h>
46885Swollman#endif
47146731Sgad#include <sys/bus.h>
48146731Sgad#include <sys/endian.h>
49212965Salc#include <sys/kernel.h>
50885Swollman#include <sys/kthread.h>
51146731Sgad#include <sys/malloc.h>
52146731Sgad#include <sys/mbuf.h>
53146731Sgad#include <sys/module.h>
54146731Sgad#include <sys/rman.h>
55212965Salc#include <sys/socket.h>
56212965Salc#include <sys/sockio.h>
57212965Salc#include <sys/sysctl.h>
58212965Salc#include <sys/taskqueue.h>
59212965Salc#include <sys/eventhandler.h>
60212965Salc#include <machine/bus.h>
61146731Sgad#include <machine/resource.h>
62146731Sgad
63146731Sgad#include <net/bpf.h>
64146731Sgad#include <net/ethernet.h>
65146731Sgad#include <net/if.h>
66146731Sgad#include <net/if_arp.h>
67146731Sgad#include <net/if_dl.h>
68146731Sgad#include <net/if_media.h>
69146731Sgad
70146731Sgad#include <net/if_types.h>
71146731Sgad#include <net/if_vlan_var.h>
72146731Sgad
73146731Sgad#include <netinet/in_systm.h>
74146731Sgad#include <netinet/in.h>
75146731Sgad#include <netinet/if_ether.h>
76146731Sgad#include <netinet/ip.h>
77146731Sgad#include <netinet/ip6.h>
78146731Sgad#include <netinet/tcp.h>
79146731Sgad#include <netinet/udp.h>
80146731Sgad
81146731Sgad#include <machine/in_cksum.h>
82146731Sgad#include <dev/led/led.h>
83146731Sgad#include <dev/pci/pcivar.h>
84146731Sgad#include <dev/pci/pcireg.h>
85146731Sgad
86146731Sgad#include "e1000_api.h"
87146731Sgad#include "e1000_82571.h"
88146731Sgad#include "if_em.h"
89146731Sgad
90146731Sgad/*********************************************************************
91146731Sgad *  Set this to one to display debug statistics
92146731Sgad *********************************************************************/
93146731Sgadint	em_display_debug_stats = 0;
94146731Sgad
95146731Sgad/*********************************************************************
96147151Sgad *  Driver version:
97147151Sgad *********************************************************************/
98146731Sgadchar em_driver_version[] = "7.3.8";
9959663Sdillon
10012130Sdg/*********************************************************************
10112130Sdg *  PCI Device ID Table
102885Swollman *
10312130Sdg *  Used by probe to select devices to load on
104182191Skib *  Last field stores an index into e1000_strings
105141028Ssobomax *  Last entry must be all 0s
106212965Salc *
107142448Ssobomax *  { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index }
108182191Skib *********************************************************************/
109885Swollman
110885Swollmanstatic em_vendor_info_t em_vendor_info_array[] =
111212965Salc{
112212965Salc	/* Intel(R) PRO/1000 Network Connection */
113885Swollman	{ 0x8086, E1000_DEV_ID_82571EB_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
114885Swollman	{ 0x8086, E1000_DEV_ID_82571EB_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
115885Swollman	{ 0x8086, E1000_DEV_ID_82571EB_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
116885Swollman	{ 0x8086, E1000_DEV_ID_82571EB_SERDES_DUAL,
117885Swollman						PCI_ANY_ID, PCI_ANY_ID, 0},
118272450Ssbruno	{ 0x8086, E1000_DEV_ID_82571EB_SERDES_QUAD,
119212965Salc						PCI_ANY_ID, PCI_ANY_ID, 0},
120885Swollman	{ 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER,
121272450Ssbruno						PCI_ANY_ID, PCI_ANY_ID, 0},
122885Swollman	{ 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER_LP,
123885Swollman						PCI_ANY_ID, PCI_ANY_ID, 0},
124142448Ssobomax	{ 0x8086, E1000_DEV_ID_82571EB_QUAD_FIBER,
125142448Ssobomax						PCI_ANY_ID, PCI_ANY_ID, 0},
126142448Ssobomax	{ 0x8086, E1000_DEV_ID_82571PT_QUAD_COPPER,
127142448Ssobomax						PCI_ANY_ID, PCI_ANY_ID, 0},
128142448Ssobomax	{ 0x8086, E1000_DEV_ID_82572EI_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
129182371Sattilio	{ 0x8086, E1000_DEV_ID_82572EI_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
130142448Ssobomax	{ 0x8086, E1000_DEV_ID_82572EI_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
131142448Ssobomax	{ 0x8086, E1000_DEV_ID_82572EI,		PCI_ANY_ID, PCI_ANY_ID, 0},
132142448Ssobomax
133142448Ssobomax	{ 0x8086, E1000_DEV_ID_82573E,		PCI_ANY_ID, PCI_ANY_ID, 0},
134146731Sgad	{ 0x8086, E1000_DEV_ID_82573E_IAMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
135212965Salc	{ 0x8086, E1000_DEV_ID_82573L,		PCI_ANY_ID, PCI_ANY_ID, 0},
136146731Sgad	{ 0x8086, E1000_DEV_ID_82583V,		PCI_ANY_ID, PCI_ANY_ID, 0},
137212965Salc	{ 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_SPT,
138146731Sgad						PCI_ANY_ID, PCI_ANY_ID, 0},
139146731Sgad	{ 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_SPT,
140146731Sgad						PCI_ANY_ID, PCI_ANY_ID, 0},
141146731Sgad	{ 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_DPT,
142146731Sgad						PCI_ANY_ID, PCI_ANY_ID, 0},
143146731Sgad	{ 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_DPT,
144146731Sgad						PCI_ANY_ID, PCI_ANY_ID, 0},
145146731Sgad	{ 0x8086, E1000_DEV_ID_ICH8_IGP_M_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
146146731Sgad	{ 0x8086, E1000_DEV_ID_ICH8_IGP_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
147146731Sgad	{ 0x8086, E1000_DEV_ID_ICH8_IGP_C,	PCI_ANY_ID, PCI_ANY_ID, 0},
148146731Sgad	{ 0x8086, E1000_DEV_ID_ICH8_IFE,	PCI_ANY_ID, PCI_ANY_ID, 0},
149146731Sgad	{ 0x8086, E1000_DEV_ID_ICH8_IFE_GT,	PCI_ANY_ID, PCI_ANY_ID, 0},
150146731Sgad	{ 0x8086, E1000_DEV_ID_ICH8_IFE_G,	PCI_ANY_ID, PCI_ANY_ID, 0},
151146731Sgad	{ 0x8086, E1000_DEV_ID_ICH8_IGP_M,	PCI_ANY_ID, PCI_ANY_ID, 0},
152146731Sgad	{ 0x8086, E1000_DEV_ID_ICH8_82567V_3,	PCI_ANY_ID, PCI_ANY_ID, 0},
153146731Sgad	{ 0x8086, E1000_DEV_ID_ICH9_IGP_M_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
154212965Salc	{ 0x8086, E1000_DEV_ID_ICH9_IGP_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
155146731Sgad	{ 0x8086, E1000_DEV_ID_ICH9_IGP_C,	PCI_ANY_ID, PCI_ANY_ID, 0},
156885Swollman	{ 0x8086, E1000_DEV_ID_ICH9_IGP_M,	PCI_ANY_ID, PCI_ANY_ID, 0},
157140992Ssobomax	{ 0x8086, E1000_DEV_ID_ICH9_IGP_M_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
158146731Sgad	{ 0x8086, E1000_DEV_ID_ICH9_IFE,	PCI_ANY_ID, PCI_ANY_ID, 0},
159146731Sgad	{ 0x8086, E1000_DEV_ID_ICH9_IFE_GT,	PCI_ANY_ID, PCI_ANY_ID, 0},
160146731Sgad	{ 0x8086, E1000_DEV_ID_ICH9_IFE_G,	PCI_ANY_ID, PCI_ANY_ID, 0},
161146731Sgad	{ 0x8086, E1000_DEV_ID_ICH9_BM,		PCI_ANY_ID, PCI_ANY_ID, 0},
162146731Sgad	{ 0x8086, E1000_DEV_ID_82574L,		PCI_ANY_ID, PCI_ANY_ID, 0},
163146731Sgad	{ 0x8086, E1000_DEV_ID_82574LA,		PCI_ANY_ID, PCI_ANY_ID, 0},
164146731Sgad	{ 0x8086, E1000_DEV_ID_ICH10_R_BM_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
165146731Sgad	{ 0x8086, E1000_DEV_ID_ICH10_R_BM_LF,	PCI_ANY_ID, PCI_ANY_ID, 0},
166146731Sgad	{ 0x8086, E1000_DEV_ID_ICH10_R_BM_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
167146731Sgad	{ 0x8086, E1000_DEV_ID_ICH10_D_BM_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
168212965Salc	{ 0x8086, E1000_DEV_ID_ICH10_D_BM_LF,	PCI_ANY_ID, PCI_ANY_ID, 0},
169212965Salc	{ 0x8086, E1000_DEV_ID_ICH10_D_BM_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
170147479Sgad	{ 0x8086, E1000_DEV_ID_PCH_M_HV_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
171146731Sgad	{ 0x8086, E1000_DEV_ID_PCH_M_HV_LC,	PCI_ANY_ID, PCI_ANY_ID, 0},
172146731Sgad	{ 0x8086, E1000_DEV_ID_PCH_D_HV_DM,	PCI_ANY_ID, PCI_ANY_ID, 0},
173182191Skib	{ 0x8086, E1000_DEV_ID_PCH_D_HV_DC,	PCI_ANY_ID, PCI_ANY_ID, 0},
174182191Skib	{ 0x8086, E1000_DEV_ID_PCH2_LV_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
175182191Skib	{ 0x8086, E1000_DEV_ID_PCH2_LV_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
176182191Skib	{ 0x8086, E1000_DEV_ID_PCH_LPT_I217_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
177182191Skib	{ 0x8086, E1000_DEV_ID_PCH_LPT_I217_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
178182191Skib	{ 0x8086, E1000_DEV_ID_PCH_LPTLP_I218_LM,
179182191Skib						PCI_ANY_ID, PCI_ANY_ID, 0},
180182191Skib	{ 0x8086, E1000_DEV_ID_PCH_LPTLP_I218_V,
181182191Skib						PCI_ANY_ID, PCI_ANY_ID, 0},
182182191Skib	/* required last entry */
183146731Sgad	{ 0, 0, 0, 0, 0}
184146731Sgad};
185146731Sgad
186146731Sgad/*********************************************************************
187146731Sgad *  Table of branding strings for all supported NICs.
188146731Sgad *********************************************************************/
189146731Sgad
190146731Sgadstatic char *em_strings[] = {
191146731Sgad	"Intel(R) PRO/1000 Network Connection"
192147479Sgad};
193146731Sgad
194182191Skib/*********************************************************************
195146731Sgad *  Function prototypes
196146731Sgad *********************************************************************/
197146731Sgadstatic int	em_probe(device_t);
198219352Skibstatic int	em_attach(device_t);
199182191Skibstatic int	em_detach(device_t);
200182191Skibstatic int	em_shutdown(device_t);
201146731Sgadstatic int	em_suspend(device_t);
202182191Skibstatic int	em_resume(device_t);
203146731Sgad#ifdef EM_MULTIQUEUE
204146731Sgadstatic int	em_mq_start(struct ifnet *, struct mbuf *);
205146731Sgadstatic int	em_mq_start_locked(struct ifnet *,
206146731Sgad		    struct tx_ring *, struct mbuf *);
207146731Sgadstatic void	em_qflush(struct ifnet *);
208146731Sgad#else
209146731Sgadstatic void	em_start(struct ifnet *);
210146731Sgadstatic void	em_start_locked(struct ifnet *, struct tx_ring *);
211146731Sgad#endif
212146731Sgadstatic int	em_ioctl(struct ifnet *, u_long, caddr_t);
213146731Sgadstatic void	em_init(void *);
214146731Sgadstatic void	em_init_locked(struct adapter *);
215146731Sgadstatic void	em_stop(void *);
216146731Sgadstatic void	em_media_status(struct ifnet *, struct ifmediareq *);
217146731Sgadstatic int	em_media_change(struct ifnet *);
218146731Sgadstatic void	em_identify_hardware(struct adapter *);
219146731Sgadstatic int	em_allocate_pci_resources(struct adapter *);
220146731Sgadstatic int	em_allocate_legacy(struct adapter *);
221146731Sgadstatic int	em_allocate_msix(struct adapter *);
222146731Sgadstatic int	em_allocate_queues(struct adapter *);
223146731Sgadstatic int	em_setup_msix(struct adapter *);
224146731Sgadstatic void	em_free_pci_resources(struct adapter *);
225146731Sgadstatic void	em_local_timer(void *);
226146731Sgadstatic void	em_reset(struct adapter *);
227210475Salcstatic int	em_setup_interface(device_t, struct adapter *);
228210475Salc
229146731Sgadstatic void	em_setup_transmit_structures(struct adapter *);
230147479Sgadstatic void	em_initialize_transmit_unit(struct adapter *);
231146731Sgadstatic int	em_allocate_transmit_buffers(struct tx_ring *);
232210475Salcstatic void	em_free_transmit_structures(struct adapter *);
233210475Salcstatic void	em_free_transmit_buffers(struct tx_ring *);
234146731Sgad
235146731Sgadstatic int	em_setup_receive_structures(struct adapter *);
236146731Sgadstatic int	em_allocate_receive_buffers(struct rx_ring *);
237146731Sgadstatic void	em_initialize_receive_unit(struct adapter *);
238146731Sgadstatic void	em_free_receive_structures(struct adapter *);
239140992Ssobomaxstatic void	em_free_receive_buffers(struct rx_ring *);
240140992Ssobomax
241140992Ssobomaxstatic void	em_enable_intr(struct adapter *);
242140992Ssobomaxstatic void	em_disable_intr(struct adapter *);
243210475Salcstatic void	em_update_stats_counters(struct adapter *);
244210475Salcstatic void	em_add_hw_stats(struct adapter *adapter);
245885Swollmanstatic void	em_txeof(struct tx_ring *);
246140992Ssobomaxstatic bool	em_rxeof(struct rx_ring *, int, int *);
247210545Salc#ifndef __NO_STRICT_ALIGNMENT
248140992Ssobomaxstatic int	em_fixup_rx(struct rx_ring *);
249182191Skib#endif
250182191Skibstatic void	em_receive_checksum(struct e1000_rx_desc *, struct mbuf *);
251140992Ssobomaxstatic void	em_transmit_checksum_setup(struct tx_ring *, struct mbuf *, int,
252885Swollman		    struct ip *, u32 *, u32 *);
253886Swollmanstatic void	em_tso_setup(struct tx_ring *, struct mbuf *, int, struct ip *,
254886Swollman		    struct tcphdr *, u32 *, u32 *);
255886Swollmanstatic void	em_set_promisc(struct adapter *);
256886Swollmanstatic void	em_disable_promisc(struct adapter *);
25743402Sdillonstatic void	em_set_multi(struct adapter *);
25840435Speterstatic void	em_update_link_status(struct adapter *);
259static void	em_refresh_mbufs(struct rx_ring *, int);
260static void	em_register_vlan(void *, struct ifnet *, u16);
261static void	em_unregister_vlan(void *, struct ifnet *, u16);
262static void	em_setup_vlan_hw_support(struct adapter *);
263static int	em_xmit(struct tx_ring *, struct mbuf **);
264static int	em_dma_malloc(struct adapter *, bus_size_t,
265		    struct em_dma_alloc *, int);
266static void	em_dma_free(struct adapter *, struct em_dma_alloc *);
267static int	em_sysctl_nvm_info(SYSCTL_HANDLER_ARGS);
268static void	em_print_nvm_info(struct adapter *);
269static int	em_sysctl_debug_info(SYSCTL_HANDLER_ARGS);
270static void	em_print_debug_info(struct adapter *);
271static int 	em_is_valid_ether_addr(u8 *);
272static int	em_sysctl_int_delay(SYSCTL_HANDLER_ARGS);
273static void	em_add_int_delay_sysctl(struct adapter *, const char *,
274		    const char *, struct em_int_delay_info *, int, int);
275/* Management and WOL Support */
276static void	em_init_manageability(struct adapter *);
277static void	em_release_manageability(struct adapter *);
278static void     em_get_hw_control(struct adapter *);
279static void     em_release_hw_control(struct adapter *);
280static void	em_get_wakeup(device_t);
281static void     em_enable_wakeup(device_t);
282static int	em_enable_phy_wakeup(struct adapter *);
283static void	em_led_func(void *, int);
284static void	em_disable_aspm(struct adapter *);
285
286static int	em_irq_fast(void *);
287
288/* MSIX handlers */
289static void	em_msix_tx(void *);
290static void	em_msix_rx(void *);
291static void	em_msix_link(void *);
292static void	em_handle_tx(void *context, int pending);
293static void	em_handle_rx(void *context, int pending);
294static void	em_handle_link(void *context, int pending);
295
296static void	em_set_sysctl_value(struct adapter *, const char *,
297		    const char *, int *, int);
298static int	em_set_flowcntl(SYSCTL_HANDLER_ARGS);
299static int	em_sysctl_eee(SYSCTL_HANDLER_ARGS);
300
301static __inline void em_rx_discard(struct rx_ring *, int);
302
303#ifdef DEVICE_POLLING
304static poll_handler_t em_poll;
305#endif /* POLLING */
306
307/*********************************************************************
308 *  FreeBSD Device Interface Entry Points
309 *********************************************************************/
310
311static device_method_t em_methods[] = {
312	/* Device interface */
313	DEVMETHOD(device_probe, em_probe),
314	DEVMETHOD(device_attach, em_attach),
315	DEVMETHOD(device_detach, em_detach),
316	DEVMETHOD(device_shutdown, em_shutdown),
317	DEVMETHOD(device_suspend, em_suspend),
318	DEVMETHOD(device_resume, em_resume),
319	DEVMETHOD_END
320};
321
322static driver_t em_driver = {
323	"em", em_methods, sizeof(struct adapter),
324};
325
326devclass_t em_devclass;
327DRIVER_MODULE(em, pci, em_driver, em_devclass, 0, 0);
328MODULE_DEPEND(em, pci, 1, 1, 1);
329MODULE_DEPEND(em, ether, 1, 1, 1);
330
331/*********************************************************************
332 *  Tunable default values.
333 *********************************************************************/
334
335#define EM_TICKS_TO_USECS(ticks)	((1024 * (ticks) + 500) / 1000)
336#define EM_USECS_TO_TICKS(usecs)	((1000 * (usecs) + 512) / 1024)
337#define M_TSO_LEN			66
338
339#define MAX_INTS_PER_SEC	8000
340#define DEFAULT_ITR		(1000000000/(MAX_INTS_PER_SEC * 256))
341
342/* Allow common code without TSO */
343#ifndef CSUM_TSO
344#define CSUM_TSO	0
345#endif
346
347static SYSCTL_NODE(_hw, OID_AUTO, em, CTLFLAG_RD, 0, "EM driver parameters");
348
349static int em_tx_int_delay_dflt = EM_TICKS_TO_USECS(EM_TIDV);
350static int em_rx_int_delay_dflt = EM_TICKS_TO_USECS(EM_RDTR);
351TUNABLE_INT("hw.em.tx_int_delay", &em_tx_int_delay_dflt);
352TUNABLE_INT("hw.em.rx_int_delay", &em_rx_int_delay_dflt);
353SYSCTL_INT(_hw_em, OID_AUTO, tx_int_delay, CTLFLAG_RDTUN, &em_tx_int_delay_dflt,
354    0, "Default transmit interrupt delay in usecs");
355SYSCTL_INT(_hw_em, OID_AUTO, rx_int_delay, CTLFLAG_RDTUN, &em_rx_int_delay_dflt,
356    0, "Default receive interrupt delay in usecs");
357
358static int em_tx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_TADV);
359static int em_rx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_RADV);
360TUNABLE_INT("hw.em.tx_abs_int_delay", &em_tx_abs_int_delay_dflt);
361TUNABLE_INT("hw.em.rx_abs_int_delay", &em_rx_abs_int_delay_dflt);
362SYSCTL_INT(_hw_em, OID_AUTO, tx_abs_int_delay, CTLFLAG_RDTUN,
363    &em_tx_abs_int_delay_dflt, 0,
364    "Default transmit interrupt delay limit in usecs");
365SYSCTL_INT(_hw_em, OID_AUTO, rx_abs_int_delay, CTLFLAG_RDTUN,
366    &em_rx_abs_int_delay_dflt, 0,
367    "Default receive interrupt delay limit in usecs");
368
369static int em_rxd = EM_DEFAULT_RXD;
370static int em_txd = EM_DEFAULT_TXD;
371TUNABLE_INT("hw.em.rxd", &em_rxd);
372TUNABLE_INT("hw.em.txd", &em_txd);
373SYSCTL_INT(_hw_em, OID_AUTO, rxd, CTLFLAG_RDTUN, &em_rxd, 0,
374    "Number of receive descriptors per queue");
375SYSCTL_INT(_hw_em, OID_AUTO, txd, CTLFLAG_RDTUN, &em_txd, 0,
376    "Number of transmit descriptors per queue");
377
378static int em_smart_pwr_down = FALSE;
379TUNABLE_INT("hw.em.smart_pwr_down", &em_smart_pwr_down);
380SYSCTL_INT(_hw_em, OID_AUTO, smart_pwr_down, CTLFLAG_RDTUN, &em_smart_pwr_down,
381    0, "Set to true to leave smart power down enabled on newer adapters");
382
383/* Controls whether promiscuous also shows bad packets */
384static int em_debug_sbp = FALSE;
385TUNABLE_INT("hw.em.sbp", &em_debug_sbp);
386SYSCTL_INT(_hw_em, OID_AUTO, sbp, CTLFLAG_RDTUN, &em_debug_sbp, 0,
387    "Show bad packets in promiscuous mode");
388
389static int em_enable_msix = TRUE;
390TUNABLE_INT("hw.em.enable_msix", &em_enable_msix);
391SYSCTL_INT(_hw_em, OID_AUTO, enable_msix, CTLFLAG_RDTUN, &em_enable_msix, 0,
392    "Enable MSI-X interrupts");
393
394/* How many packets rxeof tries to clean at a time */
395static int em_rx_process_limit = 100;
396TUNABLE_INT("hw.em.rx_process_limit", &em_rx_process_limit);
397SYSCTL_INT(_hw_em, OID_AUTO, rx_process_limit, CTLFLAG_RDTUN,
398    &em_rx_process_limit, 0,
399    "Maximum number of received packets to process "
400    "at a time, -1 means unlimited");
401
402/* Energy efficient ethernet - default to OFF */
403static int eee_setting = 1;
404TUNABLE_INT("hw.em.eee_setting", &eee_setting);
405SYSCTL_INT(_hw_em, OID_AUTO, eee_setting, CTLFLAG_RDTUN, &eee_setting, 0,
406    "Enable Energy Efficient Ethernet");
407
408/* Global used in WOL setup with multiport cards */
409static int global_quad_port_a = 0;
410
411#ifdef DEV_NETMAP	/* see ixgbe.c for details */
412#include <dev/netmap/if_em_netmap.h>
413#endif /* DEV_NETMAP */
414
415/*********************************************************************
416 *  Device identification routine
417 *
418 *  em_probe determines if the driver should be loaded on
419 *  adapter based on PCI vendor/device id of the adapter.
420 *
421 *  return BUS_PROBE_DEFAULT on success, positive on failure
422 *********************************************************************/
423
424static int
425em_probe(device_t dev)
426{
427	char		adapter_name[60];
428	u16		pci_vendor_id = 0;
429	u16		pci_device_id = 0;
430	u16		pci_subvendor_id = 0;
431	u16		pci_subdevice_id = 0;
432	em_vendor_info_t *ent;
433
434	INIT_DEBUGOUT("em_probe: begin");
435
436	pci_vendor_id = pci_get_vendor(dev);
437	if (pci_vendor_id != EM_VENDOR_ID)
438		return (ENXIO);
439
440	pci_device_id = pci_get_device(dev);
441	pci_subvendor_id = pci_get_subvendor(dev);
442	pci_subdevice_id = pci_get_subdevice(dev);
443
444	ent = em_vendor_info_array;
445	while (ent->vendor_id != 0) {
446		if ((pci_vendor_id == ent->vendor_id) &&
447		    (pci_device_id == ent->device_id) &&
448
449		    ((pci_subvendor_id == ent->subvendor_id) ||
450		    (ent->subvendor_id == PCI_ANY_ID)) &&
451
452		    ((pci_subdevice_id == ent->subdevice_id) ||
453		    (ent->subdevice_id == PCI_ANY_ID))) {
454			sprintf(adapter_name, "%s %s",
455				em_strings[ent->index],
456				em_driver_version);
457			device_set_desc_copy(dev, adapter_name);
458			return (BUS_PROBE_DEFAULT);
459		}
460		ent++;
461	}
462
463	return (ENXIO);
464}
465
466/*********************************************************************
467 *  Device initialization routine
468 *
469 *  The attach entry point is called when the driver is being loaded.
470 *  This routine identifies the type of hardware, allocates all resources
471 *  and initializes the hardware.
472 *
473 *  return 0 on success, positive on failure
474 *********************************************************************/
475
476static int
477em_attach(device_t dev)
478{
479	struct adapter	*adapter;
480	struct e1000_hw	*hw;
481	int		error = 0;
482
483	INIT_DEBUGOUT("em_attach: begin");
484
485	if (resource_disabled("em", device_get_unit(dev))) {
486		device_printf(dev, "Disabled by device hint\n");
487		return (ENXIO);
488	}
489
490	adapter = device_get_softc(dev);
491	adapter->dev = adapter->osdep.dev = dev;
492	hw = &adapter->hw;
493	EM_CORE_LOCK_INIT(adapter, device_get_nameunit(dev));
494
495	/* SYSCTL stuff */
496	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
497	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
498	    OID_AUTO, "nvm", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
499	    em_sysctl_nvm_info, "I", "NVM Information");
500
501	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
502	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
503	    OID_AUTO, "debug", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
504	    em_sysctl_debug_info, "I", "Debug Information");
505
506	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
507	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
508	    OID_AUTO, "fc", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
509	    em_set_flowcntl, "I", "Flow Control");
510
511	callout_init_mtx(&adapter->timer, &adapter->core_mtx, 0);
512
513	/* Determine hardware and mac info */
514	em_identify_hardware(adapter);
515
516	/* Setup PCI resources */
517	if (em_allocate_pci_resources(adapter)) {
518		device_printf(dev, "Allocation of PCI resources failed\n");
519		error = ENXIO;
520		goto err_pci;
521	}
522
523	/*
524	** For ICH8 and family we need to
525	** map the flash memory, and this
526	** must happen after the MAC is
527	** identified
528	*/
529	if ((hw->mac.type == e1000_ich8lan) ||
530	    (hw->mac.type == e1000_ich9lan) ||
531	    (hw->mac.type == e1000_ich10lan) ||
532	    (hw->mac.type == e1000_pchlan) ||
533	    (hw->mac.type == e1000_pch2lan) ||
534	    (hw->mac.type == e1000_pch_lpt)) {
535		int rid = EM_BAR_TYPE_FLASH;
536		adapter->flash = bus_alloc_resource_any(dev,
537		    SYS_RES_MEMORY, &rid, RF_ACTIVE);
538		if (adapter->flash == NULL) {
539			device_printf(dev, "Mapping of Flash failed\n");
540			error = ENXIO;
541			goto err_pci;
542		}
543		/* This is used in the shared code */
544		hw->flash_address = (u8 *)adapter->flash;
545		adapter->osdep.flash_bus_space_tag =
546		    rman_get_bustag(adapter->flash);
547		adapter->osdep.flash_bus_space_handle =
548		    rman_get_bushandle(adapter->flash);
549	}
550
551	/* Do Shared Code initialization */
552	if (e1000_setup_init_funcs(hw, TRUE)) {
553		device_printf(dev, "Setup of Shared code failed\n");
554		error = ENXIO;
555		goto err_pci;
556	}
557
558	e1000_get_bus_info(hw);
559
560	/* Set up some sysctls for the tunable interrupt delays */
561	em_add_int_delay_sysctl(adapter, "rx_int_delay",
562	    "receive interrupt delay in usecs", &adapter->rx_int_delay,
563	    E1000_REGISTER(hw, E1000_RDTR), em_rx_int_delay_dflt);
564	em_add_int_delay_sysctl(adapter, "tx_int_delay",
565	    "transmit interrupt delay in usecs", &adapter->tx_int_delay,
566	    E1000_REGISTER(hw, E1000_TIDV), em_tx_int_delay_dflt);
567	em_add_int_delay_sysctl(adapter, "rx_abs_int_delay",
568	    "receive interrupt delay limit in usecs",
569	    &adapter->rx_abs_int_delay,
570	    E1000_REGISTER(hw, E1000_RADV),
571	    em_rx_abs_int_delay_dflt);
572	em_add_int_delay_sysctl(adapter, "tx_abs_int_delay",
573	    "transmit interrupt delay limit in usecs",
574	    &adapter->tx_abs_int_delay,
575	    E1000_REGISTER(hw, E1000_TADV),
576	    em_tx_abs_int_delay_dflt);
577	em_add_int_delay_sysctl(adapter, "itr",
578	    "interrupt delay limit in usecs/4",
579	    &adapter->tx_itr,
580	    E1000_REGISTER(hw, E1000_ITR),
581	    DEFAULT_ITR);
582
583	/* Sysctl for limiting the amount of work done in the taskqueue */
584	em_set_sysctl_value(adapter, "rx_processing_limit",
585	    "max number of rx packets to process", &adapter->rx_process_limit,
586	    em_rx_process_limit);
587
588	/*
589	 * Validate number of transmit and receive descriptors. It
590	 * must not exceed hardware maximum, and must be multiple
591	 * of E1000_DBA_ALIGN.
592	 */
593	if (((em_txd * sizeof(struct e1000_tx_desc)) % EM_DBA_ALIGN) != 0 ||
594	    (em_txd > EM_MAX_TXD) || (em_txd < EM_MIN_TXD)) {
595		device_printf(dev, "Using %d TX descriptors instead of %d!\n",
596		    EM_DEFAULT_TXD, em_txd);
597		adapter->num_tx_desc = EM_DEFAULT_TXD;
598	} else
599		adapter->num_tx_desc = em_txd;
600
601	if (((em_rxd * sizeof(struct e1000_rx_desc)) % EM_DBA_ALIGN) != 0 ||
602	    (em_rxd > EM_MAX_RXD) || (em_rxd < EM_MIN_RXD)) {
603		device_printf(dev, "Using %d RX descriptors instead of %d!\n",
604		    EM_DEFAULT_RXD, em_rxd);
605		adapter->num_rx_desc = EM_DEFAULT_RXD;
606	} else
607		adapter->num_rx_desc = em_rxd;
608
609	hw->mac.autoneg = DO_AUTO_NEG;
610	hw->phy.autoneg_wait_to_complete = FALSE;
611	hw->phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
612
613	/* Copper options */
614	if (hw->phy.media_type == e1000_media_type_copper) {
615		hw->phy.mdix = AUTO_ALL_MODES;
616		hw->phy.disable_polarity_correction = FALSE;
617		hw->phy.ms_type = EM_MASTER_SLAVE;
618	}
619
620	/*
621	 * Set the frame limits assuming
622	 * standard ethernet sized frames.
623	 */
624	adapter->hw.mac.max_frame_size =
625	    ETHERMTU + ETHER_HDR_LEN + ETHERNET_FCS_SIZE;
626
627	/*
628	 * This controls when hardware reports transmit completion
629	 * status.
630	 */
631	hw->mac.report_tx_early = 1;
632
633	/*
634	** Get queue/ring memory
635	*/
636	if (em_allocate_queues(adapter)) {
637		error = ENOMEM;
638		goto err_pci;
639	}
640
641	/* Allocate multicast array memory. */
642	adapter->mta = malloc(sizeof(u8) * ETH_ADDR_LEN *
643	    MAX_NUM_MULTICAST_ADDRESSES, M_DEVBUF, M_NOWAIT);
644	if (adapter->mta == NULL) {
645		device_printf(dev, "Can not allocate multicast setup array\n");
646		error = ENOMEM;
647		goto err_late;
648	}
649
650	/* Check SOL/IDER usage */
651	if (e1000_check_reset_block(hw))
652		device_printf(dev, "PHY reset is blocked"
653		    " due to SOL/IDER session.\n");
654
655	/* Sysctl for setting Energy Efficient Ethernet */
656	hw->dev_spec.ich8lan.eee_disable = eee_setting;
657	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
658	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
659	    OID_AUTO, "eee_control", CTLTYPE_INT|CTLFLAG_RW,
660	    adapter, 0, em_sysctl_eee, "I",
661	    "Disable Energy Efficient Ethernet");
662
663	/*
664	** Start from a known state, this is
665	** important in reading the nvm and
666	** mac from that.
667	*/
668	e1000_reset_hw(hw);
669
670
671	/* Make sure we have a good EEPROM before we read from it */
672	if (e1000_validate_nvm_checksum(hw) < 0) {
673		/*
674		** Some PCI-E parts fail the first check due to
675		** the link being in sleep state, call it again,
676		** if it fails a second time its a real issue.
677		*/
678		if (e1000_validate_nvm_checksum(hw) < 0) {
679			device_printf(dev,
680			    "The EEPROM Checksum Is Not Valid\n");
681			error = EIO;
682			goto err_late;
683		}
684	}
685
686	/* Copy the permanent MAC address out of the EEPROM */
687	if (e1000_read_mac_addr(hw) < 0) {
688		device_printf(dev, "EEPROM read error while reading MAC"
689		    " address\n");
690		error = EIO;
691		goto err_late;
692	}
693
694	if (!em_is_valid_ether_addr(hw->mac.addr)) {
695		device_printf(dev, "Invalid MAC address\n");
696		error = EIO;
697		goto err_late;
698	}
699
700	/*
701	**  Do interrupt configuration
702	*/
703	if (adapter->msix > 1) /* Do MSIX */
704		error = em_allocate_msix(adapter);
705	else  /* MSI or Legacy */
706		error = em_allocate_legacy(adapter);
707	if (error)
708		goto err_late;
709
710	/*
711	 * Get Wake-on-Lan and Management info for later use
712	 */
713	em_get_wakeup(dev);
714
715	/* Setup OS specific network interface */
716	if (em_setup_interface(dev, adapter) != 0)
717		goto err_late;
718
719	em_reset(adapter);
720
721	/* Initialize statistics */
722	em_update_stats_counters(adapter);
723
724	hw->mac.get_link_status = 1;
725	em_update_link_status(adapter);
726
727	/* Register for VLAN events */
728	adapter->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
729	    em_register_vlan, adapter, EVENTHANDLER_PRI_FIRST);
730	adapter->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
731	    em_unregister_vlan, adapter, EVENTHANDLER_PRI_FIRST);
732
733	em_add_hw_stats(adapter);
734
735	/* Non-AMT based hardware can now take control from firmware */
736	if (adapter->has_manage && !adapter->has_amt)
737		em_get_hw_control(adapter);
738
739	/* Tell the stack that the interface is not active */
740	adapter->ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
741	adapter->ifp->if_drv_flags |= IFF_DRV_OACTIVE;
742
743	adapter->led_dev = led_create(em_led_func, adapter,
744	    device_get_nameunit(dev));
745#ifdef DEV_NETMAP
746	em_netmap_attach(adapter);
747#endif /* DEV_NETMAP */
748
749	INIT_DEBUGOUT("em_attach: end");
750
751	return (0);
752
753err_late:
754	em_free_transmit_structures(adapter);
755	em_free_receive_structures(adapter);
756	em_release_hw_control(adapter);
757	if (adapter->ifp != NULL)
758		if_free(adapter->ifp);
759err_pci:
760	em_free_pci_resources(adapter);
761	free(adapter->mta, M_DEVBUF);
762	EM_CORE_LOCK_DESTROY(adapter);
763
764	return (error);
765}
766
767/*********************************************************************
768 *  Device removal routine
769 *
770 *  The detach entry point is called when the driver is being removed.
771 *  This routine stops the adapter and deallocates all the resources
772 *  that were allocated for driver operation.
773 *
774 *  return 0 on success, positive on failure
775 *********************************************************************/
776
777static int
778em_detach(device_t dev)
779{
780	struct adapter	*adapter = device_get_softc(dev);
781	struct ifnet	*ifp = adapter->ifp;
782
783	INIT_DEBUGOUT("em_detach: begin");
784
785	/* Make sure VLANS are not using driver */
786	if (adapter->ifp->if_vlantrunk != NULL) {
787		device_printf(dev,"Vlan in use, detach first\n");
788		return (EBUSY);
789	}
790
791#ifdef DEVICE_POLLING
792	if (ifp->if_capenable & IFCAP_POLLING)
793		ether_poll_deregister(ifp);
794#endif
795
796	if (adapter->led_dev != NULL)
797		led_destroy(adapter->led_dev);
798
799	EM_CORE_LOCK(adapter);
800	adapter->in_detach = 1;
801	em_stop(adapter);
802	EM_CORE_UNLOCK(adapter);
803	EM_CORE_LOCK_DESTROY(adapter);
804
805	e1000_phy_hw_reset(&adapter->hw);
806
807	em_release_manageability(adapter);
808	em_release_hw_control(adapter);
809
810	/* Unregister VLAN events */
811	if (adapter->vlan_attach != NULL)
812		EVENTHANDLER_DEREGISTER(vlan_config, adapter->vlan_attach);
813	if (adapter->vlan_detach != NULL)
814		EVENTHANDLER_DEREGISTER(vlan_unconfig, adapter->vlan_detach);
815
816	ether_ifdetach(adapter->ifp);
817	callout_drain(&adapter->timer);
818
819#ifdef DEV_NETMAP
820	netmap_detach(ifp);
821#endif /* DEV_NETMAP */
822
823	em_free_pci_resources(adapter);
824	bus_generic_detach(dev);
825	if_free(ifp);
826
827	em_free_transmit_structures(adapter);
828	em_free_receive_structures(adapter);
829
830	em_release_hw_control(adapter);
831	free(adapter->mta, M_DEVBUF);
832
833	return (0);
834}
835
836/*********************************************************************
837 *
838 *  Shutdown entry point
839 *
840 **********************************************************************/
841
842static int
843em_shutdown(device_t dev)
844{
845	return em_suspend(dev);
846}
847
848/*
849 * Suspend/resume device methods.
850 */
851static int
852em_suspend(device_t dev)
853{
854	struct adapter *adapter = device_get_softc(dev);
855
856	EM_CORE_LOCK(adapter);
857
858        em_release_manageability(adapter);
859	em_release_hw_control(adapter);
860	em_enable_wakeup(dev);
861
862	EM_CORE_UNLOCK(adapter);
863
864	return bus_generic_suspend(dev);
865}
866
867static int
868em_resume(device_t dev)
869{
870	struct adapter *adapter = device_get_softc(dev);
871	struct tx_ring	*txr = adapter->tx_rings;
872	struct ifnet *ifp = adapter->ifp;
873
874	EM_CORE_LOCK(adapter);
875	if (adapter->hw.mac.type == e1000_pch2lan)
876		e1000_resume_workarounds_pchlan(&adapter->hw);
877	em_init_locked(adapter);
878	em_init_manageability(adapter);
879
880	if ((ifp->if_flags & IFF_UP) &&
881	    (ifp->if_drv_flags & IFF_DRV_RUNNING) && adapter->link_active) {
882		for (int i = 0; i < adapter->num_queues; i++, txr++) {
883			EM_TX_LOCK(txr);
884#ifdef EM_MULTIQUEUE
885			if (!drbr_empty(ifp, txr->br))
886				em_mq_start_locked(ifp, txr, NULL);
887#else
888			if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
889				em_start_locked(ifp, txr);
890#endif
891			EM_TX_UNLOCK(txr);
892		}
893	}
894	EM_CORE_UNLOCK(adapter);
895
896	return bus_generic_resume(dev);
897}
898
899
900#ifdef EM_MULTIQUEUE
901/*********************************************************************
902 *  Multiqueue Transmit routines
903 *
904 *  em_mq_start is called by the stack to initiate a transmit.
905 *  however, if busy the driver can queue the request rather
906 *  than do an immediate send. It is this that is an advantage
907 *  in this driver, rather than also having multiple tx queues.
908 **********************************************************************/
909static int
910em_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr, struct mbuf *m)
911{
912	struct adapter  *adapter = txr->adapter;
913        struct mbuf     *next;
914        int             err = 0, enq = 0;
915
916	if ((ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) !=
917	    IFF_DRV_RUNNING || adapter->link_active == 0) {
918		if (m != NULL)
919			err = drbr_enqueue(ifp, txr->br, m);
920		return (err);
921	}
922
923	enq = 0;
924	if (m != NULL) {
925		err = drbr_enqueue(ifp, txr->br, m);
926		if (err)
927			return (err);
928	}
929
930	/* Process the queue */
931	while ((next = drbr_peek(ifp, txr->br)) != NULL) {
932		if ((err = em_xmit(txr, &next)) != 0) {
933			if (next == NULL)
934				drbr_advance(ifp, txr->br);
935			else
936				drbr_putback(ifp, txr->br, next);
937			break;
938		}
939		drbr_advance(ifp, txr->br);
940		enq++;
941		ifp->if_obytes += next->m_pkthdr.len;
942		if (next->m_flags & M_MCAST)
943			ifp->if_omcasts++;
944		ETHER_BPF_MTAP(ifp, next);
945		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
946                        break;
947	}
948
949	if (enq > 0) {
950                /* Set the watchdog */
951                txr->queue_status = EM_QUEUE_WORKING;
952		txr->watchdog_time = ticks;
953	}
954
955	if (txr->tx_avail < EM_MAX_SCATTER)
956		em_txeof(txr);
957	if (txr->tx_avail < EM_MAX_SCATTER)
958		ifp->if_drv_flags |= IFF_DRV_OACTIVE;
959	return (err);
960}
961
962/*
963** Multiqueue capable stack interface
964*/
965static int
966em_mq_start(struct ifnet *ifp, struct mbuf *m)
967{
968	struct adapter	*adapter = ifp->if_softc;
969	struct tx_ring	*txr = adapter->tx_rings;
970	int 		error;
971
972	if (EM_TX_TRYLOCK(txr)) {
973		error = em_mq_start_locked(ifp, txr, m);
974		EM_TX_UNLOCK(txr);
975	} else
976		error = drbr_enqueue(ifp, txr->br, m);
977
978	return (error);
979}
980
981/*
982** Flush all ring buffers
983*/
984static void
985em_qflush(struct ifnet *ifp)
986{
987	struct adapter  *adapter = ifp->if_softc;
988	struct tx_ring  *txr = adapter->tx_rings;
989	struct mbuf     *m;
990
991	for (int i = 0; i < adapter->num_queues; i++, txr++) {
992		EM_TX_LOCK(txr);
993		while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
994			m_freem(m);
995		EM_TX_UNLOCK(txr);
996	}
997	if_qflush(ifp);
998}
999#else  /* !EM_MULTIQUEUE */
1000
1001static void
1002em_start_locked(struct ifnet *ifp, struct tx_ring *txr)
1003{
1004	struct adapter	*adapter = ifp->if_softc;
1005	struct mbuf	*m_head;
1006
1007	EM_TX_LOCK_ASSERT(txr);
1008
1009	if ((ifp->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) !=
1010	    IFF_DRV_RUNNING)
1011		return;
1012
1013	if (!adapter->link_active)
1014		return;
1015
1016	while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
1017        	/* Call cleanup if number of TX descriptors low */
1018		if (txr->tx_avail <= EM_TX_CLEANUP_THRESHOLD)
1019			em_txeof(txr);
1020		if (txr->tx_avail < EM_MAX_SCATTER) {
1021			ifp->if_drv_flags |= IFF_DRV_OACTIVE;
1022			break;
1023		}
1024                IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
1025		if (m_head == NULL)
1026			break;
1027		/*
1028		 *  Encapsulation can modify our pointer, and or make it
1029		 *  NULL on failure.  In that event, we can't requeue.
1030		 */
1031		if (em_xmit(txr, &m_head)) {
1032			if (m_head == NULL)
1033				break;
1034			IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
1035			break;
1036		}
1037
1038		/* Send a copy of the frame to the BPF listener */
1039		ETHER_BPF_MTAP(ifp, m_head);
1040
1041		/* Set timeout in case hardware has problems transmitting. */
1042		txr->watchdog_time = ticks;
1043                txr->queue_status = EM_QUEUE_WORKING;
1044	}
1045
1046	return;
1047}
1048
1049static void
1050em_start(struct ifnet *ifp)
1051{
1052	struct adapter	*adapter = ifp->if_softc;
1053	struct tx_ring	*txr = adapter->tx_rings;
1054
1055	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1056		EM_TX_LOCK(txr);
1057		em_start_locked(ifp, txr);
1058		EM_TX_UNLOCK(txr);
1059	}
1060	return;
1061}
1062#endif /* EM_MULTIQUEUE */
1063
1064/*********************************************************************
1065 *  Ioctl entry point
1066 *
1067 *  em_ioctl is called when the user wants to configure the
1068 *  interface.
1069 *
1070 *  return 0 on success, positive on failure
1071 **********************************************************************/
1072
1073static int
1074em_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
1075{
1076	struct adapter	*adapter = ifp->if_softc;
1077	struct ifreq	*ifr = (struct ifreq *)data;
1078#if defined(INET) || defined(INET6)
1079	struct ifaddr	*ifa = (struct ifaddr *)data;
1080#endif
1081	bool		avoid_reset = FALSE;
1082	int		error = 0;
1083
1084	if (adapter->in_detach)
1085		return (error);
1086
1087	switch (command) {
1088	case SIOCSIFADDR:
1089#ifdef INET
1090		if (ifa->ifa_addr->sa_family == AF_INET)
1091			avoid_reset = TRUE;
1092#endif
1093#ifdef INET6
1094		if (ifa->ifa_addr->sa_family == AF_INET6)
1095			avoid_reset = TRUE;
1096#endif
1097		/*
1098		** Calling init results in link renegotiation,
1099		** so we avoid doing it when possible.
1100		*/
1101		if (avoid_reset) {
1102			ifp->if_flags |= IFF_UP;
1103			if (!(ifp->if_drv_flags & IFF_DRV_RUNNING))
1104				em_init(adapter);
1105#ifdef INET
1106			if (!(ifp->if_flags & IFF_NOARP))
1107				arp_ifinit(ifp, ifa);
1108#endif
1109		} else
1110			error = ether_ioctl(ifp, command, data);
1111		break;
1112	case SIOCSIFMTU:
1113	    {
1114		int max_frame_size;
1115
1116		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFMTU (Set Interface MTU)");
1117
1118		EM_CORE_LOCK(adapter);
1119		switch (adapter->hw.mac.type) {
1120		case e1000_82571:
1121		case e1000_82572:
1122		case e1000_ich9lan:
1123		case e1000_ich10lan:
1124		case e1000_pch2lan:
1125		case e1000_pch_lpt:
1126		case e1000_82574:
1127		case e1000_82583:
1128		case e1000_80003es2lan:	/* 9K Jumbo Frame size */
1129			max_frame_size = 9234;
1130			break;
1131		case e1000_pchlan:
1132			max_frame_size = 4096;
1133			break;
1134			/* Adapters that do not support jumbo frames */
1135		case e1000_ich8lan:
1136			max_frame_size = ETHER_MAX_LEN;
1137			break;
1138		default:
1139			max_frame_size = MAX_JUMBO_FRAME_SIZE;
1140		}
1141		if (ifr->ifr_mtu > max_frame_size - ETHER_HDR_LEN -
1142		    ETHER_CRC_LEN) {
1143			EM_CORE_UNLOCK(adapter);
1144			error = EINVAL;
1145			break;
1146		}
1147
1148		ifp->if_mtu = ifr->ifr_mtu;
1149		adapter->hw.mac.max_frame_size =
1150		    ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
1151		em_init_locked(adapter);
1152		EM_CORE_UNLOCK(adapter);
1153		break;
1154	    }
1155	case SIOCSIFFLAGS:
1156		IOCTL_DEBUGOUT("ioctl rcv'd:\
1157		    SIOCSIFFLAGS (Set Interface Flags)");
1158		EM_CORE_LOCK(adapter);
1159		if (ifp->if_flags & IFF_UP) {
1160			if ((ifp->if_drv_flags & IFF_DRV_RUNNING)) {
1161				if ((ifp->if_flags ^ adapter->if_flags) &
1162				    (IFF_PROMISC | IFF_ALLMULTI)) {
1163					em_disable_promisc(adapter);
1164					em_set_promisc(adapter);
1165				}
1166			} else
1167				em_init_locked(adapter);
1168		} else
1169			if (ifp->if_drv_flags & IFF_DRV_RUNNING)
1170				em_stop(adapter);
1171		adapter->if_flags = ifp->if_flags;
1172		EM_CORE_UNLOCK(adapter);
1173		break;
1174	case SIOCADDMULTI:
1175	case SIOCDELMULTI:
1176		IOCTL_DEBUGOUT("ioctl rcv'd: SIOC(ADD|DEL)MULTI");
1177		if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1178			EM_CORE_LOCK(adapter);
1179			em_disable_intr(adapter);
1180			em_set_multi(adapter);
1181#ifdef DEVICE_POLLING
1182			if (!(ifp->if_capenable & IFCAP_POLLING))
1183#endif
1184				em_enable_intr(adapter);
1185			EM_CORE_UNLOCK(adapter);
1186		}
1187		break;
1188	case SIOCSIFMEDIA:
1189		/* Check SOL/IDER usage */
1190		EM_CORE_LOCK(adapter);
1191		if (e1000_check_reset_block(&adapter->hw)) {
1192			EM_CORE_UNLOCK(adapter);
1193			device_printf(adapter->dev, "Media change is"
1194			    " blocked due to SOL/IDER session.\n");
1195			break;
1196		}
1197		EM_CORE_UNLOCK(adapter);
1198		/* falls thru */
1199	case SIOCGIFMEDIA:
1200		IOCTL_DEBUGOUT("ioctl rcv'd: \
1201		    SIOCxIFMEDIA (Get/Set Interface Media)");
1202		error = ifmedia_ioctl(ifp, ifr, &adapter->media, command);
1203		break;
1204	case SIOCSIFCAP:
1205	    {
1206		int mask, reinit;
1207
1208		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFCAP (Set Capabilities)");
1209		reinit = 0;
1210		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
1211#ifdef DEVICE_POLLING
1212		if (mask & IFCAP_POLLING) {
1213			if (ifr->ifr_reqcap & IFCAP_POLLING) {
1214				error = ether_poll_register(em_poll, ifp);
1215				if (error)
1216					return (error);
1217				EM_CORE_LOCK(adapter);
1218				em_disable_intr(adapter);
1219				ifp->if_capenable |= IFCAP_POLLING;
1220				EM_CORE_UNLOCK(adapter);
1221			} else {
1222				error = ether_poll_deregister(ifp);
1223				/* Enable interrupt even in error case */
1224				EM_CORE_LOCK(adapter);
1225				em_enable_intr(adapter);
1226				ifp->if_capenable &= ~IFCAP_POLLING;
1227				EM_CORE_UNLOCK(adapter);
1228			}
1229		}
1230#endif
1231		if (mask & IFCAP_HWCSUM) {
1232			ifp->if_capenable ^= IFCAP_HWCSUM;
1233			reinit = 1;
1234		}
1235		if (mask & IFCAP_TSO4) {
1236			ifp->if_capenable ^= IFCAP_TSO4;
1237			reinit = 1;
1238		}
1239		if (mask & IFCAP_VLAN_HWTAGGING) {
1240			ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
1241			reinit = 1;
1242		}
1243		if (mask & IFCAP_VLAN_HWFILTER) {
1244			ifp->if_capenable ^= IFCAP_VLAN_HWFILTER;
1245			reinit = 1;
1246		}
1247		if (mask & IFCAP_VLAN_HWTSO) {
1248			ifp->if_capenable ^= IFCAP_VLAN_HWTSO;
1249			reinit = 1;
1250		}
1251		if ((mask & IFCAP_WOL) &&
1252		    (ifp->if_capabilities & IFCAP_WOL) != 0) {
1253			if (mask & IFCAP_WOL_MCAST)
1254				ifp->if_capenable ^= IFCAP_WOL_MCAST;
1255			if (mask & IFCAP_WOL_MAGIC)
1256				ifp->if_capenable ^= IFCAP_WOL_MAGIC;
1257		}
1258		if (reinit && (ifp->if_drv_flags & IFF_DRV_RUNNING))
1259			em_init(adapter);
1260		VLAN_CAPABILITIES(ifp);
1261		break;
1262	    }
1263
1264	default:
1265		error = ether_ioctl(ifp, command, data);
1266		break;
1267	}
1268
1269	return (error);
1270}
1271
1272
1273/*********************************************************************
1274 *  Init entry point
1275 *
1276 *  This routine is used in two ways. It is used by the stack as
1277 *  init entry point in network interface structure. It is also used
1278 *  by the driver as a hw/sw initialization routine to get to a
1279 *  consistent state.
1280 *
1281 *  return 0 on success, positive on failure
1282 **********************************************************************/
1283
1284static void
1285em_init_locked(struct adapter *adapter)
1286{
1287	struct ifnet	*ifp = adapter->ifp;
1288	device_t	dev = adapter->dev;
1289
1290	INIT_DEBUGOUT("em_init: begin");
1291
1292	EM_CORE_LOCK_ASSERT(adapter);
1293
1294	em_disable_intr(adapter);
1295	callout_stop(&adapter->timer);
1296
1297	/* Get the latest mac address, User can use a LAA */
1298        bcopy(IF_LLADDR(adapter->ifp), adapter->hw.mac.addr,
1299              ETHER_ADDR_LEN);
1300
1301	/* Put the address into the Receive Address Array */
1302	e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);
1303
1304	/*
1305	 * With the 82571 adapter, RAR[0] may be overwritten
1306	 * when the other port is reset, we make a duplicate
1307	 * in RAR[14] for that eventuality, this assures
1308	 * the interface continues to function.
1309	 */
1310	if (adapter->hw.mac.type == e1000_82571) {
1311		e1000_set_laa_state_82571(&adapter->hw, TRUE);
1312		e1000_rar_set(&adapter->hw, adapter->hw.mac.addr,
1313		    E1000_RAR_ENTRIES - 1);
1314	}
1315
1316	/* Initialize the hardware */
1317	em_reset(adapter);
1318	em_update_link_status(adapter);
1319
1320	/* Setup VLAN support, basic and offload if available */
1321	E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);
1322
1323	/* Set hardware offload abilities */
1324	ifp->if_hwassist = 0;
1325	if (ifp->if_capenable & IFCAP_TXCSUM)
1326		ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP);
1327	if (ifp->if_capenable & IFCAP_TSO4)
1328		ifp->if_hwassist |= CSUM_TSO;
1329
1330	/* Configure for OS presence */
1331	em_init_manageability(adapter);
1332
1333	/* Prepare transmit descriptors and buffers */
1334	em_setup_transmit_structures(adapter);
1335	em_initialize_transmit_unit(adapter);
1336
1337	/* Setup Multicast table */
1338	em_set_multi(adapter);
1339
1340	/*
1341	** Figure out the desired mbuf
1342	** pool for doing jumbos
1343	*/
1344	if (adapter->hw.mac.max_frame_size <= 2048)
1345		adapter->rx_mbuf_sz = MCLBYTES;
1346	else if (adapter->hw.mac.max_frame_size <= 4096)
1347		adapter->rx_mbuf_sz = MJUMPAGESIZE;
1348	else
1349		adapter->rx_mbuf_sz = MJUM9BYTES;
1350
1351	/* Prepare receive descriptors and buffers */
1352	if (em_setup_receive_structures(adapter)) {
1353		device_printf(dev, "Could not setup receive structures\n");
1354		em_stop(adapter);
1355		return;
1356	}
1357	em_initialize_receive_unit(adapter);
1358
1359	/* Use real VLAN Filter support? */
1360	if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) {
1361		if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
1362			/* Use real VLAN Filter support */
1363			em_setup_vlan_hw_support(adapter);
1364		else {
1365			u32 ctrl;
1366			ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL);
1367			ctrl |= E1000_CTRL_VME;
1368			E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl);
1369		}
1370	}
1371
1372	/* Don't lose promiscuous settings */
1373	em_set_promisc(adapter);
1374
1375	/* Set the interface as ACTIVE */
1376	ifp->if_drv_flags |= IFF_DRV_RUNNING;
1377	ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
1378
1379	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
1380	e1000_clear_hw_cntrs_base_generic(&adapter->hw);
1381
1382	/* MSI/X configuration for 82574 */
1383	if (adapter->hw.mac.type == e1000_82574) {
1384		int tmp;
1385		tmp = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
1386		tmp |= E1000_CTRL_EXT_PBA_CLR;
1387		E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, tmp);
1388		/* Set the IVAR - interrupt vector routing. */
1389		E1000_WRITE_REG(&adapter->hw, E1000_IVAR, adapter->ivars);
1390	}
1391
1392#ifdef DEVICE_POLLING
1393	/*
1394	 * Only enable interrupts if we are not polling, make sure
1395	 * they are off otherwise.
1396	 */
1397	if (ifp->if_capenable & IFCAP_POLLING)
1398		em_disable_intr(adapter);
1399	else
1400#endif /* DEVICE_POLLING */
1401		em_enable_intr(adapter);
1402
1403	/* AMT based hardware can now take control from firmware */
1404	if (adapter->has_manage && adapter->has_amt)
1405		em_get_hw_control(adapter);
1406}
1407
1408static void
1409em_init(void *arg)
1410{
1411	struct adapter *adapter = arg;
1412
1413	EM_CORE_LOCK(adapter);
1414	em_init_locked(adapter);
1415	EM_CORE_UNLOCK(adapter);
1416}
1417
1418
1419#ifdef DEVICE_POLLING
1420/*********************************************************************
1421 *
1422 *  Legacy polling routine: note this only works with single queue
1423 *
1424 *********************************************************************/
1425static int
1426em_poll(struct ifnet *ifp, enum poll_cmd cmd, int count)
1427{
1428	struct adapter *adapter = ifp->if_softc;
1429	struct tx_ring	*txr = adapter->tx_rings;
1430	struct rx_ring	*rxr = adapter->rx_rings;
1431	u32		reg_icr;
1432	int		rx_done;
1433
1434	EM_CORE_LOCK(adapter);
1435	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
1436		EM_CORE_UNLOCK(adapter);
1437		return (0);
1438	}
1439
1440	if (cmd == POLL_AND_CHECK_STATUS) {
1441		reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1442		if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
1443			callout_stop(&adapter->timer);
1444			adapter->hw.mac.get_link_status = 1;
1445			em_update_link_status(adapter);
1446			callout_reset(&adapter->timer, hz,
1447			    em_local_timer, adapter);
1448		}
1449	}
1450	EM_CORE_UNLOCK(adapter);
1451
1452	em_rxeof(rxr, count, &rx_done);
1453
1454	EM_TX_LOCK(txr);
1455	em_txeof(txr);
1456#ifdef EM_MULTIQUEUE
1457	if (!drbr_empty(ifp, txr->br))
1458		em_mq_start_locked(ifp, txr, NULL);
1459#else
1460	if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1461		em_start_locked(ifp, txr);
1462#endif
1463	EM_TX_UNLOCK(txr);
1464
1465	return (rx_done);
1466}
1467#endif /* DEVICE_POLLING */
1468
1469
1470/*********************************************************************
1471 *
1472 *  Fast Legacy/MSI Combined Interrupt Service routine
1473 *
1474 *********************************************************************/
1475static int
1476em_irq_fast(void *arg)
1477{
1478	struct adapter	*adapter = arg;
1479	struct ifnet	*ifp;
1480	u32		reg_icr;
1481
1482	ifp = adapter->ifp;
1483
1484	reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1485
1486	/* Hot eject?  */
1487	if (reg_icr == 0xffffffff)
1488		return FILTER_STRAY;
1489
1490	/* Definitely not our interrupt.  */
1491	if (reg_icr == 0x0)
1492		return FILTER_STRAY;
1493
1494	/*
1495	 * Starting with the 82571 chip, bit 31 should be used to
1496	 * determine whether the interrupt belongs to us.
1497	 */
1498	if (adapter->hw.mac.type >= e1000_82571 &&
1499	    (reg_icr & E1000_ICR_INT_ASSERTED) == 0)
1500		return FILTER_STRAY;
1501
1502	em_disable_intr(adapter);
1503	taskqueue_enqueue(adapter->tq, &adapter->que_task);
1504
1505	/* Link status change */
1506	if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
1507		adapter->hw.mac.get_link_status = 1;
1508		taskqueue_enqueue(taskqueue_fast, &adapter->link_task);
1509	}
1510
1511	if (reg_icr & E1000_ICR_RXO)
1512		adapter->rx_overruns++;
1513	return FILTER_HANDLED;
1514}
1515
1516/* Combined RX/TX handler, used by Legacy and MSI */
1517static void
1518em_handle_que(void *context, int pending)
1519{
1520	struct adapter	*adapter = context;
1521	struct ifnet	*ifp = adapter->ifp;
1522	struct tx_ring	*txr = adapter->tx_rings;
1523	struct rx_ring	*rxr = adapter->rx_rings;
1524
1525
1526	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1527		bool more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
1528		EM_TX_LOCK(txr);
1529		em_txeof(txr);
1530#ifdef EM_MULTIQUEUE
1531		if (!drbr_empty(ifp, txr->br))
1532			em_mq_start_locked(ifp, txr, NULL);
1533#else
1534		if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1535			em_start_locked(ifp, txr);
1536#endif
1537		EM_TX_UNLOCK(txr);
1538		if (more) {
1539			taskqueue_enqueue(adapter->tq, &adapter->que_task);
1540			return;
1541		}
1542	}
1543
1544	em_enable_intr(adapter);
1545	return;
1546}
1547
1548
1549/*********************************************************************
1550 *
1551 *  MSIX Interrupt Service Routines
1552 *
1553 **********************************************************************/
1554static void
1555em_msix_tx(void *arg)
1556{
1557	struct tx_ring *txr = arg;
1558	struct adapter *adapter = txr->adapter;
1559	struct ifnet	*ifp = adapter->ifp;
1560
1561	++txr->tx_irq;
1562	EM_TX_LOCK(txr);
1563	em_txeof(txr);
1564#ifdef EM_MULTIQUEUE
1565	if (!drbr_empty(ifp, txr->br))
1566		em_mq_start_locked(ifp, txr, NULL);
1567#else
1568	if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1569		em_start_locked(ifp, txr);
1570#endif
1571	/* Reenable this interrupt */
1572	E1000_WRITE_REG(&adapter->hw, E1000_IMS, txr->ims);
1573	EM_TX_UNLOCK(txr);
1574	return;
1575}
1576
1577/*********************************************************************
1578 *
1579 *  MSIX RX Interrupt Service routine
1580 *
1581 **********************************************************************/
1582
1583static void
1584em_msix_rx(void *arg)
1585{
1586	struct rx_ring	*rxr = arg;
1587	struct adapter	*adapter = rxr->adapter;
1588	bool		more;
1589
1590	++rxr->rx_irq;
1591	if (!(adapter->ifp->if_drv_flags & IFF_DRV_RUNNING))
1592		return;
1593	more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
1594	if (more)
1595		taskqueue_enqueue(rxr->tq, &rxr->rx_task);
1596	else
1597		/* Reenable this interrupt */
1598		E1000_WRITE_REG(&adapter->hw, E1000_IMS, rxr->ims);
1599	return;
1600}
1601
1602/*********************************************************************
1603 *
1604 *  MSIX Link Fast Interrupt Service routine
1605 *
1606 **********************************************************************/
1607static void
1608em_msix_link(void *arg)
1609{
1610	struct adapter	*adapter = arg;
1611	u32		reg_icr;
1612
1613	++adapter->link_irq;
1614	reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1615
1616	if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
1617		adapter->hw.mac.get_link_status = 1;
1618		em_handle_link(adapter, 0);
1619	} else
1620		E1000_WRITE_REG(&adapter->hw, E1000_IMS,
1621		    EM_MSIX_LINK | E1000_IMS_LSC);
1622	return;
1623}
1624
1625static void
1626em_handle_rx(void *context, int pending)
1627{
1628	struct rx_ring	*rxr = context;
1629	struct adapter	*adapter = rxr->adapter;
1630        bool            more;
1631
1632	more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
1633	if (more)
1634		taskqueue_enqueue(rxr->tq, &rxr->rx_task);
1635	else
1636		/* Reenable this interrupt */
1637		E1000_WRITE_REG(&adapter->hw, E1000_IMS, rxr->ims);
1638}
1639
1640static void
1641em_handle_tx(void *context, int pending)
1642{
1643	struct tx_ring	*txr = context;
1644	struct adapter	*adapter = txr->adapter;
1645	struct ifnet	*ifp = adapter->ifp;
1646
1647	EM_TX_LOCK(txr);
1648	em_txeof(txr);
1649#ifdef EM_MULTIQUEUE
1650	if (!drbr_empty(ifp, txr->br))
1651		em_mq_start_locked(ifp, txr, NULL);
1652#else
1653	if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1654		em_start_locked(ifp, txr);
1655#endif
1656	E1000_WRITE_REG(&adapter->hw, E1000_IMS, txr->ims);
1657	EM_TX_UNLOCK(txr);
1658}
1659
1660static void
1661em_handle_link(void *context, int pending)
1662{
1663	struct adapter	*adapter = context;
1664	struct tx_ring	*txr = adapter->tx_rings;
1665	struct ifnet *ifp = adapter->ifp;
1666
1667	if (!(ifp->if_drv_flags & IFF_DRV_RUNNING))
1668		return;
1669
1670	EM_CORE_LOCK(adapter);
1671	callout_stop(&adapter->timer);
1672	em_update_link_status(adapter);
1673	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
1674	E1000_WRITE_REG(&adapter->hw, E1000_IMS,
1675	    EM_MSIX_LINK | E1000_IMS_LSC);
1676	if (adapter->link_active) {
1677		for (int i = 0; i < adapter->num_queues; i++, txr++) {
1678			EM_TX_LOCK(txr);
1679#ifdef EM_MULTIQUEUE
1680			if (!drbr_empty(ifp, txr->br))
1681				em_mq_start_locked(ifp, txr, NULL);
1682#else
1683			if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1684				em_start_locked(ifp, txr);
1685#endif
1686			EM_TX_UNLOCK(txr);
1687		}
1688	}
1689	EM_CORE_UNLOCK(adapter);
1690}
1691
1692
1693/*********************************************************************
1694 *
1695 *  Media Ioctl callback
1696 *
1697 *  This routine is called whenever the user queries the status of
1698 *  the interface using ifconfig.
1699 *
1700 **********************************************************************/
1701static void
1702em_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
1703{
1704	struct adapter *adapter = ifp->if_softc;
1705	u_char fiber_type = IFM_1000_SX;
1706
1707	INIT_DEBUGOUT("em_media_status: begin");
1708
1709	EM_CORE_LOCK(adapter);
1710	em_update_link_status(adapter);
1711
1712	ifmr->ifm_status = IFM_AVALID;
1713	ifmr->ifm_active = IFM_ETHER;
1714
1715	if (!adapter->link_active) {
1716		EM_CORE_UNLOCK(adapter);
1717		return;
1718	}
1719
1720	ifmr->ifm_status |= IFM_ACTIVE;
1721
1722	if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
1723	    (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
1724		ifmr->ifm_active |= fiber_type | IFM_FDX;
1725	} else {
1726		switch (adapter->link_speed) {
1727		case 10:
1728			ifmr->ifm_active |= IFM_10_T;
1729			break;
1730		case 100:
1731			ifmr->ifm_active |= IFM_100_TX;
1732			break;
1733		case 1000:
1734			ifmr->ifm_active |= IFM_1000_T;
1735			break;
1736		}
1737		if (adapter->link_duplex == FULL_DUPLEX)
1738			ifmr->ifm_active |= IFM_FDX;
1739		else
1740			ifmr->ifm_active |= IFM_HDX;
1741	}
1742	EM_CORE_UNLOCK(adapter);
1743}
1744
1745/*********************************************************************
1746 *
1747 *  Media Ioctl callback
1748 *
1749 *  This routine is called when the user changes speed/duplex using
1750 *  media/mediopt option with ifconfig.
1751 *
1752 **********************************************************************/
1753static int
1754em_media_change(struct ifnet *ifp)
1755{
1756	struct adapter *adapter = ifp->if_softc;
1757	struct ifmedia  *ifm = &adapter->media;
1758
1759	INIT_DEBUGOUT("em_media_change: begin");
1760
1761	if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
1762		return (EINVAL);
1763
1764	EM_CORE_LOCK(adapter);
1765	switch (IFM_SUBTYPE(ifm->ifm_media)) {
1766	case IFM_AUTO:
1767		adapter->hw.mac.autoneg = DO_AUTO_NEG;
1768		adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
1769		break;
1770	case IFM_1000_LX:
1771	case IFM_1000_SX:
1772	case IFM_1000_T:
1773		adapter->hw.mac.autoneg = DO_AUTO_NEG;
1774		adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
1775		break;
1776	case IFM_100_TX:
1777		adapter->hw.mac.autoneg = FALSE;
1778		adapter->hw.phy.autoneg_advertised = 0;
1779		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1780			adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_FULL;
1781		else
1782			adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_HALF;
1783		break;
1784	case IFM_10_T:
1785		adapter->hw.mac.autoneg = FALSE;
1786		adapter->hw.phy.autoneg_advertised = 0;
1787		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1788			adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_FULL;
1789		else
1790			adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_HALF;
1791		break;
1792	default:
1793		device_printf(adapter->dev, "Unsupported media type\n");
1794	}
1795
1796	em_init_locked(adapter);
1797	EM_CORE_UNLOCK(adapter);
1798
1799	return (0);
1800}
1801
1802/*********************************************************************
1803 *
1804 *  This routine maps the mbufs to tx descriptors.
1805 *
1806 *  return 0 on success, positive on failure
1807 **********************************************************************/
1808
1809static int
1810em_xmit(struct tx_ring *txr, struct mbuf **m_headp)
1811{
1812	struct adapter		*adapter = txr->adapter;
1813	bus_dma_segment_t	segs[EM_MAX_SCATTER];
1814	bus_dmamap_t		map;
1815	struct em_buffer	*tx_buffer, *tx_buffer_mapped;
1816	struct e1000_tx_desc	*ctxd = NULL;
1817	struct mbuf		*m_head;
1818	struct ether_header	*eh;
1819	struct ip		*ip = NULL;
1820	struct tcphdr		*tp = NULL;
1821	u32			txd_upper, txd_lower, txd_used, txd_saved;
1822	int			ip_off, poff;
1823	int			nsegs, i, j, first, last = 0;
1824	int			error, do_tso, tso_desc = 0, remap = 1;
1825
1826retry:
1827	m_head = *m_headp;
1828	txd_upper = txd_lower = txd_used = txd_saved = 0;
1829	do_tso = ((m_head->m_pkthdr.csum_flags & CSUM_TSO) != 0);
1830	ip_off = poff = 0;
1831
1832	/*
1833	 * Intel recommends entire IP/TCP header length reside in a single
1834	 * buffer. If multiple descriptors are used to describe the IP and
1835	 * TCP header, each descriptor should describe one or more
1836	 * complete headers; descriptors referencing only parts of headers
1837	 * are not supported. If all layer headers are not coalesced into
1838	 * a single buffer, each buffer should not cross a 4KB boundary,
1839	 * or be larger than the maximum read request size.
1840	 * Controller also requires modifing IP/TCP header to make TSO work
1841	 * so we firstly get a writable mbuf chain then coalesce ethernet/
1842	 * IP/TCP header into a single buffer to meet the requirement of
1843	 * controller. This also simplifies IP/TCP/UDP checksum offloading
1844	 * which also has similiar restrictions.
1845	 */
1846	if (do_tso || m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD) {
1847		if (do_tso || (m_head->m_next != NULL &&
1848		    m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD)) {
1849			if (M_WRITABLE(*m_headp) == 0) {
1850				m_head = m_dup(*m_headp, M_NOWAIT);
1851				m_freem(*m_headp);
1852				if (m_head == NULL) {
1853					*m_headp = NULL;
1854					return (ENOBUFS);
1855				}
1856				*m_headp = m_head;
1857			}
1858		}
1859		/*
1860		 * XXX
1861		 * Assume IPv4, we don't have TSO/checksum offload support
1862		 * for IPv6 yet.
1863		 */
1864		ip_off = sizeof(struct ether_header);
1865		m_head = m_pullup(m_head, ip_off);
1866		if (m_head == NULL) {
1867			*m_headp = NULL;
1868			return (ENOBUFS);
1869		}
1870		eh = mtod(m_head, struct ether_header *);
1871		if (eh->ether_type == htons(ETHERTYPE_VLAN)) {
1872			ip_off = sizeof(struct ether_vlan_header);
1873			m_head = m_pullup(m_head, ip_off);
1874			if (m_head == NULL) {
1875				*m_headp = NULL;
1876				return (ENOBUFS);
1877			}
1878		}
1879		m_head = m_pullup(m_head, ip_off + sizeof(struct ip));
1880		if (m_head == NULL) {
1881			*m_headp = NULL;
1882			return (ENOBUFS);
1883		}
1884		ip = (struct ip *)(mtod(m_head, char *) + ip_off);
1885		poff = ip_off + (ip->ip_hl << 2);
1886		if (do_tso) {
1887			m_head = m_pullup(m_head, poff + sizeof(struct tcphdr));
1888			if (m_head == NULL) {
1889				*m_headp = NULL;
1890				return (ENOBUFS);
1891			}
1892			tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
1893			/*
1894			 * TSO workaround:
1895			 *   pull 4 more bytes of data into it.
1896			 */
1897			m_head = m_pullup(m_head, poff + (tp->th_off << 2) + 4);
1898			if (m_head == NULL) {
1899				*m_headp = NULL;
1900				return (ENOBUFS);
1901			}
1902			ip = (struct ip *)(mtod(m_head, char *) + ip_off);
1903			ip->ip_len = 0;
1904			ip->ip_sum = 0;
1905			/*
1906			 * The pseudo TCP checksum does not include TCP payload
1907			 * length so driver should recompute the checksum here
1908			 * what hardware expect to see. This is adherence of
1909			 * Microsoft's Large Send specification.
1910			 */
1911			tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
1912			tp->th_sum = in_pseudo(ip->ip_src.s_addr,
1913			    ip->ip_dst.s_addr, htons(IPPROTO_TCP));
1914		} else if (m_head->m_pkthdr.csum_flags & CSUM_TCP) {
1915			m_head = m_pullup(m_head, poff + sizeof(struct tcphdr));
1916			if (m_head == NULL) {
1917				*m_headp = NULL;
1918				return (ENOBUFS);
1919			}
1920			tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
1921			m_head = m_pullup(m_head, poff + (tp->th_off << 2));
1922			if (m_head == NULL) {
1923				*m_headp = NULL;
1924				return (ENOBUFS);
1925			}
1926			ip = (struct ip *)(mtod(m_head, char *) + ip_off);
1927			tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
1928		} else if (m_head->m_pkthdr.csum_flags & CSUM_UDP) {
1929			m_head = m_pullup(m_head, poff + sizeof(struct udphdr));
1930			if (m_head == NULL) {
1931				*m_headp = NULL;
1932				return (ENOBUFS);
1933			}
1934			ip = (struct ip *)(mtod(m_head, char *) + ip_off);
1935		}
1936		*m_headp = m_head;
1937	}
1938
1939	/*
1940	 * Map the packet for DMA
1941	 *
1942	 * Capture the first descriptor index,
1943	 * this descriptor will have the index
1944	 * of the EOP which is the only one that
1945	 * now gets a DONE bit writeback.
1946	 */
1947	first = txr->next_avail_desc;
1948	tx_buffer = &txr->tx_buffers[first];
1949	tx_buffer_mapped = tx_buffer;
1950	map = tx_buffer->map;
1951
1952	error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
1953	    *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
1954
1955	/*
1956	 * There are two types of errors we can (try) to handle:
1957	 * - EFBIG means the mbuf chain was too long and bus_dma ran
1958	 *   out of segments.  Defragment the mbuf chain and try again.
1959	 * - ENOMEM means bus_dma could not obtain enough bounce buffers
1960	 *   at this point in time.  Defer sending and try again later.
1961	 * All other errors, in particular EINVAL, are fatal and prevent the
1962	 * mbuf chain from ever going through.  Drop it and report error.
1963	 */
1964	if (error == EFBIG && remap) {
1965		struct mbuf *m;
1966
1967		m = m_defrag(*m_headp, M_NOWAIT);
1968		if (m == NULL) {
1969			adapter->mbuf_alloc_failed++;
1970			m_freem(*m_headp);
1971			*m_headp = NULL;
1972			return (ENOBUFS);
1973		}
1974		*m_headp = m;
1975
1976		/* Try it again, but only once */
1977		remap = 0;
1978		goto retry;
1979	} else if (error == ENOMEM) {
1980		adapter->no_tx_dma_setup++;
1981		return (error);
1982	} else if (error != 0) {
1983		adapter->no_tx_dma_setup++;
1984		m_freem(*m_headp);
1985		*m_headp = NULL;
1986		return (error);
1987	}
1988
1989	/*
1990	 * TSO Hardware workaround, if this packet is not
1991	 * TSO, and is only a single descriptor long, and
1992	 * it follows a TSO burst, then we need to add a
1993	 * sentinel descriptor to prevent premature writeback.
1994	 */
1995	if ((do_tso == 0) && (txr->tx_tso == TRUE)) {
1996		if (nsegs == 1)
1997			tso_desc = TRUE;
1998		txr->tx_tso = FALSE;
1999	}
2000
2001        if (nsegs > (txr->tx_avail - 2)) {
2002                txr->no_desc_avail++;
2003		bus_dmamap_unload(txr->txtag, map);
2004		return (ENOBUFS);
2005        }
2006	m_head = *m_headp;
2007
2008	/* Do hardware assists */
2009	if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
2010		em_tso_setup(txr, m_head, ip_off, ip, tp,
2011		    &txd_upper, &txd_lower);
2012		/* we need to make a final sentinel transmit desc */
2013		tso_desc = TRUE;
2014	} else if (m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD)
2015		em_transmit_checksum_setup(txr, m_head,
2016		    ip_off, ip, &txd_upper, &txd_lower);
2017
2018	if (m_head->m_flags & M_VLANTAG) {
2019		/* Set the vlan id. */
2020		txd_upper |=
2021		    (htole16(m_head->m_pkthdr.ether_vtag) << 16);
2022                /* Tell hardware to add tag */
2023                txd_lower |= htole32(E1000_TXD_CMD_VLE);
2024        }
2025
2026	i = txr->next_avail_desc;
2027
2028	/* Set up our transmit descriptors */
2029	for (j = 0; j < nsegs; j++) {
2030		bus_size_t seg_len;
2031		bus_addr_t seg_addr;
2032
2033		tx_buffer = &txr->tx_buffers[i];
2034		ctxd = &txr->tx_base[i];
2035		seg_addr = segs[j].ds_addr;
2036		seg_len  = segs[j].ds_len;
2037		/*
2038		** TSO Workaround:
2039		** If this is the last descriptor, we want to
2040		** split it so we have a small final sentinel
2041		*/
2042		if (tso_desc && (j == (nsegs -1)) && (seg_len > 8)) {
2043			seg_len -= 4;
2044			ctxd->buffer_addr = htole64(seg_addr);
2045			ctxd->lower.data = htole32(
2046			adapter->txd_cmd | txd_lower | seg_len);
2047			ctxd->upper.data =
2048			    htole32(txd_upper);
2049			if (++i == adapter->num_tx_desc)
2050				i = 0;
2051			/* Now make the sentinel */
2052			++txd_used; /* using an extra txd */
2053			ctxd = &txr->tx_base[i];
2054			tx_buffer = &txr->tx_buffers[i];
2055			ctxd->buffer_addr =
2056			    htole64(seg_addr + seg_len);
2057			ctxd->lower.data = htole32(
2058			adapter->txd_cmd | txd_lower | 4);
2059			ctxd->upper.data =
2060			    htole32(txd_upper);
2061			last = i;
2062			if (++i == adapter->num_tx_desc)
2063				i = 0;
2064		} else {
2065			ctxd->buffer_addr = htole64(seg_addr);
2066			ctxd->lower.data = htole32(
2067			adapter->txd_cmd | txd_lower | seg_len);
2068			ctxd->upper.data =
2069			    htole32(txd_upper);
2070			last = i;
2071			if (++i == adapter->num_tx_desc)
2072				i = 0;
2073		}
2074		tx_buffer->m_head = NULL;
2075		tx_buffer->next_eop = -1;
2076	}
2077
2078	txr->next_avail_desc = i;
2079	txr->tx_avail -= nsegs;
2080	if (tso_desc) /* TSO used an extra for sentinel */
2081		txr->tx_avail -= txd_used;
2082
2083        tx_buffer->m_head = m_head;
2084	/*
2085	** Here we swap the map so the last descriptor,
2086	** which gets the completion interrupt has the
2087	** real map, and the first descriptor gets the
2088	** unused map from this descriptor.
2089	*/
2090	tx_buffer_mapped->map = tx_buffer->map;
2091	tx_buffer->map = map;
2092        bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE);
2093
2094        /*
2095         * Last Descriptor of Packet
2096	 * needs End Of Packet (EOP)
2097	 * and Report Status (RS)
2098         */
2099        ctxd->lower.data |=
2100	    htole32(E1000_TXD_CMD_EOP | E1000_TXD_CMD_RS);
2101	/*
2102	 * Keep track in the first buffer which
2103	 * descriptor will be written back
2104	 */
2105	tx_buffer = &txr->tx_buffers[first];
2106	tx_buffer->next_eop = last;
2107	/* Update the watchdog time early and often */
2108	txr->watchdog_time = ticks;
2109
2110	/*
2111	 * Advance the Transmit Descriptor Tail (TDT), this tells the E1000
2112	 * that this frame is available to transmit.
2113	 */
2114	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
2115	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
2116	E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me), i);
2117
2118	return (0);
2119}
2120
2121static void
2122em_set_promisc(struct adapter *adapter)
2123{
2124	struct ifnet	*ifp = adapter->ifp;
2125	u32		reg_rctl;
2126
2127	reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2128
2129	if (ifp->if_flags & IFF_PROMISC) {
2130		reg_rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
2131		/* Turn this on if you want to see bad packets */
2132		if (em_debug_sbp)
2133			reg_rctl |= E1000_RCTL_SBP;
2134		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2135	} else if (ifp->if_flags & IFF_ALLMULTI) {
2136		reg_rctl |= E1000_RCTL_MPE;
2137		reg_rctl &= ~E1000_RCTL_UPE;
2138		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2139	}
2140}
2141
2142static void
2143em_disable_promisc(struct adapter *adapter)
2144{
2145	struct ifnet	*ifp = adapter->ifp;
2146	u32		reg_rctl;
2147	int		mcnt = 0;
2148
2149	reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2150	reg_rctl &=  (~E1000_RCTL_UPE);
2151	if (ifp->if_flags & IFF_ALLMULTI)
2152		mcnt = MAX_NUM_MULTICAST_ADDRESSES;
2153	else {
2154		struct  ifmultiaddr *ifma;
2155#if __FreeBSD_version < 800000
2156		IF_ADDR_LOCK(ifp);
2157#else
2158		if_maddr_rlock(ifp);
2159#endif
2160		TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
2161			if (ifma->ifma_addr->sa_family != AF_LINK)
2162				continue;
2163			if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
2164				break;
2165			mcnt++;
2166		}
2167#if __FreeBSD_version < 800000
2168		IF_ADDR_UNLOCK(ifp);
2169#else
2170		if_maddr_runlock(ifp);
2171#endif
2172	}
2173	/* Don't disable if in MAX groups */
2174	if (mcnt < MAX_NUM_MULTICAST_ADDRESSES)
2175		reg_rctl &=  (~E1000_RCTL_MPE);
2176	reg_rctl &=  (~E1000_RCTL_SBP);
2177	E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2178}
2179
2180
2181/*********************************************************************
2182 *  Multicast Update
2183 *
2184 *  This routine is called whenever multicast address list is updated.
2185 *
2186 **********************************************************************/
2187
2188static void
2189em_set_multi(struct adapter *adapter)
2190{
2191	struct ifnet	*ifp = adapter->ifp;
2192	struct ifmultiaddr *ifma;
2193	u32 reg_rctl = 0;
2194	u8  *mta; /* Multicast array memory */
2195	int mcnt = 0;
2196
2197	IOCTL_DEBUGOUT("em_set_multi: begin");
2198
2199	mta = adapter->mta;
2200	bzero(mta, sizeof(u8) * ETH_ADDR_LEN * MAX_NUM_MULTICAST_ADDRESSES);
2201
2202	if (adapter->hw.mac.type == e1000_82542 &&
2203	    adapter->hw.revision_id == E1000_REVISION_2) {
2204		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2205		if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
2206			e1000_pci_clear_mwi(&adapter->hw);
2207		reg_rctl |= E1000_RCTL_RST;
2208		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2209		msec_delay(5);
2210	}
2211
2212#if __FreeBSD_version < 800000
2213	IF_ADDR_LOCK(ifp);
2214#else
2215	if_maddr_rlock(ifp);
2216#endif
2217	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
2218		if (ifma->ifma_addr->sa_family != AF_LINK)
2219			continue;
2220
2221		if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
2222			break;
2223
2224		bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
2225		    &mta[mcnt * ETH_ADDR_LEN], ETH_ADDR_LEN);
2226		mcnt++;
2227	}
2228#if __FreeBSD_version < 800000
2229	IF_ADDR_UNLOCK(ifp);
2230#else
2231	if_maddr_runlock(ifp);
2232#endif
2233	if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES) {
2234		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2235		reg_rctl |= E1000_RCTL_MPE;
2236		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2237	} else
2238		e1000_update_mc_addr_list(&adapter->hw, mta, mcnt);
2239
2240	if (adapter->hw.mac.type == e1000_82542 &&
2241	    adapter->hw.revision_id == E1000_REVISION_2) {
2242		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2243		reg_rctl &= ~E1000_RCTL_RST;
2244		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2245		msec_delay(5);
2246		if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
2247			e1000_pci_set_mwi(&adapter->hw);
2248	}
2249}
2250
2251
2252/*********************************************************************
2253 *  Timer routine
2254 *
2255 *  This routine checks for link status and updates statistics.
2256 *
2257 **********************************************************************/
2258
2259static void
2260em_local_timer(void *arg)
2261{
2262	struct adapter	*adapter = arg;
2263	struct ifnet	*ifp = adapter->ifp;
2264	struct tx_ring	*txr = adapter->tx_rings;
2265	struct rx_ring	*rxr = adapter->rx_rings;
2266	u32		trigger;
2267
2268	EM_CORE_LOCK_ASSERT(adapter);
2269
2270	em_update_link_status(adapter);
2271	em_update_stats_counters(adapter);
2272
2273	/* Reset LAA into RAR[0] on 82571 */
2274	if ((adapter->hw.mac.type == e1000_82571) &&
2275	    e1000_get_laa_state_82571(&adapter->hw))
2276		e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);
2277
2278	/* Mask to use in the irq trigger */
2279	if (adapter->msix_mem)
2280		trigger = rxr->ims; /* RX for 82574 */
2281	else
2282		trigger = E1000_ICS_RXDMT0;
2283
2284	/*
2285	** Check on the state of the TX queue(s), this
2286	** can be done without the lock because its RO
2287	** and the HUNG state will be static if set.
2288	*/
2289	for (int i = 0; i < adapter->num_queues; i++, txr++) {
2290		if ((txr->queue_status == EM_QUEUE_HUNG) &&
2291		    (adapter->pause_frames == 0))
2292			goto hung;
2293		/* Schedule a TX tasklet if needed */
2294		if (txr->tx_avail <= EM_MAX_SCATTER)
2295			taskqueue_enqueue(txr->tq, &txr->tx_task);
2296	}
2297
2298	adapter->pause_frames = 0;
2299	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
2300#ifndef DEVICE_POLLING
2301	/* Trigger an RX interrupt to guarantee mbuf refresh */
2302	E1000_WRITE_REG(&adapter->hw, E1000_ICS, trigger);
2303#endif
2304	return;
2305hung:
2306	/* Looks like we're hung */
2307	device_printf(adapter->dev, "Watchdog timeout -- resetting\n");
2308	device_printf(adapter->dev,
2309	    "Queue(%d) tdh = %d, hw tdt = %d\n", txr->me,
2310	    E1000_READ_REG(&adapter->hw, E1000_TDH(txr->me)),
2311	    E1000_READ_REG(&adapter->hw, E1000_TDT(txr->me)));
2312	device_printf(adapter->dev,"TX(%d) desc avail = %d,"
2313	    "Next TX to Clean = %d\n",
2314	    txr->me, txr->tx_avail, txr->next_to_clean);
2315	ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2316	adapter->watchdog_events++;
2317	adapter->pause_frames = 0;
2318	em_init_locked(adapter);
2319}
2320
2321
2322static void
2323em_update_link_status(struct adapter *adapter)
2324{
2325	struct e1000_hw *hw = &adapter->hw;
2326	struct ifnet *ifp = adapter->ifp;
2327	device_t dev = adapter->dev;
2328	struct tx_ring *txr = adapter->tx_rings;
2329	u32 link_check = 0;
2330
2331	/* Get the cached link value or read phy for real */
2332	switch (hw->phy.media_type) {
2333	case e1000_media_type_copper:
2334		if (hw->mac.get_link_status) {
2335			/* Do the work to read phy */
2336			e1000_check_for_link(hw);
2337			link_check = !hw->mac.get_link_status;
2338			if (link_check) /* ESB2 fix */
2339				e1000_cfg_on_link_up(hw);
2340		} else
2341			link_check = TRUE;
2342		break;
2343	case e1000_media_type_fiber:
2344		e1000_check_for_link(hw);
2345		link_check = (E1000_READ_REG(hw, E1000_STATUS) &
2346                                 E1000_STATUS_LU);
2347		break;
2348	case e1000_media_type_internal_serdes:
2349		e1000_check_for_link(hw);
2350		link_check = adapter->hw.mac.serdes_has_link;
2351		break;
2352	default:
2353	case e1000_media_type_unknown:
2354		break;
2355	}
2356
2357	/* Now check for a transition */
2358	if (link_check && (adapter->link_active == 0)) {
2359		e1000_get_speed_and_duplex(hw, &adapter->link_speed,
2360		    &adapter->link_duplex);
2361		/* Check if we must disable SPEED_MODE bit on PCI-E */
2362		if ((adapter->link_speed != SPEED_1000) &&
2363		    ((hw->mac.type == e1000_82571) ||
2364		    (hw->mac.type == e1000_82572))) {
2365			int tarc0;
2366			tarc0 = E1000_READ_REG(hw, E1000_TARC(0));
2367			tarc0 &= ~SPEED_MODE_BIT;
2368			E1000_WRITE_REG(hw, E1000_TARC(0), tarc0);
2369		}
2370		if (bootverbose)
2371			device_printf(dev, "Link is up %d Mbps %s\n",
2372			    adapter->link_speed,
2373			    ((adapter->link_duplex == FULL_DUPLEX) ?
2374			    "Full Duplex" : "Half Duplex"));
2375		adapter->link_active = 1;
2376		adapter->smartspeed = 0;
2377		ifp->if_baudrate = adapter->link_speed * 1000000;
2378		if_link_state_change(ifp, LINK_STATE_UP);
2379	} else if (!link_check && (adapter->link_active == 1)) {
2380		ifp->if_baudrate = adapter->link_speed = 0;
2381		adapter->link_duplex = 0;
2382		if (bootverbose)
2383			device_printf(dev, "Link is Down\n");
2384		adapter->link_active = 0;
2385		/* Link down, disable watchdog */
2386		for (int i = 0; i < adapter->num_queues; i++, txr++)
2387			txr->queue_status = EM_QUEUE_IDLE;
2388		if_link_state_change(ifp, LINK_STATE_DOWN);
2389	}
2390}
2391
2392/*********************************************************************
2393 *
2394 *  This routine disables all traffic on the adapter by issuing a
2395 *  global reset on the MAC and deallocates TX/RX buffers.
2396 *
2397 *  This routine should always be called with BOTH the CORE
2398 *  and TX locks.
2399 **********************************************************************/
2400
2401static void
2402em_stop(void *arg)
2403{
2404	struct adapter	*adapter = arg;
2405	struct ifnet	*ifp = adapter->ifp;
2406	struct tx_ring	*txr = adapter->tx_rings;
2407
2408	EM_CORE_LOCK_ASSERT(adapter);
2409
2410	INIT_DEBUGOUT("em_stop: begin");
2411
2412	em_disable_intr(adapter);
2413	callout_stop(&adapter->timer);
2414
2415	/* Tell the stack that the interface is no longer active */
2416	ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2417	ifp->if_drv_flags |= IFF_DRV_OACTIVE;
2418
2419        /* Unarm watchdog timer. */
2420	for (int i = 0; i < adapter->num_queues; i++, txr++) {
2421		EM_TX_LOCK(txr);
2422		txr->queue_status = EM_QUEUE_IDLE;
2423		EM_TX_UNLOCK(txr);
2424	}
2425
2426	e1000_reset_hw(&adapter->hw);
2427	E1000_WRITE_REG(&adapter->hw, E1000_WUC, 0);
2428
2429	e1000_led_off(&adapter->hw);
2430	e1000_cleanup_led(&adapter->hw);
2431}
2432
2433
2434/*********************************************************************
2435 *
2436 *  Determine hardware revision.
2437 *
2438 **********************************************************************/
2439static void
2440em_identify_hardware(struct adapter *adapter)
2441{
2442	device_t dev = adapter->dev;
2443
2444	/* Make sure our PCI config space has the necessary stuff set */
2445	pci_enable_busmaster(dev);
2446	adapter->hw.bus.pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2);
2447
2448	/* Save off the information about this board */
2449	adapter->hw.vendor_id = pci_get_vendor(dev);
2450	adapter->hw.device_id = pci_get_device(dev);
2451	adapter->hw.revision_id = pci_read_config(dev, PCIR_REVID, 1);
2452	adapter->hw.subsystem_vendor_id =
2453	    pci_read_config(dev, PCIR_SUBVEND_0, 2);
2454	adapter->hw.subsystem_device_id =
2455	    pci_read_config(dev, PCIR_SUBDEV_0, 2);
2456
2457	/* Do Shared Code Init and Setup */
2458	if (e1000_set_mac_type(&adapter->hw)) {
2459		device_printf(dev, "Setup init failure\n");
2460		return;
2461	}
2462}
2463
2464static int
2465em_allocate_pci_resources(struct adapter *adapter)
2466{
2467	device_t	dev = adapter->dev;
2468	int		rid;
2469
2470	rid = PCIR_BAR(0);
2471	adapter->memory = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
2472	    &rid, RF_ACTIVE);
2473	if (adapter->memory == NULL) {
2474		device_printf(dev, "Unable to allocate bus resource: memory\n");
2475		return (ENXIO);
2476	}
2477	adapter->osdep.mem_bus_space_tag =
2478	    rman_get_bustag(adapter->memory);
2479	adapter->osdep.mem_bus_space_handle =
2480	    rman_get_bushandle(adapter->memory);
2481	adapter->hw.hw_addr = (u8 *)&adapter->osdep.mem_bus_space_handle;
2482
2483	/* Default to a single queue */
2484	adapter->num_queues = 1;
2485
2486	/*
2487	 * Setup MSI/X or MSI if PCI Express
2488	 */
2489	adapter->msix = em_setup_msix(adapter);
2490
2491	adapter->hw.back = &adapter->osdep;
2492
2493	return (0);
2494}
2495
2496/*********************************************************************
2497 *
2498 *  Setup the Legacy or MSI Interrupt handler
2499 *
2500 **********************************************************************/
2501int
2502em_allocate_legacy(struct adapter *adapter)
2503{
2504	device_t dev = adapter->dev;
2505	struct tx_ring	*txr = adapter->tx_rings;
2506	int error, rid = 0;
2507
2508	/* Manually turn off all interrupts */
2509	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2510
2511	if (adapter->msix == 1) /* using MSI */
2512		rid = 1;
2513	/* We allocate a single interrupt resource */
2514	adapter->res = bus_alloc_resource_any(dev,
2515	    SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2516	if (adapter->res == NULL) {
2517		device_printf(dev, "Unable to allocate bus resource: "
2518		    "interrupt\n");
2519		return (ENXIO);
2520	}
2521
2522	/*
2523	 * Allocate a fast interrupt and the associated
2524	 * deferred processing contexts.
2525	 */
2526	TASK_INIT(&adapter->que_task, 0, em_handle_que, adapter);
2527	adapter->tq = taskqueue_create_fast("em_taskq", M_NOWAIT,
2528	    taskqueue_thread_enqueue, &adapter->tq);
2529	taskqueue_start_threads(&adapter->tq, 1, PI_NET, "%s que",
2530	    device_get_nameunit(adapter->dev));
2531	/* Use a TX only tasklet for local timer */
2532	TASK_INIT(&txr->tx_task, 0, em_handle_tx, txr);
2533	txr->tq = taskqueue_create_fast("em_txq", M_NOWAIT,
2534	    taskqueue_thread_enqueue, &txr->tq);
2535	taskqueue_start_threads(&txr->tq, 1, PI_NET, "%s txq",
2536	    device_get_nameunit(adapter->dev));
2537	TASK_INIT(&adapter->link_task, 0, em_handle_link, adapter);
2538	if ((error = bus_setup_intr(dev, adapter->res, INTR_TYPE_NET,
2539	    em_irq_fast, NULL, adapter, &adapter->tag)) != 0) {
2540		device_printf(dev, "Failed to register fast interrupt "
2541			    "handler: %d\n", error);
2542		taskqueue_free(adapter->tq);
2543		adapter->tq = NULL;
2544		return (error);
2545	}
2546
2547	return (0);
2548}
2549
2550/*********************************************************************
2551 *
2552 *  Setup the MSIX Interrupt handlers
2553 *   This is not really Multiqueue, rather
2554 *   its just seperate interrupt vectors
2555 *   for TX, RX, and Link.
2556 *
2557 **********************************************************************/
2558int
2559em_allocate_msix(struct adapter *adapter)
2560{
2561	device_t	dev = adapter->dev;
2562	struct		tx_ring *txr = adapter->tx_rings;
2563	struct		rx_ring *rxr = adapter->rx_rings;
2564	int		error, rid, vector = 0;
2565
2566
2567	/* Make sure all interrupts are disabled */
2568	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2569
2570	/* First set up ring resources */
2571	for (int i = 0; i < adapter->num_queues; i++, txr++, rxr++) {
2572
2573		/* RX ring */
2574		rid = vector + 1;
2575
2576		rxr->res = bus_alloc_resource_any(dev,
2577		    SYS_RES_IRQ, &rid, RF_ACTIVE);
2578		if (rxr->res == NULL) {
2579			device_printf(dev,
2580			    "Unable to allocate bus resource: "
2581			    "RX MSIX Interrupt %d\n", i);
2582			return (ENXIO);
2583		}
2584		if ((error = bus_setup_intr(dev, rxr->res,
2585		    INTR_TYPE_NET | INTR_MPSAFE, NULL, em_msix_rx,
2586		    rxr, &rxr->tag)) != 0) {
2587			device_printf(dev, "Failed to register RX handler");
2588			return (error);
2589		}
2590#if __FreeBSD_version >= 800504
2591		bus_describe_intr(dev, rxr->res, rxr->tag, "rx %d", i);
2592#endif
2593		rxr->msix = vector++; /* NOTE increment vector for TX */
2594		TASK_INIT(&rxr->rx_task, 0, em_handle_rx, rxr);
2595		rxr->tq = taskqueue_create_fast("em_rxq", M_NOWAIT,
2596		    taskqueue_thread_enqueue, &rxr->tq);
2597		taskqueue_start_threads(&rxr->tq, 1, PI_NET, "%s rxq",
2598		    device_get_nameunit(adapter->dev));
2599		/*
2600		** Set the bit to enable interrupt
2601		** in E1000_IMS -- bits 20 and 21
2602		** are for RX0 and RX1, note this has
2603		** NOTHING to do with the MSIX vector
2604		*/
2605		rxr->ims = 1 << (20 + i);
2606		adapter->ivars |= (8 | rxr->msix) << (i * 4);
2607
2608		/* TX ring */
2609		rid = vector + 1;
2610		txr->res = bus_alloc_resource_any(dev,
2611		    SYS_RES_IRQ, &rid, RF_ACTIVE);
2612		if (txr->res == NULL) {
2613			device_printf(dev,
2614			    "Unable to allocate bus resource: "
2615			    "TX MSIX Interrupt %d\n", i);
2616			return (ENXIO);
2617		}
2618		if ((error = bus_setup_intr(dev, txr->res,
2619		    INTR_TYPE_NET | INTR_MPSAFE, NULL, em_msix_tx,
2620		    txr, &txr->tag)) != 0) {
2621			device_printf(dev, "Failed to register TX handler");
2622			return (error);
2623		}
2624#if __FreeBSD_version >= 800504
2625		bus_describe_intr(dev, txr->res, txr->tag, "tx %d", i);
2626#endif
2627		txr->msix = vector++; /* Increment vector for next pass */
2628		TASK_INIT(&txr->tx_task, 0, em_handle_tx, txr);
2629		txr->tq = taskqueue_create_fast("em_txq", M_NOWAIT,
2630		    taskqueue_thread_enqueue, &txr->tq);
2631		taskqueue_start_threads(&txr->tq, 1, PI_NET, "%s txq",
2632		    device_get_nameunit(adapter->dev));
2633		/*
2634		** Set the bit to enable interrupt
2635		** in E1000_IMS -- bits 22 and 23
2636		** are for TX0 and TX1, note this has
2637		** NOTHING to do with the MSIX vector
2638		*/
2639		txr->ims = 1 << (22 + i);
2640		adapter->ivars |= (8 | txr->msix) << (8 + (i * 4));
2641	}
2642
2643	/* Link interrupt */
2644	++rid;
2645	adapter->res = bus_alloc_resource_any(dev,
2646	    SYS_RES_IRQ, &rid, RF_ACTIVE);
2647	if (!adapter->res) {
2648		device_printf(dev,"Unable to allocate "
2649		    "bus resource: Link interrupt [%d]\n", rid);
2650		return (ENXIO);
2651        }
2652	/* Set the link handler function */
2653	error = bus_setup_intr(dev, adapter->res,
2654	    INTR_TYPE_NET | INTR_MPSAFE, NULL,
2655	    em_msix_link, adapter, &adapter->tag);
2656	if (error) {
2657		adapter->res = NULL;
2658		device_printf(dev, "Failed to register LINK handler");
2659		return (error);
2660	}
2661#if __FreeBSD_version >= 800504
2662		bus_describe_intr(dev, adapter->res, adapter->tag, "link");
2663#endif
2664	adapter->linkvec = vector;
2665	adapter->ivars |=  (8 | vector) << 16;
2666	adapter->ivars |= 0x80000000;
2667
2668	return (0);
2669}
2670
2671
2672static void
2673em_free_pci_resources(struct adapter *adapter)
2674{
2675	device_t	dev = adapter->dev;
2676	struct tx_ring	*txr;
2677	struct rx_ring	*rxr;
2678	int		rid;
2679
2680
2681	/*
2682	** Release all the queue interrupt resources:
2683	*/
2684	for (int i = 0; i < adapter->num_queues; i++) {
2685		txr = &adapter->tx_rings[i];
2686		rxr = &adapter->rx_rings[i];
2687		/* an early abort? */
2688		if ((txr == NULL) || (rxr == NULL))
2689			break;
2690		rid = txr->msix +1;
2691		if (txr->tag != NULL) {
2692			bus_teardown_intr(dev, txr->res, txr->tag);
2693			txr->tag = NULL;
2694		}
2695		if (txr->res != NULL)
2696			bus_release_resource(dev, SYS_RES_IRQ,
2697			    rid, txr->res);
2698		rid = rxr->msix +1;
2699		if (rxr->tag != NULL) {
2700			bus_teardown_intr(dev, rxr->res, rxr->tag);
2701			rxr->tag = NULL;
2702		}
2703		if (rxr->res != NULL)
2704			bus_release_resource(dev, SYS_RES_IRQ,
2705			    rid, rxr->res);
2706	}
2707
2708        if (adapter->linkvec) /* we are doing MSIX */
2709                rid = adapter->linkvec + 1;
2710        else
2711                (adapter->msix != 0) ? (rid = 1):(rid = 0);
2712
2713	if (adapter->tag != NULL) {
2714		bus_teardown_intr(dev, adapter->res, adapter->tag);
2715		adapter->tag = NULL;
2716	}
2717
2718	if (adapter->res != NULL)
2719		bus_release_resource(dev, SYS_RES_IRQ, rid, adapter->res);
2720
2721
2722	if (adapter->msix)
2723		pci_release_msi(dev);
2724
2725	if (adapter->msix_mem != NULL)
2726		bus_release_resource(dev, SYS_RES_MEMORY,
2727		    PCIR_BAR(EM_MSIX_BAR), adapter->msix_mem);
2728
2729	if (adapter->memory != NULL)
2730		bus_release_resource(dev, SYS_RES_MEMORY,
2731		    PCIR_BAR(0), adapter->memory);
2732
2733	if (adapter->flash != NULL)
2734		bus_release_resource(dev, SYS_RES_MEMORY,
2735		    EM_FLASH, adapter->flash);
2736}
2737
2738/*
2739 * Setup MSI or MSI/X
2740 */
2741static int
2742em_setup_msix(struct adapter *adapter)
2743{
2744	device_t dev = adapter->dev;
2745	int val;
2746
2747	/*
2748	** Setup MSI/X for Hartwell: tests have shown
2749	** use of two queues to be unstable, and to
2750	** provide no great gain anyway, so we simply
2751	** seperate the interrupts and use a single queue.
2752	*/
2753	if ((adapter->hw.mac.type == e1000_82574) &&
2754	    (em_enable_msix == TRUE)) {
2755		/* Map the MSIX BAR */
2756		int rid = PCIR_BAR(EM_MSIX_BAR);
2757		adapter->msix_mem = bus_alloc_resource_any(dev,
2758		    SYS_RES_MEMORY, &rid, RF_ACTIVE);
2759       		if (adapter->msix_mem == NULL) {
2760			/* May not be enabled */
2761               		device_printf(adapter->dev,
2762			    "Unable to map MSIX table \n");
2763			goto msi;
2764       		}
2765		val = pci_msix_count(dev);
2766		/* We only need/want 3 vectors */
2767		if (val >= 3)
2768			val = 3;
2769		else {
2770			bus_release_resource(dev, SYS_RES_MEMORY,
2771			    PCIR_BAR(EM_MSIX_BAR), adapter->msix_mem);
2772			adapter->msix_mem = NULL;
2773               		device_printf(adapter->dev,
2774			    "MSIX: incorrect vectors, using MSI\n");
2775			goto msi;
2776		}
2777
2778		if (pci_alloc_msix(dev, &val) == 0) {
2779			device_printf(adapter->dev,
2780			    "Using MSIX interrupts "
2781			    "with %d vectors\n", val);
2782			return (val);
2783		}
2784		/* Fall through to MSI */
2785	}
2786msi:
2787       	val = 1;
2788       	if (pci_alloc_msi(dev, &val) == 0) {
2789               	device_printf(adapter->dev,"Using an MSI interrupt\n");
2790		return (val);
2791	}
2792	/* Should only happen due to manual configuration */
2793	device_printf(adapter->dev,"No MSI/MSIX using a Legacy IRQ\n");
2794	return (0);
2795}
2796
2797
2798/*********************************************************************
2799 *
2800 *  Initialize the hardware to a configuration
2801 *  as specified by the adapter structure.
2802 *
2803 **********************************************************************/
2804static void
2805em_reset(struct adapter *adapter)
2806{
2807	device_t	dev = adapter->dev;
2808	struct ifnet	*ifp = adapter->ifp;
2809	struct e1000_hw	*hw = &adapter->hw;
2810	u16		rx_buffer_size;
2811	u32		pba;
2812
2813	INIT_DEBUGOUT("em_reset: begin");
2814
2815	/* Set up smart power down as default off on newer adapters. */
2816	if (!em_smart_pwr_down && (hw->mac.type == e1000_82571 ||
2817	    hw->mac.type == e1000_82572)) {
2818		u16 phy_tmp = 0;
2819
2820		/* Speed up time to link by disabling smart power down. */
2821		e1000_read_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, &phy_tmp);
2822		phy_tmp &= ~IGP02E1000_PM_SPD;
2823		e1000_write_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, phy_tmp);
2824	}
2825
2826	/*
2827	 * Packet Buffer Allocation (PBA)
2828	 * Writing PBA sets the receive portion of the buffer
2829	 * the remainder is used for the transmit buffer.
2830	 */
2831	switch (hw->mac.type) {
2832	/* Total Packet Buffer on these is 48K */
2833	case e1000_82571:
2834	case e1000_82572:
2835	case e1000_80003es2lan:
2836			pba = E1000_PBA_32K; /* 32K for Rx, 16K for Tx */
2837		break;
2838	case e1000_82573: /* 82573: Total Packet Buffer is 32K */
2839			pba = E1000_PBA_12K; /* 12K for Rx, 20K for Tx */
2840		break;
2841	case e1000_82574:
2842	case e1000_82583:
2843			pba = E1000_PBA_20K; /* 20K for Rx, 20K for Tx */
2844		break;
2845	case e1000_ich8lan:
2846		pba = E1000_PBA_8K;
2847		break;
2848	case e1000_ich9lan:
2849	case e1000_ich10lan:
2850		/* Boost Receive side for jumbo frames */
2851		if (adapter->hw.mac.max_frame_size > 4096)
2852			pba = E1000_PBA_14K;
2853		else
2854			pba = E1000_PBA_10K;
2855		break;
2856	case e1000_pchlan:
2857	case e1000_pch2lan:
2858	case e1000_pch_lpt:
2859		pba = E1000_PBA_26K;
2860		break;
2861	default:
2862		if (adapter->hw.mac.max_frame_size > 8192)
2863			pba = E1000_PBA_40K; /* 40K for Rx, 24K for Tx */
2864		else
2865			pba = E1000_PBA_48K; /* 48K for Rx, 16K for Tx */
2866	}
2867	E1000_WRITE_REG(&adapter->hw, E1000_PBA, pba);
2868
2869	/*
2870	 * These parameters control the automatic generation (Tx) and
2871	 * response (Rx) to Ethernet PAUSE frames.
2872	 * - High water mark should allow for at least two frames to be
2873	 *   received after sending an XOFF.
2874	 * - Low water mark works best when it is very near the high water mark.
2875	 *   This allows the receiver to restart by sending XON when it has
2876	 *   drained a bit. Here we use an arbitary value of 1500 which will
2877	 *   restart after one full frame is pulled from the buffer. There
2878	 *   could be several smaller frames in the buffer and if so they will
2879	 *   not trigger the XON until their total number reduces the buffer
2880	 *   by 1500.
2881	 * - The pause time is fairly large at 1000 x 512ns = 512 usec.
2882	 */
2883	rx_buffer_size = ((E1000_READ_REG(hw, E1000_PBA) & 0xffff) << 10 );
2884	hw->fc.high_water = rx_buffer_size -
2885	    roundup2(adapter->hw.mac.max_frame_size, 1024);
2886	hw->fc.low_water = hw->fc.high_water - 1500;
2887
2888	if (adapter->fc) /* locally set flow control value? */
2889		hw->fc.requested_mode = adapter->fc;
2890	else
2891		hw->fc.requested_mode = e1000_fc_full;
2892
2893	if (hw->mac.type == e1000_80003es2lan)
2894		hw->fc.pause_time = 0xFFFF;
2895	else
2896		hw->fc.pause_time = EM_FC_PAUSE_TIME;
2897
2898	hw->fc.send_xon = TRUE;
2899
2900	/* Device specific overrides/settings */
2901	switch (hw->mac.type) {
2902	case e1000_pchlan:
2903		/* Workaround: no TX flow ctrl for PCH */
2904                hw->fc.requested_mode = e1000_fc_rx_pause;
2905		hw->fc.pause_time = 0xFFFF; /* override */
2906		if (ifp->if_mtu > ETHERMTU) {
2907			hw->fc.high_water = 0x3500;
2908			hw->fc.low_water = 0x1500;
2909		} else {
2910			hw->fc.high_water = 0x5000;
2911			hw->fc.low_water = 0x3000;
2912		}
2913		hw->fc.refresh_time = 0x1000;
2914		break;
2915	case e1000_pch2lan:
2916	case e1000_pch_lpt:
2917		hw->fc.high_water = 0x5C20;
2918		hw->fc.low_water = 0x5048;
2919		hw->fc.pause_time = 0x0650;
2920		hw->fc.refresh_time = 0x0400;
2921		/* Jumbos need adjusted PBA */
2922		if (ifp->if_mtu > ETHERMTU)
2923			E1000_WRITE_REG(hw, E1000_PBA, 12);
2924		else
2925			E1000_WRITE_REG(hw, E1000_PBA, 26);
2926		break;
2927        case e1000_ich9lan:
2928        case e1000_ich10lan:
2929		if (ifp->if_mtu > ETHERMTU) {
2930			hw->fc.high_water = 0x2800;
2931			hw->fc.low_water = hw->fc.high_water - 8;
2932			break;
2933		}
2934		/* else fall thru */
2935	default:
2936		if (hw->mac.type == e1000_80003es2lan)
2937			hw->fc.pause_time = 0xFFFF;
2938		break;
2939	}
2940
2941	/* Issue a global reset */
2942	e1000_reset_hw(hw);
2943	E1000_WRITE_REG(hw, E1000_WUC, 0);
2944	em_disable_aspm(adapter);
2945	/* and a re-init */
2946	if (e1000_init_hw(hw) < 0) {
2947		device_printf(dev, "Hardware Initialization Failed\n");
2948		return;
2949	}
2950
2951	E1000_WRITE_REG(hw, E1000_VET, ETHERTYPE_VLAN);
2952	e1000_get_phy_info(hw);
2953	e1000_check_for_link(hw);
2954	return;
2955}
2956
2957/*********************************************************************
2958 *
2959 *  Setup networking device structure and register an interface.
2960 *
2961 **********************************************************************/
2962static int
2963em_setup_interface(device_t dev, struct adapter *adapter)
2964{
2965	struct ifnet   *ifp;
2966
2967	INIT_DEBUGOUT("em_setup_interface: begin");
2968
2969	ifp = adapter->ifp = if_alloc(IFT_ETHER);
2970	if (ifp == NULL) {
2971		device_printf(dev, "can not allocate ifnet structure\n");
2972		return (-1);
2973	}
2974	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
2975	ifp->if_init =  em_init;
2976	ifp->if_softc = adapter;
2977	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
2978	ifp->if_ioctl = em_ioctl;
2979#ifdef EM_MULTIQUEUE
2980	/* Multiqueue stack interface */
2981	ifp->if_transmit = em_mq_start;
2982	ifp->if_qflush = em_qflush;
2983#else
2984	ifp->if_start = em_start;
2985	IFQ_SET_MAXLEN(&ifp->if_snd, adapter->num_tx_desc - 1);
2986	ifp->if_snd.ifq_drv_maxlen = adapter->num_tx_desc - 1;
2987	IFQ_SET_READY(&ifp->if_snd);
2988#endif
2989
2990	ether_ifattach(ifp, adapter->hw.mac.addr);
2991
2992	ifp->if_capabilities = ifp->if_capenable = 0;
2993
2994
2995	ifp->if_capabilities |= IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM;
2996	ifp->if_capabilities |= IFCAP_TSO4;
2997	/*
2998	 * Tell the upper layer(s) we
2999	 * support full VLAN capability
3000	 */
3001	ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header);
3002	ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING
3003			     |  IFCAP_VLAN_HWTSO
3004			     |  IFCAP_VLAN_MTU;
3005	ifp->if_capenable = ifp->if_capabilities;
3006
3007	/*
3008	** Don't turn this on by default, if vlans are
3009	** created on another pseudo device (eg. lagg)
3010	** then vlan events are not passed thru, breaking
3011	** operation, but with HW FILTER off it works. If
3012	** using vlans directly on the em driver you can
3013	** enable this and get full hardware tag filtering.
3014	*/
3015	ifp->if_capabilities |= IFCAP_VLAN_HWFILTER;
3016
3017#ifdef DEVICE_POLLING
3018	ifp->if_capabilities |= IFCAP_POLLING;
3019#endif
3020
3021	/* Enable only WOL MAGIC by default */
3022	if (adapter->wol) {
3023		ifp->if_capabilities |= IFCAP_WOL;
3024		ifp->if_capenable |= IFCAP_WOL_MAGIC;
3025	}
3026
3027	/*
3028	 * Specify the media types supported by this adapter and register
3029	 * callbacks to update media and link information
3030	 */
3031	ifmedia_init(&adapter->media, IFM_IMASK,
3032	    em_media_change, em_media_status);
3033	if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
3034	    (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
3035		u_char fiber_type = IFM_1000_SX;	/* default type */
3036
3037		ifmedia_add(&adapter->media, IFM_ETHER | fiber_type | IFM_FDX,
3038			    0, NULL);
3039		ifmedia_add(&adapter->media, IFM_ETHER | fiber_type, 0, NULL);
3040	} else {
3041		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T, 0, NULL);
3042		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T | IFM_FDX,
3043			    0, NULL);
3044		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX,
3045			    0, NULL);
3046		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX | IFM_FDX,
3047			    0, NULL);
3048		if (adapter->hw.phy.type != e1000_phy_ife) {
3049			ifmedia_add(&adapter->media,
3050				IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
3051			ifmedia_add(&adapter->media,
3052				IFM_ETHER | IFM_1000_T, 0, NULL);
3053		}
3054	}
3055	ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL);
3056	ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO);
3057	return (0);
3058}
3059
3060
3061/*
3062 * Manage DMA'able memory.
3063 */
3064static void
3065em_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
3066{
3067	if (error)
3068		return;
3069	*(bus_addr_t *) arg = segs[0].ds_addr;
3070}
3071
3072static int
3073em_dma_malloc(struct adapter *adapter, bus_size_t size,
3074        struct em_dma_alloc *dma, int mapflags)
3075{
3076	int error;
3077
3078	error = bus_dma_tag_create(bus_get_dma_tag(adapter->dev), /* parent */
3079				EM_DBA_ALIGN, 0,	/* alignment, bounds */
3080				BUS_SPACE_MAXADDR,	/* lowaddr */
3081				BUS_SPACE_MAXADDR,	/* highaddr */
3082				NULL, NULL,		/* filter, filterarg */
3083				size,			/* maxsize */
3084				1,			/* nsegments */
3085				size,			/* maxsegsize */
3086				0,			/* flags */
3087				NULL,			/* lockfunc */
3088				NULL,			/* lockarg */
3089				&dma->dma_tag);
3090	if (error) {
3091		device_printf(adapter->dev,
3092		    "%s: bus_dma_tag_create failed: %d\n",
3093		    __func__, error);
3094		goto fail_0;
3095	}
3096
3097	error = bus_dmamem_alloc(dma->dma_tag, (void**) &dma->dma_vaddr,
3098	    BUS_DMA_NOWAIT | BUS_DMA_COHERENT, &dma->dma_map);
3099	if (error) {
3100		device_printf(adapter->dev,
3101		    "%s: bus_dmamem_alloc(%ju) failed: %d\n",
3102		    __func__, (uintmax_t)size, error);
3103		goto fail_2;
3104	}
3105
3106	dma->dma_paddr = 0;
3107	error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
3108	    size, em_dmamap_cb, &dma->dma_paddr, mapflags | BUS_DMA_NOWAIT);
3109	if (error || dma->dma_paddr == 0) {
3110		device_printf(adapter->dev,
3111		    "%s: bus_dmamap_load failed: %d\n",
3112		    __func__, error);
3113		goto fail_3;
3114	}
3115
3116	return (0);
3117
3118fail_3:
3119	bus_dmamap_unload(dma->dma_tag, dma->dma_map);
3120fail_2:
3121	bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
3122	bus_dma_tag_destroy(dma->dma_tag);
3123fail_0:
3124	dma->dma_map = NULL;
3125	dma->dma_tag = NULL;
3126
3127	return (error);
3128}
3129
3130static void
3131em_dma_free(struct adapter *adapter, struct em_dma_alloc *dma)
3132{
3133	if (dma->dma_tag == NULL)
3134		return;
3135	if (dma->dma_map != NULL) {
3136		bus_dmamap_sync(dma->dma_tag, dma->dma_map,
3137		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
3138		bus_dmamap_unload(dma->dma_tag, dma->dma_map);
3139		bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
3140		dma->dma_map = NULL;
3141	}
3142	bus_dma_tag_destroy(dma->dma_tag);
3143	dma->dma_tag = NULL;
3144}
3145
3146
3147/*********************************************************************
3148 *
3149 *  Allocate memory for the transmit and receive rings, and then
3150 *  the descriptors associated with each, called only once at attach.
3151 *
3152 **********************************************************************/
3153static int
3154em_allocate_queues(struct adapter *adapter)
3155{
3156	device_t		dev = adapter->dev;
3157	struct tx_ring		*txr = NULL;
3158	struct rx_ring		*rxr = NULL;
3159	int rsize, tsize, error = E1000_SUCCESS;
3160	int txconf = 0, rxconf = 0;
3161
3162
3163	/* Allocate the TX ring struct memory */
3164	if (!(adapter->tx_rings =
3165	    (struct tx_ring *) malloc(sizeof(struct tx_ring) *
3166	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3167		device_printf(dev, "Unable to allocate TX ring memory\n");
3168		error = ENOMEM;
3169		goto fail;
3170	}
3171
3172	/* Now allocate the RX */
3173	if (!(adapter->rx_rings =
3174	    (struct rx_ring *) malloc(sizeof(struct rx_ring) *
3175	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3176		device_printf(dev, "Unable to allocate RX ring memory\n");
3177		error = ENOMEM;
3178		goto rx_fail;
3179	}
3180
3181	tsize = roundup2(adapter->num_tx_desc *
3182	    sizeof(struct e1000_tx_desc), EM_DBA_ALIGN);
3183	/*
3184	 * Now set up the TX queues, txconf is needed to handle the
3185	 * possibility that things fail midcourse and we need to
3186	 * undo memory gracefully
3187	 */
3188	for (int i = 0; i < adapter->num_queues; i++, txconf++) {
3189		/* Set up some basics */
3190		txr = &adapter->tx_rings[i];
3191		txr->adapter = adapter;
3192		txr->me = i;
3193
3194		/* Initialize the TX lock */
3195		snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
3196		    device_get_nameunit(dev), txr->me);
3197		mtx_init(&txr->tx_mtx, txr->mtx_name, NULL, MTX_DEF);
3198
3199		if (em_dma_malloc(adapter, tsize,
3200			&txr->txdma, BUS_DMA_NOWAIT)) {
3201			device_printf(dev,
3202			    "Unable to allocate TX Descriptor memory\n");
3203			error = ENOMEM;
3204			goto err_tx_desc;
3205		}
3206		txr->tx_base = (struct e1000_tx_desc *)txr->txdma.dma_vaddr;
3207		bzero((void *)txr->tx_base, tsize);
3208
3209        	if (em_allocate_transmit_buffers(txr)) {
3210			device_printf(dev,
3211			    "Critical Failure setting up transmit buffers\n");
3212			error = ENOMEM;
3213			goto err_tx_desc;
3214        	}
3215#if __FreeBSD_version >= 800000
3216		/* Allocate a buf ring */
3217		txr->br = buf_ring_alloc(4096, M_DEVBUF,
3218		    M_WAITOK, &txr->tx_mtx);
3219#endif
3220	}
3221
3222	/*
3223	 * Next the RX queues...
3224	 */
3225	rsize = roundup2(adapter->num_rx_desc *
3226	    sizeof(struct e1000_rx_desc), EM_DBA_ALIGN);
3227	for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
3228		rxr = &adapter->rx_rings[i];
3229		rxr->adapter = adapter;
3230		rxr->me = i;
3231
3232		/* Initialize the RX lock */
3233		snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
3234		    device_get_nameunit(dev), txr->me);
3235		mtx_init(&rxr->rx_mtx, rxr->mtx_name, NULL, MTX_DEF);
3236
3237		if (em_dma_malloc(adapter, rsize,
3238			&rxr->rxdma, BUS_DMA_NOWAIT)) {
3239			device_printf(dev,
3240			    "Unable to allocate RxDescriptor memory\n");
3241			error = ENOMEM;
3242			goto err_rx_desc;
3243		}
3244		rxr->rx_base = (struct e1000_rx_desc *)rxr->rxdma.dma_vaddr;
3245		bzero((void *)rxr->rx_base, rsize);
3246
3247        	/* Allocate receive buffers for the ring*/
3248		if (em_allocate_receive_buffers(rxr)) {
3249			device_printf(dev,
3250			    "Critical Failure setting up receive buffers\n");
3251			error = ENOMEM;
3252			goto err_rx_desc;
3253		}
3254	}
3255
3256	return (0);
3257
3258err_rx_desc:
3259	for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
3260		em_dma_free(adapter, &rxr->rxdma);
3261err_tx_desc:
3262	for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
3263		em_dma_free(adapter, &txr->txdma);
3264	free(adapter->rx_rings, M_DEVBUF);
3265rx_fail:
3266#if __FreeBSD_version >= 800000
3267	buf_ring_free(txr->br, M_DEVBUF);
3268#endif
3269	free(adapter->tx_rings, M_DEVBUF);
3270fail:
3271	return (error);
3272}
3273
3274
3275/*********************************************************************
3276 *
3277 *  Allocate memory for tx_buffer structures. The tx_buffer stores all
3278 *  the information needed to transmit a packet on the wire. This is
3279 *  called only once at attach, setup is done every reset.
3280 *
3281 **********************************************************************/
3282static int
3283em_allocate_transmit_buffers(struct tx_ring *txr)
3284{
3285	struct adapter *adapter = txr->adapter;
3286	device_t dev = adapter->dev;
3287	struct em_buffer *txbuf;
3288	int error, i;
3289
3290	/*
3291	 * Setup DMA descriptor areas.
3292	 */
3293	if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
3294			       1, 0,			/* alignment, bounds */
3295			       BUS_SPACE_MAXADDR,	/* lowaddr */
3296			       BUS_SPACE_MAXADDR,	/* highaddr */
3297			       NULL, NULL,		/* filter, filterarg */
3298			       EM_TSO_SIZE,		/* maxsize */
3299			       EM_MAX_SCATTER,		/* nsegments */
3300			       PAGE_SIZE,		/* maxsegsize */
3301			       0,			/* flags */
3302			       NULL,			/* lockfunc */
3303			       NULL,			/* lockfuncarg */
3304			       &txr->txtag))) {
3305		device_printf(dev,"Unable to allocate TX DMA tag\n");
3306		goto fail;
3307	}
3308
3309	if (!(txr->tx_buffers =
3310	    (struct em_buffer *) malloc(sizeof(struct em_buffer) *
3311	    adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3312		device_printf(dev, "Unable to allocate tx_buffer memory\n");
3313		error = ENOMEM;
3314		goto fail;
3315	}
3316
3317        /* Create the descriptor buffer dma maps */
3318	txbuf = txr->tx_buffers;
3319	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3320		error = bus_dmamap_create(txr->txtag, 0, &txbuf->map);
3321		if (error != 0) {
3322			device_printf(dev, "Unable to create TX DMA map\n");
3323			goto fail;
3324		}
3325	}
3326
3327	return 0;
3328fail:
3329	/* We free all, it handles case where we are in the middle */
3330	em_free_transmit_structures(adapter);
3331	return (error);
3332}
3333
3334/*********************************************************************
3335 *
3336 *  Initialize a transmit ring.
3337 *
3338 **********************************************************************/
3339static void
3340em_setup_transmit_ring(struct tx_ring *txr)
3341{
3342	struct adapter *adapter = txr->adapter;
3343	struct em_buffer *txbuf;
3344	int i;
3345#ifdef DEV_NETMAP
3346	struct netmap_adapter *na = NA(adapter->ifp);
3347	struct netmap_slot *slot;
3348#endif /* DEV_NETMAP */
3349
3350	/* Clear the old descriptor contents */
3351	EM_TX_LOCK(txr);
3352#ifdef DEV_NETMAP
3353	slot = netmap_reset(na, NR_TX, txr->me, 0);
3354#endif /* DEV_NETMAP */
3355
3356	bzero((void *)txr->tx_base,
3357	      (sizeof(struct e1000_tx_desc)) * adapter->num_tx_desc);
3358	/* Reset indices */
3359	txr->next_avail_desc = 0;
3360	txr->next_to_clean = 0;
3361
3362	/* Free any existing tx buffers. */
3363        txbuf = txr->tx_buffers;
3364	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3365		if (txbuf->m_head != NULL) {
3366			bus_dmamap_sync(txr->txtag, txbuf->map,
3367			    BUS_DMASYNC_POSTWRITE);
3368			bus_dmamap_unload(txr->txtag, txbuf->map);
3369			m_freem(txbuf->m_head);
3370			txbuf->m_head = NULL;
3371		}
3372#ifdef DEV_NETMAP
3373		if (slot) {
3374			int si = netmap_idx_n2k(&na->tx_rings[txr->me], i);
3375			uint64_t paddr;
3376			void *addr;
3377
3378			addr = PNMB(slot + si, &paddr);
3379			txr->tx_base[i].buffer_addr = htole64(paddr);
3380			/* reload the map for netmap mode */
3381			netmap_load_map(txr->txtag, txbuf->map, addr);
3382		}
3383#endif /* DEV_NETMAP */
3384
3385		/* clear the watch index */
3386		txbuf->next_eop = -1;
3387        }
3388
3389	/* Set number of descriptors available */
3390	txr->tx_avail = adapter->num_tx_desc;
3391	txr->queue_status = EM_QUEUE_IDLE;
3392
3393	/* Clear checksum offload context. */
3394	txr->last_hw_offload = 0;
3395	txr->last_hw_ipcss = 0;
3396	txr->last_hw_ipcso = 0;
3397	txr->last_hw_tucss = 0;
3398	txr->last_hw_tucso = 0;
3399
3400	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3401	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3402	EM_TX_UNLOCK(txr);
3403}
3404
3405/*********************************************************************
3406 *
3407 *  Initialize all transmit rings.
3408 *
3409 **********************************************************************/
3410static void
3411em_setup_transmit_structures(struct adapter *adapter)
3412{
3413	struct tx_ring *txr = adapter->tx_rings;
3414
3415	for (int i = 0; i < adapter->num_queues; i++, txr++)
3416		em_setup_transmit_ring(txr);
3417
3418	return;
3419}
3420
3421/*********************************************************************
3422 *
3423 *  Enable transmit unit.
3424 *
3425 **********************************************************************/
3426static void
3427em_initialize_transmit_unit(struct adapter *adapter)
3428{
3429	struct tx_ring	*txr = adapter->tx_rings;
3430	struct e1000_hw	*hw = &adapter->hw;
3431	u32	tctl, tarc, tipg = 0;
3432
3433	 INIT_DEBUGOUT("em_initialize_transmit_unit: begin");
3434
3435	for (int i = 0; i < adapter->num_queues; i++, txr++) {
3436		u64 bus_addr = txr->txdma.dma_paddr;
3437		/* Base and Len of TX Ring */
3438		E1000_WRITE_REG(hw, E1000_TDLEN(i),
3439	    	    adapter->num_tx_desc * sizeof(struct e1000_tx_desc));
3440		E1000_WRITE_REG(hw, E1000_TDBAH(i),
3441	    	    (u32)(bus_addr >> 32));
3442		E1000_WRITE_REG(hw, E1000_TDBAL(i),
3443	    	    (u32)bus_addr);
3444		/* Init the HEAD/TAIL indices */
3445		E1000_WRITE_REG(hw, E1000_TDT(i), 0);
3446		E1000_WRITE_REG(hw, E1000_TDH(i), 0);
3447
3448		HW_DEBUGOUT2("Base = %x, Length = %x\n",
3449		    E1000_READ_REG(&adapter->hw, E1000_TDBAL(i)),
3450		    E1000_READ_REG(&adapter->hw, E1000_TDLEN(i)));
3451
3452		txr->queue_status = EM_QUEUE_IDLE;
3453	}
3454
3455	/* Set the default values for the Tx Inter Packet Gap timer */
3456	switch (adapter->hw.mac.type) {
3457	case e1000_80003es2lan:
3458		tipg = DEFAULT_82543_TIPG_IPGR1;
3459		tipg |= DEFAULT_80003ES2LAN_TIPG_IPGR2 <<
3460		    E1000_TIPG_IPGR2_SHIFT;
3461		break;
3462	default:
3463		if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
3464		    (adapter->hw.phy.media_type ==
3465		    e1000_media_type_internal_serdes))
3466			tipg = DEFAULT_82543_TIPG_IPGT_FIBER;
3467		else
3468			tipg = DEFAULT_82543_TIPG_IPGT_COPPER;
3469		tipg |= DEFAULT_82543_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT;
3470		tipg |= DEFAULT_82543_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT;
3471	}
3472
3473	E1000_WRITE_REG(&adapter->hw, E1000_TIPG, tipg);
3474	E1000_WRITE_REG(&adapter->hw, E1000_TIDV, adapter->tx_int_delay.value);
3475
3476	if(adapter->hw.mac.type >= e1000_82540)
3477		E1000_WRITE_REG(&adapter->hw, E1000_TADV,
3478		    adapter->tx_abs_int_delay.value);
3479
3480	if ((adapter->hw.mac.type == e1000_82571) ||
3481	    (adapter->hw.mac.type == e1000_82572)) {
3482		tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
3483		tarc |= SPEED_MODE_BIT;
3484		E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3485	} else if (adapter->hw.mac.type == e1000_80003es2lan) {
3486		tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
3487		tarc |= 1;
3488		E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3489		tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(1));
3490		tarc |= 1;
3491		E1000_WRITE_REG(&adapter->hw, E1000_TARC(1), tarc);
3492	}
3493
3494	adapter->txd_cmd = E1000_TXD_CMD_IFCS;
3495	if (adapter->tx_int_delay.value > 0)
3496		adapter->txd_cmd |= E1000_TXD_CMD_IDE;
3497
3498	/* Program the Transmit Control Register */
3499	tctl = E1000_READ_REG(&adapter->hw, E1000_TCTL);
3500	tctl &= ~E1000_TCTL_CT;
3501	tctl |= (E1000_TCTL_PSP | E1000_TCTL_RTLC | E1000_TCTL_EN |
3502		   (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT));
3503
3504	if (adapter->hw.mac.type >= e1000_82571)
3505		tctl |= E1000_TCTL_MULR;
3506
3507	/* This write will effectively turn on the transmit unit. */
3508	E1000_WRITE_REG(&adapter->hw, E1000_TCTL, tctl);
3509
3510}
3511
3512
3513/*********************************************************************
3514 *
3515 *  Free all transmit rings.
3516 *
3517 **********************************************************************/
3518static void
3519em_free_transmit_structures(struct adapter *adapter)
3520{
3521	struct tx_ring *txr = adapter->tx_rings;
3522
3523	for (int i = 0; i < adapter->num_queues; i++, txr++) {
3524		EM_TX_LOCK(txr);
3525		em_free_transmit_buffers(txr);
3526		em_dma_free(adapter, &txr->txdma);
3527		EM_TX_UNLOCK(txr);
3528		EM_TX_LOCK_DESTROY(txr);
3529	}
3530
3531	free(adapter->tx_rings, M_DEVBUF);
3532}
3533
3534/*********************************************************************
3535 *
3536 *  Free transmit ring related data structures.
3537 *
3538 **********************************************************************/
3539static void
3540em_free_transmit_buffers(struct tx_ring *txr)
3541{
3542	struct adapter		*adapter = txr->adapter;
3543	struct em_buffer	*txbuf;
3544
3545	INIT_DEBUGOUT("free_transmit_ring: begin");
3546
3547	if (txr->tx_buffers == NULL)
3548		return;
3549
3550	for (int i = 0; i < adapter->num_tx_desc; i++) {
3551		txbuf = &txr->tx_buffers[i];
3552		if (txbuf->m_head != NULL) {
3553			bus_dmamap_sync(txr->txtag, txbuf->map,
3554			    BUS_DMASYNC_POSTWRITE);
3555			bus_dmamap_unload(txr->txtag,
3556			    txbuf->map);
3557			m_freem(txbuf->m_head);
3558			txbuf->m_head = NULL;
3559			if (txbuf->map != NULL) {
3560				bus_dmamap_destroy(txr->txtag,
3561				    txbuf->map);
3562				txbuf->map = NULL;
3563			}
3564		} else if (txbuf->map != NULL) {
3565			bus_dmamap_unload(txr->txtag,
3566			    txbuf->map);
3567			bus_dmamap_destroy(txr->txtag,
3568			    txbuf->map);
3569			txbuf->map = NULL;
3570		}
3571	}
3572#if __FreeBSD_version >= 800000
3573	if (txr->br != NULL)
3574		buf_ring_free(txr->br, M_DEVBUF);
3575#endif
3576	if (txr->tx_buffers != NULL) {
3577		free(txr->tx_buffers, M_DEVBUF);
3578		txr->tx_buffers = NULL;
3579	}
3580	if (txr->txtag != NULL) {
3581		bus_dma_tag_destroy(txr->txtag);
3582		txr->txtag = NULL;
3583	}
3584	return;
3585}
3586
3587
3588/*********************************************************************
3589 *  The offload context is protocol specific (TCP/UDP) and thus
3590 *  only needs to be set when the protocol changes. The occasion
3591 *  of a context change can be a performance detriment, and
3592 *  might be better just disabled. The reason arises in the way
3593 *  in which the controller supports pipelined requests from the
3594 *  Tx data DMA. Up to four requests can be pipelined, and they may
3595 *  belong to the same packet or to multiple packets. However all
3596 *  requests for one packet are issued before a request is issued
3597 *  for a subsequent packet and if a request for the next packet
3598 *  requires a context change, that request will be stalled
3599 *  until the previous request completes. This means setting up
3600 *  a new context effectively disables pipelined Tx data DMA which
3601 *  in turn greatly slow down performance to send small sized
3602 *  frames.
3603 **********************************************************************/
3604static void
3605em_transmit_checksum_setup(struct tx_ring *txr, struct mbuf *mp, int ip_off,
3606    struct ip *ip, u32 *txd_upper, u32 *txd_lower)
3607{
3608	struct adapter			*adapter = txr->adapter;
3609	struct e1000_context_desc	*TXD = NULL;
3610	struct em_buffer		*tx_buffer;
3611	int				cur, hdr_len;
3612	u32				cmd = 0;
3613	u16				offload = 0;
3614	u8				ipcso, ipcss, tucso, tucss;
3615
3616	ipcss = ipcso = tucss = tucso = 0;
3617	hdr_len = ip_off + (ip->ip_hl << 2);
3618	cur = txr->next_avail_desc;
3619
3620	/* Setup of IP header checksum. */
3621	if (mp->m_pkthdr.csum_flags & CSUM_IP) {
3622		*txd_upper |= E1000_TXD_POPTS_IXSM << 8;
3623		offload |= CSUM_IP;
3624		ipcss = ip_off;
3625		ipcso = ip_off + offsetof(struct ip, ip_sum);
3626		/*
3627		 * Start offset for header checksum calculation.
3628		 * End offset for header checksum calculation.
3629		 * Offset of place to put the checksum.
3630		 */
3631		TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
3632		TXD->lower_setup.ip_fields.ipcss = ipcss;
3633		TXD->lower_setup.ip_fields.ipcse = htole16(hdr_len);
3634		TXD->lower_setup.ip_fields.ipcso = ipcso;
3635		cmd |= E1000_TXD_CMD_IP;
3636	}
3637
3638	if (mp->m_pkthdr.csum_flags & CSUM_TCP) {
3639 		*txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
3640 		*txd_upper |= E1000_TXD_POPTS_TXSM << 8;
3641 		offload |= CSUM_TCP;
3642 		tucss = hdr_len;
3643 		tucso = hdr_len + offsetof(struct tcphdr, th_sum);
3644 		/*
3645 		 * Setting up new checksum offload context for every frames
3646 		 * takes a lot of processing time for hardware. This also
3647 		 * reduces performance a lot for small sized frames so avoid
3648 		 * it if driver can use previously configured checksum
3649 		 * offload context.
3650 		 */
3651 		if (txr->last_hw_offload == offload) {
3652 			if (offload & CSUM_IP) {
3653 				if (txr->last_hw_ipcss == ipcss &&
3654 				    txr->last_hw_ipcso == ipcso &&
3655 				    txr->last_hw_tucss == tucss &&
3656 				    txr->last_hw_tucso == tucso)
3657 					return;
3658 			} else {
3659 				if (txr->last_hw_tucss == tucss &&
3660 				    txr->last_hw_tucso == tucso)
3661 					return;
3662 			}
3663  		}
3664 		txr->last_hw_offload = offload;
3665 		txr->last_hw_tucss = tucss;
3666 		txr->last_hw_tucso = tucso;
3667 		/*
3668 		 * Start offset for payload checksum calculation.
3669 		 * End offset for payload checksum calculation.
3670 		 * Offset of place to put the checksum.
3671 		 */
3672		TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
3673 		TXD->upper_setup.tcp_fields.tucss = hdr_len;
3674 		TXD->upper_setup.tcp_fields.tucse = htole16(0);
3675 		TXD->upper_setup.tcp_fields.tucso = tucso;
3676 		cmd |= E1000_TXD_CMD_TCP;
3677 	} else if (mp->m_pkthdr.csum_flags & CSUM_UDP) {
3678 		*txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
3679 		*txd_upper |= E1000_TXD_POPTS_TXSM << 8;
3680 		tucss = hdr_len;
3681 		tucso = hdr_len + offsetof(struct udphdr, uh_sum);
3682 		/*
3683 		 * Setting up new checksum offload context for every frames
3684 		 * takes a lot of processing time for hardware. This also
3685 		 * reduces performance a lot for small sized frames so avoid
3686 		 * it if driver can use previously configured checksum
3687 		 * offload context.
3688 		 */
3689 		if (txr->last_hw_offload == offload) {
3690 			if (offload & CSUM_IP) {
3691 				if (txr->last_hw_ipcss == ipcss &&
3692 				    txr->last_hw_ipcso == ipcso &&
3693 				    txr->last_hw_tucss == tucss &&
3694 				    txr->last_hw_tucso == tucso)
3695 					return;
3696 			} else {
3697 				if (txr->last_hw_tucss == tucss &&
3698 				    txr->last_hw_tucso == tucso)
3699 					return;
3700 			}
3701 		}
3702 		txr->last_hw_offload = offload;
3703 		txr->last_hw_tucss = tucss;
3704 		txr->last_hw_tucso = tucso;
3705 		/*
3706 		 * Start offset for header checksum calculation.
3707 		 * End offset for header checksum calculation.
3708 		 * Offset of place to put the checksum.
3709 		 */
3710		TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
3711 		TXD->upper_setup.tcp_fields.tucss = tucss;
3712 		TXD->upper_setup.tcp_fields.tucse = htole16(0);
3713 		TXD->upper_setup.tcp_fields.tucso = tucso;
3714  	}
3715
3716 	if (offload & CSUM_IP) {
3717 		txr->last_hw_ipcss = ipcss;
3718 		txr->last_hw_ipcso = ipcso;
3719  	}
3720
3721	TXD->tcp_seg_setup.data = htole32(0);
3722	TXD->cmd_and_length =
3723	    htole32(adapter->txd_cmd | E1000_TXD_CMD_DEXT | cmd);
3724	tx_buffer = &txr->tx_buffers[cur];
3725	tx_buffer->m_head = NULL;
3726	tx_buffer->next_eop = -1;
3727
3728	if (++cur == adapter->num_tx_desc)
3729		cur = 0;
3730
3731	txr->tx_avail--;
3732	txr->next_avail_desc = cur;
3733}
3734
3735
3736/**********************************************************************
3737 *
3738 *  Setup work for hardware segmentation offload (TSO)
3739 *
3740 **********************************************************************/
3741static void
3742em_tso_setup(struct tx_ring *txr, struct mbuf *mp, int ip_off,
3743    struct ip *ip, struct tcphdr *tp, u32 *txd_upper, u32 *txd_lower)
3744{
3745	struct adapter			*adapter = txr->adapter;
3746	struct e1000_context_desc	*TXD;
3747	struct em_buffer		*tx_buffer;
3748	int cur, hdr_len;
3749
3750	/*
3751	 * In theory we can use the same TSO context if and only if
3752	 * frame is the same type(IP/TCP) and the same MSS. However
3753	 * checking whether a frame has the same IP/TCP structure is
3754	 * hard thing so just ignore that and always restablish a
3755	 * new TSO context.
3756	 */
3757	hdr_len = ip_off + (ip->ip_hl << 2) + (tp->th_off << 2);
3758	*txd_lower = (E1000_TXD_CMD_DEXT |	/* Extended descr type */
3759		      E1000_TXD_DTYP_D |	/* Data descr type */
3760		      E1000_TXD_CMD_TSE);	/* Do TSE on this packet */
3761
3762	/* IP and/or TCP header checksum calculation and insertion. */
3763	*txd_upper = (E1000_TXD_POPTS_IXSM | E1000_TXD_POPTS_TXSM) << 8;
3764
3765	cur = txr->next_avail_desc;
3766	tx_buffer = &txr->tx_buffers[cur];
3767	TXD = (struct e1000_context_desc *) &txr->tx_base[cur];
3768
3769	/*
3770	 * Start offset for header checksum calculation.
3771	 * End offset for header checksum calculation.
3772	 * Offset of place put the checksum.
3773	 */
3774	TXD->lower_setup.ip_fields.ipcss = ip_off;
3775	TXD->lower_setup.ip_fields.ipcse =
3776	    htole16(ip_off + (ip->ip_hl << 2) - 1);
3777	TXD->lower_setup.ip_fields.ipcso = ip_off + offsetof(struct ip, ip_sum);
3778	/*
3779	 * Start offset for payload checksum calculation.
3780	 * End offset for payload checksum calculation.
3781	 * Offset of place to put the checksum.
3782	 */
3783	TXD->upper_setup.tcp_fields.tucss = ip_off + (ip->ip_hl << 2);
3784	TXD->upper_setup.tcp_fields.tucse = 0;
3785	TXD->upper_setup.tcp_fields.tucso =
3786	    ip_off + (ip->ip_hl << 2) + offsetof(struct tcphdr, th_sum);
3787	/*
3788	 * Payload size per packet w/o any headers.
3789	 * Length of all headers up to payload.
3790	 */
3791	TXD->tcp_seg_setup.fields.mss = htole16(mp->m_pkthdr.tso_segsz);
3792	TXD->tcp_seg_setup.fields.hdr_len = hdr_len;
3793
3794	TXD->cmd_and_length = htole32(adapter->txd_cmd |
3795				E1000_TXD_CMD_DEXT |	/* Extended descr */
3796				E1000_TXD_CMD_TSE |	/* TSE context */
3797				E1000_TXD_CMD_IP |	/* Do IP csum */
3798				E1000_TXD_CMD_TCP |	/* Do TCP checksum */
3799				(mp->m_pkthdr.len - (hdr_len))); /* Total len */
3800
3801	tx_buffer->m_head = NULL;
3802	tx_buffer->next_eop = -1;
3803
3804	if (++cur == adapter->num_tx_desc)
3805		cur = 0;
3806
3807	txr->tx_avail--;
3808	txr->next_avail_desc = cur;
3809	txr->tx_tso = TRUE;
3810}
3811
3812
3813/**********************************************************************
3814 *
3815 *  Examine each tx_buffer in the used queue. If the hardware is done
3816 *  processing the packet then free associated resources. The
3817 *  tx_buffer is put back on the free queue.
3818 *
3819 **********************************************************************/
3820static void
3821em_txeof(struct tx_ring *txr)
3822{
3823	struct adapter	*adapter = txr->adapter;
3824        int first, last, done, processed;
3825        struct em_buffer *tx_buffer;
3826        struct e1000_tx_desc   *tx_desc, *eop_desc;
3827	struct ifnet   *ifp = adapter->ifp;
3828
3829	EM_TX_LOCK_ASSERT(txr);
3830#ifdef DEV_NETMAP
3831	if (netmap_tx_irq(ifp, txr->me |
3832	    (NETMAP_LOCKED_ENTER | NETMAP_LOCKED_EXIT)))
3833		return;
3834#endif /* DEV_NETMAP */
3835
3836	/* No work, make sure watchdog is off */
3837        if (txr->tx_avail == adapter->num_tx_desc) {
3838		txr->queue_status = EM_QUEUE_IDLE;
3839                return;
3840	}
3841
3842	processed = 0;
3843        first = txr->next_to_clean;
3844        tx_desc = &txr->tx_base[first];
3845        tx_buffer = &txr->tx_buffers[first];
3846	last = tx_buffer->next_eop;
3847        eop_desc = &txr->tx_base[last];
3848
3849	/*
3850	 * What this does is get the index of the
3851	 * first descriptor AFTER the EOP of the
3852	 * first packet, that way we can do the
3853	 * simple comparison on the inner while loop.
3854	 */
3855	if (++last == adapter->num_tx_desc)
3856 		last = 0;
3857	done = last;
3858
3859        bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3860            BUS_DMASYNC_POSTREAD);
3861
3862        while (eop_desc->upper.fields.status & E1000_TXD_STAT_DD) {
3863		/* We clean the range of the packet */
3864		while (first != done) {
3865                	tx_desc->upper.data = 0;
3866                	tx_desc->lower.data = 0;
3867                	tx_desc->buffer_addr = 0;
3868                	++txr->tx_avail;
3869			++processed;
3870
3871			if (tx_buffer->m_head) {
3872				bus_dmamap_sync(txr->txtag,
3873				    tx_buffer->map,
3874				    BUS_DMASYNC_POSTWRITE);
3875				bus_dmamap_unload(txr->txtag,
3876				    tx_buffer->map);
3877                        	m_freem(tx_buffer->m_head);
3878                        	tx_buffer->m_head = NULL;
3879                	}
3880			tx_buffer->next_eop = -1;
3881			txr->watchdog_time = ticks;
3882
3883	                if (++first == adapter->num_tx_desc)
3884				first = 0;
3885
3886	                tx_buffer = &txr->tx_buffers[first];
3887			tx_desc = &txr->tx_base[first];
3888		}
3889		++ifp->if_opackets;
3890		/* See if we can continue to the next packet */
3891		last = tx_buffer->next_eop;
3892		if (last != -1) {
3893        		eop_desc = &txr->tx_base[last];
3894			/* Get new done point */
3895			if (++last == adapter->num_tx_desc) last = 0;
3896			done = last;
3897		} else
3898			break;
3899        }
3900        bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3901            BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3902
3903        txr->next_to_clean = first;
3904
3905	/*
3906	** Watchdog calculation, we know there's
3907	** work outstanding or the first return
3908	** would have been taken, so none processed
3909	** for too long indicates a hang. local timer
3910	** will examine this and do a reset if needed.
3911	*/
3912	if ((!processed) && ((ticks - txr->watchdog_time) > EM_WATCHDOG))
3913		txr->queue_status = EM_QUEUE_HUNG;
3914
3915        /*
3916         * If we have a minimum free, clear IFF_DRV_OACTIVE
3917         * to tell the stack that it is OK to send packets.
3918	 * Notice that all writes of OACTIVE happen under the
3919	 * TX lock which, with a single queue, guarantees
3920	 * sanity.
3921         */
3922        if (txr->tx_avail >= EM_MAX_SCATTER)
3923		ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
3924
3925	/* Disable watchdog if all clean */
3926	if (txr->tx_avail == adapter->num_tx_desc) {
3927		txr->queue_status = EM_QUEUE_IDLE;
3928	}
3929}
3930
3931
3932/*********************************************************************
3933 *
3934 *  Refresh RX descriptor mbufs from system mbuf buffer pool.
3935 *
3936 **********************************************************************/
3937static void
3938em_refresh_mbufs(struct rx_ring *rxr, int limit)
3939{
3940	struct adapter		*adapter = rxr->adapter;
3941	struct mbuf		*m;
3942	bus_dma_segment_t	segs[1];
3943	struct em_buffer	*rxbuf;
3944	int			i, j, error, nsegs;
3945	bool			cleaned = FALSE;
3946
3947	i = j = rxr->next_to_refresh;
3948	/*
3949	** Get one descriptor beyond
3950	** our work mark to control
3951	** the loop.
3952	*/
3953	if (++j == adapter->num_rx_desc)
3954		j = 0;
3955
3956	while (j != limit) {
3957		rxbuf = &rxr->rx_buffers[i];
3958		if (rxbuf->m_head == NULL) {
3959			m = m_getjcl(M_NOWAIT, MT_DATA,
3960			    M_PKTHDR, adapter->rx_mbuf_sz);
3961			/*
3962			** If we have a temporary resource shortage
3963			** that causes a failure, just abort refresh
3964			** for now, we will return to this point when
3965			** reinvoked from em_rxeof.
3966			*/
3967			if (m == NULL)
3968				goto update;
3969		} else
3970			m = rxbuf->m_head;
3971
3972		m->m_len = m->m_pkthdr.len = adapter->rx_mbuf_sz;
3973		m->m_flags |= M_PKTHDR;
3974		m->m_data = m->m_ext.ext_buf;
3975
3976		/* Use bus_dma machinery to setup the memory mapping  */
3977		error = bus_dmamap_load_mbuf_sg(rxr->rxtag, rxbuf->map,
3978		    m, segs, &nsegs, BUS_DMA_NOWAIT);
3979		if (error != 0) {
3980			printf("Refresh mbufs: hdr dmamap load"
3981			    " failure - %d\n", error);
3982			m_free(m);
3983			rxbuf->m_head = NULL;
3984			goto update;
3985		}
3986		rxbuf->m_head = m;
3987		bus_dmamap_sync(rxr->rxtag,
3988		    rxbuf->map, BUS_DMASYNC_PREREAD);
3989		rxr->rx_base[i].buffer_addr = htole64(segs[0].ds_addr);
3990		cleaned = TRUE;
3991
3992		i = j; /* Next is precalulated for us */
3993		rxr->next_to_refresh = i;
3994		/* Calculate next controlling index */
3995		if (++j == adapter->num_rx_desc)
3996			j = 0;
3997	}
3998update:
3999	/*
4000	** Update the tail pointer only if,
4001	** and as far as we have refreshed.
4002	*/
4003	if (cleaned)
4004		E1000_WRITE_REG(&adapter->hw,
4005		    E1000_RDT(rxr->me), rxr->next_to_refresh);
4006
4007	return;
4008}
4009
4010
4011/*********************************************************************
4012 *
4013 *  Allocate memory for rx_buffer structures. Since we use one
4014 *  rx_buffer per received packet, the maximum number of rx_buffer's
4015 *  that we'll need is equal to the number of receive descriptors
4016 *  that we've allocated.
4017 *
4018 **********************************************************************/
4019static int
4020em_allocate_receive_buffers(struct rx_ring *rxr)
4021{
4022	struct adapter		*adapter = rxr->adapter;
4023	device_t		dev = adapter->dev;
4024	struct em_buffer	*rxbuf;
4025	int			error;
4026
4027	rxr->rx_buffers = malloc(sizeof(struct em_buffer) *
4028	    adapter->num_rx_desc, M_DEVBUF, M_NOWAIT | M_ZERO);
4029	if (rxr->rx_buffers == NULL) {
4030		device_printf(dev, "Unable to allocate rx_buffer memory\n");
4031		return (ENOMEM);
4032	}
4033
4034	error = bus_dma_tag_create(bus_get_dma_tag(dev), /* parent */
4035				1, 0,			/* alignment, bounds */
4036				BUS_SPACE_MAXADDR,	/* lowaddr */
4037				BUS_SPACE_MAXADDR,	/* highaddr */
4038				NULL, NULL,		/* filter, filterarg */
4039				MJUM9BYTES,		/* maxsize */
4040				1,			/* nsegments */
4041				MJUM9BYTES,		/* maxsegsize */
4042				0,			/* flags */
4043				NULL,			/* lockfunc */
4044				NULL,			/* lockarg */
4045				&rxr->rxtag);
4046	if (error) {
4047		device_printf(dev, "%s: bus_dma_tag_create failed %d\n",
4048		    __func__, error);
4049		goto fail;
4050	}
4051
4052	rxbuf = rxr->rx_buffers;
4053	for (int i = 0; i < adapter->num_rx_desc; i++, rxbuf++) {
4054		rxbuf = &rxr->rx_buffers[i];
4055		error = bus_dmamap_create(rxr->rxtag, BUS_DMA_NOWAIT,
4056		    &rxbuf->map);
4057		if (error) {
4058			device_printf(dev, "%s: bus_dmamap_create failed: %d\n",
4059			    __func__, error);
4060			goto fail;
4061		}
4062	}
4063
4064	return (0);
4065
4066fail:
4067	em_free_receive_structures(adapter);
4068	return (error);
4069}
4070
4071
4072/*********************************************************************
4073 *
4074 *  Initialize a receive ring and its buffers.
4075 *
4076 **********************************************************************/
4077static int
4078em_setup_receive_ring(struct rx_ring *rxr)
4079{
4080	struct	adapter 	*adapter = rxr->adapter;
4081	struct em_buffer	*rxbuf;
4082	bus_dma_segment_t	seg[1];
4083	int			rsize, nsegs, error = 0;
4084#ifdef DEV_NETMAP
4085	struct netmap_adapter *na = NA(adapter->ifp);
4086	struct netmap_slot *slot;
4087#endif
4088
4089
4090	/* Clear the ring contents */
4091	EM_RX_LOCK(rxr);
4092	rsize = roundup2(adapter->num_rx_desc *
4093	    sizeof(struct e1000_rx_desc), EM_DBA_ALIGN);
4094	bzero((void *)rxr->rx_base, rsize);
4095#ifdef DEV_NETMAP
4096	slot = netmap_reset(na, NR_RX, 0, 0);
4097#endif
4098
4099	/*
4100	** Free current RX buffer structs and their mbufs
4101	*/
4102	for (int i = 0; i < adapter->num_rx_desc; i++) {
4103		rxbuf = &rxr->rx_buffers[i];
4104		if (rxbuf->m_head != NULL) {
4105			bus_dmamap_sync(rxr->rxtag, rxbuf->map,
4106			    BUS_DMASYNC_POSTREAD);
4107			bus_dmamap_unload(rxr->rxtag, rxbuf->map);
4108			m_freem(rxbuf->m_head);
4109			rxbuf->m_head = NULL; /* mark as freed */
4110		}
4111	}
4112
4113	/* Now replenish the mbufs */
4114        for (int j = 0; j != adapter->num_rx_desc; ++j) {
4115		rxbuf = &rxr->rx_buffers[j];
4116#ifdef DEV_NETMAP
4117		if (slot) {
4118			int si = netmap_idx_n2k(&na->rx_rings[rxr->me], j);
4119			uint64_t paddr;
4120			void *addr;
4121
4122			addr = PNMB(slot + si, &paddr);
4123			netmap_load_map(rxr->rxtag, rxbuf->map, addr);
4124			/* Update descriptor */
4125			rxr->rx_base[j].buffer_addr = htole64(paddr);
4126			continue;
4127		}
4128#endif /* DEV_NETMAP */
4129		rxbuf->m_head = m_getjcl(M_NOWAIT, MT_DATA,
4130		    M_PKTHDR, adapter->rx_mbuf_sz);
4131		if (rxbuf->m_head == NULL) {
4132			error = ENOBUFS;
4133			goto fail;
4134		}
4135		rxbuf->m_head->m_len = adapter->rx_mbuf_sz;
4136		rxbuf->m_head->m_flags &= ~M_HASFCS; /* we strip it */
4137		rxbuf->m_head->m_pkthdr.len = adapter->rx_mbuf_sz;
4138
4139		/* Get the memory mapping */
4140		error = bus_dmamap_load_mbuf_sg(rxr->rxtag,
4141		    rxbuf->map, rxbuf->m_head, seg,
4142		    &nsegs, BUS_DMA_NOWAIT);
4143		if (error != 0) {
4144			m_freem(rxbuf->m_head);
4145			rxbuf->m_head = NULL;
4146			goto fail;
4147		}
4148		bus_dmamap_sync(rxr->rxtag,
4149		    rxbuf->map, BUS_DMASYNC_PREREAD);
4150
4151		/* Update descriptor */
4152		rxr->rx_base[j].buffer_addr = htole64(seg[0].ds_addr);
4153	}
4154	rxr->next_to_check = 0;
4155	rxr->next_to_refresh = 0;
4156	bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4157	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4158
4159fail:
4160	EM_RX_UNLOCK(rxr);
4161	return (error);
4162}
4163
4164/*********************************************************************
4165 *
4166 *  Initialize all receive rings.
4167 *
4168 **********************************************************************/
4169static int
4170em_setup_receive_structures(struct adapter *adapter)
4171{
4172	struct rx_ring *rxr = adapter->rx_rings;
4173	int q;
4174
4175	for (q = 0; q < adapter->num_queues; q++, rxr++)
4176		if (em_setup_receive_ring(rxr))
4177			goto fail;
4178
4179	return (0);
4180fail:
4181	/*
4182	 * Free RX buffers allocated so far, we will only handle
4183	 * the rings that completed, the failing case will have
4184	 * cleaned up for itself. 'q' failed, so its the terminus.
4185	 */
4186	for (int i = 0; i < q; ++i) {
4187		rxr = &adapter->rx_rings[i];
4188		for (int n = 0; n < adapter->num_rx_desc; n++) {
4189			struct em_buffer *rxbuf;
4190			rxbuf = &rxr->rx_buffers[n];
4191			if (rxbuf->m_head != NULL) {
4192				bus_dmamap_sync(rxr->rxtag, rxbuf->map,
4193			  	  BUS_DMASYNC_POSTREAD);
4194				bus_dmamap_unload(rxr->rxtag, rxbuf->map);
4195				m_freem(rxbuf->m_head);
4196				rxbuf->m_head = NULL;
4197			}
4198		}
4199		rxr->next_to_check = 0;
4200		rxr->next_to_refresh = 0;
4201	}
4202
4203	return (ENOBUFS);
4204}
4205
4206/*********************************************************************
4207 *
4208 *  Free all receive rings.
4209 *
4210 **********************************************************************/
4211static void
4212em_free_receive_structures(struct adapter *adapter)
4213{
4214	struct rx_ring *rxr = adapter->rx_rings;
4215
4216	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4217		em_free_receive_buffers(rxr);
4218		/* Free the ring memory as well */
4219		em_dma_free(adapter, &rxr->rxdma);
4220		EM_RX_LOCK_DESTROY(rxr);
4221	}
4222
4223	free(adapter->rx_rings, M_DEVBUF);
4224}
4225
4226
4227/*********************************************************************
4228 *
4229 *  Free receive ring data structures
4230 *
4231 **********************************************************************/
4232static void
4233em_free_receive_buffers(struct rx_ring *rxr)
4234{
4235	struct adapter		*adapter = rxr->adapter;
4236	struct em_buffer	*rxbuf = NULL;
4237
4238	INIT_DEBUGOUT("free_receive_buffers: begin");
4239
4240	if (rxr->rx_buffers != NULL) {
4241		for (int i = 0; i < adapter->num_rx_desc; i++) {
4242			rxbuf = &rxr->rx_buffers[i];
4243			if (rxbuf->map != NULL) {
4244				bus_dmamap_sync(rxr->rxtag, rxbuf->map,
4245				    BUS_DMASYNC_POSTREAD);
4246				bus_dmamap_unload(rxr->rxtag, rxbuf->map);
4247				bus_dmamap_destroy(rxr->rxtag, rxbuf->map);
4248			}
4249			if (rxbuf->m_head != NULL) {
4250				m_freem(rxbuf->m_head);
4251				rxbuf->m_head = NULL;
4252			}
4253		}
4254		free(rxr->rx_buffers, M_DEVBUF);
4255		rxr->rx_buffers = NULL;
4256		rxr->next_to_check = 0;
4257		rxr->next_to_refresh = 0;
4258	}
4259
4260	if (rxr->rxtag != NULL) {
4261		bus_dma_tag_destroy(rxr->rxtag);
4262		rxr->rxtag = NULL;
4263	}
4264
4265	return;
4266}
4267
4268
4269/*********************************************************************
4270 *
4271 *  Enable receive unit.
4272 *
4273 **********************************************************************/
4274
4275static void
4276em_initialize_receive_unit(struct adapter *adapter)
4277{
4278	struct rx_ring	*rxr = adapter->rx_rings;
4279	struct ifnet	*ifp = adapter->ifp;
4280	struct e1000_hw	*hw = &adapter->hw;
4281	u64	bus_addr;
4282	u32	rctl, rxcsum;
4283
4284	INIT_DEBUGOUT("em_initialize_receive_units: begin");
4285
4286	/*
4287	 * Make sure receives are disabled while setting
4288	 * up the descriptor ring
4289	 */
4290	rctl = E1000_READ_REG(hw, E1000_RCTL);
4291	/* Do not disable if ever enabled on this hardware */
4292	if ((hw->mac.type != e1000_82574) && (hw->mac.type != e1000_82583))
4293		E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
4294
4295	E1000_WRITE_REG(&adapter->hw, E1000_RADV,
4296	    adapter->rx_abs_int_delay.value);
4297	/*
4298	 * Set the interrupt throttling rate. Value is calculated
4299	 * as DEFAULT_ITR = 1/(MAX_INTS_PER_SEC * 256ns)
4300	 */
4301	E1000_WRITE_REG(hw, E1000_ITR, DEFAULT_ITR);
4302
4303	/*
4304	** When using MSIX interrupts we need to throttle
4305	** using the EITR register (82574 only)
4306	*/
4307	if (hw->mac.type == e1000_82574) {
4308		for (int i = 0; i < 4; i++)
4309			E1000_WRITE_REG(hw, E1000_EITR_82574(i),
4310			    DEFAULT_ITR);
4311		/* Disable accelerated acknowledge */
4312		E1000_WRITE_REG(hw, E1000_RFCTL, E1000_RFCTL_ACK_DIS);
4313	}
4314
4315	rxcsum = E1000_READ_REG(hw, E1000_RXCSUM);
4316	if (ifp->if_capenable & IFCAP_RXCSUM)
4317		rxcsum |= E1000_RXCSUM_TUOFL;
4318	else
4319		rxcsum &= ~E1000_RXCSUM_TUOFL;
4320	E1000_WRITE_REG(hw, E1000_RXCSUM, rxcsum);
4321
4322	/*
4323	** XXX TEMPORARY WORKAROUND: on some systems with 82573
4324	** long latencies are observed, like Lenovo X60. This
4325	** change eliminates the problem, but since having positive
4326	** values in RDTR is a known source of problems on other
4327	** platforms another solution is being sought.
4328	*/
4329	if (hw->mac.type == e1000_82573)
4330		E1000_WRITE_REG(hw, E1000_RDTR, 0x20);
4331
4332	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4333		/* Setup the Base and Length of the Rx Descriptor Ring */
4334		u32 rdt = adapter->num_rx_desc - 1; /* default */
4335
4336		bus_addr = rxr->rxdma.dma_paddr;
4337		E1000_WRITE_REG(hw, E1000_RDLEN(i),
4338		    adapter->num_rx_desc * sizeof(struct e1000_rx_desc));
4339		E1000_WRITE_REG(hw, E1000_RDBAH(i), (u32)(bus_addr >> 32));
4340		E1000_WRITE_REG(hw, E1000_RDBAL(i), (u32)bus_addr);
4341		/* Setup the Head and Tail Descriptor Pointers */
4342		E1000_WRITE_REG(hw, E1000_RDH(i), 0);
4343#ifdef DEV_NETMAP
4344		/*
4345		 * an init() while a netmap client is active must
4346		 * preserve the rx buffers passed to userspace.
4347		 */
4348		if (ifp->if_capenable & IFCAP_NETMAP)
4349			rdt -= NA(adapter->ifp)->rx_rings[i].nr_hwavail;
4350#endif /* DEV_NETMAP */
4351		E1000_WRITE_REG(hw, E1000_RDT(i), rdt);
4352	}
4353
4354	/* Set PTHRESH for improved jumbo performance */
4355	if (((adapter->hw.mac.type == e1000_ich9lan) ||
4356	    (adapter->hw.mac.type == e1000_pch2lan) ||
4357	    (adapter->hw.mac.type == e1000_ich10lan)) &&
4358	    (ifp->if_mtu > ETHERMTU)) {
4359		u32 rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(0));
4360		E1000_WRITE_REG(hw, E1000_RXDCTL(0), rxdctl | 3);
4361	}
4362
4363	if (adapter->hw.mac.type >= e1000_pch2lan) {
4364		if (ifp->if_mtu > ETHERMTU)
4365			e1000_lv_jumbo_workaround_ich8lan(hw, TRUE);
4366		else
4367			e1000_lv_jumbo_workaround_ich8lan(hw, FALSE);
4368	}
4369
4370	/* Setup the Receive Control Register */
4371	rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
4372	rctl |= E1000_RCTL_EN | E1000_RCTL_BAM |
4373	    E1000_RCTL_LBM_NO | E1000_RCTL_RDMTS_HALF |
4374	    (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
4375
4376        /* Strip the CRC */
4377        rctl |= E1000_RCTL_SECRC;
4378
4379        /* Make sure VLAN Filters are off */
4380        rctl &= ~E1000_RCTL_VFE;
4381	rctl &= ~E1000_RCTL_SBP;
4382
4383	if (adapter->rx_mbuf_sz == MCLBYTES)
4384		rctl |= E1000_RCTL_SZ_2048;
4385	else if (adapter->rx_mbuf_sz == MJUMPAGESIZE)
4386		rctl |= E1000_RCTL_SZ_4096 | E1000_RCTL_BSEX;
4387	else if (adapter->rx_mbuf_sz > MJUMPAGESIZE)
4388		rctl |= E1000_RCTL_SZ_8192 | E1000_RCTL_BSEX;
4389
4390	if (ifp->if_mtu > ETHERMTU)
4391		rctl |= E1000_RCTL_LPE;
4392	else
4393		rctl &= ~E1000_RCTL_LPE;
4394
4395	/* Write out the settings */
4396	E1000_WRITE_REG(hw, E1000_RCTL, rctl);
4397
4398	return;
4399}
4400
4401
4402/*********************************************************************
4403 *
4404 *  This routine executes in interrupt context. It replenishes
4405 *  the mbufs in the descriptor and sends data which has been
4406 *  dma'ed into host memory to upper layer.
4407 *
4408 *  We loop at most count times if count is > 0, or until done if
4409 *  count < 0.
4410 *
4411 *  For polling we also now return the number of cleaned packets
4412 *********************************************************************/
4413static bool
4414em_rxeof(struct rx_ring *rxr, int count, int *done)
4415{
4416	struct adapter		*adapter = rxr->adapter;
4417	struct ifnet		*ifp = adapter->ifp;
4418	struct mbuf		*mp, *sendmp;
4419	u8			status = 0;
4420	u16 			len;
4421	int			i, processed, rxdone = 0;
4422	bool			eop;
4423	struct e1000_rx_desc	*cur;
4424
4425	EM_RX_LOCK(rxr);
4426
4427#ifdef DEV_NETMAP
4428	if (netmap_rx_irq(ifp, rxr->me | NETMAP_LOCKED_ENTER, &processed))
4429		return (FALSE);
4430#endif /* DEV_NETMAP */
4431
4432	for (i = rxr->next_to_check, processed = 0; count != 0;) {
4433
4434		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
4435			break;
4436
4437		bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4438		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
4439
4440		cur = &rxr->rx_base[i];
4441		status = cur->status;
4442		mp = sendmp = NULL;
4443
4444		if ((status & E1000_RXD_STAT_DD) == 0)
4445			break;
4446
4447		len = le16toh(cur->length);
4448		eop = (status & E1000_RXD_STAT_EOP) != 0;
4449
4450		if ((cur->errors & E1000_RXD_ERR_FRAME_ERR_MASK) ||
4451		    (rxr->discard == TRUE)) {
4452			adapter->dropped_pkts++;
4453			++rxr->rx_discarded;
4454			if (!eop) /* Catch subsequent segs */
4455				rxr->discard = TRUE;
4456			else
4457				rxr->discard = FALSE;
4458			em_rx_discard(rxr, i);
4459			goto next_desc;
4460		}
4461
4462		/* Assign correct length to the current fragment */
4463		mp = rxr->rx_buffers[i].m_head;
4464		mp->m_len = len;
4465
4466		/* Trigger for refresh */
4467		rxr->rx_buffers[i].m_head = NULL;
4468
4469		/* First segment? */
4470		if (rxr->fmp == NULL) {
4471			mp->m_pkthdr.len = len;
4472			rxr->fmp = rxr->lmp = mp;
4473		} else {
4474			/* Chain mbuf's together */
4475			mp->m_flags &= ~M_PKTHDR;
4476			rxr->lmp->m_next = mp;
4477			rxr->lmp = mp;
4478			rxr->fmp->m_pkthdr.len += len;
4479		}
4480
4481		if (eop) {
4482			--count;
4483			sendmp = rxr->fmp;
4484			sendmp->m_pkthdr.rcvif = ifp;
4485			ifp->if_ipackets++;
4486			em_receive_checksum(cur, sendmp);
4487#ifndef __NO_STRICT_ALIGNMENT
4488			if (adapter->hw.mac.max_frame_size >
4489			    (MCLBYTES - ETHER_ALIGN) &&
4490			    em_fixup_rx(rxr) != 0)
4491				goto skip;
4492#endif
4493			if (status & E1000_RXD_STAT_VP) {
4494				sendmp->m_pkthdr.ether_vtag =
4495				    le16toh(cur->special);
4496				sendmp->m_flags |= M_VLANTAG;
4497			}
4498#ifndef __NO_STRICT_ALIGNMENT
4499skip:
4500#endif
4501			rxr->fmp = rxr->lmp = NULL;
4502		}
4503next_desc:
4504		/* Zero out the receive descriptors status. */
4505		cur->status = 0;
4506		++rxdone;	/* cumulative for POLL */
4507		++processed;
4508
4509		/* Advance our pointers to the next descriptor. */
4510		if (++i == adapter->num_rx_desc)
4511			i = 0;
4512
4513		/* Send to the stack */
4514		if (sendmp != NULL) {
4515			rxr->next_to_check = i;
4516			EM_RX_UNLOCK(rxr);
4517			(*ifp->if_input)(ifp, sendmp);
4518			EM_RX_LOCK(rxr);
4519			i = rxr->next_to_check;
4520		}
4521
4522		/* Only refresh mbufs every 8 descriptors */
4523		if (processed == 8) {
4524			em_refresh_mbufs(rxr, i);
4525			processed = 0;
4526		}
4527	}
4528
4529	/* Catch any remaining refresh work */
4530	if (e1000_rx_unrefreshed(rxr))
4531		em_refresh_mbufs(rxr, i);
4532
4533	rxr->next_to_check = i;
4534	if (done != NULL)
4535		*done = rxdone;
4536	EM_RX_UNLOCK(rxr);
4537
4538	return ((status & E1000_RXD_STAT_DD) ? TRUE : FALSE);
4539}
4540
4541static __inline void
4542em_rx_discard(struct rx_ring *rxr, int i)
4543{
4544	struct em_buffer	*rbuf;
4545
4546	rbuf = &rxr->rx_buffers[i];
4547	/* Free any previous pieces */
4548	if (rxr->fmp != NULL) {
4549		rxr->fmp->m_flags |= M_PKTHDR;
4550		m_freem(rxr->fmp);
4551		rxr->fmp = NULL;
4552		rxr->lmp = NULL;
4553	}
4554	/*
4555	** Free buffer and allow em_refresh_mbufs()
4556	** to clean up and recharge buffer.
4557	*/
4558	if (rbuf->m_head) {
4559		m_free(rbuf->m_head);
4560		rbuf->m_head = NULL;
4561	}
4562	return;
4563}
4564
4565#ifndef __NO_STRICT_ALIGNMENT
4566/*
4567 * When jumbo frames are enabled we should realign entire payload on
4568 * architecures with strict alignment. This is serious design mistake of 8254x
4569 * as it nullifies DMA operations. 8254x just allows RX buffer size to be
4570 * 2048/4096/8192/16384. What we really want is 2048 - ETHER_ALIGN to align its
4571 * payload. On architecures without strict alignment restrictions 8254x still
4572 * performs unaligned memory access which would reduce the performance too.
4573 * To avoid copying over an entire frame to align, we allocate a new mbuf and
4574 * copy ethernet header to the new mbuf. The new mbuf is prepended into the
4575 * existing mbuf chain.
4576 *
4577 * Be aware, best performance of the 8254x is achived only when jumbo frame is
4578 * not used at all on architectures with strict alignment.
4579 */
4580static int
4581em_fixup_rx(struct rx_ring *rxr)
4582{
4583	struct adapter *adapter = rxr->adapter;
4584	struct mbuf *m, *n;
4585	int error;
4586
4587	error = 0;
4588	m = rxr->fmp;
4589	if (m->m_len <= (MCLBYTES - ETHER_HDR_LEN)) {
4590		bcopy(m->m_data, m->m_data + ETHER_HDR_LEN, m->m_len);
4591		m->m_data += ETHER_HDR_LEN;
4592	} else {
4593		MGETHDR(n, M_NOWAIT, MT_DATA);
4594		if (n != NULL) {
4595			bcopy(m->m_data, n->m_data, ETHER_HDR_LEN);
4596			m->m_data += ETHER_HDR_LEN;
4597			m->m_len -= ETHER_HDR_LEN;
4598			n->m_len = ETHER_HDR_LEN;
4599			M_MOVE_PKTHDR(n, m);
4600			n->m_next = m;
4601			rxr->fmp = n;
4602		} else {
4603			adapter->dropped_pkts++;
4604			m_freem(rxr->fmp);
4605			rxr->fmp = NULL;
4606			error = ENOMEM;
4607		}
4608	}
4609
4610	return (error);
4611}
4612#endif
4613
4614/*********************************************************************
4615 *
4616 *  Verify that the hardware indicated that the checksum is valid.
4617 *  Inform the stack about the status of checksum so that stack
4618 *  doesn't spend time verifying the checksum.
4619 *
4620 *********************************************************************/
4621static void
4622em_receive_checksum(struct e1000_rx_desc *rx_desc, struct mbuf *mp)
4623{
4624	mp->m_pkthdr.csum_flags = 0;
4625
4626	/* Ignore Checksum bit is set */
4627	if (rx_desc->status & E1000_RXD_STAT_IXSM)
4628		return;
4629
4630	if (rx_desc->errors & (E1000_RXD_ERR_TCPE | E1000_RXD_ERR_IPE))
4631		return;
4632
4633	/* IP Checksum Good? */
4634	if (rx_desc->status & E1000_RXD_STAT_IPCS)
4635		mp->m_pkthdr.csum_flags = (CSUM_IP_CHECKED | CSUM_IP_VALID);
4636
4637	/* TCP or UDP checksum */
4638	if (rx_desc->status & (E1000_RXD_STAT_TCPCS | E1000_RXD_STAT_UDPCS)) {
4639		mp->m_pkthdr.csum_flags |= (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
4640		mp->m_pkthdr.csum_data = htons(0xffff);
4641	}
4642}
4643
4644/*
4645 * This routine is run via an vlan
4646 * config EVENT
4647 */
4648static void
4649em_register_vlan(void *arg, struct ifnet *ifp, u16 vtag)
4650{
4651	struct adapter	*adapter = ifp->if_softc;
4652	u32		index, bit;
4653
4654	if (ifp->if_softc !=  arg)   /* Not our event */
4655		return;
4656
4657	if ((vtag == 0) || (vtag > 4095))       /* Invalid ID */
4658                return;
4659
4660	EM_CORE_LOCK(adapter);
4661	index = (vtag >> 5) & 0x7F;
4662	bit = vtag & 0x1F;
4663	adapter->shadow_vfta[index] |= (1 << bit);
4664	++adapter->num_vlans;
4665	/* Re-init to load the changes */
4666	if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
4667		em_init_locked(adapter);
4668	EM_CORE_UNLOCK(adapter);
4669}
4670
4671/*
4672 * This routine is run via an vlan
4673 * unconfig EVENT
4674 */
4675static void
4676em_unregister_vlan(void *arg, struct ifnet *ifp, u16 vtag)
4677{
4678	struct adapter	*adapter = ifp->if_softc;
4679	u32		index, bit;
4680
4681	if (ifp->if_softc !=  arg)
4682		return;
4683
4684	if ((vtag == 0) || (vtag > 4095))       /* Invalid */
4685                return;
4686
4687	EM_CORE_LOCK(adapter);
4688	index = (vtag >> 5) & 0x7F;
4689	bit = vtag & 0x1F;
4690	adapter->shadow_vfta[index] &= ~(1 << bit);
4691	--adapter->num_vlans;
4692	/* Re-init to load the changes */
4693	if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
4694		em_init_locked(adapter);
4695	EM_CORE_UNLOCK(adapter);
4696}
4697
4698static void
4699em_setup_vlan_hw_support(struct adapter *adapter)
4700{
4701	struct e1000_hw *hw = &adapter->hw;
4702	u32             reg;
4703
4704	/*
4705	** We get here thru init_locked, meaning
4706	** a soft reset, this has already cleared
4707	** the VFTA and other state, so if there
4708	** have been no vlan's registered do nothing.
4709	*/
4710	if (adapter->num_vlans == 0)
4711                return;
4712
4713	/*
4714	** A soft reset zero's out the VFTA, so
4715	** we need to repopulate it now.
4716	*/
4717	for (int i = 0; i < EM_VFTA_SIZE; i++)
4718                if (adapter->shadow_vfta[i] != 0)
4719			E1000_WRITE_REG_ARRAY(hw, E1000_VFTA,
4720                            i, adapter->shadow_vfta[i]);
4721
4722	reg = E1000_READ_REG(hw, E1000_CTRL);
4723	reg |= E1000_CTRL_VME;
4724	E1000_WRITE_REG(hw, E1000_CTRL, reg);
4725
4726	/* Enable the Filter Table */
4727	reg = E1000_READ_REG(hw, E1000_RCTL);
4728	reg &= ~E1000_RCTL_CFIEN;
4729	reg |= E1000_RCTL_VFE;
4730	E1000_WRITE_REG(hw, E1000_RCTL, reg);
4731}
4732
4733static void
4734em_enable_intr(struct adapter *adapter)
4735{
4736	struct e1000_hw *hw = &adapter->hw;
4737	u32 ims_mask = IMS_ENABLE_MASK;
4738
4739	if (hw->mac.type == e1000_82574) {
4740		E1000_WRITE_REG(hw, EM_EIAC, EM_MSIX_MASK);
4741		ims_mask |= EM_MSIX_MASK;
4742	}
4743	E1000_WRITE_REG(hw, E1000_IMS, ims_mask);
4744}
4745
4746static void
4747em_disable_intr(struct adapter *adapter)
4748{
4749	struct e1000_hw *hw = &adapter->hw;
4750
4751	if (hw->mac.type == e1000_82574)
4752		E1000_WRITE_REG(hw, EM_EIAC, 0);
4753	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
4754}
4755
4756/*
4757 * Bit of a misnomer, what this really means is
4758 * to enable OS management of the system... aka
4759 * to disable special hardware management features
4760 */
4761static void
4762em_init_manageability(struct adapter *adapter)
4763{
4764	/* A shared code workaround */
4765#define E1000_82542_MANC2H E1000_MANC2H
4766	if (adapter->has_manage) {
4767		int manc2h = E1000_READ_REG(&adapter->hw, E1000_MANC2H);
4768		int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
4769
4770		/* disable hardware interception of ARP */
4771		manc &= ~(E1000_MANC_ARP_EN);
4772
4773                /* enable receiving management packets to the host */
4774		manc |= E1000_MANC_EN_MNG2HOST;
4775#define E1000_MNG2HOST_PORT_623 (1 << 5)
4776#define E1000_MNG2HOST_PORT_664 (1 << 6)
4777		manc2h |= E1000_MNG2HOST_PORT_623;
4778		manc2h |= E1000_MNG2HOST_PORT_664;
4779		E1000_WRITE_REG(&adapter->hw, E1000_MANC2H, manc2h);
4780		E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
4781	}
4782}
4783
4784/*
4785 * Give control back to hardware management
4786 * controller if there is one.
4787 */
4788static void
4789em_release_manageability(struct adapter *adapter)
4790{
4791	if (adapter->has_manage) {
4792		int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
4793
4794		/* re-enable hardware interception of ARP */
4795		manc |= E1000_MANC_ARP_EN;
4796		manc &= ~E1000_MANC_EN_MNG2HOST;
4797
4798		E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
4799	}
4800}
4801
4802/*
4803 * em_get_hw_control sets the {CTRL_EXT|FWSM}:DRV_LOAD bit.
4804 * For ASF and Pass Through versions of f/w this means
4805 * that the driver is loaded. For AMT version type f/w
4806 * this means that the network i/f is open.
4807 */
4808static void
4809em_get_hw_control(struct adapter *adapter)
4810{
4811	u32 ctrl_ext, swsm;
4812
4813	if (adapter->hw.mac.type == e1000_82573) {
4814		swsm = E1000_READ_REG(&adapter->hw, E1000_SWSM);
4815		E1000_WRITE_REG(&adapter->hw, E1000_SWSM,
4816		    swsm | E1000_SWSM_DRV_LOAD);
4817		return;
4818	}
4819	/* else */
4820	ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4821	E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
4822	    ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
4823	return;
4824}
4825
4826/*
4827 * em_release_hw_control resets {CTRL_EXT|FWSM}:DRV_LOAD bit.
4828 * For ASF and Pass Through versions of f/w this means that
4829 * the driver is no longer loaded. For AMT versions of the
4830 * f/w this means that the network i/f is closed.
4831 */
4832static void
4833em_release_hw_control(struct adapter *adapter)
4834{
4835	u32 ctrl_ext, swsm;
4836
4837	if (!adapter->has_manage)
4838		return;
4839
4840	if (adapter->hw.mac.type == e1000_82573) {
4841		swsm = E1000_READ_REG(&adapter->hw, E1000_SWSM);
4842		E1000_WRITE_REG(&adapter->hw, E1000_SWSM,
4843		    swsm & ~E1000_SWSM_DRV_LOAD);
4844		return;
4845	}
4846	/* else */
4847	ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4848	E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
4849	    ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
4850	return;
4851}
4852
4853static int
4854em_is_valid_ether_addr(u8 *addr)
4855{
4856	char zero_addr[6] = { 0, 0, 0, 0, 0, 0 };
4857
4858	if ((addr[0] & 1) || (!bcmp(addr, zero_addr, ETHER_ADDR_LEN))) {
4859		return (FALSE);
4860	}
4861
4862	return (TRUE);
4863}
4864
4865/*
4866** Parse the interface capabilities with regard
4867** to both system management and wake-on-lan for
4868** later use.
4869*/
4870static void
4871em_get_wakeup(device_t dev)
4872{
4873	struct adapter	*adapter = device_get_softc(dev);
4874	u16		eeprom_data = 0, device_id, apme_mask;
4875
4876	adapter->has_manage = e1000_enable_mng_pass_thru(&adapter->hw);
4877	apme_mask = EM_EEPROM_APME;
4878
4879	switch (adapter->hw.mac.type) {
4880	case e1000_82573:
4881	case e1000_82583:
4882		adapter->has_amt = TRUE;
4883		/* Falls thru */
4884	case e1000_82571:
4885	case e1000_82572:
4886	case e1000_80003es2lan:
4887		if (adapter->hw.bus.func == 1) {
4888			e1000_read_nvm(&adapter->hw,
4889			    NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data);
4890			break;
4891		} else
4892			e1000_read_nvm(&adapter->hw,
4893			    NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
4894		break;
4895	case e1000_ich8lan:
4896	case e1000_ich9lan:
4897	case e1000_ich10lan:
4898	case e1000_pchlan:
4899	case e1000_pch2lan:
4900		apme_mask = E1000_WUC_APME;
4901		adapter->has_amt = TRUE;
4902		eeprom_data = E1000_READ_REG(&adapter->hw, E1000_WUC);
4903		break;
4904	default:
4905		e1000_read_nvm(&adapter->hw,
4906		    NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
4907		break;
4908	}
4909	if (eeprom_data & apme_mask)
4910		adapter->wol = (E1000_WUFC_MAG | E1000_WUFC_MC);
4911	/*
4912         * We have the eeprom settings, now apply the special cases
4913         * where the eeprom may be wrong or the board won't support
4914         * wake on lan on a particular port
4915	 */
4916	device_id = pci_get_device(dev);
4917        switch (device_id) {
4918	case E1000_DEV_ID_82571EB_FIBER:
4919		/* Wake events only supported on port A for dual fiber
4920		 * regardless of eeprom setting */
4921		if (E1000_READ_REG(&adapter->hw, E1000_STATUS) &
4922		    E1000_STATUS_FUNC_1)
4923			adapter->wol = 0;
4924		break;
4925	case E1000_DEV_ID_82571EB_QUAD_COPPER:
4926	case E1000_DEV_ID_82571EB_QUAD_FIBER:
4927	case E1000_DEV_ID_82571EB_QUAD_COPPER_LP:
4928                /* if quad port adapter, disable WoL on all but port A */
4929		if (global_quad_port_a != 0)
4930			adapter->wol = 0;
4931		/* Reset for multiple quad port adapters */
4932		if (++global_quad_port_a == 4)
4933			global_quad_port_a = 0;
4934                break;
4935	}
4936	return;
4937}
4938
4939
4940/*
4941 * Enable PCI Wake On Lan capability
4942 */
4943static void
4944em_enable_wakeup(device_t dev)
4945{
4946	struct adapter	*adapter = device_get_softc(dev);
4947	struct ifnet	*ifp = adapter->ifp;
4948	u32		pmc, ctrl, ctrl_ext, rctl;
4949	u16     	status;
4950
4951	if ((pci_find_cap(dev, PCIY_PMG, &pmc) != 0))
4952		return;
4953
4954	/* Advertise the wakeup capability */
4955	ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL);
4956	ctrl |= (E1000_CTRL_SWDPIN2 | E1000_CTRL_SWDPIN3);
4957	E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl);
4958	E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
4959
4960	if ((adapter->hw.mac.type == e1000_ich8lan) ||
4961	    (adapter->hw.mac.type == e1000_pchlan) ||
4962	    (adapter->hw.mac.type == e1000_ich9lan) ||
4963	    (adapter->hw.mac.type == e1000_ich10lan))
4964		e1000_suspend_workarounds_ich8lan(&adapter->hw);
4965
4966	/* Keep the laser running on Fiber adapters */
4967	if (adapter->hw.phy.media_type == e1000_media_type_fiber ||
4968	    adapter->hw.phy.media_type == e1000_media_type_internal_serdes) {
4969		ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4970		ctrl_ext |= E1000_CTRL_EXT_SDP3_DATA;
4971		E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, ctrl_ext);
4972	}
4973
4974	/*
4975	** Determine type of Wakeup: note that wol
4976	** is set with all bits on by default.
4977	*/
4978	if ((ifp->if_capenable & IFCAP_WOL_MAGIC) == 0)
4979		adapter->wol &= ~E1000_WUFC_MAG;
4980
4981	if ((ifp->if_capenable & IFCAP_WOL_MCAST) == 0)
4982		adapter->wol &= ~E1000_WUFC_MC;
4983	else {
4984		rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
4985		rctl |= E1000_RCTL_MPE;
4986		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, rctl);
4987	}
4988
4989	if ((adapter->hw.mac.type == e1000_pchlan) ||
4990	    (adapter->hw.mac.type == e1000_pch2lan)) {
4991		if (em_enable_phy_wakeup(adapter))
4992			return;
4993	} else {
4994		E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
4995		E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
4996	}
4997
4998	if (adapter->hw.phy.type == e1000_phy_igp_3)
4999		e1000_igp3_phy_powerdown_workaround_ich8lan(&adapter->hw);
5000
5001        /* Request PME */
5002        status = pci_read_config(dev, pmc + PCIR_POWER_STATUS, 2);
5003	status &= ~(PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE);
5004	if (ifp->if_capenable & IFCAP_WOL)
5005		status |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE;
5006        pci_write_config(dev, pmc + PCIR_POWER_STATUS, status, 2);
5007
5008	return;
5009}
5010
5011/*
5012** WOL in the newer chipset interfaces (pchlan)
5013** require thing to be copied into the phy
5014*/
5015static int
5016em_enable_phy_wakeup(struct adapter *adapter)
5017{
5018	struct e1000_hw *hw = &adapter->hw;
5019	u32 mreg, ret = 0;
5020	u16 preg;
5021
5022	/* copy MAC RARs to PHY RARs */
5023	e1000_copy_rx_addrs_to_phy_ich8lan(hw);
5024
5025	/* copy MAC MTA to PHY MTA */
5026	for (int i = 0; i < adapter->hw.mac.mta_reg_count; i++) {
5027		mreg = E1000_READ_REG_ARRAY(hw, E1000_MTA, i);
5028		e1000_write_phy_reg(hw, BM_MTA(i), (u16)(mreg & 0xFFFF));
5029		e1000_write_phy_reg(hw, BM_MTA(i) + 1,
5030		    (u16)((mreg >> 16) & 0xFFFF));
5031	}
5032
5033	/* configure PHY Rx Control register */
5034	e1000_read_phy_reg(&adapter->hw, BM_RCTL, &preg);
5035	mreg = E1000_READ_REG(hw, E1000_RCTL);
5036	if (mreg & E1000_RCTL_UPE)
5037		preg |= BM_RCTL_UPE;
5038	if (mreg & E1000_RCTL_MPE)
5039		preg |= BM_RCTL_MPE;
5040	preg &= ~(BM_RCTL_MO_MASK);
5041	if (mreg & E1000_RCTL_MO_3)
5042		preg |= (((mreg & E1000_RCTL_MO_3) >> E1000_RCTL_MO_SHIFT)
5043				<< BM_RCTL_MO_SHIFT);
5044	if (mreg & E1000_RCTL_BAM)
5045		preg |= BM_RCTL_BAM;
5046	if (mreg & E1000_RCTL_PMCF)
5047		preg |= BM_RCTL_PMCF;
5048	mreg = E1000_READ_REG(hw, E1000_CTRL);
5049	if (mreg & E1000_CTRL_RFCE)
5050		preg |= BM_RCTL_RFCE;
5051	e1000_write_phy_reg(&adapter->hw, BM_RCTL, preg);
5052
5053	/* enable PHY wakeup in MAC register */
5054	E1000_WRITE_REG(hw, E1000_WUC,
5055	    E1000_WUC_PHY_WAKE | E1000_WUC_PME_EN);
5056	E1000_WRITE_REG(hw, E1000_WUFC, adapter->wol);
5057
5058	/* configure and enable PHY wakeup in PHY registers */
5059	e1000_write_phy_reg(&adapter->hw, BM_WUFC, adapter->wol);
5060	e1000_write_phy_reg(&adapter->hw, BM_WUC, E1000_WUC_PME_EN);
5061
5062	/* activate PHY wakeup */
5063	ret = hw->phy.ops.acquire(hw);
5064	if (ret) {
5065		printf("Could not acquire PHY\n");
5066		return ret;
5067	}
5068	e1000_write_phy_reg_mdic(hw, IGP01E1000_PHY_PAGE_SELECT,
5069	                         (BM_WUC_ENABLE_PAGE << IGP_PAGE_SHIFT));
5070	ret = e1000_read_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, &preg);
5071	if (ret) {
5072		printf("Could not read PHY page 769\n");
5073		goto out;
5074	}
5075	preg |= BM_WUC_ENABLE_BIT | BM_WUC_HOST_WU_BIT;
5076	ret = e1000_write_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, preg);
5077	if (ret)
5078		printf("Could not set PHY Host Wakeup bit\n");
5079out:
5080	hw->phy.ops.release(hw);
5081
5082	return ret;
5083}
5084
5085static void
5086em_led_func(void *arg, int onoff)
5087{
5088	struct adapter	*adapter = arg;
5089
5090	EM_CORE_LOCK(adapter);
5091	if (onoff) {
5092		e1000_setup_led(&adapter->hw);
5093		e1000_led_on(&adapter->hw);
5094	} else {
5095		e1000_led_off(&adapter->hw);
5096		e1000_cleanup_led(&adapter->hw);
5097	}
5098	EM_CORE_UNLOCK(adapter);
5099}
5100
5101/*
5102** Disable the L0S and L1 LINK states
5103*/
5104static void
5105em_disable_aspm(struct adapter *adapter)
5106{
5107	int		base, reg;
5108	u16		link_cap,link_ctrl;
5109	device_t	dev = adapter->dev;
5110
5111	switch (adapter->hw.mac.type) {
5112		case e1000_82573:
5113		case e1000_82574:
5114		case e1000_82583:
5115			break;
5116		default:
5117			return;
5118	}
5119	if (pci_find_cap(dev, PCIY_EXPRESS, &base) != 0)
5120		return;
5121	reg = base + PCIER_LINK_CAP;
5122	link_cap = pci_read_config(dev, reg, 2);
5123	if ((link_cap & PCIEM_LINK_CAP_ASPM) == 0)
5124		return;
5125	reg = base + PCIER_LINK_CTL;
5126	link_ctrl = pci_read_config(dev, reg, 2);
5127	link_ctrl &= ~PCIEM_LINK_CTL_ASPMC;
5128	pci_write_config(dev, reg, link_ctrl, 2);
5129	return;
5130}
5131
5132/**********************************************************************
5133 *
5134 *  Update the board statistics counters.
5135 *
5136 **********************************************************************/
5137static void
5138em_update_stats_counters(struct adapter *adapter)
5139{
5140	struct ifnet   *ifp;
5141
5142	if(adapter->hw.phy.media_type == e1000_media_type_copper ||
5143	   (E1000_READ_REG(&adapter->hw, E1000_STATUS) & E1000_STATUS_LU)) {
5144		adapter->stats.symerrs += E1000_READ_REG(&adapter->hw, E1000_SYMERRS);
5145		adapter->stats.sec += E1000_READ_REG(&adapter->hw, E1000_SEC);
5146	}
5147	adapter->stats.crcerrs += E1000_READ_REG(&adapter->hw, E1000_CRCERRS);
5148	adapter->stats.mpc += E1000_READ_REG(&adapter->hw, E1000_MPC);
5149	adapter->stats.scc += E1000_READ_REG(&adapter->hw, E1000_SCC);
5150	adapter->stats.ecol += E1000_READ_REG(&adapter->hw, E1000_ECOL);
5151
5152	adapter->stats.mcc += E1000_READ_REG(&adapter->hw, E1000_MCC);
5153	adapter->stats.latecol += E1000_READ_REG(&adapter->hw, E1000_LATECOL);
5154	adapter->stats.colc += E1000_READ_REG(&adapter->hw, E1000_COLC);
5155	adapter->stats.dc += E1000_READ_REG(&adapter->hw, E1000_DC);
5156	adapter->stats.rlec += E1000_READ_REG(&adapter->hw, E1000_RLEC);
5157	adapter->stats.xonrxc += E1000_READ_REG(&adapter->hw, E1000_XONRXC);
5158	adapter->stats.xontxc += E1000_READ_REG(&adapter->hw, E1000_XONTXC);
5159	/*
5160	** For watchdog management we need to know if we have been
5161	** paused during the last interval, so capture that here.
5162	*/
5163	adapter->pause_frames = E1000_READ_REG(&adapter->hw, E1000_XOFFRXC);
5164	adapter->stats.xoffrxc += adapter->pause_frames;
5165	adapter->stats.xofftxc += E1000_READ_REG(&adapter->hw, E1000_XOFFTXC);
5166	adapter->stats.fcruc += E1000_READ_REG(&adapter->hw, E1000_FCRUC);
5167	adapter->stats.prc64 += E1000_READ_REG(&adapter->hw, E1000_PRC64);
5168	adapter->stats.prc127 += E1000_READ_REG(&adapter->hw, E1000_PRC127);
5169	adapter->stats.prc255 += E1000_READ_REG(&adapter->hw, E1000_PRC255);
5170	adapter->stats.prc511 += E1000_READ_REG(&adapter->hw, E1000_PRC511);
5171	adapter->stats.prc1023 += E1000_READ_REG(&adapter->hw, E1000_PRC1023);
5172	adapter->stats.prc1522 += E1000_READ_REG(&adapter->hw, E1000_PRC1522);
5173	adapter->stats.gprc += E1000_READ_REG(&adapter->hw, E1000_GPRC);
5174	adapter->stats.bprc += E1000_READ_REG(&adapter->hw, E1000_BPRC);
5175	adapter->stats.mprc += E1000_READ_REG(&adapter->hw, E1000_MPRC);
5176	adapter->stats.gptc += E1000_READ_REG(&adapter->hw, E1000_GPTC);
5177
5178	/* For the 64-bit byte counters the low dword must be read first. */
5179	/* Both registers clear on the read of the high dword */
5180
5181	adapter->stats.gorc += E1000_READ_REG(&adapter->hw, E1000_GORCL) +
5182	    ((u64)E1000_READ_REG(&adapter->hw, E1000_GORCH) << 32);
5183	adapter->stats.gotc += E1000_READ_REG(&adapter->hw, E1000_GOTCL) +
5184	    ((u64)E1000_READ_REG(&adapter->hw, E1000_GOTCH) << 32);
5185
5186	adapter->stats.rnbc += E1000_READ_REG(&adapter->hw, E1000_RNBC);
5187	adapter->stats.ruc += E1000_READ_REG(&adapter->hw, E1000_RUC);
5188	adapter->stats.rfc += E1000_READ_REG(&adapter->hw, E1000_RFC);
5189	adapter->stats.roc += E1000_READ_REG(&adapter->hw, E1000_ROC);
5190	adapter->stats.rjc += E1000_READ_REG(&adapter->hw, E1000_RJC);
5191
5192	adapter->stats.tor += E1000_READ_REG(&adapter->hw, E1000_TORH);
5193	adapter->stats.tot += E1000_READ_REG(&adapter->hw, E1000_TOTH);
5194
5195	adapter->stats.tpr += E1000_READ_REG(&adapter->hw, E1000_TPR);
5196	adapter->stats.tpt += E1000_READ_REG(&adapter->hw, E1000_TPT);
5197	adapter->stats.ptc64 += E1000_READ_REG(&adapter->hw, E1000_PTC64);
5198	adapter->stats.ptc127 += E1000_READ_REG(&adapter->hw, E1000_PTC127);
5199	adapter->stats.ptc255 += E1000_READ_REG(&adapter->hw, E1000_PTC255);
5200	adapter->stats.ptc511 += E1000_READ_REG(&adapter->hw, E1000_PTC511);
5201	adapter->stats.ptc1023 += E1000_READ_REG(&adapter->hw, E1000_PTC1023);
5202	adapter->stats.ptc1522 += E1000_READ_REG(&adapter->hw, E1000_PTC1522);
5203	adapter->stats.mptc += E1000_READ_REG(&adapter->hw, E1000_MPTC);
5204	adapter->stats.bptc += E1000_READ_REG(&adapter->hw, E1000_BPTC);
5205
5206	/* Interrupt Counts */
5207
5208	adapter->stats.iac += E1000_READ_REG(&adapter->hw, E1000_IAC);
5209	adapter->stats.icrxptc += E1000_READ_REG(&adapter->hw, E1000_ICRXPTC);
5210	adapter->stats.icrxatc += E1000_READ_REG(&adapter->hw, E1000_ICRXATC);
5211	adapter->stats.ictxptc += E1000_READ_REG(&adapter->hw, E1000_ICTXPTC);
5212	adapter->stats.ictxatc += E1000_READ_REG(&adapter->hw, E1000_ICTXATC);
5213	adapter->stats.ictxqec += E1000_READ_REG(&adapter->hw, E1000_ICTXQEC);
5214	adapter->stats.ictxqmtc += E1000_READ_REG(&adapter->hw, E1000_ICTXQMTC);
5215	adapter->stats.icrxdmtc += E1000_READ_REG(&adapter->hw, E1000_ICRXDMTC);
5216	adapter->stats.icrxoc += E1000_READ_REG(&adapter->hw, E1000_ICRXOC);
5217
5218	if (adapter->hw.mac.type >= e1000_82543) {
5219		adapter->stats.algnerrc +=
5220		E1000_READ_REG(&adapter->hw, E1000_ALGNERRC);
5221		adapter->stats.rxerrc +=
5222		E1000_READ_REG(&adapter->hw, E1000_RXERRC);
5223		adapter->stats.tncrs +=
5224		E1000_READ_REG(&adapter->hw, E1000_TNCRS);
5225		adapter->stats.cexterr +=
5226		E1000_READ_REG(&adapter->hw, E1000_CEXTERR);
5227		adapter->stats.tsctc +=
5228		E1000_READ_REG(&adapter->hw, E1000_TSCTC);
5229		adapter->stats.tsctfc +=
5230		E1000_READ_REG(&adapter->hw, E1000_TSCTFC);
5231	}
5232	ifp = adapter->ifp;
5233
5234	ifp->if_collisions = adapter->stats.colc;
5235
5236	/* Rx Errors */
5237	ifp->if_ierrors = adapter->dropped_pkts + adapter->stats.rxerrc +
5238	    adapter->stats.crcerrs + adapter->stats.algnerrc +
5239	    adapter->stats.ruc + adapter->stats.roc +
5240	    adapter->stats.mpc + adapter->stats.cexterr;
5241
5242	/* Tx Errors */
5243	ifp->if_oerrors = adapter->stats.ecol +
5244	    adapter->stats.latecol + adapter->watchdog_events;
5245}
5246
5247/* Export a single 32-bit register via a read-only sysctl. */
5248static int
5249em_sysctl_reg_handler(SYSCTL_HANDLER_ARGS)
5250{
5251	struct adapter *adapter;
5252	u_int val;
5253
5254	adapter = oidp->oid_arg1;
5255	val = E1000_READ_REG(&adapter->hw, oidp->oid_arg2);
5256	return (sysctl_handle_int(oidp, &val, 0, req));
5257}
5258
5259/*
5260 * Add sysctl variables, one per statistic, to the system.
5261 */
5262static void
5263em_add_hw_stats(struct adapter *adapter)
5264{
5265	device_t dev = adapter->dev;
5266
5267	struct tx_ring *txr = adapter->tx_rings;
5268	struct rx_ring *rxr = adapter->rx_rings;
5269
5270	struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev);
5271	struct sysctl_oid *tree = device_get_sysctl_tree(dev);
5272	struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree);
5273	struct e1000_hw_stats *stats = &adapter->stats;
5274
5275	struct sysctl_oid *stat_node, *queue_node, *int_node;
5276	struct sysctl_oid_list *stat_list, *queue_list, *int_list;
5277
5278#define QUEUE_NAME_LEN 32
5279	char namebuf[QUEUE_NAME_LEN];
5280
5281	/* Driver Statistics */
5282	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "link_irq",
5283			CTLFLAG_RD, &adapter->link_irq,
5284			"Link MSIX IRQ Handled");
5285	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "mbuf_alloc_fail",
5286			 CTLFLAG_RD, &adapter->mbuf_alloc_failed,
5287			 "Std mbuf failed");
5288	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "cluster_alloc_fail",
5289			 CTLFLAG_RD, &adapter->mbuf_cluster_failed,
5290			 "Std mbuf cluster failed");
5291	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "dropped",
5292			CTLFLAG_RD, &adapter->dropped_pkts,
5293			"Driver dropped packets");
5294	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_dma_fail",
5295			CTLFLAG_RD, &adapter->no_tx_dma_setup,
5296			"Driver tx dma failure in xmit");
5297	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_overruns",
5298			CTLFLAG_RD, &adapter->rx_overruns,
5299			"RX overruns");
5300	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "watchdog_timeouts",
5301			CTLFLAG_RD, &adapter->watchdog_events,
5302			"Watchdog timeouts");
5303
5304	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "device_control",
5305			CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_CTRL,
5306			em_sysctl_reg_handler, "IU",
5307			"Device Control Register");
5308	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rx_control",
5309			CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_RCTL,
5310			em_sysctl_reg_handler, "IU",
5311			"Receiver Control Register");
5312	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_high_water",
5313			CTLFLAG_RD, &adapter->hw.fc.high_water, 0,
5314			"Flow Control High Watermark");
5315	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_low_water",
5316			CTLFLAG_RD, &adapter->hw.fc.low_water, 0,
5317			"Flow Control Low Watermark");
5318
5319	for (int i = 0; i < adapter->num_queues; i++, rxr++, txr++) {
5320		snprintf(namebuf, QUEUE_NAME_LEN, "queue%d", i);
5321		queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
5322					    CTLFLAG_RD, NULL, "Queue Name");
5323		queue_list = SYSCTL_CHILDREN(queue_node);
5324
5325		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_head",
5326				CTLTYPE_UINT | CTLFLAG_RD, adapter,
5327				E1000_TDH(txr->me),
5328				em_sysctl_reg_handler, "IU",
5329 				"Transmit Descriptor Head");
5330		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_tail",
5331				CTLTYPE_UINT | CTLFLAG_RD, adapter,
5332				E1000_TDT(txr->me),
5333				em_sysctl_reg_handler, "IU",
5334 				"Transmit Descriptor Tail");
5335		SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "tx_irq",
5336				CTLFLAG_RD, &txr->tx_irq,
5337				"Queue MSI-X Transmit Interrupts");
5338		SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "no_desc_avail",
5339				CTLFLAG_RD, &txr->no_desc_avail,
5340				"Queue No Descriptor Available");
5341
5342		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_head",
5343				CTLTYPE_UINT | CTLFLAG_RD, adapter,
5344				E1000_RDH(rxr->me),
5345				em_sysctl_reg_handler, "IU",
5346				"Receive Descriptor Head");
5347		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_tail",
5348				CTLTYPE_UINT | CTLFLAG_RD, adapter,
5349				E1000_RDT(rxr->me),
5350				em_sysctl_reg_handler, "IU",
5351				"Receive Descriptor Tail");
5352		SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "rx_irq",
5353				CTLFLAG_RD, &rxr->rx_irq,
5354				"Queue MSI-X Receive Interrupts");
5355	}
5356
5357	/* MAC stats get their own sub node */
5358
5359	stat_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "mac_stats",
5360				    CTLFLAG_RD, NULL, "Statistics");
5361	stat_list = SYSCTL_CHILDREN(stat_node);
5362
5363	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "excess_coll",
5364			CTLFLAG_RD, &stats->ecol,
5365			"Excessive collisions");
5366	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "single_coll",
5367			CTLFLAG_RD, &stats->scc,
5368			"Single collisions");
5369	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "multiple_coll",
5370			CTLFLAG_RD, &stats->mcc,
5371			"Multiple collisions");
5372	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "late_coll",
5373			CTLFLAG_RD, &stats->latecol,
5374			"Late collisions");
5375	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "collision_count",
5376			CTLFLAG_RD, &stats->colc,
5377			"Collision Count");
5378	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "symbol_errors",
5379			CTLFLAG_RD, &adapter->stats.symerrs,
5380			"Symbol Errors");
5381	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "sequence_errors",
5382			CTLFLAG_RD, &adapter->stats.sec,
5383			"Sequence Errors");
5384	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "defer_count",
5385			CTLFLAG_RD, &adapter->stats.dc,
5386			"Defer Count");
5387	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "missed_packets",
5388			CTLFLAG_RD, &adapter->stats.mpc,
5389			"Missed Packets");
5390	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_no_buff",
5391			CTLFLAG_RD, &adapter->stats.rnbc,
5392			"Receive No Buffers");
5393	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_undersize",
5394			CTLFLAG_RD, &adapter->stats.ruc,
5395			"Receive Undersize");
5396	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_fragmented",
5397			CTLFLAG_RD, &adapter->stats.rfc,
5398			"Fragmented Packets Received ");
5399	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_oversize",
5400			CTLFLAG_RD, &adapter->stats.roc,
5401			"Oversized Packets Received");
5402	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_jabber",
5403			CTLFLAG_RD, &adapter->stats.rjc,
5404			"Recevied Jabber");
5405	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_errs",
5406			CTLFLAG_RD, &adapter->stats.rxerrc,
5407			"Receive Errors");
5408	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "crc_errs",
5409			CTLFLAG_RD, &adapter->stats.crcerrs,
5410			"CRC errors");
5411	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "alignment_errs",
5412			CTLFLAG_RD, &adapter->stats.algnerrc,
5413			"Alignment Errors");
5414	/* On 82575 these are collision counts */
5415	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "coll_ext_errs",
5416			CTLFLAG_RD, &adapter->stats.cexterr,
5417			"Collision/Carrier extension errors");
5418	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_recvd",
5419			CTLFLAG_RD, &adapter->stats.xonrxc,
5420			"XON Received");
5421	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_txd",
5422			CTLFLAG_RD, &adapter->stats.xontxc,
5423			"XON Transmitted");
5424	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_recvd",
5425			CTLFLAG_RD, &adapter->stats.xoffrxc,
5426			"XOFF Received");
5427	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_txd",
5428			CTLFLAG_RD, &adapter->stats.xofftxc,
5429			"XOFF Transmitted");
5430
5431	/* Packet Reception Stats */
5432	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_recvd",
5433			CTLFLAG_RD, &adapter->stats.tpr,
5434			"Total Packets Received ");
5435	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_recvd",
5436			CTLFLAG_RD, &adapter->stats.gprc,
5437			"Good Packets Received");
5438	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_recvd",
5439			CTLFLAG_RD, &adapter->stats.bprc,
5440			"Broadcast Packets Received");
5441	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_recvd",
5442			CTLFLAG_RD, &adapter->stats.mprc,
5443			"Multicast Packets Received");
5444	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_64",
5445			CTLFLAG_RD, &adapter->stats.prc64,
5446			"64 byte frames received ");
5447	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_65_127",
5448			CTLFLAG_RD, &adapter->stats.prc127,
5449			"65-127 byte frames received");
5450	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_128_255",
5451			CTLFLAG_RD, &adapter->stats.prc255,
5452			"128-255 byte frames received");
5453	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_256_511",
5454			CTLFLAG_RD, &adapter->stats.prc511,
5455			"256-511 byte frames received");
5456	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_512_1023",
5457			CTLFLAG_RD, &adapter->stats.prc1023,
5458			"512-1023 byte frames received");
5459	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_1024_1522",
5460			CTLFLAG_RD, &adapter->stats.prc1522,
5461			"1023-1522 byte frames received");
5462 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_recvd",
5463 			CTLFLAG_RD, &adapter->stats.gorc,
5464 			"Good Octets Received");
5465
5466	/* Packet Transmission Stats */
5467 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_txd",
5468 			CTLFLAG_RD, &adapter->stats.gotc,
5469 			"Good Octets Transmitted");
5470	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_txd",
5471			CTLFLAG_RD, &adapter->stats.tpt,
5472			"Total Packets Transmitted");
5473	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd",
5474			CTLFLAG_RD, &adapter->stats.gptc,
5475			"Good Packets Transmitted");
5476	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_txd",
5477			CTLFLAG_RD, &adapter->stats.bptc,
5478			"Broadcast Packets Transmitted");
5479	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_txd",
5480			CTLFLAG_RD, &adapter->stats.mptc,
5481			"Multicast Packets Transmitted");
5482	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_64",
5483			CTLFLAG_RD, &adapter->stats.ptc64,
5484			"64 byte frames transmitted ");
5485	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_65_127",
5486			CTLFLAG_RD, &adapter->stats.ptc127,
5487			"65-127 byte frames transmitted");
5488	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_128_255",
5489			CTLFLAG_RD, &adapter->stats.ptc255,
5490			"128-255 byte frames transmitted");
5491	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_256_511",
5492			CTLFLAG_RD, &adapter->stats.ptc511,
5493			"256-511 byte frames transmitted");
5494	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_512_1023",
5495			CTLFLAG_RD, &adapter->stats.ptc1023,
5496			"512-1023 byte frames transmitted");
5497	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_1024_1522",
5498			CTLFLAG_RD, &adapter->stats.ptc1522,
5499			"1024-1522 byte frames transmitted");
5500	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tso_txd",
5501			CTLFLAG_RD, &adapter->stats.tsctc,
5502			"TSO Contexts Transmitted");
5503	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tso_ctx_fail",
5504			CTLFLAG_RD, &adapter->stats.tsctfc,
5505			"TSO Contexts Failed");
5506
5507
5508	/* Interrupt Stats */
5509
5510	int_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "interrupts",
5511				    CTLFLAG_RD, NULL, "Interrupt Statistics");
5512	int_list = SYSCTL_CHILDREN(int_node);
5513
5514	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "asserts",
5515			CTLFLAG_RD, &adapter->stats.iac,
5516			"Interrupt Assertion Count");
5517
5518	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_pkt_timer",
5519			CTLFLAG_RD, &adapter->stats.icrxptc,
5520			"Interrupt Cause Rx Pkt Timer Expire Count");
5521
5522	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_abs_timer",
5523			CTLFLAG_RD, &adapter->stats.icrxatc,
5524			"Interrupt Cause Rx Abs Timer Expire Count");
5525
5526	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_pkt_timer",
5527			CTLFLAG_RD, &adapter->stats.ictxptc,
5528			"Interrupt Cause Tx Pkt Timer Expire Count");
5529
5530	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_abs_timer",
5531			CTLFLAG_RD, &adapter->stats.ictxatc,
5532			"Interrupt Cause Tx Abs Timer Expire Count");
5533
5534	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_queue_empty",
5535			CTLFLAG_RD, &adapter->stats.ictxqec,
5536			"Interrupt Cause Tx Queue Empty Count");
5537
5538	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_queue_min_thresh",
5539			CTLFLAG_RD, &adapter->stats.ictxqmtc,
5540			"Interrupt Cause Tx Queue Min Thresh Count");
5541
5542	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_desc_min_thresh",
5543			CTLFLAG_RD, &adapter->stats.icrxdmtc,
5544			"Interrupt Cause Rx Desc Min Thresh Count");
5545
5546	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_overrun",
5547			CTLFLAG_RD, &adapter->stats.icrxoc,
5548			"Interrupt Cause Receiver Overrun Count");
5549}
5550
5551/**********************************************************************
5552 *
5553 *  This routine provides a way to dump out the adapter eeprom,
5554 *  often a useful debug/service tool. This only dumps the first
5555 *  32 words, stuff that matters is in that extent.
5556 *
5557 **********************************************************************/
5558static int
5559em_sysctl_nvm_info(SYSCTL_HANDLER_ARGS)
5560{
5561	struct adapter *adapter = (struct adapter *)arg1;
5562	int error;
5563	int result;
5564
5565	result = -1;
5566	error = sysctl_handle_int(oidp, &result, 0, req);
5567
5568	if (error || !req->newptr)
5569		return (error);
5570
5571	/*
5572	 * This value will cause a hex dump of the
5573	 * first 32 16-bit words of the EEPROM to
5574	 * the screen.
5575	 */
5576	if (result == 1)
5577		em_print_nvm_info(adapter);
5578
5579	return (error);
5580}
5581
5582static void
5583em_print_nvm_info(struct adapter *adapter)
5584{
5585	u16	eeprom_data;
5586	int	i, j, row = 0;
5587
5588	/* Its a bit crude, but it gets the job done */
5589	printf("\nInterface EEPROM Dump:\n");
5590	printf("Offset\n0x0000  ");
5591	for (i = 0, j = 0; i < 32; i++, j++) {
5592		if (j == 8) { /* Make the offset block */
5593			j = 0; ++row;
5594			printf("\n0x00%x0  ",row);
5595		}
5596		e1000_read_nvm(&adapter->hw, i, 1, &eeprom_data);
5597		printf("%04x ", eeprom_data);
5598	}
5599	printf("\n");
5600}
5601
5602static int
5603em_sysctl_int_delay(SYSCTL_HANDLER_ARGS)
5604{
5605	struct em_int_delay_info *info;
5606	struct adapter *adapter;
5607	u32 regval;
5608	int error, usecs, ticks;
5609
5610	info = (struct em_int_delay_info *)arg1;
5611	usecs = info->value;
5612	error = sysctl_handle_int(oidp, &usecs, 0, req);
5613	if (error != 0 || req->newptr == NULL)
5614		return (error);
5615	if (usecs < 0 || usecs > EM_TICKS_TO_USECS(65535))
5616		return (EINVAL);
5617	info->value = usecs;
5618	ticks = EM_USECS_TO_TICKS(usecs);
5619	if (info->offset == E1000_ITR)	/* units are 256ns here */
5620		ticks *= 4;
5621
5622	adapter = info->adapter;
5623
5624	EM_CORE_LOCK(adapter);
5625	regval = E1000_READ_OFFSET(&adapter->hw, info->offset);
5626	regval = (regval & ~0xffff) | (ticks & 0xffff);
5627	/* Handle a few special cases. */
5628	switch (info->offset) {
5629	case E1000_RDTR:
5630		break;
5631	case E1000_TIDV:
5632		if (ticks == 0) {
5633			adapter->txd_cmd &= ~E1000_TXD_CMD_IDE;
5634			/* Don't write 0 into the TIDV register. */
5635			regval++;
5636		} else
5637			adapter->txd_cmd |= E1000_TXD_CMD_IDE;
5638		break;
5639	}
5640	E1000_WRITE_OFFSET(&adapter->hw, info->offset, regval);
5641	EM_CORE_UNLOCK(adapter);
5642	return (0);
5643}
5644
5645static void
5646em_add_int_delay_sysctl(struct adapter *adapter, const char *name,
5647	const char *description, struct em_int_delay_info *info,
5648	int offset, int value)
5649{
5650	info->adapter = adapter;
5651	info->offset = offset;
5652	info->value = value;
5653	SYSCTL_ADD_PROC(device_get_sysctl_ctx(adapter->dev),
5654	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
5655	    OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW,
5656	    info, 0, em_sysctl_int_delay, "I", description);
5657}
5658
5659static void
5660em_set_sysctl_value(struct adapter *adapter, const char *name,
5661	const char *description, int *limit, int value)
5662{
5663	*limit = value;
5664	SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev),
5665	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
5666	    OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW, limit, value, description);
5667}
5668
5669
5670/*
5671** Set flow control using sysctl:
5672** Flow control values:
5673**      0 - off
5674**      1 - rx pause
5675**      2 - tx pause
5676**      3 - full
5677*/
5678static int
5679em_set_flowcntl(SYSCTL_HANDLER_ARGS)
5680{
5681        int		error;
5682	static int	input = 3; /* default is full */
5683        struct adapter	*adapter = (struct adapter *) arg1;
5684
5685        error = sysctl_handle_int(oidp, &input, 0, req);
5686
5687        if ((error) || (req->newptr == NULL))
5688                return (error);
5689
5690	if (input == adapter->fc) /* no change? */
5691		return (error);
5692
5693        switch (input) {
5694                case e1000_fc_rx_pause:
5695                case e1000_fc_tx_pause:
5696                case e1000_fc_full:
5697                case e1000_fc_none:
5698                        adapter->hw.fc.requested_mode = input;
5699			adapter->fc = input;
5700                        break;
5701                default:
5702			/* Do nothing */
5703			return (error);
5704        }
5705
5706        adapter->hw.fc.current_mode = adapter->hw.fc.requested_mode;
5707        e1000_force_mac_fc(&adapter->hw);
5708        return (error);
5709}
5710
5711/*
5712** Manage Energy Efficient Ethernet:
5713** Control values:
5714**     0/1 - enabled/disabled
5715*/
5716static int
5717em_sysctl_eee(SYSCTL_HANDLER_ARGS)
5718{
5719       struct adapter *adapter = (struct adapter *) arg1;
5720       int             error, value;
5721
5722       value = adapter->hw.dev_spec.ich8lan.eee_disable;
5723       error = sysctl_handle_int(oidp, &value, 0, req);
5724       if (error || req->newptr == NULL)
5725               return (error);
5726       EM_CORE_LOCK(adapter);
5727       adapter->hw.dev_spec.ich8lan.eee_disable = (value != 0);
5728       em_init_locked(adapter);
5729       EM_CORE_UNLOCK(adapter);
5730       return (0);
5731}
5732
5733static int
5734em_sysctl_debug_info(SYSCTL_HANDLER_ARGS)
5735{
5736	struct adapter *adapter;
5737	int error;
5738	int result;
5739
5740	result = -1;
5741	error = sysctl_handle_int(oidp, &result, 0, req);
5742
5743	if (error || !req->newptr)
5744		return (error);
5745
5746	if (result == 1) {
5747		adapter = (struct adapter *)arg1;
5748		em_print_debug_info(adapter);
5749        }
5750
5751	return (error);
5752}
5753
5754/*
5755** This routine is meant to be fluid, add whatever is
5756** needed for debugging a problem.  -jfv
5757*/
5758static void
5759em_print_debug_info(struct adapter *adapter)
5760{
5761	device_t dev = adapter->dev;
5762	struct tx_ring *txr = adapter->tx_rings;
5763	struct rx_ring *rxr = adapter->rx_rings;
5764
5765	if (adapter->ifp->if_drv_flags & IFF_DRV_RUNNING)
5766		printf("Interface is RUNNING ");
5767	else
5768		printf("Interface is NOT RUNNING\n");
5769
5770	if (adapter->ifp->if_drv_flags & IFF_DRV_OACTIVE)
5771		printf("and INACTIVE\n");
5772	else
5773		printf("and ACTIVE\n");
5774
5775	device_printf(dev, "hw tdh = %d, hw tdt = %d\n",
5776	    E1000_READ_REG(&adapter->hw, E1000_TDH(0)),
5777	    E1000_READ_REG(&adapter->hw, E1000_TDT(0)));
5778	device_printf(dev, "hw rdh = %d, hw rdt = %d\n",
5779	    E1000_READ_REG(&adapter->hw, E1000_RDH(0)),
5780	    E1000_READ_REG(&adapter->hw, E1000_RDT(0)));
5781	device_printf(dev, "Tx Queue Status = %d\n", txr->queue_status);
5782	device_printf(dev, "TX descriptors avail = %d\n",
5783	    txr->tx_avail);
5784	device_printf(dev, "Tx Descriptors avail failure = %ld\n",
5785	    txr->no_desc_avail);
5786	device_printf(dev, "RX discarded packets = %ld\n",
5787	    rxr->rx_discarded);
5788	device_printf(dev, "RX Next to Check = %d\n", rxr->next_to_check);
5789	device_printf(dev, "RX Next to Refresh = %d\n", rxr->next_to_refresh);
5790}
5791