1/******************************************************************************
2
3  Copyright (c) 2001-2015, Intel Corporation
4  All rights reserved.
5
6  Redistribution and use in source and binary forms, with or without
7  modification, are permitted provided that the following conditions are met:
8
9   1. Redistributions of source code must retain the above copyright notice,
10      this list of conditions and the following disclaimer.
11
12   2. Redistributions in binary form must reproduce the above copyright
13      notice, this list of conditions and the following disclaimer in the
14      documentation and/or other materials provided with the distribution.
15
16   3. Neither the name of the Intel Corporation nor the names of its
17      contributors may be used to endorse or promote products derived from
18      this software without specific prior written permission.
19
20  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
24  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30  POSSIBILITY OF SUCH DAMAGE.
31
32******************************************************************************/
33/*$FreeBSD: stable/10/sys/dev/e1000/if_em.c 354209 2019-10-30 21:49:34Z marius $*/
34
35#include "opt_em.h"
36#include "opt_ddb.h"
37#include "opt_inet.h"
38#include "opt_inet6.h"
39
40#ifdef HAVE_KERNEL_OPTION_HEADERS
41#include "opt_device_polling.h"
42#endif
43
44#include <sys/param.h>
45#include <sys/systm.h>
46#ifdef DDB
47#include <sys/types.h>
48#include <ddb/ddb.h>
49#endif
50#if __FreeBSD_version >= 800000
51#include <sys/buf_ring.h>
52#endif
53#include <sys/bus.h>
54#include <sys/endian.h>
55#include <sys/kernel.h>
56#include <sys/kthread.h>
57#include <sys/malloc.h>
58#include <sys/mbuf.h>
59#include <sys/module.h>
60#include <sys/rman.h>
61#include <sys/smp.h>
62#include <sys/socket.h>
63#include <sys/sockio.h>
64#include <sys/sysctl.h>
65#include <sys/taskqueue.h>
66#include <sys/eventhandler.h>
67#include <machine/bus.h>
68#include <machine/resource.h>
69
70#include <net/bpf.h>
71#include <net/ethernet.h>
72#include <net/if.h>
73#include <net/if_arp.h>
74#include <net/if_dl.h>
75#include <net/if_media.h>
76
77#include <net/if_types.h>
78#include <net/if_vlan_var.h>
79
80#include <netinet/in_systm.h>
81#include <netinet/in.h>
82#include <netinet/if_ether.h>
83#include <netinet/ip.h>
84#include <netinet/ip6.h>
85#include <netinet/tcp.h>
86#include <netinet/udp.h>
87
88#include <machine/in_cksum.h>
89#include <dev/led/led.h>
90#include <dev/pci/pcivar.h>
91#include <dev/pci/pcireg.h>
92
93#include "e1000_api.h"
94#include "e1000_82571.h"
95#include "if_em.h"
96
97/*********************************************************************
98 *  Driver version:
99 *********************************************************************/
100char em_driver_version[] = "7.6.1-k";
101
102/*********************************************************************
103 *  PCI Device ID Table
104 *
105 *  Used by probe to select devices to load on
106 *  Last field stores an index into e1000_strings
107 *  Last entry must be all 0s
108 *
109 *  { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index }
110 *********************************************************************/
111
112static em_vendor_info_t em_vendor_info_array[] =
113{
114	/* Intel(R) PRO/1000 Network Connection */
115	{ 0x8086, E1000_DEV_ID_82571EB_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
116	{ 0x8086, E1000_DEV_ID_82571EB_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
117	{ 0x8086, E1000_DEV_ID_82571EB_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
118	{ 0x8086, E1000_DEV_ID_82571EB_SERDES_DUAL,
119						PCI_ANY_ID, PCI_ANY_ID, 0},
120	{ 0x8086, E1000_DEV_ID_82571EB_SERDES_QUAD,
121						PCI_ANY_ID, PCI_ANY_ID, 0},
122	{ 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER,
123						PCI_ANY_ID, PCI_ANY_ID, 0},
124	{ 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER_LP,
125						PCI_ANY_ID, PCI_ANY_ID, 0},
126	{ 0x8086, E1000_DEV_ID_82571EB_QUAD_FIBER,
127						PCI_ANY_ID, PCI_ANY_ID, 0},
128	{ 0x8086, E1000_DEV_ID_82571PT_QUAD_COPPER,
129						PCI_ANY_ID, PCI_ANY_ID, 0},
130	{ 0x8086, E1000_DEV_ID_82572EI_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
131	{ 0x8086, E1000_DEV_ID_82572EI_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
132	{ 0x8086, E1000_DEV_ID_82572EI_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
133	{ 0x8086, E1000_DEV_ID_82572EI,		PCI_ANY_ID, PCI_ANY_ID, 0},
134
135	{ 0x8086, E1000_DEV_ID_82573E,		PCI_ANY_ID, PCI_ANY_ID, 0},
136	{ 0x8086, E1000_DEV_ID_82573E_IAMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
137	{ 0x8086, E1000_DEV_ID_82573L,		PCI_ANY_ID, PCI_ANY_ID, 0},
138	{ 0x8086, E1000_DEV_ID_82583V,		PCI_ANY_ID, PCI_ANY_ID, 0},
139	{ 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_SPT,
140						PCI_ANY_ID, PCI_ANY_ID, 0},
141	{ 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_SPT,
142						PCI_ANY_ID, PCI_ANY_ID, 0},
143	{ 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_DPT,
144						PCI_ANY_ID, PCI_ANY_ID, 0},
145	{ 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_DPT,
146						PCI_ANY_ID, PCI_ANY_ID, 0},
147	{ 0x8086, E1000_DEV_ID_ICH8_IGP_M_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
148	{ 0x8086, E1000_DEV_ID_ICH8_IGP_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
149	{ 0x8086, E1000_DEV_ID_ICH8_IGP_C,	PCI_ANY_ID, PCI_ANY_ID, 0},
150	{ 0x8086, E1000_DEV_ID_ICH8_IFE,	PCI_ANY_ID, PCI_ANY_ID, 0},
151	{ 0x8086, E1000_DEV_ID_ICH8_IFE_GT,	PCI_ANY_ID, PCI_ANY_ID, 0},
152	{ 0x8086, E1000_DEV_ID_ICH8_IFE_G,	PCI_ANY_ID, PCI_ANY_ID, 0},
153	{ 0x8086, E1000_DEV_ID_ICH8_IGP_M,	PCI_ANY_ID, PCI_ANY_ID, 0},
154	{ 0x8086, E1000_DEV_ID_ICH8_82567V_3,	PCI_ANY_ID, PCI_ANY_ID, 0},
155	{ 0x8086, E1000_DEV_ID_ICH9_IGP_M_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
156	{ 0x8086, E1000_DEV_ID_ICH9_IGP_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
157	{ 0x8086, E1000_DEV_ID_ICH9_IGP_C,	PCI_ANY_ID, PCI_ANY_ID, 0},
158	{ 0x8086, E1000_DEV_ID_ICH9_IGP_M,	PCI_ANY_ID, PCI_ANY_ID, 0},
159	{ 0x8086, E1000_DEV_ID_ICH9_IGP_M_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
160	{ 0x8086, E1000_DEV_ID_ICH9_IFE,	PCI_ANY_ID, PCI_ANY_ID, 0},
161	{ 0x8086, E1000_DEV_ID_ICH9_IFE_GT,	PCI_ANY_ID, PCI_ANY_ID, 0},
162	{ 0x8086, E1000_DEV_ID_ICH9_IFE_G,	PCI_ANY_ID, PCI_ANY_ID, 0},
163	{ 0x8086, E1000_DEV_ID_ICH9_BM,		PCI_ANY_ID, PCI_ANY_ID, 0},
164	{ 0x8086, E1000_DEV_ID_82574L,		PCI_ANY_ID, PCI_ANY_ID, 0},
165	{ 0x8086, E1000_DEV_ID_82574LA,		PCI_ANY_ID, PCI_ANY_ID, 0},
166	{ 0x8086, E1000_DEV_ID_ICH10_R_BM_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
167	{ 0x8086, E1000_DEV_ID_ICH10_R_BM_LF,	PCI_ANY_ID, PCI_ANY_ID, 0},
168	{ 0x8086, E1000_DEV_ID_ICH10_R_BM_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
169	{ 0x8086, E1000_DEV_ID_ICH10_D_BM_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
170	{ 0x8086, E1000_DEV_ID_ICH10_D_BM_LF,	PCI_ANY_ID, PCI_ANY_ID, 0},
171	{ 0x8086, E1000_DEV_ID_ICH10_D_BM_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
172	{ 0x8086, E1000_DEV_ID_PCH_M_HV_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
173	{ 0x8086, E1000_DEV_ID_PCH_M_HV_LC,	PCI_ANY_ID, PCI_ANY_ID, 0},
174	{ 0x8086, E1000_DEV_ID_PCH_D_HV_DM,	PCI_ANY_ID, PCI_ANY_ID, 0},
175	{ 0x8086, E1000_DEV_ID_PCH_D_HV_DC,	PCI_ANY_ID, PCI_ANY_ID, 0},
176	{ 0x8086, E1000_DEV_ID_PCH2_LV_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
177	{ 0x8086, E1000_DEV_ID_PCH2_LV_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
178	{ 0x8086, E1000_DEV_ID_PCH_LPT_I217_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
179	{ 0x8086, E1000_DEV_ID_PCH_LPT_I217_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
180	{ 0x8086, E1000_DEV_ID_PCH_LPTLP_I218_LM,
181						PCI_ANY_ID, PCI_ANY_ID, 0},
182	{ 0x8086, E1000_DEV_ID_PCH_LPTLP_I218_V,
183						PCI_ANY_ID, PCI_ANY_ID, 0},
184	{ 0x8086, E1000_DEV_ID_PCH_I218_LM2,	PCI_ANY_ID, PCI_ANY_ID, 0},
185	{ 0x8086, E1000_DEV_ID_PCH_I218_V2,	PCI_ANY_ID, PCI_ANY_ID, 0},
186	{ 0x8086, E1000_DEV_ID_PCH_I218_LM3,	PCI_ANY_ID, PCI_ANY_ID, 0},
187	{ 0x8086, E1000_DEV_ID_PCH_I218_V3,	PCI_ANY_ID, PCI_ANY_ID, 0},
188	{ 0x8086, E1000_DEV_ID_PCH_SPT_I219_LM, PCI_ANY_ID, PCI_ANY_ID, 0},
189	{ 0x8086, E1000_DEV_ID_PCH_SPT_I219_V,  PCI_ANY_ID, PCI_ANY_ID, 0},
190	{ 0x8086, E1000_DEV_ID_PCH_SPT_I219_LM2,
191                                                PCI_ANY_ID, PCI_ANY_ID, 0},
192	{ 0x8086, E1000_DEV_ID_PCH_SPT_I219_V2, PCI_ANY_ID, PCI_ANY_ID, 0},
193	{ 0x8086, E1000_DEV_ID_PCH_LBG_I219_LM3,
194						PCI_ANY_ID, PCI_ANY_ID, 0},
195	{ 0x8086, E1000_DEV_ID_PCH_SPT_I219_LM4,
196						PCI_ANY_ID, PCI_ANY_ID, 0},
197	{ 0x8086, E1000_DEV_ID_PCH_SPT_I219_V4, PCI_ANY_ID, PCI_ANY_ID, 0},
198	{ 0x8086, E1000_DEV_ID_PCH_SPT_I219_LM5,
199						PCI_ANY_ID, PCI_ANY_ID, 0},
200	{ 0x8086, E1000_DEV_ID_PCH_SPT_I219_V5, PCI_ANY_ID, PCI_ANY_ID, 0},
201	{ 0x8086, E1000_DEV_ID_PCH_SPT_I219_LM4,
202						PCI_ANY_ID, PCI_ANY_ID, 0},
203	{ 0x8086, E1000_DEV_ID_PCH_SPT_I219_V4, PCI_ANY_ID, PCI_ANY_ID, 0},
204	{ 0x8086, E1000_DEV_ID_PCH_SPT_I219_LM5,
205						PCI_ANY_ID, PCI_ANY_ID, 0},
206	{ 0x8086, E1000_DEV_ID_PCH_SPT_I219_V5, PCI_ANY_ID, PCI_ANY_ID, 0},
207	{ 0x8086, E1000_DEV_ID_PCH_CNP_I219_LM6,
208						PCI_ANY_ID, PCI_ANY_ID, 0},
209	{ 0x8086, E1000_DEV_ID_PCH_CNP_I219_V6, PCI_ANY_ID, PCI_ANY_ID, 0},
210	{ 0x8086, E1000_DEV_ID_PCH_CNP_I219_LM7,
211						PCI_ANY_ID, PCI_ANY_ID, 0},
212	{ 0x8086, E1000_DEV_ID_PCH_CNP_I219_V7, PCI_ANY_ID, PCI_ANY_ID, 0},
213	{ 0x8086, E1000_DEV_ID_PCH_ICP_I219_LM8,
214						PCI_ANY_ID, PCI_ANY_ID, 0},
215	{ 0x8086, E1000_DEV_ID_PCH_ICP_I219_V8, PCI_ANY_ID, PCI_ANY_ID, 0},
216	{ 0x8086, E1000_DEV_ID_PCH_ICP_I219_LM9,
217						PCI_ANY_ID, PCI_ANY_ID, 0},
218	{ 0x8086, E1000_DEV_ID_PCH_ICP_I219_V9, PCI_ANY_ID, PCI_ANY_ID, 0},
219	/* required last entry */
220	{ 0, 0, 0, 0, 0}
221};
222
223/*********************************************************************
224 *  Table of branding strings for all supported NICs.
225 *********************************************************************/
226
227static char *em_strings[] = {
228	"Intel(R) PRO/1000 Network Connection"
229};
230
231/*********************************************************************
232 *  Function prototypes
233 *********************************************************************/
234static int	em_probe(device_t);
235static int	em_attach(device_t);
236static int	em_detach(device_t);
237static int	em_shutdown(device_t);
238static int	em_suspend(device_t);
239static int	em_resume(device_t);
240#ifdef EM_MULTIQUEUE
241static int	em_mq_start(struct ifnet *, struct mbuf *);
242static int	em_mq_start_locked(struct ifnet *,
243		    struct tx_ring *);
244static void	em_qflush(struct ifnet *);
245#else
246static void	em_start(struct ifnet *);
247static void	em_start_locked(struct ifnet *, struct tx_ring *);
248#endif
249static int	em_ioctl(struct ifnet *, u_long, caddr_t);
250static void	em_init(void *);
251static void	em_init_locked(struct adapter *);
252static void	em_stop(void *);
253static void	em_media_status(struct ifnet *, struct ifmediareq *);
254static int	em_media_change(struct ifnet *);
255static void	em_identify_hardware(struct adapter *);
256static int	em_allocate_pci_resources(struct adapter *);
257static int	em_allocate_legacy(struct adapter *);
258static int	em_allocate_msix(struct adapter *);
259static int	em_allocate_queues(struct adapter *);
260static int	em_setup_msix(struct adapter *);
261static void	em_free_pci_resources(struct adapter *);
262static void	em_local_timer(void *);
263static void	em_reset(struct adapter *);
264static int	em_setup_interface(device_t, struct adapter *);
265static void	em_flush_desc_rings(struct adapter *);
266
267static void	em_setup_transmit_structures(struct adapter *);
268static void	em_initialize_transmit_unit(struct adapter *);
269static int	em_allocate_transmit_buffers(struct tx_ring *);
270static void	em_free_transmit_structures(struct adapter *);
271static void	em_free_transmit_buffers(struct tx_ring *);
272
273static int	em_setup_receive_structures(struct adapter *);
274static int	em_allocate_receive_buffers(struct rx_ring *);
275static void	em_initialize_receive_unit(struct adapter *);
276static void	em_free_receive_structures(struct adapter *);
277static void	em_free_receive_buffers(struct rx_ring *);
278
279static void	em_enable_intr(struct adapter *);
280static void	em_disable_intr(struct adapter *);
281static void	em_update_stats_counters(struct adapter *);
282static void	em_add_hw_stats(struct adapter *adapter);
283static void	em_txeof(struct tx_ring *);
284static bool	em_rxeof(struct rx_ring *, int, int *);
285#ifndef __NO_STRICT_ALIGNMENT
286static int	em_fixup_rx(struct rx_ring *);
287#endif
288static void	em_setup_rxdesc(union e1000_rx_desc_extended *,
289		    const struct em_rxbuffer *rxbuf);
290static void	em_receive_checksum(uint32_t status, struct mbuf *);
291static void	em_transmit_checksum_setup(struct tx_ring *, struct mbuf *, int,
292		    struct ip *, u32 *, u32 *);
293static void	em_tso_setup(struct tx_ring *, struct mbuf *, int, struct ip *,
294		    struct tcphdr *, u32 *, u32 *);
295static void	em_set_promisc(struct adapter *);
296static void	em_disable_promisc(struct adapter *);
297static void	em_set_multi(struct adapter *);
298static void	em_update_link_status(struct adapter *);
299static void	em_refresh_mbufs(struct rx_ring *, int);
300static void	em_register_vlan(void *, struct ifnet *, u16);
301static void	em_unregister_vlan(void *, struct ifnet *, u16);
302static void	em_setup_vlan_hw_support(struct adapter *);
303static int	em_xmit(struct tx_ring *, struct mbuf **);
304static int	em_dma_malloc(struct adapter *, bus_size_t,
305		    struct em_dma_alloc *, int);
306static void	em_dma_free(struct adapter *, struct em_dma_alloc *);
307static int	em_sysctl_nvm_info(SYSCTL_HANDLER_ARGS);
308static void	em_print_nvm_info(struct adapter *);
309static int	em_sysctl_debug_info(SYSCTL_HANDLER_ARGS);
310static void	em_print_debug_info(struct adapter *);
311static int 	em_is_valid_ether_addr(u8 *);
312static int	em_sysctl_int_delay(SYSCTL_HANDLER_ARGS);
313static void	em_add_int_delay_sysctl(struct adapter *, const char *,
314		    const char *, struct em_int_delay_info *, int, int);
315/* Management and WOL Support */
316static void	em_init_manageability(struct adapter *);
317static void	em_release_manageability(struct adapter *);
318static void     em_get_hw_control(struct adapter *);
319static void     em_release_hw_control(struct adapter *);
320static void	em_get_wakeup(device_t);
321static void     em_enable_wakeup(device_t);
322static int	em_enable_phy_wakeup(struct adapter *);
323static void	em_led_func(void *, int);
324static void	em_disable_aspm(struct adapter *);
325
326static int	em_irq_fast(void *);
327
328/* MSIX handlers */
329static void	em_msix_tx(void *);
330static void	em_msix_rx(void *);
331static void	em_msix_link(void *);
332static void	em_handle_tx(void *context, int pending);
333static void	em_handle_rx(void *context, int pending);
334static void	em_handle_link(void *context, int pending);
335
336#ifdef EM_MULTIQUEUE
337static void	em_enable_vectors_82574(struct adapter *);
338#endif
339
340static void	em_set_sysctl_value(struct adapter *, const char *,
341		    const char *, int *, int);
342static int	em_set_flowcntl(SYSCTL_HANDLER_ARGS);
343static int	em_sysctl_eee(SYSCTL_HANDLER_ARGS);
344
345static __inline void em_rx_discard(struct rx_ring *, int);
346
347#ifdef DEVICE_POLLING
348static poll_handler_t em_poll;
349#endif /* POLLING */
350
351/*********************************************************************
352 *  FreeBSD Device Interface Entry Points
353 *********************************************************************/
354
355static device_method_t em_methods[] = {
356	/* Device interface */
357	DEVMETHOD(device_probe, em_probe),
358	DEVMETHOD(device_attach, em_attach),
359	DEVMETHOD(device_detach, em_detach),
360	DEVMETHOD(device_shutdown, em_shutdown),
361	DEVMETHOD(device_suspend, em_suspend),
362	DEVMETHOD(device_resume, em_resume),
363	DEVMETHOD_END
364};
365
366static driver_t em_driver = {
367	"em", em_methods, sizeof(struct adapter),
368};
369
370devclass_t em_devclass;
371DRIVER_MODULE(em, pci, em_driver, em_devclass, 0, 0);
372MODULE_DEPEND(em, pci, 1, 1, 1);
373MODULE_DEPEND(em, ether, 1, 1, 1);
374
375/*********************************************************************
376 *  Tunable default values.
377 *********************************************************************/
378
379#define EM_TICKS_TO_USECS(ticks)	((1024 * (ticks) + 500) / 1000)
380#define EM_USECS_TO_TICKS(usecs)	((1000 * (usecs) + 512) / 1024)
381#define M_TSO_LEN			66
382
383#define MAX_INTS_PER_SEC	8000
384#define DEFAULT_ITR		(1000000000/(MAX_INTS_PER_SEC * 256))
385
386#define TSO_WORKAROUND	4
387
388static SYSCTL_NODE(_hw, OID_AUTO, em, CTLFLAG_RD, 0, "EM driver parameters");
389
390static int em_disable_crc_stripping = 0;
391SYSCTL_INT(_hw_em, OID_AUTO, disable_crc_stripping, CTLFLAG_RDTUN,
392    &em_disable_crc_stripping, 0, "Disable CRC Stripping");
393
394static int em_tx_int_delay_dflt = EM_TICKS_TO_USECS(EM_TIDV);
395static int em_rx_int_delay_dflt = EM_TICKS_TO_USECS(EM_RDTR);
396TUNABLE_INT("hw.em.tx_int_delay", &em_tx_int_delay_dflt);
397TUNABLE_INT("hw.em.rx_int_delay", &em_rx_int_delay_dflt);
398SYSCTL_INT(_hw_em, OID_AUTO, tx_int_delay, CTLFLAG_RDTUN, &em_tx_int_delay_dflt,
399    0, "Default transmit interrupt delay in usecs");
400SYSCTL_INT(_hw_em, OID_AUTO, rx_int_delay, CTLFLAG_RDTUN, &em_rx_int_delay_dflt,
401    0, "Default receive interrupt delay in usecs");
402
403static int em_tx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_TADV);
404static int em_rx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_RADV);
405TUNABLE_INT("hw.em.tx_abs_int_delay", &em_tx_abs_int_delay_dflt);
406TUNABLE_INT("hw.em.rx_abs_int_delay", &em_rx_abs_int_delay_dflt);
407SYSCTL_INT(_hw_em, OID_AUTO, tx_abs_int_delay, CTLFLAG_RDTUN,
408    &em_tx_abs_int_delay_dflt, 0,
409    "Default transmit interrupt delay limit in usecs");
410SYSCTL_INT(_hw_em, OID_AUTO, rx_abs_int_delay, CTLFLAG_RDTUN,
411    &em_rx_abs_int_delay_dflt, 0,
412    "Default receive interrupt delay limit in usecs");
413
414static int em_rxd = EM_DEFAULT_RXD;
415static int em_txd = EM_DEFAULT_TXD;
416TUNABLE_INT("hw.em.rxd", &em_rxd);
417TUNABLE_INT("hw.em.txd", &em_txd);
418SYSCTL_INT(_hw_em, OID_AUTO, rxd, CTLFLAG_RDTUN, &em_rxd, 0,
419    "Number of receive descriptors per queue");
420SYSCTL_INT(_hw_em, OID_AUTO, txd, CTLFLAG_RDTUN, &em_txd, 0,
421    "Number of transmit descriptors per queue");
422
423static int em_smart_pwr_down = FALSE;
424TUNABLE_INT("hw.em.smart_pwr_down", &em_smart_pwr_down);
425SYSCTL_INT(_hw_em, OID_AUTO, smart_pwr_down, CTLFLAG_RDTUN, &em_smart_pwr_down,
426    0, "Set to true to leave smart power down enabled on newer adapters");
427
428/* Controls whether promiscuous also shows bad packets */
429static int em_debug_sbp = FALSE;
430TUNABLE_INT("hw.em.sbp", &em_debug_sbp);
431SYSCTL_INT(_hw_em, OID_AUTO, sbp, CTLFLAG_RDTUN, &em_debug_sbp, 0,
432    "Show bad packets in promiscuous mode");
433
434static int em_enable_msix = TRUE;
435TUNABLE_INT("hw.em.enable_msix", &em_enable_msix);
436SYSCTL_INT(_hw_em, OID_AUTO, enable_msix, CTLFLAG_RDTUN, &em_enable_msix, 0,
437    "Enable MSI-X interrupts");
438
439#ifdef EM_MULTIQUEUE
440static int em_num_queues = 1;
441TUNABLE_INT("hw.em.num_queues", &em_num_queues);
442SYSCTL_INT(_hw_em, OID_AUTO, num_queues, CTLFLAG_RDTUN, &em_num_queues, 0,
443    "82574 only: Number of queues to configure, 0 indicates autoconfigure");
444#endif
445
446/*
447** Global variable to store last used CPU when binding queues
448** to CPUs in igb_allocate_msix.  Starts at CPU_FIRST and increments when a
449** queue is bound to a cpu.
450*/
451static int em_last_bind_cpu = -1;
452
453/* How many packets rxeof tries to clean at a time */
454static int em_rx_process_limit = 100;
455TUNABLE_INT("hw.em.rx_process_limit", &em_rx_process_limit);
456SYSCTL_INT(_hw_em, OID_AUTO, rx_process_limit, CTLFLAG_RDTUN,
457    &em_rx_process_limit, 0,
458    "Maximum number of received packets to process "
459    "at a time, -1 means unlimited");
460
461/* Energy efficient ethernet - default to OFF */
462static int eee_setting = 1;
463TUNABLE_INT("hw.em.eee_setting", &eee_setting);
464SYSCTL_INT(_hw_em, OID_AUTO, eee_setting, CTLFLAG_RDTUN, &eee_setting, 0,
465    "Enable Energy Efficient Ethernet");
466
467/* Global used in WOL setup with multiport cards */
468static int global_quad_port_a = 0;
469
470#ifdef DEV_NETMAP	/* see ixgbe.c for details */
471#include <dev/netmap/if_em_netmap.h>
472#endif /* DEV_NETMAP */
473
474/*********************************************************************
475 *  Device identification routine
476 *
477 *  em_probe determines if the driver should be loaded on
478 *  adapter based on PCI vendor/device id of the adapter.
479 *
480 *  return BUS_PROBE_DEFAULT on success, positive on failure
481 *********************************************************************/
482
483static int
484em_probe(device_t dev)
485{
486	char		adapter_name[60];
487	uint16_t	pci_vendor_id = 0;
488	uint16_t	pci_device_id = 0;
489	uint16_t	pci_subvendor_id = 0;
490	uint16_t	pci_subdevice_id = 0;
491	em_vendor_info_t *ent;
492
493	INIT_DEBUGOUT("em_probe: begin");
494
495	pci_vendor_id = pci_get_vendor(dev);
496	if (pci_vendor_id != EM_VENDOR_ID)
497		return (ENXIO);
498
499	pci_device_id = pci_get_device(dev);
500	pci_subvendor_id = pci_get_subvendor(dev);
501	pci_subdevice_id = pci_get_subdevice(dev);
502
503	ent = em_vendor_info_array;
504	while (ent->vendor_id != 0) {
505		if ((pci_vendor_id == ent->vendor_id) &&
506		    (pci_device_id == ent->device_id) &&
507
508		    ((pci_subvendor_id == ent->subvendor_id) ||
509		    (ent->subvendor_id == PCI_ANY_ID)) &&
510
511		    ((pci_subdevice_id == ent->subdevice_id) ||
512		    (ent->subdevice_id == PCI_ANY_ID))) {
513			sprintf(adapter_name, "%s %s",
514				em_strings[ent->index],
515				em_driver_version);
516			device_set_desc_copy(dev, adapter_name);
517			return (BUS_PROBE_DEFAULT);
518		}
519		ent++;
520	}
521
522	return (ENXIO);
523}
524
525/*********************************************************************
526 *  Device initialization routine
527 *
528 *  The attach entry point is called when the driver is being loaded.
529 *  This routine identifies the type of hardware, allocates all resources
530 *  and initializes the hardware.
531 *
532 *  return 0 on success, positive on failure
533 *********************************************************************/
534
535static int
536em_attach(device_t dev)
537{
538	struct adapter	*adapter;
539	struct e1000_hw	*hw;
540	int		error = 0;
541
542	INIT_DEBUGOUT("em_attach: begin");
543
544	if (resource_disabled("em", device_get_unit(dev))) {
545		device_printf(dev, "Disabled by device hint\n");
546		return (ENXIO);
547	}
548
549	adapter = device_get_softc(dev);
550	adapter->dev = adapter->osdep.dev = dev;
551	hw = &adapter->hw;
552	EM_CORE_LOCK_INIT(adapter, device_get_nameunit(dev));
553
554	/* SYSCTL stuff */
555	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
556	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
557	    OID_AUTO, "nvm", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
558	    em_sysctl_nvm_info, "I", "NVM Information");
559
560	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
561	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
562	    OID_AUTO, "debug", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
563	    em_sysctl_debug_info, "I", "Debug Information");
564
565	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
566	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
567	    OID_AUTO, "fc", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
568	    em_set_flowcntl, "I", "Flow Control");
569
570	callout_init_mtx(&adapter->timer, &adapter->core_mtx, 0);
571
572	/* Determine hardware and mac info */
573	em_identify_hardware(adapter);
574
575	/* Setup PCI resources */
576	if (em_allocate_pci_resources(adapter)) {
577		device_printf(dev, "Allocation of PCI resources failed\n");
578		error = ENXIO;
579		goto err_pci;
580	}
581
582	/*
583	** For ICH8 and family we need to
584	** map the flash memory, and this
585	** must happen after the MAC is
586	** identified
587	*/
588	if ((hw->mac.type == e1000_ich8lan) ||
589	    (hw->mac.type == e1000_ich9lan) ||
590	    (hw->mac.type == e1000_ich10lan) ||
591	    (hw->mac.type == e1000_pchlan) ||
592	    (hw->mac.type == e1000_pch2lan) ||
593	    (hw->mac.type == e1000_pch_lpt)) {
594		int rid = EM_BAR_TYPE_FLASH;
595		adapter->flash = bus_alloc_resource_any(dev,
596		    SYS_RES_MEMORY, &rid, RF_ACTIVE);
597		if (adapter->flash == NULL) {
598			device_printf(dev, "Mapping of Flash failed\n");
599			error = ENXIO;
600			goto err_pci;
601		}
602		/* This is used in the shared code */
603		hw->flash_address = (u8 *)adapter->flash;
604		adapter->osdep.flash_bus_space_tag =
605		    rman_get_bustag(adapter->flash);
606		adapter->osdep.flash_bus_space_handle =
607		    rman_get_bushandle(adapter->flash);
608	}
609	/*
610	** In the new SPT device flash is not  a
611	** seperate BAR, rather it is also in BAR0,
612	** so use the same tag and an offset handle for the
613	** FLASH read/write macros in the shared code.
614	*/
615	else if (hw->mac.type >= e1000_pch_spt) {
616		adapter->osdep.flash_bus_space_tag =
617		    adapter->osdep.mem_bus_space_tag;
618		adapter->osdep.flash_bus_space_handle =
619		    adapter->osdep.mem_bus_space_handle
620		    + E1000_FLASH_BASE_ADDR;
621	}
622
623	/* Do Shared Code initialization */
624	error = e1000_setup_init_funcs(hw, TRUE);
625	if (error) {
626		device_printf(dev, "Setup of Shared code failed, error %d\n",
627		    error);
628		error = ENXIO;
629		goto err_pci;
630	}
631
632	/*
633	 * Setup MSI/X or MSI if PCI Express
634	 */
635	adapter->msix = em_setup_msix(adapter);
636
637	e1000_get_bus_info(hw);
638
639	/* Set up some sysctls for the tunable interrupt delays */
640	em_add_int_delay_sysctl(adapter, "rx_int_delay",
641	    "receive interrupt delay in usecs", &adapter->rx_int_delay,
642	    E1000_REGISTER(hw, E1000_RDTR), em_rx_int_delay_dflt);
643	em_add_int_delay_sysctl(adapter, "tx_int_delay",
644	    "transmit interrupt delay in usecs", &adapter->tx_int_delay,
645	    E1000_REGISTER(hw, E1000_TIDV), em_tx_int_delay_dflt);
646	em_add_int_delay_sysctl(adapter, "rx_abs_int_delay",
647	    "receive interrupt delay limit in usecs",
648	    &adapter->rx_abs_int_delay,
649	    E1000_REGISTER(hw, E1000_RADV),
650	    em_rx_abs_int_delay_dflt);
651	em_add_int_delay_sysctl(adapter, "tx_abs_int_delay",
652	    "transmit interrupt delay limit in usecs",
653	    &adapter->tx_abs_int_delay,
654	    E1000_REGISTER(hw, E1000_TADV),
655	    em_tx_abs_int_delay_dflt);
656	em_add_int_delay_sysctl(adapter, "itr",
657	    "interrupt delay limit in usecs/4",
658	    &adapter->tx_itr,
659	    E1000_REGISTER(hw, E1000_ITR),
660	    DEFAULT_ITR);
661
662	/* Sysctl for limiting the amount of work done in the taskqueue */
663	em_set_sysctl_value(adapter, "rx_processing_limit",
664	    "max number of rx packets to process", &adapter->rx_process_limit,
665	    em_rx_process_limit);
666
667	/*
668	 * Validate number of transmit and receive descriptors. It
669	 * must not exceed hardware maximum, and must be multiple
670	 * of E1000_DBA_ALIGN.
671	 */
672	if (((em_txd * sizeof(struct e1000_tx_desc)) % EM_DBA_ALIGN) != 0 ||
673	    (em_txd > EM_MAX_TXD) || (em_txd < EM_MIN_TXD)) {
674		device_printf(dev, "Using %d TX descriptors instead of %d!\n",
675		    EM_DEFAULT_TXD, em_txd);
676		adapter->num_tx_desc = EM_DEFAULT_TXD;
677	} else
678		adapter->num_tx_desc = em_txd;
679
680	if (((em_rxd * sizeof(union e1000_rx_desc_extended)) % EM_DBA_ALIGN) != 0 ||
681	    (em_rxd > EM_MAX_RXD) || (em_rxd < EM_MIN_RXD)) {
682		device_printf(dev, "Using %d RX descriptors instead of %d!\n",
683		    EM_DEFAULT_RXD, em_rxd);
684		adapter->num_rx_desc = EM_DEFAULT_RXD;
685	} else
686		adapter->num_rx_desc = em_rxd;
687
688	hw->mac.autoneg = DO_AUTO_NEG;
689	hw->phy.autoneg_wait_to_complete = FALSE;
690	hw->phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
691
692	/* Copper options */
693	if (hw->phy.media_type == e1000_media_type_copper) {
694		hw->phy.mdix = AUTO_ALL_MODES;
695		hw->phy.disable_polarity_correction = FALSE;
696		hw->phy.ms_type = EM_MASTER_SLAVE;
697	}
698
699	/*
700	 * Set the frame limits assuming
701	 * standard ethernet sized frames.
702	 */
703	adapter->hw.mac.max_frame_size =
704	    ETHERMTU + ETHER_HDR_LEN + ETHERNET_FCS_SIZE;
705
706	/*
707	 * This controls when hardware reports transmit completion
708	 * status.
709	 */
710	hw->mac.report_tx_early = 1;
711
712	/*
713	** Get queue/ring memory
714	*/
715	if (em_allocate_queues(adapter)) {
716		error = ENOMEM;
717		goto err_pci;
718	}
719
720	/* Allocate multicast array memory. */
721	adapter->mta = malloc(sizeof(u8) * ETH_ADDR_LEN *
722	    MAX_NUM_MULTICAST_ADDRESSES, M_DEVBUF, M_NOWAIT);
723	if (adapter->mta == NULL) {
724		device_printf(dev, "Can not allocate multicast setup array\n");
725		error = ENOMEM;
726		goto err_late;
727	}
728
729	/* Check SOL/IDER usage */
730	if (e1000_check_reset_block(hw))
731		device_printf(dev, "PHY reset is blocked"
732		    " due to SOL/IDER session.\n");
733
734	/* Sysctl for setting Energy Efficient Ethernet */
735	hw->dev_spec.ich8lan.eee_disable = eee_setting;
736	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
737	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
738	    OID_AUTO, "eee_control", CTLTYPE_INT|CTLFLAG_RW,
739	    adapter, 0, em_sysctl_eee, "I",
740	    "Disable Energy Efficient Ethernet");
741
742	/*
743	** Start from a known state, this is
744	** important in reading the nvm and
745	** mac from that.
746	*/
747	e1000_reset_hw(hw);
748
749
750	/* Make sure we have a good EEPROM before we read from it */
751	if (e1000_validate_nvm_checksum(hw) < 0) {
752		/*
753		** Some PCI-E parts fail the first check due to
754		** the link being in sleep state, call it again,
755		** if it fails a second time its a real issue.
756		*/
757		if (e1000_validate_nvm_checksum(hw) < 0) {
758			device_printf(dev,
759			    "The EEPROM Checksum Is Not Valid\n");
760			error = EIO;
761			goto err_late;
762		}
763	}
764
765	/* Copy the permanent MAC address out of the EEPROM */
766	if (e1000_read_mac_addr(hw) < 0) {
767		device_printf(dev, "EEPROM read error while reading MAC"
768		    " address\n");
769		error = EIO;
770		goto err_late;
771	}
772
773	if (!em_is_valid_ether_addr(hw->mac.addr)) {
774		device_printf(dev, "Invalid MAC address\n");
775		error = EIO;
776		goto err_late;
777	}
778
779	/* Disable ULP support */
780	e1000_disable_ulp_lpt_lp(hw, TRUE);
781
782	/*
783	**  Do interrupt configuration
784	*/
785	if (adapter->msix > 1) /* Do MSIX */
786		error = em_allocate_msix(adapter);
787	else  /* MSI or Legacy */
788		error = em_allocate_legacy(adapter);
789	if (error)
790		goto err_late;
791
792	/*
793	 * Get Wake-on-Lan and Management info for later use
794	 */
795	em_get_wakeup(dev);
796
797	/* Setup OS specific network interface */
798	if (em_setup_interface(dev, adapter) != 0)
799		goto err_late;
800
801	em_reset(adapter);
802
803	/* Initialize statistics */
804	em_update_stats_counters(adapter);
805
806	hw->mac.get_link_status = 1;
807	em_update_link_status(adapter);
808
809	/* Register for VLAN events */
810	adapter->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
811	    em_register_vlan, adapter, EVENTHANDLER_PRI_FIRST);
812	adapter->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
813	    em_unregister_vlan, adapter, EVENTHANDLER_PRI_FIRST);
814
815	em_add_hw_stats(adapter);
816
817	/* Non-AMT based hardware can now take control from firmware */
818	if (adapter->has_manage && !adapter->has_amt)
819		em_get_hw_control(adapter);
820
821	/* Tell the stack that the interface is not active */
822	adapter->ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
823	adapter->ifp->if_drv_flags |= IFF_DRV_OACTIVE;
824
825	adapter->led_dev = led_create(em_led_func, adapter,
826	    device_get_nameunit(dev));
827#ifdef DEV_NETMAP
828	em_netmap_attach(adapter);
829#endif /* DEV_NETMAP */
830
831	INIT_DEBUGOUT("em_attach: end");
832
833	return (0);
834
835err_late:
836	em_free_transmit_structures(adapter);
837	em_free_receive_structures(adapter);
838	em_release_hw_control(adapter);
839	if (adapter->ifp != NULL)
840		if_free(adapter->ifp);
841err_pci:
842	em_free_pci_resources(adapter);
843	free(adapter->mta, M_DEVBUF);
844	EM_CORE_LOCK_DESTROY(adapter);
845
846	return (error);
847}
848
849/*********************************************************************
850 *  Device removal routine
851 *
852 *  The detach entry point is called when the driver is being removed.
853 *  This routine stops the adapter and deallocates all the resources
854 *  that were allocated for driver operation.
855 *
856 *  return 0 on success, positive on failure
857 *********************************************************************/
858
859static int
860em_detach(device_t dev)
861{
862	struct adapter	*adapter = device_get_softc(dev);
863	struct ifnet	*ifp = adapter->ifp;
864
865	INIT_DEBUGOUT("em_detach: begin");
866
867	/* Make sure VLANS are not using driver */
868	if (adapter->ifp->if_vlantrunk != NULL) {
869		device_printf(dev,"Vlan in use, detach first\n");
870		return (EBUSY);
871	}
872
873#ifdef DEVICE_POLLING
874	if (ifp->if_capenable & IFCAP_POLLING)
875		ether_poll_deregister(ifp);
876#endif
877
878	if (adapter->led_dev != NULL)
879		led_destroy(adapter->led_dev);
880
881	EM_CORE_LOCK(adapter);
882	adapter->in_detach = 1;
883	em_stop(adapter);
884	EM_CORE_UNLOCK(adapter);
885	EM_CORE_LOCK_DESTROY(adapter);
886
887	e1000_phy_hw_reset(&adapter->hw);
888
889	em_release_manageability(adapter);
890	em_release_hw_control(adapter);
891
892	/* Unregister VLAN events */
893	if (adapter->vlan_attach != NULL)
894		EVENTHANDLER_DEREGISTER(vlan_config, adapter->vlan_attach);
895	if (adapter->vlan_detach != NULL)
896		EVENTHANDLER_DEREGISTER(vlan_unconfig, adapter->vlan_detach);
897
898	ether_ifdetach(adapter->ifp);
899	callout_drain(&adapter->timer);
900
901#ifdef DEV_NETMAP
902	netmap_detach(ifp);
903#endif /* DEV_NETMAP */
904
905	em_free_pci_resources(adapter);
906	bus_generic_detach(dev);
907	if_free(ifp);
908
909	em_free_transmit_structures(adapter);
910	em_free_receive_structures(adapter);
911
912	em_release_hw_control(adapter);
913	free(adapter->mta, M_DEVBUF);
914
915	return (0);
916}
917
918/*********************************************************************
919 *
920 *  Shutdown entry point
921 *
922 **********************************************************************/
923
924static int
925em_shutdown(device_t dev)
926{
927	return em_suspend(dev);
928}
929
930/*
931 * Suspend/resume device methods.
932 */
933static int
934em_suspend(device_t dev)
935{
936	struct adapter *adapter = device_get_softc(dev);
937
938	EM_CORE_LOCK(adapter);
939
940        em_release_manageability(adapter);
941	em_release_hw_control(adapter);
942	em_enable_wakeup(dev);
943
944	EM_CORE_UNLOCK(adapter);
945
946	return bus_generic_suspend(dev);
947}
948
949static int
950em_resume(device_t dev)
951{
952	struct adapter *adapter = device_get_softc(dev);
953	struct tx_ring	*txr = adapter->tx_rings;
954	struct ifnet *ifp = adapter->ifp;
955
956	EM_CORE_LOCK(adapter);
957	if (adapter->hw.mac.type == e1000_pch2lan)
958		e1000_resume_workarounds_pchlan(&adapter->hw);
959	em_init_locked(adapter);
960	em_init_manageability(adapter);
961
962	if ((ifp->if_flags & IFF_UP) &&
963	    (ifp->if_drv_flags & IFF_DRV_RUNNING) && adapter->link_active) {
964		for (int i = 0; i < adapter->num_queues; i++, txr++) {
965			EM_TX_LOCK(txr);
966#ifdef EM_MULTIQUEUE
967			if (!drbr_empty(ifp, txr->br))
968				em_mq_start_locked(ifp, txr);
969#else
970			if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
971				em_start_locked(ifp, txr);
972#endif
973			EM_TX_UNLOCK(txr);
974		}
975	}
976	EM_CORE_UNLOCK(adapter);
977
978	return bus_generic_resume(dev);
979}
980
981
982#ifndef EM_MULTIQUEUE
983static void
984em_start_locked(struct ifnet *ifp, struct tx_ring *txr)
985{
986	struct adapter	*adapter = ifp->if_softc;
987	struct mbuf	*m_head;
988
989	EM_TX_LOCK_ASSERT(txr);
990
991	if ((ifp->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) !=
992	    IFF_DRV_RUNNING)
993		return;
994
995	if (!adapter->link_active)
996		return;
997
998	while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
999        	/* Call cleanup if number of TX descriptors low */
1000		if (txr->tx_avail <= EM_TX_CLEANUP_THRESHOLD)
1001			em_txeof(txr);
1002		if (txr->tx_avail < EM_MAX_SCATTER) {
1003			ifp->if_drv_flags |= IFF_DRV_OACTIVE;
1004			break;
1005		}
1006		IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
1007		if (m_head == NULL)
1008			break;
1009		/*
1010		 *  Encapsulation can modify our pointer, and or make it
1011		 *  NULL on failure.  In that event, we can't requeue.
1012		 */
1013		if (em_xmit(txr, &m_head)) {
1014			if (m_head == NULL)
1015				break;
1016			IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
1017			break;
1018		}
1019
1020		/* Mark the queue as having work */
1021		if (txr->busy == EM_TX_IDLE)
1022			txr->busy = EM_TX_BUSY;
1023
1024		/* Send a copy of the frame to the BPF listener */
1025		ETHER_BPF_MTAP(ifp, m_head);
1026
1027	}
1028
1029	return;
1030}
1031
1032static void
1033em_start(struct ifnet *ifp)
1034{
1035	struct adapter	*adapter = ifp->if_softc;
1036	struct tx_ring	*txr = adapter->tx_rings;
1037
1038	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1039		EM_TX_LOCK(txr);
1040		em_start_locked(ifp, txr);
1041		EM_TX_UNLOCK(txr);
1042	}
1043	return;
1044}
1045#else /* EM_MULTIQUEUE */
1046/*********************************************************************
1047 *  Multiqueue Transmit routines
1048 *
1049 *  em_mq_start is called by the stack to initiate a transmit.
1050 *  however, if busy the driver can queue the request rather
1051 *  than do an immediate send. It is this that is an advantage
1052 *  in this driver, rather than also having multiple tx queues.
1053 **********************************************************************/
1054/*
1055** Multiqueue capable stack interface
1056*/
1057static int
1058em_mq_start(struct ifnet *ifp, struct mbuf *m)
1059{
1060	struct adapter	*adapter = ifp->if_softc;
1061	struct tx_ring	*txr = adapter->tx_rings;
1062	unsigned int	i, error;
1063
1064	if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE)
1065		i = m->m_pkthdr.flowid % adapter->num_queues;
1066	else
1067		i = curcpu % adapter->num_queues;
1068
1069	txr = &adapter->tx_rings[i];
1070
1071	error = drbr_enqueue(ifp, txr->br, m);
1072	if (error)
1073		return (error);
1074
1075	if (EM_TX_TRYLOCK(txr)) {
1076		em_mq_start_locked(ifp, txr);
1077		EM_TX_UNLOCK(txr);
1078	} else
1079		taskqueue_enqueue(txr->tq, &txr->tx_task);
1080
1081	return (0);
1082}
1083
1084static int
1085em_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr)
1086{
1087	struct adapter  *adapter = txr->adapter;
1088        struct mbuf     *next;
1089        int             err = 0, enq = 0;
1090
1091	if ((ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) !=
1092	    IFF_DRV_RUNNING || adapter->link_active == 0) {
1093		return (ENETDOWN);
1094	}
1095
1096	/* Process the queue */
1097	while ((next = drbr_peek(ifp, txr->br)) != NULL) {
1098		if ((err = em_xmit(txr, &next)) != 0) {
1099			if (next == NULL) {
1100				/* It was freed, move forward */
1101				drbr_advance(ifp, txr->br);
1102			} else {
1103				/*
1104				 * Still have one left, it may not be
1105				 * the same since the transmit function
1106				 * may have changed it.
1107				 */
1108				drbr_putback(ifp, txr->br, next);
1109			}
1110			break;
1111		}
1112		drbr_advance(ifp, txr->br);
1113		enq++;
1114		ifp->if_obytes += next->m_pkthdr.len;
1115		if (next->m_flags & M_MCAST)
1116			ifp->if_omcasts++;
1117		ETHER_BPF_MTAP(ifp, next);
1118		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
1119                        break;
1120	}
1121
1122	/* Mark the queue as having work */
1123	if ((enq > 0) && (txr->busy == EM_TX_IDLE))
1124		txr->busy = EM_TX_BUSY;
1125
1126	if (txr->tx_avail < EM_MAX_SCATTER)
1127		em_txeof(txr);
1128	if (txr->tx_avail < EM_MAX_SCATTER) {
1129		ifp->if_drv_flags |= IFF_DRV_OACTIVE;
1130	}
1131	return (err);
1132}
1133
1134/*
1135** Flush all ring buffers
1136*/
1137static void
1138em_qflush(struct ifnet *ifp)
1139{
1140	struct adapter  *adapter = ifp->if_softc;
1141	struct tx_ring  *txr = adapter->tx_rings;
1142	struct mbuf     *m;
1143
1144	for (int i = 0; i < adapter->num_queues; i++, txr++) {
1145		EM_TX_LOCK(txr);
1146		while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
1147			m_freem(m);
1148		EM_TX_UNLOCK(txr);
1149	}
1150	if_qflush(ifp);
1151}
1152#endif /* EM_MULTIQUEUE */
1153
1154/*********************************************************************
1155 *  Ioctl entry point
1156 *
1157 *  em_ioctl is called when the user wants to configure the
1158 *  interface.
1159 *
1160 *  return 0 on success, positive on failure
1161 **********************************************************************/
1162
1163static int
1164em_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
1165{
1166	struct adapter	*adapter = ifp->if_softc;
1167	struct ifreq	*ifr = (struct ifreq *)data;
1168#if defined(INET) || defined(INET6)
1169	struct ifaddr	*ifa = (struct ifaddr *)data;
1170#endif
1171	bool		avoid_reset = FALSE;
1172	int		error = 0;
1173
1174	if (adapter->in_detach)
1175		return (error);
1176
1177	switch (command) {
1178	case SIOCSIFADDR:
1179#ifdef INET
1180		if (ifa->ifa_addr->sa_family == AF_INET)
1181			avoid_reset = TRUE;
1182#endif
1183#ifdef INET6
1184		if (ifa->ifa_addr->sa_family == AF_INET6)
1185			avoid_reset = TRUE;
1186#endif
1187		/*
1188		** Calling init results in link renegotiation,
1189		** so we avoid doing it when possible.
1190		*/
1191		if (avoid_reset) {
1192			ifp->if_flags |= IFF_UP;
1193			if (!(ifp->if_drv_flags & IFF_DRV_RUNNING))
1194				em_init(adapter);
1195#ifdef INET
1196			if (!(ifp->if_flags & IFF_NOARP))
1197				arp_ifinit(ifp, ifa);
1198#endif
1199		} else
1200			error = ether_ioctl(ifp, command, data);
1201		break;
1202	case SIOCSIFMTU:
1203	    {
1204		int max_frame_size;
1205
1206		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFMTU (Set Interface MTU)");
1207
1208		EM_CORE_LOCK(adapter);
1209		switch (adapter->hw.mac.type) {
1210		case e1000_82571:
1211		case e1000_82572:
1212		case e1000_ich9lan:
1213		case e1000_ich10lan:
1214		case e1000_pch2lan:
1215		case e1000_pch_lpt:
1216		case e1000_pch_spt:
1217		case e1000_pch_cnp:
1218		case e1000_82574:
1219		case e1000_82583:
1220		case e1000_80003es2lan:	/* 9K Jumbo Frame size */
1221			max_frame_size = 9234;
1222			break;
1223		case e1000_pchlan:
1224			max_frame_size = 4096;
1225			break;
1226			/* Adapters that do not support jumbo frames */
1227		case e1000_ich8lan:
1228			max_frame_size = ETHER_MAX_LEN;
1229			break;
1230		default:
1231			max_frame_size = MAX_JUMBO_FRAME_SIZE;
1232		}
1233		if (ifr->ifr_mtu > max_frame_size - ETHER_HDR_LEN -
1234		    ETHER_CRC_LEN) {
1235			EM_CORE_UNLOCK(adapter);
1236			error = EINVAL;
1237			break;
1238		}
1239
1240		ifp->if_mtu = ifr->ifr_mtu;
1241		adapter->hw.mac.max_frame_size =
1242		    ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
1243		if (ifp->if_drv_flags & IFF_DRV_RUNNING)
1244			em_init_locked(adapter);
1245		EM_CORE_UNLOCK(adapter);
1246		break;
1247	    }
1248	case SIOCSIFFLAGS:
1249		IOCTL_DEBUGOUT("ioctl rcv'd:\
1250		    SIOCSIFFLAGS (Set Interface Flags)");
1251		EM_CORE_LOCK(adapter);
1252		if (ifp->if_flags & IFF_UP) {
1253			if ((ifp->if_drv_flags & IFF_DRV_RUNNING)) {
1254				if ((ifp->if_flags ^ adapter->if_flags) &
1255				    (IFF_PROMISC | IFF_ALLMULTI)) {
1256					em_disable_promisc(adapter);
1257					em_set_promisc(adapter);
1258				}
1259			} else
1260				em_init_locked(adapter);
1261		} else
1262			if (ifp->if_drv_flags & IFF_DRV_RUNNING)
1263				em_stop(adapter);
1264		adapter->if_flags = ifp->if_flags;
1265		EM_CORE_UNLOCK(adapter);
1266		break;
1267	case SIOCADDMULTI:
1268	case SIOCDELMULTI:
1269		IOCTL_DEBUGOUT("ioctl rcv'd: SIOC(ADD|DEL)MULTI");
1270		if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1271			EM_CORE_LOCK(adapter);
1272			em_disable_intr(adapter);
1273			em_set_multi(adapter);
1274#ifdef DEVICE_POLLING
1275			if (!(ifp->if_capenable & IFCAP_POLLING))
1276#endif
1277				em_enable_intr(adapter);
1278			EM_CORE_UNLOCK(adapter);
1279		}
1280		break;
1281	case SIOCSIFMEDIA:
1282		/* Check SOL/IDER usage */
1283		EM_CORE_LOCK(adapter);
1284		if (e1000_check_reset_block(&adapter->hw)) {
1285			EM_CORE_UNLOCK(adapter);
1286			device_printf(adapter->dev, "Media change is"
1287			    " blocked due to SOL/IDER session.\n");
1288			break;
1289		}
1290		EM_CORE_UNLOCK(adapter);
1291		/* falls thru */
1292	case SIOCGIFMEDIA:
1293		IOCTL_DEBUGOUT("ioctl rcv'd: \
1294		    SIOCxIFMEDIA (Get/Set Interface Media)");
1295		error = ifmedia_ioctl(ifp, ifr, &adapter->media, command);
1296		break;
1297	case SIOCSIFCAP:
1298	    {
1299		int mask, reinit;
1300
1301		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFCAP (Set Capabilities)");
1302		reinit = 0;
1303		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
1304#ifdef DEVICE_POLLING
1305		if (mask & IFCAP_POLLING) {
1306			if (ifr->ifr_reqcap & IFCAP_POLLING) {
1307				error = ether_poll_register(em_poll, ifp);
1308				if (error)
1309					return (error);
1310				EM_CORE_LOCK(adapter);
1311				em_disable_intr(adapter);
1312				ifp->if_capenable |= IFCAP_POLLING;
1313				EM_CORE_UNLOCK(adapter);
1314			} else {
1315				error = ether_poll_deregister(ifp);
1316				/* Enable interrupt even in error case */
1317				EM_CORE_LOCK(adapter);
1318				em_enable_intr(adapter);
1319				ifp->if_capenable &= ~IFCAP_POLLING;
1320				EM_CORE_UNLOCK(adapter);
1321			}
1322		}
1323#endif
1324		if (mask & IFCAP_HWCSUM) {
1325			ifp->if_capenable ^= IFCAP_HWCSUM;
1326			reinit = 1;
1327		}
1328		if (mask & IFCAP_TSO4) {
1329			ifp->if_capenable ^= IFCAP_TSO4;
1330			reinit = 1;
1331		}
1332		if (mask & IFCAP_VLAN_HWTAGGING) {
1333			ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
1334			reinit = 1;
1335		}
1336		if (mask & IFCAP_VLAN_HWFILTER) {
1337			ifp->if_capenable ^= IFCAP_VLAN_HWFILTER;
1338			reinit = 1;
1339		}
1340		if (mask & IFCAP_VLAN_HWTSO) {
1341			ifp->if_capenable ^= IFCAP_VLAN_HWTSO;
1342			reinit = 1;
1343		}
1344		if ((mask & IFCAP_WOL) &&
1345		    (ifp->if_capabilities & IFCAP_WOL) != 0) {
1346			if (mask & IFCAP_WOL_MCAST)
1347				ifp->if_capenable ^= IFCAP_WOL_MCAST;
1348			if (mask & IFCAP_WOL_MAGIC)
1349				ifp->if_capenable ^= IFCAP_WOL_MAGIC;
1350		}
1351		if (reinit && (ifp->if_drv_flags & IFF_DRV_RUNNING))
1352			em_init(adapter);
1353		VLAN_CAPABILITIES(ifp);
1354		break;
1355	    }
1356
1357	default:
1358		error = ether_ioctl(ifp, command, data);
1359		break;
1360	}
1361
1362	return (error);
1363}
1364
1365
1366/*********************************************************************
1367 *  Init entry point
1368 *
1369 *  This routine is used in two ways. It is used by the stack as
1370 *  init entry point in network interface structure. It is also used
1371 *  by the driver as a hw/sw initialization routine to get to a
1372 *  consistent state.
1373 *
1374 *  return 0 on success, positive on failure
1375 **********************************************************************/
1376
1377static void
1378em_init_locked(struct adapter *adapter)
1379{
1380	struct ifnet	*ifp = adapter->ifp;
1381	device_t	dev = adapter->dev;
1382
1383	INIT_DEBUGOUT("em_init: begin");
1384
1385	EM_CORE_LOCK_ASSERT(adapter);
1386
1387	em_disable_intr(adapter);
1388	callout_stop(&adapter->timer);
1389
1390	/* Get the latest mac address, User can use a LAA */
1391        bcopy(IF_LLADDR(adapter->ifp), adapter->hw.mac.addr,
1392              ETHER_ADDR_LEN);
1393
1394	/* Put the address into the Receive Address Array */
1395	e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);
1396
1397	/*
1398	 * With the 82571 adapter, RAR[0] may be overwritten
1399	 * when the other port is reset, we make a duplicate
1400	 * in RAR[14] for that eventuality, this assures
1401	 * the interface continues to function.
1402	 */
1403	if (adapter->hw.mac.type == e1000_82571) {
1404		e1000_set_laa_state_82571(&adapter->hw, TRUE);
1405		e1000_rar_set(&adapter->hw, adapter->hw.mac.addr,
1406		    E1000_RAR_ENTRIES - 1);
1407	}
1408
1409	/* Initialize the hardware */
1410	em_reset(adapter);
1411	em_update_link_status(adapter);
1412
1413	/* Setup VLAN support, basic and offload if available */
1414	E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);
1415
1416	/* Set hardware offload abilities */
1417	if (ifp->if_capenable & IFCAP_TXCSUM)
1418		ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP);
1419	else
1420		ifp->if_hwassist &= ~(CSUM_TCP | CSUM_UDP);
1421
1422	/* Configure for OS presence */
1423	em_init_manageability(adapter);
1424
1425	/* Prepare transmit descriptors and buffers */
1426	em_setup_transmit_structures(adapter);
1427	em_initialize_transmit_unit(adapter);
1428
1429	/* Setup Multicast table */
1430	em_set_multi(adapter);
1431
1432	/*
1433	** Figure out the desired mbuf
1434	** pool for doing jumbos
1435	*/
1436	if (adapter->hw.mac.max_frame_size <= 2048)
1437		adapter->rx_mbuf_sz = MCLBYTES;
1438#ifndef CONTIGMALLOC_WORKS
1439       else
1440               adapter->rx_mbuf_sz = MJUMPAGESIZE;
1441#else
1442	else if (adapter->hw.mac.max_frame_size <= 4096)
1443		adapter->rx_mbuf_sz = MJUMPAGESIZE;
1444	else
1445		adapter->rx_mbuf_sz = MJUM9BYTES;
1446#endif
1447
1448	/* Prepare receive descriptors and buffers */
1449	if (em_setup_receive_structures(adapter)) {
1450		device_printf(dev, "Could not setup receive structures\n");
1451		em_stop(adapter);
1452		return;
1453	}
1454	em_initialize_receive_unit(adapter);
1455
1456	/* Use real VLAN Filter support? */
1457	if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) {
1458		if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
1459			/* Use real VLAN Filter support */
1460			em_setup_vlan_hw_support(adapter);
1461		else {
1462			u32 ctrl;
1463			ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL);
1464			ctrl |= E1000_CTRL_VME;
1465			E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl);
1466		}
1467	}
1468
1469	/* Don't lose promiscuous settings */
1470	em_set_promisc(adapter);
1471
1472	/* Set the interface as ACTIVE */
1473	ifp->if_drv_flags |= IFF_DRV_RUNNING;
1474	ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
1475
1476	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
1477	e1000_clear_hw_cntrs_base_generic(&adapter->hw);
1478
1479	/* MSI/X configuration for 82574 */
1480	if (adapter->hw.mac.type == e1000_82574) {
1481		int tmp;
1482		tmp = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
1483		tmp |= E1000_CTRL_EXT_PBA_CLR;
1484		E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, tmp);
1485		/* Set the IVAR - interrupt vector routing. */
1486		E1000_WRITE_REG(&adapter->hw, E1000_IVAR, adapter->ivars);
1487	}
1488
1489#ifdef DEVICE_POLLING
1490	/*
1491	 * Only enable interrupts if we are not polling, make sure
1492	 * they are off otherwise.
1493	 */
1494	if (ifp->if_capenable & IFCAP_POLLING)
1495		em_disable_intr(adapter);
1496	else
1497#endif /* DEVICE_POLLING */
1498		em_enable_intr(adapter);
1499
1500	/* AMT based hardware can now take control from firmware */
1501	if (adapter->has_manage && adapter->has_amt)
1502		em_get_hw_control(adapter);
1503}
1504
1505static void
1506em_init(void *arg)
1507{
1508	struct adapter *adapter = arg;
1509
1510	EM_CORE_LOCK(adapter);
1511	em_init_locked(adapter);
1512	EM_CORE_UNLOCK(adapter);
1513}
1514
1515
1516#ifdef DEVICE_POLLING
1517/*********************************************************************
1518 *
1519 *  Legacy polling routine: note this only works with single queue
1520 *
1521 *********************************************************************/
1522static int
1523em_poll(struct ifnet *ifp, enum poll_cmd cmd, int count)
1524{
1525	struct adapter *adapter = ifp->if_softc;
1526	struct tx_ring	*txr = adapter->tx_rings;
1527	struct rx_ring	*rxr = adapter->rx_rings;
1528	u32		reg_icr;
1529	int		rx_done;
1530
1531	EM_CORE_LOCK(adapter);
1532	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
1533		EM_CORE_UNLOCK(adapter);
1534		return (0);
1535	}
1536
1537	if (cmd == POLL_AND_CHECK_STATUS) {
1538		reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1539		if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
1540			callout_stop(&adapter->timer);
1541			adapter->hw.mac.get_link_status = 1;
1542			em_update_link_status(adapter);
1543			callout_reset(&adapter->timer, hz,
1544			    em_local_timer, adapter);
1545		}
1546	}
1547	EM_CORE_UNLOCK(adapter);
1548
1549	em_rxeof(rxr, count, &rx_done);
1550
1551	EM_TX_LOCK(txr);
1552	em_txeof(txr);
1553#ifdef EM_MULTIQUEUE
1554	if (!drbr_empty(ifp, txr->br))
1555		em_mq_start_locked(ifp, txr);
1556#else
1557	if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1558		em_start_locked(ifp, txr);
1559#endif
1560	EM_TX_UNLOCK(txr);
1561
1562	return (rx_done);
1563}
1564#endif /* DEVICE_POLLING */
1565
1566
1567/*********************************************************************
1568 *
1569 *  Fast Legacy/MSI Combined Interrupt Service routine
1570 *
1571 *********************************************************************/
1572static int
1573em_irq_fast(void *arg)
1574{
1575	struct adapter	*adapter = arg;
1576	struct ifnet	*ifp;
1577	u32		reg_icr;
1578
1579	ifp = adapter->ifp;
1580
1581	reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1582
1583	/* Hot eject?  */
1584	if (reg_icr == 0xffffffff)
1585		return FILTER_STRAY;
1586
1587	/* Definitely not our interrupt.  */
1588	if (reg_icr == 0x0)
1589		return FILTER_STRAY;
1590
1591	/*
1592	 * Starting with the 82571 chip, bit 31 should be used to
1593	 * determine whether the interrupt belongs to us.
1594	 */
1595	if (adapter->hw.mac.type >= e1000_82571 &&
1596	    (reg_icr & E1000_ICR_INT_ASSERTED) == 0)
1597		return FILTER_STRAY;
1598
1599	em_disable_intr(adapter);
1600	taskqueue_enqueue(adapter->tq, &adapter->que_task);
1601
1602	/* Link status change */
1603	if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
1604		adapter->hw.mac.get_link_status = 1;
1605		taskqueue_enqueue(taskqueue_fast, &adapter->link_task);
1606	}
1607
1608	if (reg_icr & E1000_ICR_RXO)
1609		adapter->rx_overruns++;
1610	return FILTER_HANDLED;
1611}
1612
1613/* Combined RX/TX handler, used by Legacy and MSI */
1614static void
1615em_handle_que(void *context, int pending)
1616{
1617	struct adapter	*adapter = context;
1618	struct ifnet	*ifp = adapter->ifp;
1619	struct tx_ring	*txr = adapter->tx_rings;
1620	struct rx_ring	*rxr = adapter->rx_rings;
1621
1622	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1623		bool more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
1624
1625		EM_TX_LOCK(txr);
1626		em_txeof(txr);
1627#ifdef EM_MULTIQUEUE
1628		if (!drbr_empty(ifp, txr->br))
1629			em_mq_start_locked(ifp, txr);
1630#else
1631		if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1632			em_start_locked(ifp, txr);
1633#endif
1634		EM_TX_UNLOCK(txr);
1635		if (more) {
1636			taskqueue_enqueue(adapter->tq, &adapter->que_task);
1637			return;
1638		}
1639	}
1640
1641	em_enable_intr(adapter);
1642	return;
1643}
1644
1645
1646/*********************************************************************
1647 *
1648 *  MSIX Interrupt Service Routines
1649 *
1650 **********************************************************************/
1651static void
1652em_msix_tx(void *arg)
1653{
1654	struct tx_ring *txr = arg;
1655	struct adapter *adapter = txr->adapter;
1656	struct ifnet	*ifp = adapter->ifp;
1657
1658	++txr->tx_irq;
1659	EM_TX_LOCK(txr);
1660	em_txeof(txr);
1661#ifdef EM_MULTIQUEUE
1662	if (!drbr_empty(ifp, txr->br))
1663		em_mq_start_locked(ifp, txr);
1664#else
1665	if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1666		em_start_locked(ifp, txr);
1667#endif
1668
1669	/* Reenable this interrupt */
1670	E1000_WRITE_REG(&adapter->hw, E1000_IMS, txr->ims);
1671	EM_TX_UNLOCK(txr);
1672	return;
1673}
1674
1675/*********************************************************************
1676 *
1677 *  MSIX RX Interrupt Service routine
1678 *
1679 **********************************************************************/
1680
1681static void
1682em_msix_rx(void *arg)
1683{
1684	struct rx_ring	*rxr = arg;
1685	struct adapter	*adapter = rxr->adapter;
1686	bool		more;
1687
1688	++rxr->rx_irq;
1689	if (!(adapter->ifp->if_drv_flags & IFF_DRV_RUNNING))
1690		return;
1691	more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
1692	if (more)
1693		taskqueue_enqueue(rxr->tq, &rxr->rx_task);
1694	else {
1695		/* Reenable this interrupt */
1696		E1000_WRITE_REG(&adapter->hw, E1000_IMS, rxr->ims);
1697	}
1698	return;
1699}
1700
1701/*********************************************************************
1702 *
1703 *  MSIX Link Fast Interrupt Service routine
1704 *
1705 **********************************************************************/
1706static void
1707em_msix_link(void *arg)
1708{
1709	struct adapter	*adapter = arg;
1710	u32		reg_icr;
1711
1712	++adapter->link_irq;
1713	reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1714
1715	if (reg_icr & E1000_ICR_RXO)
1716		adapter->rx_overruns++;
1717
1718	if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
1719		adapter->hw.mac.get_link_status = 1;
1720		em_handle_link(adapter, 0);
1721	} else
1722		E1000_WRITE_REG(&adapter->hw, E1000_IMS,
1723		    EM_MSIX_LINK | E1000_IMS_LSC);
1724	/*
1725 	** Because we must read the ICR for this interrupt
1726 	** it may clear other causes using autoclear, for
1727 	** this reason we simply create a soft interrupt
1728 	** for all these vectors.
1729 	*/
1730	if (reg_icr) {
1731		E1000_WRITE_REG(&adapter->hw,
1732			E1000_ICS, adapter->ims);
1733	}
1734	return;
1735}
1736
1737static void
1738em_handle_rx(void *context, int pending)
1739{
1740	struct rx_ring	*rxr = context;
1741	struct adapter	*adapter = rxr->adapter;
1742        bool            more;
1743
1744	more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
1745	if (more)
1746		taskqueue_enqueue(rxr->tq, &rxr->rx_task);
1747	else {
1748		/* Reenable this interrupt */
1749		E1000_WRITE_REG(&adapter->hw, E1000_IMS, rxr->ims);
1750	}
1751}
1752
1753static void
1754em_handle_tx(void *context, int pending)
1755{
1756	struct tx_ring	*txr = context;
1757	struct adapter	*adapter = txr->adapter;
1758	struct ifnet	*ifp = adapter->ifp;
1759
1760	EM_TX_LOCK(txr);
1761	em_txeof(txr);
1762#ifdef EM_MULTIQUEUE
1763	if (!drbr_empty(ifp, txr->br))
1764		em_mq_start_locked(ifp, txr);
1765#else
1766	if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1767		em_start_locked(ifp, txr);
1768#endif
1769	E1000_WRITE_REG(&adapter->hw, E1000_IMS, txr->ims);
1770	EM_TX_UNLOCK(txr);
1771}
1772
1773static void
1774em_handle_link(void *context, int pending)
1775{
1776	struct adapter	*adapter = context;
1777	struct e1000_hw *hw = &adapter->hw;
1778	struct tx_ring	*txr = adapter->tx_rings;
1779	struct ifnet *ifp = adapter->ifp;
1780
1781	if (!(ifp->if_drv_flags & IFF_DRV_RUNNING))
1782		return;
1783
1784	EM_CORE_LOCK(adapter);
1785	callout_stop(&adapter->timer);
1786	em_update_link_status(adapter);
1787	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
1788	if (hw->mac.type == e1000_82574 && adapter->msix_mem != NULL)
1789		E1000_WRITE_REG(hw, E1000_IMS, EM_MSIX_LINK | E1000_IMS_LSC);
1790	if (adapter->link_active) {
1791		for (int i = 0; i < adapter->num_queues; i++, txr++) {
1792			EM_TX_LOCK(txr);
1793#ifdef EM_MULTIQUEUE
1794			if (!drbr_empty(ifp, txr->br))
1795				em_mq_start_locked(ifp, txr);
1796#else
1797			if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1798				em_start_locked(ifp, txr);
1799#endif
1800			EM_TX_UNLOCK(txr);
1801		}
1802	}
1803	EM_CORE_UNLOCK(adapter);
1804}
1805
1806
1807/*********************************************************************
1808 *
1809 *  Media Ioctl callback
1810 *
1811 *  This routine is called whenever the user queries the status of
1812 *  the interface using ifconfig.
1813 *
1814 **********************************************************************/
1815static void
1816em_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
1817{
1818	struct adapter *adapter = ifp->if_softc;
1819	u_char fiber_type = IFM_1000_SX;
1820
1821	INIT_DEBUGOUT("em_media_status: begin");
1822
1823	EM_CORE_LOCK(adapter);
1824	em_update_link_status(adapter);
1825
1826	ifmr->ifm_status = IFM_AVALID;
1827	ifmr->ifm_active = IFM_ETHER;
1828
1829	if (!adapter->link_active) {
1830		EM_CORE_UNLOCK(adapter);
1831		return;
1832	}
1833
1834	ifmr->ifm_status |= IFM_ACTIVE;
1835
1836	if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
1837	    (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
1838		ifmr->ifm_active |= fiber_type | IFM_FDX;
1839	} else {
1840		switch (adapter->link_speed) {
1841		case 10:
1842			ifmr->ifm_active |= IFM_10_T;
1843			break;
1844		case 100:
1845			ifmr->ifm_active |= IFM_100_TX;
1846			break;
1847		case 1000:
1848			ifmr->ifm_active |= IFM_1000_T;
1849			break;
1850		}
1851		if (adapter->link_duplex == FULL_DUPLEX)
1852			ifmr->ifm_active |= IFM_FDX;
1853		else
1854			ifmr->ifm_active |= IFM_HDX;
1855	}
1856	EM_CORE_UNLOCK(adapter);
1857}
1858
1859/*********************************************************************
1860 *
1861 *  Media Ioctl callback
1862 *
1863 *  This routine is called when the user changes speed/duplex using
1864 *  media/mediopt option with ifconfig.
1865 *
1866 **********************************************************************/
1867static int
1868em_media_change(struct ifnet *ifp)
1869{
1870	struct adapter *adapter = ifp->if_softc;
1871	struct ifmedia  *ifm = &adapter->media;
1872
1873	INIT_DEBUGOUT("em_media_change: begin");
1874
1875	if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
1876		return (EINVAL);
1877
1878	EM_CORE_LOCK(adapter);
1879	switch (IFM_SUBTYPE(ifm->ifm_media)) {
1880	case IFM_AUTO:
1881		adapter->hw.mac.autoneg = DO_AUTO_NEG;
1882		adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
1883		break;
1884	case IFM_1000_LX:
1885	case IFM_1000_SX:
1886	case IFM_1000_T:
1887		adapter->hw.mac.autoneg = DO_AUTO_NEG;
1888		adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
1889		break;
1890	case IFM_100_TX:
1891		adapter->hw.mac.autoneg = FALSE;
1892		adapter->hw.phy.autoneg_advertised = 0;
1893		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1894			adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_FULL;
1895		else
1896			adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_HALF;
1897		break;
1898	case IFM_10_T:
1899		adapter->hw.mac.autoneg = FALSE;
1900		adapter->hw.phy.autoneg_advertised = 0;
1901		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1902			adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_FULL;
1903		else
1904			adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_HALF;
1905		break;
1906	default:
1907		device_printf(adapter->dev, "Unsupported media type\n");
1908	}
1909
1910	em_init_locked(adapter);
1911	EM_CORE_UNLOCK(adapter);
1912
1913	return (0);
1914}
1915
1916/*********************************************************************
1917 *
1918 *  This routine maps the mbufs to tx descriptors.
1919 *
1920 *  return 0 on success, positive on failure
1921 **********************************************************************/
1922
1923static int
1924em_xmit(struct tx_ring *txr, struct mbuf **m_headp)
1925{
1926	struct adapter		*adapter = txr->adapter;
1927	bus_dma_segment_t	segs[EM_MAX_SCATTER];
1928	bus_dmamap_t		map;
1929	struct em_txbuffer	*tx_buffer, *tx_buffer_mapped;
1930	struct e1000_tx_desc	*ctxd = NULL;
1931	struct mbuf		*m_head;
1932	struct ether_header	*eh;
1933	struct ip		*ip = NULL;
1934	struct tcphdr		*tp = NULL;
1935	u32			txd_upper = 0, txd_lower = 0;
1936	int			ip_off, poff;
1937	int			nsegs, i, j, first, last = 0;
1938	int			error;
1939	bool			do_tso, tso_desc, remap = TRUE;
1940
1941	m_head = *m_headp;
1942	do_tso = m_head->m_pkthdr.csum_flags & CSUM_IP_TSO;
1943	tso_desc = FALSE;
1944	ip_off = poff = 0;
1945
1946	/*
1947	 * Intel recommends entire IP/TCP header length reside in a single
1948	 * buffer. If multiple descriptors are used to describe the IP and
1949	 * TCP header, each descriptor should describe one or more
1950	 * complete headers; descriptors referencing only parts of headers
1951	 * are not supported. If all layer headers are not coalesced into
1952	 * a single buffer, each buffer should not cross a 4KB boundary,
1953	 * or be larger than the maximum read request size.
1954	 * Controller also requires modifing IP/TCP header to make TSO work
1955	 * so we firstly get a writable mbuf chain then coalesce ethernet/
1956	 * IP/TCP header into a single buffer to meet the requirement of
1957	 * controller. This also simplifies IP/TCP/UDP checksum offloading
1958	 * which also has similiar restrictions.
1959	 */
1960	if (do_tso || m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD) {
1961		if (do_tso || (m_head->m_next != NULL &&
1962		    m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD)) {
1963			if (M_WRITABLE(*m_headp) == 0) {
1964				m_head = m_dup(*m_headp, M_NOWAIT);
1965				m_freem(*m_headp);
1966				if (m_head == NULL) {
1967					*m_headp = NULL;
1968					return (ENOBUFS);
1969				}
1970				*m_headp = m_head;
1971			}
1972		}
1973		/*
1974		 * XXX
1975		 * Assume IPv4, we don't have TSO/checksum offload support
1976		 * for IPv6 yet.
1977		 */
1978		ip_off = sizeof(struct ether_header);
1979		if (m_head->m_len < ip_off) {
1980			m_head = m_pullup(m_head, ip_off);
1981			if (m_head == NULL) {
1982				*m_headp = NULL;
1983				return (ENOBUFS);
1984			}
1985		}
1986		eh = mtod(m_head, struct ether_header *);
1987		if (eh->ether_type == htons(ETHERTYPE_VLAN)) {
1988			ip_off = sizeof(struct ether_vlan_header);
1989			if (m_head->m_len < ip_off) {
1990				m_head = m_pullup(m_head, ip_off);
1991				if (m_head == NULL) {
1992					*m_headp = NULL;
1993					return (ENOBUFS);
1994				}
1995			}
1996		}
1997		if (m_head->m_len < ip_off + sizeof(struct ip)) {
1998			m_head = m_pullup(m_head, ip_off + sizeof(struct ip));
1999			if (m_head == NULL) {
2000				*m_headp = NULL;
2001				return (ENOBUFS);
2002			}
2003		}
2004		ip = (struct ip *)(mtod(m_head, char *) + ip_off);
2005		poff = ip_off + (ip->ip_hl << 2);
2006
2007		if (do_tso || (m_head->m_pkthdr.csum_flags & CSUM_TCP)) {
2008			if (m_head->m_len < poff + sizeof(struct tcphdr)) {
2009				m_head = m_pullup(m_head, poff +
2010				    sizeof(struct tcphdr));
2011				if (m_head == NULL) {
2012					*m_headp = NULL;
2013					return (ENOBUFS);
2014				}
2015			}
2016			tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
2017			/*
2018			 * TSO workaround:
2019			 *   pull 4 more bytes of data into it.
2020			 */
2021			if (m_head->m_len < poff + (tp->th_off << 2)) {
2022				m_head = m_pullup(m_head, poff +
2023				                 (tp->th_off << 2) +
2024				                 TSO_WORKAROUND);
2025				if (m_head == NULL) {
2026					*m_headp = NULL;
2027					return (ENOBUFS);
2028				}
2029			}
2030			ip = (struct ip *)(mtod(m_head, char *) + ip_off);
2031			tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
2032			if (do_tso) {
2033				ip->ip_len = htons(m_head->m_pkthdr.tso_segsz +
2034				                  (ip->ip_hl << 2) +
2035				                  (tp->th_off << 2));
2036				ip->ip_sum = 0;
2037				/*
2038				 * The pseudo TCP checksum does not include TCP
2039				 * payload length so driver should recompute
2040				 * the checksum here what hardware expect to
2041				 * see. This is adherence of Microsoft's Large
2042				 * Send specification.
2043			 	*/
2044				tp->th_sum = in_pseudo(ip->ip_src.s_addr,
2045				    ip->ip_dst.s_addr, htons(IPPROTO_TCP));
2046			}
2047		} else if (m_head->m_pkthdr.csum_flags & CSUM_UDP) {
2048			if (m_head->m_len < poff + sizeof(struct udphdr)) {
2049				m_head = m_pullup(m_head, poff +
2050				    sizeof(struct udphdr));
2051				if (m_head == NULL) {
2052					*m_headp = NULL;
2053					return (ENOBUFS);
2054				}
2055			}
2056			ip = (struct ip *)(mtod(m_head, char *) + ip_off);
2057		}
2058		*m_headp = m_head;
2059	}
2060
2061	/*
2062	 * Map the packet for DMA
2063	 *
2064	 * Capture the first descriptor index,
2065	 * this descriptor will have the index
2066	 * of the EOP which is the only one that
2067	 * now gets a DONE bit writeback.
2068	 */
2069	first = txr->next_avail_desc;
2070	tx_buffer = &txr->tx_buffers[first];
2071	tx_buffer_mapped = tx_buffer;
2072	map = tx_buffer->map;
2073
2074retry:
2075	error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
2076	    *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
2077
2078	/*
2079	 * There are two types of errors we can (try) to handle:
2080	 * - EFBIG means the mbuf chain was too long and bus_dma ran
2081	 *   out of segments.  Defragment the mbuf chain and try again.
2082	 * - ENOMEM means bus_dma could not obtain enough bounce buffers
2083	 *   at this point in time.  Defer sending and try again later.
2084	 * All other errors, in particular EINVAL, are fatal and prevent the
2085	 * mbuf chain from ever going through.  Drop it and report error.
2086	 */
2087	if (error == EFBIG && remap) {
2088		struct mbuf *m;
2089
2090		m = m_collapse(*m_headp, M_NOWAIT, EM_MAX_SCATTER);
2091		if (m == NULL) {
2092			adapter->mbuf_defrag_failed++;
2093			m_freem(*m_headp);
2094			*m_headp = NULL;
2095			return (ENOBUFS);
2096		}
2097		*m_headp = m;
2098
2099		/* Try it again, but only once */
2100		remap = FALSE;
2101		goto retry;
2102	} else if (error != 0) {
2103		adapter->no_tx_dma_setup++;
2104		m_freem(*m_headp);
2105		*m_headp = NULL;
2106		return (error);
2107	}
2108
2109	/*
2110	 * TSO Hardware workaround, if this packet is not
2111	 * TSO, and is only a single descriptor long, and
2112	 * it follows a TSO burst, then we need to add a
2113	 * sentinel descriptor to prevent premature writeback.
2114	 */
2115	if ((!do_tso) && (txr->tx_tso == TRUE)) {
2116		if (nsegs == 1)
2117			tso_desc = TRUE;
2118		txr->tx_tso = FALSE;
2119	}
2120
2121        if (txr->tx_avail < (nsegs + EM_MAX_SCATTER)) {
2122                txr->no_desc_avail++;
2123		bus_dmamap_unload(txr->txtag, map);
2124		return (ENOBUFS);
2125        }
2126	m_head = *m_headp;
2127
2128	/* Do hardware assists */
2129	if (m_head->m_pkthdr.csum_flags & CSUM_IP_TSO) {
2130		em_tso_setup(txr, m_head, ip_off, ip, tp,
2131		    &txd_upper, &txd_lower);
2132		/* we need to make a final sentinel transmit desc */
2133		tso_desc = TRUE;
2134	} else if (m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD)
2135		em_transmit_checksum_setup(txr, m_head,
2136		    ip_off, ip, &txd_upper, &txd_lower);
2137
2138	if (m_head->m_flags & M_VLANTAG) {
2139		/* Set the vlan id. */
2140		txd_upper |=
2141		    (htole16(m_head->m_pkthdr.ether_vtag) << 16);
2142                /* Tell hardware to add tag */
2143                txd_lower |= htole32(E1000_TXD_CMD_VLE);
2144        }
2145
2146	i = txr->next_avail_desc;
2147
2148	/* Set up our transmit descriptors */
2149	for (j = 0; j < nsegs; j++) {
2150		bus_size_t seg_len;
2151		bus_addr_t seg_addr;
2152
2153		tx_buffer = &txr->tx_buffers[i];
2154		ctxd = &txr->tx_base[i];
2155		seg_addr = segs[j].ds_addr;
2156		seg_len  = segs[j].ds_len;
2157		/*
2158		** TSO Workaround:
2159		** If this is the last descriptor, we want to
2160		** split it so we have a small final sentinel
2161		*/
2162		if (tso_desc && (j == (nsegs - 1)) && (seg_len > 8)) {
2163			seg_len -= TSO_WORKAROUND;
2164			ctxd->buffer_addr = htole64(seg_addr);
2165			ctxd->lower.data = htole32(
2166				adapter->txd_cmd | txd_lower | seg_len);
2167			ctxd->upper.data = htole32(txd_upper);
2168			if (++i == adapter->num_tx_desc)
2169				i = 0;
2170
2171			/* Now make the sentinel */
2172			txr->tx_avail--;
2173			ctxd = &txr->tx_base[i];
2174			tx_buffer = &txr->tx_buffers[i];
2175			ctxd->buffer_addr =
2176			    htole64(seg_addr + seg_len);
2177			ctxd->lower.data = htole32(
2178			adapter->txd_cmd | txd_lower | TSO_WORKAROUND);
2179			ctxd->upper.data =
2180			    htole32(txd_upper);
2181			last = i;
2182			if (++i == adapter->num_tx_desc)
2183				i = 0;
2184		} else {
2185			ctxd->buffer_addr = htole64(seg_addr);
2186			ctxd->lower.data = htole32(
2187			adapter->txd_cmd | txd_lower | seg_len);
2188			ctxd->upper.data = htole32(txd_upper);
2189			last = i;
2190			if (++i == adapter->num_tx_desc)
2191				i = 0;
2192		}
2193		tx_buffer->m_head = NULL;
2194		tx_buffer->next_eop = -1;
2195	}
2196
2197	txr->next_avail_desc = i;
2198	txr->tx_avail -= nsegs;
2199
2200        tx_buffer->m_head = m_head;
2201	/*
2202	** Here we swap the map so the last descriptor,
2203	** which gets the completion interrupt has the
2204	** real map, and the first descriptor gets the
2205	** unused map from this descriptor.
2206	*/
2207	tx_buffer_mapped->map = tx_buffer->map;
2208	tx_buffer->map = map;
2209        bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE);
2210
2211        /*
2212         * Last Descriptor of Packet
2213	 * needs End Of Packet (EOP)
2214	 * and Report Status (RS)
2215         */
2216        ctxd->lower.data |=
2217	    htole32(E1000_TXD_CMD_EOP | E1000_TXD_CMD_RS);
2218	/*
2219	 * Keep track in the first buffer which
2220	 * descriptor will be written back
2221	 */
2222	tx_buffer = &txr->tx_buffers[first];
2223	tx_buffer->next_eop = last;
2224
2225	/*
2226	 * Advance the Transmit Descriptor Tail (TDT), this tells the E1000
2227	 * that this frame is available to transmit.
2228	 */
2229	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
2230	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
2231	E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me), i);
2232
2233	return (0);
2234}
2235
2236static void
2237em_set_promisc(struct adapter *adapter)
2238{
2239	struct ifnet	*ifp = adapter->ifp;
2240	u32		reg_rctl;
2241
2242	reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2243
2244	if (ifp->if_flags & IFF_PROMISC) {
2245		reg_rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
2246		/* Turn this on if you want to see bad packets */
2247		if (em_debug_sbp)
2248			reg_rctl |= E1000_RCTL_SBP;
2249		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2250	} else if (ifp->if_flags & IFF_ALLMULTI) {
2251		reg_rctl |= E1000_RCTL_MPE;
2252		reg_rctl &= ~E1000_RCTL_UPE;
2253		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2254	}
2255}
2256
2257static void
2258em_disable_promisc(struct adapter *adapter)
2259{
2260	struct ifnet	*ifp = adapter->ifp;
2261	u32		reg_rctl;
2262	int		mcnt = 0;
2263
2264	reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2265	reg_rctl &=  (~E1000_RCTL_UPE);
2266	if (ifp->if_flags & IFF_ALLMULTI)
2267		mcnt = MAX_NUM_MULTICAST_ADDRESSES;
2268	else {
2269		struct  ifmultiaddr *ifma;
2270#if __FreeBSD_version < 800000
2271		IF_ADDR_LOCK(ifp);
2272#else
2273		if_maddr_rlock(ifp);
2274#endif
2275		TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
2276			if (ifma->ifma_addr->sa_family != AF_LINK)
2277				continue;
2278			if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
2279				break;
2280			mcnt++;
2281		}
2282#if __FreeBSD_version < 800000
2283		IF_ADDR_UNLOCK(ifp);
2284#else
2285		if_maddr_runlock(ifp);
2286#endif
2287	}
2288	/* Don't disable if in MAX groups */
2289	if (mcnt < MAX_NUM_MULTICAST_ADDRESSES)
2290		reg_rctl &=  (~E1000_RCTL_MPE);
2291	reg_rctl &=  (~E1000_RCTL_SBP);
2292	E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2293}
2294
2295
2296/*********************************************************************
2297 *  Multicast Update
2298 *
2299 *  This routine is called whenever multicast address list is updated.
2300 *
2301 **********************************************************************/
2302
2303static void
2304em_set_multi(struct adapter *adapter)
2305{
2306	struct ifnet	*ifp = adapter->ifp;
2307	struct ifmultiaddr *ifma;
2308	u32 reg_rctl = 0;
2309	u8  *mta; /* Multicast array memory */
2310	int mcnt = 0;
2311
2312	IOCTL_DEBUGOUT("em_set_multi: begin");
2313
2314	mta = adapter->mta;
2315	bzero(mta, sizeof(u8) * ETH_ADDR_LEN * MAX_NUM_MULTICAST_ADDRESSES);
2316
2317	if (adapter->hw.mac.type == e1000_82542 &&
2318	    adapter->hw.revision_id == E1000_REVISION_2) {
2319		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2320		if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
2321			e1000_pci_clear_mwi(&adapter->hw);
2322		reg_rctl |= E1000_RCTL_RST;
2323		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2324		msec_delay(5);
2325	}
2326
2327#if __FreeBSD_version < 800000
2328	IF_ADDR_LOCK(ifp);
2329#else
2330	if_maddr_rlock(ifp);
2331#endif
2332	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
2333		if (ifma->ifma_addr->sa_family != AF_LINK)
2334			continue;
2335
2336		if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
2337			break;
2338
2339		bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
2340		    &mta[mcnt * ETH_ADDR_LEN], ETH_ADDR_LEN);
2341		mcnt++;
2342	}
2343#if __FreeBSD_version < 800000
2344	IF_ADDR_UNLOCK(ifp);
2345#else
2346	if_maddr_runlock(ifp);
2347#endif
2348	if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES) {
2349		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2350		reg_rctl |= E1000_RCTL_MPE;
2351		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2352	} else
2353		e1000_update_mc_addr_list(&adapter->hw, mta, mcnt);
2354
2355	if (adapter->hw.mac.type == e1000_82542 &&
2356	    adapter->hw.revision_id == E1000_REVISION_2) {
2357		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2358		reg_rctl &= ~E1000_RCTL_RST;
2359		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2360		msec_delay(5);
2361		if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
2362			e1000_pci_set_mwi(&adapter->hw);
2363	}
2364}
2365
2366
2367/*********************************************************************
2368 *  Timer routine
2369 *
2370 *  This routine checks for link status and updates statistics.
2371 *
2372 **********************************************************************/
2373
2374static void
2375em_local_timer(void *arg)
2376{
2377	struct adapter	*adapter = arg;
2378	struct ifnet	*ifp = adapter->ifp;
2379	struct tx_ring	*txr = adapter->tx_rings;
2380	struct rx_ring	*rxr = adapter->rx_rings;
2381	u32		trigger = 0;
2382
2383	EM_CORE_LOCK_ASSERT(adapter);
2384
2385	em_update_link_status(adapter);
2386	em_update_stats_counters(adapter);
2387
2388	/* Reset LAA into RAR[0] on 82571 */
2389	if ((adapter->hw.mac.type == e1000_82571) &&
2390	    e1000_get_laa_state_82571(&adapter->hw))
2391		e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);
2392
2393	/* Mask to use in the irq trigger */
2394	if (adapter->msix_mem) {
2395		for (int i = 0; i < adapter->num_queues; i++, rxr++)
2396			trigger |= rxr->ims;
2397		rxr = adapter->rx_rings;
2398	} else
2399		trigger = E1000_ICS_RXDMT0;
2400
2401	/*
2402	** Check on the state of the TX queue(s), this
2403	** can be done without the lock because its RO
2404	** and the HUNG state will be static if set.
2405	*/
2406	for (int i = 0; i < adapter->num_queues; i++, txr++) {
2407		if (txr->busy == EM_TX_HUNG)
2408			goto hung;
2409		if (txr->busy >= EM_TX_MAXTRIES)
2410			txr->busy = EM_TX_HUNG;
2411		/* Schedule a TX tasklet if needed */
2412		if (txr->tx_avail <= EM_MAX_SCATTER)
2413			taskqueue_enqueue(txr->tq, &txr->tx_task);
2414	}
2415
2416	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
2417#ifndef DEVICE_POLLING
2418	/* Trigger an RX interrupt to guarantee mbuf refresh */
2419	E1000_WRITE_REG(&adapter->hw, E1000_ICS, trigger);
2420#endif
2421	return;
2422hung:
2423	/* Looks like we're hung */
2424	device_printf(adapter->dev, "Watchdog timeout Queue[%d]-- resetting\n",
2425			txr->me);
2426	em_print_debug_info(adapter);
2427	ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2428	adapter->watchdog_events++;
2429	em_init_locked(adapter);
2430}
2431
2432
2433static void
2434em_update_link_status(struct adapter *adapter)
2435{
2436	struct e1000_hw *hw = &adapter->hw;
2437	struct ifnet *ifp = adapter->ifp;
2438	device_t dev = adapter->dev;
2439	struct tx_ring *txr = adapter->tx_rings;
2440	u32 link_check = 0;
2441
2442	/* Get the cached link value or read phy for real */
2443	switch (hw->phy.media_type) {
2444	case e1000_media_type_copper:
2445		if (hw->mac.get_link_status) {
2446			if (hw->mac.type == e1000_pch_spt)
2447				msec_delay(50);
2448			/* Do the work to read phy */
2449			e1000_check_for_link(hw);
2450			link_check = !hw->mac.get_link_status;
2451			if (link_check) /* ESB2 fix */
2452				e1000_cfg_on_link_up(hw);
2453		} else
2454			link_check = TRUE;
2455		break;
2456	case e1000_media_type_fiber:
2457		e1000_check_for_link(hw);
2458		link_check = (E1000_READ_REG(hw, E1000_STATUS) &
2459                                 E1000_STATUS_LU);
2460		break;
2461	case e1000_media_type_internal_serdes:
2462		e1000_check_for_link(hw);
2463		link_check = adapter->hw.mac.serdes_has_link;
2464		break;
2465	default:
2466	case e1000_media_type_unknown:
2467		break;
2468	}
2469
2470	/* Now check for a transition */
2471	if (link_check && (adapter->link_active == 0)) {
2472		e1000_get_speed_and_duplex(hw, &adapter->link_speed,
2473		    &adapter->link_duplex);
2474
2475		/*
2476		** There have proven to be problems with TSO when not at full
2477		** gigabit speed, so disable the assist automatically when at
2478		** lower speeds.  -jfv
2479		*/
2480		if (ifp->if_capenable & IFCAP_TSO4) {
2481			if (adapter->link_speed == SPEED_1000)
2482				ifp->if_hwassist |= CSUM_IP_TSO;
2483			else
2484				ifp->if_hwassist &= ~CSUM_IP_TSO;
2485		}
2486
2487		/* Check if we must disable SPEED_MODE bit on PCI-E */
2488		if ((adapter->link_speed != SPEED_1000) &&
2489		    ((hw->mac.type == e1000_82571) ||
2490		    (hw->mac.type == e1000_82572))) {
2491			int tarc0;
2492			tarc0 = E1000_READ_REG(hw, E1000_TARC(0));
2493			tarc0 &= ~TARC_SPEED_MODE_BIT;
2494			E1000_WRITE_REG(hw, E1000_TARC(0), tarc0);
2495		}
2496		if (bootverbose)
2497			device_printf(dev, "Link is up %d Mbps %s\n",
2498			    adapter->link_speed,
2499			    ((adapter->link_duplex == FULL_DUPLEX) ?
2500			    "Full Duplex" : "Half Duplex"));
2501		adapter->link_active = 1;
2502		adapter->smartspeed = 0;
2503		ifp->if_baudrate = adapter->link_speed * 1000000;
2504		if_link_state_change(ifp, LINK_STATE_UP);
2505	} else if (!link_check && (adapter->link_active == 1)) {
2506		ifp->if_baudrate = adapter->link_speed = 0;
2507		adapter->link_duplex = 0;
2508		if (bootverbose)
2509			device_printf(dev, "Link is Down\n");
2510		adapter->link_active = 0;
2511		/* Link down, disable hang detection */
2512		for (int i = 0; i < adapter->num_queues; i++, txr++)
2513			txr->busy = EM_TX_IDLE;
2514		if_link_state_change(ifp, LINK_STATE_DOWN);
2515	}
2516}
2517
2518/*********************************************************************
2519 *
2520 *  This routine disables all traffic on the adapter by issuing a
2521 *  global reset on the MAC and deallocates TX/RX buffers.
2522 *
2523 *  This routine should always be called with BOTH the CORE
2524 *  and TX locks.
2525 **********************************************************************/
2526
2527static void
2528em_stop(void *arg)
2529{
2530	struct adapter	*adapter = arg;
2531	struct ifnet	*ifp = adapter->ifp;
2532	struct tx_ring	*txr = adapter->tx_rings;
2533
2534	EM_CORE_LOCK_ASSERT(adapter);
2535
2536	INIT_DEBUGOUT("em_stop: begin");
2537
2538	em_disable_intr(adapter);
2539	callout_stop(&adapter->timer);
2540
2541	/* Tell the stack that the interface is no longer active */
2542	ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2543	ifp->if_drv_flags |= IFF_DRV_OACTIVE;
2544
2545        /* Disarm Hang Detection. */
2546	for (int i = 0; i < adapter->num_queues; i++, txr++) {
2547		EM_TX_LOCK(txr);
2548		txr->busy = EM_TX_IDLE;
2549		EM_TX_UNLOCK(txr);
2550	}
2551
2552	/* I219 needs some special flushing to avoid hangs */
2553	if (adapter->hw.mac.type == e1000_pch_spt)
2554		em_flush_desc_rings(adapter);
2555
2556	e1000_reset_hw(&adapter->hw);
2557	E1000_WRITE_REG(&adapter->hw, E1000_WUC, 0);
2558
2559	e1000_led_off(&adapter->hw);
2560	e1000_cleanup_led(&adapter->hw);
2561}
2562
2563
2564/*********************************************************************
2565 *
2566 *  Determine hardware revision.
2567 *
2568 **********************************************************************/
2569static void
2570em_identify_hardware(struct adapter *adapter)
2571{
2572	device_t dev = adapter->dev;
2573
2574	/* Make sure our PCI config space has the necessary stuff set */
2575	pci_enable_busmaster(dev);
2576	adapter->hw.bus.pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2);
2577
2578	/* Save off the information about this board */
2579	adapter->hw.vendor_id = pci_get_vendor(dev);
2580	adapter->hw.device_id = pci_get_device(dev);
2581	adapter->hw.revision_id = pci_read_config(dev, PCIR_REVID, 1);
2582	adapter->hw.subsystem_vendor_id =
2583	    pci_read_config(dev, PCIR_SUBVEND_0, 2);
2584	adapter->hw.subsystem_device_id =
2585	    pci_read_config(dev, PCIR_SUBDEV_0, 2);
2586
2587	/* Do Shared Code Init and Setup */
2588	if (e1000_set_mac_type(&adapter->hw)) {
2589		device_printf(dev, "Setup init failure\n");
2590		return;
2591	}
2592}
2593
2594static int
2595em_allocate_pci_resources(struct adapter *adapter)
2596{
2597	device_t	dev = adapter->dev;
2598	int		rid;
2599
2600	rid = PCIR_BAR(0);
2601	adapter->memory = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
2602	    &rid, RF_ACTIVE);
2603	if (adapter->memory == NULL) {
2604		device_printf(dev, "Unable to allocate bus resource: memory\n");
2605		return (ENXIO);
2606	}
2607	adapter->osdep.mem_bus_space_tag =
2608	    rman_get_bustag(adapter->memory);
2609	adapter->osdep.mem_bus_space_handle =
2610	    rman_get_bushandle(adapter->memory);
2611	adapter->hw.hw_addr = (u8 *)&adapter->osdep.mem_bus_space_handle;
2612
2613	adapter->hw.back = &adapter->osdep;
2614
2615	return (0);
2616}
2617
2618/*********************************************************************
2619 *
2620 *  Setup the Legacy or MSI Interrupt handler
2621 *
2622 **********************************************************************/
2623static int
2624em_allocate_legacy(struct adapter *adapter)
2625{
2626	device_t dev = adapter->dev;
2627	struct tx_ring	*txr = adapter->tx_rings;
2628	int error, rid = 0;
2629
2630	/* Manually turn off all interrupts */
2631	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2632
2633	if (adapter->msix == 1) /* using MSI */
2634		rid = 1;
2635	/* We allocate a single interrupt resource */
2636	adapter->res = bus_alloc_resource_any(dev,
2637	    SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2638	if (adapter->res == NULL) {
2639		device_printf(dev, "Unable to allocate bus resource: "
2640		    "interrupt\n");
2641		return (ENXIO);
2642	}
2643
2644	/*
2645	 * Allocate a fast interrupt and the associated
2646	 * deferred processing contexts.
2647	 */
2648	TASK_INIT(&adapter->que_task, 0, em_handle_que, adapter);
2649	adapter->tq = taskqueue_create_fast("em_taskq", M_NOWAIT,
2650	    taskqueue_thread_enqueue, &adapter->tq);
2651	taskqueue_start_threads(&adapter->tq, 1, PI_NET, "%s que",
2652	    device_get_nameunit(adapter->dev));
2653	/* Use a TX only tasklet for local timer */
2654	TASK_INIT(&txr->tx_task, 0, em_handle_tx, txr);
2655	txr->tq = taskqueue_create_fast("em_txq", M_NOWAIT,
2656	    taskqueue_thread_enqueue, &txr->tq);
2657	taskqueue_start_threads(&txr->tq, 1, PI_NET, "%s txq",
2658	    device_get_nameunit(adapter->dev));
2659	TASK_INIT(&adapter->link_task, 0, em_handle_link, adapter);
2660	if ((error = bus_setup_intr(dev, adapter->res, INTR_TYPE_NET,
2661	    em_irq_fast, NULL, adapter, &adapter->tag)) != 0) {
2662		device_printf(dev, "Failed to register fast interrupt "
2663			    "handler: %d\n", error);
2664		taskqueue_free(adapter->tq);
2665		adapter->tq = NULL;
2666		return (error);
2667	}
2668
2669	return (0);
2670}
2671
2672/*********************************************************************
2673 *
2674 *  Setup the MSIX Interrupt handlers
2675 *   This is not really Multiqueue, rather
2676 *   its just seperate interrupt vectors
2677 *   for TX, RX, and Link.
2678 *
2679 **********************************************************************/
2680static int
2681em_allocate_msix(struct adapter *adapter)
2682{
2683	device_t	dev = adapter->dev;
2684	struct		tx_ring *txr = adapter->tx_rings;
2685	struct		rx_ring *rxr = adapter->rx_rings;
2686	int		error, rid, vector = 0;
2687	int		cpu_id = 0;
2688
2689
2690	/* Make sure all interrupts are disabled */
2691	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2692
2693	/* First set up ring resources */
2694	for (int i = 0; i < adapter->num_queues; i++, rxr++, vector++) {
2695
2696		/* RX ring */
2697		rid = vector + 1;
2698
2699		rxr->res = bus_alloc_resource_any(dev,
2700		    SYS_RES_IRQ, &rid, RF_ACTIVE);
2701		if (rxr->res == NULL) {
2702			device_printf(dev,
2703			    "Unable to allocate bus resource: "
2704			    "RX MSIX Interrupt %d\n", i);
2705			return (ENXIO);
2706		}
2707		if ((error = bus_setup_intr(dev, rxr->res,
2708		    INTR_TYPE_NET | INTR_MPSAFE, NULL, em_msix_rx,
2709		    rxr, &rxr->tag)) != 0) {
2710			device_printf(dev, "Failed to register RX handler");
2711			return (error);
2712		}
2713#if __FreeBSD_version >= 800504
2714		bus_describe_intr(dev, rxr->res, rxr->tag, "rx%d", i);
2715#endif
2716		rxr->msix = vector;
2717
2718		if (em_last_bind_cpu < 0)
2719			em_last_bind_cpu = CPU_FIRST();
2720		cpu_id = em_last_bind_cpu;
2721		bus_bind_intr(dev, rxr->res, cpu_id);
2722
2723		TASK_INIT(&rxr->rx_task, 0, em_handle_rx, rxr);
2724		rxr->tq = taskqueue_create_fast("em_rxq", M_NOWAIT,
2725		    taskqueue_thread_enqueue, &rxr->tq);
2726		taskqueue_start_threads(&rxr->tq, 1, PI_NET, "%s rxq (cpuid %d)",
2727		    device_get_nameunit(adapter->dev), cpu_id);
2728		/*
2729		** Set the bit to enable interrupt
2730		** in E1000_IMS -- bits 20 and 21
2731		** are for RX0 and RX1, note this has
2732		** NOTHING to do with the MSIX vector
2733		*/
2734		rxr->ims = 1 << (20 + i);
2735		adapter->ims |= rxr->ims;
2736		adapter->ivars |= (8 | rxr->msix) << (i * 4);
2737
2738		em_last_bind_cpu = CPU_NEXT(em_last_bind_cpu);
2739	}
2740
2741	for (int i = 0; i < adapter->num_queues; i++, txr++, vector++) {
2742		/* TX ring */
2743		rid = vector + 1;
2744		txr->res = bus_alloc_resource_any(dev,
2745		    SYS_RES_IRQ, &rid, RF_ACTIVE);
2746		if (txr->res == NULL) {
2747			device_printf(dev,
2748			    "Unable to allocate bus resource: "
2749			    "TX MSIX Interrupt %d\n", i);
2750			return (ENXIO);
2751		}
2752		if ((error = bus_setup_intr(dev, txr->res,
2753		    INTR_TYPE_NET | INTR_MPSAFE, NULL, em_msix_tx,
2754		    txr, &txr->tag)) != 0) {
2755			device_printf(dev, "Failed to register TX handler");
2756			return (error);
2757		}
2758#if __FreeBSD_version >= 800504
2759		bus_describe_intr(dev, txr->res, txr->tag, "tx%d", i);
2760#endif
2761		txr->msix = vector;
2762
2763                if (em_last_bind_cpu < 0)
2764                        em_last_bind_cpu = CPU_FIRST();
2765                cpu_id = em_last_bind_cpu;
2766                bus_bind_intr(dev, txr->res, cpu_id);
2767
2768		TASK_INIT(&txr->tx_task, 0, em_handle_tx, txr);
2769		txr->tq = taskqueue_create_fast("em_txq", M_NOWAIT,
2770		    taskqueue_thread_enqueue, &txr->tq);
2771		taskqueue_start_threads(&txr->tq, 1, PI_NET, "%s txq (cpuid %d)",
2772		    device_get_nameunit(adapter->dev), cpu_id);
2773		/*
2774		** Set the bit to enable interrupt
2775		** in E1000_IMS -- bits 22 and 23
2776		** are for TX0 and TX1, note this has
2777		** NOTHING to do with the MSIX vector
2778		*/
2779		txr->ims = 1 << (22 + i);
2780		adapter->ims |= txr->ims;
2781		adapter->ivars |= (8 | txr->msix) << (8 + (i * 4));
2782
2783		em_last_bind_cpu = CPU_NEXT(em_last_bind_cpu);
2784	}
2785
2786	/* Link interrupt */
2787	rid = vector + 1;
2788	adapter->res = bus_alloc_resource_any(dev,
2789	    SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2790	if (!adapter->res) {
2791		device_printf(dev,"Unable to allocate "
2792		    "bus resource: Link interrupt [%d]\n", rid);
2793		return (ENXIO);
2794        }
2795	/* Set the link handler function */
2796	error = bus_setup_intr(dev, adapter->res,
2797	    INTR_TYPE_NET | INTR_MPSAFE, NULL,
2798	    em_msix_link, adapter, &adapter->tag);
2799	if (error) {
2800		adapter->res = NULL;
2801		device_printf(dev, "Failed to register LINK handler");
2802		return (error);
2803	}
2804#if __FreeBSD_version >= 800504
2805	bus_describe_intr(dev, adapter->res, adapter->tag, "link");
2806#endif
2807	adapter->linkvec = vector;
2808	adapter->ivars |=  (8 | vector) << 16;
2809	adapter->ivars |= 0x80000000;
2810
2811	return (0);
2812}
2813
2814
2815static void
2816em_free_pci_resources(struct adapter *adapter)
2817{
2818	device_t	dev = adapter->dev;
2819	struct tx_ring	*txr;
2820	struct rx_ring	*rxr;
2821	int		rid;
2822
2823
2824	/*
2825	** Release all the queue interrupt resources:
2826	*/
2827	for (int i = 0; i < adapter->num_queues; i++) {
2828		txr = &adapter->tx_rings[i];
2829		/* an early abort? */
2830		if (txr == NULL)
2831			break;
2832		rid = txr->msix +1;
2833		if (txr->tag != NULL) {
2834			bus_teardown_intr(dev, txr->res, txr->tag);
2835			txr->tag = NULL;
2836		}
2837		if (txr->res != NULL)
2838			bus_release_resource(dev, SYS_RES_IRQ,
2839			    rid, txr->res);
2840
2841		rxr = &adapter->rx_rings[i];
2842		/* an early abort? */
2843		if (rxr == NULL)
2844			break;
2845		rid = rxr->msix +1;
2846		if (rxr->tag != NULL) {
2847			bus_teardown_intr(dev, rxr->res, rxr->tag);
2848			rxr->tag = NULL;
2849		}
2850		if (rxr->res != NULL)
2851			bus_release_resource(dev, SYS_RES_IRQ,
2852			    rid, rxr->res);
2853	}
2854
2855        if (adapter->linkvec) /* we are doing MSIX */
2856                rid = adapter->linkvec + 1;
2857        else
2858                (adapter->msix != 0) ? (rid = 1):(rid = 0);
2859
2860	if (adapter->tag != NULL) {
2861		bus_teardown_intr(dev, adapter->res, adapter->tag);
2862		adapter->tag = NULL;
2863	}
2864
2865	if (adapter->res != NULL)
2866		bus_release_resource(dev, SYS_RES_IRQ, rid, adapter->res);
2867
2868
2869	if (adapter->msix)
2870		pci_release_msi(dev);
2871
2872	if (adapter->msix_mem != NULL)
2873		bus_release_resource(dev, SYS_RES_MEMORY,
2874		    adapter->memrid, adapter->msix_mem);
2875
2876	if (adapter->memory != NULL)
2877		bus_release_resource(dev, SYS_RES_MEMORY,
2878		    PCIR_BAR(0), adapter->memory);
2879
2880	if (adapter->flash != NULL)
2881		bus_release_resource(dev, SYS_RES_MEMORY,
2882		    EM_FLASH, adapter->flash);
2883}
2884
2885/*
2886 * Setup MSI or MSI/X
2887 */
2888static int
2889em_setup_msix(struct adapter *adapter)
2890{
2891	device_t dev = adapter->dev;
2892	int val;
2893
2894	/* Nearly always going to use one queue */
2895	adapter->num_queues = 1;
2896
2897	/*
2898	** Try using MSI-X for Hartwell adapters
2899	*/
2900	if ((adapter->hw.mac.type == e1000_82574) &&
2901	    (em_enable_msix == TRUE)) {
2902#ifdef EM_MULTIQUEUE
2903		adapter->num_queues = (em_num_queues == 1) ? 1 : 2;
2904		if (adapter->num_queues > 1)
2905			em_enable_vectors_82574(adapter);
2906#endif
2907		/* Map the MSIX BAR */
2908		adapter->memrid = PCIR_BAR(EM_MSIX_BAR);
2909		adapter->msix_mem = bus_alloc_resource_any(dev,
2910		    SYS_RES_MEMORY, &adapter->memrid, RF_ACTIVE);
2911       		if (adapter->msix_mem == NULL) {
2912			/* May not be enabled */
2913               		device_printf(adapter->dev,
2914			    "Unable to map MSIX table \n");
2915			goto msi;
2916       		}
2917		val = pci_msix_count(dev);
2918
2919#ifdef EM_MULTIQUEUE
2920		/* We need 5 vectors in the multiqueue case */
2921		if (adapter->num_queues > 1 ) {
2922			if (val >= 5)
2923				val = 5;
2924			else {
2925				adapter->num_queues = 1;
2926				device_printf(adapter->dev,
2927				    "Insufficient MSIX vectors for >1 queue, "
2928				    "using single queue...\n");
2929				goto msix_one;
2930			}
2931		} else {
2932msix_one:
2933#endif
2934			if (val >= 3)
2935				val = 3;
2936			else {
2937				device_printf(adapter->dev,
2938			    	"Insufficient MSIX vectors, using MSI\n");
2939				goto msi;
2940			}
2941#ifdef EM_MULTIQUEUE
2942		}
2943#endif
2944
2945		if ((pci_alloc_msix(dev, &val) == 0)) {
2946			device_printf(adapter->dev,
2947			    "Using MSIX interrupts "
2948			    "with %d vectors\n", val);
2949			return (val);
2950		}
2951
2952		/*
2953		** If MSIX alloc failed or provided us with
2954		** less than needed, free and fall through to MSI
2955		*/
2956		pci_release_msi(dev);
2957	}
2958msi:
2959	if (adapter->msix_mem != NULL) {
2960		bus_release_resource(dev, SYS_RES_MEMORY,
2961		    adapter->memrid, adapter->msix_mem);
2962		adapter->msix_mem = NULL;
2963	}
2964       	val = 1;
2965       	if (pci_alloc_msi(dev, &val) == 0) {
2966               	device_printf(adapter->dev, "Using an MSI interrupt\n");
2967		return (val);
2968	}
2969	/* Should only happen due to manual configuration */
2970	device_printf(adapter->dev,"No MSI/MSIX using a Legacy IRQ\n");
2971	return (0);
2972}
2973
2974
2975/*
2976** The 3 following flush routines are used as a workaround in the
2977** I219 client parts and only for them.
2978**
2979** em_flush_tx_ring - remove all descriptors from the tx_ring
2980**
2981** We want to clear all pending descriptors from the TX ring.
2982** zeroing happens when the HW reads the regs. We  assign the ring itself as
2983** the data of the next descriptor. We don't care about the data we are about
2984** to reset the HW.
2985*/
2986static void
2987em_flush_tx_ring(struct adapter *adapter)
2988{
2989	struct e1000_hw		*hw = &adapter->hw;
2990	struct tx_ring		*txr = adapter->tx_rings;
2991	struct e1000_tx_desc	*txd;
2992	u32			tctl, txd_lower = E1000_TXD_CMD_IFCS;
2993	u16			size = 512;
2994
2995	tctl = E1000_READ_REG(hw, E1000_TCTL);
2996	E1000_WRITE_REG(hw, E1000_TCTL, tctl | E1000_TCTL_EN);
2997
2998	txd = &txr->tx_base[txr->next_avail_desc++];
2999	if (txr->next_avail_desc == adapter->num_tx_desc)
3000		txr->next_avail_desc = 0;
3001
3002	/* Just use the ring as a dummy buffer addr */
3003	txd->buffer_addr = txr->txdma.dma_paddr;
3004	txd->lower.data = htole32(txd_lower | size);
3005	txd->upper.data = 0;
3006
3007	/* flush descriptors to memory before notifying the HW */
3008	wmb();
3009
3010	E1000_WRITE_REG(hw, E1000_TDT(0), txr->next_avail_desc);
3011	mb();
3012	usec_delay(250);
3013}
3014
3015/*
3016** em_flush_rx_ring - remove all descriptors from the rx_ring
3017**
3018** Mark all descriptors in the RX ring as consumed and disable the rx ring
3019*/
3020static void
3021em_flush_rx_ring(struct adapter *adapter)
3022{
3023	struct e1000_hw	*hw = &adapter->hw;
3024	u32		rctl, rxdctl;
3025
3026	rctl = E1000_READ_REG(hw, E1000_RCTL);
3027	E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
3028	E1000_WRITE_FLUSH(hw);
3029	usec_delay(150);
3030
3031	rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(0));
3032	/* zero the lower 14 bits (prefetch and host thresholds) */
3033	rxdctl &= 0xffffc000;
3034	/*
3035	 * update thresholds: prefetch threshold to 31, host threshold to 1
3036	 * and make sure the granularity is "descriptors" and not "cache lines"
3037	 */
3038	rxdctl |= (0x1F | (1 << 8) | E1000_RXDCTL_THRESH_UNIT_DESC);
3039	E1000_WRITE_REG(hw, E1000_RXDCTL(0), rxdctl);
3040
3041	/* momentarily enable the RX ring for the changes to take effect */
3042	E1000_WRITE_REG(hw, E1000_RCTL, rctl | E1000_RCTL_EN);
3043	E1000_WRITE_FLUSH(hw);
3044	usec_delay(150);
3045	E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
3046}
3047
3048/*
3049** em_flush_desc_rings - remove all descriptors from the descriptor rings
3050**
3051** In i219, the descriptor rings must be emptied before resetting the HW
3052** or before changing the device state to D3 during runtime (runtime PM).
3053**
3054** Failure to do this will cause the HW to enter a unit hang state which can
3055** only be released by PCI reset on the device
3056**
3057*/
3058static void
3059em_flush_desc_rings(struct adapter *adapter)
3060{
3061	struct e1000_hw	*hw = &adapter->hw;
3062	device_t	dev = adapter->dev;
3063	u16		hang_state;
3064	u32		fext_nvm11, tdlen;
3065
3066	/* First, disable MULR fix in FEXTNVM11 */
3067	fext_nvm11 = E1000_READ_REG(hw, E1000_FEXTNVM11);
3068	fext_nvm11 |= E1000_FEXTNVM11_DISABLE_MULR_FIX;
3069	E1000_WRITE_REG(hw, E1000_FEXTNVM11, fext_nvm11);
3070
3071	/* do nothing if we're not in faulty state, or if the queue is empty */
3072	tdlen = E1000_READ_REG(hw, E1000_TDLEN(0));
3073	hang_state = pci_read_config(dev, PCICFG_DESC_RING_STATUS, 2);
3074	if (!(hang_state & FLUSH_DESC_REQUIRED) || !tdlen)
3075		return;
3076	em_flush_tx_ring(adapter);
3077
3078	/* recheck, maybe the fault is caused by the rx ring */
3079	hang_state = pci_read_config(dev, PCICFG_DESC_RING_STATUS, 2);
3080	if (hang_state & FLUSH_DESC_REQUIRED)
3081		em_flush_rx_ring(adapter);
3082}
3083
3084
3085/*********************************************************************
3086 *
3087 *  Initialize the hardware to a configuration
3088 *  as specified by the adapter structure.
3089 *
3090 **********************************************************************/
3091static void
3092em_reset(struct adapter *adapter)
3093{
3094	device_t	dev = adapter->dev;
3095	struct ifnet	*ifp = adapter->ifp;
3096	struct e1000_hw	*hw = &adapter->hw;
3097	u16		rx_buffer_size;
3098	u32		pba;
3099
3100	INIT_DEBUGOUT("em_reset: begin");
3101
3102	/* Set up smart power down as default off on newer adapters. */
3103	if (!em_smart_pwr_down && (hw->mac.type == e1000_82571 ||
3104	    hw->mac.type == e1000_82572)) {
3105		u16 phy_tmp = 0;
3106
3107		/* Speed up time to link by disabling smart power down. */
3108		e1000_read_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, &phy_tmp);
3109		phy_tmp &= ~IGP02E1000_PM_SPD;
3110		e1000_write_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, phy_tmp);
3111	}
3112
3113	/*
3114	 * Packet Buffer Allocation (PBA)
3115	 * Writing PBA sets the receive portion of the buffer
3116	 * the remainder is used for the transmit buffer.
3117	 */
3118	switch (hw->mac.type) {
3119	/* Total Packet Buffer on these is 48K */
3120	case e1000_82571:
3121	case e1000_82572:
3122	case e1000_80003es2lan:
3123			pba = E1000_PBA_32K; /* 32K for Rx, 16K for Tx */
3124		break;
3125	case e1000_82573: /* 82573: Total Packet Buffer is 32K */
3126			pba = E1000_PBA_12K; /* 12K for Rx, 20K for Tx */
3127		break;
3128	case e1000_82574:
3129	case e1000_82583:
3130			pba = E1000_PBA_20K; /* 20K for Rx, 20K for Tx */
3131		break;
3132	case e1000_ich8lan:
3133		pba = E1000_PBA_8K;
3134		break;
3135	case e1000_ich9lan:
3136	case e1000_ich10lan:
3137		/* Boost Receive side for jumbo frames */
3138		if (adapter->hw.mac.max_frame_size > 4096)
3139			pba = E1000_PBA_14K;
3140		else
3141			pba = E1000_PBA_10K;
3142		break;
3143	case e1000_pchlan:
3144	case e1000_pch2lan:
3145	case e1000_pch_lpt:
3146	case e1000_pch_spt:
3147	case e1000_pch_cnp:
3148		pba = E1000_PBA_26K;
3149		break;
3150	default:
3151		if (adapter->hw.mac.max_frame_size > 8192)
3152			pba = E1000_PBA_40K; /* 40K for Rx, 24K for Tx */
3153		else
3154			pba = E1000_PBA_48K; /* 48K for Rx, 16K for Tx */
3155	}
3156	E1000_WRITE_REG(&adapter->hw, E1000_PBA, pba);
3157
3158	/*
3159	 * These parameters control the automatic generation (Tx) and
3160	 * response (Rx) to Ethernet PAUSE frames.
3161	 * - High water mark should allow for at least two frames to be
3162	 *   received after sending an XOFF.
3163	 * - Low water mark works best when it is very near the high water mark.
3164	 *   This allows the receiver to restart by sending XON when it has
3165	 *   drained a bit. Here we use an arbitary value of 1500 which will
3166	 *   restart after one full frame is pulled from the buffer. There
3167	 *   could be several smaller frames in the buffer and if so they will
3168	 *   not trigger the XON until their total number reduces the buffer
3169	 *   by 1500.
3170	 * - The pause time is fairly large at 1000 x 512ns = 512 usec.
3171	 */
3172	rx_buffer_size = ((E1000_READ_REG(hw, E1000_PBA) & 0xffff) << 10 );
3173	hw->fc.high_water = rx_buffer_size -
3174	    roundup2(adapter->hw.mac.max_frame_size, 1024);
3175	hw->fc.low_water = hw->fc.high_water - 1500;
3176
3177	if (adapter->fc) /* locally set flow control value? */
3178		hw->fc.requested_mode = adapter->fc;
3179	else
3180		hw->fc.requested_mode = e1000_fc_full;
3181
3182	if (hw->mac.type == e1000_80003es2lan)
3183		hw->fc.pause_time = 0xFFFF;
3184	else
3185		hw->fc.pause_time = EM_FC_PAUSE_TIME;
3186
3187	hw->fc.send_xon = TRUE;
3188
3189	/* Device specific overrides/settings */
3190	switch (hw->mac.type) {
3191	case e1000_pchlan:
3192		/* Workaround: no TX flow ctrl for PCH */
3193                hw->fc.requested_mode = e1000_fc_rx_pause;
3194		hw->fc.pause_time = 0xFFFF; /* override */
3195		if (ifp->if_mtu > ETHERMTU) {
3196			hw->fc.high_water = 0x3500;
3197			hw->fc.low_water = 0x1500;
3198		} else {
3199			hw->fc.high_water = 0x5000;
3200			hw->fc.low_water = 0x3000;
3201		}
3202		hw->fc.refresh_time = 0x1000;
3203		break;
3204	case e1000_pch2lan:
3205	case e1000_pch_lpt:
3206	case e1000_pch_spt:
3207	case e1000_pch_cnp:
3208		hw->fc.high_water = 0x5C20;
3209		hw->fc.low_water = 0x5048;
3210		hw->fc.pause_time = 0x0650;
3211		hw->fc.refresh_time = 0x0400;
3212		/* Jumbos need adjusted PBA */
3213		if (ifp->if_mtu > ETHERMTU)
3214			E1000_WRITE_REG(hw, E1000_PBA, 12);
3215		else
3216			E1000_WRITE_REG(hw, E1000_PBA, 26);
3217		break;
3218        case e1000_ich9lan:
3219        case e1000_ich10lan:
3220		if (ifp->if_mtu > ETHERMTU) {
3221			hw->fc.high_water = 0x2800;
3222			hw->fc.low_water = hw->fc.high_water - 8;
3223			break;
3224		}
3225		/* else fall thru */
3226	default:
3227		if (hw->mac.type == e1000_80003es2lan)
3228			hw->fc.pause_time = 0xFFFF;
3229		break;
3230	}
3231
3232	/* I219 needs some special flushing to avoid hangs */
3233	if (hw->mac.type == e1000_pch_spt)
3234		em_flush_desc_rings(adapter);
3235
3236	/* Issue a global reset */
3237	e1000_reset_hw(hw);
3238	E1000_WRITE_REG(hw, E1000_WUC, 0);
3239	em_disable_aspm(adapter);
3240	/* and a re-init */
3241	if (e1000_init_hw(hw) < 0) {
3242		device_printf(dev, "Hardware Initialization Failed\n");
3243		return;
3244	}
3245
3246	E1000_WRITE_REG(hw, E1000_VET, ETHERTYPE_VLAN);
3247	e1000_get_phy_info(hw);
3248	e1000_check_for_link(hw);
3249	return;
3250}
3251
3252/*********************************************************************
3253 *
3254 *  Setup networking device structure and register an interface.
3255 *
3256 **********************************************************************/
3257static int
3258em_setup_interface(device_t dev, struct adapter *adapter)
3259{
3260	struct ifnet   *ifp;
3261
3262	INIT_DEBUGOUT("em_setup_interface: begin");
3263
3264	ifp = adapter->ifp = if_alloc(IFT_ETHER);
3265	if (ifp == NULL) {
3266		device_printf(dev, "can not allocate ifnet structure\n");
3267		return (-1);
3268	}
3269	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
3270	ifp->if_init =  em_init;
3271	ifp->if_softc = adapter;
3272	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
3273	ifp->if_ioctl = em_ioctl;
3274
3275	/* TSO parameters */
3276	ifp->if_hw_tsomax = IP_MAXPACKET;
3277	/* Take m_pullup(9)'s in em_xmit() w/ TSO into acount. */
3278	ifp->if_hw_tsomaxsegcount = EM_MAX_SCATTER - 5;
3279	ifp->if_hw_tsomaxsegsize = EM_TSO_SEG_SIZE;
3280
3281#ifdef EM_MULTIQUEUE
3282	/* Multiqueue stack interface */
3283	ifp->if_transmit = em_mq_start;
3284	ifp->if_qflush = em_qflush;
3285#else
3286	ifp->if_start = em_start;
3287	IFQ_SET_MAXLEN(&ifp->if_snd, adapter->num_tx_desc - 1);
3288	ifp->if_snd.ifq_drv_maxlen = adapter->num_tx_desc - 1;
3289	IFQ_SET_READY(&ifp->if_snd);
3290#endif
3291
3292	ether_ifattach(ifp, adapter->hw.mac.addr);
3293
3294	ifp->if_capabilities = IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM;
3295	ifp->if_capenable = ifp->if_capabilities;
3296
3297	/*
3298	 * Tell the upper layer(s) we
3299	 * support full VLAN capability
3300	 */
3301	ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header);
3302	ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING
3303			     |  IFCAP_VLAN_HWTSO
3304			     |  IFCAP_VLAN_MTU;
3305	ifp->if_capenable |= IFCAP_VLAN_HWTAGGING
3306			  |  IFCAP_VLAN_MTU;
3307
3308	/*
3309	 * We don't enable IFCAP_{TSO4,VLAN_HWTSO} by default because:
3310	 * - Although the silicon bug of TSO only working at gigabit speed is
3311	 *   worked around in em_update_link_status() by selectively setting
3312	 *   CSUM_IP_TSO, we cannot atomically flush already queued TSO-using
3313	 *   descriptors.  Thus, such descriptors may still cause the MAC to
3314	 *   hang and, consequently, TSO is only safe to be used in setups
3315	 *   where the link isn't expected to switch from gigabit to lower
3316	 *   speeds.
3317	 * - Similarly, there's currently no way to trigger a reconfiguration
3318	 *   of vlan(4) when the state of IFCAP_VLAN_HWTSO support changes at
3319	 *   runtime.  Therefore, IFCAP_VLAN_HWTSO also only is safe to use
3320	 *   when link speed changes are not to be expected.
3321	 * - Despite all the workarounds for TSO-related silicon bugs, at
3322	 *   least 82579 still may hang at gigabit speed with IFCAP_TSO4.
3323	 */
3324	ifp->if_capabilities |= IFCAP_TSO4 | IFCAP_VLAN_HWTSO;
3325
3326	/*
3327	** Don't turn this on by default, if vlans are
3328	** created on another pseudo device (eg. lagg)
3329	** then vlan events are not passed thru, breaking
3330	** operation, but with HW FILTER off it works. If
3331	** using vlans directly on the em driver you can
3332	** enable this and get full hardware tag filtering.
3333	*/
3334	ifp->if_capabilities |= IFCAP_VLAN_HWFILTER;
3335
3336#ifdef DEVICE_POLLING
3337	ifp->if_capabilities |= IFCAP_POLLING;
3338#endif
3339
3340	/* Enable only WOL MAGIC by default */
3341	if (adapter->wol) {
3342		ifp->if_capabilities |= IFCAP_WOL;
3343		ifp->if_capenable |= IFCAP_WOL_MAGIC;
3344	}
3345
3346	/*
3347	 * Specify the media types supported by this adapter and register
3348	 * callbacks to update media and link information
3349	 */
3350	ifmedia_init(&adapter->media, IFM_IMASK,
3351	    em_media_change, em_media_status);
3352	if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
3353	    (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
3354		u_char fiber_type = IFM_1000_SX;	/* default type */
3355
3356		ifmedia_add(&adapter->media, IFM_ETHER | fiber_type | IFM_FDX,
3357			    0, NULL);
3358		ifmedia_add(&adapter->media, IFM_ETHER | fiber_type, 0, NULL);
3359	} else {
3360		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T, 0, NULL);
3361		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T | IFM_FDX,
3362			    0, NULL);
3363		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX,
3364			    0, NULL);
3365		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX | IFM_FDX,
3366			    0, NULL);
3367		if (adapter->hw.phy.type != e1000_phy_ife) {
3368			ifmedia_add(&adapter->media,
3369				IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
3370			ifmedia_add(&adapter->media,
3371				IFM_ETHER | IFM_1000_T, 0, NULL);
3372		}
3373	}
3374	ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL);
3375	ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO);
3376	return (0);
3377}
3378
3379
3380/*
3381 * Manage DMA'able memory.
3382 */
3383static void
3384em_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
3385{
3386	if (error)
3387		return;
3388	*(bus_addr_t *) arg = segs[0].ds_addr;
3389}
3390
3391static int
3392em_dma_malloc(struct adapter *adapter, bus_size_t size,
3393        struct em_dma_alloc *dma, int mapflags)
3394{
3395	int error;
3396
3397	error = bus_dma_tag_create(bus_get_dma_tag(adapter->dev), /* parent */
3398				EM_DBA_ALIGN, 0,	/* alignment, bounds */
3399				BUS_SPACE_MAXADDR,	/* lowaddr */
3400				BUS_SPACE_MAXADDR,	/* highaddr */
3401				NULL, NULL,		/* filter, filterarg */
3402				size,			/* maxsize */
3403				1,			/* nsegments */
3404				size,			/* maxsegsize */
3405				0,			/* flags */
3406				NULL,			/* lockfunc */
3407				NULL,			/* lockarg */
3408				&dma->dma_tag);
3409	if (error) {
3410		device_printf(adapter->dev,
3411		    "%s: bus_dma_tag_create failed: %d\n",
3412		    __func__, error);
3413		goto fail_0;
3414	}
3415
3416	error = bus_dmamem_alloc(dma->dma_tag, (void**) &dma->dma_vaddr,
3417	    BUS_DMA_NOWAIT | BUS_DMA_COHERENT, &dma->dma_map);
3418	if (error) {
3419		device_printf(adapter->dev,
3420		    "%s: bus_dmamem_alloc(%ju) failed: %d\n",
3421		    __func__, (uintmax_t)size, error);
3422		goto fail_2;
3423	}
3424
3425	dma->dma_paddr = 0;
3426	error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
3427	    size, em_dmamap_cb, &dma->dma_paddr, mapflags | BUS_DMA_NOWAIT);
3428	if (error || dma->dma_paddr == 0) {
3429		device_printf(adapter->dev,
3430		    "%s: bus_dmamap_load failed: %d\n",
3431		    __func__, error);
3432		goto fail_3;
3433	}
3434
3435	return (0);
3436
3437fail_3:
3438	bus_dmamap_unload(dma->dma_tag, dma->dma_map);
3439fail_2:
3440	bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
3441	bus_dma_tag_destroy(dma->dma_tag);
3442fail_0:
3443	dma->dma_tag = NULL;
3444
3445	return (error);
3446}
3447
3448static void
3449em_dma_free(struct adapter *adapter, struct em_dma_alloc *dma)
3450{
3451	if (dma->dma_tag == NULL)
3452		return;
3453	if (dma->dma_paddr != 0) {
3454		bus_dmamap_sync(dma->dma_tag, dma->dma_map,
3455		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
3456		bus_dmamap_unload(dma->dma_tag, dma->dma_map);
3457		dma->dma_paddr = 0;
3458	}
3459	if (dma->dma_vaddr != NULL) {
3460		bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
3461		dma->dma_vaddr = NULL;
3462	}
3463	bus_dma_tag_destroy(dma->dma_tag);
3464	dma->dma_tag = NULL;
3465}
3466
3467
3468/*********************************************************************
3469 *
3470 *  Allocate memory for the transmit and receive rings, and then
3471 *  the descriptors associated with each, called only once at attach.
3472 *
3473 **********************************************************************/
3474static int
3475em_allocate_queues(struct adapter *adapter)
3476{
3477	device_t		dev = adapter->dev;
3478	struct tx_ring		*txr = NULL;
3479	struct rx_ring		*rxr = NULL;
3480	int rsize, tsize, error = E1000_SUCCESS;
3481	int txconf = 0, rxconf = 0;
3482
3483
3484	/* Allocate the TX ring struct memory */
3485	if (!(adapter->tx_rings =
3486	    (struct tx_ring *) malloc(sizeof(struct tx_ring) *
3487	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3488		device_printf(dev, "Unable to allocate TX ring memory\n");
3489		error = ENOMEM;
3490		goto fail;
3491	}
3492
3493	/* Now allocate the RX */
3494	if (!(adapter->rx_rings =
3495	    (struct rx_ring *) malloc(sizeof(struct rx_ring) *
3496	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3497		device_printf(dev, "Unable to allocate RX ring memory\n");
3498		error = ENOMEM;
3499		goto rx_fail;
3500	}
3501
3502	tsize = roundup2(adapter->num_tx_desc *
3503	    sizeof(struct e1000_tx_desc), EM_DBA_ALIGN);
3504	/*
3505	 * Now set up the TX queues, txconf is needed to handle the
3506	 * possibility that things fail midcourse and we need to
3507	 * undo memory gracefully
3508	 */
3509	for (int i = 0; i < adapter->num_queues; i++, txconf++) {
3510		/* Set up some basics */
3511		txr = &adapter->tx_rings[i];
3512		txr->adapter = adapter;
3513		txr->me = i;
3514
3515		/* Initialize the TX lock */
3516		snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
3517		    device_get_nameunit(dev), txr->me);
3518		mtx_init(&txr->tx_mtx, txr->mtx_name, NULL, MTX_DEF);
3519
3520		if (em_dma_malloc(adapter, tsize,
3521			&txr->txdma, BUS_DMA_NOWAIT)) {
3522			device_printf(dev,
3523			    "Unable to allocate TX Descriptor memory\n");
3524			error = ENOMEM;
3525			goto err_tx_desc;
3526		}
3527		txr->tx_base = (struct e1000_tx_desc *)txr->txdma.dma_vaddr;
3528		bzero((void *)txr->tx_base, tsize);
3529
3530        	if (em_allocate_transmit_buffers(txr)) {
3531			device_printf(dev,
3532			    "Critical Failure setting up transmit buffers\n");
3533			error = ENOMEM;
3534			goto err_tx_desc;
3535        	}
3536#if __FreeBSD_version >= 800000
3537		/* Allocate a buf ring */
3538		txr->br = buf_ring_alloc(4096, M_DEVBUF,
3539		    M_WAITOK, &txr->tx_mtx);
3540#endif
3541	}
3542
3543	/*
3544	 * Next the RX queues...
3545	 */
3546	rsize = roundup2(adapter->num_rx_desc *
3547	    sizeof(union e1000_rx_desc_extended), EM_DBA_ALIGN);
3548	for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
3549		rxr = &adapter->rx_rings[i];
3550		rxr->adapter = adapter;
3551		rxr->me = i;
3552
3553		/* Initialize the RX lock */
3554		snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
3555		    device_get_nameunit(dev), txr->me);
3556		mtx_init(&rxr->rx_mtx, rxr->mtx_name, NULL, MTX_DEF);
3557
3558		if (em_dma_malloc(adapter, rsize,
3559			&rxr->rxdma, BUS_DMA_NOWAIT)) {
3560			device_printf(dev,
3561			    "Unable to allocate RxDescriptor memory\n");
3562			error = ENOMEM;
3563			goto err_rx_desc;
3564		}
3565		rxr->rx_base = (union e1000_rx_desc_extended *)rxr->rxdma.dma_vaddr;
3566		bzero((void *)rxr->rx_base, rsize);
3567
3568        	/* Allocate receive buffers for the ring*/
3569		if (em_allocate_receive_buffers(rxr)) {
3570			device_printf(dev,
3571			    "Critical Failure setting up receive buffers\n");
3572			error = ENOMEM;
3573			goto err_rx_desc;
3574		}
3575	}
3576
3577	return (0);
3578
3579err_rx_desc:
3580	for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
3581		em_dma_free(adapter, &rxr->rxdma);
3582err_tx_desc:
3583	for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
3584		em_dma_free(adapter, &txr->txdma);
3585	free(adapter->rx_rings, M_DEVBUF);
3586rx_fail:
3587#if __FreeBSD_version >= 800000
3588	buf_ring_free(txr->br, M_DEVBUF);
3589#endif
3590	free(adapter->tx_rings, M_DEVBUF);
3591fail:
3592	return (error);
3593}
3594
3595
3596/*********************************************************************
3597 *
3598 *  Allocate memory for tx_buffer structures. The tx_buffer stores all
3599 *  the information needed to transmit a packet on the wire. This is
3600 *  called only once at attach, setup is done every reset.
3601 *
3602 **********************************************************************/
3603static int
3604em_allocate_transmit_buffers(struct tx_ring *txr)
3605{
3606	struct adapter *adapter = txr->adapter;
3607	device_t dev = adapter->dev;
3608	struct em_txbuffer *txbuf;
3609	int error, i;
3610
3611	/*
3612	 * Setup DMA descriptor areas.
3613	 */
3614	if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
3615			       1, 0,			/* alignment, bounds */
3616			       BUS_SPACE_MAXADDR,	/* lowaddr */
3617			       BUS_SPACE_MAXADDR,	/* highaddr */
3618			       NULL, NULL,		/* filter, filterarg */
3619			       EM_TSO_SIZE,		/* maxsize */
3620			       EM_MAX_SCATTER,		/* nsegments */
3621			       PAGE_SIZE,		/* maxsegsize */
3622			       0,			/* flags */
3623			       NULL,			/* lockfunc */
3624			       NULL,			/* lockfuncarg */
3625			       &txr->txtag))) {
3626		device_printf(dev,"Unable to allocate TX DMA tag\n");
3627		goto fail;
3628	}
3629
3630	if (!(txr->tx_buffers =
3631	    (struct em_txbuffer *) malloc(sizeof(struct em_txbuffer) *
3632	    adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3633		device_printf(dev, "Unable to allocate tx_buffer memory\n");
3634		error = ENOMEM;
3635		goto fail;
3636	}
3637
3638        /* Create the descriptor buffer dma maps */
3639	txbuf = txr->tx_buffers;
3640	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3641		error = bus_dmamap_create(txr->txtag, 0, &txbuf->map);
3642		if (error != 0) {
3643			device_printf(dev, "Unable to create TX DMA map\n");
3644			goto fail;
3645		}
3646	}
3647
3648	return 0;
3649fail:
3650	/* We free all, it handles case where we are in the middle */
3651	em_free_transmit_structures(adapter);
3652	return (error);
3653}
3654
3655/*********************************************************************
3656 *
3657 *  Initialize a transmit ring.
3658 *
3659 **********************************************************************/
3660static void
3661em_setup_transmit_ring(struct tx_ring *txr)
3662{
3663	struct adapter *adapter = txr->adapter;
3664	struct em_txbuffer *txbuf;
3665	int i;
3666#ifdef DEV_NETMAP
3667	struct netmap_adapter *na = NA(adapter->ifp);
3668	struct netmap_slot *slot;
3669#endif /* DEV_NETMAP */
3670
3671	/* Clear the old descriptor contents */
3672	EM_TX_LOCK(txr);
3673#ifdef DEV_NETMAP
3674	slot = netmap_reset(na, NR_TX, txr->me, 0);
3675#endif /* DEV_NETMAP */
3676
3677	bzero((void *)txr->tx_base,
3678	      (sizeof(struct e1000_tx_desc)) * adapter->num_tx_desc);
3679	/* Reset indices */
3680	txr->next_avail_desc = 0;
3681	txr->next_to_clean = 0;
3682
3683	/* Free any existing tx buffers. */
3684        txbuf = txr->tx_buffers;
3685	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3686		if (txbuf->m_head != NULL) {
3687			bus_dmamap_sync(txr->txtag, txbuf->map,
3688			    BUS_DMASYNC_POSTWRITE);
3689			bus_dmamap_unload(txr->txtag, txbuf->map);
3690			m_freem(txbuf->m_head);
3691			txbuf->m_head = NULL;
3692		}
3693#ifdef DEV_NETMAP
3694		if (slot) {
3695			int si = netmap_idx_n2k(&na->tx_rings[txr->me], i);
3696			uint64_t paddr;
3697			void *addr;
3698
3699			addr = PNMB(na, slot + si, &paddr);
3700			txr->tx_base[i].buffer_addr = htole64(paddr);
3701			/* reload the map for netmap mode */
3702			netmap_load_map(na, txr->txtag, txbuf->map, addr);
3703		}
3704#endif /* DEV_NETMAP */
3705
3706		/* clear the watch index */
3707		txbuf->next_eop = -1;
3708        }
3709
3710	/* Set number of descriptors available */
3711	txr->tx_avail = adapter->num_tx_desc;
3712	txr->busy = EM_TX_IDLE;
3713
3714	/* Clear checksum offload context. */
3715	txr->last_hw_offload = 0;
3716	txr->last_hw_ipcss = 0;
3717	txr->last_hw_ipcso = 0;
3718	txr->last_hw_tucss = 0;
3719	txr->last_hw_tucso = 0;
3720
3721	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3722	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3723	EM_TX_UNLOCK(txr);
3724}
3725
3726/*********************************************************************
3727 *
3728 *  Initialize all transmit rings.
3729 *
3730 **********************************************************************/
3731static void
3732em_setup_transmit_structures(struct adapter *adapter)
3733{
3734	struct tx_ring *txr = adapter->tx_rings;
3735
3736	for (int i = 0; i < adapter->num_queues; i++, txr++)
3737		em_setup_transmit_ring(txr);
3738
3739	return;
3740}
3741
3742/*********************************************************************
3743 *
3744 *  Enable transmit unit.
3745 *
3746 **********************************************************************/
3747static void
3748em_initialize_transmit_unit(struct adapter *adapter)
3749{
3750	struct tx_ring	*txr = adapter->tx_rings;
3751	struct e1000_hw	*hw = &adapter->hw;
3752	u32	tctl, txdctl = 0, tarc, tipg = 0;
3753
3754	 INIT_DEBUGOUT("em_initialize_transmit_unit: begin");
3755
3756	for (int i = 0; i < adapter->num_queues; i++, txr++) {
3757		u64 bus_addr = txr->txdma.dma_paddr;
3758		/* Base and Len of TX Ring */
3759		E1000_WRITE_REG(hw, E1000_TDLEN(i),
3760	    	    adapter->num_tx_desc * sizeof(struct e1000_tx_desc));
3761		E1000_WRITE_REG(hw, E1000_TDBAH(i),
3762	    	    (u32)(bus_addr >> 32));
3763		E1000_WRITE_REG(hw, E1000_TDBAL(i),
3764	    	    (u32)bus_addr);
3765		/* Init the HEAD/TAIL indices */
3766		E1000_WRITE_REG(hw, E1000_TDT(i), 0);
3767		E1000_WRITE_REG(hw, E1000_TDH(i), 0);
3768
3769		HW_DEBUGOUT2("Base = %x, Length = %x\n",
3770		    E1000_READ_REG(&adapter->hw, E1000_TDBAL(i)),
3771		    E1000_READ_REG(&adapter->hw, E1000_TDLEN(i)));
3772
3773		txr->busy = EM_TX_IDLE;
3774		txdctl = 0; /* clear txdctl */
3775                txdctl |= 0x1f; /* PTHRESH */
3776                txdctl |= 1 << 8; /* HTHRESH */
3777                txdctl |= 1 << 16;/* WTHRESH */
3778		txdctl |= 1 << 22; /* Reserved bit 22 must always be 1 */
3779		txdctl |= E1000_TXDCTL_GRAN;
3780                txdctl |= 1 << 25; /* LWTHRESH */
3781
3782                E1000_WRITE_REG(hw, E1000_TXDCTL(i), txdctl);
3783	}
3784
3785	/* Set the default values for the Tx Inter Packet Gap timer */
3786	switch (adapter->hw.mac.type) {
3787	case e1000_80003es2lan:
3788		tipg = DEFAULT_82543_TIPG_IPGR1;
3789		tipg |= DEFAULT_80003ES2LAN_TIPG_IPGR2 <<
3790		    E1000_TIPG_IPGR2_SHIFT;
3791		break;
3792	default:
3793		if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
3794		    (adapter->hw.phy.media_type ==
3795		    e1000_media_type_internal_serdes))
3796			tipg = DEFAULT_82543_TIPG_IPGT_FIBER;
3797		else
3798			tipg = DEFAULT_82543_TIPG_IPGT_COPPER;
3799		tipg |= DEFAULT_82543_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT;
3800		tipg |= DEFAULT_82543_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT;
3801	}
3802
3803	E1000_WRITE_REG(&adapter->hw, E1000_TIPG, tipg);
3804	E1000_WRITE_REG(&adapter->hw, E1000_TIDV, adapter->tx_int_delay.value);
3805
3806	if(adapter->hw.mac.type >= e1000_82540)
3807		E1000_WRITE_REG(&adapter->hw, E1000_TADV,
3808		    adapter->tx_abs_int_delay.value);
3809
3810	if ((adapter->hw.mac.type == e1000_82571) ||
3811	    (adapter->hw.mac.type == e1000_82572)) {
3812		tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
3813		tarc |= TARC_SPEED_MODE_BIT;
3814		E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3815	} else if (adapter->hw.mac.type == e1000_80003es2lan) {
3816		/* errata: program both queues to unweighted RR */
3817		tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
3818		tarc |= 1;
3819		E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3820		tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(1));
3821		tarc |= 1;
3822		E1000_WRITE_REG(&adapter->hw, E1000_TARC(1), tarc);
3823	} else if (adapter->hw.mac.type == e1000_82574) {
3824		tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
3825		tarc |= TARC_ERRATA_BIT;
3826		if ( adapter->num_queues > 1) {
3827			tarc |= (TARC_COMPENSATION_MODE | TARC_MQ_FIX);
3828			E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3829			E1000_WRITE_REG(&adapter->hw, E1000_TARC(1), tarc);
3830		} else
3831			E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3832	}
3833
3834	adapter->txd_cmd = E1000_TXD_CMD_IFCS;
3835	if (adapter->tx_int_delay.value > 0)
3836		adapter->txd_cmd |= E1000_TXD_CMD_IDE;
3837
3838	/* Program the Transmit Control Register */
3839	tctl = E1000_READ_REG(&adapter->hw, E1000_TCTL);
3840	tctl &= ~E1000_TCTL_CT;
3841	tctl |= (E1000_TCTL_PSP | E1000_TCTL_RTLC | E1000_TCTL_EN |
3842		   (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT));
3843
3844	if (adapter->hw.mac.type >= e1000_82571)
3845		tctl |= E1000_TCTL_MULR;
3846
3847	/* This write will effectively turn on the transmit unit. */
3848	E1000_WRITE_REG(&adapter->hw, E1000_TCTL, tctl);
3849
3850	/* SPT and KBL errata workarounds */
3851	if (hw->mac.type == e1000_pch_spt) {
3852		u32 reg;
3853		reg = E1000_READ_REG(hw, E1000_IOSFPC);
3854		reg |= E1000_RCTL_RDMTS_HEX;
3855		E1000_WRITE_REG(hw, E1000_IOSFPC, reg);
3856		/* i218-i219 Specification Update 1.5.4.5 */
3857		reg = E1000_READ_REG(hw, E1000_TARC(0));
3858		reg &= ~E1000_TARC0_CB_MULTIQ_3_REQ;
3859		reg |= E1000_TARC0_CB_MULTIQ_2_REQ;
3860		E1000_WRITE_REG(hw, E1000_TARC(0), reg);
3861	}
3862}
3863
3864
3865/*********************************************************************
3866 *
3867 *  Free all transmit rings.
3868 *
3869 **********************************************************************/
3870static void
3871em_free_transmit_structures(struct adapter *adapter)
3872{
3873	struct tx_ring *txr = adapter->tx_rings;
3874
3875	for (int i = 0; i < adapter->num_queues; i++, txr++) {
3876		EM_TX_LOCK(txr);
3877		em_free_transmit_buffers(txr);
3878		em_dma_free(adapter, &txr->txdma);
3879		EM_TX_UNLOCK(txr);
3880		EM_TX_LOCK_DESTROY(txr);
3881	}
3882
3883	free(adapter->tx_rings, M_DEVBUF);
3884}
3885
3886/*********************************************************************
3887 *
3888 *  Free transmit ring related data structures.
3889 *
3890 **********************************************************************/
3891static void
3892em_free_transmit_buffers(struct tx_ring *txr)
3893{
3894	struct adapter		*adapter = txr->adapter;
3895	struct em_txbuffer	*txbuf;
3896
3897	INIT_DEBUGOUT("free_transmit_ring: begin");
3898
3899	if (txr->tx_buffers == NULL)
3900		return;
3901
3902	for (int i = 0; i < adapter->num_tx_desc; i++) {
3903		txbuf = &txr->tx_buffers[i];
3904		if (txbuf->m_head != NULL) {
3905			bus_dmamap_sync(txr->txtag, txbuf->map,
3906			    BUS_DMASYNC_POSTWRITE);
3907			bus_dmamap_unload(txr->txtag,
3908			    txbuf->map);
3909			m_freem(txbuf->m_head);
3910			txbuf->m_head = NULL;
3911			if (txbuf->map != NULL) {
3912				bus_dmamap_destroy(txr->txtag,
3913				    txbuf->map);
3914				txbuf->map = NULL;
3915			}
3916		} else if (txbuf->map != NULL) {
3917			bus_dmamap_unload(txr->txtag,
3918			    txbuf->map);
3919			bus_dmamap_destroy(txr->txtag,
3920			    txbuf->map);
3921			txbuf->map = NULL;
3922		}
3923	}
3924#if __FreeBSD_version >= 800000
3925	if (txr->br != NULL)
3926		buf_ring_free(txr->br, M_DEVBUF);
3927#endif
3928	if (txr->tx_buffers != NULL) {
3929		free(txr->tx_buffers, M_DEVBUF);
3930		txr->tx_buffers = NULL;
3931	}
3932	if (txr->txtag != NULL) {
3933		bus_dma_tag_destroy(txr->txtag);
3934		txr->txtag = NULL;
3935	}
3936	return;
3937}
3938
3939
3940/*********************************************************************
3941 *  The offload context is protocol specific (TCP/UDP) and thus
3942 *  only needs to be set when the protocol changes. The occasion
3943 *  of a context change can be a performance detriment, and
3944 *  might be better just disabled. The reason arises in the way
3945 *  in which the controller supports pipelined requests from the
3946 *  Tx data DMA. Up to four requests can be pipelined, and they may
3947 *  belong to the same packet or to multiple packets. However all
3948 *  requests for one packet are issued before a request is issued
3949 *  for a subsequent packet and if a request for the next packet
3950 *  requires a context change, that request will be stalled
3951 *  until the previous request completes. This means setting up
3952 *  a new context effectively disables pipelined Tx data DMA which
3953 *  in turn greatly slow down performance to send small sized
3954 *  frames.
3955 **********************************************************************/
3956static void
3957em_transmit_checksum_setup(struct tx_ring *txr, struct mbuf *mp, int ip_off,
3958    struct ip *ip, u32 *txd_upper, u32 *txd_lower)
3959{
3960	struct adapter			*adapter = txr->adapter;
3961	struct e1000_context_desc	*TXD = NULL;
3962	struct em_txbuffer		*tx_buffer;
3963	int				cur, hdr_len;
3964	u32				cmd = 0;
3965	u16				offload = 0;
3966	u8				ipcso, ipcss, tucso, tucss;
3967
3968	ipcss = ipcso = tucss = tucso = 0;
3969	hdr_len = ip_off + (ip->ip_hl << 2);
3970	cur = txr->next_avail_desc;
3971
3972	/* Setup of IP header checksum. */
3973	if (mp->m_pkthdr.csum_flags & CSUM_IP) {
3974		*txd_upper |= E1000_TXD_POPTS_IXSM << 8;
3975		offload |= CSUM_IP;
3976		ipcss = ip_off;
3977		ipcso = ip_off + offsetof(struct ip, ip_sum);
3978		/*
3979		 * Start offset for header checksum calculation.
3980		 * End offset for header checksum calculation.
3981		 * Offset of place to put the checksum.
3982		 */
3983		TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
3984		TXD->lower_setup.ip_fields.ipcss = ipcss;
3985		TXD->lower_setup.ip_fields.ipcse = htole16(hdr_len);
3986		TXD->lower_setup.ip_fields.ipcso = ipcso;
3987		cmd |= E1000_TXD_CMD_IP;
3988	}
3989
3990	if (mp->m_pkthdr.csum_flags & CSUM_TCP) {
3991 		*txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
3992 		*txd_upper |= E1000_TXD_POPTS_TXSM << 8;
3993 		offload |= CSUM_TCP;
3994 		tucss = hdr_len;
3995 		tucso = hdr_len + offsetof(struct tcphdr, th_sum);
3996		/*
3997		 * The 82574L can only remember the *last* context used
3998		 * regardless of queue that it was use for.  We cannot reuse
3999		 * contexts on this hardware platform and must generate a new
4000		 * context every time.  82574L hardware spec, section 7.2.6,
4001		 * second note.
4002		 */
4003		if (adapter->num_queues < 2) {
4004 			/*
4005 		 	* Setting up new checksum offload context for every
4006			* frames takes a lot of processing time for hardware.
4007			* This also reduces performance a lot for small sized
4008			* frames so avoid it if driver can use previously
4009			* configured checksum offload context.
4010 		 	*/
4011 			if (txr->last_hw_offload == offload) {
4012 				if (offload & CSUM_IP) {
4013 					if (txr->last_hw_ipcss == ipcss &&
4014 				    	txr->last_hw_ipcso == ipcso &&
4015 				    	txr->last_hw_tucss == tucss &&
4016 				    	txr->last_hw_tucso == tucso)
4017 						return;
4018 				} else {
4019 					if (txr->last_hw_tucss == tucss &&
4020 				    	txr->last_hw_tucso == tucso)
4021 						return;
4022 				}
4023  			}
4024 			txr->last_hw_offload = offload;
4025 			txr->last_hw_tucss = tucss;
4026 			txr->last_hw_tucso = tucso;
4027		}
4028 		/*
4029 		 * Start offset for payload checksum calculation.
4030 		 * End offset for payload checksum calculation.
4031 		 * Offset of place to put the checksum.
4032 		 */
4033		TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
4034 		TXD->upper_setup.tcp_fields.tucss = hdr_len;
4035 		TXD->upper_setup.tcp_fields.tucse = htole16(0);
4036 		TXD->upper_setup.tcp_fields.tucso = tucso;
4037 		cmd |= E1000_TXD_CMD_TCP;
4038 	} else if (mp->m_pkthdr.csum_flags & CSUM_UDP) {
4039 		*txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
4040 		*txd_upper |= E1000_TXD_POPTS_TXSM << 8;
4041 		tucss = hdr_len;
4042 		tucso = hdr_len + offsetof(struct udphdr, uh_sum);
4043		/*
4044		 * The 82574L can only remember the *last* context used
4045		 * regardless of queue that it was use for.  We cannot reuse
4046		 * contexts on this hardware platform and must generate a new
4047		 * context every time.  82574L hardware spec, section 7.2.6,
4048		 * second note.
4049		 */
4050		if (adapter->num_queues < 2) {
4051 			/*
4052 		 	* Setting up new checksum offload context for every
4053			* frames takes a lot of processing time for hardware.
4054			* This also reduces performance a lot for small sized
4055			* frames so avoid it if driver can use previously
4056			* configured checksum offload context.
4057 		 	*/
4058 			if (txr->last_hw_offload == offload) {
4059 				if (offload & CSUM_IP) {
4060 					if (txr->last_hw_ipcss == ipcss &&
4061 				    	txr->last_hw_ipcso == ipcso &&
4062 				    	txr->last_hw_tucss == tucss &&
4063 				    	txr->last_hw_tucso == tucso)
4064 						return;
4065 				} else {
4066 					if (txr->last_hw_tucss == tucss &&
4067 				    	txr->last_hw_tucso == tucso)
4068 						return;
4069 				}
4070 			}
4071 			txr->last_hw_offload = offload;
4072 			txr->last_hw_tucss = tucss;
4073 			txr->last_hw_tucso = tucso;
4074		}
4075 		/*
4076 		 * Start offset for header checksum calculation.
4077 		 * End offset for header checksum calculation.
4078 		 * Offset of place to put the checksum.
4079 		 */
4080		TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
4081 		TXD->upper_setup.tcp_fields.tucss = tucss;
4082 		TXD->upper_setup.tcp_fields.tucse = htole16(0);
4083 		TXD->upper_setup.tcp_fields.tucso = tucso;
4084  	}
4085
4086 	if (offload & CSUM_IP) {
4087 		txr->last_hw_ipcss = ipcss;
4088 		txr->last_hw_ipcso = ipcso;
4089  	}
4090
4091	TXD->tcp_seg_setup.data = htole32(0);
4092	TXD->cmd_and_length =
4093	    htole32(adapter->txd_cmd | E1000_TXD_CMD_DEXT | cmd);
4094	tx_buffer = &txr->tx_buffers[cur];
4095	tx_buffer->m_head = NULL;
4096	tx_buffer->next_eop = -1;
4097
4098	if (++cur == adapter->num_tx_desc)
4099		cur = 0;
4100
4101	txr->tx_avail--;
4102	txr->next_avail_desc = cur;
4103}
4104
4105
4106/**********************************************************************
4107 *
4108 *  Setup work for hardware segmentation offload (TSO)
4109 *
4110 **********************************************************************/
4111static void
4112em_tso_setup(struct tx_ring *txr, struct mbuf *mp, int ip_off,
4113    struct ip *ip, struct tcphdr *tp, u32 *txd_upper, u32 *txd_lower)
4114{
4115	struct adapter			*adapter = txr->adapter;
4116	struct e1000_context_desc	*TXD;
4117	struct em_txbuffer		*tx_buffer;
4118	int cur, hdr_len;
4119
4120	/*
4121	 * In theory we can use the same TSO context if and only if
4122	 * frame is the same type(IP/TCP) and the same MSS. However
4123	 * checking whether a frame has the same IP/TCP structure is
4124	 * hard thing so just ignore that and always restablish a
4125	 * new TSO context.
4126	 */
4127	hdr_len = ip_off + (ip->ip_hl << 2) + (tp->th_off << 2);
4128	*txd_lower = (E1000_TXD_CMD_DEXT |	/* Extended descr type */
4129		      E1000_TXD_DTYP_D |	/* Data descr type */
4130		      E1000_TXD_CMD_TSE);	/* Do TSE on this packet */
4131
4132	/* IP and/or TCP header checksum calculation and insertion. */
4133	*txd_upper = (E1000_TXD_POPTS_IXSM | E1000_TXD_POPTS_TXSM) << 8;
4134
4135	cur = txr->next_avail_desc;
4136	tx_buffer = &txr->tx_buffers[cur];
4137	TXD = (struct e1000_context_desc *) &txr->tx_base[cur];
4138
4139	/*
4140	 * Start offset for header checksum calculation.
4141	 * End offset for header checksum calculation.
4142	 * Offset of place put the checksum.
4143	 */
4144	TXD->lower_setup.ip_fields.ipcss = ip_off;
4145	TXD->lower_setup.ip_fields.ipcse =
4146	    htole16(ip_off + (ip->ip_hl << 2) - 1);
4147	TXD->lower_setup.ip_fields.ipcso = ip_off + offsetof(struct ip, ip_sum);
4148	/*
4149	 * Start offset for payload checksum calculation.
4150	 * End offset for payload checksum calculation.
4151	 * Offset of place to put the checksum.
4152	 */
4153	TXD->upper_setup.tcp_fields.tucss = ip_off + (ip->ip_hl << 2);
4154	TXD->upper_setup.tcp_fields.tucse = 0;
4155	TXD->upper_setup.tcp_fields.tucso =
4156	    ip_off + (ip->ip_hl << 2) + offsetof(struct tcphdr, th_sum);
4157	/*
4158	 * Payload size per packet w/o any headers.
4159	 * Length of all headers up to payload.
4160	 */
4161	TXD->tcp_seg_setup.fields.mss = htole16(mp->m_pkthdr.tso_segsz);
4162	TXD->tcp_seg_setup.fields.hdr_len = hdr_len;
4163
4164	TXD->cmd_and_length = htole32(adapter->txd_cmd |
4165				E1000_TXD_CMD_DEXT |	/* Extended descr */
4166				E1000_TXD_CMD_TSE |	/* TSE context */
4167				E1000_TXD_CMD_IP |	/* Do IP csum */
4168				E1000_TXD_CMD_TCP |	/* Do TCP checksum */
4169				(mp->m_pkthdr.len - (hdr_len))); /* Total len */
4170
4171	tx_buffer->m_head = NULL;
4172	tx_buffer->next_eop = -1;
4173
4174	if (++cur == adapter->num_tx_desc)
4175		cur = 0;
4176
4177	txr->tx_avail--;
4178	txr->next_avail_desc = cur;
4179	txr->tx_tso = TRUE;
4180}
4181
4182
4183/**********************************************************************
4184 *
4185 *  Examine each tx_buffer in the used queue. If the hardware is done
4186 *  processing the packet then free associated resources. The
4187 *  tx_buffer is put back on the free queue.
4188 *
4189 **********************************************************************/
4190static void
4191em_txeof(struct tx_ring *txr)
4192{
4193	struct adapter	*adapter = txr->adapter;
4194        int first, last, done, processed;
4195        struct em_txbuffer *tx_buffer;
4196        struct e1000_tx_desc   *tx_desc, *eop_desc;
4197	struct ifnet   *ifp = adapter->ifp;
4198
4199	EM_TX_LOCK_ASSERT(txr);
4200#ifdef DEV_NETMAP
4201	if (netmap_tx_irq(ifp, txr->me))
4202		return;
4203#endif /* DEV_NETMAP */
4204
4205	/* No work, make sure hang detection is disabled */
4206        if (txr->tx_avail == adapter->num_tx_desc) {
4207		txr->busy = EM_TX_IDLE;
4208                return;
4209	}
4210
4211	processed = 0;
4212        first = txr->next_to_clean;
4213        tx_desc = &txr->tx_base[first];
4214        tx_buffer = &txr->tx_buffers[first];
4215	last = tx_buffer->next_eop;
4216        eop_desc = &txr->tx_base[last];
4217
4218	/*
4219	 * What this does is get the index of the
4220	 * first descriptor AFTER the EOP of the
4221	 * first packet, that way we can do the
4222	 * simple comparison on the inner while loop.
4223	 */
4224	if (++last == adapter->num_tx_desc)
4225 		last = 0;
4226	done = last;
4227
4228        bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
4229            BUS_DMASYNC_POSTREAD);
4230
4231        while (eop_desc->upper.fields.status & E1000_TXD_STAT_DD) {
4232		/* We clean the range of the packet */
4233		while (first != done) {
4234                	tx_desc->upper.data = 0;
4235                	tx_desc->lower.data = 0;
4236                	tx_desc->buffer_addr = 0;
4237                	++txr->tx_avail;
4238			++processed;
4239
4240			if (tx_buffer->m_head) {
4241				bus_dmamap_sync(txr->txtag,
4242				    tx_buffer->map,
4243				    BUS_DMASYNC_POSTWRITE);
4244				bus_dmamap_unload(txr->txtag,
4245				    tx_buffer->map);
4246                        	m_freem(tx_buffer->m_head);
4247                        	tx_buffer->m_head = NULL;
4248                	}
4249			tx_buffer->next_eop = -1;
4250
4251	                if (++first == adapter->num_tx_desc)
4252				first = 0;
4253
4254	                tx_buffer = &txr->tx_buffers[first];
4255			tx_desc = &txr->tx_base[first];
4256		}
4257		++ifp->if_opackets;
4258		/* See if we can continue to the next packet */
4259		last = tx_buffer->next_eop;
4260		if (last != -1) {
4261        		eop_desc = &txr->tx_base[last];
4262			/* Get new done point */
4263			if (++last == adapter->num_tx_desc) last = 0;
4264			done = last;
4265		} else
4266			break;
4267        }
4268        bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
4269            BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4270
4271        txr->next_to_clean = first;
4272
4273	/*
4274	** Hang detection: we know there's work outstanding
4275	** or the entry return would have been taken, so no
4276	** descriptor processed here indicates a potential hang.
4277	** The local timer will examine this and do a reset if needed.
4278	*/
4279	if (processed == 0) {
4280		if (txr->busy != EM_TX_HUNG)
4281			++txr->busy;
4282	} else /* At least one descriptor was cleaned */
4283		txr->busy = EM_TX_BUSY; /* note this clears HUNG */
4284
4285        /*
4286         * If we have a minimum free, clear IFF_DRV_OACTIVE
4287         * to tell the stack that it is OK to send packets.
4288	 * Notice that all writes of OACTIVE happen under the
4289	 * TX lock which, with a single queue, guarantees
4290	 * sanity.
4291         */
4292        if (txr->tx_avail >= EM_MAX_SCATTER) {
4293		ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
4294	}
4295
4296	/* Disable hang detection if all clean */
4297	if (txr->tx_avail == adapter->num_tx_desc)
4298		txr->busy = EM_TX_IDLE;
4299}
4300
4301/*********************************************************************
4302 *
4303 *  Refresh RX descriptor mbufs from system mbuf buffer pool.
4304 *
4305 **********************************************************************/
4306static void
4307em_refresh_mbufs(struct rx_ring *rxr, int limit)
4308{
4309	struct adapter		*adapter = rxr->adapter;
4310	struct mbuf		*m;
4311	bus_dma_segment_t	segs;
4312	struct em_rxbuffer	*rxbuf;
4313	int			i, j, error, nsegs;
4314	bool			cleaned = FALSE;
4315
4316	i = j = rxr->next_to_refresh;
4317	/*
4318	** Get one descriptor beyond
4319	** our work mark to control
4320	** the loop.
4321	*/
4322	if (++j == adapter->num_rx_desc)
4323		j = 0;
4324
4325	while (j != limit) {
4326		rxbuf = &rxr->rx_buffers[i];
4327		if (rxbuf->m_head == NULL) {
4328			m = m_getjcl(M_NOWAIT, MT_DATA,
4329			    M_PKTHDR, adapter->rx_mbuf_sz);
4330			/*
4331			** If we have a temporary resource shortage
4332			** that causes a failure, just abort refresh
4333			** for now, we will return to this point when
4334			** reinvoked from em_rxeof.
4335			*/
4336			if (m == NULL)
4337				goto update;
4338		} else
4339			m = rxbuf->m_head;
4340
4341		m->m_len = m->m_pkthdr.len = adapter->rx_mbuf_sz;
4342		m->m_flags |= M_PKTHDR;
4343		m->m_data = m->m_ext.ext_buf;
4344
4345		/* Use bus_dma machinery to setup the memory mapping  */
4346		error = bus_dmamap_load_mbuf_sg(rxr->rxtag, rxbuf->map,
4347		    m, &segs, &nsegs, BUS_DMA_NOWAIT);
4348		if (error != 0) {
4349			printf("Refresh mbufs: hdr dmamap load"
4350			    " failure - %d\n", error);
4351			m_free(m);
4352			rxbuf->m_head = NULL;
4353			goto update;
4354		}
4355		rxbuf->m_head = m;
4356		rxbuf->paddr = segs.ds_addr;
4357		bus_dmamap_sync(rxr->rxtag,
4358		    rxbuf->map, BUS_DMASYNC_PREREAD);
4359		em_setup_rxdesc(&rxr->rx_base[i], rxbuf);
4360		cleaned = TRUE;
4361
4362		i = j; /* Next is precalulated for us */
4363		rxr->next_to_refresh = i;
4364		/* Calculate next controlling index */
4365		if (++j == adapter->num_rx_desc)
4366			j = 0;
4367	}
4368update:
4369	/*
4370	** Update the tail pointer only if,
4371	** and as far as we have refreshed.
4372	*/
4373	if (cleaned)
4374		E1000_WRITE_REG(&adapter->hw,
4375		    E1000_RDT(rxr->me), rxr->next_to_refresh);
4376
4377	return;
4378}
4379
4380
4381/*********************************************************************
4382 *
4383 *  Allocate memory for rx_buffer structures. Since we use one
4384 *  rx_buffer per received packet, the maximum number of rx_buffer's
4385 *  that we'll need is equal to the number of receive descriptors
4386 *  that we've allocated.
4387 *
4388 **********************************************************************/
4389static int
4390em_allocate_receive_buffers(struct rx_ring *rxr)
4391{
4392	struct adapter		*adapter = rxr->adapter;
4393	device_t		dev = adapter->dev;
4394	struct em_rxbuffer	*rxbuf;
4395	int			error;
4396
4397	rxr->rx_buffers = malloc(sizeof(struct em_rxbuffer) *
4398	    adapter->num_rx_desc, M_DEVBUF, M_NOWAIT | M_ZERO);
4399	if (rxr->rx_buffers == NULL) {
4400		device_printf(dev, "Unable to allocate rx_buffer memory\n");
4401		return (ENOMEM);
4402	}
4403
4404	error = bus_dma_tag_create(bus_get_dma_tag(dev), /* parent */
4405				1, 0,			/* alignment, bounds */
4406				BUS_SPACE_MAXADDR,	/* lowaddr */
4407				BUS_SPACE_MAXADDR,	/* highaddr */
4408				NULL, NULL,		/* filter, filterarg */
4409				MJUM9BYTES,		/* maxsize */
4410				1,			/* nsegments */
4411				MJUM9BYTES,		/* maxsegsize */
4412				0,			/* flags */
4413				NULL,			/* lockfunc */
4414				NULL,			/* lockarg */
4415				&rxr->rxtag);
4416	if (error) {
4417		device_printf(dev, "%s: bus_dma_tag_create failed %d\n",
4418		    __func__, error);
4419		goto fail;
4420	}
4421
4422	rxbuf = rxr->rx_buffers;
4423	for (int i = 0; i < adapter->num_rx_desc; i++, rxbuf++) {
4424		rxbuf = &rxr->rx_buffers[i];
4425		error = bus_dmamap_create(rxr->rxtag, 0, &rxbuf->map);
4426		if (error) {
4427			device_printf(dev, "%s: bus_dmamap_create failed: %d\n",
4428			    __func__, error);
4429			goto fail;
4430		}
4431	}
4432
4433	return (0);
4434
4435fail:
4436	em_free_receive_structures(adapter);
4437	return (error);
4438}
4439
4440
4441/*********************************************************************
4442 *
4443 *  Initialize a receive ring and its buffers.
4444 *
4445 **********************************************************************/
4446static int
4447em_setup_receive_ring(struct rx_ring *rxr)
4448{
4449	struct	adapter 	*adapter = rxr->adapter;
4450	struct em_rxbuffer	*rxbuf;
4451	bus_dma_segment_t	seg[1];
4452	int			rsize, nsegs, error = 0;
4453#ifdef DEV_NETMAP
4454	struct netmap_adapter *na = NA(adapter->ifp);
4455	struct netmap_slot *slot;
4456#endif
4457
4458
4459	/* Clear the ring contents */
4460	EM_RX_LOCK(rxr);
4461	rsize = roundup2(adapter->num_rx_desc *
4462	    sizeof(union e1000_rx_desc_extended), EM_DBA_ALIGN);
4463	bzero((void *)rxr->rx_base, rsize);
4464#ifdef DEV_NETMAP
4465	slot = netmap_reset(na, NR_RX, 0, 0);
4466#endif
4467
4468	/*
4469	** Free current RX buffer structs and their mbufs
4470	*/
4471	for (int i = 0; i < adapter->num_rx_desc; i++) {
4472		rxbuf = &rxr->rx_buffers[i];
4473		if (rxbuf->m_head != NULL) {
4474			bus_dmamap_sync(rxr->rxtag, rxbuf->map,
4475			    BUS_DMASYNC_POSTREAD);
4476			bus_dmamap_unload(rxr->rxtag, rxbuf->map);
4477			m_freem(rxbuf->m_head);
4478			rxbuf->m_head = NULL; /* mark as freed */
4479		}
4480	}
4481
4482	/* Now replenish the mbufs */
4483        for (int j = 0; j != adapter->num_rx_desc; ++j) {
4484		rxbuf = &rxr->rx_buffers[j];
4485#ifdef DEV_NETMAP
4486		if (slot) {
4487			int si = netmap_idx_n2k(&na->rx_rings[rxr->me], j);
4488			uint64_t paddr;
4489			void *addr;
4490
4491			addr = PNMB(na, slot + si, &paddr);
4492			netmap_load_map(na, rxr->rxtag, rxbuf->map, addr);
4493			rxbuf->paddr = paddr;
4494			em_setup_rxdesc(&rxr->rx_base[j], rxbuf);
4495			continue;
4496		}
4497#endif /* DEV_NETMAP */
4498		rxbuf->m_head = m_getjcl(M_NOWAIT, MT_DATA,
4499		    M_PKTHDR, adapter->rx_mbuf_sz);
4500		if (rxbuf->m_head == NULL) {
4501			error = ENOBUFS;
4502			goto fail;
4503		}
4504		rxbuf->m_head->m_len = adapter->rx_mbuf_sz;
4505		rxbuf->m_head->m_flags &= ~M_HASFCS; /* we strip it */
4506		rxbuf->m_head->m_pkthdr.len = adapter->rx_mbuf_sz;
4507
4508		/* Get the memory mapping */
4509		error = bus_dmamap_load_mbuf_sg(rxr->rxtag,
4510		    rxbuf->map, rxbuf->m_head, seg,
4511		    &nsegs, BUS_DMA_NOWAIT);
4512		if (error != 0) {
4513			m_freem(rxbuf->m_head);
4514			rxbuf->m_head = NULL;
4515			goto fail;
4516		}
4517		bus_dmamap_sync(rxr->rxtag,
4518		    rxbuf->map, BUS_DMASYNC_PREREAD);
4519
4520		rxbuf->paddr = seg[0].ds_addr;
4521		em_setup_rxdesc(&rxr->rx_base[j], rxbuf);
4522	}
4523	rxr->next_to_check = 0;
4524	rxr->next_to_refresh = 0;
4525	bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4526	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4527
4528fail:
4529	EM_RX_UNLOCK(rxr);
4530	return (error);
4531}
4532
4533/*********************************************************************
4534 *
4535 *  Initialize all receive rings.
4536 *
4537 **********************************************************************/
4538static int
4539em_setup_receive_structures(struct adapter *adapter)
4540{
4541	struct rx_ring *rxr = adapter->rx_rings;
4542	int q;
4543
4544	for (q = 0; q < adapter->num_queues; q++, rxr++)
4545		if (em_setup_receive_ring(rxr))
4546			goto fail;
4547
4548	return (0);
4549fail:
4550	/*
4551	 * Free RX buffers allocated so far, we will only handle
4552	 * the rings that completed, the failing case will have
4553	 * cleaned up for itself. 'q' failed, so its the terminus.
4554	 */
4555	for (int i = 0; i < q; ++i) {
4556		rxr = &adapter->rx_rings[i];
4557		for (int n = 0; n < adapter->num_rx_desc; n++) {
4558			struct em_rxbuffer *rxbuf;
4559			rxbuf = &rxr->rx_buffers[n];
4560			if (rxbuf->m_head != NULL) {
4561				bus_dmamap_sync(rxr->rxtag, rxbuf->map,
4562			  	  BUS_DMASYNC_POSTREAD);
4563				bus_dmamap_unload(rxr->rxtag, rxbuf->map);
4564				m_freem(rxbuf->m_head);
4565				rxbuf->m_head = NULL;
4566			}
4567		}
4568		rxr->next_to_check = 0;
4569		rxr->next_to_refresh = 0;
4570	}
4571
4572	return (ENOBUFS);
4573}
4574
4575/*********************************************************************
4576 *
4577 *  Free all receive rings.
4578 *
4579 **********************************************************************/
4580static void
4581em_free_receive_structures(struct adapter *adapter)
4582{
4583	struct rx_ring *rxr = adapter->rx_rings;
4584
4585	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4586		em_free_receive_buffers(rxr);
4587		/* Free the ring memory as well */
4588		em_dma_free(adapter, &rxr->rxdma);
4589		EM_RX_LOCK_DESTROY(rxr);
4590	}
4591
4592	free(adapter->rx_rings, M_DEVBUF);
4593}
4594
4595
4596/*********************************************************************
4597 *
4598 *  Free receive ring data structures
4599 *
4600 **********************************************************************/
4601static void
4602em_free_receive_buffers(struct rx_ring *rxr)
4603{
4604	struct adapter		*adapter = rxr->adapter;
4605	struct em_rxbuffer	*rxbuf = NULL;
4606
4607	INIT_DEBUGOUT("free_receive_buffers: begin");
4608
4609	if (rxr->rx_buffers != NULL) {
4610		for (int i = 0; i < adapter->num_rx_desc; i++) {
4611			rxbuf = &rxr->rx_buffers[i];
4612			if (rxbuf->map != NULL) {
4613				bus_dmamap_sync(rxr->rxtag, rxbuf->map,
4614				    BUS_DMASYNC_POSTREAD);
4615				bus_dmamap_unload(rxr->rxtag, rxbuf->map);
4616				bus_dmamap_destroy(rxr->rxtag, rxbuf->map);
4617			}
4618			if (rxbuf->m_head != NULL) {
4619				m_freem(rxbuf->m_head);
4620				rxbuf->m_head = NULL;
4621			}
4622		}
4623		free(rxr->rx_buffers, M_DEVBUF);
4624		rxr->rx_buffers = NULL;
4625		rxr->next_to_check = 0;
4626		rxr->next_to_refresh = 0;
4627	}
4628
4629	if (rxr->rxtag != NULL) {
4630		bus_dma_tag_destroy(rxr->rxtag);
4631		rxr->rxtag = NULL;
4632	}
4633
4634	return;
4635}
4636
4637
4638/*********************************************************************
4639 *
4640 *  Enable receive unit.
4641 *
4642 **********************************************************************/
4643
4644static void
4645em_initialize_receive_unit(struct adapter *adapter)
4646{
4647	struct rx_ring *rxr = adapter->rx_rings;
4648	struct ifnet	*ifp = adapter->ifp;
4649	struct e1000_hw	*hw = &adapter->hw;
4650	u32	rctl, rxcsum, rfctl;
4651
4652	INIT_DEBUGOUT("em_initialize_receive_units: begin");
4653
4654	/*
4655	 * Make sure receives are disabled while setting
4656	 * up the descriptor ring
4657	 */
4658	rctl = E1000_READ_REG(hw, E1000_RCTL);
4659	/* Do not disable if ever enabled on this hardware */
4660	if ((hw->mac.type != e1000_82574) && (hw->mac.type != e1000_82583))
4661		E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
4662
4663	/* Setup the Receive Control Register */
4664	rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
4665	rctl |= E1000_RCTL_EN | E1000_RCTL_BAM |
4666	    E1000_RCTL_LBM_NO | E1000_RCTL_RDMTS_HALF |
4667	    (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
4668
4669	/* Do not store bad packets */
4670	rctl &= ~E1000_RCTL_SBP;
4671
4672	/* Enable Long Packet receive */
4673	if (ifp->if_mtu > ETHERMTU)
4674		rctl |= E1000_RCTL_LPE;
4675	else
4676		rctl &= ~E1000_RCTL_LPE;
4677
4678        /* Strip the CRC */
4679        if (!em_disable_crc_stripping)
4680		rctl |= E1000_RCTL_SECRC;
4681
4682	E1000_WRITE_REG(&adapter->hw, E1000_RADV,
4683	    adapter->rx_abs_int_delay.value);
4684
4685	E1000_WRITE_REG(&adapter->hw, E1000_RDTR,
4686	    adapter->rx_int_delay.value);
4687	/*
4688	 * Set the interrupt throttling rate. Value is calculated
4689	 * as DEFAULT_ITR = 1/(MAX_INTS_PER_SEC * 256ns)
4690	 */
4691	E1000_WRITE_REG(hw, E1000_ITR, DEFAULT_ITR);
4692
4693	/* Use extended rx descriptor formats */
4694	rfctl = E1000_READ_REG(hw, E1000_RFCTL);
4695	rfctl |= E1000_RFCTL_EXTEN;
4696	/*
4697	** When using MSIX interrupts we need to throttle
4698	** using the EITR register (82574 only)
4699	*/
4700	if (hw->mac.type == e1000_82574) {
4701		for (int i = 0; i < 4; i++)
4702			E1000_WRITE_REG(hw, E1000_EITR_82574(i),
4703			    DEFAULT_ITR);
4704		/* Disable accelerated acknowledge */
4705		rfctl |= E1000_RFCTL_ACK_DIS;
4706	}
4707	E1000_WRITE_REG(hw, E1000_RFCTL, rfctl);
4708
4709	rxcsum = E1000_READ_REG(hw, E1000_RXCSUM);
4710	if (ifp->if_capenable & IFCAP_RXCSUM) {
4711#ifdef EM_MULTIQUEUE
4712		rxcsum |= E1000_RXCSUM_TUOFL |
4713			  E1000_RXCSUM_IPOFL |
4714			  E1000_RXCSUM_PCSD;
4715#else
4716		rxcsum |= E1000_RXCSUM_TUOFL;
4717#endif
4718	} else
4719		rxcsum &= ~E1000_RXCSUM_TUOFL;
4720
4721	E1000_WRITE_REG(hw, E1000_RXCSUM, rxcsum);
4722
4723#ifdef EM_MULTIQUEUE
4724#define RSSKEYLEN 10
4725	if (adapter->num_queues > 1) {
4726		uint8_t  rss_key[4 * RSSKEYLEN];
4727		uint32_t reta = 0;
4728		int i;
4729
4730		/*
4731		* Configure RSS key
4732		*/
4733		arc4rand(rss_key, sizeof(rss_key), 0);
4734		for (i = 0; i < RSSKEYLEN; ++i) {
4735			uint32_t rssrk = 0;
4736
4737			rssrk = EM_RSSRK_VAL(rss_key, i);
4738			E1000_WRITE_REG(hw,E1000_RSSRK(i), rssrk);
4739		}
4740
4741		/*
4742		* Configure RSS redirect table in following fashion:
4743		* (hash & ring_cnt_mask) == rdr_table[(hash & rdr_table_mask)]
4744		*/
4745		for (i = 0; i < sizeof(reta); ++i) {
4746			uint32_t q;
4747
4748			q = (i % adapter->num_queues) << 7;
4749			reta |= q << (8 * i);
4750		}
4751
4752		for (i = 0; i < 32; ++i) {
4753			E1000_WRITE_REG(hw, E1000_RETA(i), reta);
4754		}
4755
4756		E1000_WRITE_REG(hw, E1000_MRQC, E1000_MRQC_RSS_ENABLE_2Q |
4757				E1000_MRQC_RSS_FIELD_IPV4_TCP |
4758				E1000_MRQC_RSS_FIELD_IPV4 |
4759				E1000_MRQC_RSS_FIELD_IPV6_TCP_EX |
4760				E1000_MRQC_RSS_FIELD_IPV6_EX |
4761				E1000_MRQC_RSS_FIELD_IPV6);
4762	}
4763#endif
4764	/*
4765	** XXX TEMPORARY WORKAROUND: on some systems with 82573
4766	** long latencies are observed, like Lenovo X60. This
4767	** change eliminates the problem, but since having positive
4768	** values in RDTR is a known source of problems on other
4769	** platforms another solution is being sought.
4770	*/
4771	if (hw->mac.type == e1000_82573)
4772		E1000_WRITE_REG(hw, E1000_RDTR, 0x20);
4773
4774	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4775		/* Setup the Base and Length of the Rx Descriptor Ring */
4776		u64 bus_addr = rxr->rxdma.dma_paddr;
4777		u32 rdt = adapter->num_rx_desc - 1; /* default */
4778
4779		E1000_WRITE_REG(hw, E1000_RDLEN(i),
4780		    adapter->num_rx_desc * sizeof(union e1000_rx_desc_extended));
4781		E1000_WRITE_REG(hw, E1000_RDBAH(i), (u32)(bus_addr >> 32));
4782		E1000_WRITE_REG(hw, E1000_RDBAL(i), (u32)bus_addr);
4783		/* Setup the Head and Tail Descriptor Pointers */
4784		E1000_WRITE_REG(hw, E1000_RDH(i), 0);
4785#ifdef DEV_NETMAP
4786		/*
4787		 * an init() while a netmap client is active must
4788		 * preserve the rx buffers passed to userspace.
4789		 */
4790		if (ifp->if_capenable & IFCAP_NETMAP)
4791			rdt -= nm_kr_rxspace(&NA(adapter->ifp)->rx_rings[i]);
4792#endif /* DEV_NETMAP */
4793		E1000_WRITE_REG(hw, E1000_RDT(i), rdt);
4794	}
4795
4796	/*
4797	 * Set PTHRESH for improved jumbo performance
4798	 * According to 10.2.5.11 of Intel 82574 Datasheet,
4799	 * RXDCTL(1) is written whenever RXDCTL(0) is written.
4800	 * Only write to RXDCTL(1) if there is a need for different
4801	 * settings.
4802	 */
4803	if (((adapter->hw.mac.type == e1000_ich9lan) ||
4804	    (adapter->hw.mac.type == e1000_pch2lan) ||
4805	    (adapter->hw.mac.type == e1000_ich10lan)) &&
4806	    (ifp->if_mtu > ETHERMTU)) {
4807		u32 rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(0));
4808		E1000_WRITE_REG(hw, E1000_RXDCTL(0), rxdctl | 3);
4809	} else if (adapter->hw.mac.type == e1000_82574) {
4810		for (int i = 0; i < adapter->num_queues; i++) {
4811			u32 rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(i));
4812
4813			rxdctl |= 0x20; /* PTHRESH */
4814			rxdctl |= 4 << 8; /* HTHRESH */
4815			rxdctl |= 4 << 16;/* WTHRESH */
4816			rxdctl |= 1 << 24; /* Switch to granularity */
4817			E1000_WRITE_REG(hw, E1000_RXDCTL(i), rxdctl);
4818		}
4819	}
4820
4821	if (adapter->hw.mac.type >= e1000_pch2lan) {
4822		if (ifp->if_mtu > ETHERMTU)
4823			e1000_lv_jumbo_workaround_ich8lan(hw, TRUE);
4824		else
4825			e1000_lv_jumbo_workaround_ich8lan(hw, FALSE);
4826	}
4827
4828        /* Make sure VLAN Filters are off */
4829        rctl &= ~E1000_RCTL_VFE;
4830
4831	if (adapter->rx_mbuf_sz == MCLBYTES)
4832		rctl |= E1000_RCTL_SZ_2048;
4833	else if (adapter->rx_mbuf_sz == MJUMPAGESIZE)
4834		rctl |= E1000_RCTL_SZ_4096 | E1000_RCTL_BSEX;
4835	else if (adapter->rx_mbuf_sz > MJUMPAGESIZE)
4836		rctl |= E1000_RCTL_SZ_8192 | E1000_RCTL_BSEX;
4837
4838	/* ensure we clear use DTYPE of 00 here */
4839	rctl &= ~0x00000C00;
4840	/* Write out the settings */
4841	E1000_WRITE_REG(hw, E1000_RCTL, rctl);
4842
4843	return;
4844}
4845
4846
4847/*********************************************************************
4848 *
4849 *  This routine executes in interrupt context. It replenishes
4850 *  the mbufs in the descriptor and sends data which has been
4851 *  dma'ed into host memory to upper layer.
4852 *
4853 *  We loop at most count times if count is > 0, or until done if
4854 *  count < 0.
4855 *
4856 *  For polling we also now return the number of cleaned packets
4857 *********************************************************************/
4858static bool
4859em_rxeof(struct rx_ring *rxr, int count, int *done)
4860{
4861	struct adapter		*adapter = rxr->adapter;
4862	struct ifnet		*ifp = adapter->ifp;
4863	struct mbuf		*mp, *sendmp;
4864	u32			status = 0;
4865	u16 			len;
4866	int			i, processed, rxdone = 0;
4867	bool			eop;
4868	union e1000_rx_desc_extended	*cur;
4869
4870	EM_RX_LOCK(rxr);
4871
4872	/* Sync the ring */
4873	bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4874	    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
4875
4876
4877#ifdef DEV_NETMAP
4878	if (netmap_rx_irq(ifp, rxr->me, &processed)) {
4879		EM_RX_UNLOCK(rxr);
4880		return (FALSE);
4881	}
4882#endif /* DEV_NETMAP */
4883
4884	for (i = rxr->next_to_check, processed = 0; count != 0;) {
4885		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
4886			break;
4887
4888		cur = &rxr->rx_base[i];
4889		status = le32toh(cur->wb.upper.status_error);
4890		mp = sendmp = NULL;
4891
4892		if ((status & E1000_RXD_STAT_DD) == 0)
4893			break;
4894
4895		len = le16toh(cur->wb.upper.length);
4896		eop = (status & E1000_RXD_STAT_EOP) != 0;
4897
4898		if ((status & E1000_RXDEXT_ERR_FRAME_ERR_MASK) ||
4899		    (rxr->discard == TRUE)) {
4900			adapter->dropped_pkts++;
4901			++rxr->rx_discarded;
4902			if (!eop) /* Catch subsequent segs */
4903				rxr->discard = TRUE;
4904			else
4905				rxr->discard = FALSE;
4906			em_rx_discard(rxr, i);
4907			goto next_desc;
4908		}
4909		bus_dmamap_unload(rxr->rxtag, rxr->rx_buffers[i].map);
4910
4911		/* Assign correct length to the current fragment */
4912		mp = rxr->rx_buffers[i].m_head;
4913		mp->m_len = len;
4914
4915		/* Trigger for refresh */
4916		rxr->rx_buffers[i].m_head = NULL;
4917
4918		/* First segment? */
4919		if (rxr->fmp == NULL) {
4920			mp->m_pkthdr.len = len;
4921			rxr->fmp = rxr->lmp = mp;
4922		} else {
4923			/* Chain mbuf's together */
4924			mp->m_flags &= ~M_PKTHDR;
4925			rxr->lmp->m_next = mp;
4926			rxr->lmp = mp;
4927			rxr->fmp->m_pkthdr.len += len;
4928		}
4929
4930		if (eop) {
4931			--count;
4932			sendmp = rxr->fmp;
4933			sendmp->m_pkthdr.rcvif = ifp;
4934			ifp->if_ipackets++;
4935			em_receive_checksum(status, sendmp);
4936#ifndef __NO_STRICT_ALIGNMENT
4937			if (adapter->hw.mac.max_frame_size >
4938			    (MCLBYTES - ETHER_ALIGN) &&
4939			    em_fixup_rx(rxr) != 0)
4940				goto skip;
4941#endif
4942			if (status & E1000_RXD_STAT_VP) {
4943				sendmp->m_pkthdr.ether_vtag =
4944				    le16toh(cur->wb.upper.vlan);
4945				sendmp->m_flags |= M_VLANTAG;
4946			}
4947#ifndef __NO_STRICT_ALIGNMENT
4948skip:
4949#endif
4950			rxr->fmp = rxr->lmp = NULL;
4951		}
4952next_desc:
4953		/* Sync the ring */
4954		bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4955	    		BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
4956
4957		/* Zero out the receive descriptors status. */
4958		cur->wb.upper.status_error &= htole32(~0xFF);
4959		++rxdone;	/* cumulative for POLL */
4960		++processed;
4961
4962		/* Advance our pointers to the next descriptor. */
4963		if (++i == adapter->num_rx_desc)
4964			i = 0;
4965
4966		/* Send to the stack */
4967		if (sendmp != NULL) {
4968			rxr->next_to_check = i;
4969			EM_RX_UNLOCK(rxr);
4970			(*ifp->if_input)(ifp, sendmp);
4971			EM_RX_LOCK(rxr);
4972			i = rxr->next_to_check;
4973		}
4974
4975		/* Only refresh mbufs every 8 descriptors */
4976		if (processed == 8) {
4977			em_refresh_mbufs(rxr, i);
4978			processed = 0;
4979		}
4980	}
4981
4982	/* Catch any remaining refresh work */
4983	if (e1000_rx_unrefreshed(rxr))
4984		em_refresh_mbufs(rxr, i);
4985
4986	rxr->next_to_check = i;
4987	if (done != NULL)
4988		*done = rxdone;
4989	EM_RX_UNLOCK(rxr);
4990
4991	return ((status & E1000_RXD_STAT_DD) ? TRUE : FALSE);
4992}
4993
4994static __inline void
4995em_rx_discard(struct rx_ring *rxr, int i)
4996{
4997	struct em_rxbuffer	*rbuf;
4998
4999	rbuf = &rxr->rx_buffers[i];
5000	bus_dmamap_unload(rxr->rxtag, rbuf->map);
5001
5002	/* Free any previous pieces */
5003	if (rxr->fmp != NULL) {
5004		rxr->fmp->m_flags |= M_PKTHDR;
5005		m_freem(rxr->fmp);
5006		rxr->fmp = NULL;
5007		rxr->lmp = NULL;
5008	}
5009	/*
5010	** Free buffer and allow em_refresh_mbufs()
5011	** to clean up and recharge buffer.
5012	*/
5013	if (rbuf->m_head) {
5014		m_free(rbuf->m_head);
5015		rbuf->m_head = NULL;
5016	}
5017	return;
5018}
5019
5020#ifndef __NO_STRICT_ALIGNMENT
5021/*
5022 * When jumbo frames are enabled we should realign entire payload on
5023 * architecures with strict alignment. This is serious design mistake of 8254x
5024 * as it nullifies DMA operations. 8254x just allows RX buffer size to be
5025 * 2048/4096/8192/16384. What we really want is 2048 - ETHER_ALIGN to align its
5026 * payload. On architecures without strict alignment restrictions 8254x still
5027 * performs unaligned memory access which would reduce the performance too.
5028 * To avoid copying over an entire frame to align, we allocate a new mbuf and
5029 * copy ethernet header to the new mbuf. The new mbuf is prepended into the
5030 * existing mbuf chain.
5031 *
5032 * Be aware, best performance of the 8254x is achived only when jumbo frame is
5033 * not used at all on architectures with strict alignment.
5034 */
5035static int
5036em_fixup_rx(struct rx_ring *rxr)
5037{
5038	struct adapter *adapter = rxr->adapter;
5039	struct mbuf *m, *n;
5040	int error;
5041
5042	error = 0;
5043	m = rxr->fmp;
5044	if (m->m_len <= (MCLBYTES - ETHER_HDR_LEN)) {
5045		bcopy(m->m_data, m->m_data + ETHER_HDR_LEN, m->m_len);
5046		m->m_data += ETHER_HDR_LEN;
5047	} else {
5048		MGETHDR(n, M_NOWAIT, MT_DATA);
5049		if (n != NULL) {
5050			bcopy(m->m_data, n->m_data, ETHER_HDR_LEN);
5051			m->m_data += ETHER_HDR_LEN;
5052			m->m_len -= ETHER_HDR_LEN;
5053			n->m_len = ETHER_HDR_LEN;
5054			M_MOVE_PKTHDR(n, m);
5055			n->m_next = m;
5056			rxr->fmp = n;
5057		} else {
5058			adapter->dropped_pkts++;
5059			m_freem(rxr->fmp);
5060			rxr->fmp = NULL;
5061			error = ENOMEM;
5062		}
5063	}
5064
5065	return (error);
5066}
5067#endif
5068
5069static void
5070em_setup_rxdesc(union e1000_rx_desc_extended *rxd, const struct em_rxbuffer *rxbuf)
5071{
5072	rxd->read.buffer_addr = htole64(rxbuf->paddr);
5073	/* DD bits must be cleared */
5074	rxd->wb.upper.status_error= 0;
5075}
5076
5077/*********************************************************************
5078 *
5079 *  Verify that the hardware indicated that the checksum is valid.
5080 *  Inform the stack about the status of checksum so that stack
5081 *  doesn't spend time verifying the checksum.
5082 *
5083 *********************************************************************/
5084static void
5085em_receive_checksum(uint32_t status, struct mbuf *mp)
5086{
5087	mp->m_pkthdr.csum_flags = 0;
5088
5089	/* Ignore Checksum bit is set */
5090	if (status & E1000_RXD_STAT_IXSM)
5091		return;
5092
5093	/* If the IP checksum exists and there is no IP Checksum error */
5094	if ((status & (E1000_RXD_STAT_IPCS | E1000_RXDEXT_STATERR_IPE)) ==
5095		E1000_RXD_STAT_IPCS) {
5096		mp->m_pkthdr.csum_flags = (CSUM_IP_CHECKED | CSUM_IP_VALID);
5097	}
5098
5099	/* TCP or UDP checksum */
5100	if ((status & (E1000_RXD_STAT_TCPCS | E1000_RXDEXT_STATERR_TCPE)) ==
5101	    E1000_RXD_STAT_TCPCS) {
5102		mp->m_pkthdr.csum_flags |= (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
5103		mp->m_pkthdr.csum_data = htons(0xffff);
5104	}
5105	if (status & E1000_RXD_STAT_UDPCS) {
5106		mp->m_pkthdr.csum_flags |= (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
5107		mp->m_pkthdr.csum_data = htons(0xffff);
5108	}
5109}
5110
5111/*
5112 * This routine is run via an vlan
5113 * config EVENT
5114 */
5115static void
5116em_register_vlan(void *arg, struct ifnet *ifp, u16 vtag)
5117{
5118	struct adapter	*adapter = ifp->if_softc;
5119	u32		index, bit;
5120
5121	if (ifp->if_softc !=  arg)   /* Not our event */
5122		return;
5123
5124	if ((vtag == 0) || (vtag > 4095))       /* Invalid ID */
5125                return;
5126
5127	EM_CORE_LOCK(adapter);
5128	index = (vtag >> 5) & 0x7F;
5129	bit = vtag & 0x1F;
5130	adapter->shadow_vfta[index] |= (1 << bit);
5131	++adapter->num_vlans;
5132	/* Re-init to load the changes */
5133	if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
5134		em_init_locked(adapter);
5135	EM_CORE_UNLOCK(adapter);
5136}
5137
5138/*
5139 * This routine is run via an vlan
5140 * unconfig EVENT
5141 */
5142static void
5143em_unregister_vlan(void *arg, struct ifnet *ifp, u16 vtag)
5144{
5145	struct adapter	*adapter = ifp->if_softc;
5146	u32		index, bit;
5147
5148	if (ifp->if_softc !=  arg)
5149		return;
5150
5151	if ((vtag == 0) || (vtag > 4095))       /* Invalid */
5152                return;
5153
5154	EM_CORE_LOCK(adapter);
5155	index = (vtag >> 5) & 0x7F;
5156	bit = vtag & 0x1F;
5157	adapter->shadow_vfta[index] &= ~(1 << bit);
5158	--adapter->num_vlans;
5159	/* Re-init to load the changes */
5160	if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
5161		em_init_locked(adapter);
5162	EM_CORE_UNLOCK(adapter);
5163}
5164
5165static void
5166em_setup_vlan_hw_support(struct adapter *adapter)
5167{
5168	struct e1000_hw *hw = &adapter->hw;
5169	u32             reg;
5170
5171	/*
5172	** We get here thru init_locked, meaning
5173	** a soft reset, this has already cleared
5174	** the VFTA and other state, so if there
5175	** have been no vlan's registered do nothing.
5176	*/
5177	if (adapter->num_vlans == 0)
5178                return;
5179
5180	/*
5181	** A soft reset zero's out the VFTA, so
5182	** we need to repopulate it now.
5183	*/
5184	for (int i = 0; i < EM_VFTA_SIZE; i++)
5185                if (adapter->shadow_vfta[i] != 0)
5186			E1000_WRITE_REG_ARRAY(hw, E1000_VFTA,
5187                            i, adapter->shadow_vfta[i]);
5188
5189	reg = E1000_READ_REG(hw, E1000_CTRL);
5190	reg |= E1000_CTRL_VME;
5191	E1000_WRITE_REG(hw, E1000_CTRL, reg);
5192
5193	/* Enable the Filter Table */
5194	reg = E1000_READ_REG(hw, E1000_RCTL);
5195	reg &= ~E1000_RCTL_CFIEN;
5196	reg |= E1000_RCTL_VFE;
5197	E1000_WRITE_REG(hw, E1000_RCTL, reg);
5198}
5199
5200static void
5201em_enable_intr(struct adapter *adapter)
5202{
5203	struct e1000_hw *hw = &adapter->hw;
5204	u32 ims_mask = IMS_ENABLE_MASK;
5205
5206	if (hw->mac.type == e1000_82574) {
5207		E1000_WRITE_REG(hw, EM_EIAC, EM_MSIX_MASK);
5208		ims_mask |= EM_MSIX_MASK;
5209	}
5210	E1000_WRITE_REG(hw, E1000_IMS, ims_mask);
5211}
5212
5213static void
5214em_disable_intr(struct adapter *adapter)
5215{
5216	struct e1000_hw *hw = &adapter->hw;
5217
5218	if (hw->mac.type == e1000_82574)
5219		E1000_WRITE_REG(hw, EM_EIAC, 0);
5220	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
5221}
5222
5223/*
5224 * Bit of a misnomer, what this really means is
5225 * to enable OS management of the system... aka
5226 * to disable special hardware management features
5227 */
5228static void
5229em_init_manageability(struct adapter *adapter)
5230{
5231	/* A shared code workaround */
5232#define E1000_82542_MANC2H E1000_MANC2H
5233	if (adapter->has_manage) {
5234		int manc2h = E1000_READ_REG(&adapter->hw, E1000_MANC2H);
5235		int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
5236
5237		/* disable hardware interception of ARP */
5238		manc &= ~(E1000_MANC_ARP_EN);
5239
5240                /* enable receiving management packets to the host */
5241		manc |= E1000_MANC_EN_MNG2HOST;
5242#define E1000_MNG2HOST_PORT_623 (1 << 5)
5243#define E1000_MNG2HOST_PORT_664 (1 << 6)
5244		manc2h |= E1000_MNG2HOST_PORT_623;
5245		manc2h |= E1000_MNG2HOST_PORT_664;
5246		E1000_WRITE_REG(&adapter->hw, E1000_MANC2H, manc2h);
5247		E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
5248	}
5249}
5250
5251/*
5252 * Give control back to hardware management
5253 * controller if there is one.
5254 */
5255static void
5256em_release_manageability(struct adapter *adapter)
5257{
5258	if (adapter->has_manage) {
5259		int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
5260
5261		/* re-enable hardware interception of ARP */
5262		manc |= E1000_MANC_ARP_EN;
5263		manc &= ~E1000_MANC_EN_MNG2HOST;
5264
5265		E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
5266	}
5267}
5268
5269/*
5270 * em_get_hw_control sets the {CTRL_EXT|FWSM}:DRV_LOAD bit.
5271 * For ASF and Pass Through versions of f/w this means
5272 * that the driver is loaded. For AMT version type f/w
5273 * this means that the network i/f is open.
5274 */
5275static void
5276em_get_hw_control(struct adapter *adapter)
5277{
5278	u32 ctrl_ext, swsm;
5279
5280	if (adapter->hw.mac.type == e1000_82573) {
5281		swsm = E1000_READ_REG(&adapter->hw, E1000_SWSM);
5282		E1000_WRITE_REG(&adapter->hw, E1000_SWSM,
5283		    swsm | E1000_SWSM_DRV_LOAD);
5284		return;
5285	}
5286	/* else */
5287	ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
5288	E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
5289	    ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
5290	return;
5291}
5292
5293/*
5294 * em_release_hw_control resets {CTRL_EXT|FWSM}:DRV_LOAD bit.
5295 * For ASF and Pass Through versions of f/w this means that
5296 * the driver is no longer loaded. For AMT versions of the
5297 * f/w this means that the network i/f is closed.
5298 */
5299static void
5300em_release_hw_control(struct adapter *adapter)
5301{
5302	u32 ctrl_ext, swsm;
5303
5304	if (!adapter->has_manage)
5305		return;
5306
5307	if (adapter->hw.mac.type == e1000_82573) {
5308		swsm = E1000_READ_REG(&adapter->hw, E1000_SWSM);
5309		E1000_WRITE_REG(&adapter->hw, E1000_SWSM,
5310		    swsm & ~E1000_SWSM_DRV_LOAD);
5311		return;
5312	}
5313	/* else */
5314	ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
5315	E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
5316	    ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
5317	return;
5318}
5319
5320static int
5321em_is_valid_ether_addr(u8 *addr)
5322{
5323	char zero_addr[6] = { 0, 0, 0, 0, 0, 0 };
5324
5325	if ((addr[0] & 1) || (!bcmp(addr, zero_addr, ETHER_ADDR_LEN))) {
5326		return (FALSE);
5327	}
5328
5329	return (TRUE);
5330}
5331
5332/*
5333** Parse the interface capabilities with regard
5334** to both system management and wake-on-lan for
5335** later use.
5336*/
5337static void
5338em_get_wakeup(device_t dev)
5339{
5340	struct adapter	*adapter = device_get_softc(dev);
5341	u16		eeprom_data = 0, device_id, apme_mask;
5342
5343	adapter->has_manage = e1000_enable_mng_pass_thru(&adapter->hw);
5344	apme_mask = EM_EEPROM_APME;
5345
5346	switch (adapter->hw.mac.type) {
5347	case e1000_82573:
5348	case e1000_82583:
5349		adapter->has_amt = TRUE;
5350		/* Falls thru */
5351	case e1000_82571:
5352	case e1000_82572:
5353	case e1000_80003es2lan:
5354		if (adapter->hw.bus.func == 1) {
5355			e1000_read_nvm(&adapter->hw,
5356			    NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data);
5357			break;
5358		} else
5359			e1000_read_nvm(&adapter->hw,
5360			    NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
5361		break;
5362	case e1000_ich8lan:
5363	case e1000_ich9lan:
5364	case e1000_ich10lan:
5365	case e1000_pchlan:
5366	case e1000_pch2lan:
5367	case e1000_pch_lpt:
5368	case e1000_pch_spt:
5369	case e1000_pch_cnp:
5370		apme_mask = E1000_WUC_APME;
5371		adapter->has_amt = TRUE;
5372		eeprom_data = E1000_READ_REG(&adapter->hw, E1000_WUC);
5373		break;
5374	default:
5375		e1000_read_nvm(&adapter->hw,
5376		    NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
5377		break;
5378	}
5379	if (eeprom_data & apme_mask)
5380		adapter->wol = (E1000_WUFC_MAG | E1000_WUFC_MC);
5381	/*
5382         * We have the eeprom settings, now apply the special cases
5383         * where the eeprom may be wrong or the board won't support
5384         * wake on lan on a particular port
5385	 */
5386	device_id = pci_get_device(dev);
5387        switch (device_id) {
5388	case E1000_DEV_ID_82571EB_FIBER:
5389		/* Wake events only supported on port A for dual fiber
5390		 * regardless of eeprom setting */
5391		if (E1000_READ_REG(&adapter->hw, E1000_STATUS) &
5392		    E1000_STATUS_FUNC_1)
5393			adapter->wol = 0;
5394		break;
5395	case E1000_DEV_ID_82571EB_QUAD_COPPER:
5396	case E1000_DEV_ID_82571EB_QUAD_FIBER:
5397	case E1000_DEV_ID_82571EB_QUAD_COPPER_LP:
5398                /* if quad port adapter, disable WoL on all but port A */
5399		if (global_quad_port_a != 0)
5400			adapter->wol = 0;
5401		/* Reset for multiple quad port adapters */
5402		if (++global_quad_port_a == 4)
5403			global_quad_port_a = 0;
5404                break;
5405	}
5406	return;
5407}
5408
5409
5410/*
5411 * Enable PCI Wake On Lan capability
5412 */
5413static void
5414em_enable_wakeup(device_t dev)
5415{
5416	struct adapter	*adapter = device_get_softc(dev);
5417	struct ifnet	*ifp = adapter->ifp;
5418	int		error = 0;
5419	u32		pmc, ctrl, ctrl_ext, rctl;
5420	u16     	status;
5421
5422	if (pci_find_cap(dev, PCIY_PMG, &pmc) != 0)
5423		return;
5424
5425	/*
5426	** Determine type of Wakeup: note that wol
5427	** is set with all bits on by default.
5428	*/
5429	if ((ifp->if_capenable & IFCAP_WOL_MAGIC) == 0)
5430		adapter->wol &= ~E1000_WUFC_MAG;
5431
5432	if ((ifp->if_capenable & IFCAP_WOL_MCAST) == 0)
5433		adapter->wol &= ~E1000_WUFC_MC;
5434	else {
5435		rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
5436		rctl |= E1000_RCTL_MPE;
5437		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, rctl);
5438	}
5439
5440	if (!(adapter->wol & (E1000_WUFC_EX | E1000_WUFC_MAG | E1000_WUFC_MC)))
5441		goto pme;
5442
5443	/* Advertise the wakeup capability */
5444	ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL);
5445	ctrl |= (E1000_CTRL_SWDPIN2 | E1000_CTRL_SWDPIN3);
5446	E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl);
5447
5448	/* Keep the laser running on Fiber adapters */
5449	if (adapter->hw.phy.media_type == e1000_media_type_fiber ||
5450	    adapter->hw.phy.media_type == e1000_media_type_internal_serdes) {
5451		ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
5452		ctrl_ext |= E1000_CTRL_EXT_SDP3_DATA;
5453		E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, ctrl_ext);
5454	}
5455
5456	if ((adapter->hw.mac.type == e1000_ich8lan) ||
5457	    (adapter->hw.mac.type == e1000_pchlan) ||
5458	    (adapter->hw.mac.type == e1000_ich9lan) ||
5459	    (adapter->hw.mac.type == e1000_ich10lan))
5460		e1000_suspend_workarounds_ich8lan(&adapter->hw);
5461
5462	if ((adapter->hw.mac.type == e1000_pchlan)  ||
5463	    (adapter->hw.mac.type == e1000_pch2lan) ||
5464	    (adapter->hw.mac.type == e1000_pch_lpt) ||
5465	    (adapter->hw.mac.type == e1000_pch_spt) ||
5466	    (adapter->hw.mac.type == e1000_pch_cnp)) {
5467		error = em_enable_phy_wakeup(adapter);
5468		if (error)
5469			goto pme;
5470	} else {
5471		/* Enable wakeup by the MAC */
5472		E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
5473		E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
5474	}
5475
5476	if (adapter->hw.phy.type == e1000_phy_igp_3)
5477		e1000_igp3_phy_powerdown_workaround_ich8lan(&adapter->hw);
5478
5479pme:
5480        status = pci_read_config(dev, pmc + PCIR_POWER_STATUS, 2);
5481	status &= ~(PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE);
5482	if (!error && (ifp->if_capenable & IFCAP_WOL))
5483		status |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE;
5484        pci_write_config(dev, pmc + PCIR_POWER_STATUS, status, 2);
5485
5486	return;
5487}
5488
5489/*
5490** WOL in the newer chipset interfaces (pchlan)
5491** require thing to be copied into the phy
5492*/
5493static int
5494em_enable_phy_wakeup(struct adapter *adapter)
5495{
5496	struct e1000_hw *hw = &adapter->hw;
5497	u32 mreg, ret = 0;
5498	u16 preg;
5499
5500	/* copy MAC RARs to PHY RARs */
5501	e1000_copy_rx_addrs_to_phy_ich8lan(hw);
5502
5503	/* copy MAC MTA to PHY MTA */
5504	for (int i = 0; i < adapter->hw.mac.mta_reg_count; i++) {
5505		mreg = E1000_READ_REG_ARRAY(hw, E1000_MTA, i);
5506		e1000_write_phy_reg(hw, BM_MTA(i), (u16)(mreg & 0xFFFF));
5507		e1000_write_phy_reg(hw, BM_MTA(i) + 1,
5508		    (u16)((mreg >> 16) & 0xFFFF));
5509	}
5510
5511	/* configure PHY Rx Control register */
5512	e1000_read_phy_reg(&adapter->hw, BM_RCTL, &preg);
5513	mreg = E1000_READ_REG(hw, E1000_RCTL);
5514	if (mreg & E1000_RCTL_UPE)
5515		preg |= BM_RCTL_UPE;
5516	if (mreg & E1000_RCTL_MPE)
5517		preg |= BM_RCTL_MPE;
5518	preg &= ~(BM_RCTL_MO_MASK);
5519	if (mreg & E1000_RCTL_MO_3)
5520		preg |= (((mreg & E1000_RCTL_MO_3) >> E1000_RCTL_MO_SHIFT)
5521				<< BM_RCTL_MO_SHIFT);
5522	if (mreg & E1000_RCTL_BAM)
5523		preg |= BM_RCTL_BAM;
5524	if (mreg & E1000_RCTL_PMCF)
5525		preg |= BM_RCTL_PMCF;
5526	mreg = E1000_READ_REG(hw, E1000_CTRL);
5527	if (mreg & E1000_CTRL_RFCE)
5528		preg |= BM_RCTL_RFCE;
5529	e1000_write_phy_reg(&adapter->hw, BM_RCTL, preg);
5530
5531	/* enable PHY wakeup in MAC register */
5532	E1000_WRITE_REG(hw, E1000_WUC,
5533	    E1000_WUC_PHY_WAKE | E1000_WUC_PME_EN);
5534	E1000_WRITE_REG(hw, E1000_WUFC, adapter->wol);
5535
5536	/* configure and enable PHY wakeup in PHY registers */
5537	e1000_write_phy_reg(&adapter->hw, BM_WUFC, adapter->wol);
5538	e1000_write_phy_reg(&adapter->hw, BM_WUC, E1000_WUC_PME_EN);
5539
5540	/* activate PHY wakeup */
5541	ret = hw->phy.ops.acquire(hw);
5542	if (ret) {
5543		printf("Could not acquire PHY\n");
5544		return ret;
5545	}
5546	e1000_write_phy_reg_mdic(hw, IGP01E1000_PHY_PAGE_SELECT,
5547	                         (BM_WUC_ENABLE_PAGE << IGP_PAGE_SHIFT));
5548	ret = e1000_read_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, &preg);
5549	if (ret) {
5550		printf("Could not read PHY page 769\n");
5551		goto out;
5552	}
5553	preg |= BM_WUC_ENABLE_BIT | BM_WUC_HOST_WU_BIT;
5554	ret = e1000_write_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, preg);
5555	if (ret)
5556		printf("Could not set PHY Host Wakeup bit\n");
5557out:
5558	hw->phy.ops.release(hw);
5559
5560	return ret;
5561}
5562
5563static void
5564em_led_func(void *arg, int onoff)
5565{
5566	struct adapter	*adapter = arg;
5567
5568	EM_CORE_LOCK(adapter);
5569	if (onoff) {
5570		e1000_setup_led(&adapter->hw);
5571		e1000_led_on(&adapter->hw);
5572	} else {
5573		e1000_led_off(&adapter->hw);
5574		e1000_cleanup_led(&adapter->hw);
5575	}
5576	EM_CORE_UNLOCK(adapter);
5577}
5578
5579/*
5580** Disable the L0S and L1 LINK states
5581*/
5582static void
5583em_disable_aspm(struct adapter *adapter)
5584{
5585	int		base, reg;
5586	u16		link_cap,link_ctrl;
5587	device_t	dev = adapter->dev;
5588
5589	switch (adapter->hw.mac.type) {
5590		case e1000_82573:
5591		case e1000_82574:
5592		case e1000_82583:
5593			break;
5594		default:
5595			return;
5596	}
5597	if (pci_find_cap(dev, PCIY_EXPRESS, &base) != 0)
5598		return;
5599	reg = base + PCIER_LINK_CAP;
5600	link_cap = pci_read_config(dev, reg, 2);
5601	if ((link_cap & PCIEM_LINK_CAP_ASPM) == 0)
5602		return;
5603	reg = base + PCIER_LINK_CTL;
5604	link_ctrl = pci_read_config(dev, reg, 2);
5605	link_ctrl &= ~PCIEM_LINK_CTL_ASPMC;
5606	pci_write_config(dev, reg, link_ctrl, 2);
5607	return;
5608}
5609
5610/**********************************************************************
5611 *
5612 *  Update the board statistics counters.
5613 *
5614 **********************************************************************/
5615static void
5616em_update_stats_counters(struct adapter *adapter)
5617{
5618	struct ifnet   *ifp;
5619
5620	if(adapter->hw.phy.media_type == e1000_media_type_copper ||
5621	   (E1000_READ_REG(&adapter->hw, E1000_STATUS) & E1000_STATUS_LU)) {
5622		adapter->stats.symerrs += E1000_READ_REG(&adapter->hw, E1000_SYMERRS);
5623		adapter->stats.sec += E1000_READ_REG(&adapter->hw, E1000_SEC);
5624	}
5625	adapter->stats.crcerrs += E1000_READ_REG(&adapter->hw, E1000_CRCERRS);
5626	adapter->stats.mpc += E1000_READ_REG(&adapter->hw, E1000_MPC);
5627	adapter->stats.scc += E1000_READ_REG(&adapter->hw, E1000_SCC);
5628	adapter->stats.ecol += E1000_READ_REG(&adapter->hw, E1000_ECOL);
5629
5630	adapter->stats.mcc += E1000_READ_REG(&adapter->hw, E1000_MCC);
5631	adapter->stats.latecol += E1000_READ_REG(&adapter->hw, E1000_LATECOL);
5632	adapter->stats.colc += E1000_READ_REG(&adapter->hw, E1000_COLC);
5633	adapter->stats.dc += E1000_READ_REG(&adapter->hw, E1000_DC);
5634	adapter->stats.rlec += E1000_READ_REG(&adapter->hw, E1000_RLEC);
5635	adapter->stats.xonrxc += E1000_READ_REG(&adapter->hw, E1000_XONRXC);
5636	adapter->stats.xontxc += E1000_READ_REG(&adapter->hw, E1000_XONTXC);
5637	adapter->stats.xoffrxc += E1000_READ_REG(&adapter->hw, E1000_XOFFRXC);
5638	adapter->stats.xofftxc += E1000_READ_REG(&adapter->hw, E1000_XOFFTXC);
5639	adapter->stats.fcruc += E1000_READ_REG(&adapter->hw, E1000_FCRUC);
5640	adapter->stats.prc64 += E1000_READ_REG(&adapter->hw, E1000_PRC64);
5641	adapter->stats.prc127 += E1000_READ_REG(&adapter->hw, E1000_PRC127);
5642	adapter->stats.prc255 += E1000_READ_REG(&adapter->hw, E1000_PRC255);
5643	adapter->stats.prc511 += E1000_READ_REG(&adapter->hw, E1000_PRC511);
5644	adapter->stats.prc1023 += E1000_READ_REG(&adapter->hw, E1000_PRC1023);
5645	adapter->stats.prc1522 += E1000_READ_REG(&adapter->hw, E1000_PRC1522);
5646	adapter->stats.gprc += E1000_READ_REG(&adapter->hw, E1000_GPRC);
5647	adapter->stats.bprc += E1000_READ_REG(&adapter->hw, E1000_BPRC);
5648	adapter->stats.mprc += E1000_READ_REG(&adapter->hw, E1000_MPRC);
5649	adapter->stats.gptc += E1000_READ_REG(&adapter->hw, E1000_GPTC);
5650
5651	/* For the 64-bit byte counters the low dword must be read first. */
5652	/* Both registers clear on the read of the high dword */
5653
5654	adapter->stats.gorc += E1000_READ_REG(&adapter->hw, E1000_GORCL) +
5655	    ((u64)E1000_READ_REG(&adapter->hw, E1000_GORCH) << 32);
5656	adapter->stats.gotc += E1000_READ_REG(&adapter->hw, E1000_GOTCL) +
5657	    ((u64)E1000_READ_REG(&adapter->hw, E1000_GOTCH) << 32);
5658
5659	adapter->stats.rnbc += E1000_READ_REG(&adapter->hw, E1000_RNBC);
5660	adapter->stats.ruc += E1000_READ_REG(&adapter->hw, E1000_RUC);
5661	adapter->stats.rfc += E1000_READ_REG(&adapter->hw, E1000_RFC);
5662	adapter->stats.roc += E1000_READ_REG(&adapter->hw, E1000_ROC);
5663	adapter->stats.rjc += E1000_READ_REG(&adapter->hw, E1000_RJC);
5664
5665	adapter->stats.tor += E1000_READ_REG(&adapter->hw, E1000_TORH);
5666	adapter->stats.tot += E1000_READ_REG(&adapter->hw, E1000_TOTH);
5667
5668	adapter->stats.tpr += E1000_READ_REG(&adapter->hw, E1000_TPR);
5669	adapter->stats.tpt += E1000_READ_REG(&adapter->hw, E1000_TPT);
5670	adapter->stats.ptc64 += E1000_READ_REG(&adapter->hw, E1000_PTC64);
5671	adapter->stats.ptc127 += E1000_READ_REG(&adapter->hw, E1000_PTC127);
5672	adapter->stats.ptc255 += E1000_READ_REG(&adapter->hw, E1000_PTC255);
5673	adapter->stats.ptc511 += E1000_READ_REG(&adapter->hw, E1000_PTC511);
5674	adapter->stats.ptc1023 += E1000_READ_REG(&adapter->hw, E1000_PTC1023);
5675	adapter->stats.ptc1522 += E1000_READ_REG(&adapter->hw, E1000_PTC1522);
5676	adapter->stats.mptc += E1000_READ_REG(&adapter->hw, E1000_MPTC);
5677	adapter->stats.bptc += E1000_READ_REG(&adapter->hw, E1000_BPTC);
5678
5679	/* Interrupt Counts */
5680
5681	adapter->stats.iac += E1000_READ_REG(&adapter->hw, E1000_IAC);
5682	adapter->stats.icrxptc += E1000_READ_REG(&adapter->hw, E1000_ICRXPTC);
5683	adapter->stats.icrxatc += E1000_READ_REG(&adapter->hw, E1000_ICRXATC);
5684	adapter->stats.ictxptc += E1000_READ_REG(&adapter->hw, E1000_ICTXPTC);
5685	adapter->stats.ictxatc += E1000_READ_REG(&adapter->hw, E1000_ICTXATC);
5686	adapter->stats.ictxqec += E1000_READ_REG(&adapter->hw, E1000_ICTXQEC);
5687	adapter->stats.ictxqmtc += E1000_READ_REG(&adapter->hw, E1000_ICTXQMTC);
5688	adapter->stats.icrxdmtc += E1000_READ_REG(&adapter->hw, E1000_ICRXDMTC);
5689	adapter->stats.icrxoc += E1000_READ_REG(&adapter->hw, E1000_ICRXOC);
5690
5691	if (adapter->hw.mac.type >= e1000_82543) {
5692		adapter->stats.algnerrc +=
5693		E1000_READ_REG(&adapter->hw, E1000_ALGNERRC);
5694		adapter->stats.rxerrc +=
5695		E1000_READ_REG(&adapter->hw, E1000_RXERRC);
5696		adapter->stats.tncrs +=
5697		E1000_READ_REG(&adapter->hw, E1000_TNCRS);
5698		adapter->stats.cexterr +=
5699		E1000_READ_REG(&adapter->hw, E1000_CEXTERR);
5700		adapter->stats.tsctc +=
5701		E1000_READ_REG(&adapter->hw, E1000_TSCTC);
5702		adapter->stats.tsctfc +=
5703		E1000_READ_REG(&adapter->hw, E1000_TSCTFC);
5704	}
5705	ifp = adapter->ifp;
5706
5707	ifp->if_collisions = adapter->stats.colc;
5708
5709	/* Rx Errors */
5710	ifp->if_ierrors = adapter->dropped_pkts + adapter->stats.rxerrc +
5711	    adapter->stats.crcerrs + adapter->stats.algnerrc +
5712	    adapter->stats.ruc + adapter->stats.roc +
5713	    adapter->stats.mpc + adapter->stats.cexterr;
5714
5715	/* Tx Errors */
5716	ifp->if_oerrors = adapter->stats.ecol +
5717	    adapter->stats.latecol + adapter->watchdog_events;
5718}
5719
5720/* Export a single 32-bit register via a read-only sysctl. */
5721static int
5722em_sysctl_reg_handler(SYSCTL_HANDLER_ARGS)
5723{
5724	struct adapter *adapter;
5725	u_int val;
5726
5727	adapter = oidp->oid_arg1;
5728	val = E1000_READ_REG(&adapter->hw, oidp->oid_arg2);
5729	return (sysctl_handle_int(oidp, &val, 0, req));
5730}
5731
5732/*
5733 * Add sysctl variables, one per statistic, to the system.
5734 */
5735static void
5736em_add_hw_stats(struct adapter *adapter)
5737{
5738	device_t dev = adapter->dev;
5739
5740	struct tx_ring *txr = adapter->tx_rings;
5741	struct rx_ring *rxr = adapter->rx_rings;
5742
5743	struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev);
5744	struct sysctl_oid *tree = device_get_sysctl_tree(dev);
5745	struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree);
5746	struct e1000_hw_stats *stats = &adapter->stats;
5747
5748	struct sysctl_oid *stat_node, *queue_node, *int_node;
5749	struct sysctl_oid_list *stat_list, *queue_list, *int_list;
5750
5751#define QUEUE_NAME_LEN 32
5752	char namebuf[QUEUE_NAME_LEN];
5753
5754	/* Driver Statistics */
5755	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "dropped",
5756			CTLFLAG_RD, &adapter->dropped_pkts,
5757			"Driver dropped packets");
5758	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "link_irq",
5759			CTLFLAG_RD, &adapter->link_irq,
5760			"Link MSIX IRQ Handled");
5761	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "mbuf_defrag_fail",
5762			 CTLFLAG_RD, &adapter->mbuf_defrag_failed,
5763			 "Defragmenting mbuf chain failed");
5764	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_dma_fail",
5765			CTLFLAG_RD, &adapter->no_tx_dma_setup,
5766			"Driver tx dma failure in xmit");
5767	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_overruns",
5768			CTLFLAG_RD, &adapter->rx_overruns,
5769			"RX overruns");
5770	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "watchdog_timeouts",
5771			CTLFLAG_RD, &adapter->watchdog_events,
5772			"Watchdog timeouts");
5773
5774	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "device_control",
5775			CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_CTRL,
5776			em_sysctl_reg_handler, "IU",
5777			"Device Control Register");
5778	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rx_control",
5779			CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_RCTL,
5780			em_sysctl_reg_handler, "IU",
5781			"Receiver Control Register");
5782	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_high_water",
5783			CTLFLAG_RD, &adapter->hw.fc.high_water, 0,
5784			"Flow Control High Watermark");
5785	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_low_water",
5786			CTLFLAG_RD, &adapter->hw.fc.low_water, 0,
5787			"Flow Control Low Watermark");
5788
5789	for (int i = 0; i < adapter->num_queues; i++, txr++, rxr++) {
5790		snprintf(namebuf, QUEUE_NAME_LEN, "queue_tx_%d", i);
5791		queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
5792					    CTLFLAG_RD, NULL, "TX Queue Name");
5793		queue_list = SYSCTL_CHILDREN(queue_node);
5794
5795		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_head",
5796				CTLTYPE_UINT | CTLFLAG_RD, adapter,
5797				E1000_TDH(txr->me),
5798				em_sysctl_reg_handler, "IU",
5799 				"Transmit Descriptor Head");
5800		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_tail",
5801				CTLTYPE_UINT | CTLFLAG_RD, adapter,
5802				E1000_TDT(txr->me),
5803				em_sysctl_reg_handler, "IU",
5804 				"Transmit Descriptor Tail");
5805		SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "tx_irq",
5806				CTLFLAG_RD, &txr->tx_irq,
5807				"Queue MSI-X Transmit Interrupts");
5808		SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "no_desc_avail",
5809				CTLFLAG_RD, &txr->no_desc_avail,
5810				"Queue No Descriptor Available");
5811
5812		snprintf(namebuf, QUEUE_NAME_LEN, "queue_rx_%d", i);
5813		queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
5814					    CTLFLAG_RD, NULL, "RX Queue Name");
5815		queue_list = SYSCTL_CHILDREN(queue_node);
5816
5817		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_head",
5818				CTLTYPE_UINT | CTLFLAG_RD, adapter,
5819				E1000_RDH(rxr->me),
5820				em_sysctl_reg_handler, "IU",
5821				"Receive Descriptor Head");
5822		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_tail",
5823				CTLTYPE_UINT | CTLFLAG_RD, adapter,
5824				E1000_RDT(rxr->me),
5825				em_sysctl_reg_handler, "IU",
5826				"Receive Descriptor Tail");
5827		SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "rx_irq",
5828				CTLFLAG_RD, &rxr->rx_irq,
5829				"Queue MSI-X Receive Interrupts");
5830	}
5831
5832	/* MAC stats get their own sub node */
5833
5834	stat_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "mac_stats",
5835				    CTLFLAG_RD, NULL, "Statistics");
5836	stat_list = SYSCTL_CHILDREN(stat_node);
5837
5838	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "excess_coll",
5839			CTLFLAG_RD, &stats->ecol,
5840			"Excessive collisions");
5841	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "single_coll",
5842			CTLFLAG_RD, &stats->scc,
5843			"Single collisions");
5844	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "multiple_coll",
5845			CTLFLAG_RD, &stats->mcc,
5846			"Multiple collisions");
5847	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "late_coll",
5848			CTLFLAG_RD, &stats->latecol,
5849			"Late collisions");
5850	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "collision_count",
5851			CTLFLAG_RD, &stats->colc,
5852			"Collision Count");
5853	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "symbol_errors",
5854			CTLFLAG_RD, &adapter->stats.symerrs,
5855			"Symbol Errors");
5856	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "sequence_errors",
5857			CTLFLAG_RD, &adapter->stats.sec,
5858			"Sequence Errors");
5859	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "defer_count",
5860			CTLFLAG_RD, &adapter->stats.dc,
5861			"Defer Count");
5862	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "missed_packets",
5863			CTLFLAG_RD, &adapter->stats.mpc,
5864			"Missed Packets");
5865	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_no_buff",
5866			CTLFLAG_RD, &adapter->stats.rnbc,
5867			"Receive No Buffers");
5868	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_undersize",
5869			CTLFLAG_RD, &adapter->stats.ruc,
5870			"Receive Undersize");
5871	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_fragmented",
5872			CTLFLAG_RD, &adapter->stats.rfc,
5873			"Fragmented Packets Received ");
5874	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_oversize",
5875			CTLFLAG_RD, &adapter->stats.roc,
5876			"Oversized Packets Received");
5877	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_jabber",
5878			CTLFLAG_RD, &adapter->stats.rjc,
5879			"Recevied Jabber");
5880	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_errs",
5881			CTLFLAG_RD, &adapter->stats.rxerrc,
5882			"Receive Errors");
5883	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "crc_errs",
5884			CTLFLAG_RD, &adapter->stats.crcerrs,
5885			"CRC errors");
5886	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "alignment_errs",
5887			CTLFLAG_RD, &adapter->stats.algnerrc,
5888			"Alignment Errors");
5889	/* On 82575 these are collision counts */
5890	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "coll_ext_errs",
5891			CTLFLAG_RD, &adapter->stats.cexterr,
5892			"Collision/Carrier extension errors");
5893	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_recvd",
5894			CTLFLAG_RD, &adapter->stats.xonrxc,
5895			"XON Received");
5896	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_txd",
5897			CTLFLAG_RD, &adapter->stats.xontxc,
5898			"XON Transmitted");
5899	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_recvd",
5900			CTLFLAG_RD, &adapter->stats.xoffrxc,
5901			"XOFF Received");
5902	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_txd",
5903			CTLFLAG_RD, &adapter->stats.xofftxc,
5904			"XOFF Transmitted");
5905
5906	/* Packet Reception Stats */
5907	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_recvd",
5908			CTLFLAG_RD, &adapter->stats.tpr,
5909			"Total Packets Received ");
5910	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_recvd",
5911			CTLFLAG_RD, &adapter->stats.gprc,
5912			"Good Packets Received");
5913	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_recvd",
5914			CTLFLAG_RD, &adapter->stats.bprc,
5915			"Broadcast Packets Received");
5916	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_recvd",
5917			CTLFLAG_RD, &adapter->stats.mprc,
5918			"Multicast Packets Received");
5919	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_64",
5920			CTLFLAG_RD, &adapter->stats.prc64,
5921			"64 byte frames received ");
5922	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_65_127",
5923			CTLFLAG_RD, &adapter->stats.prc127,
5924			"65-127 byte frames received");
5925	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_128_255",
5926			CTLFLAG_RD, &adapter->stats.prc255,
5927			"128-255 byte frames received");
5928	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_256_511",
5929			CTLFLAG_RD, &adapter->stats.prc511,
5930			"256-511 byte frames received");
5931	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_512_1023",
5932			CTLFLAG_RD, &adapter->stats.prc1023,
5933			"512-1023 byte frames received");
5934	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_1024_1522",
5935			CTLFLAG_RD, &adapter->stats.prc1522,
5936			"1023-1522 byte frames received");
5937 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_recvd",
5938 			CTLFLAG_RD, &adapter->stats.gorc,
5939 			"Good Octets Received");
5940
5941	/* Packet Transmission Stats */
5942 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_txd",
5943 			CTLFLAG_RD, &adapter->stats.gotc,
5944 			"Good Octets Transmitted");
5945	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_txd",
5946			CTLFLAG_RD, &adapter->stats.tpt,
5947			"Total Packets Transmitted");
5948	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd",
5949			CTLFLAG_RD, &adapter->stats.gptc,
5950			"Good Packets Transmitted");
5951	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_txd",
5952			CTLFLAG_RD, &adapter->stats.bptc,
5953			"Broadcast Packets Transmitted");
5954	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_txd",
5955			CTLFLAG_RD, &adapter->stats.mptc,
5956			"Multicast Packets Transmitted");
5957	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_64",
5958			CTLFLAG_RD, &adapter->stats.ptc64,
5959			"64 byte frames transmitted ");
5960	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_65_127",
5961			CTLFLAG_RD, &adapter->stats.ptc127,
5962			"65-127 byte frames transmitted");
5963	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_128_255",
5964			CTLFLAG_RD, &adapter->stats.ptc255,
5965			"128-255 byte frames transmitted");
5966	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_256_511",
5967			CTLFLAG_RD, &adapter->stats.ptc511,
5968			"256-511 byte frames transmitted");
5969	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_512_1023",
5970			CTLFLAG_RD, &adapter->stats.ptc1023,
5971			"512-1023 byte frames transmitted");
5972	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_1024_1522",
5973			CTLFLAG_RD, &adapter->stats.ptc1522,
5974			"1024-1522 byte frames transmitted");
5975	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tso_txd",
5976			CTLFLAG_RD, &adapter->stats.tsctc,
5977			"TSO Contexts Transmitted");
5978	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tso_ctx_fail",
5979			CTLFLAG_RD, &adapter->stats.tsctfc,
5980			"TSO Contexts Failed");
5981
5982
5983	/* Interrupt Stats */
5984
5985	int_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "interrupts",
5986				    CTLFLAG_RD, NULL, "Interrupt Statistics");
5987	int_list = SYSCTL_CHILDREN(int_node);
5988
5989	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "asserts",
5990			CTLFLAG_RD, &adapter->stats.iac,
5991			"Interrupt Assertion Count");
5992
5993	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_pkt_timer",
5994			CTLFLAG_RD, &adapter->stats.icrxptc,
5995			"Interrupt Cause Rx Pkt Timer Expire Count");
5996
5997	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_abs_timer",
5998			CTLFLAG_RD, &adapter->stats.icrxatc,
5999			"Interrupt Cause Rx Abs Timer Expire Count");
6000
6001	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_pkt_timer",
6002			CTLFLAG_RD, &adapter->stats.ictxptc,
6003			"Interrupt Cause Tx Pkt Timer Expire Count");
6004
6005	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_abs_timer",
6006			CTLFLAG_RD, &adapter->stats.ictxatc,
6007			"Interrupt Cause Tx Abs Timer Expire Count");
6008
6009	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_queue_empty",
6010			CTLFLAG_RD, &adapter->stats.ictxqec,
6011			"Interrupt Cause Tx Queue Empty Count");
6012
6013	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_queue_min_thresh",
6014			CTLFLAG_RD, &adapter->stats.ictxqmtc,
6015			"Interrupt Cause Tx Queue Min Thresh Count");
6016
6017	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_desc_min_thresh",
6018			CTLFLAG_RD, &adapter->stats.icrxdmtc,
6019			"Interrupt Cause Rx Desc Min Thresh Count");
6020
6021	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_overrun",
6022			CTLFLAG_RD, &adapter->stats.icrxoc,
6023			"Interrupt Cause Receiver Overrun Count");
6024}
6025
6026/**********************************************************************
6027 *
6028 *  This routine provides a way to dump out the adapter eeprom,
6029 *  often a useful debug/service tool. This only dumps the first
6030 *  32 words, stuff that matters is in that extent.
6031 *
6032 **********************************************************************/
6033static int
6034em_sysctl_nvm_info(SYSCTL_HANDLER_ARGS)
6035{
6036	struct adapter *adapter = (struct adapter *)arg1;
6037	int error;
6038	int result;
6039
6040	result = -1;
6041	error = sysctl_handle_int(oidp, &result, 0, req);
6042
6043	if (error || !req->newptr)
6044		return (error);
6045
6046	/*
6047	 * This value will cause a hex dump of the
6048	 * first 32 16-bit words of the EEPROM to
6049	 * the screen.
6050	 */
6051	if (result == 1)
6052		em_print_nvm_info(adapter);
6053
6054	return (error);
6055}
6056
6057static void
6058em_print_nvm_info(struct adapter *adapter)
6059{
6060	u16	eeprom_data;
6061	int	i, j, row = 0;
6062
6063	/* Its a bit crude, but it gets the job done */
6064	printf("\nInterface EEPROM Dump:\n");
6065	printf("Offset\n0x0000  ");
6066	for (i = 0, j = 0; i < 32; i++, j++) {
6067		if (j == 8) { /* Make the offset block */
6068			j = 0; ++row;
6069			printf("\n0x00%x0  ",row);
6070		}
6071		e1000_read_nvm(&adapter->hw, i, 1, &eeprom_data);
6072		printf("%04x ", eeprom_data);
6073	}
6074	printf("\n");
6075}
6076
6077static int
6078em_sysctl_int_delay(SYSCTL_HANDLER_ARGS)
6079{
6080	struct em_int_delay_info *info;
6081	struct adapter *adapter;
6082	u32 regval;
6083	int error, usecs, ticks;
6084
6085	info = (struct em_int_delay_info *)arg1;
6086	usecs = info->value;
6087	error = sysctl_handle_int(oidp, &usecs, 0, req);
6088	if (error != 0 || req->newptr == NULL)
6089		return (error);
6090	if (usecs < 0 || usecs > EM_TICKS_TO_USECS(65535))
6091		return (EINVAL);
6092	info->value = usecs;
6093	ticks = EM_USECS_TO_TICKS(usecs);
6094	if (info->offset == E1000_ITR)	/* units are 256ns here */
6095		ticks *= 4;
6096
6097	adapter = info->adapter;
6098
6099	EM_CORE_LOCK(adapter);
6100	regval = E1000_READ_OFFSET(&adapter->hw, info->offset);
6101	regval = (regval & ~0xffff) | (ticks & 0xffff);
6102	/* Handle a few special cases. */
6103	switch (info->offset) {
6104	case E1000_RDTR:
6105		break;
6106	case E1000_TIDV:
6107		if (ticks == 0) {
6108			adapter->txd_cmd &= ~E1000_TXD_CMD_IDE;
6109			/* Don't write 0 into the TIDV register. */
6110			regval++;
6111		} else
6112			adapter->txd_cmd |= E1000_TXD_CMD_IDE;
6113		break;
6114	}
6115	E1000_WRITE_OFFSET(&adapter->hw, info->offset, regval);
6116	EM_CORE_UNLOCK(adapter);
6117	return (0);
6118}
6119
6120static void
6121em_add_int_delay_sysctl(struct adapter *adapter, const char *name,
6122	const char *description, struct em_int_delay_info *info,
6123	int offset, int value)
6124{
6125	info->adapter = adapter;
6126	info->offset = offset;
6127	info->value = value;
6128	SYSCTL_ADD_PROC(device_get_sysctl_ctx(adapter->dev),
6129	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
6130	    OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW,
6131	    info, 0, em_sysctl_int_delay, "I", description);
6132}
6133
6134static void
6135em_set_sysctl_value(struct adapter *adapter, const char *name,
6136	const char *description, int *limit, int value)
6137{
6138	*limit = value;
6139	SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev),
6140	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
6141	    OID_AUTO, name, CTLFLAG_RW, limit, value, description);
6142}
6143
6144
6145/*
6146** Set flow control using sysctl:
6147** Flow control values:
6148**      0 - off
6149**      1 - rx pause
6150**      2 - tx pause
6151**      3 - full
6152*/
6153static int
6154em_set_flowcntl(SYSCTL_HANDLER_ARGS)
6155{
6156        int		error;
6157	static int	input = 3; /* default is full */
6158        struct adapter	*adapter = (struct adapter *) arg1;
6159
6160        error = sysctl_handle_int(oidp, &input, 0, req);
6161
6162        if ((error) || (req->newptr == NULL))
6163                return (error);
6164
6165	if (input == adapter->fc) /* no change? */
6166		return (error);
6167
6168        switch (input) {
6169                case e1000_fc_rx_pause:
6170                case e1000_fc_tx_pause:
6171                case e1000_fc_full:
6172                case e1000_fc_none:
6173                        adapter->hw.fc.requested_mode = input;
6174			adapter->fc = input;
6175                        break;
6176                default:
6177			/* Do nothing */
6178			return (error);
6179        }
6180
6181        adapter->hw.fc.current_mode = adapter->hw.fc.requested_mode;
6182        e1000_force_mac_fc(&adapter->hw);
6183        return (error);
6184}
6185
6186/*
6187** Manage Energy Efficient Ethernet:
6188** Control values:
6189**     0/1 - enabled/disabled
6190*/
6191static int
6192em_sysctl_eee(SYSCTL_HANDLER_ARGS)
6193{
6194       struct adapter *adapter = (struct adapter *) arg1;
6195       int             error, value;
6196
6197       value = adapter->hw.dev_spec.ich8lan.eee_disable;
6198       error = sysctl_handle_int(oidp, &value, 0, req);
6199       if (error || req->newptr == NULL)
6200               return (error);
6201       EM_CORE_LOCK(adapter);
6202       adapter->hw.dev_spec.ich8lan.eee_disable = (value != 0);
6203       em_init_locked(adapter);
6204       EM_CORE_UNLOCK(adapter);
6205       return (0);
6206}
6207
6208static int
6209em_sysctl_debug_info(SYSCTL_HANDLER_ARGS)
6210{
6211	struct adapter *adapter;
6212	int error;
6213	int result;
6214
6215	result = -1;
6216	error = sysctl_handle_int(oidp, &result, 0, req);
6217
6218	if (error || !req->newptr)
6219		return (error);
6220
6221	if (result == 1) {
6222		adapter = (struct adapter *)arg1;
6223		em_print_debug_info(adapter);
6224        }
6225
6226	return (error);
6227}
6228
6229/*
6230** This routine is meant to be fluid, add whatever is
6231** needed for debugging a problem.  -jfv
6232*/
6233static void
6234em_print_debug_info(struct adapter *adapter)
6235{
6236	device_t dev = adapter->dev;
6237	struct tx_ring *txr = adapter->tx_rings;
6238	struct rx_ring *rxr = adapter->rx_rings;
6239
6240	if (adapter->ifp->if_drv_flags & IFF_DRV_RUNNING)
6241		printf("Interface is RUNNING ");
6242	else
6243		printf("Interface is NOT RUNNING\n");
6244
6245	if (adapter->ifp->if_drv_flags & IFF_DRV_OACTIVE)
6246		printf("and INACTIVE\n");
6247	else
6248		printf("and ACTIVE\n");
6249
6250	for (int i = 0; i < adapter->num_queues; i++, txr++, rxr++) {
6251		device_printf(dev, "TX Queue %d ------\n", i);
6252		device_printf(dev, "hw tdh = %d, hw tdt = %d\n",
6253	    		E1000_READ_REG(&adapter->hw, E1000_TDH(i)),
6254	    		E1000_READ_REG(&adapter->hw, E1000_TDT(i)));
6255		device_printf(dev, "Tx Queue Status = %d\n", txr->busy);
6256		device_printf(dev, "TX descriptors avail = %d\n",
6257	    		txr->tx_avail);
6258		device_printf(dev, "Tx Descriptors avail failure = %ld\n",
6259	    		txr->no_desc_avail);
6260		device_printf(dev, "RX Queue %d ------\n", i);
6261		device_printf(dev, "hw rdh = %d, hw rdt = %d\n",
6262	    		E1000_READ_REG(&adapter->hw, E1000_RDH(i)),
6263	    		E1000_READ_REG(&adapter->hw, E1000_RDT(i)));
6264		device_printf(dev, "RX discarded packets = %ld\n",
6265	    		rxr->rx_discarded);
6266		device_printf(dev, "RX Next to Check = %d\n", rxr->next_to_check);
6267		device_printf(dev, "RX Next to Refresh = %d\n", rxr->next_to_refresh);
6268	}
6269}
6270
6271#ifdef EM_MULTIQUEUE
6272/*
6273 * 82574 only:
6274 * Write a new value to the EEPROM increasing the number of MSIX
6275 * vectors from 3 to 5, for proper multiqueue support.
6276 */
6277static void
6278em_enable_vectors_82574(struct adapter *adapter)
6279{
6280	struct e1000_hw *hw = &adapter->hw;
6281	device_t dev = adapter->dev;
6282	u16 edata;
6283
6284	e1000_read_nvm(hw, EM_NVM_PCIE_CTRL, 1, &edata);
6285	printf("Current cap: %#06x\n", edata);
6286	if (((edata & EM_NVM_MSIX_N_MASK) >> EM_NVM_MSIX_N_SHIFT) != 4) {
6287		device_printf(dev, "Writing to eeprom: increasing "
6288		    "reported MSIX vectors from 3 to 5...\n");
6289		edata &= ~(EM_NVM_MSIX_N_MASK);
6290		edata |= 4 << EM_NVM_MSIX_N_SHIFT;
6291		e1000_write_nvm(hw, EM_NVM_PCIE_CTRL, 1, &edata);
6292		e1000_update_nvm_checksum(hw);
6293		device_printf(dev, "Writing to eeprom: done\n");
6294	}
6295}
6296#endif
6297
6298#ifdef DDB
6299DB_COMMAND(em_reset_dev, em_ddb_reset_dev)
6300{
6301	devclass_t	dc;
6302	int max_em;
6303
6304	dc = devclass_find("em");
6305	max_em = devclass_get_maxunit(dc);
6306
6307	for (int index = 0; index < (max_em - 1); index++) {
6308		device_t dev;
6309		dev = devclass_get_device(dc, index);
6310		if (device_get_driver(dev) == &em_driver) {
6311			struct adapter *adapter = device_get_softc(dev);
6312			EM_CORE_LOCK(adapter);
6313			em_init_locked(adapter);
6314			EM_CORE_UNLOCK(adapter);
6315		}
6316	}
6317}
6318DB_COMMAND(em_dump_queue, em_ddb_dump_queue)
6319{
6320	devclass_t	dc;
6321	int max_em;
6322
6323	dc = devclass_find("em");
6324	max_em = devclass_get_maxunit(dc);
6325
6326	for (int index = 0; index < (max_em - 1); index++) {
6327		device_t dev;
6328		dev = devclass_get_device(dc, index);
6329		if (device_get_driver(dev) == &em_driver)
6330			em_print_debug_info(device_get_softc(dev));
6331	}
6332
6333}
6334#endif
6335