/******************************************************************************

  Copyright (c) 2001-2015, Intel Corporation
  All rights reserved.

  Redistribution and use in source and binary forms, with or without
  modification, are permitted provided that the following conditions are met:

   1. Redistributions of source code must retain the above copyright notice,
      this list of conditions and the following disclaimer.

   2. Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.

   3. Neither the name of the Intel Corporation nor the names of its
      contributors may be used to endorse or promote products derived from
      this software without specific prior written permission.

  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  POSSIBILITY OF SUCH DAMAGE.

******************************************************************************/
/*$FreeBSD: stable/11/sys/dev/e1000/if_em.c 323079 2017-08-31 23:57:34Z marius $*/

#include "opt_em.h"
#include "opt_ddb.h"
#include "opt_inet.h"
#include "opt_inet6.h"

#ifdef HAVE_KERNEL_OPTION_HEADERS
#include "opt_device_polling.h"
#endif

#include <sys/param.h>
#include <sys/systm.h>
#ifdef DDB
#include <sys/types.h>
#include <ddb/ddb.h>
#endif
#if __FreeBSD_version >= 800000
#include <sys/buf_ring.h>
#endif
#include <sys/bus.h>
#include <sys/endian.h>
#include <sys/kernel.h>
#include <sys/kthread.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/module.h>
#include <sys/rman.h>
#include <sys/smp.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/sysctl.h>
#include <sys/taskqueue.h>
#include <sys/eventhandler.h>
#include <machine/bus.h>
#include <machine/resource.h>

#include <net/bpf.h>
#include <net/ethernet.h>
#include <net/if.h>
#include <net/if_var.h>
#include <net/if_arp.h>
#include <net/if_dl.h>
#include <net/if_media.h>

#include <net/if_types.h>
#include <net/if_vlan_var.h>

#include <netinet/in_systm.h>
#include <netinet/in.h>
#include <netinet/if_ether.h>
#include <netinet/ip.h>
#include <netinet/ip6.h>
#include <netinet/tcp.h>
#include <netinet/udp.h>

#include <machine/in_cksum.h>
#include <dev/led/led.h>
#include <dev/pci/pcivar.h>
#include <dev/pci/pcireg.h>

#include "e1000_api.h"
#include "e1000_82571.h"
#include "if_em.h"

/*********************************************************************
 *  Driver version:
 *********************************************************************/
char em_driver_version[] = "7.6.1-k";

/*********************************************************************
 *  PCI Device ID Table
 *
 *  Used by probe to select the devices to attach to
 *  Last field stores an index into em_strings
 *  Last entry must be all 0s
 *
 *  { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index }
 *********************************************************************/

static em_vendor_info_t em_vendor_info_array[] =
{
	/* Intel(R) PRO/1000 Network Connection */
	{ 0x8086, E1000_DEV_ID_82571EB_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_SERDES_DUAL,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_SERDES_QUAD,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER_LP,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_QUAD_FIBER,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571PT_QUAD_COPPER,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82572EI_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82572EI_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82572EI_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82572EI,		PCI_ANY_ID, PCI_ANY_ID, 0},

	{ 0x8086, E1000_DEV_ID_82573E,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82573E_IAMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82573L,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82583V,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_SPT,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_SPT,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_DPT,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_DPT,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IGP_M_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IGP_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IGP_C,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IFE,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IFE_GT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IFE_G,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IGP_M,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_82567V_3,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IGP_M_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IGP_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IGP_C,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IGP_M,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IGP_M_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IFE,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IFE_GT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IFE_G,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_BM,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82574L,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82574LA,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH10_R_BM_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH10_R_BM_LF,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH10_R_BM_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH10_D_BM_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH10_D_BM_LF,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH10_D_BM_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_M_HV_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_M_HV_LC,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_D_HV_DM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_D_HV_DC,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH2_LV_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH2_LV_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_LPT_I217_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_LPT_I217_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_LPTLP_I218_LM,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_LPTLP_I218_V,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_I218_LM2,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_I218_V2,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_I218_LM3,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_I218_V3,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_SPT_I219_LM, PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_SPT_I219_V,  PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_SPT_I219_LM2,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_SPT_I219_V2, PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_LBG_I219_LM3,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_SPT_I219_LM4,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_SPT_I219_V4, PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_SPT_I219_LM5,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_SPT_I219_V5, PCI_ANY_ID, PCI_ANY_ID, 0},
	/* required last entry */
	{ 0, 0, 0, 0, 0}
};

/*********************************************************************
 *  Table of branding strings for all supported NICs.
 *********************************************************************/

static char *em_strings[] = {
	"Intel(R) PRO/1000 Network Connection"
};

/*********************************************************************
 *  Function prototypes
 *********************************************************************/
static int	em_probe(device_t);
static int	em_attach(device_t);
static int	em_detach(device_t);
static int	em_shutdown(device_t);
static int	em_suspend(device_t);
static int	em_resume(device_t);
#ifdef EM_MULTIQUEUE
static int	em_mq_start(if_t, struct mbuf *);
static int	em_mq_start_locked(if_t, struct tx_ring *);
static void	em_qflush(if_t);
#else
static void	em_start(if_t);
static void	em_start_locked(if_t, struct tx_ring *);
#endif
static int	em_ioctl(if_t, u_long, caddr_t);
static uint64_t	em_get_counter(if_t, ift_counter);
static void	em_init(void *);
static void	em_init_locked(struct adapter *);
static void	em_stop(void *);
static void	em_media_status(if_t, struct ifmediareq *);
static int	em_media_change(if_t);
static void	em_identify_hardware(struct adapter *);
static int	em_allocate_pci_resources(struct adapter *);
static int	em_allocate_legacy(struct adapter *);
static int	em_allocate_msix(struct adapter *);
static int	em_allocate_queues(struct adapter *);
static int	em_setup_msix(struct adapter *);
static void	em_free_pci_resources(struct adapter *);
static void	em_local_timer(void *);
static void	em_reset(struct adapter *);
static int	em_setup_interface(device_t, struct adapter *);
static void	em_flush_desc_rings(struct adapter *);

static void	em_setup_transmit_structures(struct adapter *);
static void	em_initialize_transmit_unit(struct adapter *);
static int	em_allocate_transmit_buffers(struct tx_ring *);
static void	em_free_transmit_structures(struct adapter *);
static void	em_free_transmit_buffers(struct tx_ring *);

static int	em_setup_receive_structures(struct adapter *);
static int	em_allocate_receive_buffers(struct rx_ring *);
static void	em_initialize_receive_unit(struct adapter *);
static void	em_free_receive_structures(struct adapter *);
static void	em_free_receive_buffers(struct rx_ring *);

static void	em_enable_intr(struct adapter *);
static void	em_disable_intr(struct adapter *);
static void	em_update_stats_counters(struct adapter *);
static void	em_add_hw_stats(struct adapter *adapter);
static void	em_txeof(struct tx_ring *);
static bool	em_rxeof(struct rx_ring *, int, int *);
#ifndef __NO_STRICT_ALIGNMENT
static int	em_fixup_rx(struct rx_ring *);
#endif
static void	em_setup_rxdesc(union e1000_rx_desc_extended *,
		    const struct em_rxbuffer *rxbuf);
static void	em_receive_checksum(uint32_t status, struct mbuf *);
static void	em_transmit_checksum_setup(struct tx_ring *, struct mbuf *, int,
		    struct ip *, u32 *, u32 *);
static void	em_tso_setup(struct tx_ring *, struct mbuf *, int, struct ip *,
		    struct tcphdr *, u32 *, u32 *);
static void	em_set_promisc(struct adapter *);
static void	em_disable_promisc(struct adapter *);
static void	em_set_multi(struct adapter *);
static void	em_update_link_status(struct adapter *);
static void	em_refresh_mbufs(struct rx_ring *, int);
static void	em_register_vlan(void *, if_t, u16);
static void	em_unregister_vlan(void *, if_t, u16);
static void	em_setup_vlan_hw_support(struct adapter *);
static int	em_xmit(struct tx_ring *, struct mbuf **);
static int	em_dma_malloc(struct adapter *, bus_size_t,
		    struct em_dma_alloc *, int);
static void	em_dma_free(struct adapter *, struct em_dma_alloc *);
static int	em_sysctl_nvm_info(SYSCTL_HANDLER_ARGS);
static void	em_print_nvm_info(struct adapter *);
static int	em_sysctl_debug_info(SYSCTL_HANDLER_ARGS);
static void	em_print_debug_info(struct adapter *);
static int	em_is_valid_ether_addr(u8 *);
static int	em_sysctl_int_delay(SYSCTL_HANDLER_ARGS);
static void	em_add_int_delay_sysctl(struct adapter *, const char *,
		    const char *, struct em_int_delay_info *, int, int);
/* Management and WOL Support */
static void	em_init_manageability(struct adapter *);
static void	em_release_manageability(struct adapter *);
static void	em_get_hw_control(struct adapter *);
static void	em_release_hw_control(struct adapter *);
static void	em_get_wakeup(device_t);
static void	em_enable_wakeup(device_t);
static int	em_enable_phy_wakeup(struct adapter *);
static void	em_led_func(void *, int);
static void	em_disable_aspm(struct adapter *);

static int	em_irq_fast(void *);

/* MSIX handlers */
static void	em_msix_tx(void *);
static void	em_msix_rx(void *);
static void	em_msix_link(void *);
static void	em_handle_tx(void *context, int pending);
static void	em_handle_rx(void *context, int pending);
static void	em_handle_link(void *context, int pending);

#ifdef EM_MULTIQUEUE
static void	em_enable_vectors_82574(struct adapter *);
#endif

static void	em_set_sysctl_value(struct adapter *, const char *,
		    const char *, int *, int);
static int	em_set_flowcntl(SYSCTL_HANDLER_ARGS);
static int	em_sysctl_eee(SYSCTL_HANDLER_ARGS);

static __inline void em_rx_discard(struct rx_ring *, int);

#ifdef DEVICE_POLLING
static poll_handler_t em_poll;
#endif /* DEVICE_POLLING */

/*********************************************************************
 *  FreeBSD Device Interface Entry Points
 *********************************************************************/

static device_method_t em_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe, em_probe),
	DEVMETHOD(device_attach, em_attach),
	DEVMETHOD(device_detach, em_detach),
	DEVMETHOD(device_shutdown, em_shutdown),
	DEVMETHOD(device_suspend, em_suspend),
	DEVMETHOD(device_resume, em_resume),
	DEVMETHOD_END
};

static driver_t em_driver = {
	"em", em_methods, sizeof(struct adapter),
};

devclass_t em_devclass;
DRIVER_MODULE(em, pci, em_driver, em_devclass, 0, 0);
MODULE_DEPEND(em, pci, 1, 1, 1);
MODULE_DEPEND(em, ether, 1, 1, 1);
#ifdef DEV_NETMAP
MODULE_DEPEND(em, netmap, 1, 1, 1);
#endif /* DEV_NETMAP */

/*********************************************************************
 *  Tunable default values.
 *********************************************************************/

#define EM_TICKS_TO_USECS(ticks)	((1024 * (ticks) + 500) / 1000)
#define EM_USECS_TO_TICKS(usecs)	((1000 * (usecs) + 512) / 1024)
#define M_TSO_LEN			66

#define MAX_INTS_PER_SEC	8000
#define DEFAULT_ITR		(1000000000/(MAX_INTS_PER_SEC * 256))
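
/*
 * Worked example of the conversions above (illustrative note, not driver
 * code): the adapter's delay timers tick in 1.024 usec units, hence the
 * 1024/1000 factors, with +500 and +512 rounding to nearest.  So
 * EM_TICKS_TO_USECS(64) = (1024 * 64 + 500) / 1000 = 66 usecs, and
 * EM_USECS_TO_TICKS(66) = (1000 * 66 + 512) / 1024 = 64 ticks.  The ITR
 * register counts in 256 ns units, so DEFAULT_ITR evaluates to
 * 1000000000 / (8000 * 256) = 488, i.e. roughly MAX_INTS_PER_SEC
 * interrupts per second.
 */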

/* Allow common code without TSO */
#ifndef CSUM_TSO
#define CSUM_TSO	0
#endif

#define TSO_WORKAROUND	4

static SYSCTL_NODE(_hw, OID_AUTO, em, CTLFLAG_RD, 0, "EM driver parameters");

static int em_disable_crc_stripping = 0;
SYSCTL_INT(_hw_em, OID_AUTO, disable_crc_stripping, CTLFLAG_RDTUN,
    &em_disable_crc_stripping, 0, "Disable CRC Stripping");

static int em_tx_int_delay_dflt = EM_TICKS_TO_USECS(EM_TIDV);
static int em_rx_int_delay_dflt = EM_TICKS_TO_USECS(EM_RDTR);
SYSCTL_INT(_hw_em, OID_AUTO, tx_int_delay, CTLFLAG_RDTUN, &em_tx_int_delay_dflt,
    0, "Default transmit interrupt delay in usecs");
SYSCTL_INT(_hw_em, OID_AUTO, rx_int_delay, CTLFLAG_RDTUN, &em_rx_int_delay_dflt,
    0, "Default receive interrupt delay in usecs");

static int em_tx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_TADV);
static int em_rx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_RADV);
SYSCTL_INT(_hw_em, OID_AUTO, tx_abs_int_delay, CTLFLAG_RDTUN,
    &em_tx_abs_int_delay_dflt, 0,
    "Default transmit interrupt delay limit in usecs");
SYSCTL_INT(_hw_em, OID_AUTO, rx_abs_int_delay, CTLFLAG_RDTUN,
    &em_rx_abs_int_delay_dflt, 0,
    "Default receive interrupt delay limit in usecs");

static int em_rxd = EM_DEFAULT_RXD;
static int em_txd = EM_DEFAULT_TXD;
SYSCTL_INT(_hw_em, OID_AUTO, rxd, CTLFLAG_RDTUN, &em_rxd, 0,
    "Number of receive descriptors per queue");
SYSCTL_INT(_hw_em, OID_AUTO, txd, CTLFLAG_RDTUN, &em_txd, 0,
    "Number of transmit descriptors per queue");
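
/*
 * Illustrative usage (values are examples only): being CTLFLAG_RDTUN,
 * the knobs above can be set as loader tunables, e.g. in loader.conf:
 *
 *	hw.em.txd=1024
 *	hw.em.rxd=1024
 *	hw.em.rx_int_delay=32
 *
 * Out-of-range or misaligned descriptor counts are caught in em_attach()
 * and fall back to EM_DEFAULT_TXD/EM_DEFAULT_RXD.
 */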

static int em_smart_pwr_down = FALSE;
SYSCTL_INT(_hw_em, OID_AUTO, smart_pwr_down, CTLFLAG_RDTUN, &em_smart_pwr_down,
    0, "Set to true to leave smart power down enabled on newer adapters");

/* Controls whether promiscuous also shows bad packets */
static int em_debug_sbp = FALSE;
SYSCTL_INT(_hw_em, OID_AUTO, sbp, CTLFLAG_RDTUN, &em_debug_sbp, 0,
    "Show bad packets in promiscuous mode");

static int em_enable_msix = TRUE;
SYSCTL_INT(_hw_em, OID_AUTO, enable_msix, CTLFLAG_RDTUN, &em_enable_msix, 0,
    "Enable MSI-X interrupts");

#ifdef EM_MULTIQUEUE
static int em_num_queues = 1;
SYSCTL_INT(_hw_em, OID_AUTO, num_queues, CTLFLAG_RDTUN, &em_num_queues, 0,
    "82574 only: Number of queues to configure, 0 indicates autoconfigure");
#endif
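
/*
 * Illustrative usage: with a kernel built with "options EM_MULTIQUEUE",
 * an 82574 (which supports two queue pairs) can be configured via
 * loader.conf, e.g.:
 *
 *	hw.em.num_queues=2
 *
 * Other parts, or systems without MSI-X, remain single-queue.
 */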

/*
** Global variable to store the last used CPU when binding queues
** to CPUs in em_allocate_msix.  Starts at CPU_FIRST and increments when a
** queue is bound to a cpu.
*/
static int em_last_bind_cpu = -1;

/* How many packets rxeof tries to clean at a time */
static int em_rx_process_limit = 100;
SYSCTL_INT(_hw_em, OID_AUTO, rx_process_limit, CTLFLAG_RDTUN,
    &em_rx_process_limit, 0,
    "Maximum number of received packets to process "
    "at a time, -1 means unlimited");

/* Energy efficient ethernet - default to OFF (1 here means EEE disabled) */
static int eee_setting = 1;
SYSCTL_INT(_hw_em, OID_AUTO, eee_setting, CTLFLAG_RDTUN, &eee_setting, 0,
    "Enable Energy Efficient Ethernet");
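
/*
 * Note on the sense of eee_setting: em_attach() copies it into
 * hw->dev_spec.ich8lan.eee_disable, so the default of 1 leaves EEE off.
 * An illustrative loader.conf line to enable EEE on parts that support
 * it would be:
 *
 *	hw.em.eee_setting=0
 */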

/* Global used in WOL setup with multiport cards */
static int global_quad_port_a = 0;

#ifdef DEV_NETMAP	/* see ixgbe.c for details */
#include <dev/netmap/if_em_netmap.h>
#endif /* DEV_NETMAP */

/*********************************************************************
 *  Device identification routine
 *
 *  em_probe determines if the driver should be loaded on an
 *  adapter based on its PCI vendor/device ID.
 *
 *  return BUS_PROBE_DEFAULT on success, positive on failure
 *********************************************************************/

static int
em_probe(device_t dev)
{
	char		adapter_name[60];
	uint16_t	pci_vendor_id = 0;
	uint16_t	pci_device_id = 0;
	uint16_t	pci_subvendor_id = 0;
	uint16_t	pci_subdevice_id = 0;
	em_vendor_info_t *ent;

	INIT_DEBUGOUT("em_probe: begin");

	pci_vendor_id = pci_get_vendor(dev);
	if (pci_vendor_id != EM_VENDOR_ID)
		return (ENXIO);

	pci_device_id = pci_get_device(dev);
	pci_subvendor_id = pci_get_subvendor(dev);
	pci_subdevice_id = pci_get_subdevice(dev);

	ent = em_vendor_info_array;
	while (ent->vendor_id != 0) {
		if ((pci_vendor_id == ent->vendor_id) &&
		    (pci_device_id == ent->device_id) &&
		    ((pci_subvendor_id == ent->subvendor_id) ||
		    (ent->subvendor_id == PCI_ANY_ID)) &&
		    ((pci_subdevice_id == ent->subdevice_id) ||
		    (ent->subdevice_id == PCI_ANY_ID))) {
			sprintf(adapter_name, "%s %s",
				em_strings[ent->index],
				em_driver_version);
			device_set_desc_copy(dev, adapter_name);
			return (BUS_PROBE_DEFAULT);
		}
		ent++;
	}

	return (ENXIO);
}

/*********************************************************************
 *  Device initialization routine
 *
 *  The attach entry point is called when the driver is being loaded.
 *  This routine identifies the type of hardware, allocates all resources
 *  and initializes the hardware.
 *
 *  return 0 on success, positive on failure
 *********************************************************************/

static int
em_attach(device_t dev)
{
	struct adapter	*adapter;
	struct e1000_hw	*hw;
	int		error = 0;

	INIT_DEBUGOUT("em_attach: begin");

	if (resource_disabled("em", device_get_unit(dev))) {
		device_printf(dev, "Disabled by device hint\n");
		return (ENXIO);
	}

	adapter = device_get_softc(dev);
	adapter->dev = adapter->osdep.dev = dev;
	hw = &adapter->hw;
	EM_CORE_LOCK_INIT(adapter, device_get_nameunit(dev));

	/* SYSCTL stuff */
	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
	    OID_AUTO, "nvm", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
	    em_sysctl_nvm_info, "I", "NVM Information");

	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
	    OID_AUTO, "debug", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
	    em_sysctl_debug_info, "I", "Debug Information");

	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
	    OID_AUTO, "fc", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
	    em_set_flowcntl, "I", "Flow Control");

	callout_init_mtx(&adapter->timer, &adapter->core_mtx, 0);

	/* Determine hardware and mac info */
	em_identify_hardware(adapter);

	/* Setup PCI resources */
	if (em_allocate_pci_resources(adapter)) {
		device_printf(dev, "Allocation of PCI resources failed\n");
		error = ENXIO;
		goto err_pci;
	}

	/*
	** For ICH8 and family we need to
	** map the flash memory, and this
	** must happen after the MAC is
	** identified
	*/
	if ((hw->mac.type == e1000_ich8lan) ||
	    (hw->mac.type == e1000_ich9lan) ||
	    (hw->mac.type == e1000_ich10lan) ||
	    (hw->mac.type == e1000_pchlan) ||
	    (hw->mac.type == e1000_pch2lan) ||
	    (hw->mac.type == e1000_pch_lpt)) {
		int rid = EM_BAR_TYPE_FLASH;
		adapter->flash = bus_alloc_resource_any(dev,
		    SYS_RES_MEMORY, &rid, RF_ACTIVE);
		if (adapter->flash == NULL) {
			device_printf(dev, "Mapping of Flash failed\n");
			error = ENXIO;
			goto err_pci;
		}
		/* This is used in the shared code */
		hw->flash_address = (u8 *)adapter->flash;
		adapter->osdep.flash_bus_space_tag =
		    rman_get_bustag(adapter->flash);
		adapter->osdep.flash_bus_space_handle =
		    rman_get_bushandle(adapter->flash);
	}
	/*
	** In the new SPT device, flash is not a
	** separate BAR; rather, it lives within BAR0,
	** so use the same tag and an offset handle for the
	** FLASH read/write macros in the shared code.
	*/
	else if (hw->mac.type == e1000_pch_spt) {
		adapter->osdep.flash_bus_space_tag =
		    adapter->osdep.mem_bus_space_tag;
		adapter->osdep.flash_bus_space_handle =
		    adapter->osdep.mem_bus_space_handle
		    + E1000_FLASH_BASE_ADDR;
	}

	/* Do Shared Code initialization */
	error = e1000_setup_init_funcs(hw, TRUE);
	if (error) {
		device_printf(dev, "Setup of Shared code failed, error %d\n",
		    error);
		error = ENXIO;
		goto err_pci;
	}

	/*
	 * Setup MSI/X or MSI if PCI Express
	 */
	adapter->msix = em_setup_msix(adapter);

	e1000_get_bus_info(hw);

	/* Set up some sysctls for the tunable interrupt delays */
	em_add_int_delay_sysctl(adapter, "rx_int_delay",
	    "receive interrupt delay in usecs", &adapter->rx_int_delay,
	    E1000_REGISTER(hw, E1000_RDTR), em_rx_int_delay_dflt);
	em_add_int_delay_sysctl(adapter, "tx_int_delay",
	    "transmit interrupt delay in usecs", &adapter->tx_int_delay,
	    E1000_REGISTER(hw, E1000_TIDV), em_tx_int_delay_dflt);
	em_add_int_delay_sysctl(adapter, "rx_abs_int_delay",
	    "receive interrupt delay limit in usecs",
	    &adapter->rx_abs_int_delay,
	    E1000_REGISTER(hw, E1000_RADV),
	    em_rx_abs_int_delay_dflt);
	em_add_int_delay_sysctl(adapter, "tx_abs_int_delay",
	    "transmit interrupt delay limit in usecs",
	    &adapter->tx_abs_int_delay,
	    E1000_REGISTER(hw, E1000_TADV),
	    em_tx_abs_int_delay_dflt);
	em_add_int_delay_sysctl(adapter, "itr",
	    "interrupt delay limit in usecs/4",
	    &adapter->tx_itr,
	    E1000_REGISTER(hw, E1000_ITR),
	    DEFAULT_ITR);

	/* Sysctl for limiting the amount of work done in the taskqueue */
	em_set_sysctl_value(adapter, "rx_processing_limit",
	    "max number of rx packets to process", &adapter->rx_process_limit,
	    em_rx_process_limit);

	/*
	 * Validate number of transmit and receive descriptors. It
	 * must not exceed hardware maximum, and must be a multiple
	 * of EM_DBA_ALIGN.
	 */
	if (((em_txd * sizeof(struct e1000_tx_desc)) % EM_DBA_ALIGN) != 0 ||
	    (em_txd > EM_MAX_TXD) || (em_txd < EM_MIN_TXD)) {
		device_printf(dev, "Using %d TX descriptors instead of %d!\n",
		    EM_DEFAULT_TXD, em_txd);
		adapter->num_tx_desc = EM_DEFAULT_TXD;
	} else
		adapter->num_tx_desc = em_txd;

	if (((em_rxd * sizeof(union e1000_rx_desc_extended)) % EM_DBA_ALIGN) != 0 ||
	    (em_rxd > EM_MAX_RXD) || (em_rxd < EM_MIN_RXD)) {
		device_printf(dev, "Using %d RX descriptors instead of %d!\n",
		    EM_DEFAULT_RXD, em_rxd);
		adapter->num_rx_desc = EM_DEFAULT_RXD;
	} else
		adapter->num_rx_desc = em_rxd;

	hw->mac.autoneg = DO_AUTO_NEG;
	hw->phy.autoneg_wait_to_complete = FALSE;
	hw->phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;

	/* Copper options */
	if (hw->phy.media_type == e1000_media_type_copper) {
		hw->phy.mdix = AUTO_ALL_MODES;
		hw->phy.disable_polarity_correction = FALSE;
		hw->phy.ms_type = EM_MASTER_SLAVE;
	}

	/*
	 * Set the frame limits assuming
	 * standard ethernet sized frames.
	 */
	adapter->hw.mac.max_frame_size =
	    ETHERMTU + ETHER_HDR_LEN + ETHERNET_FCS_SIZE;

	/*
	 * This controls when hardware reports transmit completion
	 * status.
	 */
	hw->mac.report_tx_early = 1;

	/*
	** Get queue/ring memory
	*/
	if (em_allocate_queues(adapter)) {
		error = ENOMEM;
		goto err_pci;
	}

	/* Allocate multicast array memory. */
	adapter->mta = malloc(sizeof(u8) * ETH_ADDR_LEN *
	    MAX_NUM_MULTICAST_ADDRESSES, M_DEVBUF, M_NOWAIT);
	if (adapter->mta == NULL) {
		device_printf(dev, "Can not allocate multicast setup array\n");
		error = ENOMEM;
		goto err_late;
	}

	/* Check SOL/IDER usage */
	if (e1000_check_reset_block(hw))
		device_printf(dev, "PHY reset is blocked"
		    " due to SOL/IDER session.\n");

	/* Sysctl for setting Energy Efficient Ethernet */
	hw->dev_spec.ich8lan.eee_disable = eee_setting;
	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
	    OID_AUTO, "eee_control", CTLTYPE_INT|CTLFLAG_RW,
	    adapter, 0, em_sysctl_eee, "I",
	    "Disable Energy Efficient Ethernet");

	/*
	** Start from a known state; this is
	** important for reading the NVM and
	** MAC address from it.
	*/
	e1000_reset_hw(hw);

	/* Make sure we have a good EEPROM before we read from it */
	if (e1000_validate_nvm_checksum(hw) < 0) {
		/*
		** Some PCI-E parts fail the first check due to
		** the link being in sleep state; call it again,
		** and if it fails a second time it's a real issue.
		*/
		if (e1000_validate_nvm_checksum(hw) < 0) {
			device_printf(dev,
			    "The EEPROM Checksum Is Not Valid\n");
			error = EIO;
			goto err_late;
		}
	}

	/* Copy the permanent MAC address out of the EEPROM */
	if (e1000_read_mac_addr(hw) < 0) {
		device_printf(dev, "EEPROM read error while reading MAC"
		    " address\n");
		error = EIO;
		goto err_late;
	}

	if (!em_is_valid_ether_addr(hw->mac.addr)) {
		device_printf(dev, "Invalid MAC address\n");
		error = EIO;
		goto err_late;
	}

	/* Disable ULP support */
	e1000_disable_ulp_lpt_lp(hw, TRUE);

	/*
	**  Do interrupt configuration
	*/
	if (adapter->msix > 1) /* Do MSIX */
		error = em_allocate_msix(adapter);
	else  /* MSI or Legacy */
		error = em_allocate_legacy(adapter);
	if (error)
		goto err_late;

	/*
	 * Get Wake-on-Lan and Management info for later use
	 */
	em_get_wakeup(dev);

	/* Setup OS specific network interface */
	if (em_setup_interface(dev, adapter) != 0)
		goto err_late;

	em_reset(adapter);

	/* Initialize statistics */
	em_update_stats_counters(adapter);

	hw->mac.get_link_status = 1;
	em_update_link_status(adapter);

	/* Register for VLAN events */
	adapter->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
	    em_register_vlan, adapter, EVENTHANDLER_PRI_FIRST);
	adapter->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
	    em_unregister_vlan, adapter, EVENTHANDLER_PRI_FIRST);

	em_add_hw_stats(adapter);

	/* Non-AMT based hardware can now take control from firmware */
	if (adapter->has_manage && !adapter->has_amt)
		em_get_hw_control(adapter);

	/* Tell the stack that the interface is not active */
	if_setdrvflagbits(adapter->ifp, IFF_DRV_OACTIVE, IFF_DRV_RUNNING);

	adapter->led_dev = led_create(em_led_func, adapter,
	    device_get_nameunit(dev));
#ifdef DEV_NETMAP
	em_netmap_attach(adapter);
#endif /* DEV_NETMAP */

	INIT_DEBUGOUT("em_attach: end");

	return (0);

err_late:
	em_free_transmit_structures(adapter);
	em_free_receive_structures(adapter);
	em_release_hw_control(adapter);
	if (adapter->ifp != (void *)NULL)
		if_free(adapter->ifp);
err_pci:
	em_free_pci_resources(adapter);
	free(adapter->mta, M_DEVBUF);
	EM_CORE_LOCK_DESTROY(adapter);

	return (error);
}

/*********************************************************************
 *  Device removal routine
 *
 *  The detach entry point is called when the driver is being removed.
 *  This routine stops the adapter and deallocates all the resources
 *  that were allocated for driver operation.
 *
 *  return 0 on success, positive on failure
 *********************************************************************/

static int
em_detach(device_t dev)
{
	struct adapter	*adapter = device_get_softc(dev);
	if_t ifp = adapter->ifp;

	INIT_DEBUGOUT("em_detach: begin");

	/* Make sure VLANS are not using driver */
	if (if_vlantrunkinuse(ifp)) {
		device_printf(dev, "Vlan in use, detach first\n");
		return (EBUSY);
	}

#ifdef DEVICE_POLLING
	if (if_getcapenable(ifp) & IFCAP_POLLING)
		ether_poll_deregister(ifp);
#endif

	if (adapter->led_dev != NULL)
		led_destroy(adapter->led_dev);

	EM_CORE_LOCK(adapter);
	adapter->in_detach = 1;
	em_stop(adapter);
	EM_CORE_UNLOCK(adapter);
	EM_CORE_LOCK_DESTROY(adapter);

	e1000_phy_hw_reset(&adapter->hw);

	em_release_manageability(adapter);
	em_release_hw_control(adapter);

	/* Unregister VLAN events */
	if (adapter->vlan_attach != NULL)
		EVENTHANDLER_DEREGISTER(vlan_config, adapter->vlan_attach);
	if (adapter->vlan_detach != NULL)
		EVENTHANDLER_DEREGISTER(vlan_unconfig, adapter->vlan_detach);

	ether_ifdetach(adapter->ifp);
	callout_drain(&adapter->timer);

#ifdef DEV_NETMAP
	netmap_detach(ifp);
#endif /* DEV_NETMAP */

	em_free_pci_resources(adapter);
	bus_generic_detach(dev);
	if_free(ifp);

	em_free_transmit_structures(adapter);
	em_free_receive_structures(adapter);

	em_release_hw_control(adapter);
	free(adapter->mta, M_DEVBUF);

	return (0);
}

/*********************************************************************
 *
 *  Shutdown entry point
 *
 **********************************************************************/

static int
em_shutdown(device_t dev)
{
	return em_suspend(dev);
}

/*
 * Suspend/resume device methods.
 */
static int
em_suspend(device_t dev)
{
	struct adapter *adapter = device_get_softc(dev);

	EM_CORE_LOCK(adapter);

	em_release_manageability(adapter);
	em_release_hw_control(adapter);
	em_enable_wakeup(dev);

	EM_CORE_UNLOCK(adapter);

	return bus_generic_suspend(dev);
}

static int
em_resume(device_t dev)
{
	struct adapter *adapter = device_get_softc(dev);
	struct tx_ring	*txr = adapter->tx_rings;
	if_t ifp = adapter->ifp;

	EM_CORE_LOCK(adapter);
	if (adapter->hw.mac.type == e1000_pch2lan)
		e1000_resume_workarounds_pchlan(&adapter->hw);
	em_init_locked(adapter);
	em_init_manageability(adapter);

	if ((if_getflags(ifp) & IFF_UP) &&
	    (if_getdrvflags(ifp) & IFF_DRV_RUNNING) && adapter->link_active) {
		for (int i = 0; i < adapter->num_queues; i++, txr++) {
			EM_TX_LOCK(txr);
#ifdef EM_MULTIQUEUE
			if (!drbr_empty(ifp, txr->br))
				em_mq_start_locked(ifp, txr);
#else
			if (!if_sendq_empty(ifp))
				em_start_locked(ifp, txr);
#endif
			EM_TX_UNLOCK(txr);
		}
	}
	EM_CORE_UNLOCK(adapter);

	return bus_generic_resume(dev);
}

#ifndef EM_MULTIQUEUE
static void
em_start_locked(if_t ifp, struct tx_ring *txr)
{
	struct adapter	*adapter = if_getsoftc(ifp);
	struct mbuf	*m_head;

	EM_TX_LOCK_ASSERT(txr);

	if ((if_getdrvflags(ifp) & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) !=
	    IFF_DRV_RUNNING)
		return;

	if (!adapter->link_active)
		return;

	while (!if_sendq_empty(ifp)) {
		/* Call cleanup if number of TX descriptors low */
		if (txr->tx_avail <= EM_TX_CLEANUP_THRESHOLD)
			em_txeof(txr);
		if (txr->tx_avail < EM_MAX_SCATTER) {
			if_setdrvflagbits(ifp, IFF_DRV_OACTIVE, 0);
			break;
		}
		m_head = if_dequeue(ifp);
		if (m_head == NULL)
			break;
		/*
		 *  Encapsulation can modify our pointer, and/or make it
		 *  NULL on failure.  In that event, we can't requeue.
		 */
		if (em_xmit(txr, &m_head)) {
			if (m_head == NULL)
				break;
			if_sendq_prepend(ifp, m_head);
			break;
		}

		/* Mark the queue as having work */
		if (txr->busy == EM_TX_IDLE)
			txr->busy = EM_TX_BUSY;

		/* Send a copy of the frame to the BPF listener */
		ETHER_BPF_MTAP(ifp, m_head);
	}
}

static void
em_start(if_t ifp)
{
	struct adapter	*adapter = if_getsoftc(ifp);
	struct tx_ring	*txr = adapter->tx_rings;

	if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) {
		EM_TX_LOCK(txr);
		em_start_locked(ifp, txr);
		EM_TX_UNLOCK(txr);
	}
}
#else /* EM_MULTIQUEUE */
/*********************************************************************
 *  Multiqueue Transmit routines
 *
 *  em_mq_start is called by the stack to initiate a transmit.
 *  However, if the ring is busy the driver can queue the request
 *  rather than perform an immediate send; that deferral, rather
 *  than the presence of multiple tx queues, is this driver's
 *  advantage.
 **********************************************************************/
/*
** Multiqueue capable stack interface
*/
static int
em_mq_start(if_t ifp, struct mbuf *m)
{
	struct adapter	*adapter = if_getsoftc(ifp);
	struct tx_ring	*txr = adapter->tx_rings;
	unsigned int	i, error;

	if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE)
		i = m->m_pkthdr.flowid % adapter->num_queues;
	else
		i = curcpu % adapter->num_queues;

	txr = &adapter->tx_rings[i];

	error = drbr_enqueue(ifp, txr->br, m);
	if (error)
		return (error);

	if (EM_TX_TRYLOCK(txr)) {
		em_mq_start_locked(ifp, txr);
		EM_TX_UNLOCK(txr);
	} else
		taskqueue_enqueue(txr->tq, &txr->tx_task);

	return (0);
}

static int
em_mq_start_locked(if_t ifp, struct tx_ring *txr)
{
	struct adapter	*adapter = txr->adapter;
	struct mbuf	*next;
	int		err = 0, enq = 0;

	EM_TX_LOCK_ASSERT(txr);

	if (((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0) ||
	    adapter->link_active == 0) {
		return (ENETDOWN);
	}

	/* Process the queue */
	while ((next = drbr_peek(ifp, txr->br)) != NULL) {
		if ((err = em_xmit(txr, &next)) != 0) {
			if (next == NULL) {
				/* It was freed, move forward */
				drbr_advance(ifp, txr->br);
			} else {
				/*
				 * Still have one left, it may not be
				 * the same since the transmit function
				 * may have changed it.
				 */
				drbr_putback(ifp, txr->br, next);
			}
			break;
		}
		drbr_advance(ifp, txr->br);
		enq++;
		if_inc_counter(ifp, IFCOUNTER_OBYTES, next->m_pkthdr.len);
		if (next->m_flags & M_MCAST)
			if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1);
		ETHER_BPF_MTAP(ifp, next);
		if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0)
			break;
	}

	/* Mark the queue as having work */
	if ((enq > 0) && (txr->busy == EM_TX_IDLE))
		txr->busy = EM_TX_BUSY;

	if (txr->tx_avail < EM_MAX_SCATTER)
		em_txeof(txr);
	if (txr->tx_avail < EM_MAX_SCATTER) {
		if_setdrvflagbits(ifp, IFF_DRV_OACTIVE, 0);
	}
	return (err);
}

/*
** Flush all ring buffers
*/
static void
em_qflush(if_t ifp)
{
	struct adapter	*adapter = if_getsoftc(ifp);
	struct tx_ring	*txr = adapter->tx_rings;
	struct mbuf	*m;

	for (int i = 0; i < adapter->num_queues; i++, txr++) {
		EM_TX_LOCK(txr);
		while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
			m_freem(m);
		EM_TX_UNLOCK(txr);
	}
	if_qflush(ifp);
}
#endif /* EM_MULTIQUEUE */

/*********************************************************************
 *  Ioctl entry point
 *
 *  em_ioctl is called when the user wants to configure the
 *  interface.
 *
 *  return 0 on success, positive on failure
 **********************************************************************/

static int
em_ioctl(if_t ifp, u_long command, caddr_t data)
{
	struct adapter	*adapter = if_getsoftc(ifp);
	struct ifreq	*ifr = (struct ifreq *)data;
#if defined(INET) || defined(INET6)
	struct ifaddr	*ifa = (struct ifaddr *)data;
#endif
	bool		avoid_reset = FALSE;
	int		error = 0;

	if (adapter->in_detach)
		return (error);

	switch (command) {
	case SIOCSIFADDR:
#ifdef INET
		if (ifa->ifa_addr->sa_family == AF_INET)
			avoid_reset = TRUE;
#endif
#ifdef INET6
		if (ifa->ifa_addr->sa_family == AF_INET6)
			avoid_reset = TRUE;
#endif
		/*
		** Calling init results in link renegotiation,
		** so we avoid doing it when possible.
		*/
		if (avoid_reset) {
			if_setflagbits(ifp, IFF_UP, 0);
			if (!(if_getdrvflags(ifp) & IFF_DRV_RUNNING))
				em_init(adapter);
#ifdef INET
			if (!(if_getflags(ifp) & IFF_NOARP))
				arp_ifinit(ifp, ifa);
#endif
		} else
			error = ether_ioctl(ifp, command, data);
		break;
	case SIOCSIFMTU:
	    {
		int max_frame_size;

		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFMTU (Set Interface MTU)");

		EM_CORE_LOCK(adapter);
		switch (adapter->hw.mac.type) {
		case e1000_82571:
		case e1000_82572:
		case e1000_ich9lan:
		case e1000_ich10lan:
		case e1000_pch2lan:
		case e1000_pch_lpt:
		case e1000_pch_spt:
		case e1000_82574:
		case e1000_82583:
		case e1000_80003es2lan:	/* 9K Jumbo Frame size */
			max_frame_size = 9234;
			break;
		case e1000_pchlan:
			max_frame_size = 4096;
			break;
		/* Adapters that do not support jumbo frames */
		case e1000_ich8lan:
			max_frame_size = ETHER_MAX_LEN;
			break;
		default:
			max_frame_size = MAX_JUMBO_FRAME_SIZE;
		}
		if (ifr->ifr_mtu > max_frame_size - ETHER_HDR_LEN -
		    ETHER_CRC_LEN) {
			EM_CORE_UNLOCK(adapter);
			error = EINVAL;
			break;
		}

		if_setmtu(ifp, ifr->ifr_mtu);
		adapter->hw.mac.max_frame_size =
		    if_getmtu(ifp) + ETHER_HDR_LEN + ETHER_CRC_LEN;
		if (if_getdrvflags(ifp) & IFF_DRV_RUNNING)
			em_init_locked(adapter);
		EM_CORE_UNLOCK(adapter);
		break;
	    }
	case SIOCSIFFLAGS:
		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFFLAGS (Set Interface Flags)");
		EM_CORE_LOCK(adapter);
		if (if_getflags(ifp) & IFF_UP) {
			if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) {
				if ((if_getflags(ifp) ^ adapter->if_flags) &
				    (IFF_PROMISC | IFF_ALLMULTI)) {
					em_disable_promisc(adapter);
					em_set_promisc(adapter);
				}
			} else
				em_init_locked(adapter);
		} else
			if (if_getdrvflags(ifp) & IFF_DRV_RUNNING)
				em_stop(adapter);
		adapter->if_flags = if_getflags(ifp);
		EM_CORE_UNLOCK(adapter);
		break;
	case SIOCADDMULTI:
	case SIOCDELMULTI:
		IOCTL_DEBUGOUT("ioctl rcv'd: SIOC(ADD|DEL)MULTI");
		if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) {
			EM_CORE_LOCK(adapter);
			em_disable_intr(adapter);
			em_set_multi(adapter);
#ifdef DEVICE_POLLING
			if (!(if_getcapenable(ifp) & IFCAP_POLLING))
#endif
				em_enable_intr(adapter);
			EM_CORE_UNLOCK(adapter);
		}
		break;
	case SIOCSIFMEDIA:
		/* Check SOL/IDER usage */
		EM_CORE_LOCK(adapter);
		if (e1000_check_reset_block(&adapter->hw)) {
			EM_CORE_UNLOCK(adapter);
			device_printf(adapter->dev, "Media change is"
			    " blocked due to SOL/IDER session.\n");
			break;
		}
		EM_CORE_UNLOCK(adapter);
		/* falls thru */
	case SIOCGIFMEDIA:
		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCxIFMEDIA (Get/Set Interface Media)");
		error = ifmedia_ioctl(ifp, ifr, &adapter->media, command);
		break;
	case SIOCSIFCAP:
	    {
		int mask, reinit;

		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFCAP (Set Capabilities)");
		reinit = 0;
		mask = ifr->ifr_reqcap ^ if_getcapenable(ifp);
#ifdef DEVICE_POLLING
		if (mask & IFCAP_POLLING) {
			if (ifr->ifr_reqcap & IFCAP_POLLING) {
				error = ether_poll_register(em_poll, ifp);
				if (error)
					return (error);
				EM_CORE_LOCK(adapter);
				em_disable_intr(adapter);
				if_setcapenablebit(ifp, IFCAP_POLLING, 0);
				EM_CORE_UNLOCK(adapter);
			} else {
				error = ether_poll_deregister(ifp);
				/* Enable interrupt even in error case */
				EM_CORE_LOCK(adapter);
				em_enable_intr(adapter);
				if_setcapenablebit(ifp, 0, IFCAP_POLLING);
				EM_CORE_UNLOCK(adapter);
			}
		}
#endif
		if (mask & IFCAP_HWCSUM) {
			if_togglecapenable(ifp, IFCAP_HWCSUM);
			reinit = 1;
		}
		if (mask & IFCAP_TSO4) {
			if_togglecapenable(ifp, IFCAP_TSO4);
			reinit = 1;
		}
		if (mask & IFCAP_VLAN_HWTAGGING) {
			if_togglecapenable(ifp, IFCAP_VLAN_HWTAGGING);
			reinit = 1;
		}
		if (mask & IFCAP_VLAN_HWFILTER) {
			if_togglecapenable(ifp, IFCAP_VLAN_HWFILTER);
			reinit = 1;
		}
		if (mask & IFCAP_VLAN_HWTSO) {
			if_togglecapenable(ifp, IFCAP_VLAN_HWTSO);
			reinit = 1;
		}
		if ((mask & IFCAP_WOL) &&
		    (if_getcapabilities(ifp) & IFCAP_WOL) != 0) {
			if (mask & IFCAP_WOL_MCAST)
				if_togglecapenable(ifp, IFCAP_WOL_MCAST);
			if (mask & IFCAP_WOL_MAGIC)
				if_togglecapenable(ifp, IFCAP_WOL_MAGIC);
		}
		if (reinit && (if_getdrvflags(ifp) & IFF_DRV_RUNNING))
			em_init(adapter);
		if_vlancap(ifp);
		break;
	    }

	default:
		error = ether_ioctl(ifp, command, data);
		break;
	}

	return (error);
}

/*********************************************************************
 *  Init entry point
 *
 *  This routine is used in two ways. It is used by the stack as
 *  an init entry point in the network interface structure. It is
 *  also used by the driver as a hw/sw initialization routine to
 *  get to a consistent state.
 **********************************************************************/

static void
em_init_locked(struct adapter *adapter)
{
	if_t ifp = adapter->ifp;
	device_t	dev = adapter->dev;

	INIT_DEBUGOUT("em_init: begin");

	EM_CORE_LOCK_ASSERT(adapter);

	em_disable_intr(adapter);
	callout_stop(&adapter->timer);

	/* Get the latest mac address, User can use a LAA */
	bcopy(if_getlladdr(adapter->ifp), adapter->hw.mac.addr,
	    ETHER_ADDR_LEN);

	/* Put the address into the Receive Address Array */
	e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);

	/*
	 * With the 82571 adapter, RAR[0] may be overwritten
	 * when the other port is reset, we make a duplicate
	 * in RAR[14] for that eventuality, this assures
	 * the interface continues to function.
	 */
	if (adapter->hw.mac.type == e1000_82571) {
		e1000_set_laa_state_82571(&adapter->hw, TRUE);
		e1000_rar_set(&adapter->hw, adapter->hw.mac.addr,
		    E1000_RAR_ENTRIES - 1);
	}

	/* Initialize the hardware */
	em_reset(adapter);
	em_update_link_status(adapter);

	/* Setup VLAN support, basic and offload if available */
	E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);

	/* Set hardware offload abilities */
	if_clearhwassist(ifp);
	if (if_getcapenable(ifp) & IFCAP_TXCSUM)
		if_sethwassistbits(ifp, CSUM_TCP | CSUM_UDP, 0);
	/*
	** There have proven to be problems with TSO when not
	** at full gigabit speed, so disable the assist automatically
	** when at lower speeds.  -jfv
	*/
	if (if_getcapenable(ifp) & IFCAP_TSO4) {
		if (adapter->link_speed == SPEED_1000)
			if_sethwassistbits(ifp, CSUM_TSO, 0);
	}

	/* Configure for OS presence */
	em_init_manageability(adapter);

	/* Prepare transmit descriptors and buffers */
	em_setup_transmit_structures(adapter);
	em_initialize_transmit_unit(adapter);

	/* Setup Multicast table */
	em_set_multi(adapter);

	/*
	** Figure out the desired mbuf
	** pool for doing jumbos
	*/
	if (adapter->hw.mac.max_frame_size <= 2048)
		adapter->rx_mbuf_sz = MCLBYTES;
	else if (adapter->hw.mac.max_frame_size <= 4096)
		adapter->rx_mbuf_sz = MJUMPAGESIZE;
	else
		adapter->rx_mbuf_sz = MJUM9BYTES;

	/* Prepare receive descriptors and buffers */
	if (em_setup_receive_structures(adapter)) {
		device_printf(dev, "Could not setup receive structures\n");
		em_stop(adapter);
		return;
	}
	em_initialize_receive_unit(adapter);

	/* Use real VLAN Filter support? */
	if (if_getcapenable(ifp) & IFCAP_VLAN_HWTAGGING) {
		if (if_getcapenable(ifp) & IFCAP_VLAN_HWFILTER)
			/* Use real VLAN Filter support */
			em_setup_vlan_hw_support(adapter);
		else {
			u32 ctrl;
			ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL);
			ctrl |= E1000_CTRL_VME;
			E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl);
		}
	}

	/* Don't lose promiscuous settings */
	em_set_promisc(adapter);

	/* Set the interface as ACTIVE */
	if_setdrvflagbits(ifp, IFF_DRV_RUNNING, IFF_DRV_OACTIVE);

	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
	e1000_clear_hw_cntrs_base_generic(&adapter->hw);

	/* MSI/X configuration for 82574 */
	if (adapter->hw.mac.type == e1000_82574) {
		int tmp;
		tmp = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
		tmp |= E1000_CTRL_EXT_PBA_CLR;
		E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, tmp);
		/* Set the IVAR - interrupt vector routing. */
		E1000_WRITE_REG(&adapter->hw, E1000_IVAR, adapter->ivars);
	}

#ifdef DEVICE_POLLING
	/*
	 * Only enable interrupts if we are not polling, make sure
	 * they are off otherwise.
	 */
	if (if_getcapenable(ifp) & IFCAP_POLLING)
		em_disable_intr(adapter);
	else
#endif /* DEVICE_POLLING */
		em_enable_intr(adapter);

	/* AMT based hardware can now take control from firmware */
	if (adapter->has_manage && adapter->has_amt)
		em_get_hw_control(adapter);
}

static void
em_init(void *arg)
{
	struct adapter *adapter = arg;

	EM_CORE_LOCK(adapter);
	em_init_locked(adapter);
	EM_CORE_UNLOCK(adapter);
}

#ifdef DEVICE_POLLING
/*********************************************************************
 *
 *  Legacy polling routine: note this only works with single queue
 *
 *********************************************************************/
static int
em_poll(if_t ifp, enum poll_cmd cmd, int count)
{
	struct adapter *adapter = if_getsoftc(ifp);
	struct tx_ring	*txr = adapter->tx_rings;
	struct rx_ring	*rxr = adapter->rx_rings;
	u32		reg_icr;
	int		rx_done;

	EM_CORE_LOCK(adapter);
	if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0) {
		EM_CORE_UNLOCK(adapter);
		return (0);
	}

	if (cmd == POLL_AND_CHECK_STATUS) {
		reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
		if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
			callout_stop(&adapter->timer);
			adapter->hw.mac.get_link_status = 1;
			em_update_link_status(adapter);
			callout_reset(&adapter->timer, hz,
			    em_local_timer, adapter);
		}
	}
	EM_CORE_UNLOCK(adapter);

	em_rxeof(rxr, count, &rx_done);

	EM_TX_LOCK(txr);
	em_txeof(txr);
#ifdef EM_MULTIQUEUE
	if (!drbr_empty(ifp, txr->br))
		em_mq_start_locked(ifp, txr);
#else
	if (!if_sendq_empty(ifp))
		em_start_locked(ifp, txr);
#endif
	EM_TX_UNLOCK(txr);

	return (rx_done);
}
#endif /* DEVICE_POLLING */

/*********************************************************************
 *
 *  Fast Legacy/MSI Combined Interrupt Service routine
 *
 *********************************************************************/
static int
em_irq_fast(void *arg)
{
	struct adapter	*adapter = arg;
	if_t ifp;
	u32		reg_icr;

	ifp = adapter->ifp;

	reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);

	/* Hot eject? */
	if (reg_icr == 0xffffffff)
		return FILTER_STRAY;

	/* Definitely not our interrupt. */
	if (reg_icr == 0x0)
		return FILTER_STRAY;

	/*
	 * Starting with the 82571 chip, bit 31 should be used to
	 * determine whether the interrupt belongs to us.
	 */
	if (adapter->hw.mac.type >= e1000_82571 &&
	    (reg_icr & E1000_ICR_INT_ASSERTED) == 0)
		return FILTER_STRAY;

	em_disable_intr(adapter);
	taskqueue_enqueue(adapter->tq, &adapter->que_task);

	/* Link status change */
	if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
		adapter->hw.mac.get_link_status = 1;
		taskqueue_enqueue(taskqueue_fast, &adapter->link_task);
	}

	if (reg_icr & E1000_ICR_RXO)
		adapter->rx_overruns++;
	return FILTER_HANDLED;
}

/* Combined RX/TX handler, used by Legacy and MSI */
static void
em_handle_que(void *context, int pending)
{
	struct adapter	*adapter = context;
	if_t ifp = adapter->ifp;
	struct tx_ring	*txr = adapter->tx_rings;
	struct rx_ring	*rxr = adapter->rx_rings;

	if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) {
		bool more = em_rxeof(rxr, adapter->rx_process_limit, NULL);

		EM_TX_LOCK(txr);
		em_txeof(txr);
#ifdef EM_MULTIQUEUE
		if (!drbr_empty(ifp, txr->br))
			em_mq_start_locked(ifp, txr);
#else
		if (!if_sendq_empty(ifp))
			em_start_locked(ifp, txr);
#endif
		EM_TX_UNLOCK(txr);
		if (more) {
			taskqueue_enqueue(adapter->tq, &adapter->que_task);
			return;
		}
	}

	em_enable_intr(adapter);
}

/*********************************************************************
 *
 *  MSIX Interrupt Service Routines
 *
 **********************************************************************/
static void
em_msix_tx(void *arg)
{
	struct tx_ring *txr = arg;
	struct adapter *adapter = txr->adapter;
	if_t ifp = adapter->ifp;

	++txr->tx_irq;
	EM_TX_LOCK(txr);
	em_txeof(txr);
#ifdef EM_MULTIQUEUE
	if (!drbr_empty(ifp, txr->br))
		em_mq_start_locked(ifp, txr);
#else
	if (!if_sendq_empty(ifp))
		em_start_locked(ifp, txr);
#endif

	/* Reenable this interrupt */
	E1000_WRITE_REG(&adapter->hw, E1000_IMS, txr->ims);
	EM_TX_UNLOCK(txr);
}

/*********************************************************************
 *
 *  MSIX RX Interrupt Service routine
 *
 **********************************************************************/

static void
em_msix_rx(void *arg)
{
	struct rx_ring	*rxr = arg;
	struct adapter	*adapter = rxr->adapter;
	bool		more;

	++rxr->rx_irq;
	if (!(if_getdrvflags(adapter->ifp) & IFF_DRV_RUNNING))
		return;
	more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
	if (more)
		taskqueue_enqueue(rxr->tq, &rxr->rx_task);
	else {
		/* Reenable this interrupt */
		E1000_WRITE_REG(&adapter->hw, E1000_IMS, rxr->ims);
	}
}

/*********************************************************************
 *
 *  MSIX Link Fast Interrupt Service routine
 *
 **********************************************************************/
static void
em_msix_link(void *arg)
{
	struct adapter	*adapter = arg;
	u32		reg_icr;

	++adapter->link_irq;
	reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);

	if (reg_icr & E1000_ICR_RXO)
		adapter->rx_overruns++;

	if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
		adapter->hw.mac.get_link_status = 1;
		em_handle_link(adapter, 0);
	} else
		E1000_WRITE_REG(&adapter->hw, E1000_IMS,
		    EM_MSIX_LINK | E1000_IMS_LSC);
	/*
	** Because we must read the ICR for this interrupt
	** it may clear other causes using autoclear, for
	** this reason we simply create a soft interrupt
	** for all these vectors.
	*/
	if (reg_icr) {
		E1000_WRITE_REG(&adapter->hw,
		    E1000_ICS, adapter->ims);
	}
}
1724
1725static void
1726em_handle_rx(void *context, int pending)
1727{
1728	struct rx_ring	*rxr = context;
1729	struct adapter	*adapter = rxr->adapter;
1730	bool		more;
1731
1732	more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
1733	if (more)
1734		taskqueue_enqueue(rxr->tq, &rxr->rx_task);
1735	else {
1736		/* Reenable this interrupt */
1737		E1000_WRITE_REG(&adapter->hw, E1000_IMS, rxr->ims);
1738	}
1739}
1740
1741static void
1742em_handle_tx(void *context, int pending)
1743{
1744	struct tx_ring	*txr = context;
1745	struct adapter	*adapter = txr->adapter;
1746	if_t ifp = adapter->ifp;
1747
1748	EM_TX_LOCK(txr);
1749	em_txeof(txr);
1750#ifdef EM_MULTIQUEUE
1751	if (!drbr_empty(ifp, txr->br))
1752		em_mq_start_locked(ifp, txr);
1753#else
1754	if (!if_sendq_empty(ifp))
1755		em_start_locked(ifp, txr);
1756#endif
1757	E1000_WRITE_REG(&adapter->hw, E1000_IMS, txr->ims);
1758	EM_TX_UNLOCK(txr);
1759}
1760
1761static void
1762em_handle_link(void *context, int pending)
1763{
1764	struct adapter	*adapter = context;
1765	struct tx_ring	*txr = adapter->tx_rings;
1766	if_t ifp = adapter->ifp;
1767
1768	if (!(if_getdrvflags(ifp) & IFF_DRV_RUNNING))
1769		return;
1770
1771	EM_CORE_LOCK(adapter);
1772	callout_stop(&adapter->timer);
1773	em_update_link_status(adapter);
1774	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
1775	E1000_WRITE_REG(&adapter->hw, E1000_IMS,
1776	    EM_MSIX_LINK | E1000_IMS_LSC);
1777	if (adapter->link_active) {
1778		for (int i = 0; i < adapter->num_queues; i++, txr++) {
1779			EM_TX_LOCK(txr);
1780#ifdef EM_MULTIQUEUE
1781			if (!drbr_empty(ifp, txr->br))
1782				em_mq_start_locked(ifp, txr);
1783#else
1784			if (!if_sendq_empty(ifp))
1785				em_start_locked(ifp, txr);
1786#endif
1787			EM_TX_UNLOCK(txr);
1788		}
1789	}
1790	EM_CORE_UNLOCK(adapter);
1791}
1792
1793
1794/*********************************************************************
1795 *
1796 *  Media Ioctl callback
1797 *
1798 *  This routine is called whenever the user queries the status of
1799 *  the interface using ifconfig.
1800 *
1801 **********************************************************************/
1802static void
1803em_media_status(if_t ifp, struct ifmediareq *ifmr)
1804{
1805	struct adapter *adapter = if_getsoftc(ifp);
1806	u_char fiber_type = IFM_1000_SX;
1807
1808	INIT_DEBUGOUT("em_media_status: begin");
1809
1810	EM_CORE_LOCK(adapter);
1811	em_update_link_status(adapter);
1812
1813	ifmr->ifm_status = IFM_AVALID;
1814	ifmr->ifm_active = IFM_ETHER;
1815
1816	if (!adapter->link_active) {
1817		EM_CORE_UNLOCK(adapter);
1818		return;
1819	}
1820
1821	ifmr->ifm_status |= IFM_ACTIVE;
1822
1823	if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
1824	    (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
1825		ifmr->ifm_active |= fiber_type | IFM_FDX;
1826	} else {
1827		switch (adapter->link_speed) {
1828		case 10:
1829			ifmr->ifm_active |= IFM_10_T;
1830			break;
1831		case 100:
1832			ifmr->ifm_active |= IFM_100_TX;
1833			break;
1834		case 1000:
1835			ifmr->ifm_active |= IFM_1000_T;
1836			break;
1837		}
1838		if (adapter->link_duplex == FULL_DUPLEX)
1839			ifmr->ifm_active |= IFM_FDX;
1840		else
1841			ifmr->ifm_active |= IFM_HDX;
1842	}
1843	EM_CORE_UNLOCK(adapter);
1844}
1845
1846/*********************************************************************
1847 *
1848 *  Media Ioctl callback
1849 *
1850 *  This routine is called when the user changes speed/duplex using
1851 *  the media/mediaopt options with ifconfig.
1852 *
1853 **********************************************************************/
1854static int
1855em_media_change(if_t ifp)
1856{
1857	struct adapter *adapter = if_getsoftc(ifp);
1858	struct ifmedia  *ifm = &adapter->media;
1859
1860	INIT_DEBUGOUT("em_media_change: begin");
1861
1862	if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
1863		return (EINVAL);
1864
1865	EM_CORE_LOCK(adapter);
1866	switch (IFM_SUBTYPE(ifm->ifm_media)) {
1867	case IFM_AUTO:
1868		adapter->hw.mac.autoneg = DO_AUTO_NEG;
1869		adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
1870		break;
1871	case IFM_1000_LX:
1872	case IFM_1000_SX:
1873	case IFM_1000_T:
1874		adapter->hw.mac.autoneg = DO_AUTO_NEG;
1875		adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
1876		break;
1877	case IFM_100_TX:
1878		adapter->hw.mac.autoneg = FALSE;
1879		adapter->hw.phy.autoneg_advertised = 0;
1880		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1881			adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_FULL;
1882		else
1883			adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_HALF;
1884		break;
1885	case IFM_10_T:
1886		adapter->hw.mac.autoneg = FALSE;
1887		adapter->hw.phy.autoneg_advertised = 0;
1888		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1889			adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_FULL;
1890		else
1891			adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_HALF;
1892		break;
1893	default:
1894		device_printf(adapter->dev, "Unsupported media type\n");
1895	}
1896
1897	em_init_locked(adapter);
1898	EM_CORE_UNLOCK(adapter);
1899
1900	return (0);
1901}
1902
1903/*********************************************************************
1904 *
1905 *  This routine maps the mbufs to tx descriptors.
1906 *
1907 *  return 0 on success, positive on failure
1908 **********************************************************************/
1909
1910static int
1911em_xmit(struct tx_ring *txr, struct mbuf **m_headp)
1912{
1913	struct adapter		*adapter = txr->adapter;
1914	bus_dma_segment_t	segs[EM_MAX_SCATTER];
1915	bus_dmamap_t		map;
1916	struct em_txbuffer	*tx_buffer, *tx_buffer_mapped;
1917	struct e1000_tx_desc	*ctxd = NULL;
1918	struct mbuf		*m_head;
1919	struct ether_header	*eh;
1920	struct ip		*ip = NULL;
1921	struct tcphdr		*tp = NULL;
1922	u32			txd_upper = 0, txd_lower = 0;
1923	int			ip_off, poff;
1924	int			nsegs, i, j, first, last = 0;
1925	int			error;
1926	bool			do_tso, tso_desc, remap = TRUE;
1927
1928	m_head = *m_headp;
1929	do_tso = (m_head->m_pkthdr.csum_flags & CSUM_TSO);
1930	tso_desc = FALSE;
1931	ip_off = poff = 0;
1932
1933	/*
1934	 * Intel recommends entire IP/TCP header length reside in a single
1935	 * buffer. If multiple descriptors are used to describe the IP and
1936	 * TCP header, each descriptor should describe one or more
1937	 * complete headers; descriptors referencing only parts of headers
1938	 * are not supported. If all layer headers are not coalesced into
1939	 * a single buffer, each buffer should not cross a 4KB boundary,
1940	 * or be larger than the maximum read request size.
1941	 * The controller also requires the IP/TCP header to be modified
1942	 * to make TSO work, so we first obtain a writable mbuf chain and
1943	 * then coalesce the ethernet/IP/TCP headers into a single buffer
1944	 * to meet the controller's requirement. This also simplifies
1945	 * IP/TCP/UDP checksum offloading, which has similar restrictions.
1946	 */
1947	if (do_tso || m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD) {
1948		if (do_tso || (m_head->m_next != NULL &&
1949		    m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD)) {
1950			if (M_WRITABLE(*m_headp) == 0) {
1951				m_head = m_dup(*m_headp, M_NOWAIT);
1952				m_freem(*m_headp);
1953				if (m_head == NULL) {
1954					*m_headp = NULL;
1955					return (ENOBUFS);
1956				}
1957				*m_headp = m_head;
1958			}
1959		}
1960		/*
1961		 * XXX
1962		 * Assume IPv4, we don't have TSO/checksum offload support
1963		 * for IPv6 yet.
1964		 */
1965		ip_off = sizeof(struct ether_header);
1966		if (m_head->m_len < ip_off) {
1967			m_head = m_pullup(m_head, ip_off);
1968			if (m_head == NULL) {
1969				*m_headp = NULL;
1970				return (ENOBUFS);
1971			}
1972		}
1973		eh = mtod(m_head, struct ether_header *);
1974		if (eh->ether_type == htons(ETHERTYPE_VLAN)) {
1975			ip_off = sizeof(struct ether_vlan_header);
1976			if (m_head->m_len < ip_off) {
1977				m_head = m_pullup(m_head, ip_off);
1978				if (m_head == NULL) {
1979					*m_headp = NULL;
1980					return (ENOBUFS);
1981				}
1982			}
1983		}
1984		if (m_head->m_len < ip_off + sizeof(struct ip)) {
1985			m_head = m_pullup(m_head, ip_off + sizeof(struct ip));
1986			if (m_head == NULL) {
1987				*m_headp = NULL;
1988				return (ENOBUFS);
1989			}
1990		}
1991		ip = (struct ip *)(mtod(m_head, char *) + ip_off);
1992		poff = ip_off + (ip->ip_hl << 2);
1993
1994		if (do_tso || (m_head->m_pkthdr.csum_flags & CSUM_TCP)) {
1995			if (m_head->m_len < poff + sizeof(struct tcphdr)) {
1996				m_head = m_pullup(m_head, poff +
1997				    sizeof(struct tcphdr));
1998				if (m_head == NULL) {
1999					*m_headp = NULL;
2000					return (ENOBUFS);
2001				}
2002			}
2003			tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
2004			/*
2005			 * TSO workaround: pull TSO_WORKAROUND extra
2006			 * bytes beyond the TCP header into the mbuf.
2007			 */
2008			if (m_head->m_len < poff + (tp->th_off << 2)) {
2009				m_head = m_pullup(m_head, poff +
2010				                 (tp->th_off << 2) +
2011				                 TSO_WORKAROUND);
2012				if (m_head == NULL) {
2013					*m_headp = NULL;
2014					return (ENOBUFS);
2015				}
2016			}
2017			ip = (struct ip *)(mtod(m_head, char *) + ip_off);
2018			tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
2019			if (do_tso) {
2020				ip->ip_len = htons(m_head->m_pkthdr.tso_segsz +
2021				                  (ip->ip_hl << 2) +
2022				                  (tp->th_off << 2));
2023				ip->ip_sum = 0;
2024				/*
2025				 * The pseudo TCP checksum must not include
2026				 * the TCP payload length, so the driver
2027				 * recomputes the checksum here to match what
2028				 * the hardware expects, in adherence to
2029				 * Microsoft's Large Send specification.
2030				 */
2031				tp->th_sum = in_pseudo(ip->ip_src.s_addr,
2032				    ip->ip_dst.s_addr, htons(IPPROTO_TCP));
2033			}
2034		} else if (m_head->m_pkthdr.csum_flags & CSUM_UDP) {
2035			if (m_head->m_len < poff + sizeof(struct udphdr)) {
2036				m_head = m_pullup(m_head, poff +
2037				    sizeof(struct udphdr));
2038				if (m_head == NULL) {
2039					*m_headp = NULL;
2040					return (ENOBUFS);
2041				}
2042			}
2043			ip = (struct ip *)(mtod(m_head, char *) + ip_off);
2044		}
2045		*m_headp = m_head;
2046	}
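	/*
	 * Worked example of the offsets computed above (a sketch; exact
	 * values depend on the frame): for an untagged IPv4/TCP packet
	 * with no IP options, ip_off = 14 (the ethernet header) and
	 * poff = 14 + (5 << 2) = 34.  With a 20-byte TCP header
	 * (th_off == 5), the TSO pullup brings at least
	 * 34 + 20 + TSO_WORKAROUND bytes into the first mbuf.
	 */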
2047
2048	/*
2049	 * Map the packet for DMA
2050	 *
2051	 * Capture the first descriptor index;
2052	 * this descriptor will have the index
2053	 * of the EOP, which is the only one that
2054	 * now gets a DONE bit writeback.
2055	 */
2056	first = txr->next_avail_desc;
2057	tx_buffer = &txr->tx_buffers[first];
2058	tx_buffer_mapped = tx_buffer;
2059	map = tx_buffer->map;
2060
2061retry:
2062	error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
2063	    *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
2064
2065	/*
2066	 * There are two types of errors we can (try) to handle:
2067	 * - EFBIG means the mbuf chain was too long and bus_dma ran
2068	 *   out of segments.  Defragment the mbuf chain and try again.
2069	 * - ENOMEM means bus_dma could not obtain enough bounce buffers
2070	 *   at this point in time.  Defer sending and try again later.
2071	 * All other errors, in particular EINVAL, are fatal and prevent the
2072	 * mbuf chain from ever going through.  Drop it and report error.
2073	 */
2074	if (error == EFBIG && remap) {
2075		struct mbuf *m;
2076
2077		m = m_collapse(*m_headp, M_NOWAIT, EM_MAX_SCATTER);
2078		if (m == NULL) {
2079			adapter->mbuf_defrag_failed++;
2080			m_freem(*m_headp);
2081			*m_headp = NULL;
2082			return (ENOBUFS);
2083		}
2084		*m_headp = m;
2085
2086		/* Try it again, but only once */
2087		remap = FALSE;
2088		goto retry;
2089	} else if (error != 0) {
2090		adapter->no_tx_dma_setup++;
2091		m_freem(*m_headp);
2092		*m_headp = NULL;
2093		return (error);
2094	}
2095
2096	/*
2097	 * TSO Hardware workaround, if this packet is not
2098	 * TSO, and is only a single descriptor long, and
2099	 * it follows a TSO burst, then we need to add a
2100	 * sentinel descriptor to prevent premature writeback.
2101	 */
2102	if ((!do_tso) && (txr->tx_tso == TRUE)) {
2103		if (nsegs == 1)
2104			tso_desc = TRUE;
2105		txr->tx_tso = FALSE;
2106	}
2107
2108	if (txr->tx_avail < (nsegs + EM_MAX_SCATTER)) {
2109		txr->no_desc_avail++;
2110		bus_dmamap_unload(txr->txtag, map);
2111		return (ENOBUFS);
2112	}
2113	m_head = *m_headp;
2114
2115	/* Do hardware assists */
2116	if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
2117		em_tso_setup(txr, m_head, ip_off, ip, tp,
2118		    &txd_upper, &txd_lower);
2119		/* we need to make a final sentinel transmit desc */
2120		tso_desc = TRUE;
2121	} else if (m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD)
2122		em_transmit_checksum_setup(txr, m_head,
2123		    ip_off, ip, &txd_upper, &txd_lower);
2124
2125	if (m_head->m_flags & M_VLANTAG) {
2126		/* Set the vlan id. */
2127		txd_upper |= htole16(if_getvtag(m_head)) << 16;
2128		/* Tell hardware to add tag */
2129		txd_lower |= htole32(E1000_TXD_CMD_VLE);
2130	}
2131
2132	i = txr->next_avail_desc;
2133
2134	/* Set up our transmit descriptors */
2135	for (j = 0; j < nsegs; j++) {
2136		bus_size_t seg_len;
2137		bus_addr_t seg_addr;
2138
2139		tx_buffer = &txr->tx_buffers[i];
2140		ctxd = &txr->tx_base[i];
2141		seg_addr = segs[j].ds_addr;
2142		seg_len  = segs[j].ds_len;
2143		/*
2144		** TSO Workaround:
2145		** If this is the last descriptor, we want to
2146		** split it so we have a small final sentinel
2147		*/
2148		if (tso_desc && (j == (nsegs - 1)) && (seg_len > 8)) {
2149			seg_len -= TSO_WORKAROUND;
2150			ctxd->buffer_addr = htole64(seg_addr);
2151			ctxd->lower.data = htole32(
2152				adapter->txd_cmd | txd_lower | seg_len);
2153			ctxd->upper.data = htole32(txd_upper);
2154			if (++i == adapter->num_tx_desc)
2155				i = 0;
2156
2157			/* Now make the sentinel */
2158			txr->tx_avail--;
2159			ctxd = &txr->tx_base[i];
2160			tx_buffer = &txr->tx_buffers[i];
2161			ctxd->buffer_addr =
2162			    htole64(seg_addr + seg_len);
2163			ctxd->lower.data = htole32(
2164			    adapter->txd_cmd | txd_lower | TSO_WORKAROUND);
2165			ctxd->upper.data =
2166			    htole32(txd_upper);
2167			last = i;
2168			if (++i == adapter->num_tx_desc)
2169				i = 0;
2170		} else {
2171			ctxd->buffer_addr = htole64(seg_addr);
2172			ctxd->lower.data = htole32(
2173			    adapter->txd_cmd | txd_lower | seg_len);
2174			ctxd->upper.data = htole32(txd_upper);
2175			last = i;
2176			if (++i == adapter->num_tx_desc)
2177				i = 0;
2178		}
2179		tx_buffer->m_head = NULL;
2180		tx_buffer->next_eop = -1;
2181	}
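	/*
	 * Sketch of the sentinel split above, assuming TSO_WORKAROUND
	 * is 4: a final 256-byte segment is emitted as a 252-byte
	 * descriptor followed by a 4-byte sentinel descriptor, guarding
	 * against the premature writeback described earlier.
	 */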
2182
2183	txr->next_avail_desc = i;
2184	txr->tx_avail -= nsegs;
2185
2186	tx_buffer->m_head = m_head;
2187	/*
2188	** Here we swap the map so the last descriptor,
2189	** which gets the completion interrupt, has the
2190	** real map, and the first descriptor gets the
2191	** unused map from this descriptor.
2192	*/
2193	tx_buffer_mapped->map = tx_buffer->map;
2194	tx_buffer->map = map;
2195	bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE);
2196
2197	/*
2198	 * Last Descriptor of Packet
2199	 * needs End Of Packet (EOP)
2200	 * and Report Status (RS)
2201	 */
2202	ctxd->lower.data |=
2203	    htole32(E1000_TXD_CMD_EOP | E1000_TXD_CMD_RS);
2204	/*
2205	 * Keep track in the first buffer which
2206	 * descriptor will be written back
2207	 */
2208	tx_buffer = &txr->tx_buffers[first];
2209	tx_buffer->next_eop = last;
2210
2211	/*
2212	 * Advance the Transmit Descriptor Tail (TDT), this tells the E1000
2213	 * that this frame is available to transmit.
2214	 */
2215	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
2216	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
2217	E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me), i);
2218
2219	return (0);
2220}
2221
2222static void
2223em_set_promisc(struct adapter *adapter)
2224{
2225	if_t ifp = adapter->ifp;
2226	u32		reg_rctl;
2227
2228	reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2229
2230	if (if_getflags(ifp) & IFF_PROMISC) {
2231		reg_rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
2232		/* Turn this on if you want to see bad packets */
2233		if (em_debug_sbp)
2234			reg_rctl |= E1000_RCTL_SBP;
2235		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2236	} else if (if_getflags(ifp) & IFF_ALLMULTI) {
2237		reg_rctl |= E1000_RCTL_MPE;
2238		reg_rctl &= ~E1000_RCTL_UPE;
2239		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2240	}
2241}
2242
2243static void
2244em_disable_promisc(struct adapter *adapter)
2245{
2246	if_t		ifp = adapter->ifp;
2247	u32		reg_rctl;
2248	int		mcnt = 0;
2249
2250	reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2251	reg_rctl &=  (~E1000_RCTL_UPE);
2252	if (if_getflags(ifp) & IFF_ALLMULTI)
2253		mcnt = MAX_NUM_MULTICAST_ADDRESSES;
2254	else
2255		mcnt = if_multiaddr_count(ifp, MAX_NUM_MULTICAST_ADDRESSES);
2256	/* Don't disable if in MAX groups */
2257	if (mcnt < MAX_NUM_MULTICAST_ADDRESSES)
2258		reg_rctl &=  (~E1000_RCTL_MPE);
2259	reg_rctl &=  (~E1000_RCTL_SBP);
2260	E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2261}
2262
2263
2264/*********************************************************************
2265 *  Multicast Update
2266 *
2267 *  This routine is called whenever multicast address list is updated.
2268 *
2269 **********************************************************************/
2270
2271static void
2272em_set_multi(struct adapter *adapter)
2273{
2274	if_t ifp = adapter->ifp;
2275	u32 reg_rctl = 0;
2276	u8  *mta; /* Multicast array memory */
2277	int mcnt = 0;
2278
2279	IOCTL_DEBUGOUT("em_set_multi: begin");
2280
2281	mta = adapter->mta;
2282	bzero(mta, sizeof(u8) * ETH_ADDR_LEN * MAX_NUM_MULTICAST_ADDRESSES);
2283
2284	if (adapter->hw.mac.type == e1000_82542 &&
2285	    adapter->hw.revision_id == E1000_REVISION_2) {
2286		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2287		if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
2288			e1000_pci_clear_mwi(&adapter->hw);
2289		reg_rctl |= E1000_RCTL_RST;
2290		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2291		msec_delay(5);
2292	}
2293
2294	if_multiaddr_array(ifp, mta, &mcnt, MAX_NUM_MULTICAST_ADDRESSES);
2295
2296	if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES) {
2297		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2298		reg_rctl |= E1000_RCTL_MPE;
2299		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2300	} else
2301		e1000_update_mc_addr_list(&adapter->hw, mta, mcnt);
2302
2303	if (adapter->hw.mac.type == e1000_82542 &&
2304	    adapter->hw.revision_id == E1000_REVISION_2) {
2305		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2306		reg_rctl &= ~E1000_RCTL_RST;
2307		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2308		msec_delay(5);
2309		if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
2310			e1000_pci_set_mwi(&adapter->hw);
2311	}
2312}
2313
2314
2315/*********************************************************************
2316 *  Timer routine
2317 *
2318 *  This routine checks for link status and updates statistics.
2319 *
2320 **********************************************************************/
2321
2322static void
2323em_local_timer(void *arg)
2324{
2325	struct adapter	*adapter = arg;
2326	if_t ifp = adapter->ifp;
2327	struct tx_ring	*txr = adapter->tx_rings;
2328	struct rx_ring	*rxr = adapter->rx_rings;
2329	u32		trigger = 0;
2330
2331	EM_CORE_LOCK_ASSERT(adapter);
2332
2333	em_update_link_status(adapter);
2334	em_update_stats_counters(adapter);
2335
2336	/* Reset LAA into RAR[0] on 82571 */
2337	if ((adapter->hw.mac.type == e1000_82571) &&
2338	    e1000_get_laa_state_82571(&adapter->hw))
2339		e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);
2340
2341	/* Mask to use in the irq trigger */
2342	if (adapter->msix_mem) {
2343		for (int i = 0; i < adapter->num_queues; i++, rxr++)
2344			trigger |= rxr->ims;
2345		rxr = adapter->rx_rings;
2346	} else
2347		trigger = E1000_ICS_RXDMT0;
2348
2349	/*
2350	** Check on the state of the TX queue(s); this
2351	** can be done without the lock because it's RO
2352	** and the HUNG state will be static once set.
2353	*/
2354	for (int i = 0; i < adapter->num_queues; i++, txr++) {
2355		if (txr->busy == EM_TX_HUNG)
2356			goto hung;
2357		if (txr->busy >= EM_TX_MAXTRIES)
2358			txr->busy = EM_TX_HUNG;
2359		/* Schedule a TX tasklet if needed */
2360		if (txr->tx_avail <= EM_MAX_SCATTER)
2361			taskqueue_enqueue(txr->tq, &txr->tx_task);
2362	}
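	/*
	 * The hang check above is a simple saturating counter: txr->busy
	 * starts at EM_TX_IDLE, is presumably advanced while completions
	 * are outstanding (and cleared by the TX cleanup path), and once
	 * it reaches EM_TX_MAXTRIES it is latched to EM_TX_HUNG, which
	 * the next timer tick turns into a watchdog reset.
	 */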
2363
2364	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
2365#ifndef DEVICE_POLLING
2366	/* Trigger an RX interrupt to guarantee mbuf refresh */
2367	E1000_WRITE_REG(&adapter->hw, E1000_ICS, trigger);
2368#endif
2369	return;
2370hung:
2371	/* Looks like we're hung */
2372	device_printf(adapter->dev, "Watchdog timeout Queue[%d]-- resetting\n",
2373			txr->me);
2374	em_print_debug_info(adapter);
2375	if_setdrvflagbits(ifp, 0, IFF_DRV_RUNNING);
2376	adapter->watchdog_events++;
2377	em_init_locked(adapter);
2378}
2379
2380
2381static void
2382em_update_link_status(struct adapter *adapter)
2383{
2384	struct e1000_hw *hw = &adapter->hw;
2385	if_t ifp = adapter->ifp;
2386	device_t dev = adapter->dev;
2387	struct tx_ring *txr = adapter->tx_rings;
2388	u32 link_check = 0;
2389
2390	/* Get the cached link value or read phy for real */
2391	switch (hw->phy.media_type) {
2392	case e1000_media_type_copper:
2393		if (hw->mac.get_link_status) {
2394			if (hw->mac.type == e1000_pch_spt)
2395				msec_delay(50);
2396			/* Do the work to read phy */
2397			e1000_check_for_link(hw);
2398			link_check = !hw->mac.get_link_status;
2399			if (link_check) /* ESB2 fix */
2400				e1000_cfg_on_link_up(hw);
2401		} else
2402			link_check = TRUE;
2403		break;
2404	case e1000_media_type_fiber:
2405		e1000_check_for_link(hw);
2406		link_check = (E1000_READ_REG(hw, E1000_STATUS) &
2407                                 E1000_STATUS_LU);
2408		break;
2409	case e1000_media_type_internal_serdes:
2410		e1000_check_for_link(hw);
2411		link_check = adapter->hw.mac.serdes_has_link;
2412		break;
2413	default:
2414	case e1000_media_type_unknown:
2415		break;
2416	}
2417
2418	/* Now check for a transition */
2419	if (link_check && (adapter->link_active == 0)) {
2420		e1000_get_speed_and_duplex(hw, &adapter->link_speed,
2421		    &adapter->link_duplex);
2422		/* Check if we must disable SPEED_MODE bit on PCI-E */
2423		if ((adapter->link_speed != SPEED_1000) &&
2424		    ((hw->mac.type == e1000_82571) ||
2425		    (hw->mac.type == e1000_82572))) {
2426			int tarc0;
2427			tarc0 = E1000_READ_REG(hw, E1000_TARC(0));
2428			tarc0 &= ~TARC_SPEED_MODE_BIT;
2429			E1000_WRITE_REG(hw, E1000_TARC(0), tarc0);
2430		}
2431		if (bootverbose)
2432			device_printf(dev, "Link is up %d Mbps %s\n",
2433			    adapter->link_speed,
2434			    ((adapter->link_duplex == FULL_DUPLEX) ?
2435			    "Full Duplex" : "Half Duplex"));
2436		adapter->link_active = 1;
2437		adapter->smartspeed = 0;
2438		if_setbaudrate(ifp, adapter->link_speed * 1000000);
2439		if_link_state_change(ifp, LINK_STATE_UP);
2440	} else if (!link_check && (adapter->link_active == 1)) {
2441		if_setbaudrate(ifp, 0);
2442		adapter->link_speed = 0;
2443		adapter->link_duplex = 0;
2444		if (bootverbose)
2445			device_printf(dev, "Link is Down\n");
2446		adapter->link_active = 0;
2447		/* Link down, disable hang detection */
2448		for (int i = 0; i < adapter->num_queues; i++, txr++)
2449			txr->busy = EM_TX_IDLE;
2450		if_link_state_change(ifp, LINK_STATE_DOWN);
2451	}
2452}
2453
2454/*********************************************************************
2455 *
2456 *  This routine disables all traffic on the adapter by issuing a
2457 *  global reset on the MAC and deallocates TX/RX buffers.
2458 *
2459 *  This routine should always be called with BOTH the CORE
2460 *  and TX locks.
2461 **********************************************************************/
2462
2463static void
2464em_stop(void *arg)
2465{
2466	struct adapter	*adapter = arg;
2467	if_t ifp = adapter->ifp;
2468	struct tx_ring	*txr = adapter->tx_rings;
2469
2470	EM_CORE_LOCK_ASSERT(adapter);
2471
2472	INIT_DEBUGOUT("em_stop: begin");
2473
2474	em_disable_intr(adapter);
2475	callout_stop(&adapter->timer);
2476
2477	/* Tell the stack that the interface is no longer active */
2478	if_setdrvflagbits(ifp, IFF_DRV_OACTIVE, IFF_DRV_RUNNING);
2479
2480	/* Disarm Hang Detection. */
2481	for (int i = 0; i < adapter->num_queues; i++, txr++) {
2482		EM_TX_LOCK(txr);
2483		txr->busy = EM_TX_IDLE;
2484		EM_TX_UNLOCK(txr);
2485	}
2486
2487	/* I219 needs some special flushing to avoid hangs */
2488	if (adapter->hw.mac.type == e1000_pch_spt)
2489		em_flush_desc_rings(adapter);
2490
2491	e1000_reset_hw(&adapter->hw);
2492	E1000_WRITE_REG(&adapter->hw, E1000_WUC, 0);
2493
2494	e1000_led_off(&adapter->hw);
2495	e1000_cleanup_led(&adapter->hw);
2496}
2497
2498
2499/*********************************************************************
2500 *
2501 *  Determine hardware revision.
2502 *
2503 **********************************************************************/
2504static void
2505em_identify_hardware(struct adapter *adapter)
2506{
2507	device_t dev = adapter->dev;
2508
2509	/* Make sure our PCI config space has the necessary stuff set */
2510	pci_enable_busmaster(dev);
2511	adapter->hw.bus.pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2);
2512
2513	/* Save off the information about this board */
2514	adapter->hw.vendor_id = pci_get_vendor(dev);
2515	adapter->hw.device_id = pci_get_device(dev);
2516	adapter->hw.revision_id = pci_read_config(dev, PCIR_REVID, 1);
2517	adapter->hw.subsystem_vendor_id =
2518	    pci_read_config(dev, PCIR_SUBVEND_0, 2);
2519	adapter->hw.subsystem_device_id =
2520	    pci_read_config(dev, PCIR_SUBDEV_0, 2);
2521
2522	/* Do Shared Code Init and Setup */
2523	if (e1000_set_mac_type(&adapter->hw)) {
2524		device_printf(dev, "Setup init failure\n");
2525		return;
2526	}
2527}
2528
2529static int
2530em_allocate_pci_resources(struct adapter *adapter)
2531{
2532	device_t	dev = adapter->dev;
2533	int		rid;
2534
2535	rid = PCIR_BAR(0);
2536	adapter->memory = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
2537	    &rid, RF_ACTIVE);
2538	if (adapter->memory == NULL) {
2539		device_printf(dev, "Unable to allocate bus resource: memory\n");
2540		return (ENXIO);
2541	}
2542	adapter->osdep.mem_bus_space_tag =
2543	    rman_get_bustag(adapter->memory);
2544	adapter->osdep.mem_bus_space_handle =
2545	    rman_get_bushandle(adapter->memory);
2546	adapter->hw.hw_addr = (u8 *)&adapter->osdep.mem_bus_space_handle;
2547
2548	adapter->hw.back = &adapter->osdep;
2549
2550	return (0);
2551}
2552
2553/*********************************************************************
2554 *
2555 *  Setup the Legacy or MSI Interrupt handler
2556 *
2557 **********************************************************************/
2558int
2559em_allocate_legacy(struct adapter *adapter)
2560{
2561	device_t dev = adapter->dev;
2562	struct tx_ring	*txr = adapter->tx_rings;
2563	int error, rid = 0;
2564
2565	/* Manually turn off all interrupts */
2566	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2567
2568	if (adapter->msix == 1) /* using MSI */
2569		rid = 1;
2570	/* We allocate a single interrupt resource */
2571	adapter->res = bus_alloc_resource_any(dev,
2572	    SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2573	if (adapter->res == NULL) {
2574		device_printf(dev, "Unable to allocate bus resource: "
2575		    "interrupt\n");
2576		return (ENXIO);
2577	}
2578
2579	/*
2580	 * Allocate a fast interrupt and the associated
2581	 * deferred processing contexts.
2582	 */
2583	TASK_INIT(&adapter->que_task, 0, em_handle_que, adapter);
2584	adapter->tq = taskqueue_create_fast("em_taskq", M_NOWAIT,
2585	    taskqueue_thread_enqueue, &adapter->tq);
2586	taskqueue_start_threads(&adapter->tq, 1, PI_NET, "%s que",
2587	    device_get_nameunit(adapter->dev));
2588	/* Use a TX only tasklet for local timer */
2589	TASK_INIT(&txr->tx_task, 0, em_handle_tx, txr);
2590	txr->tq = taskqueue_create_fast("em_txq", M_NOWAIT,
2591	    taskqueue_thread_enqueue, &txr->tq);
2592	taskqueue_start_threads(&txr->tq, 1, PI_NET, "%s txq",
2593	    device_get_nameunit(adapter->dev));
2594	TASK_INIT(&adapter->link_task, 0, em_handle_link, adapter);
2595	if ((error = bus_setup_intr(dev, adapter->res, INTR_TYPE_NET,
2596	    em_irq_fast, NULL, adapter, &adapter->tag)) != 0) {
2597		device_printf(dev, "Failed to register fast interrupt "
2598			    "handler: %d\n", error);
2599		taskqueue_free(adapter->tq);
2600		adapter->tq = NULL;
2601		return (error);
2602	}
2603
2604	return (0);
2605}
2606
2607/*********************************************************************
2608 *
2609 *  Setup the MSIX Interrupt handlers
2610 *   This is not really Multiqueue; rather,
2611 *   it's just separate interrupt vectors
2612 *   for TX, RX, and Link.
2613 *
2614 **********************************************************************/
2615int
2616em_allocate_msix(struct adapter *adapter)
2617{
2618	device_t	dev = adapter->dev;
2619	struct		tx_ring *txr = adapter->tx_rings;
2620	struct		rx_ring *rxr = adapter->rx_rings;
2621	int		error, rid, vector = 0;
2622	int		cpu_id = 0;
2623
2624
2625	/* Make sure all interrupts are disabled */
2626	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2627
2628	/* First set up ring resources */
2629	for (int i = 0; i < adapter->num_queues; i++, rxr++, vector++) {
2630
2631		/* RX ring */
2632		rid = vector + 1;
2633
2634		rxr->res = bus_alloc_resource_any(dev,
2635		    SYS_RES_IRQ, &rid, RF_ACTIVE);
2636		if (rxr->res == NULL) {
2637			device_printf(dev,
2638			    "Unable to allocate bus resource: "
2639			    "RX MSIX Interrupt %d\n", i);
2640			return (ENXIO);
2641		}
2642		if ((error = bus_setup_intr(dev, rxr->res,
2643		    INTR_TYPE_NET | INTR_MPSAFE, NULL, em_msix_rx,
2644		    rxr, &rxr->tag)) != 0) {
2645			device_printf(dev, "Failed to register RX handler");
2646			return (error);
2647		}
2648#if __FreeBSD_version >= 800504
2649		bus_describe_intr(dev, rxr->res, rxr->tag, "rx%d", i);
2650#endif
2651		rxr->msix = vector;
2652
2653		if (em_last_bind_cpu < 0)
2654			em_last_bind_cpu = CPU_FIRST();
2655		cpu_id = em_last_bind_cpu;
2656		bus_bind_intr(dev, rxr->res, cpu_id);
2657
2658		TASK_INIT(&rxr->rx_task, 0, em_handle_rx, rxr);
2659		rxr->tq = taskqueue_create_fast("em_rxq", M_NOWAIT,
2660		    taskqueue_thread_enqueue, &rxr->tq);
2661		taskqueue_start_threads(&rxr->tq, 1, PI_NET, "%s rxq (cpuid %d)",
2662		    device_get_nameunit(adapter->dev), cpu_id);
2663		/*
2664		** Set the bit to enable interrupt
2665		** in E1000_IMS -- bits 20 and 21
2666		** are for RX0 and RX1; note this has
2667		** NOTHING to do with the MSIX vector.
2668		*/
2669		rxr->ims = 1 << (20 + i);
2670		adapter->ims |= rxr->ims;
2671		adapter->ivars |= (8 | rxr->msix) << (i * 4);
2672
2673		em_last_bind_cpu = CPU_NEXT(em_last_bind_cpu);
2674	}
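	/*
	 * For example, with the default single queue (i = 0, MSI-X
	 * vector 0) the loop above yields rxr->ims = 1 << 20
	 * (0x00100000) and ORs (8 | 0) = 0x8 into the low nibble of
	 * adapter->ivars; on the 82574 each 4-bit IVAR field appears to
	 * carry the 3-bit vector number plus a valid bit (the 8).
	 */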
2675
2676	for (int i = 0; i < adapter->num_queues; i++, txr++, vector++) {
2677		/* TX ring */
2678		rid = vector + 1;
2679		txr->res = bus_alloc_resource_any(dev,
2680		    SYS_RES_IRQ, &rid, RF_ACTIVE);
2681		if (txr->res == NULL) {
2682			device_printf(dev,
2683			    "Unable to allocate bus resource: "
2684			    "TX MSIX Interrupt %d\n", i);
2685			return (ENXIO);
2686		}
2687		if ((error = bus_setup_intr(dev, txr->res,
2688		    INTR_TYPE_NET | INTR_MPSAFE, NULL, em_msix_tx,
2689		    txr, &txr->tag)) != 0) {
2690			device_printf(dev, "Failed to register TX handler");
2691			return (error);
2692		}
2693#if __FreeBSD_version >= 800504
2694		bus_describe_intr(dev, txr->res, txr->tag, "tx%d", i);
2695#endif
2696		txr->msix = vector;
2697
2698		if (em_last_bind_cpu < 0)
2699			em_last_bind_cpu = CPU_FIRST();
2700		cpu_id = em_last_bind_cpu;
2701		bus_bind_intr(dev, txr->res, cpu_id);
2702
2703		TASK_INIT(&txr->tx_task, 0, em_handle_tx, txr);
2704		txr->tq = taskqueue_create_fast("em_txq", M_NOWAIT,
2705		    taskqueue_thread_enqueue, &txr->tq);
2706		taskqueue_start_threads(&txr->tq, 1, PI_NET, "%s txq (cpuid %d)",
2707		    device_get_nameunit(adapter->dev), cpu_id);
2708		/*
2709		** Set the bit to enable interrupt
2710		** in E1000_IMS -- bits 22 and 23
2711		** are for TX0 and TX1; note this has
2712		** NOTHING to do with the MSIX vector.
2713		*/
2714		txr->ims = 1 << (22 + i);
2715		adapter->ims |= txr->ims;
2716		adapter->ivars |= (8 | txr->msix) << (8 + (i * 4));
2717
2718		em_last_bind_cpu = CPU_NEXT(em_last_bind_cpu);
2719	}
2720
2721	/* Link interrupt */
2722	rid = vector + 1;
2723	adapter->res = bus_alloc_resource_any(dev,
2724	    SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2725	if (!adapter->res) {
2726		device_printf(dev, "Unable to allocate "
2727		    "bus resource: Link interrupt [%d]\n", rid);
2728		return (ENXIO);
2729	}
2730	/* Set the link handler function */
2731	error = bus_setup_intr(dev, adapter->res,
2732	    INTR_TYPE_NET | INTR_MPSAFE, NULL,
2733	    em_msix_link, adapter, &adapter->tag);
2734	if (error) {
2735		adapter->res = NULL;
2736		device_printf(dev, "Failed to register LINK handler");
2737		return (error);
2738	}
2739#if __FreeBSD_version >= 800504
2740	bus_describe_intr(dev, adapter->res, adapter->tag, "link");
2741#endif
2742	adapter->linkvec = vector;
2743	adapter->ivars |=  (8 | vector) << 16;
2744	adapter->ivars |= 0x80000000;
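	/*
	 * Putting the pieces together for the default single-queue case
	 * (RX on vector 0, TX on vector 1, link on vector 2), ivars ends
	 * up as 0x80000000 | ((8 | 2) << 16) | ((8 | 1) << 8) | (8 | 0),
	 * i.e. 0x800A0908 -- an illustrative sketch, not an exhaustive
	 * description of the 82574 IVAR register.
	 */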
2745
2746	return (0);
2747}
2748
2749
2750static void
2751em_free_pci_resources(struct adapter *adapter)
2752{
2753	device_t	dev = adapter->dev;
2754	struct tx_ring	*txr;
2755	struct rx_ring	*rxr;
2756	int		rid;
2757
2758
2759	/*
2760	** Release all the queue interrupt resources:
2761	*/
2762	for (int i = 0; i < adapter->num_queues; i++) {
2763		txr = &adapter->tx_rings[i];
2764		/* an early abort? */
2765		if (txr == NULL)
2766			break;
2767		rid = txr->msix + 1;
2768		if (txr->tag != NULL) {
2769			bus_teardown_intr(dev, txr->res, txr->tag);
2770			txr->tag = NULL;
2771		}
2772		if (txr->res != NULL)
2773			bus_release_resource(dev, SYS_RES_IRQ,
2774			    rid, txr->res);
2775
2776		rxr = &adapter->rx_rings[i];
2777		/* an early abort? */
2778		if (rxr == NULL)
2779			break;
2780		rid = rxr->msix + 1;
2781		if (rxr->tag != NULL) {
2782			bus_teardown_intr(dev, rxr->res, rxr->tag);
2783			rxr->tag = NULL;
2784		}
2785		if (rxr->res != NULL)
2786			bus_release_resource(dev, SYS_RES_IRQ,
2787			    rid, rxr->res);
2788	}
2789
2790	if (adapter->linkvec) /* we are doing MSIX */
2791		rid = adapter->linkvec + 1;
2792	else
2793		rid = (adapter->msix != 0) ? 1 : 0;
2794
2795	if (adapter->tag != NULL) {
2796		bus_teardown_intr(dev, adapter->res, adapter->tag);
2797		adapter->tag = NULL;
2798	}
2799
2800	if (adapter->res != NULL)
2801		bus_release_resource(dev, SYS_RES_IRQ, rid, adapter->res);
2802
2803
2804	if (adapter->msix)
2805		pci_release_msi(dev);
2806
2807	if (adapter->msix_mem != NULL)
2808		bus_release_resource(dev, SYS_RES_MEMORY,
2809		    adapter->memrid, adapter->msix_mem);
2810
2811	if (adapter->memory != NULL)
2812		bus_release_resource(dev, SYS_RES_MEMORY,
2813		    PCIR_BAR(0), adapter->memory);
2814
2815	if (adapter->flash != NULL)
2816		bus_release_resource(dev, SYS_RES_MEMORY,
2817		    EM_FLASH, adapter->flash);
2818}
2819
2820/*
2821 * Setup MSI or MSI/X
2822 */
2823static int
2824em_setup_msix(struct adapter *adapter)
2825{
2826	device_t dev = adapter->dev;
2827	int val;
2828
2829	/* Nearly always going to use one queue */
2830	adapter->num_queues = 1;
2831
2832	/*
2833	** Try using MSI-X for Hartwell adapters
2834	*/
2835	if ((adapter->hw.mac.type == e1000_82574) &&
2836	    (em_enable_msix == TRUE)) {
2837#ifdef EM_MULTIQUEUE
2838		adapter->num_queues = (em_num_queues == 1) ? 1 : 2;
2839		if (adapter->num_queues > 1)
2840			em_enable_vectors_82574(adapter);
2841#endif
2842		/* Map the MSIX BAR */
2843		adapter->memrid = PCIR_BAR(EM_MSIX_BAR);
2844		adapter->msix_mem = bus_alloc_resource_any(dev,
2845		    SYS_RES_MEMORY, &adapter->memrid, RF_ACTIVE);
2846		if (adapter->msix_mem == NULL) {
2847			/* May not be enabled */
2848			device_printf(adapter->dev,
2849			    "Unable to map MSIX table\n");
2850			goto msi;
2851		}
2852		val = pci_msix_count(dev);
2853
2854#ifdef EM_MULTIQUEUE
2855		/* Multiqueue needs 5 vectors: 2 RX, 2 TX, plus link */
2856		if (adapter->num_queues > 1) {
2857			if (val >= 5)
2858				val = 5;
2859			else {
2860				adapter->num_queues = 1;
2861				device_printf(adapter->dev,
2862				    "Insufficient MSIX vectors for >1 queue, "
2863				    "using single queue...\n");
2864				goto msix_one;
2865			}
2866		} else {
2867msix_one:
2868#endif
2869			if (val >= 3)
2870				val = 3;
2871			else {
2872				device_printf(adapter->dev,
2873			    	"Insufficient MSIX vectors, using MSI\n");
2874				goto msi;
2875			}
2876#ifdef EM_MULTIQUEUE
2877		}
2878#endif
2879
2880		if ((pci_alloc_msix(dev, &val) == 0)) {
2881			device_printf(adapter->dev,
2882			    "Using MSIX interrupts "
2883			    "with %d vectors\n", val);
2884			return (val);
2885		}
2886
2887		/*
2888		** If MSIX alloc failed or provided us with
2889		** less than needed, free and fall through to MSI
2890		*/
2891		pci_release_msi(dev);
2892	}
2893msi:
2894	if (adapter->msix_mem != NULL) {
2895		bus_release_resource(dev, SYS_RES_MEMORY,
2896		    adapter->memrid, adapter->msix_mem);
2897		adapter->msix_mem = NULL;
2898	}
2899	val = 1;
2900	if (pci_alloc_msi(dev, &val) == 0) {
2901		device_printf(adapter->dev, "Using an MSI interrupt\n");
2902		return (val);
2903	}
2904	/* Should only happen due to manual configuration */
2905	device_printf(adapter->dev,"No MSI/MSIX using a Legacy IRQ\n");
2906	return (0);
2907}
2908
2909
2910/*
2911** The three following flush routines are used as a workaround on the
2912** I219 client parts, and only on them.
2913**
2914** em_flush_tx_ring - remove all descriptors from the tx_ring
2915**
2916** We want to clear all pending descriptors from the TX ring;
2917** zeroing happens when the HW reads the regs. We assign the ring
2918** itself as the data buffer of the next descriptor. The contents do
2919** not matter since we are about to reset the HW.
2920*/
2921static void
2922em_flush_tx_ring(struct adapter *adapter)
2923{
2924	struct e1000_hw		*hw = &adapter->hw;
2925	struct tx_ring		*txr = adapter->tx_rings;
2926	struct e1000_tx_desc	*txd;
2927	u32			tctl, txd_lower = E1000_TXD_CMD_IFCS;
2928	u16			size = 512;
2929
2930	tctl = E1000_READ_REG(hw, E1000_TCTL);
2931	E1000_WRITE_REG(hw, E1000_TCTL, tctl | E1000_TCTL_EN);
2932
2933	txd = &txr->tx_base[txr->next_avail_desc++];
2934	if (txr->next_avail_desc == adapter->num_tx_desc)
2935		txr->next_avail_desc = 0;
2936
2937	/* Just use the ring as a dummy buffer addr */
2938	txd->buffer_addr = txr->txdma.dma_paddr;
2939	txd->lower.data = htole32(txd_lower | size);
2940	txd->upper.data = 0;
2941
2942	/* flush descriptors to memory before notifying the HW */
2943	wmb();
2944
2945	E1000_WRITE_REG(hw, E1000_TDT(0), txr->next_avail_desc);
2946	mb();
2947	usec_delay(250);
2948}
2949
2950/*
2951** em_flush_rx_ring - remove all descriptors from the rx_ring
2952**
2953** Mark all descriptors in the RX ring as consumed and disable the rx ring
2954*/
2955static void
2956em_flush_rx_ring(struct adapter *adapter)
2957{
2958	struct e1000_hw	*hw = &adapter->hw;
2959	u32		rctl, rxdctl;
2960
2961	rctl = E1000_READ_REG(hw, E1000_RCTL);
2962	E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
2963	E1000_WRITE_FLUSH(hw);
2964	usec_delay(150);
2965
2966	rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(0));
2967	/* zero the lower 14 bits (prefetch and host thresholds) */
2968	rxdctl &= 0xffffc000;
2969	/*
2970	 * update thresholds: prefetch threshold to 31, host threshold to 1
2971	 * and make sure the granularity is "descriptors" and not "cache lines"
2972	 */
2973	rxdctl |= (0x1F | (1 << 8) | E1000_RXDCTL_THRESH_UNIT_DESC);
2974	E1000_WRITE_REG(hw, E1000_RXDCTL(0), rxdctl);
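	/*
	 * Roughly, the value written decomposes as: prefetch threshold
	 * of 0x1F (31) in the low bits, host threshold of 1 via
	 * (1 << 8), and E1000_RXDCTL_THRESH_UNIT_DESC selecting
	 * descriptor granularity, matching the comment above.
	 */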
2975
2976	/* momentarily enable the RX ring for the changes to take effect */
2977	E1000_WRITE_REG(hw, E1000_RCTL, rctl | E1000_RCTL_EN);
2978	E1000_WRITE_FLUSH(hw);
2979	usec_delay(150);
2980	E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
2981}
2982
2983/*
2984** em_flush_desc_rings - remove all descriptors from the descriptor rings
2985**
2986** In i219, the descriptor rings must be emptied before resetting the HW
2987** or before changing the device state to D3 during runtime (runtime PM).
2988**
2989** Failure to do this will cause the HW to enter a unit hang state which can
2990** only be released by PCI reset on the device
2991**
2992*/
2993static void
2994em_flush_desc_rings(struct adapter *adapter)
2995{
2996	struct e1000_hw	*hw = &adapter->hw;
2997	device_t	dev = adapter->dev;
2998	u16		hang_state;
2999	u32		fext_nvm11, tdlen;
3000
3001	/* First, disable MULR fix in FEXTNVM11 */
3002	fext_nvm11 = E1000_READ_REG(hw, E1000_FEXTNVM11);
3003	fext_nvm11 |= E1000_FEXTNVM11_DISABLE_MULR_FIX;
3004	E1000_WRITE_REG(hw, E1000_FEXTNVM11, fext_nvm11);
3005
3006	/* do nothing if we're not in faulty state, or if the queue is empty */
3007	tdlen = E1000_READ_REG(hw, E1000_TDLEN(0));
3008	hang_state = pci_read_config(dev, PCICFG_DESC_RING_STATUS, 2);
3009	if (!(hang_state & FLUSH_DESC_REQUIRED) || !tdlen)
3010		return;
3011	em_flush_tx_ring(adapter);
3012
3013	/* recheck, maybe the fault is caused by the rx ring */
3014	hang_state = pci_read_config(dev, PCICFG_DESC_RING_STATUS, 2);
3015	if (hang_state & FLUSH_DESC_REQUIRED)
3016		em_flush_rx_ring(adapter);
3017}
3018
3019
3020/*********************************************************************
3021 *
3022 *  Initialize the hardware to a configuration
3023 *  as specified by the adapter structure.
3024 *
3025 **********************************************************************/
3026static void
3027em_reset(struct adapter *adapter)
3028{
3029	device_t	dev = adapter->dev;
3030	if_t ifp = adapter->ifp;
3031	struct e1000_hw	*hw = &adapter->hw;
3032	u16		rx_buffer_size;
3033	u32		pba;
3034
3035	INIT_DEBUGOUT("em_reset: begin");
3036
3037	/* Set up smart power down as default off on newer adapters. */
3038	if (!em_smart_pwr_down && (hw->mac.type == e1000_82571 ||
3039	    hw->mac.type == e1000_82572)) {
3040		u16 phy_tmp = 0;
3041
3042		/* Speed up time to link by disabling smart power down. */
3043		e1000_read_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, &phy_tmp);
3044		phy_tmp &= ~IGP02E1000_PM_SPD;
3045		e1000_write_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, phy_tmp);
3046	}
3047
3048	/*
3049	 * Packet Buffer Allocation (PBA)
3050	 * Writing PBA sets the receive portion of the buffer;
3051	 * the remainder is used for the transmit buffer.
3052	 */
3053	switch (hw->mac.type) {
3054	/* Total Packet Buffer on these is 48K */
3055	case e1000_82571:
3056	case e1000_82572:
3057	case e1000_80003es2lan:
3058		pba = E1000_PBA_32K; /* 32K for Rx, 16K for Tx */
3059		break;
3060	case e1000_82573: /* 82573: Total Packet Buffer is 32K */
3061		pba = E1000_PBA_12K; /* 12K for Rx, 20K for Tx */
3062		break;
3063	case e1000_82574:
3064	case e1000_82583:
3065		pba = E1000_PBA_20K; /* 20K for Rx, 20K for Tx */
3066		break;
3067	case e1000_ich8lan:
3068		pba = E1000_PBA_8K;
3069		break;
3070	case e1000_ich9lan:
3071	case e1000_ich10lan:
3072		/* Boost Receive side for jumbo frames */
3073		if (adapter->hw.mac.max_frame_size > 4096)
3074			pba = E1000_PBA_14K;
3075		else
3076			pba = E1000_PBA_10K;
3077		break;
3078	case e1000_pchlan:
3079	case e1000_pch2lan:
3080	case e1000_pch_lpt:
3081	case e1000_pch_spt:
3082		pba = E1000_PBA_26K;
3083		break;
3084	default:
3085		if (adapter->hw.mac.max_frame_size > 8192)
3086			pba = E1000_PBA_40K; /* 40K for Rx, 24K for Tx */
3087		else
3088			pba = E1000_PBA_48K; /* 48K for Rx, 16K for Tx */
3089	}
3090	E1000_WRITE_REG(&adapter->hw, E1000_PBA, pba);
3091
3092	/*
3093	 * These parameters control the automatic generation (Tx) and
3094	 * response (Rx) to Ethernet PAUSE frames.
3095	 * - High water mark should allow for at least two frames to be
3096	 *   received after sending an XOFF.
3097	 * - Low water mark works best when it is very near the high water mark.
3098	 *   This allows the receiver to restart by sending XON when it has
3099	 *   drained a bit. Here we use an arbitrary value of 1500 which will
3100	 *   restart after one full frame is pulled from the buffer. There
3101	 *   could be several smaller frames in the buffer and if so they will
3102	 *   not trigger the XON until their total number reduces the buffer
3103	 *   by 1500.
3104	 * - The pause time is fairly large at 1000 x 512ns = 512 usec.
3105	 */
3106	rx_buffer_size = ((E1000_READ_REG(hw, E1000_PBA) & 0xffff) << 10);
3107	hw->fc.high_water = rx_buffer_size -
3108	    roundup2(adapter->hw.mac.max_frame_size, 1024);
3109	hw->fc.low_water = hw->fc.high_water - 1500;
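	/*
	 * Worked example (a sketch; actual values depend on the part):
	 * with a 48K PBA the low 16 bits of E1000_PBA read back as 48,
	 * so rx_buffer_size = 48 << 10 = 49152 bytes.  For a 1518-byte
	 * max frame (standard MTU) rounded up to 2048, high_water is
	 * 49152 - 2048 = 47104 and low_water 47104 - 1500 = 45604.
	 */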
3110
3111	if (adapter->fc) /* locally set flow control value? */
3112		hw->fc.requested_mode = adapter->fc;
3113	else
3114		hw->fc.requested_mode = e1000_fc_full;
3115
3116	if (hw->mac.type == e1000_80003es2lan)
3117		hw->fc.pause_time = 0xFFFF;
3118	else
3119		hw->fc.pause_time = EM_FC_PAUSE_TIME;
3120
3121	hw->fc.send_xon = TRUE;
3122
3123	/* Device specific overrides/settings */
3124	switch (hw->mac.type) {
3125	case e1000_pchlan:
3126		/* Workaround: no TX flow ctrl for PCH */
3127		hw->fc.requested_mode = e1000_fc_rx_pause;
3128		hw->fc.pause_time = 0xFFFF; /* override */
3129		if (if_getmtu(ifp) > ETHERMTU) {
3130			hw->fc.high_water = 0x3500;
3131			hw->fc.low_water = 0x1500;
3132		} else {
3133			hw->fc.high_water = 0x5000;
3134			hw->fc.low_water = 0x3000;
3135		}
3136		hw->fc.refresh_time = 0x1000;
3137		break;
3138	case e1000_pch2lan:
3139	case e1000_pch_lpt:
3140	case e1000_pch_spt:
3141		hw->fc.high_water = 0x5C20;
3142		hw->fc.low_water = 0x5048;
3143		hw->fc.pause_time = 0x0650;
3144		hw->fc.refresh_time = 0x0400;
3145		/* Jumbos need adjusted PBA */
3146		if (if_getmtu(ifp) > ETHERMTU)
3147			E1000_WRITE_REG(hw, E1000_PBA, 12);
3148		else
3149			E1000_WRITE_REG(hw, E1000_PBA, 26);
3150		break;
3151	case e1000_ich9lan:
3152	case e1000_ich10lan:
3153		if (if_getmtu(ifp) > ETHERMTU) {
3154			hw->fc.high_water = 0x2800;
3155			hw->fc.low_water = hw->fc.high_water - 8;
3156			break;
3157		}
3158		/* else fall thru */
3159	default:
3160		if (hw->mac.type == e1000_80003es2lan)
3161			hw->fc.pause_time = 0xFFFF;
3162		break;
3163	}
3164
3165	/* I219 needs some special flushing to avoid hangs */
3166	if (hw->mac.type == e1000_pch_spt)
3167		em_flush_desc_rings(adapter);
3168
3169	/* Issue a global reset */
3170	e1000_reset_hw(hw);
3171	E1000_WRITE_REG(hw, E1000_WUC, 0);
3172	em_disable_aspm(adapter);
3173	/* and a re-init */
3174	if (e1000_init_hw(hw) < 0) {
3175		device_printf(dev, "Hardware Initialization Failed\n");
3176		return;
3177	}
3178
3179	E1000_WRITE_REG(hw, E1000_VET, ETHERTYPE_VLAN);
3180	e1000_get_phy_info(hw);
3181	e1000_check_for_link(hw);
3182	return;
3183}
3184
3185/*********************************************************************
3186 *
3187 *  Setup networking device structure and register an interface.
3188 *
3189 **********************************************************************/
3190static int
3191em_setup_interface(device_t dev, struct adapter *adapter)
3192{
3193	if_t ifp;
3194
3195	INIT_DEBUGOUT("em_setup_interface: begin");
3196
3197	ifp = adapter->ifp = if_gethandle(IFT_ETHER);
3198	if (ifp == NULL) {
3199		device_printf(dev, "can not allocate ifnet structure\n");
3200		return (-1);
3201	}
3202	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
3203	if_setdev(ifp, dev);
3204	if_setinitfn(ifp, em_init);
3205	if_setsoftc(ifp, adapter);
3206	if_setflags(ifp, IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST);
3207	if_setioctlfn(ifp, em_ioctl);
3208	if_setgetcounterfn(ifp, em_get_counter);
3209
3210	/* TSO parameters */
3211	ifp->if_hw_tsomax = IP_MAXPACKET;
3212	/* Take m_pullup(9)'s in em_xmit() w/ TSO into account. */
3213	ifp->if_hw_tsomaxsegcount = EM_MAX_SCATTER - 5;
3214	ifp->if_hw_tsomaxsegsize = EM_TSO_SEG_SIZE;
3215
3216#ifdef EM_MULTIQUEUE
3217	/* Multiqueue stack interface */
3218	if_settransmitfn(ifp, em_mq_start);
3219	if_setqflushfn(ifp, em_qflush);
3220#else
3221	if_setstartfn(ifp, em_start);
3222	if_setsendqlen(ifp, adapter->num_tx_desc - 1);
3223	if_setsendqready(ifp);
3224#endif
3225
3226	ether_ifattach(ifp, adapter->hw.mac.addr);
3227
3228	if_setcapabilities(ifp, 0);
3229	if_setcapenable(ifp, 0);
3230
3231
3232	if_setcapabilitiesbit(ifp, IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM |
3233	    IFCAP_TSO4, 0);
3234	/*
3235	 * Tell the upper layer(s) we
3236	 * support full VLAN capability
3237	 */
3238	if_setifheaderlen(ifp, sizeof(struct ether_vlan_header));
3239	if_setcapabilitiesbit(ifp, IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_HWTSO |
3240	    IFCAP_VLAN_MTU, 0);
3241	if_setcapenable(ifp, if_getcapabilities(ifp));
3242
3243	/*
3244	** Don't turn this on by default: if vlans are
3245	** created on another pseudo device (e.g. lagg),
3246	** vlan events are not passed thru, breaking
3247	** operation, whereas with HW FILTER off it works.
3248	** If using vlans directly on the em driver you can
3249	** enable this and get full hardware tag filtering.
3250	*/
3251	if_setcapabilitiesbit(ifp, IFCAP_VLAN_HWFILTER, 0);
3252
3253#ifdef DEVICE_POLLING
3254	if_setcapabilitiesbit(ifp, IFCAP_POLLING, 0);
3255#endif
3256
3257	/* Enable only WOL MAGIC by default */
3258	if (adapter->wol) {
3259		if_setcapabilitiesbit(ifp, IFCAP_WOL, 0);
3260		if_setcapenablebit(ifp, IFCAP_WOL_MAGIC, 0);
3261	}
3262
3263	/*
3264	 * Specify the media types supported by this adapter and register
3265	 * callbacks to update media and link information
3266	 */
3267	ifmedia_init(&adapter->media, IFM_IMASK,
3268	    em_media_change, em_media_status);
3269	if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
3270	    (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
3271		u_char fiber_type = IFM_1000_SX;	/* default type */
3272
3273		ifmedia_add(&adapter->media, IFM_ETHER | fiber_type | IFM_FDX,
3274			    0, NULL);
3275		ifmedia_add(&adapter->media, IFM_ETHER | fiber_type, 0, NULL);
3276	} else {
3277		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T, 0, NULL);
3278		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T | IFM_FDX,
3279			    0, NULL);
3280		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX,
3281			    0, NULL);
3282		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX | IFM_FDX,
3283			    0, NULL);
3284		if (adapter->hw.phy.type != e1000_phy_ife) {
3285			ifmedia_add(&adapter->media,
3286				IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
3287			ifmedia_add(&adapter->media,
3288				IFM_ETHER | IFM_1000_T, 0, NULL);
3289		}
3290	}
3291	ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL);
3292	ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO);
3293	return (0);
3294}
3295
3296
3297/*
3298 * Manage DMA'able memory.
3299 */
3300static void
3301em_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
3302{
3303	if (error)
3304		return;
3305	*(bus_addr_t *) arg = segs[0].ds_addr;
3306}
3307
3308static int
3309em_dma_malloc(struct adapter *adapter, bus_size_t size,
3310        struct em_dma_alloc *dma, int mapflags)
3311{
3312	int error;
3313
3314	error = bus_dma_tag_create(bus_get_dma_tag(adapter->dev), /* parent */
3315				EM_DBA_ALIGN, 0,	/* alignment, bounds */
3316				BUS_SPACE_MAXADDR,	/* lowaddr */
3317				BUS_SPACE_MAXADDR,	/* highaddr */
3318				NULL, NULL,		/* filter, filterarg */
3319				size,			/* maxsize */
3320				1,			/* nsegments */
3321				size,			/* maxsegsize */
3322				0,			/* flags */
3323				NULL,			/* lockfunc */
3324				NULL,			/* lockarg */
3325				&dma->dma_tag);
3326	if (error) {
3327		device_printf(adapter->dev,
3328		    "%s: bus_dma_tag_create failed: %d\n",
3329		    __func__, error);
3330		goto fail_0;
3331	}
3332
3333	error = bus_dmamem_alloc(dma->dma_tag, (void**) &dma->dma_vaddr,
3334	    BUS_DMA_NOWAIT | BUS_DMA_COHERENT, &dma->dma_map);
3335	if (error) {
3336		device_printf(adapter->dev,
3337		    "%s: bus_dmamem_alloc(%ju) failed: %d\n",
3338		    __func__, (uintmax_t)size, error);
3339		goto fail_2;
3340	}
3341
3342	dma->dma_paddr = 0;
3343	error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
3344	    size, em_dmamap_cb, &dma->dma_paddr, mapflags | BUS_DMA_NOWAIT);
3345	if (error || dma->dma_paddr == 0) {
3346		device_printf(adapter->dev,
3347		    "%s: bus_dmamap_load failed: %d\n",
3348		    __func__, error);
3349		goto fail_3;
3350	}
3351
3352	return (0);
3353
3354fail_3:
3355	bus_dmamap_unload(dma->dma_tag, dma->dma_map);
3356fail_2:
3357	bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
3358	bus_dma_tag_destroy(dma->dma_tag);
3359fail_0:
3360	dma->dma_tag = NULL;
3361
3362	return (error);
3363}
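/*
 * Typical usage sketch (mirroring em_allocate_queues() below): the
 * caller computes an aligned size and hands in an em_dma_alloc to
 * fill, e.g.
 *
 *	tsize = roundup2(adapter->num_tx_desc *
 *	    sizeof(struct e1000_tx_desc), EM_DBA_ALIGN);
 *	if (em_dma_malloc(adapter, tsize, &txr->txdma, BUS_DMA_NOWAIT))
 *		error = ENOMEM;		(and unwind)
 *
 * and on teardown em_dma_free() below unloads, frees and destroys the
 * tag in the reverse order of the allocation above.
 */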
3364
3365static void
3366em_dma_free(struct adapter *adapter, struct em_dma_alloc *dma)
3367{
3368	if (dma->dma_tag == NULL)
3369		return;
3370	if (dma->dma_paddr != 0) {
3371		bus_dmamap_sync(dma->dma_tag, dma->dma_map,
3372		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
3373		bus_dmamap_unload(dma->dma_tag, dma->dma_map);
3374		dma->dma_paddr = 0;
3375	}
3376	if (dma->dma_vaddr != NULL) {
3377		bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
3378		dma->dma_vaddr = NULL;
3379	}
3380	bus_dma_tag_destroy(dma->dma_tag);
3381	dma->dma_tag = NULL;
3382}
3383
3384
3385/*********************************************************************
3386 *
3387 *  Allocate memory for the transmit and receive rings, and then
3388 *  the descriptors associated with each, called only once at attach.
3389 *
3390 **********************************************************************/
3391static int
3392em_allocate_queues(struct adapter *adapter)
3393{
3394	device_t		dev = adapter->dev;
3395	struct tx_ring		*txr = NULL;
3396	struct rx_ring		*rxr = NULL;
3397	int rsize, tsize, error = E1000_SUCCESS;
3398	int txconf = 0, rxconf = 0;
3399
3400
3401	/* Allocate the TX ring struct memory */
3402	if (!(adapter->tx_rings =
3403	    (struct tx_ring *) malloc(sizeof(struct tx_ring) *
3404	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3405		device_printf(dev, "Unable to allocate TX ring memory\n");
3406		error = ENOMEM;
3407		goto fail;
3408	}
3409
3410	/* Now allocate the RX */
3411	if (!(adapter->rx_rings =
3412	    (struct rx_ring *) malloc(sizeof(struct rx_ring) *
3413	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3414		device_printf(dev, "Unable to allocate RX ring memory\n");
3415		error = ENOMEM;
3416		goto rx_fail;
3417	}
3418
3419	tsize = roundup2(adapter->num_tx_desc *
3420	    sizeof(struct e1000_tx_desc), EM_DBA_ALIGN);
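	/*
	 * Illustrative arithmetic: with the default of 1024 TX
	 * descriptors at 16 bytes each (the legacy e1000 descriptor),
	 * tsize is 16384 bytes, which roundup2() leaves unchanged since
	 * it is already a multiple of EM_DBA_ALIGN (128 in if_em.h).
	 */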
3421	/*
3422	 * Now set up the TX queues, txconf is needed to handle the
3423	 * possibility that things fail midcourse and we need to
3424	 * undo memory gracefully
3425	 */
3426	for (int i = 0; i < adapter->num_queues; i++, txconf++) {
3427		/* Set up some basics */
3428		txr = &adapter->tx_rings[i];
3429		txr->adapter = adapter;
3430		txr->me = i;
3431
3432		/* Initialize the TX lock */
3433		snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
3434		    device_get_nameunit(dev), txr->me);
3435		mtx_init(&txr->tx_mtx, txr->mtx_name, NULL, MTX_DEF);
3436
3437		if (em_dma_malloc(adapter, tsize,
3438			&txr->txdma, BUS_DMA_NOWAIT)) {
3439			device_printf(dev,
3440			    "Unable to allocate TX Descriptor memory\n");
3441			error = ENOMEM;
3442			goto err_tx_desc;
3443		}
3444		txr->tx_base = (struct e1000_tx_desc *)txr->txdma.dma_vaddr;
3445		bzero((void *)txr->tx_base, tsize);
3446
3447		if (em_allocate_transmit_buffers(txr)) {
3448			device_printf(dev,
3449			    "Critical Failure setting up transmit buffers\n");
3450			error = ENOMEM;
3451			goto err_tx_desc;
3452		}
3453#if __FreeBSD_version >= 800000
3454		/* Allocate a buf ring */
3455		txr->br = buf_ring_alloc(4096, M_DEVBUF,
3456		    M_WAITOK, &txr->tx_mtx);
3457#endif
3458	}
3459
3460	/*
3461	 * Next the RX queues...
3462	 */
3463	rsize = roundup2(adapter->num_rx_desc *
3464	    sizeof(union e1000_rx_desc_extended), EM_DBA_ALIGN);
3465	for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
3466		rxr = &adapter->rx_rings[i];
3467		rxr->adapter = adapter;
3468		rxr->me = i;
3469
3470		/* Initialize the RX lock */
3471		snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
3472		    device_get_nameunit(dev), rxr->me);
3473		mtx_init(&rxr->rx_mtx, rxr->mtx_name, NULL, MTX_DEF);
3474
3475		if (em_dma_malloc(adapter, rsize,
3476			&rxr->rxdma, BUS_DMA_NOWAIT)) {
3477			device_printf(dev,
3478			    "Unable to allocate RxDescriptor memory\n");
3479			error = ENOMEM;
3480			goto err_rx_desc;
3481		}
3482		rxr->rx_base = (union e1000_rx_desc_extended *)rxr->rxdma.dma_vaddr;
3483		bzero((void *)rxr->rx_base, rsize);
3484
3485		/* Allocate receive buffers for the ring */
3486		if (em_allocate_receive_buffers(rxr)) {
3487			device_printf(dev,
3488			    "Critical Failure setting up receive buffers\n");
3489			error = ENOMEM;
3490			goto err_rx_desc;
3491		}
3492	}
3493
3494	return (0);
3495
3496err_rx_desc:
3497	for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
3498		em_dma_free(adapter, &rxr->rxdma);
err_tx_desc:
	for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--) {
#if __FreeBSD_version >= 800000
		/* Free the buf ring of each configured TX queue */
		if (txr->br != NULL)
			buf_ring_free(txr->br, M_DEVBUF);
#endif
		em_dma_free(adapter, &txr->txdma);
	}
	free(adapter->rx_rings, M_DEVBUF);
rx_fail:
	/* No buf rings exist yet when the RX struct allocation fails */
	free(adapter->tx_rings, M_DEVBUF);
3508fail:
3509	return (error);
3510}
3511
3512
3513/*********************************************************************
3514 *
3515 *  Allocate memory for tx_buffer structures. The tx_buffer stores all
3516 *  the information needed to transmit a packet on the wire. This is
 *  called only once at attach; setup is done on every reset.
3518 *
3519 **********************************************************************/
3520static int
3521em_allocate_transmit_buffers(struct tx_ring *txr)
3522{
3523	struct adapter *adapter = txr->adapter;
3524	device_t dev = adapter->dev;
3525	struct em_txbuffer *txbuf;
3526	int error, i;
3527
3528	/*
3529	 * Setup DMA descriptor areas.
3530	 */
3531	if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
3532			       1, 0,			/* alignment, bounds */
3533			       BUS_SPACE_MAXADDR,	/* lowaddr */
3534			       BUS_SPACE_MAXADDR,	/* highaddr */
3535			       NULL, NULL,		/* filter, filterarg */
3536			       EM_TSO_SIZE,		/* maxsize */
3537			       EM_MAX_SCATTER,		/* nsegments */
3538			       PAGE_SIZE,		/* maxsegsize */
3539			       0,			/* flags */
3540			       NULL,			/* lockfunc */
3541			       NULL,			/* lockfuncarg */
3542			       &txr->txtag))) {
3543		device_printf(dev,"Unable to allocate TX DMA tag\n");
3544		goto fail;
3545	}
3546
3547	if (!(txr->tx_buffers =
3548	    (struct em_txbuffer *) malloc(sizeof(struct em_txbuffer) *
3549	    adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3550		device_printf(dev, "Unable to allocate tx_buffer memory\n");
3551		error = ENOMEM;
3552		goto fail;
3553	}
3554
	/* Create the descriptor buffer dma maps */
3556	txbuf = txr->tx_buffers;
3557	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3558		error = bus_dmamap_create(txr->txtag, 0, &txbuf->map);
3559		if (error != 0) {
3560			device_printf(dev, "Unable to create TX DMA map\n");
3561			goto fail;
3562		}
3563	}
3564
	return (0);
3566fail:
3567	/* We free all, it handles case where we are in the middle */
3568	em_free_transmit_structures(adapter);
3569	return (error);
3570}
3571
3572/*********************************************************************
3573 *
3574 *  Initialize a transmit ring.
3575 *
3576 **********************************************************************/
3577static void
3578em_setup_transmit_ring(struct tx_ring *txr)
3579{
3580	struct adapter *adapter = txr->adapter;
3581	struct em_txbuffer *txbuf;
3582	int i;
3583#ifdef DEV_NETMAP
3584	struct netmap_slot *slot;
3585	struct netmap_adapter *na = netmap_getna(adapter->ifp);
3586#endif /* DEV_NETMAP */
3587
3588	/* Clear the old descriptor contents */
3589	EM_TX_LOCK(txr);
3590#ifdef DEV_NETMAP
3591	slot = netmap_reset(na, NR_TX, txr->me, 0);
3592#endif /* DEV_NETMAP */
3593
3594	bzero((void *)txr->tx_base,
3595	      (sizeof(struct e1000_tx_desc)) * adapter->num_tx_desc);
3596	/* Reset indices */
3597	txr->next_avail_desc = 0;
3598	txr->next_to_clean = 0;
3599
3600	/* Free any existing tx buffers. */
	txbuf = txr->tx_buffers;
3602	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3603		if (txbuf->m_head != NULL) {
3604			bus_dmamap_sync(txr->txtag, txbuf->map,
3605			    BUS_DMASYNC_POSTWRITE);
3606			bus_dmamap_unload(txr->txtag, txbuf->map);
3607			m_freem(txbuf->m_head);
3608			txbuf->m_head = NULL;
3609		}
3610#ifdef DEV_NETMAP
3611		if (slot) {
3612			int si = netmap_idx_n2k(&na->tx_rings[txr->me], i);
3613			uint64_t paddr;
3614			void *addr;
3615
3616			addr = PNMB(na, slot + si, &paddr);
3617			txr->tx_base[i].buffer_addr = htole64(paddr);
3618			/* reload the map for netmap mode */
3619			netmap_load_map(na, txr->txtag, txbuf->map, addr);
3620		}
3621#endif /* DEV_NETMAP */
3622
3623		/* clear the watch index */
3624		txbuf->next_eop = -1;
	}
3626
3627	/* Set number of descriptors available */
3628	txr->tx_avail = adapter->num_tx_desc;
3629	txr->busy = EM_TX_IDLE;
3630
3631	/* Clear checksum offload context. */
3632	txr->last_hw_offload = 0;
3633	txr->last_hw_ipcss = 0;
3634	txr->last_hw_ipcso = 0;
3635	txr->last_hw_tucss = 0;
3636	txr->last_hw_tucso = 0;
3637
3638	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3639	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3640	EM_TX_UNLOCK(txr);
3641}
3642
3643/*********************************************************************
3644 *
3645 *  Initialize all transmit rings.
3646 *
3647 **********************************************************************/
3648static void
3649em_setup_transmit_structures(struct adapter *adapter)
3650{
3651	struct tx_ring *txr = adapter->tx_rings;
3652
3653	for (int i = 0; i < adapter->num_queues; i++, txr++)
3654		em_setup_transmit_ring(txr);
3655
3656	return;
3657}
3658
3659/*********************************************************************
3660 *
3661 *  Enable transmit unit.
3662 *
3663 **********************************************************************/
3664static void
3665em_initialize_transmit_unit(struct adapter *adapter)
3666{
3667	struct tx_ring	*txr = adapter->tx_rings;
3668	struct e1000_hw	*hw = &adapter->hw;
3669	u32	tctl, txdctl = 0, tarc, tipg = 0;
3670
	INIT_DEBUGOUT("em_initialize_transmit_unit: begin");
3672
3673	for (int i = 0; i < adapter->num_queues; i++, txr++) {
3674		u64 bus_addr = txr->txdma.dma_paddr;
3675		/* Base and Len of TX Ring */
3676		E1000_WRITE_REG(hw, E1000_TDLEN(i),
3677	    	    adapter->num_tx_desc * sizeof(struct e1000_tx_desc));
3678		E1000_WRITE_REG(hw, E1000_TDBAH(i),
3679	    	    (u32)(bus_addr >> 32));
3680		E1000_WRITE_REG(hw, E1000_TDBAL(i),
3681	    	    (u32)bus_addr);
3682		/* Init the HEAD/TAIL indices */
3683		E1000_WRITE_REG(hw, E1000_TDT(i), 0);
3684		E1000_WRITE_REG(hw, E1000_TDH(i), 0);
3685
3686		HW_DEBUGOUT2("Base = %x, Length = %x\n",
3687		    E1000_READ_REG(&adapter->hw, E1000_TDBAL(i)),
3688		    E1000_READ_REG(&adapter->hw, E1000_TDLEN(i)));
3689
3690		txr->busy = EM_TX_IDLE;
3691		txdctl = 0; /* clear txdctl */
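		/*
		 * Worked example (a sketch; assumes E1000_TXDCTL_GRAN is
		 * the bit-24 descriptor-granularity flag): the ORs below
		 * assemble 0x1f | 1<<8 | 1<<16 | 1<<22 | 1<<24 | 1<<25 =
		 * 0x0341011f, i.e. prefetch/host/write-back thresholds of
		 * 31/1/1 descriptors and a low threshold of 1.
		 */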
		txdctl |= 0x1f; /* PTHRESH */
		txdctl |= 1 << 8; /* HTHRESH */
		txdctl |= 1 << 16; /* WTHRESH */
		txdctl |= 1 << 22; /* Reserved bit 22 must always be 1 */
		txdctl |= E1000_TXDCTL_GRAN;
		txdctl |= 1 << 25; /* LWTHRESH */

		E1000_WRITE_REG(hw, E1000_TXDCTL(i), txdctl);
3700	}
3701
3702	/* Set the default values for the Tx Inter Packet Gap timer */
3703	switch (adapter->hw.mac.type) {
3704	case e1000_80003es2lan:
3705		tipg = DEFAULT_82543_TIPG_IPGR1;
3706		tipg |= DEFAULT_80003ES2LAN_TIPG_IPGR2 <<
3707		    E1000_TIPG_IPGR2_SHIFT;
3708		break;
3709	default:
3710		if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
3711		    (adapter->hw.phy.media_type ==
3712		    e1000_media_type_internal_serdes))
3713			tipg = DEFAULT_82543_TIPG_IPGT_FIBER;
3714		else
3715			tipg = DEFAULT_82543_TIPG_IPGT_COPPER;
3716		tipg |= DEFAULT_82543_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT;
3717		tipg |= DEFAULT_82543_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT;
3718	}
3719
3720	E1000_WRITE_REG(&adapter->hw, E1000_TIPG, tipg);
3721	E1000_WRITE_REG(&adapter->hw, E1000_TIDV, adapter->tx_int_delay.value);
3722
3723	if(adapter->hw.mac.type >= e1000_82540)
3724		E1000_WRITE_REG(&adapter->hw, E1000_TADV,
3725		    adapter->tx_abs_int_delay.value);
3726
3727	if ((adapter->hw.mac.type == e1000_82571) ||
3728	    (adapter->hw.mac.type == e1000_82572)) {
3729		tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
3730		tarc |= TARC_SPEED_MODE_BIT;
3731		E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3732	} else if (adapter->hw.mac.type == e1000_80003es2lan) {
3733		/* errata: program both queues to unweighted RR */
3734		tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
3735		tarc |= 1;
3736		E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3737		tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(1));
3738		tarc |= 1;
3739		E1000_WRITE_REG(&adapter->hw, E1000_TARC(1), tarc);
3740	} else if (adapter->hw.mac.type == e1000_82574) {
3741		tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
3742		tarc |= TARC_ERRATA_BIT;
3743		if ( adapter->num_queues > 1) {
3744			tarc |= (TARC_COMPENSATION_MODE | TARC_MQ_FIX);
3745			E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3746			E1000_WRITE_REG(&adapter->hw, E1000_TARC(1), tarc);
3747		} else
3748			E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3749	}
3750
3751	adapter->txd_cmd = E1000_TXD_CMD_IFCS;
3752	if (adapter->tx_int_delay.value > 0)
3753		adapter->txd_cmd |= E1000_TXD_CMD_IDE;
3754
3755	/* Program the Transmit Control Register */
3756	tctl = E1000_READ_REG(&adapter->hw, E1000_TCTL);
3757	tctl &= ~E1000_TCTL_CT;
3758	tctl |= (E1000_TCTL_PSP | E1000_TCTL_RTLC | E1000_TCTL_EN |
3759		   (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT));
3760
3761	if (adapter->hw.mac.type >= e1000_82571)
3762		tctl |= E1000_TCTL_MULR;
3763
3764	/* This write will effectively turn on the transmit unit. */
3765	E1000_WRITE_REG(&adapter->hw, E1000_TCTL, tctl);
3766
3767	if (hw->mac.type == e1000_pch_spt) {
3768		u32 reg;
3769		reg = E1000_READ_REG(hw, E1000_IOSFPC);
3770		reg |= E1000_RCTL_RDMTS_HEX;
3771		E1000_WRITE_REG(hw, E1000_IOSFPC, reg);
3772		reg = E1000_READ_REG(hw, E1000_TARC(0));
3773		reg |= E1000_TARC0_CB_MULTIQ_3_REQ;
3774		E1000_WRITE_REG(hw, E1000_TARC(0), reg);
3775	}
3776}
3777
3778
3779/*********************************************************************
3780 *
3781 *  Free all transmit rings.
3782 *
3783 **********************************************************************/
3784static void
3785em_free_transmit_structures(struct adapter *adapter)
3786{
3787	struct tx_ring *txr = adapter->tx_rings;
3788
3789	for (int i = 0; i < adapter->num_queues; i++, txr++) {
3790		EM_TX_LOCK(txr);
3791		em_free_transmit_buffers(txr);
3792		em_dma_free(adapter, &txr->txdma);
3793		EM_TX_UNLOCK(txr);
3794		EM_TX_LOCK_DESTROY(txr);
3795	}
3796
3797	free(adapter->tx_rings, M_DEVBUF);
3798}
3799
3800/*********************************************************************
3801 *
3802 *  Free transmit ring related data structures.
3803 *
3804 **********************************************************************/
3805static void
3806em_free_transmit_buffers(struct tx_ring *txr)
3807{
3808	struct adapter		*adapter = txr->adapter;
3809	struct em_txbuffer	*txbuf;
3810
3811	INIT_DEBUGOUT("free_transmit_ring: begin");
3812
3813	if (txr->tx_buffers == NULL)
3814		return;
3815
3816	for (int i = 0; i < adapter->num_tx_desc; i++) {
3817		txbuf = &txr->tx_buffers[i];
3818		if (txbuf->m_head != NULL) {
3819			bus_dmamap_sync(txr->txtag, txbuf->map,
3820			    BUS_DMASYNC_POSTWRITE);
3821			bus_dmamap_unload(txr->txtag,
3822			    txbuf->map);
3823			m_freem(txbuf->m_head);
3824			txbuf->m_head = NULL;
3825			if (txbuf->map != NULL) {
3826				bus_dmamap_destroy(txr->txtag,
3827				    txbuf->map);
3828				txbuf->map = NULL;
3829			}
3830		} else if (txbuf->map != NULL) {
3831			bus_dmamap_unload(txr->txtag,
3832			    txbuf->map);
3833			bus_dmamap_destroy(txr->txtag,
3834			    txbuf->map);
3835			txbuf->map = NULL;
3836		}
3837	}
3838#if __FreeBSD_version >= 800000
3839	if (txr->br != NULL)
3840		buf_ring_free(txr->br, M_DEVBUF);
3841#endif
3842	if (txr->tx_buffers != NULL) {
3843		free(txr->tx_buffers, M_DEVBUF);
3844		txr->tx_buffers = NULL;
3845	}
3846	if (txr->txtag != NULL) {
3847		bus_dma_tag_destroy(txr->txtag);
3848		txr->txtag = NULL;
3849	}
3850	return;
3851}
3852
3853
3854/*********************************************************************
3855 *  The offload context is protocol specific (TCP/UDP) and thus
3856 *  only needs to be set when the protocol changes. The occasion
3857 *  of a context change can be a performance detriment, and
3858 *  might be better just disabled. The reason arises in the way
3859 *  in which the controller supports pipelined requests from the
3860 *  Tx data DMA. Up to four requests can be pipelined, and they may
3861 *  belong to the same packet or to multiple packets. However all
3862 *  requests for one packet are issued before a request is issued
3863 *  for a subsequent packet and if a request for the next packet
3864 *  requires a context change, that request will be stalled
3865 *  until the previous request completes. This means setting up
3866 *  a new context effectively disables pipelined Tx data DMA which
 *  in turn greatly slows down performance when sending small
 *  frames.
3869 **********************************************************************/
3870static void
3871em_transmit_checksum_setup(struct tx_ring *txr, struct mbuf *mp, int ip_off,
3872    struct ip *ip, u32 *txd_upper, u32 *txd_lower)
3873{
3874	struct adapter			*adapter = txr->adapter;
3875	struct e1000_context_desc	*TXD = NULL;
3876	struct em_txbuffer		*tx_buffer;
3877	int				cur, hdr_len;
3878	u32				cmd = 0;
3879	u16				offload = 0;
3880	u8				ipcso, ipcss, tucso, tucss;
3881
3882	ipcss = ipcso = tucss = tucso = 0;
3883	hdr_len = ip_off + (ip->ip_hl << 2);
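	/*
	 * Worked example (plain arithmetic, not from the datasheet):
	 * for an untagged Ethernet frame ip_off is 14 and ip_hl is 5
	 * 32-bit words, so hdr_len = 14 + 20 = 34; ipcso below then
	 * lands on byte 24 (14 + offsetof(struct ip, ip_sum) = 14 + 10).
	 */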
3884	cur = txr->next_avail_desc;
3885
3886	/* Setup of IP header checksum. */
3887	if (mp->m_pkthdr.csum_flags & CSUM_IP) {
3888		*txd_upper |= E1000_TXD_POPTS_IXSM << 8;
3889		offload |= CSUM_IP;
3890		ipcss = ip_off;
3891		ipcso = ip_off + offsetof(struct ip, ip_sum);
3892		/*
3893		 * Start offset for header checksum calculation.
3894		 * End offset for header checksum calculation.
3895		 * Offset of place to put the checksum.
3896		 */
3897		TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
3898		TXD->lower_setup.ip_fields.ipcss = ipcss;
3899		TXD->lower_setup.ip_fields.ipcse = htole16(hdr_len);
3900		TXD->lower_setup.ip_fields.ipcso = ipcso;
3901		cmd |= E1000_TXD_CMD_IP;
3902	}
3903
3904	if (mp->m_pkthdr.csum_flags & CSUM_TCP) {
3905 		*txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
3906 		*txd_upper |= E1000_TXD_POPTS_TXSM << 8;
3907 		offload |= CSUM_TCP;
3908 		tucss = hdr_len;
3909 		tucso = hdr_len + offsetof(struct tcphdr, th_sum);
3910		/*
3911		 * The 82574L can only remember the *last* context used
		 * regardless of the queue it was used for.  We cannot reuse
3913		 * contexts on this hardware platform and must generate a new
3914		 * context every time.  82574L hardware spec, section 7.2.6,
3915		 * second note.
3916		 */
3917		if (adapter->num_queues < 2) {
			/*
			 * Setting up a new checksum offload context for
			 * every frame takes a lot of processing time in
			 * hardware, and it hurts performance badly for
			 * small frames, so avoid it if the driver can
			 * reuse the previously configured context.
			 */
3925 			if (txr->last_hw_offload == offload) {
3926 				if (offload & CSUM_IP) {
3927 					if (txr->last_hw_ipcss == ipcss &&
3928 				    	txr->last_hw_ipcso == ipcso &&
3929 				    	txr->last_hw_tucss == tucss &&
3930 				    	txr->last_hw_tucso == tucso)
3931 						return;
3932 				} else {
3933 					if (txr->last_hw_tucss == tucss &&
3934 				    	txr->last_hw_tucso == tucso)
3935 						return;
3936 				}
3937  			}
3938 			txr->last_hw_offload = offload;
3939 			txr->last_hw_tucss = tucss;
3940 			txr->last_hw_tucso = tucso;
3941		}
3942 		/*
3943 		 * Start offset for payload checksum calculation.
3944 		 * End offset for payload checksum calculation.
3945 		 * Offset of place to put the checksum.
3946 		 */
3947		TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
3948 		TXD->upper_setup.tcp_fields.tucss = hdr_len;
3949 		TXD->upper_setup.tcp_fields.tucse = htole16(0);
3950 		TXD->upper_setup.tcp_fields.tucso = tucso;
3951 		cmd |= E1000_TXD_CMD_TCP;
3952 	} else if (mp->m_pkthdr.csum_flags & CSUM_UDP) {
3953 		*txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
3954 		*txd_upper |= E1000_TXD_POPTS_TXSM << 8;
3955 		tucss = hdr_len;
3956 		tucso = hdr_len + offsetof(struct udphdr, uh_sum);
3957		/*
3958		 * The 82574L can only remember the *last* context used
		 * regardless of the queue it was used for.  We cannot reuse
3960		 * contexts on this hardware platform and must generate a new
3961		 * context every time.  82574L hardware spec, section 7.2.6,
3962		 * second note.
3963		 */
3964		if (adapter->num_queues < 2) {
			/*
			 * Setting up a new checksum offload context for
			 * every frame takes a lot of processing time in
			 * hardware, and it hurts performance badly for
			 * small frames, so avoid it if the driver can
			 * reuse the previously configured context.
			 */
3972 			if (txr->last_hw_offload == offload) {
3973 				if (offload & CSUM_IP) {
3974 					if (txr->last_hw_ipcss == ipcss &&
3975 				    	txr->last_hw_ipcso == ipcso &&
3976 				    	txr->last_hw_tucss == tucss &&
3977 				    	txr->last_hw_tucso == tucso)
3978 						return;
3979 				} else {
3980 					if (txr->last_hw_tucss == tucss &&
3981 				    	txr->last_hw_tucso == tucso)
3982 						return;
3983 				}
3984 			}
3985 			txr->last_hw_offload = offload;
3986 			txr->last_hw_tucss = tucss;
3987 			txr->last_hw_tucso = tucso;
3988		}
3989 		/*
3990 		 * Start offset for header checksum calculation.
3991 		 * End offset for header checksum calculation.
3992 		 * Offset of place to put the checksum.
3993 		 */
3994		TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
3995 		TXD->upper_setup.tcp_fields.tucss = tucss;
3996 		TXD->upper_setup.tcp_fields.tucse = htole16(0);
3997 		TXD->upper_setup.tcp_fields.tucso = tucso;
3998  	}
3999
4000 	if (offload & CSUM_IP) {
4001 		txr->last_hw_ipcss = ipcss;
4002 		txr->last_hw_ipcso = ipcso;
4003  	}
4004
4005	TXD->tcp_seg_setup.data = htole32(0);
4006	TXD->cmd_and_length =
4007	    htole32(adapter->txd_cmd | E1000_TXD_CMD_DEXT | cmd);
4008	tx_buffer = &txr->tx_buffers[cur];
4009	tx_buffer->m_head = NULL;
4010	tx_buffer->next_eop = -1;
4011
4012	if (++cur == adapter->num_tx_desc)
4013		cur = 0;
4014
4015	txr->tx_avail--;
4016	txr->next_avail_desc = cur;
4017}
4018
4019
4020/**********************************************************************
4021 *
4022 *  Setup work for hardware segmentation offload (TSO)
4023 *
4024 **********************************************************************/
4025static void
4026em_tso_setup(struct tx_ring *txr, struct mbuf *mp, int ip_off,
4027    struct ip *ip, struct tcphdr *tp, u32 *txd_upper, u32 *txd_lower)
4028{
4029	struct adapter			*adapter = txr->adapter;
4030	struct e1000_context_desc	*TXD;
4031	struct em_txbuffer		*tx_buffer;
4032	int cur, hdr_len;
4033
	/*
	 * In theory we could use the same TSO context if and only if
	 * the frame is the same type (IP/TCP) and has the same MSS.
	 * However, checking whether a frame has the same IP/TCP
	 * structure is a hard thing, so just ignore that and always
	 * establish a new TSO context.
	 */
4041	hdr_len = ip_off + (ip->ip_hl << 2) + (tp->th_off << 2);
4042	*txd_lower = (E1000_TXD_CMD_DEXT |	/* Extended descr type */
4043		      E1000_TXD_DTYP_D |	/* Data descr type */
4044		      E1000_TXD_CMD_TSE);	/* Do TSE on this packet */
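	/*
	 * Example: for TCP/IPv4 with no options, ip_off is 14 and ip_hl
	 * and th_off are both 5, so hdr_len = 14 + 20 + 20 = 54 and only
	 * the payload beyond those 54 bytes is segmented at the MSS
	 * programmed below.
	 */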
4045
4046	/* IP and/or TCP header checksum calculation and insertion. */
4047	*txd_upper = (E1000_TXD_POPTS_IXSM | E1000_TXD_POPTS_TXSM) << 8;
4048
4049	cur = txr->next_avail_desc;
4050	tx_buffer = &txr->tx_buffers[cur];
4051	TXD = (struct e1000_context_desc *) &txr->tx_base[cur];
4052
4053	/*
4054	 * Start offset for header checksum calculation.
4055	 * End offset for header checksum calculation.
	 * Offset of place to put the checksum.
4057	 */
4058	TXD->lower_setup.ip_fields.ipcss = ip_off;
4059	TXD->lower_setup.ip_fields.ipcse =
4060	    htole16(ip_off + (ip->ip_hl << 2) - 1);
4061	TXD->lower_setup.ip_fields.ipcso = ip_off + offsetof(struct ip, ip_sum);
4062	/*
4063	 * Start offset for payload checksum calculation.
4064	 * End offset for payload checksum calculation.
4065	 * Offset of place to put the checksum.
4066	 */
4067	TXD->upper_setup.tcp_fields.tucss = ip_off + (ip->ip_hl << 2);
4068	TXD->upper_setup.tcp_fields.tucse = 0;
4069	TXD->upper_setup.tcp_fields.tucso =
4070	    ip_off + (ip->ip_hl << 2) + offsetof(struct tcphdr, th_sum);
4071	/*
4072	 * Payload size per packet w/o any headers.
4073	 * Length of all headers up to payload.
4074	 */
4075	TXD->tcp_seg_setup.fields.mss = htole16(mp->m_pkthdr.tso_segsz);
4076	TXD->tcp_seg_setup.fields.hdr_len = hdr_len;
4077
4078	TXD->cmd_and_length = htole32(adapter->txd_cmd |
4079				E1000_TXD_CMD_DEXT |	/* Extended descr */
4080				E1000_TXD_CMD_TSE |	/* TSE context */
4081				E1000_TXD_CMD_IP |	/* Do IP csum */
4082				E1000_TXD_CMD_TCP |	/* Do TCP checksum */
4083				(mp->m_pkthdr.len - (hdr_len))); /* Total len */
4084
4085	tx_buffer->m_head = NULL;
4086	tx_buffer->next_eop = -1;
4087
4088	if (++cur == adapter->num_tx_desc)
4089		cur = 0;
4090
4091	txr->tx_avail--;
4092	txr->next_avail_desc = cur;
4093	txr->tx_tso = TRUE;
4094}
4095
4096
4097/**********************************************************************
4098 *
4099 *  Examine each tx_buffer in the used queue. If the hardware is done
 *  processing the packet then free the associated resources. The
4101 *  tx_buffer is put back on the free queue.
4102 *
4103 **********************************************************************/
4104static void
4105em_txeof(struct tx_ring *txr)
4106{
4107	struct adapter	*adapter = txr->adapter;
	int first, last, done, processed;
	struct em_txbuffer *tx_buffer;
	struct e1000_tx_desc *tx_desc, *eop_desc;
4111	if_t ifp = adapter->ifp;
4112
4113	EM_TX_LOCK_ASSERT(txr);
4114#ifdef DEV_NETMAP
4115	if (netmap_tx_irq(ifp, txr->me))
4116		return;
4117#endif /* DEV_NETMAP */
4118
4119	/* No work, make sure hang detection is disabled */
	if (txr->tx_avail == adapter->num_tx_desc) {
		txr->busy = EM_TX_IDLE;
		return;
	}
4124
	processed = 0;
	first = txr->next_to_clean;
	tx_desc = &txr->tx_base[first];
	tx_buffer = &txr->tx_buffers[first];
	last = tx_buffer->next_eop;
	eop_desc = &txr->tx_base[last];
4131
4132	/*
4133	 * What this does is get the index of the
4134	 * first descriptor AFTER the EOP of the
4135	 * first packet, that way we can do the
4136	 * simple comparison on the inner while loop.
4137	 */
4138	if (++last == adapter->num_tx_desc)
4139 		last = 0;
4140	done = last;
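	/*
	 * Example: with 1024 descriptors and the first packet's EOP in
	 * slot 1023, 'last' wraps to 0, so the inner loop below cleans
	 * from 'first' up to, but not including, slot 0.
	 */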
4141
	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
	    BUS_DMASYNC_POSTREAD);

	while (eop_desc->upper.fields.status & E1000_TXD_STAT_DD) {
4146		/* We clean the range of the packet */
4147		while (first != done) {
			tx_desc->upper.data = 0;
			tx_desc->lower.data = 0;
			tx_desc->buffer_addr = 0;
			++txr->tx_avail;
			++processed;

			if (tx_buffer->m_head) {
				bus_dmamap_sync(txr->txtag,
				    tx_buffer->map,
				    BUS_DMASYNC_POSTWRITE);
				bus_dmamap_unload(txr->txtag,
				    tx_buffer->map);
				m_freem(tx_buffer->m_head);
				tx_buffer->m_head = NULL;
			}
			tx_buffer->next_eop = -1;

			if (++first == adapter->num_tx_desc)
				first = 0;

			tx_buffer = &txr->tx_buffers[first];
4169			tx_desc = &txr->tx_base[first];
4170		}
4171		if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1);
4172		/* See if we can continue to the next packet */
4173		last = tx_buffer->next_eop;
4174		if (last != -1) {
			eop_desc = &txr->tx_base[last];
			/* Get new done point */
			if (++last == adapter->num_tx_desc)
				last = 0;
			done = last;
		} else
			break;
	}
	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);

	txr->next_to_clean = first;
4186
4187	/*
4188	** Hang detection: we know there's work outstanding
4189	** or the entry return would have been taken, so no
4190	** descriptor processed here indicates a potential hang.
4191	** The local timer will examine this and do a reset if needed.
4192	*/
4193	if (processed == 0) {
4194		if (txr->busy != EM_TX_HUNG)
4195			++txr->busy;
4196	} else /* At least one descriptor was cleaned */
4197		txr->busy = EM_TX_BUSY; /* note this clears HUNG */
4198
	/*
	 * If we have a minimum free, clear IFF_DRV_OACTIVE
	 * to tell the stack that it is OK to send packets.
	 * Notice that all writes of OACTIVE happen under the
	 * TX lock which, with a single queue, guarantees
	 * sanity.
	 */
	if (txr->tx_avail >= EM_MAX_SCATTER) {
		if_setdrvflagbits(ifp, 0, IFF_DRV_OACTIVE);
	}
4209
4210	/* Disable hang detection if all clean */
4211	if (txr->tx_avail == adapter->num_tx_desc)
4212		txr->busy = EM_TX_IDLE;
4213}
4214
4215/*********************************************************************
4216 *
4217 *  Refresh RX descriptor mbufs from system mbuf buffer pool.
4218 *
4219 **********************************************************************/
4220static void
4221em_refresh_mbufs(struct rx_ring *rxr, int limit)
4222{
4223	struct adapter		*adapter = rxr->adapter;
4224	struct mbuf		*m;
4225	bus_dma_segment_t	segs;
4226	struct em_rxbuffer	*rxbuf;
4227	int			i, j, error, nsegs;
4228	bool			cleaned = FALSE;
4229
4230	i = j = rxr->next_to_refresh;
4231	/*
4232	** Get one descriptor beyond
4233	** our work mark to control
4234	** the loop.
4235	*/
4236	if (++j == adapter->num_rx_desc)
4237		j = 0;
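	/*
	 * Sketch of the loop control: with next_to_refresh == 0 and
	 * limit == 8, slots 0 through 6 are refreshed and
	 * next_to_refresh ends at 7, one short of 'limit', so the tail
	 * written to RDT below never overtakes next_to_check.
	 */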
4238
4239	while (j != limit) {
4240		rxbuf = &rxr->rx_buffers[i];
4241		if (rxbuf->m_head == NULL) {
4242			m = m_getjcl(M_NOWAIT, MT_DATA,
4243			    M_PKTHDR, adapter->rx_mbuf_sz);
4244			/*
4245			** If we have a temporary resource shortage
4246			** that causes a failure, just abort refresh
4247			** for now, we will return to this point when
4248			** reinvoked from em_rxeof.
4249			*/
4250			if (m == NULL)
4251				goto update;
4252		} else
4253			m = rxbuf->m_head;
4254
4255		m->m_len = m->m_pkthdr.len = adapter->rx_mbuf_sz;
4256		m->m_flags |= M_PKTHDR;
4257		m->m_data = m->m_ext.ext_buf;
4258
4259		/* Use bus_dma machinery to setup the memory mapping  */
4260		error = bus_dmamap_load_mbuf_sg(rxr->rxtag, rxbuf->map,
4261		    m, &segs, &nsegs, BUS_DMA_NOWAIT);
4262		if (error != 0) {
4263			printf("Refresh mbufs: hdr dmamap load"
4264			    " failure - %d\n", error);
4265			m_free(m);
4266			rxbuf->m_head = NULL;
4267			goto update;
4268		}
4269		rxbuf->m_head = m;
4270		rxbuf->paddr = segs.ds_addr;
4271		bus_dmamap_sync(rxr->rxtag,
4272		    rxbuf->map, BUS_DMASYNC_PREREAD);
4273		em_setup_rxdesc(&rxr->rx_base[i], rxbuf);
4274		cleaned = TRUE;
4275
		i = j; /* Next is precalculated for us */
4277		rxr->next_to_refresh = i;
4278		/* Calculate next controlling index */
4279		if (++j == adapter->num_rx_desc)
4280			j = 0;
4281	}
4282update:
	/*
	** Update the tail pointer only if,
	** and only as far as, we have refreshed.
	*/
4287	if (cleaned)
4288		E1000_WRITE_REG(&adapter->hw,
4289		    E1000_RDT(rxr->me), rxr->next_to_refresh);
4290
4291	return;
4292}
4293
4294
4295/*********************************************************************
4296 *
4297 *  Allocate memory for rx_buffer structures. Since we use one
4298 *  rx_buffer per received packet, the maximum number of rx_buffer's
4299 *  that we'll need is equal to the number of receive descriptors
4300 *  that we've allocated.
4301 *
4302 **********************************************************************/
4303static int
4304em_allocate_receive_buffers(struct rx_ring *rxr)
4305{
4306	struct adapter		*adapter = rxr->adapter;
4307	device_t		dev = adapter->dev;
4308	struct em_rxbuffer	*rxbuf;
4309	int			error;
4310
4311	rxr->rx_buffers = malloc(sizeof(struct em_rxbuffer) *
4312	    adapter->num_rx_desc, M_DEVBUF, M_NOWAIT | M_ZERO);
4313	if (rxr->rx_buffers == NULL) {
4314		device_printf(dev, "Unable to allocate rx_buffer memory\n");
4315		return (ENOMEM);
4316	}
4317
4318	error = bus_dma_tag_create(bus_get_dma_tag(dev), /* parent */
4319				1, 0,			/* alignment, bounds */
4320				BUS_SPACE_MAXADDR,	/* lowaddr */
4321				BUS_SPACE_MAXADDR,	/* highaddr */
4322				NULL, NULL,		/* filter, filterarg */
4323				MJUM9BYTES,		/* maxsize */
4324				1,			/* nsegments */
4325				MJUM9BYTES,		/* maxsegsize */
4326				0,			/* flags */
4327				NULL,			/* lockfunc */
4328				NULL,			/* lockarg */
4329				&rxr->rxtag);
4330	if (error) {
4331		device_printf(dev, "%s: bus_dma_tag_create failed %d\n",
4332		    __func__, error);
4333		goto fail;
4334	}
4335
4336	rxbuf = rxr->rx_buffers;
	for (int i = 0; i < adapter->num_rx_desc; i++, rxbuf++) {
4339		error = bus_dmamap_create(rxr->rxtag, 0, &rxbuf->map);
4340		if (error) {
4341			device_printf(dev, "%s: bus_dmamap_create failed: %d\n",
4342			    __func__, error);
4343			goto fail;
4344		}
4345	}
4346
4347	return (0);
4348
4349fail:
4350	em_free_receive_structures(adapter);
4351	return (error);
4352}
4353
4354
4355/*********************************************************************
4356 *
4357 *  Initialize a receive ring and its buffers.
4358 *
4359 **********************************************************************/
4360static int
4361em_setup_receive_ring(struct rx_ring *rxr)
4362{
4363	struct	adapter 	*adapter = rxr->adapter;
4364	struct em_rxbuffer	*rxbuf;
4365	bus_dma_segment_t	seg[1];
4366	int			rsize, nsegs, error = 0;
4367#ifdef DEV_NETMAP
4368	struct netmap_slot *slot;
4369	struct netmap_adapter *na = netmap_getna(adapter->ifp);
4370#endif
4371
4372
4373	/* Clear the ring contents */
4374	EM_RX_LOCK(rxr);
4375	rsize = roundup2(adapter->num_rx_desc *
4376	    sizeof(union e1000_rx_desc_extended), EM_DBA_ALIGN);
4377	bzero((void *)rxr->rx_base, rsize);
4378#ifdef DEV_NETMAP
4379	slot = netmap_reset(na, NR_RX, rxr->me, 0);
4380#endif
4381
4382	/*
4383	** Free current RX buffer structs and their mbufs
4384	*/
4385	for (int i = 0; i < adapter->num_rx_desc; i++) {
4386		rxbuf = &rxr->rx_buffers[i];
4387		if (rxbuf->m_head != NULL) {
4388			bus_dmamap_sync(rxr->rxtag, rxbuf->map,
4389			    BUS_DMASYNC_POSTREAD);
4390			bus_dmamap_unload(rxr->rxtag, rxbuf->map);
4391			m_freem(rxbuf->m_head);
4392			rxbuf->m_head = NULL; /* mark as freed */
4393		}
4394	}
4395
4396	/* Now replenish the mbufs */
	for (int j = 0; j != adapter->num_rx_desc; ++j) {
4398		rxbuf = &rxr->rx_buffers[j];
4399#ifdef DEV_NETMAP
4400		if (slot) {
4401			int si = netmap_idx_n2k(&na->rx_rings[rxr->me], j);
4402			uint64_t paddr;
4403			void *addr;
4404
4405			addr = PNMB(na, slot + si, &paddr);
4406			netmap_load_map(na, rxr->rxtag, rxbuf->map, addr);
4407			rxbuf->paddr = paddr;
4408			em_setup_rxdesc(&rxr->rx_base[j], rxbuf);
4409			continue;
4410		}
4411#endif /* DEV_NETMAP */
4412		rxbuf->m_head = m_getjcl(M_NOWAIT, MT_DATA,
4413		    M_PKTHDR, adapter->rx_mbuf_sz);
4414		if (rxbuf->m_head == NULL) {
4415			error = ENOBUFS;
4416			goto fail;
4417		}
4418		rxbuf->m_head->m_len = adapter->rx_mbuf_sz;
4419		rxbuf->m_head->m_flags &= ~M_HASFCS; /* we strip it */
4420		rxbuf->m_head->m_pkthdr.len = adapter->rx_mbuf_sz;
4421
4422		/* Get the memory mapping */
4423		error = bus_dmamap_load_mbuf_sg(rxr->rxtag,
4424		    rxbuf->map, rxbuf->m_head, seg,
4425		    &nsegs, BUS_DMA_NOWAIT);
4426		if (error != 0) {
4427			m_freem(rxbuf->m_head);
4428			rxbuf->m_head = NULL;
4429			goto fail;
4430		}
4431		bus_dmamap_sync(rxr->rxtag,
4432		    rxbuf->map, BUS_DMASYNC_PREREAD);
4433
4434		rxbuf->paddr = seg[0].ds_addr;
4435		em_setup_rxdesc(&rxr->rx_base[j], rxbuf);
4436	}
4437	rxr->next_to_check = 0;
4438	rxr->next_to_refresh = 0;
4439	bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4440	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4441
4442fail:
4443	EM_RX_UNLOCK(rxr);
4444	return (error);
4445}
4446
4447/*********************************************************************
4448 *
4449 *  Initialize all receive rings.
4450 *
4451 **********************************************************************/
4452static int
4453em_setup_receive_structures(struct adapter *adapter)
4454{
4455	struct rx_ring *rxr = adapter->rx_rings;
4456	int q;
4457
4458	for (q = 0; q < adapter->num_queues; q++, rxr++)
4459		if (em_setup_receive_ring(rxr))
4460			goto fail;
4461
4462	return (0);
4463fail:
4464	/*
4465	 * Free RX buffers allocated so far, we will only handle
4466	 * the rings that completed, the failing case will have
	 * cleaned up for itself. 'q' failed, so it's the terminus.
4468	 */
4469	for (int i = 0; i < q; ++i) {
4470		rxr = &adapter->rx_rings[i];
4471		for (int n = 0; n < adapter->num_rx_desc; n++) {
4472			struct em_rxbuffer *rxbuf;
4473			rxbuf = &rxr->rx_buffers[n];
4474			if (rxbuf->m_head != NULL) {
4475				bus_dmamap_sync(rxr->rxtag, rxbuf->map,
4476			  	  BUS_DMASYNC_POSTREAD);
4477				bus_dmamap_unload(rxr->rxtag, rxbuf->map);
4478				m_freem(rxbuf->m_head);
4479				rxbuf->m_head = NULL;
4480			}
4481		}
4482		rxr->next_to_check = 0;
4483		rxr->next_to_refresh = 0;
4484	}
4485
4486	return (ENOBUFS);
4487}
4488
4489/*********************************************************************
4490 *
4491 *  Free all receive rings.
4492 *
4493 **********************************************************************/
4494static void
4495em_free_receive_structures(struct adapter *adapter)
4496{
4497	struct rx_ring *rxr = adapter->rx_rings;
4498
4499	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4500		em_free_receive_buffers(rxr);
4501		/* Free the ring memory as well */
4502		em_dma_free(adapter, &rxr->rxdma);
4503		EM_RX_LOCK_DESTROY(rxr);
4504	}
4505
4506	free(adapter->rx_rings, M_DEVBUF);
4507}
4508
4509
4510/*********************************************************************
4511 *
4512 *  Free receive ring data structures
4513 *
4514 **********************************************************************/
4515static void
4516em_free_receive_buffers(struct rx_ring *rxr)
4517{
4518	struct adapter		*adapter = rxr->adapter;
4519	struct em_rxbuffer	*rxbuf = NULL;
4520
4521	INIT_DEBUGOUT("free_receive_buffers: begin");
4522
4523	if (rxr->rx_buffers != NULL) {
4524		for (int i = 0; i < adapter->num_rx_desc; i++) {
4525			rxbuf = &rxr->rx_buffers[i];
4526			if (rxbuf->map != NULL) {
4527				bus_dmamap_sync(rxr->rxtag, rxbuf->map,
4528				    BUS_DMASYNC_POSTREAD);
4529				bus_dmamap_unload(rxr->rxtag, rxbuf->map);
4530				bus_dmamap_destroy(rxr->rxtag, rxbuf->map);
4531			}
4532			if (rxbuf->m_head != NULL) {
4533				m_freem(rxbuf->m_head);
4534				rxbuf->m_head = NULL;
4535			}
4536		}
4537		free(rxr->rx_buffers, M_DEVBUF);
4538		rxr->rx_buffers = NULL;
4539		rxr->next_to_check = 0;
4540		rxr->next_to_refresh = 0;
4541	}
4542
4543	if (rxr->rxtag != NULL) {
4544		bus_dma_tag_destroy(rxr->rxtag);
4545		rxr->rxtag = NULL;
4546	}
4547
4548	return;
4549}
4550
4551
4552/*********************************************************************
4553 *
4554 *  Enable receive unit.
4555 *
4556 **********************************************************************/
4557
4558static void
4559em_initialize_receive_unit(struct adapter *adapter)
4560{
4561	struct rx_ring *rxr = adapter->rx_rings;
4562	if_t ifp = adapter->ifp;
4563	struct e1000_hw	*hw = &adapter->hw;
4564	u32	rctl, rxcsum, rfctl;
4565
4566	INIT_DEBUGOUT("em_initialize_receive_units: begin");
4567
4568	/*
4569	 * Make sure receives are disabled while setting
4570	 * up the descriptor ring
4571	 */
4572	rctl = E1000_READ_REG(hw, E1000_RCTL);
	/* On 82574/82583, never disable receives once they are enabled */
4574	if ((hw->mac.type != e1000_82574) && (hw->mac.type != e1000_82583))
4575		E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
4576
4577	/* Setup the Receive Control Register */
4578	rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
4579	rctl |= E1000_RCTL_EN | E1000_RCTL_BAM |
4580	    E1000_RCTL_LBM_NO | E1000_RCTL_RDMTS_HALF |
4581	    (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
4582
4583	/* Do not store bad packets */
4584	rctl &= ~E1000_RCTL_SBP;
4585
4586	/* Enable Long Packet receive */
4587	if (if_getmtu(ifp) > ETHERMTU)
4588		rctl |= E1000_RCTL_LPE;
4589	else
4590		rctl &= ~E1000_RCTL_LPE;
4591
	/* Strip the CRC */
	if (!em_disable_crc_stripping)
4594		rctl |= E1000_RCTL_SECRC;
4595
4596	E1000_WRITE_REG(&adapter->hw, E1000_RADV,
4597	    adapter->rx_abs_int_delay.value);
4598
4599	E1000_WRITE_REG(&adapter->hw, E1000_RDTR,
4600	    adapter->rx_int_delay.value);
4601	/*
4602	 * Set the interrupt throttling rate. Value is calculated
4603	 * as DEFAULT_ITR = 1/(MAX_INTS_PER_SEC * 256ns)
4604	 */
4605	E1000_WRITE_REG(hw, E1000_ITR, DEFAULT_ITR);
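	/*
	 * Example, assuming the driver's usual MAX_INTS_PER_SEC of 8000
	 * from if_em.h: DEFAULT_ITR = 1000000000 / (8000 * 256) = 488
	 * register units, i.e. at most one interrupt every 125us.
	 */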
4606
4607	/* Use extended rx descriptor formats */
4608	rfctl = E1000_READ_REG(hw, E1000_RFCTL);
4609	rfctl |= E1000_RFCTL_EXTEN;
4610	/*
4611	** When using MSIX interrupts we need to throttle
4612	** using the EITR register (82574 only)
4613	*/
4614	if (hw->mac.type == e1000_82574) {
4615		for (int i = 0; i < 4; i++)
4616			E1000_WRITE_REG(hw, E1000_EITR_82574(i),
4617			    DEFAULT_ITR);
4618		/* Disable accelerated acknowledge */
4619		rfctl |= E1000_RFCTL_ACK_DIS;
4620	}
4621	E1000_WRITE_REG(hw, E1000_RFCTL, rfctl);
4622
4623	rxcsum = E1000_READ_REG(hw, E1000_RXCSUM);
4624	if (if_getcapenable(ifp) & IFCAP_RXCSUM) {
4625#ifdef EM_MULTIQUEUE
4626		rxcsum |= E1000_RXCSUM_TUOFL |
4627			  E1000_RXCSUM_IPOFL |
4628			  E1000_RXCSUM_PCSD;
4629#else
4630		rxcsum |= E1000_RXCSUM_TUOFL;
4631#endif
4632	} else
4633		rxcsum &= ~E1000_RXCSUM_TUOFL;
4634
4635	E1000_WRITE_REG(hw, E1000_RXCSUM, rxcsum);
4636
4637#ifdef EM_MULTIQUEUE
4638#define RSSKEYLEN 10
4639	if (adapter->num_queues > 1) {
4640		uint8_t  rss_key[4 * RSSKEYLEN];
4641		uint32_t reta = 0;
4642		int i;
4643
		/*
		 * Configure the RSS key.
		 */
4647		arc4rand(rss_key, sizeof(rss_key), 0);
4648		for (i = 0; i < RSSKEYLEN; ++i) {
4649			uint32_t rssrk = 0;
4650
4651			rssrk = EM_RSSRK_VAL(rss_key, i);
4652			E1000_WRITE_REG(hw,E1000_RSSRK(i), rssrk);
4653		}
4654
		/*
		 * Configure the RSS redirect table in the following fashion:
		 * (hash & ring_cnt_mask) == rdr_table[(hash & rdr_table_mask)]
		 */
4659		for (i = 0; i < sizeof(reta); ++i) {
4660			uint32_t q;
4661
4662			q = (i % adapter->num_queues) << 7;
4663			reta |= q << (8 * i);
4664		}
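		/*
		 * Sketch of the result when num_queues == 2: the bytes
		 * alternate 0x00, 0x80 (bit 7 selects the queue), so
		 * reta == 0x80008000 and the 32 RETA writes below fill
		 * all 128 redirection entries with that pattern.
		 */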
4665
4666		for (i = 0; i < 32; ++i) {
4667			E1000_WRITE_REG(hw, E1000_RETA(i), reta);
4668		}
4669
4670		E1000_WRITE_REG(hw, E1000_MRQC, E1000_MRQC_RSS_ENABLE_2Q |
4671				E1000_MRQC_RSS_FIELD_IPV4_TCP |
4672				E1000_MRQC_RSS_FIELD_IPV4 |
4673				E1000_MRQC_RSS_FIELD_IPV6_TCP_EX |
4674				E1000_MRQC_RSS_FIELD_IPV6_EX |
4675				E1000_MRQC_RSS_FIELD_IPV6);
4676	}
4677#endif
4678	/*
4679	** XXX TEMPORARY WORKAROUND: on some systems with 82573
4680	** long latencies are observed, like Lenovo X60. This
4681	** change eliminates the problem, but since having positive
4682	** values in RDTR is a known source of problems on other
4683	** platforms another solution is being sought.
4684	*/
4685	if (hw->mac.type == e1000_82573)
4686		E1000_WRITE_REG(hw, E1000_RDTR, 0x20);
4687
4688	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4689		/* Setup the Base and Length of the Rx Descriptor Ring */
4690		u64 bus_addr = rxr->rxdma.dma_paddr;
4691		u32 rdt = adapter->num_rx_desc - 1; /* default */
4692
4693		E1000_WRITE_REG(hw, E1000_RDLEN(i),
4694		    adapter->num_rx_desc * sizeof(union e1000_rx_desc_extended));
4695		E1000_WRITE_REG(hw, E1000_RDBAH(i), (u32)(bus_addr >> 32));
4696		E1000_WRITE_REG(hw, E1000_RDBAL(i), (u32)bus_addr);
4697		/* Setup the Head and Tail Descriptor Pointers */
4698		E1000_WRITE_REG(hw, E1000_RDH(i), 0);
4699#ifdef DEV_NETMAP
4700		/*
4701		 * an init() while a netmap client is active must
4702		 * preserve the rx buffers passed to userspace.
4703		 */
4704		if (if_getcapenable(ifp) & IFCAP_NETMAP) {
4705			struct netmap_adapter *na = netmap_getna(adapter->ifp);
4706			rdt -= nm_kr_rxspace(&na->rx_rings[i]);
4707		}
4708#endif /* DEV_NETMAP */
4709		E1000_WRITE_REG(hw, E1000_RDT(i), rdt);
4710	}
4711
4712	/*
4713	 * Set PTHRESH for improved jumbo performance
4714	 * According to 10.2.5.11 of Intel 82574 Datasheet,
4715	 * RXDCTL(1) is written whenever RXDCTL(0) is written.
4716	 * Only write to RXDCTL(1) if there is a need for different
4717	 * settings.
4718	 */
4719	if (((adapter->hw.mac.type == e1000_ich9lan) ||
4720	    (adapter->hw.mac.type == e1000_pch2lan) ||
4721	    (adapter->hw.mac.type == e1000_ich10lan)) &&
4722	    (if_getmtu(ifp) > ETHERMTU)) {
4723		u32 rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(0));
4724		E1000_WRITE_REG(hw, E1000_RXDCTL(0), rxdctl | 3);
4725	} else if (adapter->hw.mac.type == e1000_82574) {
4726		for (int i = 0; i < adapter->num_queues; i++) {
4727			u32 rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(i));
4728
4729			rxdctl |= 0x20; /* PTHRESH */
4730			rxdctl |= 4 << 8; /* HTHRESH */
4731			rxdctl |= 4 << 16;/* WTHRESH */
4732			rxdctl |= 1 << 24; /* Switch to granularity */
4733			E1000_WRITE_REG(hw, E1000_RXDCTL(i), rxdctl);
4734		}
4735	}
4736
4737	if (adapter->hw.mac.type >= e1000_pch2lan) {
4738		if (if_getmtu(ifp) > ETHERMTU)
4739			e1000_lv_jumbo_workaround_ich8lan(hw, TRUE);
4740		else
4741			e1000_lv_jumbo_workaround_ich8lan(hw, FALSE);
4742	}
4743
	/* Make sure VLAN Filters are off */
	rctl &= ~E1000_RCTL_VFE;
4746
4747	if (adapter->rx_mbuf_sz == MCLBYTES)
4748		rctl |= E1000_RCTL_SZ_2048;
4749	else if (adapter->rx_mbuf_sz == MJUMPAGESIZE)
4750		rctl |= E1000_RCTL_SZ_4096 | E1000_RCTL_BSEX;
4751	else if (adapter->rx_mbuf_sz > MJUMPAGESIZE)
4752		rctl |= E1000_RCTL_SZ_8192 | E1000_RCTL_BSEX;
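	/*
	 * E.g. an rx_mbuf_sz of MJUM9BYTES (on a typical platform where
	 * MJUMPAGESIZE is 4096) takes the last branch: 8192-byte receive
	 * buffers with the buffer-size extension bit set.
	 */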
4753
	/* Ensure a DTYPE of 00 here, i.e. clear the descriptor type bits */
4755	rctl &= ~0x00000C00;
4756	/* Write out the settings */
4757	E1000_WRITE_REG(hw, E1000_RCTL, rctl);
4758
4759	return;
4760}
4761
4762
4763/*********************************************************************
4764 *
4765 *  This routine executes in interrupt context. It replenishes
 *  the mbufs in the descriptor ring and sends data which has been
 *  dma'ed into host memory to the upper layer.
4768 *
4769 *  We loop at most count times if count is > 0, or until done if
4770 *  count < 0.
4771 *
4772 *  For polling we also now return the number of cleaned packets
4773 *********************************************************************/
4774static bool
4775em_rxeof(struct rx_ring *rxr, int count, int *done)
4776{
4777	struct adapter		*adapter = rxr->adapter;
4778	if_t ifp = adapter->ifp;
4779	struct mbuf		*mp, *sendmp;
4780	u32			status = 0;
4781	u16 			len;
4782	int			i, processed, rxdone = 0;
4783	bool			eop;
4784	union e1000_rx_desc_extended	*cur;
4785
4786	EM_RX_LOCK(rxr);
4787
4788	/* Sync the ring */
4789	bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4790	    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
4791
4792
4793#ifdef DEV_NETMAP
4794	if (netmap_rx_irq(ifp, rxr->me, &processed)) {
4795		EM_RX_UNLOCK(rxr);
4796		return (FALSE);
4797	}
4798#endif /* DEV_NETMAP */
4799
4800	for (i = rxr->next_to_check, processed = 0; count != 0;) {
4801		if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0)
4802			break;
4803
4804		cur = &rxr->rx_base[i];
4805		status = le32toh(cur->wb.upper.status_error);
4806		mp = sendmp = NULL;
4807
4808		if ((status & E1000_RXD_STAT_DD) == 0)
4809			break;
4810
4811		len = le16toh(cur->wb.upper.length);
4812		eop = (status & E1000_RXD_STAT_EOP) != 0;
4813
4814		if ((status & E1000_RXDEXT_ERR_FRAME_ERR_MASK) ||
4815		    (rxr->discard == TRUE)) {
4816			adapter->dropped_pkts++;
4817			++rxr->rx_discarded;
4818			if (!eop) /* Catch subsequent segs */
4819				rxr->discard = TRUE;
4820			else
4821				rxr->discard = FALSE;
4822			em_rx_discard(rxr, i);
4823			goto next_desc;
4824		}
4825		bus_dmamap_unload(rxr->rxtag, rxr->rx_buffers[i].map);
4826
4827		/* Assign correct length to the current fragment */
4828		mp = rxr->rx_buffers[i].m_head;
4829		mp->m_len = len;
4830
4831		/* Trigger for refresh */
4832		rxr->rx_buffers[i].m_head = NULL;
4833
4834		/* First segment? */
4835		if (rxr->fmp == NULL) {
4836			mp->m_pkthdr.len = len;
4837			rxr->fmp = rxr->lmp = mp;
4838		} else {
4839			/* Chain mbuf's together */
4840			mp->m_flags &= ~M_PKTHDR;
4841			rxr->lmp->m_next = mp;
4842			rxr->lmp = mp;
4843			rxr->fmp->m_pkthdr.len += len;
4844		}
4845
4846		if (eop) {
4847			--count;
4848			sendmp = rxr->fmp;
4849			if_setrcvif(sendmp, ifp);
4850			if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1);
4851			em_receive_checksum(status, sendmp);
4852#ifndef __NO_STRICT_ALIGNMENT
4853			if (adapter->hw.mac.max_frame_size >
4854			    (MCLBYTES - ETHER_ALIGN) &&
4855			    em_fixup_rx(rxr) != 0)
4856				goto skip;
4857#endif
4858			if (status & E1000_RXD_STAT_VP) {
4859				if_setvtag(sendmp,
4860				    le16toh(cur->wb.upper.vlan));
4861				sendmp->m_flags |= M_VLANTAG;
4862			}
4863#ifndef __NO_STRICT_ALIGNMENT
4864skip:
4865#endif
4866			rxr->fmp = rxr->lmp = NULL;
4867		}
4868next_desc:
4869		/* Sync the ring */
4870		bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4871	    		BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
4872
4873		/* Zero out the receive descriptors status. */
4874		cur->wb.upper.status_error &= htole32(~0xFF);
4875		++rxdone;	/* cumulative for POLL */
4876		++processed;
4877
4878		/* Advance our pointers to the next descriptor. */
4879		if (++i == adapter->num_rx_desc)
4880			i = 0;
4881
4882		/* Send to the stack */
4883		if (sendmp != NULL) {
4884			rxr->next_to_check = i;
4885			EM_RX_UNLOCK(rxr);
4886			if_input(ifp, sendmp);
4887			EM_RX_LOCK(rxr);
4888			i = rxr->next_to_check;
4889		}
4890
4891		/* Only refresh mbufs every 8 descriptors */
4892		if (processed == 8) {
4893			em_refresh_mbufs(rxr, i);
4894			processed = 0;
4895		}
4896	}
4897
4898	/* Catch any remaining refresh work */
4899	if (e1000_rx_unrefreshed(rxr))
4900		em_refresh_mbufs(rxr, i);
4901
4902	rxr->next_to_check = i;
4903	if (done != NULL)
4904		*done = rxdone;
4905	EM_RX_UNLOCK(rxr);
4906
4907	return ((status & E1000_RXD_STAT_DD) ? TRUE : FALSE);
4908}
4909
4910static __inline void
4911em_rx_discard(struct rx_ring *rxr, int i)
4912{
4913	struct em_rxbuffer	*rbuf;
4914
4915	rbuf = &rxr->rx_buffers[i];
4916	bus_dmamap_unload(rxr->rxtag, rbuf->map);
4917
4918	/* Free any previous pieces */
4919	if (rxr->fmp != NULL) {
4920		rxr->fmp->m_flags |= M_PKTHDR;
4921		m_freem(rxr->fmp);
4922		rxr->fmp = NULL;
4923		rxr->lmp = NULL;
4924	}
4925	/*
4926	** Free buffer and allow em_refresh_mbufs()
	** to clean up and recharge the buffer.
4928	*/
4929	if (rbuf->m_head) {
4930		m_free(rbuf->m_head);
4931		rbuf->m_head = NULL;
4932	}
4933	return;
4934}
4935
4936#ifndef __NO_STRICT_ALIGNMENT
4937/*
 * When jumbo frames are enabled we should realign the entire payload on
 * architectures with strict alignment. This is a serious design mistake
 * of the 8254x, as it nullifies the benefit of DMA: the 8254x only allows
 * the RX buffer size to be 2048/4096/8192/16384, whereas what we really
 * want is 2048 - ETHER_ALIGN so the payload ends up aligned. On
 * architectures without strict alignment restrictions the 8254x still
 * performs unaligned memory accesses, which reduces performance there as
 * well. To avoid copying an entire frame just to align it, we allocate a
 * new mbuf, copy only the ethernet header into it, and prepend the new
 * mbuf to the existing mbuf chain.
 *
 * Be aware, the best performance of the 8254x is achieved only when jumbo
 * frames are not used at all on architectures with strict alignment.
4950 */
4951static int
4952em_fixup_rx(struct rx_ring *rxr)
4953{
4954	struct adapter *adapter = rxr->adapter;
4955	struct mbuf *m, *n;
4956	int error;
4957
4958	error = 0;
4959	m = rxr->fmp;
4960	if (m->m_len <= (MCLBYTES - ETHER_HDR_LEN)) {
4961		bcopy(m->m_data, m->m_data + ETHER_HDR_LEN, m->m_len);
4962		m->m_data += ETHER_HDR_LEN;
4963	} else {
4964		MGETHDR(n, M_NOWAIT, MT_DATA);
4965		if (n != NULL) {
4966			bcopy(m->m_data, n->m_data, ETHER_HDR_LEN);
4967			m->m_data += ETHER_HDR_LEN;
4968			m->m_len -= ETHER_HDR_LEN;
4969			n->m_len = ETHER_HDR_LEN;
4970			M_MOVE_PKTHDR(n, m);
4971			n->m_next = m;
4972			rxr->fmp = n;
4973		} else {
4974			adapter->dropped_pkts++;
4975			m_freem(rxr->fmp);
4976			rxr->fmp = NULL;
4977			error = ENOMEM;
4978		}
4979	}
4980
4981	return (error);
4982}
4983#endif
4984
4985static void
4986em_setup_rxdesc(union e1000_rx_desc_extended *rxd, const struct em_rxbuffer *rxbuf)
4987{
4988	rxd->read.buffer_addr = htole64(rxbuf->paddr);
4989	/* DD bits must be cleared */
	rxd->wb.upper.status_error = 0;
4991}
4992
4993/*********************************************************************
4994 *
4995 *  Verify that the hardware indicated that the checksum is valid.
 *  Inform the stack about the status of the checksum so that the
 *  stack doesn't spend time verifying it again.
4998 *
4999 *********************************************************************/
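/*
 * For instance, a clean TCP/IPv4 frame arrives with the IPCS and TCPCS
 * status bits set and the IPE/TCPE error bits clear, so the code below
 * hands the stack CSUM_IP_CHECKED|CSUM_IP_VALID and
 * CSUM_DATA_VALID|CSUM_PSEUDO_HDR with a csum_data of 0xffff, and no
 * software verification is performed.
 */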
5000static void
5001em_receive_checksum(uint32_t status, struct mbuf *mp)
5002{
5003	mp->m_pkthdr.csum_flags = 0;
5004
	/* If the Ignore Checksum Indication bit is set, do nothing */
5006	if (status & E1000_RXD_STAT_IXSM)
5007		return;
5008
5009	/* If the IP checksum exists and there is no IP Checksum error */
5010	if ((status & (E1000_RXD_STAT_IPCS | E1000_RXDEXT_STATERR_IPE)) ==
5011		E1000_RXD_STAT_IPCS) {
5012		mp->m_pkthdr.csum_flags = (CSUM_IP_CHECKED | CSUM_IP_VALID);
5013	}
5014
5015	/* TCP or UDP checksum */
5016	if ((status & (E1000_RXD_STAT_TCPCS | E1000_RXDEXT_STATERR_TCPE)) ==
5017	    E1000_RXD_STAT_TCPCS) {
5018		mp->m_pkthdr.csum_flags |= (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
5019		mp->m_pkthdr.csum_data = htons(0xffff);
5020	}
5021	if (status & E1000_RXD_STAT_UDPCS) {
5022		mp->m_pkthdr.csum_flags |= (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
5023		mp->m_pkthdr.csum_data = htons(0xffff);
5024	}
5025}
5026
5027/*
 * This routine is run via a vlan
 * config EVENT.
5030 */
5031static void
5032em_register_vlan(void *arg, if_t ifp, u16 vtag)
5033{
5034	struct adapter	*adapter = if_getsoftc(ifp);
5035	u32		index, bit;
5036
	if ((void *)adapter != arg)	/* Not our event */
5038		return;
5039
	if ((vtag == 0) || (vtag > 4095))	/* Invalid ID */
		return;
5042
5043	EM_CORE_LOCK(adapter);
5044	index = (vtag >> 5) & 0x7F;
5045	bit = vtag & 0x1F;
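	/*
	 * Example: vtag 1000 maps to index (1000 >> 5) & 0x7F = 31 and
	 * bit 1000 & 0x1F = 8, so bit 8 of shadow_vfta[31] is set below.
	 */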
5046	adapter->shadow_vfta[index] |= (1 << bit);
5047	++adapter->num_vlans;
5048	/* Re-init to load the changes */
5049	if (if_getcapenable(ifp) & IFCAP_VLAN_HWFILTER)
5050		em_init_locked(adapter);
5051	EM_CORE_UNLOCK(adapter);
5052}
5053
5054/*
 * This routine is run via a vlan
 * unconfig EVENT.
5057 */
5058static void
5059em_unregister_vlan(void *arg, if_t ifp, u16 vtag)
5060{
5061	struct adapter	*adapter = if_getsoftc(ifp);
5062	u32		index, bit;
5063
5064	if (adapter != arg)
5065		return;
5066
	if ((vtag == 0) || (vtag > 4095))	/* Invalid */
		return;
5069
5070	EM_CORE_LOCK(adapter);
5071	index = (vtag >> 5) & 0x7F;
5072	bit = vtag & 0x1F;
5073	adapter->shadow_vfta[index] &= ~(1 << bit);
5074	--adapter->num_vlans;
5075	/* Re-init to load the changes */
5076	if (if_getcapenable(ifp) & IFCAP_VLAN_HWFILTER)
5077		em_init_locked(adapter);
5078	EM_CORE_UNLOCK(adapter);
5079}
5080
5081static void
5082em_setup_vlan_hw_support(struct adapter *adapter)
5083{
5084	struct e1000_hw *hw = &adapter->hw;
5085	u32             reg;
5086
	/*
	** We get here via init_locked, meaning a soft
	** reset, which has already cleared the VFTA and
	** other state; if no vlans have been registered,
	** there is nothing to do.
	*/
	if (adapter->num_vlans == 0)
		return;
5095
5096	/*
	** A soft reset zeroes out the VFTA, so
5098	** we need to repopulate it now.
5099	*/
5100	for (int i = 0; i < EM_VFTA_SIZE; i++)
		if (adapter->shadow_vfta[i] != 0)
			E1000_WRITE_REG_ARRAY(hw, E1000_VFTA,
			    i, adapter->shadow_vfta[i]);
5104
5105	reg = E1000_READ_REG(hw, E1000_CTRL);
5106	reg |= E1000_CTRL_VME;
5107	E1000_WRITE_REG(hw, E1000_CTRL, reg);
5108
5109	/* Enable the Filter Table */
5110	reg = E1000_READ_REG(hw, E1000_RCTL);
5111	reg &= ~E1000_RCTL_CFIEN;
5112	reg |= E1000_RCTL_VFE;
5113	E1000_WRITE_REG(hw, E1000_RCTL, reg);
5114}
5115
5116static void
5117em_enable_intr(struct adapter *adapter)
5118{
5119	struct e1000_hw *hw = &adapter->hw;
5120	u32 ims_mask = IMS_ENABLE_MASK;
5121
5122	if (hw->mac.type == e1000_82574) {
5123		E1000_WRITE_REG(hw, EM_EIAC, EM_MSIX_MASK);
5124		ims_mask |= EM_MSIX_MASK;
5125	}
5126	E1000_WRITE_REG(hw, E1000_IMS, ims_mask);
5127}
5128
5129static void
5130em_disable_intr(struct adapter *adapter)
5131{
5132	struct e1000_hw *hw = &adapter->hw;
5133
5134	if (hw->mac.type == e1000_82574)
5135		E1000_WRITE_REG(hw, EM_EIAC, 0);
5136	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
5137}
5138
5139/*
5140 * Bit of a misnomer, what this really means is
5141 * to enable OS management of the system... aka
5142 * to disable special hardware management features
5143 */
5144static void
5145em_init_manageability(struct adapter *adapter)
5146{
5147	/* A shared code workaround */
5148#define E1000_82542_MANC2H E1000_MANC2H
5149	if (adapter->has_manage) {
5150		int manc2h = E1000_READ_REG(&adapter->hw, E1000_MANC2H);
5151		int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
5152
5153		/* disable hardware interception of ARP */
5154		manc &= ~(E1000_MANC_ARP_EN);
5155
		/* enable receiving management packets to the host */
5157		manc |= E1000_MANC_EN_MNG2HOST;
5158#define E1000_MNG2HOST_PORT_623 (1 << 5)
5159#define E1000_MNG2HOST_PORT_664 (1 << 6)
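		/*
		 * Ports 623 and 664 are the well-known RMCP and secure
		 * RMCP (ASF/IPMI) management ports; traffic addressed to
		 * them is steered to the host below.
		 */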
5160		manc2h |= E1000_MNG2HOST_PORT_623;
5161		manc2h |= E1000_MNG2HOST_PORT_664;
5162		E1000_WRITE_REG(&adapter->hw, E1000_MANC2H, manc2h);
5163		E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
5164	}
5165}
5166
5167/*
5168 * Give control back to hardware management
5169 * controller if there is one.
5170 */
5171static void
5172em_release_manageability(struct adapter *adapter)
5173{
5174	if (adapter->has_manage) {
5175		int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
5176
5177		/* re-enable hardware interception of ARP */
5178		manc |= E1000_MANC_ARP_EN;
5179		manc &= ~E1000_MANC_EN_MNG2HOST;
5180
5181		E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
5182	}
5183}
5184
5185/*
5186 * em_get_hw_control sets the {CTRL_EXT|FWSM}:DRV_LOAD bit.
5187 * For ASF and Pass Through versions of f/w this means
 * that the driver is loaded. For AMT versions of the f/w
5189 * this means that the network i/f is open.
5190 */
5191static void
5192em_get_hw_control(struct adapter *adapter)
5193{
5194	u32 ctrl_ext, swsm;
5195
5196	if (adapter->hw.mac.type == e1000_82573) {
5197		swsm = E1000_READ_REG(&adapter->hw, E1000_SWSM);
5198		E1000_WRITE_REG(&adapter->hw, E1000_SWSM,
5199		    swsm | E1000_SWSM_DRV_LOAD);
5200		return;
5201	}
5202	/* else */
5203	ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
5204	E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
5205	    ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
5206	return;
5207}
5208
5209/*
5210 * em_release_hw_control resets {CTRL_EXT|FWSM}:DRV_LOAD bit.
5211 * For ASF and Pass Through versions of f/w this means that
5212 * the driver is no longer loaded. For AMT versions of the
5213 * f/w this means that the network i/f is closed.
5214 */
5215static void
5216em_release_hw_control(struct adapter *adapter)
5217{
5218	u32 ctrl_ext, swsm;
5219
5220	if (!adapter->has_manage)
5221		return;
5222
5223	if (adapter->hw.mac.type == e1000_82573) {
5224		swsm = E1000_READ_REG(&adapter->hw, E1000_SWSM);
5225		E1000_WRITE_REG(&adapter->hw, E1000_SWSM,
5226		    swsm & ~E1000_SWSM_DRV_LOAD);
5227		return;
5228	}
5229	/* else */
5230	ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
5231	E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
5232	    ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
5233	return;
5234}
5235
5236static int
5237em_is_valid_ether_addr(u8 *addr)
5238{
5239	char zero_addr[6] = { 0, 0, 0, 0, 0, 0 };
5240
5241	if ((addr[0] & 1) || (!bcmp(addr, zero_addr, ETHER_ADDR_LEN))) {
5242		return (FALSE);
5243	}
5244
5245	return (TRUE);
5246}
5247
5248/*
5249** Parse the interface capabilities with regard
5250** to both system management and wake-on-lan for
5251** later use.
5252*/
5253static void
5254em_get_wakeup(device_t dev)
5255{
5256	struct adapter	*adapter = device_get_softc(dev);
5257	u16		eeprom_data = 0, device_id, apme_mask;
5258
5259	adapter->has_manage = e1000_enable_mng_pass_thru(&adapter->hw);
5260	apme_mask = EM_EEPROM_APME;
5261
5262	switch (adapter->hw.mac.type) {
5263	case e1000_82573:
5264	case e1000_82583:
5265		adapter->has_amt = TRUE;
5266		/* Falls thru */
5267	case e1000_82571:
5268	case e1000_82572:
5269	case e1000_80003es2lan:
5270		if (adapter->hw.bus.func == 1) {
5271			e1000_read_nvm(&adapter->hw,
5272			    NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data);
5273			break;
5274		} else
5275			e1000_read_nvm(&adapter->hw,
5276			    NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
5277		break;
5278	case e1000_ich8lan:
5279	case e1000_ich9lan:
5280	case e1000_ich10lan:
5281	case e1000_pchlan:
5282	case e1000_pch2lan:
5283	case e1000_pch_lpt:
5284	case e1000_pch_spt:
5285		apme_mask = E1000_WUC_APME;
5286		adapter->has_amt = TRUE;
5287		eeprom_data = E1000_READ_REG(&adapter->hw, E1000_WUC);
5288		break;
5289	default:
5290		e1000_read_nvm(&adapter->hw,
5291		    NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
5292		break;
5293	}
5294	if (eeprom_data & apme_mask)
5295		adapter->wol = (E1000_WUFC_MAG | E1000_WUFC_MC);
5296	/*
5297	 * We have the EEPROM settings, now apply the special cases
5298	 * where the EEPROM may be wrong or the board won't support
5299	 * Wake On LAN on a particular port.
5300	 */
5301	device_id = pci_get_device(dev);
5302	switch (device_id) {
5303	case E1000_DEV_ID_82571EB_FIBER:
5304		/* Wake events only supported on port A for dual fiber
5305		 * regardless of eeprom setting */
5306		if (E1000_READ_REG(&adapter->hw, E1000_STATUS) &
5307		    E1000_STATUS_FUNC_1)
5308			adapter->wol = 0;
5309		break;
5310	case E1000_DEV_ID_82571EB_QUAD_COPPER:
5311	case E1000_DEV_ID_82571EB_QUAD_FIBER:
5312	case E1000_DEV_ID_82571EB_QUAD_COPPER_LP:
5313		/* if quad port adapter, disable WoL on all but port A */
5314		if (global_quad_port_a != 0)
5315			adapter->wol = 0;
5316		/* Reset for multiple quad port adapters */
5317		if (++global_quad_port_a == 4)
5318			global_quad_port_a = 0;
5319		break;
5320	}
5321	return;
5322}
5323
5324
5325/*
5326 * Enable PCI Wake On LAN capability
5327 */
5328static void
5329em_enable_wakeup(device_t dev)
5330{
5331	struct adapter	*adapter = device_get_softc(dev);
5332	if_t ifp = adapter->ifp;
5333	int		error = 0;
5334	u32		pmc, ctrl, ctrl_ext, rctl;
5335	u16		status;
5336
5337	if (pci_find_cap(dev, PCIY_PMG, &pmc) != 0)
5338		return;
5339
5340	/*
5341	** Determine type of Wakeup: note that wol
5342	** is set with all bits on by default.
5343	*/
5344	if ((if_getcapenable(ifp) & IFCAP_WOL_MAGIC) == 0)
5345		adapter->wol &= ~E1000_WUFC_MAG;
5346
5347	if ((if_getcapenable(ifp) & IFCAP_WOL_MCAST) == 0)
5348		adapter->wol &= ~E1000_WUFC_MC;
5349	else {
5350		rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
5351		rctl |= E1000_RCTL_MPE;
5352		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, rctl);
5353	}
5354
5355	if (!(adapter->wol & (E1000_WUFC_EX | E1000_WUFC_MAG | E1000_WUFC_MC)))
5356		goto pme;
5357
5358	/* Advertise the wakeup capability */
5359	ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL);
5360	ctrl |= (E1000_CTRL_SWDPIN2 | E1000_CTRL_SWDPIN3);
5361	E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl);
5362
5363	/* Keep the laser running on Fiber adapters */
5364	if (adapter->hw.phy.media_type == e1000_media_type_fiber ||
5365	    adapter->hw.phy.media_type == e1000_media_type_internal_serdes) {
5366		ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
5367		ctrl_ext |= E1000_CTRL_EXT_SDP3_DATA;
5368		E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, ctrl_ext);
5369	}
5370
5371	if ((adapter->hw.mac.type == e1000_ich8lan) ||
5372	    (adapter->hw.mac.type == e1000_pchlan) ||
5373	    (adapter->hw.mac.type == e1000_ich9lan) ||
5374	    (adapter->hw.mac.type == e1000_ich10lan))
5375		e1000_suspend_workarounds_ich8lan(&adapter->hw);
5376
5377	if ((adapter->hw.mac.type == e1000_pchlan)  ||
5378	    (adapter->hw.mac.type == e1000_pch2lan) ||
5379	    (adapter->hw.mac.type == e1000_pch_lpt) ||
5380	    (adapter->hw.mac.type == e1000_pch_spt)) {
5381		error = em_enable_phy_wakeup(adapter);
5382		if (error)
5383			goto pme;
5384	} else {
5385		/* Enable wakeup by the MAC */
5386		E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
5387		E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
5388	}
5389
5390	if (adapter->hw.phy.type == e1000_phy_igp_3)
5391		e1000_igp3_phy_powerdown_workaround_ich8lan(&adapter->hw);
5392
5393pme:
5394	status = pci_read_config(dev, pmc + PCIR_POWER_STATUS, 2);
5395	status &= ~(PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE);
5396	if (!error && (if_getcapenable(ifp) & IFCAP_WOL))
5397		status |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE;
5398	pci_write_config(dev, pmc + PCIR_POWER_STATUS, status, 2);
5399
5400	return;
5401}
5402
5403/*
5404** WOL on the newer chipset interfaces (pchlan)
5405** requires things to be copied into the PHY
5406*/
5407static int
5408em_enable_phy_wakeup(struct adapter *adapter)
5409{
5410	struct e1000_hw *hw = &adapter->hw;
5411	u32 mreg, ret = 0;
5412	u16 preg;
5413
5414	/* copy MAC RARs to PHY RARs */
5415	e1000_copy_rx_addrs_to_phy_ich8lan(hw);
5416
5417	/* copy MAC MTA to PHY MTA */
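	/* (Each 32-bit MTA entry is split across two 16-bit PHY registers.) */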
5418	for (int i = 0; i < adapter->hw.mac.mta_reg_count; i++) {
5419		mreg = E1000_READ_REG_ARRAY(hw, E1000_MTA, i);
5420		e1000_write_phy_reg(hw, BM_MTA(i), (u16)(mreg & 0xFFFF));
5421		e1000_write_phy_reg(hw, BM_MTA(i) + 1,
5422		    (u16)((mreg >> 16) & 0xFFFF));
5423	}
5424
5425	/* configure PHY Rx Control register */
5426	e1000_read_phy_reg(hw, BM_RCTL, &preg);
5427	mreg = E1000_READ_REG(hw, E1000_RCTL);
5428	if (mreg & E1000_RCTL_UPE)
5429		preg |= BM_RCTL_UPE;
5430	if (mreg & E1000_RCTL_MPE)
5431		preg |= BM_RCTL_MPE;
5432	preg &= ~(BM_RCTL_MO_MASK);
5433	if (mreg & E1000_RCTL_MO_3)
5434		preg |= (((mreg & E1000_RCTL_MO_3) >> E1000_RCTL_MO_SHIFT)
5435				<< BM_RCTL_MO_SHIFT);
5436	if (mreg & E1000_RCTL_BAM)
5437		preg |= BM_RCTL_BAM;
5438	if (mreg & E1000_RCTL_PMCF)
5439		preg |= BM_RCTL_PMCF;
5440	mreg = E1000_READ_REG(hw, E1000_CTRL);
5441	if (mreg & E1000_CTRL_RFCE)
5442		preg |= BM_RCTL_RFCE;
5443	e1000_write_phy_reg(hw, BM_RCTL, preg);
5444
5445	/* enable PHY wakeup in MAC register */
5446	E1000_WRITE_REG(hw, E1000_WUC,
5447	    E1000_WUC_PHY_WAKE | E1000_WUC_PME_EN);
5448	E1000_WRITE_REG(hw, E1000_WUFC, adapter->wol);
5449
5450	/* configure and enable PHY wakeup in PHY registers */
5451	e1000_write_phy_reg(hw, BM_WUFC, adapter->wol);
5452	e1000_write_phy_reg(hw, BM_WUC, E1000_WUC_PME_EN);
5453
5454	/* activate PHY wakeup */
5455	ret = hw->phy.ops.acquire(hw);
5456	if (ret) {
5457		device_printf(adapter->dev, "Could not acquire PHY\n");
5458		return ret;
5459	}
5460	e1000_write_phy_reg_mdic(hw, IGP01E1000_PHY_PAGE_SELECT,
5461	                         (BM_WUC_ENABLE_PAGE << IGP_PAGE_SHIFT));
5462	ret = e1000_read_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, &preg);
5463	if (ret) {
5464		device_printf(adapter->dev, "Could not read PHY page 769\n");
5465		goto out;
5466	}
5467	preg |= BM_WUC_ENABLE_BIT | BM_WUC_HOST_WU_BIT;
5468	ret = e1000_write_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, preg);
5469	if (ret)
5470		device_printf(adapter->dev, "Could not set PHY Host Wakeup bit\n");
5471out:
5472	hw->phy.ops.release(hw);
5473
5474	return ret;
5475}
5476
5477static void
5478em_led_func(void *arg, int onoff)
5479{
5480	struct adapter	*adapter = arg;
5481
5482	EM_CORE_LOCK(adapter);
5483	if (onoff) {
5484		e1000_setup_led(&adapter->hw);
5485		e1000_led_on(&adapter->hw);
5486	} else {
5487		e1000_led_off(&adapter->hw);
5488		e1000_cleanup_led(&adapter->hw);
5489	}
5490	EM_CORE_UNLOCK(adapter);
5491}
5492
5493/*
5494** Disable the L0s and L1 link states
5495*/
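/*
** (PCIEM_LINK_CTL_ASPMC is the two-bit ASPM Control field of the PCIe
** Link Control register; clearing it disables entry to both states.)
*/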
5496static void
5497em_disable_aspm(struct adapter *adapter)
5498{
5499	int		base, reg;
5500	u16		link_cap,link_ctrl;
5501	device_t	dev = adapter->dev;
5502
5503	switch (adapter->hw.mac.type) {
5504		case e1000_82573:
5505		case e1000_82574:
5506		case e1000_82583:
5507			break;
5508		default:
5509			return;
5510	}
5511	if (pci_find_cap(dev, PCIY_EXPRESS, &base) != 0)
5512		return;
5513	reg = base + PCIER_LINK_CAP;
5514	link_cap = pci_read_config(dev, reg, 2);
5515	if ((link_cap & PCIEM_LINK_CAP_ASPM) == 0)
5516		return;
5517	reg = base + PCIER_LINK_CTL;
5518	link_ctrl = pci_read_config(dev, reg, 2);
5519	link_ctrl &= ~PCIEM_LINK_CTL_ASPMC;
5520	pci_write_config(dev, reg, link_ctrl, 2);
5521	return;
5522}
5523
5524/**********************************************************************
5525 *
5526 *  Update the board statistics counters.
5527 *
5528 **********************************************************************/
5529static void
5530em_update_stats_counters(struct adapter *adapter)
5531{
5532
5533	if (adapter->hw.phy.media_type == e1000_media_type_copper ||
5534	   (E1000_READ_REG(&adapter->hw, E1000_STATUS) & E1000_STATUS_LU)) {
5535		adapter->stats.symerrs += E1000_READ_REG(&adapter->hw, E1000_SYMERRS);
5536		adapter->stats.sec += E1000_READ_REG(&adapter->hw, E1000_SEC);
5537	}
5538	adapter->stats.crcerrs += E1000_READ_REG(&adapter->hw, E1000_CRCERRS);
5539	adapter->stats.mpc += E1000_READ_REG(&adapter->hw, E1000_MPC);
5540	adapter->stats.scc += E1000_READ_REG(&adapter->hw, E1000_SCC);
5541	adapter->stats.ecol += E1000_READ_REG(&adapter->hw, E1000_ECOL);
5542
5543	adapter->stats.mcc += E1000_READ_REG(&adapter->hw, E1000_MCC);
5544	adapter->stats.latecol += E1000_READ_REG(&adapter->hw, E1000_LATECOL);
5545	adapter->stats.colc += E1000_READ_REG(&adapter->hw, E1000_COLC);
5546	adapter->stats.dc += E1000_READ_REG(&adapter->hw, E1000_DC);
5547	adapter->stats.rlec += E1000_READ_REG(&adapter->hw, E1000_RLEC);
5548	adapter->stats.xonrxc += E1000_READ_REG(&adapter->hw, E1000_XONRXC);
5549	adapter->stats.xontxc += E1000_READ_REG(&adapter->hw, E1000_XONTXC);
5550	adapter->stats.xoffrxc += E1000_READ_REG(&adapter->hw, E1000_XOFFRXC);
5551	adapter->stats.xofftxc += E1000_READ_REG(&adapter->hw, E1000_XOFFTXC);
5552	adapter->stats.fcruc += E1000_READ_REG(&adapter->hw, E1000_FCRUC);
5553	adapter->stats.prc64 += E1000_READ_REG(&adapter->hw, E1000_PRC64);
5554	adapter->stats.prc127 += E1000_READ_REG(&adapter->hw, E1000_PRC127);
5555	adapter->stats.prc255 += E1000_READ_REG(&adapter->hw, E1000_PRC255);
5556	adapter->stats.prc511 += E1000_READ_REG(&adapter->hw, E1000_PRC511);
5557	adapter->stats.prc1023 += E1000_READ_REG(&adapter->hw, E1000_PRC1023);
5558	adapter->stats.prc1522 += E1000_READ_REG(&adapter->hw, E1000_PRC1522);
5559	adapter->stats.gprc += E1000_READ_REG(&adapter->hw, E1000_GPRC);
5560	adapter->stats.bprc += E1000_READ_REG(&adapter->hw, E1000_BPRC);
5561	adapter->stats.mprc += E1000_READ_REG(&adapter->hw, E1000_MPRC);
5562	adapter->stats.gptc += E1000_READ_REG(&adapter->hw, E1000_GPTC);
5563
5564	/* For the 64-bit byte counters the low dword must be read first. */
5565	/* Both registers clear on the read of the high dword */
5566
5567	adapter->stats.gorc += E1000_READ_REG(&adapter->hw, E1000_GORCL);
5568	adapter->stats.gorc += (u64)E1000_READ_REG(&adapter->hw, E1000_GORCH) << 32;
5569	adapter->stats.gotc += E1000_READ_REG(&adapter->hw, E1000_GOTCL);
5570	adapter->stats.gotc += (u64)E1000_READ_REG(&adapter->hw, E1000_GOTCH) << 32;
5571
5572	adapter->stats.rnbc += E1000_READ_REG(&adapter->hw, E1000_RNBC);
5573	adapter->stats.ruc += E1000_READ_REG(&adapter->hw, E1000_RUC);
5574	adapter->stats.rfc += E1000_READ_REG(&adapter->hw, E1000_RFC);
5575	adapter->stats.roc += E1000_READ_REG(&adapter->hw, E1000_ROC);
5576	adapter->stats.rjc += E1000_READ_REG(&adapter->hw, E1000_RJC);
5577
5578	adapter->stats.tor += E1000_READ_REG(&adapter->hw, E1000_TORH);
5579	adapter->stats.tot += E1000_READ_REG(&adapter->hw, E1000_TOTH);
5580
5581	adapter->stats.tpr += E1000_READ_REG(&adapter->hw, E1000_TPR);
5582	adapter->stats.tpt += E1000_READ_REG(&adapter->hw, E1000_TPT);
5583	adapter->stats.ptc64 += E1000_READ_REG(&adapter->hw, E1000_PTC64);
5584	adapter->stats.ptc127 += E1000_READ_REG(&adapter->hw, E1000_PTC127);
5585	adapter->stats.ptc255 += E1000_READ_REG(&adapter->hw, E1000_PTC255);
5586	adapter->stats.ptc511 += E1000_READ_REG(&adapter->hw, E1000_PTC511);
5587	adapter->stats.ptc1023 += E1000_READ_REG(&adapter->hw, E1000_PTC1023);
5588	adapter->stats.ptc1522 += E1000_READ_REG(&adapter->hw, E1000_PTC1522);
5589	adapter->stats.mptc += E1000_READ_REG(&adapter->hw, E1000_MPTC);
5590	adapter->stats.bptc += E1000_READ_REG(&adapter->hw, E1000_BPTC);
5591
5592	/* Interrupt Counts */
5593
5594	adapter->stats.iac += E1000_READ_REG(&adapter->hw, E1000_IAC);
5595	adapter->stats.icrxptc += E1000_READ_REG(&adapter->hw, E1000_ICRXPTC);
5596	adapter->stats.icrxatc += E1000_READ_REG(&adapter->hw, E1000_ICRXATC);
5597	adapter->stats.ictxptc += E1000_READ_REG(&adapter->hw, E1000_ICTXPTC);
5598	adapter->stats.ictxatc += E1000_READ_REG(&adapter->hw, E1000_ICTXATC);
5599	adapter->stats.ictxqec += E1000_READ_REG(&adapter->hw, E1000_ICTXQEC);
5600	adapter->stats.ictxqmtc += E1000_READ_REG(&adapter->hw, E1000_ICTXQMTC);
5601	adapter->stats.icrxdmtc += E1000_READ_REG(&adapter->hw, E1000_ICRXDMTC);
5602	adapter->stats.icrxoc += E1000_READ_REG(&adapter->hw, E1000_ICRXOC);
5603
5604	if (adapter->hw.mac.type >= e1000_82543) {
5605		adapter->stats.algnerrc +=
5606		E1000_READ_REG(&adapter->hw, E1000_ALGNERRC);
5607		adapter->stats.rxerrc +=
5608		E1000_READ_REG(&adapter->hw, E1000_RXERRC);
5609		adapter->stats.tncrs +=
5610		E1000_READ_REG(&adapter->hw, E1000_TNCRS);
5611		adapter->stats.cexterr +=
5612		E1000_READ_REG(&adapter->hw, E1000_CEXTERR);
5613		adapter->stats.tsctc +=
5614		E1000_READ_REG(&adapter->hw, E1000_TSCTC);
5615		adapter->stats.tsctfc +=
5616		E1000_READ_REG(&adapter->hw, E1000_TSCTFC);
5617	}
5618}
5619
5620static uint64_t
5621em_get_counter(if_t ifp, ift_counter cnt)
5622{
5623	struct adapter *adapter;
5624
5625	adapter = if_getsoftc(ifp);
5626
5627	switch (cnt) {
5628	case IFCOUNTER_COLLISIONS:
5629		return (adapter->stats.colc);
5630	case IFCOUNTER_IERRORS:
5631		return (adapter->dropped_pkts + adapter->stats.rxerrc +
5632		    adapter->stats.crcerrs + adapter->stats.algnerrc +
5633		    adapter->stats.ruc + adapter->stats.roc +
5634		    adapter->stats.mpc + adapter->stats.cexterr);
5635	case IFCOUNTER_OERRORS:
5636		return (adapter->stats.ecol + adapter->stats.latecol +
5637		    adapter->watchdog_events);
5638	default:
5639		return (if_get_counter_default(ifp, cnt));
5640	}
5641}
5642
5643/* Export a single 32-bit register via a read-only sysctl. */
5644static int
5645em_sysctl_reg_handler(SYSCTL_HANDLER_ARGS)
5646{
5647	struct adapter *adapter;
5648	u_int val;
5649
5650	adapter = oidp->oid_arg1;
5651	val = E1000_READ_REG(&adapter->hw, oidp->oid_arg2);
5652	return (sysctl_handle_int(oidp, &val, 0, req));
5653}
5654
5655/*
5656 * Add sysctl variables, one per statistic, to the system.
5657 */
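/*
 * (All of these hang off the device's sysctl tree, so with unit 0 the
 * nodes show up as e.g. dev.em.0.mac_stats.good_pkts_recvd and
 * dev.em.0.queue_tx_0.txd_head.)
 */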
5658static void
5659em_add_hw_stats(struct adapter *adapter)
5660{
5661	device_t dev = adapter->dev;
5662
5663	struct tx_ring *txr = adapter->tx_rings;
5664	struct rx_ring *rxr = adapter->rx_rings;
5665
5666	struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev);
5667	struct sysctl_oid *tree = device_get_sysctl_tree(dev);
5668	struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree);
5669	struct e1000_hw_stats *stats = &adapter->stats;
5670
5671	struct sysctl_oid *stat_node, *queue_node, *int_node;
5672	struct sysctl_oid_list *stat_list, *queue_list, *int_list;
5673
5674#define QUEUE_NAME_LEN 32
5675	char namebuf[QUEUE_NAME_LEN];
5676
5677	/* Driver Statistics */
5678	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "dropped",
5679			CTLFLAG_RD, &adapter->dropped_pkts,
5680			"Driver dropped packets");
5681	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "link_irq",
5682			CTLFLAG_RD, &adapter->link_irq,
5683			"Link MSIX IRQ Handled");
5684	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "mbuf_defrag_fail",
5685			 CTLFLAG_RD, &adapter->mbuf_defrag_failed,
5686			 "Defragmenting mbuf chain failed");
5687	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_dma_fail",
5688			CTLFLAG_RD, &adapter->no_tx_dma_setup,
5689			"Driver tx dma failure in xmit");
5690	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_overruns",
5691			CTLFLAG_RD, &adapter->rx_overruns,
5692			"RX overruns");
5693	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "watchdog_timeouts",
5694			CTLFLAG_RD, &adapter->watchdog_events,
5695			"Watchdog timeouts");
5696
5697	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "device_control",
5698			CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_CTRL,
5699			em_sysctl_reg_handler, "IU",
5700			"Device Control Register");
5701	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rx_control",
5702			CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_RCTL,
5703			em_sysctl_reg_handler, "IU",
5704			"Receiver Control Register");
5705	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_high_water",
5706			CTLFLAG_RD, &adapter->hw.fc.high_water, 0,
5707			"Flow Control High Watermark");
5708	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_low_water",
5709			CTLFLAG_RD, &adapter->hw.fc.low_water, 0,
5710			"Flow Control Low Watermark");
5711
5712	for (int i = 0; i < adapter->num_queues; i++, txr++, rxr++) {
5713		snprintf(namebuf, QUEUE_NAME_LEN, "queue_tx_%d", i);
5714		queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
5715					    CTLFLAG_RD, NULL, "TX Queue Name");
5716		queue_list = SYSCTL_CHILDREN(queue_node);
5717
5718		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_head",
5719				CTLTYPE_UINT | CTLFLAG_RD, adapter,
5720				E1000_TDH(txr->me),
5721				em_sysctl_reg_handler, "IU",
5722 				"Transmit Descriptor Head");
5723		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_tail",
5724				CTLTYPE_UINT | CTLFLAG_RD, adapter,
5725				E1000_TDT(txr->me),
5726				em_sysctl_reg_handler, "IU",
5727 				"Transmit Descriptor Tail");
5728		SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "tx_irq",
5729				CTLFLAG_RD, &txr->tx_irq,
5730				"Queue MSI-X Transmit Interrupts");
5731		SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "no_desc_avail",
5732				CTLFLAG_RD, &txr->no_desc_avail,
5733				"Queue No Descriptor Available");
5734
5735		snprintf(namebuf, QUEUE_NAME_LEN, "queue_rx_%d", i);
5736		queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
5737					    CTLFLAG_RD, NULL, "RX Queue Name");
5738		queue_list = SYSCTL_CHILDREN(queue_node);
5739
5740		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_head",
5741				CTLTYPE_UINT | CTLFLAG_RD, adapter,
5742				E1000_RDH(rxr->me),
5743				em_sysctl_reg_handler, "IU",
5744				"Receive Descriptor Head");
5745		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_tail",
5746				CTLTYPE_UINT | CTLFLAG_RD, adapter,
5747				E1000_RDT(rxr->me),
5748				em_sysctl_reg_handler, "IU",
5749				"Receive Descriptor Tail");
5750		SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "rx_irq",
5751				CTLFLAG_RD, &rxr->rx_irq,
5752				"Queue MSI-X Receive Interrupts");
5753	}
5754
5755	/* MAC stats get their own sub node */
5756
5757	stat_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "mac_stats",
5758				    CTLFLAG_RD, NULL, "Statistics");
5759	stat_list = SYSCTL_CHILDREN(stat_node);
5760
5761	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "excess_coll",
5762			CTLFLAG_RD, &stats->ecol,
5763			"Excessive collisions");
5764	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "single_coll",
5765			CTLFLAG_RD, &stats->scc,
5766			"Single collisions");
5767	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "multiple_coll",
5768			CTLFLAG_RD, &stats->mcc,
5769			"Multiple collisions");
5770	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "late_coll",
5771			CTLFLAG_RD, &stats->latecol,
5772			"Late collisions");
5773	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "collision_count",
5774			CTLFLAG_RD, &stats->colc,
5775			"Collision Count");
5776	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "symbol_errors",
5777			CTLFLAG_RD, &adapter->stats.symerrs,
5778			"Symbol Errors");
5779	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "sequence_errors",
5780			CTLFLAG_RD, &adapter->stats.sec,
5781			"Sequence Errors");
5782	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "defer_count",
5783			CTLFLAG_RD, &adapter->stats.dc,
5784			"Defer Count");
5785	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "missed_packets",
5786			CTLFLAG_RD, &adapter->stats.mpc,
5787			"Missed Packets");
5788	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_no_buff",
5789			CTLFLAG_RD, &adapter->stats.rnbc,
5790			"Receive No Buffers");
5791	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_undersize",
5792			CTLFLAG_RD, &adapter->stats.ruc,
5793			"Receive Undersize");
5794	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_fragmented",
5795			CTLFLAG_RD, &adapter->stats.rfc,
5796			"Fragmented Packets Received");
5797	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_oversize",
5798			CTLFLAG_RD, &adapter->stats.roc,
5799			"Oversized Packets Received");
5800	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_jabber",
5801			CTLFLAG_RD, &adapter->stats.rjc,
5802			"Received Jabber");
5803	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_errs",
5804			CTLFLAG_RD, &adapter->stats.rxerrc,
5805			"Receive Errors");
5806	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "crc_errs",
5807			CTLFLAG_RD, &adapter->stats.crcerrs,
5808			"CRC errors");
5809	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "alignment_errs",
5810			CTLFLAG_RD, &adapter->stats.algnerrc,
5811			"Alignment Errors");
5812	/* On 82575 these are collision counts */
5813	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "coll_ext_errs",
5814			CTLFLAG_RD, &adapter->stats.cexterr,
5815			"Collision/Carrier extension errors");
5816	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_recvd",
5817			CTLFLAG_RD, &adapter->stats.xonrxc,
5818			"XON Received");
5819	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_txd",
5820			CTLFLAG_RD, &adapter->stats.xontxc,
5821			"XON Transmitted");
5822	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_recvd",
5823			CTLFLAG_RD, &adapter->stats.xoffrxc,
5824			"XOFF Received");
5825	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_txd",
5826			CTLFLAG_RD, &adapter->stats.xofftxc,
5827			"XOFF Transmitted");
5828
5829	/* Packet Reception Stats */
5830	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_recvd",
5831			CTLFLAG_RD, &adapter->stats.tpr,
5832			"Total Packets Received");
5833	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_recvd",
5834			CTLFLAG_RD, &adapter->stats.gprc,
5835			"Good Packets Received");
5836	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_recvd",
5837			CTLFLAG_RD, &adapter->stats.bprc,
5838			"Broadcast Packets Received");
5839	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_recvd",
5840			CTLFLAG_RD, &adapter->stats.mprc,
5841			"Multicast Packets Received");
5842	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_64",
5843			CTLFLAG_RD, &adapter->stats.prc64,
5844			"64 byte frames received");
5845	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_65_127",
5846			CTLFLAG_RD, &adapter->stats.prc127,
5847			"65-127 byte frames received");
5848	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_128_255",
5849			CTLFLAG_RD, &adapter->stats.prc255,
5850			"128-255 byte frames received");
5851	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_256_511",
5852			CTLFLAG_RD, &adapter->stats.prc511,
5853			"256-511 byte frames received");
5854	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_512_1023",
5855			CTLFLAG_RD, &adapter->stats.prc1023,
5856			"512-1023 byte frames received");
5857	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_1024_1522",
5858			CTLFLAG_RD, &adapter->stats.prc1522,
5859			"1024-1522 byte frames received");
5860 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_recvd",
5861 			CTLFLAG_RD, &adapter->stats.gorc,
5862 			"Good Octets Received");
5863
5864	/* Packet Transmission Stats */
5865 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_txd",
5866 			CTLFLAG_RD, &adapter->stats.gotc,
5867 			"Good Octets Transmitted");
5868	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_txd",
5869			CTLFLAG_RD, &adapter->stats.tpt,
5870			"Total Packets Transmitted");
5871	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd",
5872			CTLFLAG_RD, &adapter->stats.gptc,
5873			"Good Packets Transmitted");
5874	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_txd",
5875			CTLFLAG_RD, &adapter->stats.bptc,
5876			"Broadcast Packets Transmitted");
5877	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_txd",
5878			CTLFLAG_RD, &adapter->stats.mptc,
5879			"Multicast Packets Transmitted");
5880	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_64",
5881			CTLFLAG_RD, &adapter->stats.ptc64,
5882			"64 byte frames transmitted");
5883	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_65_127",
5884			CTLFLAG_RD, &adapter->stats.ptc127,
5885			"65-127 byte frames transmitted");
5886	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_128_255",
5887			CTLFLAG_RD, &adapter->stats.ptc255,
5888			"128-255 byte frames transmitted");
5889	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_256_511",
5890			CTLFLAG_RD, &adapter->stats.ptc511,
5891			"256-511 byte frames transmitted");
5892	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_512_1023",
5893			CTLFLAG_RD, &adapter->stats.ptc1023,
5894			"512-1023 byte frames transmitted");
5895	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_1024_1522",
5896			CTLFLAG_RD, &adapter->stats.ptc1522,
5897			"1024-1522 byte frames transmitted");
5898	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tso_txd",
5899			CTLFLAG_RD, &adapter->stats.tsctc,
5900			"TSO Contexts Transmitted");
5901	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tso_ctx_fail",
5902			CTLFLAG_RD, &adapter->stats.tsctfc,
5903			"TSO Contexts Failed");
5904
5905
5906	/* Interrupt Stats */
5907
5908	int_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "interrupts",
5909				    CTLFLAG_RD, NULL, "Interrupt Statistics");
5910	int_list = SYSCTL_CHILDREN(int_node);
5911
5912	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "asserts",
5913			CTLFLAG_RD, &adapter->stats.iac,
5914			"Interrupt Assertion Count");
5915
5916	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_pkt_timer",
5917			CTLFLAG_RD, &adapter->stats.icrxptc,
5918			"Interrupt Cause Rx Pkt Timer Expire Count");
5919
5920	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_abs_timer",
5921			CTLFLAG_RD, &adapter->stats.icrxatc,
5922			"Interrupt Cause Rx Abs Timer Expire Count");
5923
5924	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_pkt_timer",
5925			CTLFLAG_RD, &adapter->stats.ictxptc,
5926			"Interrupt Cause Tx Pkt Timer Expire Count");
5927
5928	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_abs_timer",
5929			CTLFLAG_RD, &adapter->stats.ictxatc,
5930			"Interrupt Cause Tx Abs Timer Expire Count");
5931
5932	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_queue_empty",
5933			CTLFLAG_RD, &adapter->stats.ictxqec,
5934			"Interrupt Cause Tx Queue Empty Count");
5935
5936	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_queue_min_thresh",
5937			CTLFLAG_RD, &adapter->stats.ictxqmtc,
5938			"Interrupt Cause Tx Queue Min Thresh Count");
5939
5940	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_desc_min_thresh",
5941			CTLFLAG_RD, &adapter->stats.icrxdmtc,
5942			"Interrupt Cause Rx Desc Min Thresh Count");
5943
5944	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_overrun",
5945			CTLFLAG_RD, &adapter->stats.icrxoc,
5946			"Interrupt Cause Receiver Overrun Count");
5947}
5948
5949/**********************************************************************
5950 *
5951 *  This routine provides a way to dump out the adapter eeprom,
5952 *  often a useful debug/service tool. It only dumps the first
5953 *  32 words, which is where the interesting content resides.
5954 *
5955 **********************************************************************/
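/*
 * Example (this handler is attached as "nvm" on the device's sysctl
 * tree elsewhere in this file; unit number 0 assumed):
 *   sysctl dev.em.0.nvm=1
 */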
5956static int
5957em_sysctl_nvm_info(SYSCTL_HANDLER_ARGS)
5958{
5959	struct adapter *adapter = (struct adapter *)arg1;
5960	int error;
5961	int result;
5962
5963	result = -1;
5964	error = sysctl_handle_int(oidp, &result, 0, req);
5965
5966	if (error || !req->newptr)
5967		return (error);
5968
5969	/*
5970	 * This value will cause a hex dump of the
5971	 * first 32 16-bit words of the EEPROM to
5972	 * the screen.
5973	 */
5974	if (result == 1)
5975		em_print_nvm_info(adapter);
5976
5977	return (error);
5978}
5979
5980static void
5981em_print_nvm_info(struct adapter *adapter)
5982{
5983	u16	eeprom_data;
5984	int	i, j, row = 0;
5985
5986	/* It's a bit crude, but it gets the job done */
5987	printf("\nInterface EEPROM Dump:\n");
5988	printf("Offset\n0x0000  ");
5989	for (i = 0, j = 0; i < 32; i++, j++) {
5990		if (j == 8) { /* Make the offset block */
5991			j = 0; ++row;
5992			printf("\n0x00%x0  ", row);
5993		}
5994		e1000_read_nvm(&adapter->hw, i, 1, &eeprom_data);
5995		printf("%04x ", eeprom_data);
5996	}
5997	printf("\n");
5998}
5999
6000static int
6001em_sysctl_int_delay(SYSCTL_HANDLER_ARGS)
6002{
6003	struct em_int_delay_info *info;
6004	struct adapter *adapter;
6005	u32 regval;
6006	int error, usecs, ticks;
6007
6008	info = (struct em_int_delay_info *)arg1;
6009	usecs = info->value;
6010	error = sysctl_handle_int(oidp, &usecs, 0, req);
6011	if (error != 0 || req->newptr == NULL)
6012		return (error);
6013	if (usecs < 0 || usecs > EM_TICKS_TO_USECS(65535))
6014		return (EINVAL);
6015	info->value = usecs;
6016	ticks = EM_USECS_TO_TICKS(usecs);
6017	if (info->offset == E1000_ITR)	/* ITR granularity is 256ns, 1/4 tick */
6018		ticks *= 4;
6019
6020	adapter = info->adapter;
6021
6022	EM_CORE_LOCK(adapter);
6023	regval = E1000_READ_OFFSET(&adapter->hw, info->offset);
6024	regval = (regval & ~0xffff) | (ticks & 0xffff);
6025	/* Handle a few special cases. */
6026	switch (info->offset) {
6027	case E1000_RDTR:
6028		break;
6029	case E1000_TIDV:
6030		if (ticks == 0) {
6031			adapter->txd_cmd &= ~E1000_TXD_CMD_IDE;
6032			/* Don't write 0 into the TIDV register. */
6033			regval++;
6034		} else
6035			adapter->txd_cmd |= E1000_TXD_CMD_IDE;
6036		break;
6037	}
6038	E1000_WRITE_OFFSET(&adapter->hw, info->offset, regval);
6039	EM_CORE_UNLOCK(adapter);
6040	return (0);
6041}
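/*
 * Worked example (a sketch, assuming the ~1.024us delay-timer tick that
 * the EM_USECS_TO_TICKS() macro converts to): writing 100 through one of
 * these sysctls yields roughly 98 timer ticks, or 4x that count for
 * E1000_ITR with its finer 256ns granularity.
 */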
6042
6043static void
6044em_add_int_delay_sysctl(struct adapter *adapter, const char *name,
6045	const char *description, struct em_int_delay_info *info,
6046	int offset, int value)
6047{
6048	info->adapter = adapter;
6049	info->offset = offset;
6050	info->value = value;
6051	SYSCTL_ADD_PROC(device_get_sysctl_ctx(adapter->dev),
6052	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
6053	    OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW,
6054	    info, 0, em_sysctl_int_delay, "I", description);
6055}
6056
6057static void
6058em_set_sysctl_value(struct adapter *adapter, const char *name,
6059	const char *description, int *limit, int value)
6060{
6061	*limit = value;
6062	SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev),
6063	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
6064	    OID_AUTO, name, CTLFLAG_RW, limit, value, description);
6065}
6066
6067
6068/*
6069** Set flow control using sysctl:
6070** Flow control values:
6071**      0 - off
6072**      1 - rx pause
6073**      2 - tx pause
6074**      3 - full
6075*/
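/*
** Example (this handler is attached as "fc" on the device sysctl tree
** elsewhere in this file; unit number 0 assumed):
**      sysctl dev.em.0.fc=3    (request full flow control)
*/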
6076static int
6077em_set_flowcntl(SYSCTL_HANDLER_ARGS)
6078{
6079	int		error;
6080	static int	input = 3; /* default is full */
6081	struct adapter	*adapter = (struct adapter *) arg1;
6082
6083	error = sysctl_handle_int(oidp, &input, 0, req);
6084
6085	if ((error) || (req->newptr == NULL))
6086		return (error);
6087
6088	if (input == adapter->fc) /* no change? */
6089		return (error);
6090
6091	switch (input) {
6092	case e1000_fc_rx_pause:
6093	case e1000_fc_tx_pause:
6094	case e1000_fc_full:
6095	case e1000_fc_none:
6096		adapter->hw.fc.requested_mode = input;
6097		adapter->fc = input;
6098		break;
6099	default:
6100		/* Do nothing */
6101		return (error);
6102	}
6103
6104	adapter->hw.fc.current_mode = adapter->hw.fc.requested_mode;
6105	e1000_force_mac_fc(&adapter->hw);
6106	return (error);
6107}
6108
6109/*
6110** Manage Energy Efficient Ethernet:
6111** Control values:
6112**     0 - EEE enabled, 1 - EEE disabled
6113*/
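/*
** Example (this handler is attached as "eee_control" on the device
** sysctl tree elsewhere in this file; unit number 0 assumed):
**     sysctl dev.em.0.eee_control=1    (disable EEE)
*/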
6114static int
6115em_sysctl_eee(SYSCTL_HANDLER_ARGS)
6116{
6117	struct adapter *adapter = (struct adapter *) arg1;
6118	int		error, value;
6119
6120	value = adapter->hw.dev_spec.ich8lan.eee_disable;
6121	error = sysctl_handle_int(oidp, &value, 0, req);
6122	if (error || req->newptr == NULL)
6123		return (error);
6124	EM_CORE_LOCK(adapter);
6125	adapter->hw.dev_spec.ich8lan.eee_disable = (value != 0);
6126	em_init_locked(adapter);
6127	EM_CORE_UNLOCK(adapter);
6128	return (0);
6129}
6130
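/*
** Dump driver debug state when 1 is written (this handler is attached
** as "debug" on the device sysctl tree elsewhere in this file).
*/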
6131static int
6132em_sysctl_debug_info(SYSCTL_HANDLER_ARGS)
6133{
6134	struct adapter *adapter;
6135	int error;
6136	int result;
6137
6138	result = -1;
6139	error = sysctl_handle_int(oidp, &result, 0, req);
6140
6141	if (error || !req->newptr)
6142		return (error);
6143
6144	if (result == 1) {
6145		adapter = (struct adapter *)arg1;
6146		em_print_debug_info(adapter);
6147	}
6148
6149	return (error);
6150}
6151
6152/*
6153** This routine is meant to be fluid, add whatever is
6154** needed for debugging a problem.  -jfv
6155*/
6156static void
6157em_print_debug_info(struct adapter *adapter)
6158{
6159	device_t dev = adapter->dev;
6160	struct tx_ring *txr = adapter->tx_rings;
6161	struct rx_ring *rxr = adapter->rx_rings;
6162
6163	if (if_getdrvflags(adapter->ifp) & IFF_DRV_RUNNING)
6164		printf("Interface is RUNNING ");
6165	else
6166		printf("Interface is NOT RUNNING\n");
6167
6168	if (if_getdrvflags(adapter->ifp) & IFF_DRV_OACTIVE)
6169		printf("and INACTIVE\n");
6170	else
6171		printf("and ACTIVE\n");
6172
6173	for (int i = 0; i < adapter->num_queues; i++, txr++, rxr++) {
6174		device_printf(dev, "TX Queue %d ------\n", i);
6175		device_printf(dev, "hw tdh = %d, hw tdt = %d\n",
6176	    		E1000_READ_REG(&adapter->hw, E1000_TDH(i)),
6177	    		E1000_READ_REG(&adapter->hw, E1000_TDT(i)));
6178		device_printf(dev, "Tx Queue Status = %d\n", txr->busy);
6179		device_printf(dev, "TX descriptors avail = %d\n",
6180	    		txr->tx_avail);
6181		device_printf(dev, "Tx Descriptors avail failure = %ld\n",
6182	    		txr->no_desc_avail);
6183		device_printf(dev, "RX Queue %d ------\n", i);
6184		device_printf(dev, "hw rdh = %d, hw rdt = %d\n",
6185	    		E1000_READ_REG(&adapter->hw, E1000_RDH(i)),
6186	    		E1000_READ_REG(&adapter->hw, E1000_RDT(i)));
6187		device_printf(dev, "RX discarded packets = %ld\n",
6188	    		rxr->rx_discarded);
6189		device_printf(dev, "RX Next to Check = %d\n", rxr->next_to_check);
6190		device_printf(dev, "RX Next to Refresh = %d\n", rxr->next_to_refresh);
6191	}
6192}
6193
6194#ifdef EM_MULTIQUEUE
6195/*
6196 * 82574 only:
6197 * Write a new value to the EEPROM increasing the number of MSIX
6198 * vectors from 3 to 5, for proper multiqueue support.
6199 */
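/*
 * (The NVM field encodes the vector count minus one, which is why the
 * code checks for and writes the value 4 to advertise 5 vectors.)
 */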
6200static void
6201em_enable_vectors_82574(struct adapter *adapter)
6202{
6203	struct e1000_hw *hw = &adapter->hw;
6204	device_t dev = adapter->dev;
6205	u16 edata;
6206
6207	e1000_read_nvm(hw, EM_NVM_PCIE_CTRL, 1, &edata);
6208	device_printf(dev, "Current cap: %#06x\n", edata);
6209	if (((edata & EM_NVM_MSIX_N_MASK) >> EM_NVM_MSIX_N_SHIFT) != 4) {
6210		device_printf(dev, "Writing to eeprom: increasing "
6211		    "reported MSIX vectors from 3 to 5...\n");
6212		edata &= ~(EM_NVM_MSIX_N_MASK);
6213		edata |= 4 << EM_NVM_MSIX_N_SHIFT;
6214		e1000_write_nvm(hw, EM_NVM_PCIE_CTRL, 1, &edata);
6215		e1000_update_nvm_checksum(hw);
6216		device_printf(dev, "Writing to eeprom: done\n");
6217	}
6218}
6219#endif
6220
6221#ifdef DDB
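/*
 * Usage from the in-kernel debugger:
 *   db> em_reset_dev     - reinitialize every em(4) adapter
 *   db> em_dump_queue    - print per-queue debug state for each adapter
 */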
6222DB_COMMAND(em_reset_dev, em_ddb_reset_dev)
6223{
6224	devclass_t	dc;
6225	int max_em;
6226
6227	dc = devclass_find("em");
6228	max_em = devclass_get_maxunit(dc);
6229
6230	for (int index = 0; index < max_em; index++) {
6231		device_t dev;
6232		dev = devclass_get_device(dc, index);
6233		if (dev != NULL && device_get_driver(dev) == &em_driver) {
6234			struct adapter *adapter = device_get_softc(dev);
6235			EM_CORE_LOCK(adapter);
6236			em_init_locked(adapter);
6237			EM_CORE_UNLOCK(adapter);
6238		}
6239	}
6240}
6241DB_COMMAND(em_dump_queue, em_ddb_dump_queue)
6242{
6243	devclass_t	dc;
6244	int max_em;
6245
6246	dc = devclass_find("em");
6247	max_em = devclass_get_maxunit(dc);
6248
6249	for (int index = 0; index < max_em; index++) {
6250		device_t dev;
6251		dev = devclass_get_device(dc, index);
6252		if (dev != NULL && device_get_driver(dev) == &em_driver)
6253			em_print_debug_info(device_get_softc(dev));
6254	}
6255
6256}
6257#endif
6258