/******************************************************************************

  Copyright (c) 2001-2015, Intel Corporation
  All rights reserved.

  Redistribution and use in source and binary forms, with or without
  modification, are permitted provided that the following conditions are met:

   1. Redistributions of source code must retain the above copyright notice,
      this list of conditions and the following disclaimer.

   2. Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.

   3. Neither the name of the Intel Corporation nor the names of its
      contributors may be used to endorse or promote products derived from
      this software without specific prior written permission.

  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  POSSIBILITY OF SUCH DAMAGE.

******************************************************************************/
/*$FreeBSD: stable/11/sys/dev/e1000/if_em.c 303664 2016-08-02 15:43:18Z sbruno $*/

#include "opt_em.h"
#include "opt_ddb.h"
#include "opt_inet.h"
#include "opt_inet6.h"

#ifdef HAVE_KERNEL_OPTION_HEADERS
#include "opt_device_polling.h"
#endif

#include <sys/param.h>
#include <sys/systm.h>
#ifdef DDB
#include <sys/types.h>
#include <ddb/ddb.h>
#endif
#if __FreeBSD_version >= 800000
#include <sys/buf_ring.h>
#endif
#include <sys/bus.h>
#include <sys/endian.h>
#include <sys/kernel.h>
#include <sys/kthread.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/module.h>
#include <sys/rman.h>
#include <sys/smp.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/sysctl.h>
#include <sys/taskqueue.h>
#include <sys/eventhandler.h>
#include <machine/bus.h>
#include <machine/resource.h>

#include <net/bpf.h>
#include <net/ethernet.h>
#include <net/if.h>
#include <net/if_var.h>
#include <net/if_arp.h>
#include <net/if_dl.h>
#include <net/if_media.h>

#include <net/if_types.h>
#include <net/if_vlan_var.h>

#include <netinet/in_systm.h>
#include <netinet/in.h>
#include <netinet/if_ether.h>
#include <netinet/ip.h>
#include <netinet/ip6.h>
#include <netinet/tcp.h>
#include <netinet/udp.h>

#include <machine/in_cksum.h>
#include <dev/led/led.h>
#include <dev/pci/pcivar.h>
#include <dev/pci/pcireg.h>

#include "e1000_api.h"
#include "e1000_82571.h"
#include "if_em.h"

/*********************************************************************
 *  Driver version:
 *********************************************************************/
char em_driver_version[] = "7.6.1-k";

/*********************************************************************
 *  PCI Device ID Table
 *
 *  Used by probe to select the devices to load on.
 *  The last field stores an index into e1000_strings.
 *  The last entry must be all 0s.
 *
 *  { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index }
 *********************************************************************/

static em_vendor_info_t em_vendor_info_array[] =
{
	/* Intel(R) PRO/1000 Network Connection */
	{ 0x8086, E1000_DEV_ID_82571EB_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_SERDES_DUAL,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_SERDES_QUAD,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER_LP,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_QUAD_FIBER,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571PT_QUAD_COPPER,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82572EI_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82572EI_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82572EI_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82572EI,		PCI_ANY_ID, PCI_ANY_ID, 0},

	{ 0x8086, E1000_DEV_ID_82573E,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82573E_IAMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82573L,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82583V,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_SPT,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_SPT,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_DPT,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_DPT,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IGP_M_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IGP_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IGP_C,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IFE,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IFE_GT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IFE_G,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IGP_M,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_82567V_3,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IGP_M_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IGP_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IGP_C,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IGP_M,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IGP_M_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IFE,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IFE_GT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IFE_G,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_BM,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82574L,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82574LA,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH10_R_BM_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH10_R_BM_LF,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH10_R_BM_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH10_D_BM_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH10_D_BM_LF,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH10_D_BM_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_M_HV_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_M_HV_LC,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_D_HV_DM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_D_HV_DC,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH2_LV_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH2_LV_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_LPT_I217_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_LPT_I217_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_LPTLP_I218_LM,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_LPTLP_I218_V,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_I218_LM2,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_I218_V2,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_I218_LM3,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_I218_V3,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_SPT_I219_LM, PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_SPT_I219_V,  PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_SPT_I219_LM2,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_SPT_I219_V2, PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_LBG_I219_LM3,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	/* required last entry */
	{ 0, 0, 0, 0, 0}
};

/*********************************************************************
 *  Table of branding strings for all supported NICs.
 *********************************************************************/

static char *em_strings[] = {
	"Intel(R) PRO/1000 Network Connection"
};

/*********************************************************************
 *  Function prototypes
 *********************************************************************/
static int	em_probe(device_t);
static int	em_attach(device_t);
static int	em_detach(device_t);
static int	em_shutdown(device_t);
static int	em_suspend(device_t);
static int	em_resume(device_t);
#ifdef EM_MULTIQUEUE
static int	em_mq_start(if_t, struct mbuf *);
static int	em_mq_start_locked(if_t,
		    struct tx_ring *);
static void	em_qflush(if_t);
#else
static void	em_start(if_t);
static void	em_start_locked(if_t, struct tx_ring *);
#endif
static int	em_ioctl(if_t, u_long, caddr_t);
static uint64_t	em_get_counter(if_t, ift_counter);
static void	em_init(void *);
static void	em_init_locked(struct adapter *);
static void	em_stop(void *);
static void	em_media_status(if_t, struct ifmediareq *);
static int	em_media_change(if_t);
static void	em_identify_hardware(struct adapter *);
static int	em_allocate_pci_resources(struct adapter *);
static int	em_allocate_legacy(struct adapter *);
static int	em_allocate_msix(struct adapter *);
static int	em_allocate_queues(struct adapter *);
static int	em_setup_msix(struct adapter *);
static void	em_free_pci_resources(struct adapter *);
static void	em_local_timer(void *);
static void	em_reset(struct adapter *);
static int	em_setup_interface(device_t, struct adapter *);
static void	em_flush_desc_rings(struct adapter *);

static void	em_setup_transmit_structures(struct adapter *);
static void	em_initialize_transmit_unit(struct adapter *);
static int	em_allocate_transmit_buffers(struct tx_ring *);
static void	em_free_transmit_structures(struct adapter *);
static void	em_free_transmit_buffers(struct tx_ring *);

static int	em_setup_receive_structures(struct adapter *);
static int	em_allocate_receive_buffers(struct rx_ring *);
static void	em_initialize_receive_unit(struct adapter *);
static void	em_free_receive_structures(struct adapter *);
static void	em_free_receive_buffers(struct rx_ring *);

static void	em_enable_intr(struct adapter *);
static void	em_disable_intr(struct adapter *);
static void	em_update_stats_counters(struct adapter *);
static void	em_add_hw_stats(struct adapter *adapter);
static void	em_txeof(struct tx_ring *);
static bool	em_rxeof(struct rx_ring *, int, int *);
#ifndef __NO_STRICT_ALIGNMENT
static int	em_fixup_rx(struct rx_ring *);
#endif
static void	em_setup_rxdesc(union e1000_rx_desc_extended *,
		    const struct em_rxbuffer *rxbuf);
static void	em_receive_checksum(uint32_t status, struct mbuf *);
static void	em_transmit_checksum_setup(struct tx_ring *, struct mbuf *, int,
		    struct ip *, u32 *, u32 *);
static void	em_tso_setup(struct tx_ring *, struct mbuf *, int, struct ip *,
		    struct tcphdr *, u32 *, u32 *);
static void	em_set_promisc(struct adapter *);
static void	em_disable_promisc(struct adapter *);
static void	em_set_multi(struct adapter *);
static void	em_update_link_status(struct adapter *);
static void	em_refresh_mbufs(struct rx_ring *, int);
static void	em_register_vlan(void *, if_t, u16);
static void	em_unregister_vlan(void *, if_t, u16);
static void	em_setup_vlan_hw_support(struct adapter *);
static int	em_xmit(struct tx_ring *, struct mbuf **);
static int	em_dma_malloc(struct adapter *, bus_size_t,
		    struct em_dma_alloc *, int);
static void	em_dma_free(struct adapter *, struct em_dma_alloc *);
static int	em_sysctl_nvm_info(SYSCTL_HANDLER_ARGS);
static void	em_print_nvm_info(struct adapter *);
static int	em_sysctl_debug_info(SYSCTL_HANDLER_ARGS);
static void	em_print_debug_info(struct adapter *);
static int	em_is_valid_ether_addr(u8 *);
static int	em_sysctl_int_delay(SYSCTL_HANDLER_ARGS);
static void	em_add_int_delay_sysctl(struct adapter *, const char *,
		    const char *, struct em_int_delay_info *, int, int);
/* Management and WOL Support */
static void	em_init_manageability(struct adapter *);
static void	em_release_manageability(struct adapter *);
static void	em_get_hw_control(struct adapter *);
static void	em_release_hw_control(struct adapter *);
static void	em_get_wakeup(device_t);
static void	em_enable_wakeup(device_t);
static int	em_enable_phy_wakeup(struct adapter *);
static void	em_led_func(void *, int);
static void	em_disable_aspm(struct adapter *);

static int	em_irq_fast(void *);

/* MSIX handlers */
static void	em_msix_tx(void *);
static void	em_msix_rx(void *);
static void	em_msix_link(void *);
static void	em_handle_tx(void *context, int pending);
static void	em_handle_rx(void *context, int pending);
static void	em_handle_link(void *context, int pending);

#ifdef EM_MULTIQUEUE
static void	em_enable_vectors_82574(struct adapter *);
#endif

static void	em_set_sysctl_value(struct adapter *, const char *,
		    const char *, int *, int);
static int	em_set_flowcntl(SYSCTL_HANDLER_ARGS);
static int	em_sysctl_eee(SYSCTL_HANDLER_ARGS);

static __inline void em_rx_discard(struct rx_ring *, int);

#ifdef DEVICE_POLLING
static poll_handler_t em_poll;
#endif /* DEVICE_POLLING */

/*********************************************************************
 *  FreeBSD Device Interface Entry Points
 *********************************************************************/

static device_method_t em_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe, em_probe),
	DEVMETHOD(device_attach, em_attach),
	DEVMETHOD(device_detach, em_detach),
	DEVMETHOD(device_shutdown, em_shutdown),
	DEVMETHOD(device_suspend, em_suspend),
	DEVMETHOD(device_resume, em_resume),
	DEVMETHOD_END
};

static driver_t em_driver = {
	"em", em_methods, sizeof(struct adapter),
};

devclass_t em_devclass;
DRIVER_MODULE(em, pci, em_driver, em_devclass, 0, 0);
MODULE_DEPEND(em, pci, 1, 1, 1);
MODULE_DEPEND(em, ether, 1, 1, 1);
#ifdef DEV_NETMAP
MODULE_DEPEND(em, netmap, 1, 1, 1);
#endif /* DEV_NETMAP */
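
/*
 * Note: built as a module this driver is if_em.ko; it can be loaded
 * at boot by adding if_em_load="YES" to /boot/loader.conf.
 */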

/*********************************************************************
 *  Tunable default values.
 *********************************************************************/

#define EM_TICKS_TO_USECS(ticks)	((1024 * (ticks) + 500) / 1000)
#define EM_USECS_TO_TICKS(usecs)	((1000 * (usecs) + 512) / 1024)
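/*
 * The hardware delay timers tick in 1.024 usec units, hence the
 * 1024/1000 scaling with rounding above; e.g. EM_TICKS_TO_USECS(100)
 * = (1024 * 100 + 500) / 1000 = 102 usecs.
 */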
#define M_TSO_LEN			66

#define MAX_INTS_PER_SEC	8000
#define DEFAULT_ITR		(1000000000/(MAX_INTS_PER_SEC * 256))
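/*
 * The ITR register counts in 256 ns units, so DEFAULT_ITR works out to
 * 1000000000 / (8000 * 256) = 488 units, i.e. 488 * 256 ns ~= 125 usecs
 * between interrupts, capping the rate near 8000 interrupts per second.
 */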

/* Allow common code without TSO */
#ifndef CSUM_TSO
#define CSUM_TSO	0
#endif

#define TSO_WORKAROUND	4

static SYSCTL_NODE(_hw, OID_AUTO, em, CTLFLAG_RD, 0, "EM driver parameters");

static int em_disable_crc_stripping = 0;
SYSCTL_INT(_hw_em, OID_AUTO, disable_crc_stripping, CTLFLAG_RDTUN,
    &em_disable_crc_stripping, 0, "Disable CRC Stripping");

static int em_tx_int_delay_dflt = EM_TICKS_TO_USECS(EM_TIDV);
static int em_rx_int_delay_dflt = EM_TICKS_TO_USECS(EM_RDTR);
SYSCTL_INT(_hw_em, OID_AUTO, tx_int_delay, CTLFLAG_RDTUN, &em_tx_int_delay_dflt,
    0, "Default transmit interrupt delay in usecs");
SYSCTL_INT(_hw_em, OID_AUTO, rx_int_delay, CTLFLAG_RDTUN, &em_rx_int_delay_dflt,
    0, "Default receive interrupt delay in usecs");

static int em_tx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_TADV);
static int em_rx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_RADV);
SYSCTL_INT(_hw_em, OID_AUTO, tx_abs_int_delay, CTLFLAG_RDTUN,
    &em_tx_abs_int_delay_dflt, 0,
    "Default transmit interrupt delay limit in usecs");
SYSCTL_INT(_hw_em, OID_AUTO, rx_abs_int_delay, CTLFLAG_RDTUN,
    &em_rx_abs_int_delay_dflt, 0,
    "Default receive interrupt delay limit in usecs");

static int em_rxd = EM_DEFAULT_RXD;
static int em_txd = EM_DEFAULT_TXD;
SYSCTL_INT(_hw_em, OID_AUTO, rxd, CTLFLAG_RDTUN, &em_rxd, 0,
    "Number of receive descriptors per queue");
SYSCTL_INT(_hw_em, OID_AUTO, txd, CTLFLAG_RDTUN, &em_txd, 0,
    "Number of transmit descriptors per queue");

static int em_smart_pwr_down = FALSE;
SYSCTL_INT(_hw_em, OID_AUTO, smart_pwr_down, CTLFLAG_RDTUN, &em_smart_pwr_down,
    0, "Set to true to leave smart power down enabled on newer adapters");

/* Controls whether promiscuous also shows bad packets */
static int em_debug_sbp = FALSE;
SYSCTL_INT(_hw_em, OID_AUTO, sbp, CTLFLAG_RDTUN, &em_debug_sbp, 0,
    "Show bad packets in promiscuous mode");

static int em_enable_msix = TRUE;
SYSCTL_INT(_hw_em, OID_AUTO, enable_msix, CTLFLAG_RDTUN, &em_enable_msix, 0,
    "Enable MSI-X interrupts");

#ifdef EM_MULTIQUEUE
static int em_num_queues = 1;
SYSCTL_INT(_hw_em, OID_AUTO, num_queues, CTLFLAG_RDTUN, &em_num_queues, 0,
    "82574 only: Number of queues to configure, 0 indicates autoconfigure");
#endif

/*
** Global variable to store last used CPU when binding queues
** to CPUs in em_allocate_msix.  Starts at CPU_FIRST and increments when a
** queue is bound to a cpu.
*/
static int em_last_bind_cpu = -1;

/* How many packets rxeof tries to clean at a time */
static int em_rx_process_limit = 100;
SYSCTL_INT(_hw_em, OID_AUTO, rx_process_limit, CTLFLAG_RDTUN,
    &em_rx_process_limit, 0,
    "Maximum number of received packets to process "
    "at a time, -1 means unlimited");

/* Energy efficient ethernet - default to OFF */
static int eee_setting = 1;
SYSCTL_INT(_hw_em, OID_AUTO, eee_setting, CTLFLAG_RDTUN, &eee_setting, 0,
    "Disable Energy Efficient Ethernet (1 disables EEE)");

/* Global used in WOL setup with multiport cards */
static int global_quad_port_a = 0;

#ifdef DEV_NETMAP	/* see ixgbe.c for details */
#include <dev/netmap/if_em_netmap.h>
#endif /* DEV_NETMAP */

/*********************************************************************
 *  Device identification routine
 *
 *  em_probe determines if the driver should be loaded on the
 *  adapter, based on the PCI vendor/device ID of the adapter.
 *
 *  return BUS_PROBE_DEFAULT on success, positive on failure
 *********************************************************************/

static int
em_probe(device_t dev)
{
	char		adapter_name[60];
	uint16_t	pci_vendor_id = 0;
	uint16_t	pci_device_id = 0;
	uint16_t	pci_subvendor_id = 0;
	uint16_t	pci_subdevice_id = 0;
	em_vendor_info_t *ent;

	INIT_DEBUGOUT("em_probe: begin");

	pci_vendor_id = pci_get_vendor(dev);
	if (pci_vendor_id != EM_VENDOR_ID)
		return (ENXIO);

	pci_device_id = pci_get_device(dev);
	pci_subvendor_id = pci_get_subvendor(dev);
	pci_subdevice_id = pci_get_subdevice(dev);

	ent = em_vendor_info_array;
	while (ent->vendor_id != 0) {
		if ((pci_vendor_id == ent->vendor_id) &&
		    (pci_device_id == ent->device_id) &&

		    ((pci_subvendor_id == ent->subvendor_id) ||
		    (ent->subvendor_id == PCI_ANY_ID)) &&

		    ((pci_subdevice_id == ent->subdevice_id) ||
		    (ent->subdevice_id == PCI_ANY_ID))) {
			sprintf(adapter_name, "%s %s",
				em_strings[ent->index],
				em_driver_version);
			device_set_desc_copy(dev, adapter_name);
			return (BUS_PROBE_DEFAULT);
		}
		ent++;
	}

	return (ENXIO);
}

/*********************************************************************
 *  Device initialization routine
 *
 *  The attach entry point is called when the driver is being loaded.
 *  This routine identifies the type of hardware, allocates all resources
 *  and initializes the hardware.
 *
 *  return 0 on success, positive on failure
 *********************************************************************/

static int
em_attach(device_t dev)
{
	struct adapter	*adapter;
	struct e1000_hw	*hw;
	int		error = 0;

	INIT_DEBUGOUT("em_attach: begin");

	if (resource_disabled("em", device_get_unit(dev))) {
		device_printf(dev, "Disabled by device hint\n");
		return (ENXIO);
	}

	adapter = device_get_softc(dev);
	adapter->dev = adapter->osdep.dev = dev;
	hw = &adapter->hw;
	EM_CORE_LOCK_INIT(adapter, device_get_nameunit(dev));

	/* SYSCTL stuff */
	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
	    OID_AUTO, "nvm", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
	    em_sysctl_nvm_info, "I", "NVM Information");

	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
	    OID_AUTO, "debug", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
	    em_sysctl_debug_info, "I", "Debug Information");

	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
	    OID_AUTO, "fc", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
	    em_set_flowcntl, "I", "Flow Control");

	callout_init_mtx(&adapter->timer, &adapter->core_mtx, 0);

	/* Determine hardware and mac info */
	em_identify_hardware(adapter);

	/* Setup PCI resources */
	if (em_allocate_pci_resources(adapter)) {
		device_printf(dev, "Allocation of PCI resources failed\n");
		error = ENXIO;
		goto err_pci;
	}

	/*
	** For ICH8 and family we need to
	** map the flash memory, and this
	** must happen after the MAC is
	** identified
	*/
	if ((hw->mac.type == e1000_ich8lan) ||
	    (hw->mac.type == e1000_ich9lan) ||
	    (hw->mac.type == e1000_ich10lan) ||
	    (hw->mac.type == e1000_pchlan) ||
	    (hw->mac.type == e1000_pch2lan) ||
	    (hw->mac.type == e1000_pch_lpt)) {
		int rid = EM_BAR_TYPE_FLASH;
		adapter->flash = bus_alloc_resource_any(dev,
		    SYS_RES_MEMORY, &rid, RF_ACTIVE);
		if (adapter->flash == NULL) {
			device_printf(dev, "Mapping of Flash failed\n");
			error = ENXIO;
			goto err_pci;
		}
		/* This is used in the shared code */
		hw->flash_address = (u8 *)adapter->flash;
		adapter->osdep.flash_bus_space_tag =
		    rman_get_bustag(adapter->flash);
		adapter->osdep.flash_bus_space_handle =
		    rman_get_bushandle(adapter->flash);
	}
	/*
	** In the newer SPT devices the flash is not a separate
	** BAR; rather, it lives within BAR0, so use the same tag
	** and an offset handle for the FLASH read/write macros
	** in the shared code.
	*/
	else if (hw->mac.type == e1000_pch_spt) {
		adapter->osdep.flash_bus_space_tag =
		    adapter->osdep.mem_bus_space_tag;
		adapter->osdep.flash_bus_space_handle =
		    adapter->osdep.mem_bus_space_handle
		    + E1000_FLASH_BASE_ADDR;
	}

	/* Do Shared Code initialization */
	error = e1000_setup_init_funcs(hw, TRUE);
	if (error) {
		device_printf(dev, "Setup of Shared code failed, error %d\n",
		    error);
		error = ENXIO;
		goto err_pci;
	}

	/*
	 * Setup MSI/X or MSI if PCI Express
	 */
	adapter->msix = em_setup_msix(adapter);

	e1000_get_bus_info(hw);

	/* Set up some sysctls for the tunable interrupt delays */
	em_add_int_delay_sysctl(adapter, "rx_int_delay",
	    "receive interrupt delay in usecs", &adapter->rx_int_delay,
	    E1000_REGISTER(hw, E1000_RDTR), em_rx_int_delay_dflt);
	em_add_int_delay_sysctl(adapter, "tx_int_delay",
	    "transmit interrupt delay in usecs", &adapter->tx_int_delay,
	    E1000_REGISTER(hw, E1000_TIDV), em_tx_int_delay_dflt);
	em_add_int_delay_sysctl(adapter, "rx_abs_int_delay",
	    "receive interrupt delay limit in usecs",
	    &adapter->rx_abs_int_delay,
	    E1000_REGISTER(hw, E1000_RADV),
	    em_rx_abs_int_delay_dflt);
	em_add_int_delay_sysctl(adapter, "tx_abs_int_delay",
	    "transmit interrupt delay limit in usecs",
	    &adapter->tx_abs_int_delay,
	    E1000_REGISTER(hw, E1000_TADV),
	    em_tx_abs_int_delay_dflt);
	em_add_int_delay_sysctl(adapter, "itr",
	    "interrupt delay limit in usecs/4",
	    &adapter->tx_itr,
	    E1000_REGISTER(hw, E1000_ITR),
	    DEFAULT_ITR);

	/* Sysctl for limiting the amount of work done in the taskqueue */
	em_set_sysctl_value(adapter, "rx_processing_limit",
	    "max number of rx packets to process", &adapter->rx_process_limit,
	    em_rx_process_limit);

	/*
	 * Validate number of transmit and receive descriptors. It
	 * must not exceed hardware maximum, and must be multiple
	 * of E1000_DBA_ALIGN.
	 */
	if (((em_txd * sizeof(struct e1000_tx_desc)) % EM_DBA_ALIGN) != 0 ||
	    (em_txd > EM_MAX_TXD) || (em_txd < EM_MIN_TXD)) {
		device_printf(dev, "Using %d TX descriptors instead of %d!\n",
		    EM_DEFAULT_TXD, em_txd);
		adapter->num_tx_desc = EM_DEFAULT_TXD;
	} else
		adapter->num_tx_desc = em_txd;

	if (((em_rxd * sizeof(union e1000_rx_desc_extended)) % EM_DBA_ALIGN) != 0 ||
	    (em_rxd > EM_MAX_RXD) || (em_rxd < EM_MIN_RXD)) {
		device_printf(dev, "Using %d RX descriptors instead of %d!\n",
		    EM_DEFAULT_RXD, em_rxd);
		adapter->num_rx_desc = EM_DEFAULT_RXD;
	} else
		adapter->num_rx_desc = em_rxd;

	hw->mac.autoneg = DO_AUTO_NEG;
	hw->phy.autoneg_wait_to_complete = FALSE;
	hw->phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;

	/* Copper options */
	if (hw->phy.media_type == e1000_media_type_copper) {
		hw->phy.mdix = AUTO_ALL_MODES;
		hw->phy.disable_polarity_correction = FALSE;
		hw->phy.ms_type = EM_MASTER_SLAVE;
	}

	/*
	 * Set the frame limits assuming
	 * standard ethernet sized frames.
	 */
	adapter->hw.mac.max_frame_size =
	    ETHERMTU + ETHER_HDR_LEN + ETHERNET_FCS_SIZE;

	/*
	 * This controls when hardware reports transmit completion
	 * status.
	 */
	hw->mac.report_tx_early = 1;

	/*
	** Get queue/ring memory
	*/
	if (em_allocate_queues(adapter)) {
		error = ENOMEM;
		goto err_pci;
	}

	/* Allocate multicast array memory. */
	adapter->mta = malloc(sizeof(u8) * ETH_ADDR_LEN *
	    MAX_NUM_MULTICAST_ADDRESSES, M_DEVBUF, M_NOWAIT);
	if (adapter->mta == NULL) {
		device_printf(dev, "Can not allocate multicast setup array\n");
		error = ENOMEM;
		goto err_late;
	}

	/* Check SOL/IDER usage */
	if (e1000_check_reset_block(hw))
		device_printf(dev, "PHY reset is blocked"
		    " due to SOL/IDER session.\n");

	/* Sysctl for setting Energy Efficient Ethernet */
	hw->dev_spec.ich8lan.eee_disable = eee_setting;
	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
	    OID_AUTO, "eee_control", CTLTYPE_INT|CTLFLAG_RW,
	    adapter, 0, em_sysctl_eee, "I",
	    "Disable Energy Efficient Ethernet");

	/*
	** Start from a known state; this is
	** important for reading the NVM and
	** MAC address from it.
	*/
	e1000_reset_hw(hw);

	/* Make sure we have a good EEPROM before we read from it */
	if (e1000_validate_nvm_checksum(hw) < 0) {
		/*
		** Some PCI-E parts fail the first check due to
		** the link being in a sleep state; call it again,
		** and if it fails a second time it's a real issue.
		*/
		if (e1000_validate_nvm_checksum(hw) < 0) {
			device_printf(dev,
			    "The EEPROM Checksum Is Not Valid\n");
			error = EIO;
			goto err_late;
		}
	}

	/* Copy the permanent MAC address out of the EEPROM */
	if (e1000_read_mac_addr(hw) < 0) {
		device_printf(dev, "EEPROM read error while reading MAC"
		    " address\n");
		error = EIO;
		goto err_late;
	}

	if (!em_is_valid_ether_addr(hw->mac.addr)) {
		device_printf(dev, "Invalid MAC address\n");
		error = EIO;
		goto err_late;
	}

	/* Disable ULP support */
	e1000_disable_ulp_lpt_lp(hw, TRUE);

	/*
	**  Do interrupt configuration
	*/
	if (adapter->msix > 1) /* Do MSIX */
		error = em_allocate_msix(adapter);
	else  /* MSI or Legacy */
		error = em_allocate_legacy(adapter);
	if (error)
		goto err_late;

	/*
	 * Get Wake-on-Lan and Management info for later use
	 */
	em_get_wakeup(dev);

	/* Setup OS specific network interface */
	if (em_setup_interface(dev, adapter) != 0)
		goto err_late;

	em_reset(adapter);

	/* Initialize statistics */
	em_update_stats_counters(adapter);

	hw->mac.get_link_status = 1;
	em_update_link_status(adapter);

	/* Register for VLAN events */
	adapter->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
	    em_register_vlan, adapter, EVENTHANDLER_PRI_FIRST);
	adapter->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
	    em_unregister_vlan, adapter, EVENTHANDLER_PRI_FIRST);

	em_add_hw_stats(adapter);

	/* Non-AMT based hardware can now take control from firmware */
	if (adapter->has_manage && !adapter->has_amt)
		em_get_hw_control(adapter);

	/* Tell the stack that the interface is not active */
	if_setdrvflagbits(adapter->ifp, IFF_DRV_OACTIVE, IFF_DRV_RUNNING);

	adapter->led_dev = led_create(em_led_func, adapter,
	    device_get_nameunit(dev));
#ifdef DEV_NETMAP
	em_netmap_attach(adapter);
#endif /* DEV_NETMAP */

	INIT_DEBUGOUT("em_attach: end");

	return (0);

err_late:
	em_free_transmit_structures(adapter);
	em_free_receive_structures(adapter);
	em_release_hw_control(adapter);
	if (adapter->ifp != (void *)NULL)
		if_free(adapter->ifp);
err_pci:
	em_free_pci_resources(adapter);
	free(adapter->mta, M_DEVBUF);
	EM_CORE_LOCK_DESTROY(adapter);

	return (error);
}

/*********************************************************************
 *  Device removal routine
 *
 *  The detach entry point is called when the driver is being removed.
 *  This routine stops the adapter and deallocates all the resources
 *  that were allocated for driver operation.
 *
 *  return 0 on success, positive on failure
 *********************************************************************/

static int
em_detach(device_t dev)
{
	struct adapter	*adapter = device_get_softc(dev);
	if_t ifp = adapter->ifp;

	INIT_DEBUGOUT("em_detach: begin");

	/* Make sure VLANS are not using driver */
	if (if_vlantrunkinuse(ifp)) {
		device_printf(dev, "Vlan in use, detach first\n");
		return (EBUSY);
	}

#ifdef DEVICE_POLLING
	if (if_getcapenable(ifp) & IFCAP_POLLING)
		ether_poll_deregister(ifp);
#endif

	if (adapter->led_dev != NULL)
		led_destroy(adapter->led_dev);

	EM_CORE_LOCK(adapter);
	adapter->in_detach = 1;
	em_stop(adapter);
	EM_CORE_UNLOCK(adapter);
	EM_CORE_LOCK_DESTROY(adapter);

	e1000_phy_hw_reset(&adapter->hw);

	em_release_manageability(adapter);
	em_release_hw_control(adapter);

	/* Unregister VLAN events */
	if (adapter->vlan_attach != NULL)
		EVENTHANDLER_DEREGISTER(vlan_config, adapter->vlan_attach);
	if (adapter->vlan_detach != NULL)
		EVENTHANDLER_DEREGISTER(vlan_unconfig, adapter->vlan_detach);

	ether_ifdetach(adapter->ifp);
	callout_drain(&adapter->timer);

#ifdef DEV_NETMAP
	netmap_detach(ifp);
#endif /* DEV_NETMAP */

	em_free_pci_resources(adapter);
	bus_generic_detach(dev);
	if_free(ifp);

	em_free_transmit_structures(adapter);
	em_free_receive_structures(adapter);

	em_release_hw_control(adapter);
	free(adapter->mta, M_DEVBUF);

	return (0);
}

/*********************************************************************
 *
 *  Shutdown entry point
 *
 **********************************************************************/

static int
em_shutdown(device_t dev)
{
	return em_suspend(dev);
}

/*
 * Suspend/resume device methods.
 */
static int
em_suspend(device_t dev)
{
	struct adapter *adapter = device_get_softc(dev);

	EM_CORE_LOCK(adapter);

	em_release_manageability(adapter);
	em_release_hw_control(adapter);
	em_enable_wakeup(dev);

	EM_CORE_UNLOCK(adapter);

	return bus_generic_suspend(dev);
}

static int
em_resume(device_t dev)
{
	struct adapter *adapter = device_get_softc(dev);
	struct tx_ring	*txr = adapter->tx_rings;
	if_t ifp = adapter->ifp;

	EM_CORE_LOCK(adapter);
	if (adapter->hw.mac.type == e1000_pch2lan)
		e1000_resume_workarounds_pchlan(&adapter->hw);
	em_init_locked(adapter);
	em_init_manageability(adapter);

	if ((if_getflags(ifp) & IFF_UP) &&
	    (if_getdrvflags(ifp) & IFF_DRV_RUNNING) && adapter->link_active) {
		for (int i = 0; i < adapter->num_queues; i++, txr++) {
			EM_TX_LOCK(txr);
#ifdef EM_MULTIQUEUE
			if (!drbr_empty(ifp, txr->br))
				em_mq_start_locked(ifp, txr);
#else
			if (!if_sendq_empty(ifp))
				em_start_locked(ifp, txr);
#endif
			EM_TX_UNLOCK(txr);
		}
	}
	EM_CORE_UNLOCK(adapter);

	return bus_generic_resume(dev);
}


#ifndef EM_MULTIQUEUE
static void
em_start_locked(if_t ifp, struct tx_ring *txr)
{
	struct adapter	*adapter = if_getsoftc(ifp);
	struct mbuf	*m_head;

	EM_TX_LOCK_ASSERT(txr);

	if ((if_getdrvflags(ifp) & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) !=
	    IFF_DRV_RUNNING)
		return;

	if (!adapter->link_active)
		return;

	while (!if_sendq_empty(ifp)) {
		/* Call cleanup if number of TX descriptors low */
		if (txr->tx_avail <= EM_TX_CLEANUP_THRESHOLD)
			em_txeof(txr);
		if (txr->tx_avail < EM_MAX_SCATTER) {
			if_setdrvflagbits(ifp, IFF_DRV_OACTIVE, 0);
			break;
		}
		m_head = if_dequeue(ifp);
		if (m_head == NULL)
			break;
		/*
		 *  Encapsulation can modify our pointer, and/or make it
		 *  NULL on failure.  In that event, we can't requeue.
		 */
		if (em_xmit(txr, &m_head)) {
			if (m_head == NULL)
				break;
			if_sendq_prepend(ifp, m_head);
			break;
		}

		/* Mark the queue as having work */
		if (txr->busy == EM_TX_IDLE)
			txr->busy = EM_TX_BUSY;

		/* Send a copy of the frame to the BPF listener */
		ETHER_BPF_MTAP(ifp, m_head);

	}

	return;
}

static void
em_start(if_t ifp)
{
	struct adapter	*adapter = if_getsoftc(ifp);
	struct tx_ring	*txr = adapter->tx_rings;

	if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) {
		EM_TX_LOCK(txr);
		em_start_locked(ifp, txr);
		EM_TX_UNLOCK(txr);
	}
	return;
}
#else /* EM_MULTIQUEUE */
/*********************************************************************
 *  Multiqueue Transmit routines
 *
 *  em_mq_start is called by the stack to initiate a transmit.
 *  However, if the ring is busy the driver can queue the request
 *  rather than do an immediate send; that deferral, rather than
 *  the multiple tx queues alone, is the advantage in this driver.
 **********************************************************************/
/*
** Multiqueue capable stack interface
*/
static int
em_mq_start(if_t ifp, struct mbuf *m)
{
	struct adapter	*adapter = if_getsoftc(ifp);
	struct tx_ring	*txr = adapter->tx_rings;
	unsigned int	i, error;
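
	/*
	 * Select a TX ring: when the stack has computed a flow id
	 * (e.g. from RSS hashing) use it so a given flow stays on one
	 * queue; otherwise spread the load by the current CPU.
	 */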
	if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE)
		i = m->m_pkthdr.flowid % adapter->num_queues;
	else
		i = curcpu % adapter->num_queues;

	txr = &adapter->tx_rings[i];

	error = drbr_enqueue(ifp, txr->br, m);
	if (error)
		return (error);

	if (EM_TX_TRYLOCK(txr)) {
		em_mq_start_locked(ifp, txr);
		EM_TX_UNLOCK(txr);
	} else
		taskqueue_enqueue(txr->tq, &txr->tx_task);

	return (0);
}

static int
em_mq_start_locked(if_t ifp, struct tx_ring *txr)
{
	struct adapter	*adapter = txr->adapter;
	struct mbuf	*next;
	int		err = 0, enq = 0;

	EM_TX_LOCK_ASSERT(txr);

	if (((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0) ||
	    adapter->link_active == 0) {
		return (ENETDOWN);
	}

	/* Process the queue */
	while ((next = drbr_peek(ifp, txr->br)) != NULL) {
		if ((err = em_xmit(txr, &next)) != 0) {
			if (next == NULL) {
				/* It was freed, move forward */
				drbr_advance(ifp, txr->br);
			} else {
				/*
				 * Still have one left, it may not be
				 * the same since the transmit function
				 * may have changed it.
				 */
				drbr_putback(ifp, txr->br, next);
			}
			break;
		}
		drbr_advance(ifp, txr->br);
		enq++;
		if_inc_counter(ifp, IFCOUNTER_OBYTES, next->m_pkthdr.len);
		if (next->m_flags & M_MCAST)
			if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1);
		ETHER_BPF_MTAP(ifp, next);
		if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0)
			break;
	}

	/* Mark the queue as having work */
	if ((enq > 0) && (txr->busy == EM_TX_IDLE))
		txr->busy = EM_TX_BUSY;

	if (txr->tx_avail < EM_MAX_SCATTER)
		em_txeof(txr);
	if (txr->tx_avail < EM_MAX_SCATTER) {
		if_setdrvflagbits(ifp, IFF_DRV_OACTIVE, 0);
	}
	return (err);
}

/*
** Flush all ring buffers
*/
static void
em_qflush(if_t ifp)
{
	struct adapter	*adapter = if_getsoftc(ifp);
	struct tx_ring	*txr = adapter->tx_rings;
	struct mbuf	*m;

	for (int i = 0; i < adapter->num_queues; i++, txr++) {
		EM_TX_LOCK(txr);
		while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
			m_freem(m);
		EM_TX_UNLOCK(txr);
	}
	if_qflush(ifp);
}
#endif /* EM_MULTIQUEUE */

/*********************************************************************
 *  Ioctl entry point
 *
 *  em_ioctl is called when the user wants to configure the
 *  interface.
 *
 *  return 0 on success, positive on failure
 **********************************************************************/

static int
em_ioctl(if_t ifp, u_long command, caddr_t data)
{
	struct adapter	*adapter = if_getsoftc(ifp);
	struct ifreq	*ifr = (struct ifreq *)data;
#if defined(INET) || defined(INET6)
	struct ifaddr	*ifa = (struct ifaddr *)data;
#endif
	bool		avoid_reset = FALSE;
	int		error = 0;

	if (adapter->in_detach)
		return (error);

	switch (command) {
	case SIOCSIFADDR:
#ifdef INET
		if (ifa->ifa_addr->sa_family == AF_INET)
			avoid_reset = TRUE;
#endif
#ifdef INET6
		if (ifa->ifa_addr->sa_family == AF_INET6)
			avoid_reset = TRUE;
#endif
		/*
		** Calling init results in link renegotiation,
		** so we avoid doing it when possible.
		*/
		if (avoid_reset) {
			if_setflagbits(ifp, IFF_UP, 0);
			if (!(if_getdrvflags(ifp) & IFF_DRV_RUNNING))
				em_init(adapter);
#ifdef INET
			if (!(if_getflags(ifp) & IFF_NOARP))
				arp_ifinit(ifp, ifa);
#endif
		} else
			error = ether_ioctl(ifp, command, data);
		break;
	case SIOCSIFMTU:
	    {
		int max_frame_size;

		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFMTU (Set Interface MTU)");

		EM_CORE_LOCK(adapter);
		switch (adapter->hw.mac.type) {
		case e1000_82571:
		case e1000_82572:
		case e1000_ich9lan:
		case e1000_ich10lan:
		case e1000_pch2lan:
		case e1000_pch_lpt:
		case e1000_pch_spt:
		case e1000_82574:
		case e1000_82583:
		case e1000_80003es2lan:	/* 9K Jumbo Frame size */
			max_frame_size = 9234;
			break;
		case e1000_pchlan:
			max_frame_size = 4096;
			break;
			/* Adapters that do not support jumbo frames */
		case e1000_ich8lan:
			max_frame_size = ETHER_MAX_LEN;
			break;
		default:
			max_frame_size = MAX_JUMBO_FRAME_SIZE;
		}
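		/*
		 * The MTU excludes the 14-byte Ethernet header and the
		 * 4-byte CRC, so e.g. a 9234-byte max frame permits an
		 * MTU of up to 9234 - 14 - 4 = 9216 bytes.
		 */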
		if (ifr->ifr_mtu > max_frame_size - ETHER_HDR_LEN -
		    ETHER_CRC_LEN) {
			EM_CORE_UNLOCK(adapter);
			error = EINVAL;
			break;
		}

		if_setmtu(ifp, ifr->ifr_mtu);
		adapter->hw.mac.max_frame_size =
		    if_getmtu(ifp) + ETHER_HDR_LEN + ETHER_CRC_LEN;
		if (if_getdrvflags(ifp) & IFF_DRV_RUNNING)
			em_init_locked(adapter);
		EM_CORE_UNLOCK(adapter);
		break;
	    }
	case SIOCSIFFLAGS:
		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFFLAGS (Set Interface Flags)");
		EM_CORE_LOCK(adapter);
		if (if_getflags(ifp) & IFF_UP) {
			if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) {
				if ((if_getflags(ifp) ^ adapter->if_flags) &
				    (IFF_PROMISC | IFF_ALLMULTI)) {
					em_disable_promisc(adapter);
					em_set_promisc(adapter);
				}
			} else
				em_init_locked(adapter);
		} else
			if (if_getdrvflags(ifp) & IFF_DRV_RUNNING)
				em_stop(adapter);
		adapter->if_flags = if_getflags(ifp);
		EM_CORE_UNLOCK(adapter);
		break;
	case SIOCADDMULTI:
	case SIOCDELMULTI:
		IOCTL_DEBUGOUT("ioctl rcv'd: SIOC(ADD|DEL)MULTI");
		if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) {
			EM_CORE_LOCK(adapter);
			em_disable_intr(adapter);
			em_set_multi(adapter);
#ifdef DEVICE_POLLING
			if (!(if_getcapenable(ifp) & IFCAP_POLLING))
#endif
				em_enable_intr(adapter);
			EM_CORE_UNLOCK(adapter);
		}
		break;
	case SIOCSIFMEDIA:
		/* Check SOL/IDER usage */
		EM_CORE_LOCK(adapter);
		if (e1000_check_reset_block(&adapter->hw)) {
			EM_CORE_UNLOCK(adapter);
			device_printf(adapter->dev, "Media change is"
			    " blocked due to SOL/IDER session.\n");
			break;
		}
		EM_CORE_UNLOCK(adapter);
		/* falls thru */
	case SIOCGIFMEDIA:
		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCxIFMEDIA (Get/Set Interface Media)");
		error = ifmedia_ioctl(ifp, ifr, &adapter->media, command);
		break;
	case SIOCSIFCAP:
	    {
		int mask, reinit;

		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFCAP (Set Capabilities)");
		reinit = 0;
		mask = ifr->ifr_reqcap ^ if_getcapenable(ifp);
#ifdef DEVICE_POLLING
		if (mask & IFCAP_POLLING) {
			if (ifr->ifr_reqcap & IFCAP_POLLING) {
				error = ether_poll_register(em_poll, ifp);
				if (error)
					return (error);
				EM_CORE_LOCK(adapter);
				em_disable_intr(adapter);
				if_setcapenablebit(ifp, IFCAP_POLLING, 0);
				EM_CORE_UNLOCK(adapter);
			} else {
				error = ether_poll_deregister(ifp);
				/* Enable interrupt even in error case */
				EM_CORE_LOCK(adapter);
				em_enable_intr(adapter);
				if_setcapenablebit(ifp, 0, IFCAP_POLLING);
				EM_CORE_UNLOCK(adapter);
			}
		}
#endif
		if (mask & IFCAP_HWCSUM) {
			if_togglecapenable(ifp, IFCAP_HWCSUM);
			reinit = 1;
		}
		if (mask & IFCAP_TSO4) {
			if_togglecapenable(ifp, IFCAP_TSO4);
			reinit = 1;
		}
		if (mask & IFCAP_VLAN_HWTAGGING) {
			if_togglecapenable(ifp, IFCAP_VLAN_HWTAGGING);
			reinit = 1;
		}
		if (mask & IFCAP_VLAN_HWFILTER) {
			if_togglecapenable(ifp, IFCAP_VLAN_HWFILTER);
			reinit = 1;
		}
		if (mask & IFCAP_VLAN_HWTSO) {
			if_togglecapenable(ifp, IFCAP_VLAN_HWTSO);
			reinit = 1;
		}
		if ((mask & IFCAP_WOL) &&
		    (if_getcapabilities(ifp) & IFCAP_WOL) != 0) {
			if (mask & IFCAP_WOL_MCAST)
				if_togglecapenable(ifp, IFCAP_WOL_MCAST);
			if (mask & IFCAP_WOL_MAGIC)
				if_togglecapenable(ifp, IFCAP_WOL_MAGIC);
		}
		if (reinit && (if_getdrvflags(ifp) & IFF_DRV_RUNNING))
			em_init(adapter);
		if_vlancap(ifp);
		break;
	    }

	default:
		error = ether_ioctl(ifp, command, data);
		break;
	}

	return (error);
}


/*********************************************************************
 *  Init entry point
 *
 *  This routine is used in two ways. It is used by the stack as
 *  init entry point in network interface structure. It is also used
 *  by the driver as a hw/sw initialization routine to get to a
 *  consistent state.
 *
 *  return 0 on success, positive on failure
 **********************************************************************/

static void
em_init_locked(struct adapter *adapter)
{
	if_t ifp = adapter->ifp;
	device_t	dev = adapter->dev;

	INIT_DEBUGOUT("em_init: begin");

	EM_CORE_LOCK_ASSERT(adapter);

	em_disable_intr(adapter);
	callout_stop(&adapter->timer);

	/* Get the latest mac address, User can use a LAA */
	bcopy(if_getlladdr(adapter->ifp), adapter->hw.mac.addr,
	    ETHER_ADDR_LEN);

	/* Put the address into the Receive Address Array */
	e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);

	/*
	 * With the 82571 adapter, RAR[0] may be overwritten
	 * when the other port is reset, we make a duplicate
	 * in RAR[14] for that eventuality, this assures
	 * the interface continues to function.
	 */
	if (adapter->hw.mac.type == e1000_82571) {
		e1000_set_laa_state_82571(&adapter->hw, TRUE);
		e1000_rar_set(&adapter->hw, adapter->hw.mac.addr,
		    E1000_RAR_ENTRIES - 1);
	}

	/* Initialize the hardware */
	em_reset(adapter);
	em_update_link_status(adapter);

	/* Setup VLAN support, basic and offload if available */
	E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);

	/* Set hardware offload abilities */
	if_clearhwassist(ifp);
	if (if_getcapenable(ifp) & IFCAP_TXCSUM)
		if_sethwassistbits(ifp, CSUM_TCP | CSUM_UDP, 0);
	/*
	** There have proven to be problems with TSO when not
	** at full gigabit speed, so disable the assist automatically
	** when at lower speeds.  -jfv
	*/
	if (if_getcapenable(ifp) & IFCAP_TSO4) {
		if (adapter->link_speed == SPEED_1000)
			if_sethwassistbits(ifp, CSUM_TSO, 0);
	}

	/* Configure for OS presence */
	em_init_manageability(adapter);

	/* Prepare transmit descriptors and buffers */
	em_setup_transmit_structures(adapter);
	em_initialize_transmit_unit(adapter);

	/* Setup Multicast table */
	em_set_multi(adapter);

	/*
	** Figure out the desired mbuf
	** pool for doing jumbos
	*/
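	/* MCLBYTES is 2K, MJUMPAGESIZE is one page, MJUM9BYTES is 9K. */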
	if (adapter->hw.mac.max_frame_size <= 2048)
		adapter->rx_mbuf_sz = MCLBYTES;
	else if (adapter->hw.mac.max_frame_size <= 4096)
		adapter->rx_mbuf_sz = MJUMPAGESIZE;
	else
		adapter->rx_mbuf_sz = MJUM9BYTES;

	/* Prepare receive descriptors and buffers */
	if (em_setup_receive_structures(adapter)) {
		device_printf(dev, "Could not setup receive structures\n");
		em_stop(adapter);
		return;
	}
	em_initialize_receive_unit(adapter);

	/* Use real VLAN Filter support? */
	if (if_getcapenable(ifp) & IFCAP_VLAN_HWTAGGING) {
		if (if_getcapenable(ifp) & IFCAP_VLAN_HWFILTER)
			/* Use real VLAN Filter support */
			em_setup_vlan_hw_support(adapter);
		else {
			u32 ctrl;
			ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL);
			ctrl |= E1000_CTRL_VME;
			E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl);
		}
	}

	/* Don't lose promiscuous settings */
	em_set_promisc(adapter);

	/* Set the interface as ACTIVE */
	if_setdrvflagbits(ifp, IFF_DRV_RUNNING, IFF_DRV_OACTIVE);

	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
	e1000_clear_hw_cntrs_base_generic(&adapter->hw);

	/* MSI/X configuration for 82574 */
	if (adapter->hw.mac.type == e1000_82574) {
		int tmp;
		tmp = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
		tmp |= E1000_CTRL_EXT_PBA_CLR;
		E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, tmp);
		/* Set the IVAR - interrupt vector routing. */
		E1000_WRITE_REG(&adapter->hw, E1000_IVAR, adapter->ivars);
	}

#ifdef DEVICE_POLLING
	/*
	 * Only enable interrupts if we are not polling, make sure
	 * they are off otherwise.
	 */
	if (if_getcapenable(ifp) & IFCAP_POLLING)
		em_disable_intr(adapter);
	else
#endif /* DEVICE_POLLING */
		em_enable_intr(adapter);

	/* AMT based hardware can now take control from firmware */
	if (adapter->has_manage && adapter->has_amt)
		em_get_hw_control(adapter);
}

static void
em_init(void *arg)
{
	struct adapter *adapter = arg;

	EM_CORE_LOCK(adapter);
	em_init_locked(adapter);
	EM_CORE_UNLOCK(adapter);
}


#ifdef DEVICE_POLLING
/*********************************************************************
 *
 *  Legacy polling routine: note this only works with single queue
 *
 *********************************************************************/
static int
em_poll(if_t ifp, enum poll_cmd cmd, int count)
{
	struct adapter *adapter = if_getsoftc(ifp);
	struct tx_ring	*txr = adapter->tx_rings;
	struct rx_ring	*rxr = adapter->rx_rings;
	u32		reg_icr;
	int		rx_done;

	EM_CORE_LOCK(adapter);
	if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0) {
		EM_CORE_UNLOCK(adapter);
		return (0);
	}

	if (cmd == POLL_AND_CHECK_STATUS) {
		reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
		if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
			callout_stop(&adapter->timer);
			adapter->hw.mac.get_link_status = 1;
			em_update_link_status(adapter);
			callout_reset(&adapter->timer, hz,
			    em_local_timer, adapter);
		}
	}
	EM_CORE_UNLOCK(adapter);

	em_rxeof(rxr, count, &rx_done);

	EM_TX_LOCK(txr);
	em_txeof(txr);
#ifdef EM_MULTIQUEUE
	if (!drbr_empty(ifp, txr->br))
		em_mq_start_locked(ifp, txr);
#else
	if (!if_sendq_empty(ifp))
		em_start_locked(ifp, txr);
#endif
	EM_TX_UNLOCK(txr);

	return (rx_done);
}
#endif /* DEVICE_POLLING */


/*********************************************************************
 *
 *  Fast Legacy/MSI Combined Interrupt Service routine
 *
 *********************************************************************/
static int
em_irq_fast(void *arg)
{
	struct adapter	*adapter = arg;
	if_t ifp;
	u32		reg_icr;

	ifp = adapter->ifp;

	reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);

	/* Hot eject?  */
	if (reg_icr == 0xffffffff)
		return FILTER_STRAY;

	/* Definitely not our interrupt.  */
	if (reg_icr == 0x0)
		return FILTER_STRAY;

	/*
	 * Starting with the 82571 chip, bit 31 should be used to
	 * determine whether the interrupt belongs to us.
	 */
	if (adapter->hw.mac.type >= e1000_82571 &&
	    (reg_icr & E1000_ICR_INT_ASSERTED) == 0)
		return FILTER_STRAY;

	em_disable_intr(adapter);
	taskqueue_enqueue(adapter->tq, &adapter->que_task);

	/* Link status change */
	if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
		adapter->hw.mac.get_link_status = 1;
		taskqueue_enqueue(taskqueue_fast, &adapter->link_task);
	}

	if (reg_icr & E1000_ICR_RXO)
		adapter->rx_overruns++;
	return FILTER_HANDLED;
}

/* Combined RX/TX handler, used by Legacy and MSI */
static void
em_handle_que(void *context, int pending)
{
	struct adapter	*adapter = context;
	if_t ifp = adapter->ifp;
	struct tx_ring	*txr = adapter->tx_rings;
	struct rx_ring	*rxr = adapter->rx_rings;

	if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) {
		bool more = em_rxeof(rxr, adapter->rx_process_limit, NULL);

		EM_TX_LOCK(txr);
		em_txeof(txr);
#ifdef EM_MULTIQUEUE
		if (!drbr_empty(ifp, txr->br))
			em_mq_start_locked(ifp, txr);
#else
		if (!if_sendq_empty(ifp))
			em_start_locked(ifp, txr);
#endif
		EM_TX_UNLOCK(txr);
		if (more) {
			taskqueue_enqueue(adapter->tq, &adapter->que_task);
			return;
		}
	}

	em_enable_intr(adapter);
	return;
}


/*********************************************************************
 *
 *  MSIX Interrupt Service Routines
 *
 **********************************************************************/
static void
em_msix_tx(void *arg)
{
	struct tx_ring *txr = arg;
	struct adapter *adapter = txr->adapter;
	if_t ifp = adapter->ifp;

	++txr->tx_irq;
	EM_TX_LOCK(txr);
	em_txeof(txr);
#ifdef EM_MULTIQUEUE
	if (!drbr_empty(ifp, txr->br))
		em_mq_start_locked(ifp, txr);
#else
	if (!if_sendq_empty(ifp))
		em_start_locked(ifp, txr);
#endif

	/* Reenable this interrupt */
	E1000_WRITE_REG(&adapter->hw, E1000_IMS, txr->ims);
	EM_TX_UNLOCK(txr);
	return;
}

/*********************************************************************
 *
 *  MSIX RX Interrupt Service routine
 *
 **********************************************************************/

static void
em_msix_rx(void *arg)
{
	struct rx_ring	*rxr = arg;
	struct adapter	*adapter = rxr->adapter;
	bool		more;

	++rxr->rx_irq;
	if (!(if_getdrvflags(adapter->ifp) & IFF_DRV_RUNNING))
		return;
	more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
	if (more)
		taskqueue_enqueue(rxr->tq, &rxr->rx_task);
	else {
		/* Reenable this interrupt */
		E1000_WRITE_REG(&adapter->hw, E1000_IMS, rxr->ims);
	}
	return;
}

/*********************************************************************
 *
 *  MSIX Link Fast Interrupt Service routine
 *
 **********************************************************************/
static void
em_msix_link(void *arg)
{
	struct adapter	*adapter = arg;
	u32		reg_icr;

	++adapter->link_irq;
	reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);

	if (reg_icr & E1000_ICR_RXO)
		adapter->rx_overruns++;

	if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
		adapter->hw.mac.get_link_status = 1;
		em_handle_link(adapter, 0);
	} else
		E1000_WRITE_REG(&adapter->hw, E1000_IMS,
		    EM_MSIX_LINK | E1000_IMS_LSC);
	/*
	** Because we must read the ICR for this interrupt,
	** doing so may clear other pending causes via autoclear;
	** for this reason we simply raise a soft interrupt
	** for all these vectors.
	*/
	if (reg_icr) {
		E1000_WRITE_REG(&adapter->hw,
			E1000_ICS, adapter->ims);
	}
	return;
}

static void
em_handle_rx(void *context, int pending)
{
	struct rx_ring	*rxr = context;
	struct adapter	*adapter = rxr->adapter;
	bool		more;

	more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
	if (more)
		taskqueue_enqueue(rxr->tq, &rxr->rx_task);
	else {
		/* Reenable this interrupt */
		E1000_WRITE_REG(&adapter->hw, E1000_IMS, rxr->ims);
	}
}

static void
em_handle_tx(void *context, int pending)
{
	struct tx_ring	*txr = context;
	struct adapter	*adapter = txr->adapter;
	if_t ifp = adapter->ifp;

	EM_TX_LOCK(txr);
	em_txeof(txr);
#ifdef EM_MULTIQUEUE
	if (!drbr_empty(ifp, txr->br))
		em_mq_start_locked(ifp, txr);
#else
	if (!if_sendq_empty(ifp))
		em_start_locked(ifp, txr);
#endif
	E1000_WRITE_REG(&adapter->hw, E1000_IMS, txr->ims);
	EM_TX_UNLOCK(txr);
}

static void
em_handle_link(void *context, int pending)
{
	struct adapter	*adapter = context;
	struct tx_ring	*txr = adapter->tx_rings;
	if_t ifp = adapter->ifp;

	if (!(if_getdrvflags(ifp) & IFF_DRV_RUNNING))
		return;

	EM_CORE_LOCK(adapter);
	callout_stop(&adapter->timer);
	em_update_link_status(adapter);
	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
	E1000_WRITE_REG(&adapter->hw, E1000_IMS,
	    EM_MSIX_LINK | E1000_IMS_LSC);
	if (adapter->link_active) {
		for (int i = 0; i < adapter->num_queues; i++, txr++) {
			EM_TX_LOCK(txr);
#ifdef EM_MULTIQUEUE
			if (!drbr_empty(ifp, txr->br))
				em_mq_start_locked(ifp, txr);
#else
1772			if (!if_sendq_empty(ifp))
1773				em_start_locked(ifp, txr);
1774#endif
1775			EM_TX_UNLOCK(txr);
1776		}
1777	}
1778	EM_CORE_UNLOCK(adapter);
1779}
1780
1781
1782/*********************************************************************
1783 *
1784 *  Media Ioctl callback
1785 *
1786 *  This routine is called whenever the user queries the status of
1787 *  the interface using ifconfig.
1788 *
1789 **********************************************************************/
1790static void
1791em_media_status(if_t ifp, struct ifmediareq *ifmr)
1792{
1793	struct adapter *adapter = if_getsoftc(ifp);
1794	u_char fiber_type = IFM_1000_SX;
1795
1796	INIT_DEBUGOUT("em_media_status: begin");
1797
1798	EM_CORE_LOCK(adapter);
1799	em_update_link_status(adapter);
1800
1801	ifmr->ifm_status = IFM_AVALID;
1802	ifmr->ifm_active = IFM_ETHER;
1803
1804	if (!adapter->link_active) {
1805		EM_CORE_UNLOCK(adapter);
1806		return;
1807	}
1808
1809	ifmr->ifm_status |= IFM_ACTIVE;
1810
1811	if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
1812	    (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
1813		ifmr->ifm_active |= fiber_type | IFM_FDX;
1814	} else {
1815		switch (adapter->link_speed) {
1816		case 10:
1817			ifmr->ifm_active |= IFM_10_T;
1818			break;
1819		case 100:
1820			ifmr->ifm_active |= IFM_100_TX;
1821			break;
1822		case 1000:
1823			ifmr->ifm_active |= IFM_1000_T;
1824			break;
1825		}
1826		if (adapter->link_duplex == FULL_DUPLEX)
1827			ifmr->ifm_active |= IFM_FDX;
1828		else
1829			ifmr->ifm_active |= IFM_HDX;
1830	}
1831	EM_CORE_UNLOCK(adapter);
1832}
1833
1834/*********************************************************************
1835 *
1836 *  Media Ioctl callback
1837 *
1838 *  This routine is called when the user changes speed/duplex using
1839 *  media/mediaopt options with ifconfig.
1840 *
1841 **********************************************************************/
1842static int
1843em_media_change(if_t ifp)
1844{
1845	struct adapter *adapter = if_getsoftc(ifp);
1846	struct ifmedia  *ifm = &adapter->media;
1847
1848	INIT_DEBUGOUT("em_media_change: begin");
1849
1850	if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
1851		return (EINVAL);
1852
1853	EM_CORE_LOCK(adapter);
1854	switch (IFM_SUBTYPE(ifm->ifm_media)) {
1855	case IFM_AUTO:
1856		adapter->hw.mac.autoneg = DO_AUTO_NEG;
1857		adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
1858		break;
1859	case IFM_1000_LX:
1860	case IFM_1000_SX:
1861	case IFM_1000_T:
1862		adapter->hw.mac.autoneg = DO_AUTO_NEG;
1863		adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
1864		break;
1865	case IFM_100_TX:
1866		adapter->hw.mac.autoneg = FALSE;
1867		adapter->hw.phy.autoneg_advertised = 0;
1868		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1869			adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_FULL;
1870		else
1871			adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_HALF;
1872		break;
1873	case IFM_10_T:
1874		adapter->hw.mac.autoneg = FALSE;
1875		adapter->hw.phy.autoneg_advertised = 0;
1876		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1877			adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_FULL;
1878		else
1879			adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_HALF;
1880		break;
1881	default:
1882		device_printf(adapter->dev, "Unsupported media type\n");
1883	}
1884
1885	em_init_locked(adapter);
1886	EM_CORE_UNLOCK(adapter);
1887
1888	return (0);
1889}
1890
1891/*********************************************************************
1892 *
1893 *  This routine maps the mbufs to tx descriptors.
1894 *
1895 *  return 0 on success, positive on failure
1896 **********************************************************************/
1897
1898static int
1899em_xmit(struct tx_ring *txr, struct mbuf **m_headp)
1900{
1901	struct adapter		*adapter = txr->adapter;
1902	bus_dma_segment_t	segs[EM_MAX_SCATTER];
1903	bus_dmamap_t		map;
1904	struct em_txbuffer	*tx_buffer, *tx_buffer_mapped;
1905	struct e1000_tx_desc	*ctxd = NULL;
1906	struct mbuf		*m_head;
1907	struct ether_header	*eh;
1908	struct ip		*ip = NULL;
1909	struct tcphdr		*tp = NULL;
1910	u32			txd_upper = 0, txd_lower = 0;
1911	int			ip_off, poff;
1912	int			nsegs, i, j, first, last = 0;
1913	int			error;
1914	bool			do_tso, tso_desc, remap = TRUE;
1915
1916	m_head = *m_headp;
1917	do_tso = (m_head->m_pkthdr.csum_flags & CSUM_TSO);
1918	tso_desc = FALSE;
1919	ip_off = poff = 0;
1920
1921	/*
1922	 * Intel recommends entire IP/TCP header length reside in a single
1923	 * buffer. If multiple descriptors are used to describe the IP and
1924	 * TCP header, each descriptor should describe one or more
1925	 * complete headers; descriptors referencing only parts of headers
1926	 * are not supported. If all layer headers are not coalesced into
1927	 * a single buffer, each buffer should not cross a 4KB boundary,
1928	 * or be larger than the maximum read request size.
1929	 * The controller also requires the IP/TCP header to be modified for
1930	 * TSO to work, so we first get a writable mbuf chain and coalesce the
1931	 * ethernet/IP/TCP headers into a single buffer to meet the
1932	 * controller's requirements. This also simplifies IP/TCP/UDP checksum
1933	 * offloading, which has similar restrictions.
1934	 */
1935	if (do_tso || m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD) {
1936		if (do_tso || (m_head->m_next != NULL &&
1937		    m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD)) {
1938			if (M_WRITABLE(*m_headp) == 0) {
1939				m_head = m_dup(*m_headp, M_NOWAIT);
1940				m_freem(*m_headp);
1941				if (m_head == NULL) {
1942					*m_headp = NULL;
1943					return (ENOBUFS);
1944				}
1945				*m_headp = m_head;
1946			}
1947		}
1948		/*
1949		 * XXX
1950		 * Assume IPv4, we don't have TSO/checksum offload support
1951		 * for IPv6 yet.
1952		 */
1953		ip_off = sizeof(struct ether_header);
1954		if (m_head->m_len < ip_off) {
1955			m_head = m_pullup(m_head, ip_off);
1956			if (m_head == NULL) {
1957				*m_headp = NULL;
1958				return (ENOBUFS);
1959			}
1960		}
1961		eh = mtod(m_head, struct ether_header *);
1962		if (eh->ether_type == htons(ETHERTYPE_VLAN)) {
1963			ip_off = sizeof(struct ether_vlan_header);
1964			if (m_head->m_len < ip_off) {
1965				m_head = m_pullup(m_head, ip_off);
1966				if (m_head == NULL) {
1967					*m_headp = NULL;
1968					return (ENOBUFS);
1969				}
1970			}
1971		}
1972		if (m_head->m_len < ip_off + sizeof(struct ip)) {
1973			m_head = m_pullup(m_head, ip_off + sizeof(struct ip));
1974			if (m_head == NULL) {
1975				*m_headp = NULL;
1976				return (ENOBUFS);
1977			}
1978		}
1979		ip = (struct ip *)(mtod(m_head, char *) + ip_off);
1980		poff = ip_off + (ip->ip_hl << 2);
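		/*
		 * ip_hl is in 32-bit words, so "<< 2" converts it to
		 * bytes; poff is now the byte offset of the L4 header.
		 */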
1981
1982		if (do_tso || (m_head->m_pkthdr.csum_flags & CSUM_TCP)) {
1983			if (m_head->m_len < poff + sizeof(struct tcphdr)) {
1984				m_head = m_pullup(m_head, poff +
1985				    sizeof(struct tcphdr));
1986				if (m_head == NULL) {
1987					*m_headp = NULL;
1988					return (ENOBUFS);
1989				}
1990			}
1991			tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
1992			/*
1993			 * TSO workaround:
1994			 *   pull 4 more bytes of data into it.
1995			 */
1996			if (m_head->m_len < poff + (tp->th_off << 2)) {
1997				m_head = m_pullup(m_head, poff +
1998				                 (tp->th_off << 2) +
1999				                 TSO_WORKAROUND);
2000				if (m_head == NULL) {
2001					*m_headp = NULL;
2002					return (ENOBUFS);
2003				}
2004			}
2005			ip = (struct ip *)(mtod(m_head, char *) + ip_off);
2006			tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
2007			if (do_tso) {
2008				ip->ip_len = htons(m_head->m_pkthdr.tso_segsz +
2009				                  (ip->ip_hl << 2) +
2010				                  (tp->th_off << 2));
2011				ip->ip_sum = 0;
2012				/*
2013				 * The TCP pseudo checksum must not include
2014				 * the payload length, so the driver recomputes
2015				 * it here to match what the hardware expects,
2016				 * as required by Microsoft's Large Send
2017				 * specification.
2018				 */
2019				tp->th_sum = in_pseudo(ip->ip_src.s_addr,
2020				    ip->ip_dst.s_addr, htons(IPPROTO_TCP));
2021			}
2022		} else if (m_head->m_pkthdr.csum_flags & CSUM_UDP) {
2023			if (m_head->m_len < poff + sizeof(struct udphdr)) {
2024				m_head = m_pullup(m_head, poff +
2025				    sizeof(struct udphdr));
2026				if (m_head == NULL) {
2027					*m_headp = NULL;
2028					return (ENOBUFS);
2029				}
2030			}
2031			ip = (struct ip *)(mtod(m_head, char *) + ip_off);
2032		}
2033		*m_headp = m_head;
2034	}
2035
2036	/*
2037	 * Map the packet for DMA
2038	 *
2039	 * Capture the first descriptor index,
2040	 * this descriptor will have the index
2041	 * of the EOP which is the only one that
2042	 * now gets a DONE bit writeback.
2043	 */
2044	first = txr->next_avail_desc;
2045	tx_buffer = &txr->tx_buffers[first];
2046	tx_buffer_mapped = tx_buffer;
2047	map = tx_buffer->map;
2048
2049retry:
2050	error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
2051	    *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
2052
2053	/*
2054	 * There are two types of errors we can (try) to handle:
2055	 * - EFBIG means the mbuf chain was too long and bus_dma ran
2056	 *   out of segments.  Collapse the mbuf chain and try again.
2057	 * - ENOMEM means bus_dma could not obtain enough bounce buffers
2058	 *   at this point in time.  Defer sending and try again later.
2059	 * All other errors, in particular EINVAL, are fatal and prevent the
2060	 * mbuf chain from ever going through.  Drop it and report error.
2061	 */
2062	if (error == EFBIG && remap) {
2063		struct mbuf *m;
2064
2065		m = m_collapse(*m_headp, M_NOWAIT, EM_MAX_SCATTER);
2066		if (m == NULL) {
2067			adapter->mbuf_defrag_failed++;
2068			m_freem(*m_headp);
2069			*m_headp = NULL;
2070			return (ENOBUFS);
2071		}
2072		*m_headp = m;
2073
2074		/* Try it again, but only once */
2075		remap = FALSE;
2076		goto retry;
2077	} else if (error != 0) {
2078		adapter->no_tx_dma_setup++;
2079		m_freem(*m_headp);
2080		*m_headp = NULL;
2081		return (error);
2082	}
2083
2084	/*
2085	 * TSO Hardware workaround, if this packet is not
2086	 * TSO, and is only a single descriptor long, and
2087	 * it follows a TSO burst, then we need to add a
2088	 * sentinel descriptor to prevent premature writeback.
2089	 */
2090	if ((!do_tso) && (txr->tx_tso == TRUE)) {
2091		if (nsegs == 1)
2092			tso_desc = TRUE;
2093		txr->tx_tso = FALSE;
2094	}
2095
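	/*
	** Conservative check: require room for this packet plus a
	** worst-case scatter, which also covers the extra sentinel
	** descriptor a TSO packet may need.
	*/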
2096	if (txr->tx_avail < (nsegs + EM_MAX_SCATTER)) {
2097		txr->no_desc_avail++;
2098		bus_dmamap_unload(txr->txtag, map);
2099		return (ENOBUFS);
2100	}
2101	m_head = *m_headp;
2102
2103	/* Do hardware assists */
2104	if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
2105		em_tso_setup(txr, m_head, ip_off, ip, tp,
2106		    &txd_upper, &txd_lower);
2107		/* we need to make a final sentinel transmit desc */
2108		tso_desc = TRUE;
2109	} else if (m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD)
2110		em_transmit_checksum_setup(txr, m_head,
2111		    ip_off, ip, &txd_upper, &txd_lower);
2112
2113	if (m_head->m_flags & M_VLANTAG) {
2114		/* Set the vlan id. */
2115		txd_upper |= htole16(if_getvtag(m_head)) << 16;
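		/*
		 * The tag lands in the legacy descriptor's 'special'
		 * field (bits 31:16 of the upper dword).
		 */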
2116		/* Tell hardware to add tag */
2117		txd_lower |= htole32(E1000_TXD_CMD_VLE);
2118	}
2119
2120	i = txr->next_avail_desc;
2121
2122	/* Set up our transmit descriptors */
2123	for (j = 0; j < nsegs; j++) {
2124		bus_size_t seg_len;
2125		bus_addr_t seg_addr;
2126
2127		tx_buffer = &txr->tx_buffers[i];
2128		ctxd = &txr->tx_base[i];
2129		seg_addr = segs[j].ds_addr;
2130		seg_len  = segs[j].ds_len;
2131		/*
2132		** TSO Workaround:
2133		** If this is the last descriptor, we want to
2134		** split it so we have a small final sentinel
2135		*/
2136		if (tso_desc && (j == (nsegs - 1)) && (seg_len > 8)) {
2137			seg_len -= TSO_WORKAROUND;
2138			ctxd->buffer_addr = htole64(seg_addr);
2139			ctxd->lower.data = htole32(
2140				adapter->txd_cmd | txd_lower | seg_len);
2141			ctxd->upper.data = htole32(txd_upper);
2142			if (++i == adapter->num_tx_desc)
2143				i = 0;
2144
2145			/* Now make the sentinel */
2146			txr->tx_avail--;
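			/*
			 * The sentinel consumes one descriptor beyond
			 * the nsegs subtracted after this loop.
			 */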
2147			ctxd = &txr->tx_base[i];
2148			tx_buffer = &txr->tx_buffers[i];
2149			ctxd->buffer_addr =
2150			    htole64(seg_addr + seg_len);
2151			ctxd->lower.data = htole32(
2152			adapter->txd_cmd | txd_lower | TSO_WORKAROUND);
2153			ctxd->upper.data =
2154			    htole32(txd_upper);
2155			last = i;
2156			if (++i == adapter->num_tx_desc)
2157				i = 0;
2158		} else {
2159			ctxd->buffer_addr = htole64(seg_addr);
2160			ctxd->lower.data = htole32(
2161			adapter->txd_cmd | txd_lower | seg_len);
2162			ctxd->upper.data = htole32(txd_upper);
2163			last = i;
2164			if (++i == adapter->num_tx_desc)
2165				i = 0;
2166		}
2167		tx_buffer->m_head = NULL;
2168		tx_buffer->next_eop = -1;
2169	}
2170
2171	txr->next_avail_desc = i;
2172	txr->tx_avail -= nsegs;
2173
2174	tx_buffer->m_head = m_head;
2175	/*
2176	** Here we swap the maps so that the last descriptor,
2177	** which gets the completion interrupt, has the real
2178	** map, and the first descriptor gets the unused map
2179	** from this descriptor.
2180	*/
2181	tx_buffer_mapped->map = tx_buffer->map;
2182	tx_buffer->map = map;
2183	bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE);
2184
2185	/*
2186	 * The last descriptor of the packet needs
2187	 * End Of Packet (EOP) and Report Status (RS).
2188	 */
2189	ctxd->lower.data |=
2190	    htole32(E1000_TXD_CMD_EOP | E1000_TXD_CMD_RS);
2192	/*
2193	 * Keep track in the first buffer which
2194	 * descriptor will be written back
2195	 */
2196	tx_buffer = &txr->tx_buffers[first];
2197	tx_buffer->next_eop = last;
2198
2199	/*
2200	 * Advance the Transmit Descriptor Tail (TDT), this tells the E1000
2201	 * that this frame is available to transmit.
2202	 */
2203	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
2204	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
2205	E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me), i);
2206
2207	return (0);
2208}
2209
2210static void
2211em_set_promisc(struct adapter *adapter)
2212{
2213	if_t ifp = adapter->ifp;
2214	u32		reg_rctl;
2215
2216	reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2217
2218	if (if_getflags(ifp) & IFF_PROMISC) {
2219		reg_rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
2220		/* Turn this on if you want to see bad packets */
2221		if (em_debug_sbp)
2222			reg_rctl |= E1000_RCTL_SBP;
2223		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2224	} else if (if_getflags(ifp) & IFF_ALLMULTI) {
2225		reg_rctl |= E1000_RCTL_MPE;
2226		reg_rctl &= ~E1000_RCTL_UPE;
2227		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2228	}
2229}
2230
2231static void
2232em_disable_promisc(struct adapter *adapter)
2233{
2234	if_t		ifp = adapter->ifp;
2235	u32		reg_rctl;
2236	int		mcnt = 0;
2237
2238	reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2239	reg_rctl &=  (~E1000_RCTL_UPE);
2240	if (if_getflags(ifp) & IFF_ALLMULTI)
2241		mcnt = MAX_NUM_MULTICAST_ADDRESSES;
2242	else
2243		mcnt = if_multiaddr_count(ifp, MAX_NUM_MULTICAST_ADDRESSES);
2244	/* Don't disable if in MAX groups */
2245	if (mcnt < MAX_NUM_MULTICAST_ADDRESSES)
2246		reg_rctl &=  (~E1000_RCTL_MPE);
2247	reg_rctl &=  (~E1000_RCTL_SBP);
2248	E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2249}
2250
2251
2252/*********************************************************************
2253 *  Multicast Update
2254 *
2255 *  This routine is called whenever multicast address list is updated.
2256 *
2257 **********************************************************************/
2258
2259static void
2260em_set_multi(struct adapter *adapter)
2261{
2262	if_t ifp = adapter->ifp;
2263	u32 reg_rctl = 0;
2264	u8  *mta; /* Multicast array memory */
2265	int mcnt = 0;
2266
2267	IOCTL_DEBUGOUT("em_set_multi: begin");
2268
2269	mta = adapter->mta;
2270	bzero(mta, sizeof(u8) * ETH_ADDR_LEN * MAX_NUM_MULTICAST_ADDRESSES);
2271
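	/*
	** On the 82542 rev 2.0 the receiver must be held in reset
	** (RCTL.RST) while the multicast table array is rewritten,
	** with MWI disabled around the reset (old errata workaround).
	*/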
2272	if (adapter->hw.mac.type == e1000_82542 &&
2273	    adapter->hw.revision_id == E1000_REVISION_2) {
2274		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2275		if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
2276			e1000_pci_clear_mwi(&adapter->hw);
2277		reg_rctl |= E1000_RCTL_RST;
2278		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2279		msec_delay(5);
2280	}
2281
2282	if_multiaddr_array(ifp, mta, &mcnt, MAX_NUM_MULTICAST_ADDRESSES);
2283
2284	if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES) {
2285		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2286		reg_rctl |= E1000_RCTL_MPE;
2287		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2288	} else
2289		e1000_update_mc_addr_list(&adapter->hw, mta, mcnt);
2290
2291	if (adapter->hw.mac.type == e1000_82542 &&
2292	    adapter->hw.revision_id == E1000_REVISION_2) {
2293		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2294		reg_rctl &= ~E1000_RCTL_RST;
2295		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2296		msec_delay(5);
2297		if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
2298			e1000_pci_set_mwi(&adapter->hw);
2299	}
2300}
2301
2302
2303/*********************************************************************
2304 *  Timer routine
2305 *
2306 *  This routine checks for link status and updates statistics.
2307 *
2308 **********************************************************************/
2309
2310static void
2311em_local_timer(void *arg)
2312{
2313	struct adapter	*adapter = arg;
2314	if_t ifp = adapter->ifp;
2315	struct tx_ring	*txr = adapter->tx_rings;
2316	struct rx_ring	*rxr = adapter->rx_rings;
2317	u32		trigger = 0;
2318
2319	EM_CORE_LOCK_ASSERT(adapter);
2320
2321	em_update_link_status(adapter);
2322	em_update_stats_counters(adapter);
2323
2324	/* Reset LAA into RAR[0] on 82571 */
2325	if ((adapter->hw.mac.type == e1000_82571) &&
2326	    e1000_get_laa_state_82571(&adapter->hw))
2327		e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);
2328
2329	/* Mask to use in the irq trigger */
2330	if (adapter->msix_mem) {
2331		for (int i = 0; i < adapter->num_queues; i++, rxr++)
2332			trigger |= rxr->ims;
2333		rxr = adapter->rx_rings;
2334	} else
2335		trigger = E1000_ICS_RXDMT0;
2336
2337	/*
2338	** Check the state of the TX queue(s); this can be
2339	** done without the lock because it's read-only and
2340	** the HUNG state will be static once set.
2341	*/
2342	for (int i = 0; i < adapter->num_queues; i++, txr++) {
2343		if (txr->busy == EM_TX_HUNG)
2344			goto hung;
2345		if (txr->busy >= EM_TX_MAXTRIES)
2346			txr->busy = EM_TX_HUNG;
2347		/* Schedule a TX tasklet if needed */
2348		if (txr->tx_avail <= EM_MAX_SCATTER)
2349			taskqueue_enqueue(txr->tq, &txr->tx_task);
2350	}
2351
2352	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
2353#ifndef DEVICE_POLLING
2354	/* Trigger an RX interrupt to guarantee mbuf refresh */
2355	E1000_WRITE_REG(&adapter->hw, E1000_ICS, trigger);
2356#endif
2357	return;
2358hung:
2359	/* Looks like we're hung */
2360	device_printf(adapter->dev, "Watchdog timeout Queue[%d]-- resetting\n",
2361			txr->me);
2362	em_print_debug_info(adapter);
2363	if_setdrvflagbits(ifp, 0, IFF_DRV_RUNNING);
2364	adapter->watchdog_events++;
2365	em_init_locked(adapter);
2366}
2367
2368
2369static void
2370em_update_link_status(struct adapter *adapter)
2371{
2372	struct e1000_hw *hw = &adapter->hw;
2373	if_t ifp = adapter->ifp;
2374	device_t dev = adapter->dev;
2375	struct tx_ring *txr = adapter->tx_rings;
2376	u32 link_check = 0;
2377
2378	/* Get the cached link value or read phy for real */
2379	switch (hw->phy.media_type) {
2380	case e1000_media_type_copper:
2381		if (hw->mac.get_link_status) {
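			/*
			** The I219 (pch_spt) PHY seems to need a short
			** settle time before it is polled, hence the
			** delay below.
			*/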
2382			if (hw->mac.type == e1000_pch_spt)
2383				msec_delay(50);
2384			/* Do the work to read phy */
2385			e1000_check_for_link(hw);
2386			link_check = !hw->mac.get_link_status;
2387			if (link_check) /* ESB2 fix */
2388				e1000_cfg_on_link_up(hw);
2389		} else
2390			link_check = TRUE;
2391		break;
2392	case e1000_media_type_fiber:
2393		e1000_check_for_link(hw);
2394		link_check = (E1000_READ_REG(hw, E1000_STATUS) &
2395                                 E1000_STATUS_LU);
2396		break;
2397	case e1000_media_type_internal_serdes:
2398		e1000_check_for_link(hw);
2399		link_check = adapter->hw.mac.serdes_has_link;
2400		break;
2401	default:
2402	case e1000_media_type_unknown:
2403		break;
2404	}
2405
2406	/* Now check for a transition */
2407	if (link_check && (adapter->link_active == 0)) {
2408		e1000_get_speed_and_duplex(hw, &adapter->link_speed,
2409		    &adapter->link_duplex);
2410		/* Check if we must disable SPEED_MODE bit on PCI-E */
2411		if ((adapter->link_speed != SPEED_1000) &&
2412		    ((hw->mac.type == e1000_82571) ||
2413		    (hw->mac.type == e1000_82572))) {
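			/*
			** The SPEED_MODE bit only applies at gigabit;
			** clearing it at 10/100 avoids potential TX
			** problems on these MACs (errata workaround).
			*/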
2414			u32 tarc0;
2415			tarc0 = E1000_READ_REG(hw, E1000_TARC(0));
2416			tarc0 &= ~TARC_SPEED_MODE_BIT;
2417			E1000_WRITE_REG(hw, E1000_TARC(0), tarc0);
2418		}
2419		if (bootverbose)
2420			device_printf(dev, "Link is up %d Mbps %s\n",
2421			    adapter->link_speed,
2422			    ((adapter->link_duplex == FULL_DUPLEX) ?
2423			    "Full Duplex" : "Half Duplex"));
2424		adapter->link_active = 1;
2425		adapter->smartspeed = 0;
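		/* link_speed is in Mbps; if_baudrate is in bits/sec */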
2426		if_setbaudrate(ifp, adapter->link_speed * 1000000);
2427		if_link_state_change(ifp, LINK_STATE_UP);
2428	} else if (!link_check && (adapter->link_active == 1)) {
2429		if_setbaudrate(ifp, 0);
2430		adapter->link_speed = 0;
2431		adapter->link_duplex = 0;
2432		if (bootverbose)
2433			device_printf(dev, "Link is Down\n");
2434		adapter->link_active = 0;
2435		/* Link down, disable hang detection */
2436		for (int i = 0; i < adapter->num_queues; i++, txr++)
2437			txr->busy = EM_TX_IDLE;
2438		if_link_state_change(ifp, LINK_STATE_DOWN);
2439	}
2440}
2441
2442/*********************************************************************
2443 *
2444 *  This routine disables all traffic on the adapter by issuing a
2445 *  global reset on the MAC and deallocates TX/RX buffers.
2446 *
2447 *  This routine should always be called with BOTH the CORE
2448 *  and TX locks.
2449 **********************************************************************/
2450
2451static void
2452em_stop(void *arg)
2453{
2454	struct adapter	*adapter = arg;
2455	if_t ifp = adapter->ifp;
2456	struct tx_ring	*txr = adapter->tx_rings;
2457
2458	EM_CORE_LOCK_ASSERT(adapter);
2459
2460	INIT_DEBUGOUT("em_stop: begin");
2461
2462	em_disable_intr(adapter);
2463	callout_stop(&adapter->timer);
2464
2465	/* Tell the stack that the interface is no longer active */
2466	if_setdrvflagbits(ifp, IFF_DRV_OACTIVE, IFF_DRV_RUNNING);
2467
2468	/* Disarm Hang Detection. */
2469	for (int i = 0; i < adapter->num_queues; i++, txr++) {
2470		EM_TX_LOCK(txr);
2471		txr->busy = EM_TX_IDLE;
2472		EM_TX_UNLOCK(txr);
2473	}
2474
2475	/* I219 needs some special flushing to avoid hangs */
2476	if (adapter->hw.mac.type == e1000_pch_spt)
2477		em_flush_desc_rings(adapter);
2478
2479	e1000_reset_hw(&adapter->hw);
2480	E1000_WRITE_REG(&adapter->hw, E1000_WUC, 0);
2481
2482	e1000_led_off(&adapter->hw);
2483	e1000_cleanup_led(&adapter->hw);
2484}
2485
2486
2487/*********************************************************************
2488 *
2489 *  Determine hardware revision.
2490 *
2491 **********************************************************************/
2492static void
2493em_identify_hardware(struct adapter *adapter)
2494{
2495	device_t dev = adapter->dev;
2496
2497	/* Make sure our PCI config space has the necessary stuff set */
2498	pci_enable_busmaster(dev);
2499	adapter->hw.bus.pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2);
2500
2501	/* Save off the information about this board */
2502	adapter->hw.vendor_id = pci_get_vendor(dev);
2503	adapter->hw.device_id = pci_get_device(dev);
2504	adapter->hw.revision_id = pci_read_config(dev, PCIR_REVID, 1);
2505	adapter->hw.subsystem_vendor_id =
2506	    pci_read_config(dev, PCIR_SUBVEND_0, 2);
2507	adapter->hw.subsystem_device_id =
2508	    pci_read_config(dev, PCIR_SUBDEV_0, 2);
2509
2510	/* Do Shared Code Init and Setup */
2511	if (e1000_set_mac_type(&adapter->hw)) {
2512		device_printf(dev, "Setup init failure\n");
2513		return;
2514	}
2515}
2516
2517static int
2518em_allocate_pci_resources(struct adapter *adapter)
2519{
2520	device_t	dev = adapter->dev;
2521	int		rid;
2522
2523	rid = PCIR_BAR(0);
2524	adapter->memory = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
2525	    &rid, RF_ACTIVE);
2526	if (adapter->memory == NULL) {
2527		device_printf(dev, "Unable to allocate bus resource: memory\n");
2528		return (ENXIO);
2529	}
2530	adapter->osdep.mem_bus_space_tag =
2531	    rman_get_bustag(adapter->memory);
2532	adapter->osdep.mem_bus_space_handle =
2533	    rman_get_bushandle(adapter->memory);
2534	adapter->hw.hw_addr = (u8 *)&adapter->osdep.mem_bus_space_handle;
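	/*
	** Register access actually goes through the osdep bus-space
	** tag/handle via hw->back (set below); hw_addr mainly gives
	** the shared code a non-NULL pointer to check.
	*/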
2535
2536	adapter->hw.back = &adapter->osdep;
2537
2538	return (0);
2539}
2540
2541/*********************************************************************
2542 *
2543 *  Setup the Legacy or MSI Interrupt handler
2544 *
2545 **********************************************************************/
2546int
2547em_allocate_legacy(struct adapter *adapter)
2548{
2549	device_t dev = adapter->dev;
2550	struct tx_ring	*txr = adapter->tx_rings;
2551	int error, rid = 0;
2552
2553	/* Manually turn off all interrupts */
2554	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2555
2556	if (adapter->msix == 1) /* using MSI */
2557		rid = 1;
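	/* With MSI the IRQ resource lives at rid 1; legacy INTx is rid 0 */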
2558	/* We allocate a single interrupt resource */
2559	adapter->res = bus_alloc_resource_any(dev,
2560	    SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2561	if (adapter->res == NULL) {
2562		device_printf(dev, "Unable to allocate bus resource: "
2563		    "interrupt\n");
2564		return (ENXIO);
2565	}
2566
2567	/*
2568	 * Allocate a fast interrupt and the associated
2569	 * deferred processing contexts.
2570	 */
2571	TASK_INIT(&adapter->que_task, 0, em_handle_que, adapter);
2572	adapter->tq = taskqueue_create_fast("em_taskq", M_NOWAIT,
2573	    taskqueue_thread_enqueue, &adapter->tq);
2574	taskqueue_start_threads(&adapter->tq, 1, PI_NET, "%s que",
2575	    device_get_nameunit(adapter->dev));
2576	/* Use a TX only tasklet for local timer */
2577	TASK_INIT(&txr->tx_task, 0, em_handle_tx, txr);
2578	txr->tq = taskqueue_create_fast("em_txq", M_NOWAIT,
2579	    taskqueue_thread_enqueue, &txr->tq);
2580	taskqueue_start_threads(&txr->tq, 1, PI_NET, "%s txq",
2581	    device_get_nameunit(adapter->dev));
2582	TASK_INIT(&adapter->link_task, 0, em_handle_link, adapter);
2583	if ((error = bus_setup_intr(dev, adapter->res, INTR_TYPE_NET,
2584	    em_irq_fast, NULL, adapter, &adapter->tag)) != 0) {
2585		device_printf(dev, "Failed to register fast interrupt "
2586			    "handler: %d\n", error);
2587		taskqueue_free(adapter->tq);
2588		adapter->tq = NULL;
2589		return (error);
2590	}
2591
2592	return (0);
2593}
2594
2595/*********************************************************************
2596 *
2597 *  Setup the MSIX Interrupt handlers
2598 *   This is not really Multiqueue, rather
2599 *   it's just separate interrupt vectors
2600 *   for TX, RX, and Link.
2601 *
2602 **********************************************************************/
2603int
2604em_allocate_msix(struct adapter *adapter)
2605{
2606	device_t	dev = adapter->dev;
2607	struct		tx_ring *txr = adapter->tx_rings;
2608	struct		rx_ring *rxr = adapter->rx_rings;
2609	int		error, rid, vector = 0;
2610	int		cpu_id = 0;
2611
2612
2613	/* Make sure all interrupts are disabled */
2614	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2615
2616	/* First set up ring resources */
2617	for (int i = 0; i < adapter->num_queues; i++, rxr++, vector++) {
2618
2619		/* RX ring */
2620		rid = vector + 1;
2621
2622		rxr->res = bus_alloc_resource_any(dev,
2623		    SYS_RES_IRQ, &rid, RF_ACTIVE);
2624		if (rxr->res == NULL) {
2625			device_printf(dev,
2626			    "Unable to allocate bus resource: "
2627			    "RX MSIX Interrupt %d\n", i);
2628			return (ENXIO);
2629		}
2630		if ((error = bus_setup_intr(dev, rxr->res,
2631		    INTR_TYPE_NET | INTR_MPSAFE, NULL, em_msix_rx,
2632		    rxr, &rxr->tag)) != 0) {
2633			device_printf(dev, "Failed to register RX handler");
2634			return (error);
2635		}
2636#if __FreeBSD_version >= 800504
2637		bus_describe_intr(dev, rxr->res, rxr->tag, "rx%d", i);
2638#endif
2639		rxr->msix = vector;
2640
2641		if (em_last_bind_cpu < 0)
2642			em_last_bind_cpu = CPU_FIRST();
2643		cpu_id = em_last_bind_cpu;
2644		bus_bind_intr(dev, rxr->res, cpu_id);
2645
2646		TASK_INIT(&rxr->rx_task, 0, em_handle_rx, rxr);
2647		rxr->tq = taskqueue_create_fast("em_rxq", M_NOWAIT,
2648		    taskqueue_thread_enqueue, &rxr->tq);
2649		taskqueue_start_threads(&rxr->tq, 1, PI_NET, "%s rxq (cpuid %d)",
2650		    device_get_nameunit(adapter->dev), cpu_id);
2651		/*
2652		** Set the bit to enable this interrupt
2653		** in E1000_IMS -- bits 20 and 21
2654		** are for RX0 and RX1; note this has
2655		** NOTHING to do with the MSIX vector
2656		*/
2657		rxr->ims = 1 << (20 + i);
2658		adapter->ims |= rxr->ims;
2659		adapter->ivars |= (8 | rxr->msix) << (i * 4);
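		/*
		** Each 4-bit IVAR field holds the 3-bit vector number,
		** and the 0x8 marks the entry valid (82574 IVAR layout).
		*/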
2660
2661		em_last_bind_cpu = CPU_NEXT(em_last_bind_cpu);
2662	}
2663
2664	for (int i = 0; i < adapter->num_queues; i++, txr++, vector++) {
2665		/* TX ring */
2666		rid = vector + 1;
2667		txr->res = bus_alloc_resource_any(dev,
2668		    SYS_RES_IRQ, &rid, RF_ACTIVE);
2669		if (txr->res == NULL) {
2670			device_printf(dev,
2671			    "Unable to allocate bus resource: "
2672			    "TX MSIX Interrupt %d\n", i);
2673			return (ENXIO);
2674		}
2675		if ((error = bus_setup_intr(dev, txr->res,
2676		    INTR_TYPE_NET | INTR_MPSAFE, NULL, em_msix_tx,
2677		    txr, &txr->tag)) != 0) {
2678			device_printf(dev, "Failed to register TX handler");
2679			return (error);
2680		}
2681#if __FreeBSD_version >= 800504
2682		bus_describe_intr(dev, txr->res, txr->tag, "tx%d", i);
2683#endif
2684		txr->msix = vector;
2685
2686		if (em_last_bind_cpu < 0)
2687			em_last_bind_cpu = CPU_FIRST();
2688		cpu_id = em_last_bind_cpu;
2689		bus_bind_intr(dev, txr->res, cpu_id);
2690
2691		TASK_INIT(&txr->tx_task, 0, em_handle_tx, txr);
2692		txr->tq = taskqueue_create_fast("em_txq", M_NOWAIT,
2693		    taskqueue_thread_enqueue, &txr->tq);
2694		taskqueue_start_threads(&txr->tq, 1, PI_NET, "%s txq (cpuid %d)",
2695		    device_get_nameunit(adapter->dev), cpu_id);
2696		/*
2697		** Set the bit to enable this interrupt
2698		** in E1000_IMS -- bits 22 and 23
2699		** are for TX0 and TX1; note this has
2700		** NOTHING to do with the MSIX vector
2701		*/
2702		txr->ims = 1 << (22 + i);
2703		adapter->ims |= txr->ims;
2704		adapter->ivars |= (8 | txr->msix) << (8 + (i * 4));
2705
2706		em_last_bind_cpu = CPU_NEXT(em_last_bind_cpu);
2707	}
2708
2709	/* Link interrupt */
2710	rid = vector + 1;
2711	adapter->res = bus_alloc_resource_any(dev,
2712	    SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2713	if (!adapter->res) {
2714		device_printf(dev,"Unable to allocate "
2715		    "bus resource: Link interrupt [%d]\n", rid);
2716		return (ENXIO);
2717        }
2718	/* Set the link handler function */
2719	error = bus_setup_intr(dev, adapter->res,
2720	    INTR_TYPE_NET | INTR_MPSAFE, NULL,
2721	    em_msix_link, adapter, &adapter->tag);
2722	if (error) {
2723		adapter->res = NULL;
2724		device_printf(dev, "Failed to register LINK handler");
2725		return (error);
2726	}
2727#if __FreeBSD_version >= 800504
2728	bus_describe_intr(dev, adapter->res, adapter->tag, "link");
2729#endif
2730	adapter->linkvec = vector;
2731	adapter->ivars |=  (8 | vector) << 16;
2732	adapter->ivars |= 0x80000000;
2733
2734	return (0);
2735}
2736
2737
2738static void
2739em_free_pci_resources(struct adapter *adapter)
2740{
2741	device_t	dev = adapter->dev;
2742	struct tx_ring	*txr;
2743	struct rx_ring	*rxr;
2744	int		rid;
2745
2746
2747	/*
2748	** Release all the queue interrupt resources:
2749	*/
2750	for (int i = 0; i < adapter->num_queues; i++) {
2751		txr = &adapter->tx_rings[i];
2752		/* an early abort? */
2753		if (txr == NULL)
2754			break;
2755		rid = txr->msix + 1;
2756		if (txr->tag != NULL) {
2757			bus_teardown_intr(dev, txr->res, txr->tag);
2758			txr->tag = NULL;
2759		}
2760		if (txr->res != NULL)
2761			bus_release_resource(dev, SYS_RES_IRQ,
2762			    rid, txr->res);
2763
2764		rxr = &adapter->rx_rings[i];
2765		/* an early abort? */
2766		if (rxr == NULL)
2767			break;
2768		rid = rxr->msix + 1;
2769		if (rxr->tag != NULL) {
2770			bus_teardown_intr(dev, rxr->res, rxr->tag);
2771			rxr->tag = NULL;
2772		}
2773		if (rxr->res != NULL)
2774			bus_release_resource(dev, SYS_RES_IRQ,
2775			    rid, rxr->res);
2776	}
2777
2778	if (adapter->linkvec) /* we are doing MSIX */
2779		rid = adapter->linkvec + 1;
2780	else
2781		rid = (adapter->msix != 0) ? 1 : 0;
2782
2783	if (adapter->tag != NULL) {
2784		bus_teardown_intr(dev, adapter->res, adapter->tag);
2785		adapter->tag = NULL;
2786	}
2787
2788	if (adapter->res != NULL)
2789		bus_release_resource(dev, SYS_RES_IRQ, rid, adapter->res);
2790
2791
2792	if (adapter->msix)
2793		pci_release_msi(dev);
2794
2795	if (adapter->msix_mem != NULL)
2796		bus_release_resource(dev, SYS_RES_MEMORY,
2797		    PCIR_BAR(EM_MSIX_BAR), adapter->msix_mem);
2798
2799	if (adapter->memory != NULL)
2800		bus_release_resource(dev, SYS_RES_MEMORY,
2801		    PCIR_BAR(0), adapter->memory);
2802
2803	if (adapter->flash != NULL)
2804		bus_release_resource(dev, SYS_RES_MEMORY,
2805		    EM_FLASH, adapter->flash);
2806}
2807
2808/*
2809 * Setup MSI or MSI/X
2810 */
2811static int
2812em_setup_msix(struct adapter *adapter)
2813{
2814	device_t dev = adapter->dev;
2815	int val;
2816
2817	/* Nearly always going to use one queue */
2818	adapter->num_queues = 1;
2819
2820	/*
2821	** Try using MSI-X for Hartwell adapters
2822	*/
2823	if ((adapter->hw.mac.type == e1000_82574) &&
2824	    (em_enable_msix == TRUE)) {
2825#ifdef EM_MULTIQUEUE
2826		adapter->num_queues = (em_num_queues == 1) ? 1 : 2;
2827		if (adapter->num_queues > 1)
2828			em_enable_vectors_82574(adapter);
2829#endif
2830		/* Map the MSIX BAR */
2831		int rid = PCIR_BAR(EM_MSIX_BAR);
2832		adapter->msix_mem = bus_alloc_resource_any(dev,
2833		    SYS_RES_MEMORY, &rid, RF_ACTIVE);
2834		if (adapter->msix_mem == NULL) {
2835			/* May not be enabled */
2836			device_printf(adapter->dev,
2837			    "Unable to map MSIX table\n");
2838			goto msi;
2839		}
2840		val = pci_msix_count(dev);
2841
2842#ifdef EM_MULTIQUEUE
2843		/* We need 5 vectors in the multiqueue case */
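		/* (2 RX + 2 TX + 1 link; the single queue case needs 3) */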
2844		if (adapter->num_queues > 1) {
2845			if (val >= 5)
2846				val = 5;
2847			else {
2848				adapter->num_queues = 1;
2849				device_printf(adapter->dev,
2850				    "Insufficient MSIX vectors for >1 queue, "
2851				    "using single queue...\n");
2852				goto msix_one;
2853			}
2854		} else {
2855msix_one:
2856#endif
2857			if (val >= 3)
2858				val = 3;
2859			else {
2860				device_printf(adapter->dev,
2861			    	"Insufficient MSIX vectors, using MSI\n");
2862				goto msi;
2863			}
2864#ifdef EM_MULTIQUEUE
2865		}
2866#endif
2867
2868		if (pci_alloc_msix(dev, &val) == 0) {
2869			device_printf(adapter->dev,
2870			    "Using MSIX interrupts "
2871			    "with %d vectors\n", val);
2872			return (val);
2873		}
2874
2875		/*
2876		** If MSIX alloc failed or provided us with
2877		** less than needed, free and fall through to MSI
2878		*/
2879		pci_release_msi(dev);
2880	}
2881msi:
2882	if (adapter->msix_mem != NULL) {
2883		bus_release_resource(dev, SYS_RES_MEMORY,
2884		    PCIR_BAR(EM_MSIX_BAR), adapter->msix_mem);
2885		adapter->msix_mem = NULL;
2886	}
2887	val = 1;
2888	if (pci_alloc_msi(dev, &val) == 0) {
2889		device_printf(adapter->dev, "Using an MSI interrupt\n");
2890		return (val);
2891	}
2892	/* Should only happen due to manual configuration */
2893	device_printf(adapter->dev,"No MSI/MSIX using a Legacy IRQ\n");
2894	return (0);
2895}
2896
2897
2898/*
2899** The three following flush routines are used as a workaround on the
2900** I219 client parts, and only on them.
2901**
2902** em_flush_tx_ring - remove all descriptors from the tx_ring
2903**
2904** We want to clear all pending descriptors from the TX ring;
2905** zeroing happens when the HW reads the regs. We assign the ring
2906** itself as the data of the next descriptor. We don't care about
2907** the data since we are about to reset the HW.
2908*/
2909static void
2910em_flush_tx_ring(struct adapter *adapter)
2911{
2912	struct e1000_hw		*hw = &adapter->hw;
2913	struct tx_ring		*txr = adapter->tx_rings;
2914	struct e1000_tx_desc	*txd;
2915	u32			tctl, txd_lower = E1000_TXD_CMD_IFCS;
2916	u16			size = 512;
2917
2918	tctl = E1000_READ_REG(hw, E1000_TCTL);
2919	E1000_WRITE_REG(hw, E1000_TCTL, tctl | E1000_TCTL_EN);
2920
2921	txd = &txr->tx_base[txr->next_avail_desc++];
2922	if (txr->next_avail_desc == adapter->num_tx_desc)
2923		txr->next_avail_desc = 0;
2924
2925	/* Just use the ring as a dummy buffer addr */
2926	txd->buffer_addr = txr->txdma.dma_paddr;
2927	txd->lower.data = htole32(txd_lower | size);
2928	txd->upper.data = 0;
2929
2930	/* flush descriptors to memory before notifying the HW */
2931	wmb();
2932
2933	E1000_WRITE_REG(hw, E1000_TDT(0), txr->next_avail_desc);
2934	mb();
2935	usec_delay(250);
2936}
2937
2938/*
2939** em_flush_rx_ring - remove all descriptors from the rx_ring
2940**
2941** Mark all descriptors in the RX ring as consumed and disable the rx ring
2942*/
2943static void
2944em_flush_rx_ring(struct adapter *adapter)
2945{
2946	struct e1000_hw	*hw = &adapter->hw;
2947	u32		rctl, rxdctl;
2948
2949	rctl = E1000_READ_REG(hw, E1000_RCTL);
2950	E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
2951	E1000_WRITE_FLUSH(hw);
2952	usec_delay(150);
2953
2954	rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(0));
2955	/* zero the lower 14 bits (prefetch and host thresholds) */
2956	rxdctl &= 0xffffc000;
2957	/*
2958	 * update thresholds: prefetch threshold to 31, host threshold to 1
2959	 * and make sure the granularity is "descriptors" and not "cache lines"
2960	 */
2961	rxdctl |= (0x1F | (1 << 8) | E1000_RXDCTL_THRESH_UNIT_DESC);
2962	E1000_WRITE_REG(hw, E1000_RXDCTL(0), rxdctl);
2963
2964	/* momentarily enable the RX ring for the changes to take effect */
2965	E1000_WRITE_REG(hw, E1000_RCTL, rctl | E1000_RCTL_EN);
2966	E1000_WRITE_FLUSH(hw);
2967	usec_delay(150);
2968	E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
2969}
2970
2971/*
2972** em_flush_desc_rings - remove all descriptors from the descriptor rings
2973**
2974** In i219, the descriptor rings must be emptied before resetting the HW
2975** or before changing the device state to D3 during runtime (runtime PM).
2976**
2977** Failure to do this will cause the HW to enter a unit hang state which can
2978** only be released by a PCI reset of the device.
2979**
2980*/
2981static void
2982em_flush_desc_rings(struct adapter *adapter)
2983{
2984	struct e1000_hw	*hw = &adapter->hw;
2985	device_t	dev = adapter->dev;
2986	u16		hang_state;
2987	u32		fext_nvm11, tdlen;
2988
2989	/* First, disable MULR fix in FEXTNVM11 */
2990	fext_nvm11 = E1000_READ_REG(hw, E1000_FEXTNVM11);
2991	fext_nvm11 |= E1000_FEXTNVM11_DISABLE_MULR_FIX;
2992	E1000_WRITE_REG(hw, E1000_FEXTNVM11, fext_nvm11);
2993
2994	/* do nothing if we're not in faulty state, or if the queue is empty */
2995	tdlen = E1000_READ_REG(hw, E1000_TDLEN(0));
2996	hang_state = pci_read_config(dev, PCICFG_DESC_RING_STATUS, 2);
2997	if (!(hang_state & FLUSH_DESC_REQUIRED) || !tdlen)
2998		return;
2999	em_flush_tx_ring(adapter);
3000
3001	/* recheck, maybe the fault is caused by the rx ring */
3002	hang_state = pci_read_config(dev, PCICFG_DESC_RING_STATUS, 2);
3003	if (hang_state & FLUSH_DESC_REQUIRED)
3004		em_flush_rx_ring(adapter);
3005}
3006
3007
3008/*********************************************************************
3009 *
3010 *  Initialize the hardware to a configuration
3011 *  as specified by the adapter structure.
3012 *
3013 **********************************************************************/
3014static void
3015em_reset(struct adapter *adapter)
3016{
3017	device_t	dev = adapter->dev;
3018	if_t ifp = adapter->ifp;
3019	struct e1000_hw	*hw = &adapter->hw;
3020	u16		rx_buffer_size;
3021	u32		pba;
3022
3023	INIT_DEBUGOUT("em_reset: begin");
3024
3025	/* Set up smart power down as default off on newer adapters. */
3026	if (!em_smart_pwr_down && (hw->mac.type == e1000_82571 ||
3027	    hw->mac.type == e1000_82572)) {
3028		u16 phy_tmp = 0;
3029
3030		/* Speed up time to link by disabling smart power down. */
3031		e1000_read_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, &phy_tmp);
3032		phy_tmp &= ~IGP02E1000_PM_SPD;
3033		e1000_write_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, phy_tmp);
3034	}
3035
3036	/*
3037	 * Packet Buffer Allocation (PBA)
3038	 * Writing PBA sets the receive portion of the buffer;
3039	 * the remainder is used for the transmit buffer.
3040	 */
3041	switch (hw->mac.type) {
3042	/* Total Packet Buffer on these is 48K */
3043	case e1000_82571:
3044	case e1000_82572:
3045	case e1000_80003es2lan:
3046			pba = E1000_PBA_32K; /* 32K for Rx, 16K for Tx */
3047		break;
3048	case e1000_82573: /* 82573: Total Packet Buffer is 32K */
3049			pba = E1000_PBA_12K; /* 12K for Rx, 20K for Tx */
3050		break;
3051	case e1000_82574:
3052	case e1000_82583:
3053			pba = E1000_PBA_20K; /* 20K for Rx, 20K for Tx */
3054		break;
3055	case e1000_ich8lan:
3056		pba = E1000_PBA_8K;
3057		break;
3058	case e1000_ich9lan:
3059	case e1000_ich10lan:
3060		/* Boost Receive side for jumbo frames */
3061		if (adapter->hw.mac.max_frame_size > 4096)
3062			pba = E1000_PBA_14K;
3063		else
3064			pba = E1000_PBA_10K;
3065		break;
3066	case e1000_pchlan:
3067	case e1000_pch2lan:
3068	case e1000_pch_lpt:
3069	case e1000_pch_spt:
3070		pba = E1000_PBA_26K;
3071		break;
3072	default:
3073		if (adapter->hw.mac.max_frame_size > 8192)
3074			pba = E1000_PBA_40K; /* 40K for Rx, 24K for Tx */
3075		else
3076			pba = E1000_PBA_48K; /* 48K for Rx, 16K for Tx */
3077	}
3078	E1000_WRITE_REG(&adapter->hw, E1000_PBA, pba);
3079
3080	/*
3081	 * These parameters control the automatic generation (Tx) and
3082	 * response (Rx) to Ethernet PAUSE frames.
3083	 * - High water mark should allow for at least two frames to be
3084	 *   received after sending an XOFF.
3085	 * - Low water mark works best when it is very near the high water mark.
3086	 *   This allows the receiver to restart by sending XON when it has
3087	 *   drained a bit. Here we use an arbitrary value of 1500 which will
3088	 *   restart after one full frame is pulled from the buffer. There
3089	 *   could be several smaller frames in the buffer and if so they will
3090	 *   not trigger the XON until their total number reduces the buffer
3091	 *   by 1500.
3092	 * - The pause time is fairly large at 1000 x 512ns = 512 usec.
3093	 */
3094	rx_buffer_size = ((E1000_READ_REG(hw, E1000_PBA) & 0xffff) << 10 );
3095	hw->fc.high_water = rx_buffer_size -
3096	    roundup2(adapter->hw.mac.max_frame_size, 1024);
3097	hw->fc.low_water = hw->fc.high_water - 1500;
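	/*
	** Example: with a 32K RX PBA and a 1522-byte max frame,
	** rx_buffer_size = 32 << 10 = 32768, so high_water =
	** 32768 - roundup2(1522, 1024) = 30720 and low_water = 29220.
	*/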
3098
3099	if (adapter->fc) /* locally set flow control value? */
3100		hw->fc.requested_mode = adapter->fc;
3101	else
3102		hw->fc.requested_mode = e1000_fc_full;
3103
3104	if (hw->mac.type == e1000_80003es2lan)
3105		hw->fc.pause_time = 0xFFFF;
3106	else
3107		hw->fc.pause_time = EM_FC_PAUSE_TIME;
3108
3109	hw->fc.send_xon = TRUE;
3110
3111	/* Device specific overrides/settings */
3112	switch (hw->mac.type) {
3113	case e1000_pchlan:
3114		/* Workaround: no TX flow ctrl for PCH */
3115		hw->fc.requested_mode = e1000_fc_rx_pause;
3116		hw->fc.pause_time = 0xFFFF; /* override */
3117		if (if_getmtu(ifp) > ETHERMTU) {
3118			hw->fc.high_water = 0x3500;
3119			hw->fc.low_water = 0x1500;
3120		} else {
3121			hw->fc.high_water = 0x5000;
3122			hw->fc.low_water = 0x3000;
3123		}
3124		hw->fc.refresh_time = 0x1000;
3125		break;
3126	case e1000_pch2lan:
3127	case e1000_pch_lpt:
3128	case e1000_pch_spt:
3129		hw->fc.high_water = 0x5C20;
3130		hw->fc.low_water = 0x5048;
3131		hw->fc.pause_time = 0x0650;
3132		hw->fc.refresh_time = 0x0400;
3133		/* Jumbos need adjusted PBA */
3134		if (if_getmtu(ifp) > ETHERMTU)
3135			E1000_WRITE_REG(hw, E1000_PBA, 12);
3136		else
3137			E1000_WRITE_REG(hw, E1000_PBA, 26);
3138		break;
3139	case e1000_ich9lan:
3140	case e1000_ich10lan:
3141		if (if_getmtu(ifp) > ETHERMTU) {
3142			hw->fc.high_water = 0x2800;
3143			hw->fc.low_water = hw->fc.high_water - 8;
3144			break;
3145		}
3146		/* else fall thru */
3147	default:
3148		if (hw->mac.type == e1000_80003es2lan)
3149			hw->fc.pause_time = 0xFFFF;
3150		break;
3151	}
3152
3153	/* I219 needs some special flushing to avoid hangs */
3154	if (hw->mac.type == e1000_pch_spt)
3155		em_flush_desc_rings(adapter);
3156
3157	/* Issue a global reset */
3158	e1000_reset_hw(hw);
3159	E1000_WRITE_REG(hw, E1000_WUC, 0);
3160	em_disable_aspm(adapter);
3161	/* and a re-init */
3162	if (e1000_init_hw(hw) < 0) {
3163		device_printf(dev, "Hardware Initialization Failed\n");
3164		return;
3165	}
3166
3167	E1000_WRITE_REG(hw, E1000_VET, ETHERTYPE_VLAN);
3168	e1000_get_phy_info(hw);
3169	e1000_check_for_link(hw);
3170	return;
3171}
3172
3173/*********************************************************************
3174 *
3175 *  Setup networking device structure and register an interface.
3176 *
3177 **********************************************************************/
3178static int
3179em_setup_interface(device_t dev, struct adapter *adapter)
3180{
3181	if_t ifp;
3182
3183	INIT_DEBUGOUT("em_setup_interface: begin");
3184
3185	ifp = adapter->ifp = if_gethandle(IFT_ETHER);
3186	if (ifp == NULL) {
3187		device_printf(dev, "cannot allocate ifnet structure\n");
3188		return (-1);
3189	}
3190	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
3191	if_setdev(ifp, dev);
3192	if_setinitfn(ifp, em_init);
3193	if_setsoftc(ifp, adapter);
3194	if_setflags(ifp, IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST);
3195	if_setioctlfn(ifp, em_ioctl);
3196	if_setgetcounterfn(ifp, em_get_counter);
3197
3198	/* TSO parameters */
3199	ifp->if_hw_tsomax = IP_MAXPACKET;
3200	/* Take m_pullup(9)'s in em_xmit() w/ TSO into account. */
3201	ifp->if_hw_tsomaxsegcount = EM_MAX_SCATTER - 5;
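	/*
	** The "- 5" leaves slack for the extra segments em_xmit() may
	** create when it coalesces headers with m_pullup() and for the
	** TSO sentinel descriptor it can append.
	*/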
3202	ifp->if_hw_tsomaxsegsize = EM_TSO_SEG_SIZE;
3203
3204#ifdef EM_MULTIQUEUE
3205	/* Multiqueue stack interface */
3206	if_settransmitfn(ifp, em_mq_start);
3207	if_setqflushfn(ifp, em_qflush);
3208#else
3209	if_setstartfn(ifp, em_start);
3210	if_setsendqlen(ifp, adapter->num_tx_desc - 1);
3211	if_setsendqready(ifp);
3212#endif
3213
3214	ether_ifattach(ifp, adapter->hw.mac.addr);
3215
3216	if_setcapabilities(ifp, 0);
3217	if_setcapenable(ifp, 0);
3218
3219
3220	if_setcapabilitiesbit(ifp, IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM |
3221	    IFCAP_TSO4, 0);
3222	/*
3223	 * Tell the upper layer(s) we
3224	 * support full VLAN capability
3225	 */
3226	if_setifheaderlen(ifp, sizeof(struct ether_vlan_header));
3227	if_setcapabilitiesbit(ifp, IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_HWTSO |
3228	    IFCAP_VLAN_MTU, 0);
3229	if_setcapenable(ifp, if_getcapabilities(ifp));
3230
3231	/*
3232	** Don't turn this on by default: if vlans are
3233	** created on another pseudo device (e.g. lagg),
3234	** vlan events are not passed through, breaking
3235	** operation, but with HW FILTER off it works. If
3236	** you use vlans directly on the em driver you can
3237	** enable this and get full hardware tag filtering.
3238	*/
3239	if_setcapabilitiesbit(ifp, IFCAP_VLAN_HWFILTER,0);
3240
3241#ifdef DEVICE_POLLING
3242	if_setcapabilitiesbit(ifp, IFCAP_POLLING,0);
3243#endif
3244
3245	/* Enable only WOL MAGIC by default */
3246	if (adapter->wol) {
3247		if_setcapabilitiesbit(ifp, IFCAP_WOL, 0);
3248		if_setcapenablebit(ifp, IFCAP_WOL_MAGIC, 0);
3249	}
3250
3251	/*
3252	 * Specify the media types supported by this adapter and register
3253	 * callbacks to update media and link information
3254	 */
3255	ifmedia_init(&adapter->media, IFM_IMASK,
3256	    em_media_change, em_media_status);
3257	if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
3258	    (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
3259		u_char fiber_type = IFM_1000_SX;	/* default type */
3260
3261		ifmedia_add(&adapter->media, IFM_ETHER | fiber_type | IFM_FDX,
3262			    0, NULL);
3263		ifmedia_add(&adapter->media, IFM_ETHER | fiber_type, 0, NULL);
3264	} else {
3265		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T, 0, NULL);
3266		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T | IFM_FDX,
3267			    0, NULL);
3268		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX,
3269			    0, NULL);
3270		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX | IFM_FDX,
3271			    0, NULL);
3272		if (adapter->hw.phy.type != e1000_phy_ife) {
3273			ifmedia_add(&adapter->media,
3274				IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
3275			ifmedia_add(&adapter->media,
3276				IFM_ETHER | IFM_1000_T, 0, NULL);
3277		}
3278	}
3279	ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL);
3280	ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO);
3281	return (0);
3282}
3283
3284
3285/*
3286 * Manage DMA'able memory.
3287 */
3288static void
3289em_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
3290{
3291	if (error)
3292		return;
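	/* the tag was created with nsegments == 1, so segs[0] covers it all */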
3293	*(bus_addr_t *) arg = segs[0].ds_addr;
3294}
3295
3296static int
3297em_dma_malloc(struct adapter *adapter, bus_size_t size,
3298        struct em_dma_alloc *dma, int mapflags)
3299{
3300	int error;
3301
3302	error = bus_dma_tag_create(bus_get_dma_tag(adapter->dev), /* parent */
3303				EM_DBA_ALIGN, 0,	/* alignment, bounds */
3304				BUS_SPACE_MAXADDR,	/* lowaddr */
3305				BUS_SPACE_MAXADDR,	/* highaddr */
3306				NULL, NULL,		/* filter, filterarg */
3307				size,			/* maxsize */
3308				1,			/* nsegments */
3309				size,			/* maxsegsize */
3310				0,			/* flags */
3311				NULL,			/* lockfunc */
3312				NULL,			/* lockarg */
3313				&dma->dma_tag);
3314	if (error) {
3315		device_printf(adapter->dev,
3316		    "%s: bus_dma_tag_create failed: %d\n",
3317		    __func__, error);
3318		goto fail_0;
3319	}
3320
3321	error = bus_dmamem_alloc(dma->dma_tag, (void**) &dma->dma_vaddr,
3322	    BUS_DMA_NOWAIT | BUS_DMA_COHERENT, &dma->dma_map);
3323	if (error) {
3324		device_printf(adapter->dev,
3325		    "%s: bus_dmamem_alloc(%ju) failed: %d\n",
3326		    __func__, (uintmax_t)size, error);
3327		goto fail_2;
3328	}
3329
3330	dma->dma_paddr = 0;
3331	error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
3332	    size, em_dmamap_cb, &dma->dma_paddr, mapflags | BUS_DMA_NOWAIT);
3333	if (error || dma->dma_paddr == 0) {
3334		device_printf(adapter->dev,
3335		    "%s: bus_dmamap_load failed: %d\n",
3336		    __func__, error);
3337		goto fail_3;
3338	}
3339
3340	return (0);
3341
3342fail_3:
3343	bus_dmamap_unload(dma->dma_tag, dma->dma_map);
3344fail_2:
3345	bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
3346	bus_dma_tag_destroy(dma->dma_tag);
3347fail_0:
3348	dma->dma_tag = NULL;
3349
3350	return (error);
3351}
3352
3353static void
3354em_dma_free(struct adapter *adapter, struct em_dma_alloc *dma)
3355{
3356	if (dma->dma_tag == NULL)
3357		return;
3358	if (dma->dma_paddr != 0) {
3359		bus_dmamap_sync(dma->dma_tag, dma->dma_map,
3360		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
3361		bus_dmamap_unload(dma->dma_tag, dma->dma_map);
3362		dma->dma_paddr = 0;
3363	}
3364	if (dma->dma_vaddr != NULL) {
3365		bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
3366		dma->dma_vaddr = NULL;
3367	}
3368	bus_dma_tag_destroy(dma->dma_tag);
3369	dma->dma_tag = NULL;
3370}
3371
3372
3373/*********************************************************************
3374 *
3375 *  Allocate memory for the transmit and receive rings, and then
3376 *  the descriptors associated with each, called only once at attach.
3377 *
3378 **********************************************************************/
3379static int
3380em_allocate_queues(struct adapter *adapter)
3381{
3382	device_t		dev = adapter->dev;
3383	struct tx_ring		*txr = NULL;
3384	struct rx_ring		*rxr = NULL;
3385	int rsize, tsize, error = E1000_SUCCESS;
3386	int txconf = 0, rxconf = 0;
3387
3388
3389	/* Allocate the TX ring struct memory */
3390	if (!(adapter->tx_rings =
3391	    (struct tx_ring *) malloc(sizeof(struct tx_ring) *
3392	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3393		device_printf(dev, "Unable to allocate TX ring memory\n");
3394		error = ENOMEM;
3395		goto fail;
3396	}
3397
3398	/* Now allocate the RX */
3399	if (!(adapter->rx_rings =
3400	    (struct rx_ring *) malloc(sizeof(struct rx_ring) *
3401	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3402		device_printf(dev, "Unable to allocate RX ring memory\n");
3403		error = ENOMEM;
3404		goto rx_fail;
3405	}
3406
3407	tsize = roundup2(adapter->num_tx_desc *
3408	    sizeof(struct e1000_tx_desc), EM_DBA_ALIGN);
3409	/*
3410	 * Now set up the TX queues, txconf is needed to handle the
3411	 * possibility that things fail midcourse and we need to
3412	 * undo memory gracefully
3413	 */
3414	for (int i = 0; i < adapter->num_queues; i++, txconf++) {
3415		/* Set up some basics */
3416		txr = &adapter->tx_rings[i];
3417		txr->adapter = adapter;
3418		txr->me = i;
3419
3420		/* Initialize the TX lock */
3421		snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
3422		    device_get_nameunit(dev), txr->me);
3423		mtx_init(&txr->tx_mtx, txr->mtx_name, NULL, MTX_DEF);
3424
3425		if (em_dma_malloc(adapter, tsize,
3426			&txr->txdma, BUS_DMA_NOWAIT)) {
3427			device_printf(dev,
3428			    "Unable to allocate TX Descriptor memory\n");
3429			error = ENOMEM;
3430			goto err_tx_desc;
3431		}
3432		txr->tx_base = (struct e1000_tx_desc *)txr->txdma.dma_vaddr;
3433		bzero((void *)txr->tx_base, tsize);
3434
3435		if (em_allocate_transmit_buffers(txr)) {
3436			device_printf(dev,
3437			    "Critical Failure setting up transmit buffers\n");
3438			error = ENOMEM;
3439			goto err_tx_desc;
3440		}
3441#if __FreeBSD_version >= 800000
3442		/* Allocate a buf ring */
3443		txr->br = buf_ring_alloc(4096, M_DEVBUF,
3444		    M_WAITOK, &txr->tx_mtx);
3445#endif
3446	}
3447
3448	/*
3449	 * Next the RX queues...
3450	 */
3451	rsize = roundup2(adapter->num_rx_desc *
3452	    sizeof(union e1000_rx_desc_extended), EM_DBA_ALIGN);
3453	for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
3454		rxr = &adapter->rx_rings[i];
3455		rxr->adapter = adapter;
3456		rxr->me = i;
3457
3458		/* Initialize the RX lock */
3459		snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
3460		    device_get_nameunit(dev), rxr->me);
3461		mtx_init(&rxr->rx_mtx, rxr->mtx_name, NULL, MTX_DEF);
3462
3463		if (em_dma_malloc(adapter, rsize,
3464			&rxr->rxdma, BUS_DMA_NOWAIT)) {
3465			device_printf(dev,
3466			    "Unable to allocate RxDescriptor memory\n");
3467			error = ENOMEM;
3468			goto err_rx_desc;
3469		}
3470		rxr->rx_base = (union e1000_rx_desc_extended *)rxr->rxdma.dma_vaddr;
3471		bzero((void *)rxr->rx_base, rsize);
3472
3473		/* Allocate receive buffers for the ring */
3474		if (em_allocate_receive_buffers(rxr)) {
3475			device_printf(dev,
3476			    "Critical Failure setting up receive buffers\n");
3477			error = ENOMEM;
3478			goto err_rx_desc;
3479		}
3480	}
3481
3482	return (0);
3483
3484err_rx_desc:
3485	for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
3486		em_dma_free(adapter, &rxr->rxdma);
3487err_tx_desc:
3488	for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
3489		em_dma_free(adapter, &txr->txdma);
3490	free(adapter->rx_rings, M_DEVBUF);
3491rx_fail:
3492#if __FreeBSD_version >= 800000
3493	buf_ring_free(txr->br, M_DEVBUF);
3494#endif
3495	free(adapter->tx_rings, M_DEVBUF);
3496fail:
3497	return (error);
3498}
3499
3500
3501/*********************************************************************
3502 *
3503 *  Allocate memory for tx_buffer structures. The tx_buffer stores all
3504 *  the information needed to transmit a packet on the wire. This is
3505 *  called only once at attach, setup is done every reset.
3506 *
3507 **********************************************************************/
3508static int
3509em_allocate_transmit_buffers(struct tx_ring *txr)
3510{
3511	struct adapter *adapter = txr->adapter;
3512	device_t dev = adapter->dev;
3513	struct em_txbuffer *txbuf;
3514	int error, i;
3515
3516	/*
3517	 * Setup DMA descriptor areas.
3518	 */
3519	if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
3520			       1, 0,			/* alignment, bounds */
3521			       BUS_SPACE_MAXADDR,	/* lowaddr */
3522			       BUS_SPACE_MAXADDR,	/* highaddr */
3523			       NULL, NULL,		/* filter, filterarg */
3524			       EM_TSO_SIZE,		/* maxsize */
3525			       EM_MAX_SCATTER,		/* nsegments */
3526			       PAGE_SIZE,		/* maxsegsize */
3527			       0,			/* flags */
3528			       NULL,			/* lockfunc */
3529			       NULL,			/* lockfuncarg */
3530			       &txr->txtag))) {
3531		device_printf(dev,"Unable to allocate TX DMA tag\n");
3532		goto fail;
3533	}
3534
3535	if (!(txr->tx_buffers =
3536	    (struct em_txbuffer *) malloc(sizeof(struct em_txbuffer) *
3537	    adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3538		device_printf(dev, "Unable to allocate tx_buffer memory\n");
3539		error = ENOMEM;
3540		goto fail;
3541	}
3542
3543        /* Create the descriptor buffer dma maps */
3544	txbuf = txr->tx_buffers;
3545	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3546		error = bus_dmamap_create(txr->txtag, 0, &txbuf->map);
3547		if (error != 0) {
3548			device_printf(dev, "Unable to create TX DMA map\n");
3549			goto fail;
3550		}
3551	}
3552
3553	return 0;
3554fail:
	/* Free everything; this handles the case where we failed partway */
3556	em_free_transmit_structures(adapter);
3557	return (error);
3558}
3559
3560/*********************************************************************
3561 *
3562 *  Initialize a transmit ring.
3563 *
3564 **********************************************************************/
3565static void
3566em_setup_transmit_ring(struct tx_ring *txr)
3567{
3568	struct adapter *adapter = txr->adapter;
3569	struct em_txbuffer *txbuf;
3570	int i;
3571#ifdef DEV_NETMAP
3572	struct netmap_slot *slot;
3573	struct netmap_adapter *na = netmap_getna(adapter->ifp);
3574#endif /* DEV_NETMAP */
3575
3576	/* Clear the old descriptor contents */
3577	EM_TX_LOCK(txr);
3578#ifdef DEV_NETMAP
3579	slot = netmap_reset(na, NR_TX, txr->me, 0);
3580#endif /* DEV_NETMAP */
3581
3582	bzero((void *)txr->tx_base,
3583	      (sizeof(struct e1000_tx_desc)) * adapter->num_tx_desc);
3584	/* Reset indices */
3585	txr->next_avail_desc = 0;
3586	txr->next_to_clean = 0;
3587
3588	/* Free any existing tx buffers. */
3589        txbuf = txr->tx_buffers;
3590	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3591		if (txbuf->m_head != NULL) {
3592			bus_dmamap_sync(txr->txtag, txbuf->map,
3593			    BUS_DMASYNC_POSTWRITE);
3594			bus_dmamap_unload(txr->txtag, txbuf->map);
3595			m_freem(txbuf->m_head);
3596			txbuf->m_head = NULL;
3597		}
3598#ifdef DEV_NETMAP
3599		if (slot) {
3600			int si = netmap_idx_n2k(&na->tx_rings[txr->me], i);
3601			uint64_t paddr;
3602			void *addr;
3603
3604			addr = PNMB(na, slot + si, &paddr);
3605			txr->tx_base[i].buffer_addr = htole64(paddr);
3606			/* reload the map for netmap mode */
3607			netmap_load_map(na, txr->txtag, txbuf->map, addr);
3608		}
3609#endif /* DEV_NETMAP */
3610
3611		/* clear the watch index */
3612		txbuf->next_eop = -1;
3613        }
3614
3615	/* Set number of descriptors available */
3616	txr->tx_avail = adapter->num_tx_desc;
3617	txr->busy = EM_TX_IDLE;
3618
3619	/* Clear checksum offload context. */
3620	txr->last_hw_offload = 0;
3621	txr->last_hw_ipcss = 0;
3622	txr->last_hw_ipcso = 0;
3623	txr->last_hw_tucss = 0;
3624	txr->last_hw_tucso = 0;
3625
3626	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3627	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3628	EM_TX_UNLOCK(txr);
3629}
3630
3631/*********************************************************************
3632 *
3633 *  Initialize all transmit rings.
3634 *
3635 **********************************************************************/
3636static void
3637em_setup_transmit_structures(struct adapter *adapter)
3638{
3639	struct tx_ring *txr = adapter->tx_rings;
3640
3641	for (int i = 0; i < adapter->num_queues; i++, txr++)
3642		em_setup_transmit_ring(txr);
3643
3644	return;
3645}
3646
3647/*********************************************************************
3648 *
3649 *  Enable transmit unit.
3650 *
3651 **********************************************************************/
3652static void
3653em_initialize_transmit_unit(struct adapter *adapter)
3654{
3655	struct tx_ring	*txr = adapter->tx_rings;
3656	struct e1000_hw	*hw = &adapter->hw;
3657	u32	tctl, txdctl = 0, tarc, tipg = 0;
3658
	INIT_DEBUGOUT("em_initialize_transmit_unit: begin");
3660
3661	for (int i = 0; i < adapter->num_queues; i++, txr++) {
3662		u64 bus_addr = txr->txdma.dma_paddr;
3663		/* Base and Len of TX Ring */
3664		E1000_WRITE_REG(hw, E1000_TDLEN(i),
3665	    	    adapter->num_tx_desc * sizeof(struct e1000_tx_desc));
3666		E1000_WRITE_REG(hw, E1000_TDBAH(i),
3667	    	    (u32)(bus_addr >> 32));
3668		E1000_WRITE_REG(hw, E1000_TDBAL(i),
3669	    	    (u32)bus_addr);
3670		/* Init the HEAD/TAIL indices */
3671		E1000_WRITE_REG(hw, E1000_TDT(i), 0);
3672		E1000_WRITE_REG(hw, E1000_TDH(i), 0);
3673
3674		HW_DEBUGOUT2("Base = %x, Length = %x\n",
3675		    E1000_READ_REG(&adapter->hw, E1000_TDBAL(i)),
3676		    E1000_READ_REG(&adapter->hw, E1000_TDLEN(i)));
3677
3678		txr->busy = EM_TX_IDLE;
		txdctl = 0; /* clear txdctl */
		txdctl |= 0x1f; /* PTHRESH */
		txdctl |= 1 << 8; /* HTHRESH */
		txdctl |= 1 << 16; /* WTHRESH */
		txdctl |= 1 << 22; /* Reserved bit 22 must always be 1 */
		txdctl |= E1000_TXDCTL_GRAN;
		txdctl |= 1 << 25; /* LWTHRESH */
3686
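		/*
		 * Assuming E1000_TXDCTL_GRAN is bit 24 (count in
		 * descriptors rather than cache lines), the value
		 * written below works out to 0x0341011f: a prefetch
		 * threshold of 31, host and write-back thresholds of 1,
		 * and a low threshold of 1.
		 */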
		E1000_WRITE_REG(hw, E1000_TXDCTL(i), txdctl);
3688	}
3689
3690	/* Set the default values for the Tx Inter Packet Gap timer */
3691	switch (adapter->hw.mac.type) {
3692	case e1000_80003es2lan:
3693		tipg = DEFAULT_82543_TIPG_IPGR1;
3694		tipg |= DEFAULT_80003ES2LAN_TIPG_IPGR2 <<
3695		    E1000_TIPG_IPGR2_SHIFT;
3696		break;
3697	default:
3698		if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
3699		    (adapter->hw.phy.media_type ==
3700		    e1000_media_type_internal_serdes))
3701			tipg = DEFAULT_82543_TIPG_IPGT_FIBER;
3702		else
3703			tipg = DEFAULT_82543_TIPG_IPGT_COPPER;
3704		tipg |= DEFAULT_82543_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT;
3705		tipg |= DEFAULT_82543_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT;
3706	}
3707
3708	E1000_WRITE_REG(&adapter->hw, E1000_TIPG, tipg);
3709	E1000_WRITE_REG(&adapter->hw, E1000_TIDV, adapter->tx_int_delay.value);
3710
	if (adapter->hw.mac.type >= e1000_82540)
3712		E1000_WRITE_REG(&adapter->hw, E1000_TADV,
3713		    adapter->tx_abs_int_delay.value);
3714
3715	if ((adapter->hw.mac.type == e1000_82571) ||
3716	    (adapter->hw.mac.type == e1000_82572)) {
3717		tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
3718		tarc |= TARC_SPEED_MODE_BIT;
3719		E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3720	} else if (adapter->hw.mac.type == e1000_80003es2lan) {
3721		/* errata: program both queues to unweighted RR */
3722		tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
3723		tarc |= 1;
3724		E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3725		tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(1));
3726		tarc |= 1;
3727		E1000_WRITE_REG(&adapter->hw, E1000_TARC(1), tarc);
3728	} else if (adapter->hw.mac.type == e1000_82574) {
3729		tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
3730		tarc |= TARC_ERRATA_BIT;
		if (adapter->num_queues > 1) {
3732			tarc |= (TARC_COMPENSATION_MODE | TARC_MQ_FIX);
3733			E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3734			E1000_WRITE_REG(&adapter->hw, E1000_TARC(1), tarc);
3735		} else
3736			E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3737	}
3738
3739	adapter->txd_cmd = E1000_TXD_CMD_IFCS;
3740	if (adapter->tx_int_delay.value > 0)
3741		adapter->txd_cmd |= E1000_TXD_CMD_IDE;
3742
3743	/* Program the Transmit Control Register */
3744	tctl = E1000_READ_REG(&adapter->hw, E1000_TCTL);
3745	tctl &= ~E1000_TCTL_CT;
3746	tctl |= (E1000_TCTL_PSP | E1000_TCTL_RTLC | E1000_TCTL_EN |
3747		   (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT));
3748
3749	if (adapter->hw.mac.type >= e1000_82571)
3750		tctl |= E1000_TCTL_MULR;
3751
3752	/* This write will effectively turn on the transmit unit. */
3753	E1000_WRITE_REG(&adapter->hw, E1000_TCTL, tctl);
3754
3755	if (hw->mac.type == e1000_pch_spt) {
3756		u32 reg;
3757		reg = E1000_READ_REG(hw, E1000_IOSFPC);
3758		reg |= E1000_RCTL_RDMTS_HEX;
3759		E1000_WRITE_REG(hw, E1000_IOSFPC, reg);
3760		reg = E1000_READ_REG(hw, E1000_TARC(0));
3761		reg |= E1000_TARC0_CB_MULTIQ_3_REQ;
3762		E1000_WRITE_REG(hw, E1000_TARC(0), reg);
3763	}
3764}
3765
3766
3767/*********************************************************************
3768 *
3769 *  Free all transmit rings.
3770 *
3771 **********************************************************************/
3772static void
3773em_free_transmit_structures(struct adapter *adapter)
3774{
3775	struct tx_ring *txr = adapter->tx_rings;
3776
3777	for (int i = 0; i < adapter->num_queues; i++, txr++) {
3778		EM_TX_LOCK(txr);
3779		em_free_transmit_buffers(txr);
3780		em_dma_free(adapter, &txr->txdma);
3781		EM_TX_UNLOCK(txr);
3782		EM_TX_LOCK_DESTROY(txr);
3783	}
3784
3785	free(adapter->tx_rings, M_DEVBUF);
3786}
3787
3788/*********************************************************************
3789 *
3790 *  Free transmit ring related data structures.
3791 *
3792 **********************************************************************/
3793static void
3794em_free_transmit_buffers(struct tx_ring *txr)
3795{
3796	struct adapter		*adapter = txr->adapter;
3797	struct em_txbuffer	*txbuf;
3798
3799	INIT_DEBUGOUT("free_transmit_ring: begin");
3800
3801	if (txr->tx_buffers == NULL)
3802		return;
3803
3804	for (int i = 0; i < adapter->num_tx_desc; i++) {
3805		txbuf = &txr->tx_buffers[i];
3806		if (txbuf->m_head != NULL) {
3807			bus_dmamap_sync(txr->txtag, txbuf->map,
3808			    BUS_DMASYNC_POSTWRITE);
3809			bus_dmamap_unload(txr->txtag,
3810			    txbuf->map);
3811			m_freem(txbuf->m_head);
3812			txbuf->m_head = NULL;
3813			if (txbuf->map != NULL) {
3814				bus_dmamap_destroy(txr->txtag,
3815				    txbuf->map);
3816				txbuf->map = NULL;
3817			}
3818		} else if (txbuf->map != NULL) {
3819			bus_dmamap_unload(txr->txtag,
3820			    txbuf->map);
3821			bus_dmamap_destroy(txr->txtag,
3822			    txbuf->map);
3823			txbuf->map = NULL;
3824		}
3825	}
3826#if __FreeBSD_version >= 800000
3827	if (txr->br != NULL)
3828		buf_ring_free(txr->br, M_DEVBUF);
3829#endif
3830	if (txr->tx_buffers != NULL) {
3831		free(txr->tx_buffers, M_DEVBUF);
3832		txr->tx_buffers = NULL;
3833	}
3834	if (txr->txtag != NULL) {
3835		bus_dma_tag_destroy(txr->txtag);
3836		txr->txtag = NULL;
3837	}
3838	return;
3839}
3840
3841
3842/*********************************************************************
 *  The offload context is protocol specific (TCP/UDP) and thus
 *  only needs to be set when the protocol changes. However, the
 *  occasion of a context change can be a performance detriment,
 *  and the feature might be better left disabled. The reason lies
 *  in the way the controller pipelines requests from the Tx data
 *  DMA: up to four requests can be pipelined, and they may belong
 *  to the same packet or to multiple packets. All requests for one
 *  packet are issued before any request for a subsequent packet,
 *  and if a request for the next packet requires a context change,
 *  that request stalls until the previous requests complete.
 *  Setting up a new context thus effectively disables pipelined
 *  Tx data DMA, which in turn greatly slows down the transmission
 *  of small frames.
3857 **********************************************************************/
3858static void
3859em_transmit_checksum_setup(struct tx_ring *txr, struct mbuf *mp, int ip_off,
3860    struct ip *ip, u32 *txd_upper, u32 *txd_lower)
3861{
3862	struct adapter			*adapter = txr->adapter;
3863	struct e1000_context_desc	*TXD = NULL;
3864	struct em_txbuffer		*tx_buffer;
3865	int				cur, hdr_len;
3866	u32				cmd = 0;
3867	u16				offload = 0;
3868	u8				ipcso, ipcss, tucso, tucss;
3869
3870	ipcss = ipcso = tucss = tucso = 0;
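	/*
	 * ip_hl counts 32-bit words, so a standard 14 byte Ethernet
	 * header (ip_off) plus a 20 byte IPv4 header (ip_hl == 5)
	 * gives hdr_len == 34 below.
	 */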
3871	hdr_len = ip_off + (ip->ip_hl << 2);
3872	cur = txr->next_avail_desc;
3873
3874	/* Setup of IP header checksum. */
3875	if (mp->m_pkthdr.csum_flags & CSUM_IP) {
3876		*txd_upper |= E1000_TXD_POPTS_IXSM << 8;
3877		offload |= CSUM_IP;
3878		ipcss = ip_off;
3879		ipcso = ip_off + offsetof(struct ip, ip_sum);
3880		/*
3881		 * Start offset for header checksum calculation.
3882		 * End offset for header checksum calculation.
3883		 * Offset of place to put the checksum.
3884		 */
3885		TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
3886		TXD->lower_setup.ip_fields.ipcss = ipcss;
3887		TXD->lower_setup.ip_fields.ipcse = htole16(hdr_len);
3888		TXD->lower_setup.ip_fields.ipcso = ipcso;
3889		cmd |= E1000_TXD_CMD_IP;
3890	}
3891
3892	if (mp->m_pkthdr.csum_flags & CSUM_TCP) {
3893 		*txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
3894 		*txd_upper |= E1000_TXD_POPTS_TXSM << 8;
3895 		offload |= CSUM_TCP;
3896 		tucss = hdr_len;
3897 		tucso = hdr_len + offsetof(struct tcphdr, th_sum);
3898		/*
3899		 * The 82574L can only remember the *last* context used
		 * regardless of the queue it was used for.  We cannot reuse
3901		 * contexts on this hardware platform and must generate a new
3902		 * context every time.  82574L hardware spec, section 7.2.6,
3903		 * second note.
3904		 */
3905		if (adapter->num_queues < 2) {
			/*
			 * Setting up a new checksum offload context for
			 * every frame takes a lot of processing time for
			 * the hardware.  This also reduces performance a
			 * lot for small frames, so avoid it if the driver
			 * can use a previously configured checksum
			 * offload context.
			 */
3913 			if (txr->last_hw_offload == offload) {
3914 				if (offload & CSUM_IP) {
3915 					if (txr->last_hw_ipcss == ipcss &&
3916 				    	txr->last_hw_ipcso == ipcso &&
3917 				    	txr->last_hw_tucss == tucss &&
3918 				    	txr->last_hw_tucso == tucso)
3919 						return;
3920 				} else {
3921 					if (txr->last_hw_tucss == tucss &&
3922 				    	txr->last_hw_tucso == tucso)
3923 						return;
3924 				}
3925  			}
3926 			txr->last_hw_offload = offload;
3927 			txr->last_hw_tucss = tucss;
3928 			txr->last_hw_tucso = tucso;
3929		}
3930 		/*
3931 		 * Start offset for payload checksum calculation.
3932 		 * End offset for payload checksum calculation.
3933 		 * Offset of place to put the checksum.
3934 		 */
3935		TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
3936 		TXD->upper_setup.tcp_fields.tucss = hdr_len;
3937 		TXD->upper_setup.tcp_fields.tucse = htole16(0);
3938 		TXD->upper_setup.tcp_fields.tucso = tucso;
3939 		cmd |= E1000_TXD_CMD_TCP;
3940 	} else if (mp->m_pkthdr.csum_flags & CSUM_UDP) {
3941 		*txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
3942 		*txd_upper |= E1000_TXD_POPTS_TXSM << 8;
3943 		tucss = hdr_len;
3944 		tucso = hdr_len + offsetof(struct udphdr, uh_sum);
3945		/*
3946		 * The 82574L can only remember the *last* context used
		 * regardless of the queue it was used for.  We cannot reuse
3948		 * contexts on this hardware platform and must generate a new
3949		 * context every time.  82574L hardware spec, section 7.2.6,
3950		 * second note.
3951		 */
3952		if (adapter->num_queues < 2) {
			/*
			 * Setting up a new checksum offload context for
			 * every frame takes a lot of processing time for
			 * the hardware.  This also reduces performance a
			 * lot for small frames, so avoid it if the driver
			 * can use a previously configured checksum
			 * offload context.
			 */
3960 			if (txr->last_hw_offload == offload) {
3961 				if (offload & CSUM_IP) {
3962 					if (txr->last_hw_ipcss == ipcss &&
3963 				    	txr->last_hw_ipcso == ipcso &&
3964 				    	txr->last_hw_tucss == tucss &&
3965 				    	txr->last_hw_tucso == tucso)
3966 						return;
3967 				} else {
3968 					if (txr->last_hw_tucss == tucss &&
3969 				    	txr->last_hw_tucso == tucso)
3970 						return;
3971 				}
3972 			}
3973 			txr->last_hw_offload = offload;
3974 			txr->last_hw_tucss = tucss;
3975 			txr->last_hw_tucso = tucso;
3976		}
3977 		/*
3978 		 * Start offset for header checksum calculation.
3979 		 * End offset for header checksum calculation.
3980 		 * Offset of place to put the checksum.
3981 		 */
3982		TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
3983 		TXD->upper_setup.tcp_fields.tucss = tucss;
3984 		TXD->upper_setup.tcp_fields.tucse = htole16(0);
3985 		TXD->upper_setup.tcp_fields.tucso = tucso;
3986  	}
3987
3988 	if (offload & CSUM_IP) {
3989 		txr->last_hw_ipcss = ipcss;
3990 		txr->last_hw_ipcso = ipcso;
3991  	}
3992
3993	TXD->tcp_seg_setup.data = htole32(0);
3994	TXD->cmd_and_length =
3995	    htole32(adapter->txd_cmd | E1000_TXD_CMD_DEXT | cmd);
3996	tx_buffer = &txr->tx_buffers[cur];
3997	tx_buffer->m_head = NULL;
3998	tx_buffer->next_eop = -1;
3999
4000	if (++cur == adapter->num_tx_desc)
4001		cur = 0;
4002
4003	txr->tx_avail--;
4004	txr->next_avail_desc = cur;
4005}
4006
4007
4008/**********************************************************************
4009 *
4010 *  Setup work for hardware segmentation offload (TSO)
4011 *
4012 **********************************************************************/
4013static void
4014em_tso_setup(struct tx_ring *txr, struct mbuf *mp, int ip_off,
4015    struct ip *ip, struct tcphdr *tp, u32 *txd_upper, u32 *txd_lower)
4016{
4017	struct adapter			*adapter = txr->adapter;
4018	struct e1000_context_desc	*TXD;
4019	struct em_txbuffer		*tx_buffer;
4020	int cur, hdr_len;
4021
	/*
	 * In theory we could reuse the same TSO context if and only if
	 * the frame is the same type (IP/TCP) and has the same MSS.
	 * However, checking whether a frame has the same IP/TCP
	 * structure is hard, so just ignore that and always establish
	 * a new TSO context.
	 */
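	/*
	 * For a typical frame, hdr_len below works out to 14 (Ethernet)
	 * + 20 (IPv4, ip_hl == 5) + 20 (TCP, th_off == 5) = 54 bytes.
	 */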
4029	hdr_len = ip_off + (ip->ip_hl << 2) + (tp->th_off << 2);
4030	*txd_lower = (E1000_TXD_CMD_DEXT |	/* Extended descr type */
4031		      E1000_TXD_DTYP_D |	/* Data descr type */
4032		      E1000_TXD_CMD_TSE);	/* Do TSE on this packet */
4033
4034	/* IP and/or TCP header checksum calculation and insertion. */
4035	*txd_upper = (E1000_TXD_POPTS_IXSM | E1000_TXD_POPTS_TXSM) << 8;
4036
4037	cur = txr->next_avail_desc;
4038	tx_buffer = &txr->tx_buffers[cur];
4039	TXD = (struct e1000_context_desc *) &txr->tx_base[cur];
4040
4041	/*
4042	 * Start offset for header checksum calculation.
4043	 * End offset for header checksum calculation.
	 * Offset of place to put the checksum.
4045	 */
4046	TXD->lower_setup.ip_fields.ipcss = ip_off;
4047	TXD->lower_setup.ip_fields.ipcse =
4048	    htole16(ip_off + (ip->ip_hl << 2) - 1);
4049	TXD->lower_setup.ip_fields.ipcso = ip_off + offsetof(struct ip, ip_sum);
4050	/*
4051	 * Start offset for payload checksum calculation.
4052	 * End offset for payload checksum calculation.
4053	 * Offset of place to put the checksum.
4054	 */
4055	TXD->upper_setup.tcp_fields.tucss = ip_off + (ip->ip_hl << 2);
4056	TXD->upper_setup.tcp_fields.tucse = 0;
4057	TXD->upper_setup.tcp_fields.tucso =
4058	    ip_off + (ip->ip_hl << 2) + offsetof(struct tcphdr, th_sum);
4059	/*
4060	 * Payload size per packet w/o any headers.
4061	 * Length of all headers up to payload.
4062	 */
4063	TXD->tcp_seg_setup.fields.mss = htole16(mp->m_pkthdr.tso_segsz);
4064	TXD->tcp_seg_setup.fields.hdr_len = hdr_len;
4065
4066	TXD->cmd_and_length = htole32(adapter->txd_cmd |
4067				E1000_TXD_CMD_DEXT |	/* Extended descr */
4068				E1000_TXD_CMD_TSE |	/* TSE context */
4069				E1000_TXD_CMD_IP |	/* Do IP csum */
4070				E1000_TXD_CMD_TCP |	/* Do TCP checksum */
4071				(mp->m_pkthdr.len - (hdr_len))); /* Total len */
4072
4073	tx_buffer->m_head = NULL;
4074	tx_buffer->next_eop = -1;
4075
4076	if (++cur == adapter->num_tx_desc)
4077		cur = 0;
4078
4079	txr->tx_avail--;
4080	txr->next_avail_desc = cur;
4081	txr->tx_tso = TRUE;
4082}
4083
4084
4085/**********************************************************************
4086 *
4087 *  Examine each tx_buffer in the used queue. If the hardware is done
4088 *  processing the packet then free associated resources. The
4089 *  tx_buffer is put back on the free queue.
4090 *
4091 **********************************************************************/
4092static void
4093em_txeof(struct tx_ring *txr)
4094{
4095	struct adapter	*adapter = txr->adapter;
	int first, last, done, processed;
	struct em_txbuffer *tx_buffer;
	struct e1000_tx_desc *tx_desc, *eop_desc;
4099	if_t ifp = adapter->ifp;
4100
4101	EM_TX_LOCK_ASSERT(txr);
4102#ifdef DEV_NETMAP
4103	if (netmap_tx_irq(ifp, txr->me))
4104		return;
4105#endif /* DEV_NETMAP */
4106
4107	/* No work, make sure hang detection is disabled */
	if (txr->tx_avail == adapter->num_tx_desc) {
		txr->busy = EM_TX_IDLE;
		return;
	}
4112
	processed = 0;
	first = txr->next_to_clean;
	tx_desc = &txr->tx_base[first];
	tx_buffer = &txr->tx_buffers[first];
	last = tx_buffer->next_eop;
	eop_desc = &txr->tx_base[last];
4119
4120	/*
4121	 * What this does is get the index of the
4122	 * first descriptor AFTER the EOP of the
4123	 * first packet, that way we can do the
4124	 * simple comparison on the inner while loop.
4125	 */
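	/*
	 * For example, with the first packet's EOP at descriptor 7,
	 * "done" becomes 8 and the inner loop below reclaims
	 * descriptors until "first" reaches 8.
	 */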
4126	if (++last == adapter->num_tx_desc)
4127 		last = 0;
4128	done = last;
4129
	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
	    BUS_DMASYNC_POSTREAD);

	while (eop_desc->upper.fields.status & E1000_TXD_STAT_DD) {
		/* We clean the range of the packet */
		while (first != done) {
			tx_desc->upper.data = 0;
			tx_desc->lower.data = 0;
			tx_desc->buffer_addr = 0;
			++txr->tx_avail;
			++processed;

			if (tx_buffer->m_head) {
				bus_dmamap_sync(txr->txtag,
				    tx_buffer->map,
				    BUS_DMASYNC_POSTWRITE);
				bus_dmamap_unload(txr->txtag,
				    tx_buffer->map);
				m_freem(tx_buffer->m_head);
				tx_buffer->m_head = NULL;
			}
			tx_buffer->next_eop = -1;

			if (++first == adapter->num_tx_desc)
				first = 0;

			tx_buffer = &txr->tx_buffers[first];
			tx_desc = &txr->tx_base[first];
		}
		if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1);
		/* See if we can continue to the next packet */
		last = tx_buffer->next_eop;
		if (last != -1) {
			eop_desc = &txr->tx_base[last];
			/* Get new done point */
			if (++last == adapter->num_tx_desc)
				last = 0;
			done = last;
		} else
			break;
	}
	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);

	txr->next_to_clean = first;
4174
4175	/*
4176	** Hang detection: we know there's work outstanding
4177	** or the entry return would have been taken, so no
4178	** descriptor processed here indicates a potential hang.
4179	** The local timer will examine this and do a reset if needed.
4180	*/
4181	if (processed == 0) {
4182		if (txr->busy != EM_TX_HUNG)
4183			++txr->busy;
4184	} else /* At least one descriptor was cleaned */
4185		txr->busy = EM_TX_BUSY; /* note this clears HUNG */
4186
	/*
	 * If we have a minimum free, clear IFF_DRV_OACTIVE
	 * to tell the stack that it is OK to send packets.
	 * Notice that all writes of OACTIVE happen under the
	 * TX lock which, with a single queue, guarantees
	 * sanity.
	 */
	if (txr->tx_avail >= EM_MAX_SCATTER)
		if_setdrvflagbits(ifp, 0, IFF_DRV_OACTIVE);
4197
4198	/* Disable hang detection if all clean */
4199	if (txr->tx_avail == adapter->num_tx_desc)
4200		txr->busy = EM_TX_IDLE;
4201}
4202
4203/*********************************************************************
4204 *
4205 *  Refresh RX descriptor mbufs from system mbuf buffer pool.
4206 *
4207 **********************************************************************/
4208static void
4209em_refresh_mbufs(struct rx_ring *rxr, int limit)
4210{
4211	struct adapter		*adapter = rxr->adapter;
4212	struct mbuf		*m;
4213	bus_dma_segment_t	segs;
4214	struct em_rxbuffer	*rxbuf;
4215	int			i, j, error, nsegs;
4216	bool			cleaned = FALSE;
4217
4218	i = j = rxr->next_to_refresh;
4219	/*
4220	** Get one descriptor beyond
4221	** our work mark to control
4222	** the loop.
4223	*/
4224	if (++j == adapter->num_rx_desc)
4225		j = 0;
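	/*
	 * j stays one slot ahead of i, so by the time the loop exits
	 * every descriptor before next_to_refresh owns a fresh mbuf
	 * and the tail register can be advanced at "update" below.
	 */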
4226
4227	while (j != limit) {
4228		rxbuf = &rxr->rx_buffers[i];
4229		if (rxbuf->m_head == NULL) {
4230			m = m_getjcl(M_NOWAIT, MT_DATA,
4231			    M_PKTHDR, adapter->rx_mbuf_sz);
4232			/*
4233			** If we have a temporary resource shortage
4234			** that causes a failure, just abort refresh
4235			** for now, we will return to this point when
4236			** reinvoked from em_rxeof.
4237			*/
4238			if (m == NULL)
4239				goto update;
4240		} else
4241			m = rxbuf->m_head;
4242
4243		m->m_len = m->m_pkthdr.len = adapter->rx_mbuf_sz;
4244		m->m_flags |= M_PKTHDR;
4245		m->m_data = m->m_ext.ext_buf;
4246
4247		/* Use bus_dma machinery to setup the memory mapping  */
4248		error = bus_dmamap_load_mbuf_sg(rxr->rxtag, rxbuf->map,
4249		    m, &segs, &nsegs, BUS_DMA_NOWAIT);
4250		if (error != 0) {
4251			printf("Refresh mbufs: hdr dmamap load"
4252			    " failure - %d\n", error);
4253			m_free(m);
4254			rxbuf->m_head = NULL;
4255			goto update;
4256		}
4257		rxbuf->m_head = m;
4258		rxbuf->paddr = segs.ds_addr;
4259		bus_dmamap_sync(rxr->rxtag,
4260		    rxbuf->map, BUS_DMASYNC_PREREAD);
4261		em_setup_rxdesc(&rxr->rx_base[i], rxbuf);
4262		cleaned = TRUE;
4263
		i = j; /* Next is precalculated for us */
4265		rxr->next_to_refresh = i;
4266		/* Calculate next controlling index */
4267		if (++j == adapter->num_rx_desc)
4268			j = 0;
4269	}
4270update:
4271	/*
4272	** Update the tail pointer only if,
4273	** and as far as we have refreshed.
4274	*/
4275	if (cleaned)
4276		E1000_WRITE_REG(&adapter->hw,
4277		    E1000_RDT(rxr->me), rxr->next_to_refresh);
4278
4279	return;
4280}
4281
4282
4283/*********************************************************************
4284 *
4285 *  Allocate memory for rx_buffer structures. Since we use one
4286 *  rx_buffer per received packet, the maximum number of rx_buffer's
4287 *  that we'll need is equal to the number of receive descriptors
4288 *  that we've allocated.
4289 *
4290 **********************************************************************/
4291static int
4292em_allocate_receive_buffers(struct rx_ring *rxr)
4293{
4294	struct adapter		*adapter = rxr->adapter;
4295	device_t		dev = adapter->dev;
4296	struct em_rxbuffer	*rxbuf;
4297	int			error;
4298
4299	rxr->rx_buffers = malloc(sizeof(struct em_rxbuffer) *
4300	    adapter->num_rx_desc, M_DEVBUF, M_NOWAIT | M_ZERO);
4301	if (rxr->rx_buffers == NULL) {
4302		device_printf(dev, "Unable to allocate rx_buffer memory\n");
4303		return (ENOMEM);
4304	}
4305
4306	error = bus_dma_tag_create(bus_get_dma_tag(dev), /* parent */
4307				1, 0,			/* alignment, bounds */
4308				BUS_SPACE_MAXADDR,	/* lowaddr */
4309				BUS_SPACE_MAXADDR,	/* highaddr */
4310				NULL, NULL,		/* filter, filterarg */
4311				MJUM9BYTES,		/* maxsize */
4312				1,			/* nsegments */
4313				MJUM9BYTES,		/* maxsegsize */
4314				0,			/* flags */
4315				NULL,			/* lockfunc */
4316				NULL,			/* lockarg */
4317				&rxr->rxtag);
4318	if (error) {
4319		device_printf(dev, "%s: bus_dma_tag_create failed %d\n",
4320		    __func__, error);
4321		goto fail;
4322	}
4323
	rxbuf = rxr->rx_buffers;
	for (int i = 0; i < adapter->num_rx_desc; i++, rxbuf++) {
4327		error = bus_dmamap_create(rxr->rxtag, 0, &rxbuf->map);
4328		if (error) {
4329			device_printf(dev, "%s: bus_dmamap_create failed: %d\n",
4330			    __func__, error);
4331			goto fail;
4332		}
4333	}
4334
4335	return (0);
4336
4337fail:
4338	em_free_receive_structures(adapter);
4339	return (error);
4340}
4341
4342
4343/*********************************************************************
4344 *
4345 *  Initialize a receive ring and its buffers.
4346 *
4347 **********************************************************************/
4348static int
4349em_setup_receive_ring(struct rx_ring *rxr)
4350{
4351	struct	adapter 	*adapter = rxr->adapter;
4352	struct em_rxbuffer	*rxbuf;
4353	bus_dma_segment_t	seg[1];
4354	int			rsize, nsegs, error = 0;
4355#ifdef DEV_NETMAP
4356	struct netmap_slot *slot;
4357	struct netmap_adapter *na = netmap_getna(adapter->ifp);
4358#endif
4359
4360
4361	/* Clear the ring contents */
4362	EM_RX_LOCK(rxr);
4363	rsize = roundup2(adapter->num_rx_desc *
4364	    sizeof(union e1000_rx_desc_extended), EM_DBA_ALIGN);
4365	bzero((void *)rxr->rx_base, rsize);
4366#ifdef DEV_NETMAP
4367	slot = netmap_reset(na, NR_RX, rxr->me, 0);
4368#endif
4369
4370	/*
4371	** Free current RX buffer structs and their mbufs
4372	*/
4373	for (int i = 0; i < adapter->num_rx_desc; i++) {
4374		rxbuf = &rxr->rx_buffers[i];
4375		if (rxbuf->m_head != NULL) {
4376			bus_dmamap_sync(rxr->rxtag, rxbuf->map,
4377			    BUS_DMASYNC_POSTREAD);
4378			bus_dmamap_unload(rxr->rxtag, rxbuf->map);
4379			m_freem(rxbuf->m_head);
4380			rxbuf->m_head = NULL; /* mark as freed */
4381		}
4382	}
4383
4384	/* Now replenish the mbufs */
4385        for (int j = 0; j != adapter->num_rx_desc; ++j) {
4386		rxbuf = &rxr->rx_buffers[j];
4387#ifdef DEV_NETMAP
4388		if (slot) {
4389			int si = netmap_idx_n2k(&na->rx_rings[rxr->me], j);
4390			uint64_t paddr;
4391			void *addr;
4392
4393			addr = PNMB(na, slot + si, &paddr);
4394			netmap_load_map(na, rxr->rxtag, rxbuf->map, addr);
4395			rxbuf->paddr = paddr;
4396			em_setup_rxdesc(&rxr->rx_base[j], rxbuf);
4397			continue;
4398		}
4399#endif /* DEV_NETMAP */
4400		rxbuf->m_head = m_getjcl(M_NOWAIT, MT_DATA,
4401		    M_PKTHDR, adapter->rx_mbuf_sz);
4402		if (rxbuf->m_head == NULL) {
4403			error = ENOBUFS;
4404			goto fail;
4405		}
4406		rxbuf->m_head->m_len = adapter->rx_mbuf_sz;
4407		rxbuf->m_head->m_flags &= ~M_HASFCS; /* we strip it */
4408		rxbuf->m_head->m_pkthdr.len = adapter->rx_mbuf_sz;
4409
4410		/* Get the memory mapping */
4411		error = bus_dmamap_load_mbuf_sg(rxr->rxtag,
4412		    rxbuf->map, rxbuf->m_head, seg,
4413		    &nsegs, BUS_DMA_NOWAIT);
4414		if (error != 0) {
4415			m_freem(rxbuf->m_head);
4416			rxbuf->m_head = NULL;
4417			goto fail;
4418		}
4419		bus_dmamap_sync(rxr->rxtag,
4420		    rxbuf->map, BUS_DMASYNC_PREREAD);
4421
4422		rxbuf->paddr = seg[0].ds_addr;
4423		em_setup_rxdesc(&rxr->rx_base[j], rxbuf);
4424	}
4425	rxr->next_to_check = 0;
4426	rxr->next_to_refresh = 0;
4427	bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4428	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4429
4430fail:
4431	EM_RX_UNLOCK(rxr);
4432	return (error);
4433}
4434
4435/*********************************************************************
4436 *
4437 *  Initialize all receive rings.
4438 *
4439 **********************************************************************/
4440static int
4441em_setup_receive_structures(struct adapter *adapter)
4442{
4443	struct rx_ring *rxr = adapter->rx_rings;
4444	int q;
4445
4446	for (q = 0; q < adapter->num_queues; q++, rxr++)
4447		if (em_setup_receive_ring(rxr))
4448			goto fail;
4449
4450	return (0);
4451fail:
	/*
	 * Free the RX buffers allocated so far; we only handle
	 * the rings that completed, since the failing case has
	 * cleaned up after itself. Ring 'q' failed, so it is the
	 * terminus.
	 */
4457	for (int i = 0; i < q; ++i) {
4458		rxr = &adapter->rx_rings[i];
4459		for (int n = 0; n < adapter->num_rx_desc; n++) {
4460			struct em_rxbuffer *rxbuf;
4461			rxbuf = &rxr->rx_buffers[n];
4462			if (rxbuf->m_head != NULL) {
4463				bus_dmamap_sync(rxr->rxtag, rxbuf->map,
4464			  	  BUS_DMASYNC_POSTREAD);
4465				bus_dmamap_unload(rxr->rxtag, rxbuf->map);
4466				m_freem(rxbuf->m_head);
4467				rxbuf->m_head = NULL;
4468			}
4469		}
4470		rxr->next_to_check = 0;
4471		rxr->next_to_refresh = 0;
4472	}
4473
4474	return (ENOBUFS);
4475}
4476
4477/*********************************************************************
4478 *
4479 *  Free all receive rings.
4480 *
4481 **********************************************************************/
4482static void
4483em_free_receive_structures(struct adapter *adapter)
4484{
4485	struct rx_ring *rxr = adapter->rx_rings;
4486
4487	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4488		em_free_receive_buffers(rxr);
4489		/* Free the ring memory as well */
4490		em_dma_free(adapter, &rxr->rxdma);
4491		EM_RX_LOCK_DESTROY(rxr);
4492	}
4493
4494	free(adapter->rx_rings, M_DEVBUF);
4495}
4496
4497
4498/*********************************************************************
4499 *
4500 *  Free receive ring data structures
4501 *
4502 **********************************************************************/
4503static void
4504em_free_receive_buffers(struct rx_ring *rxr)
4505{
4506	struct adapter		*adapter = rxr->adapter;
4507	struct em_rxbuffer	*rxbuf = NULL;
4508
4509	INIT_DEBUGOUT("free_receive_buffers: begin");
4510
4511	if (rxr->rx_buffers != NULL) {
4512		for (int i = 0; i < adapter->num_rx_desc; i++) {
4513			rxbuf = &rxr->rx_buffers[i];
4514			if (rxbuf->map != NULL) {
4515				bus_dmamap_sync(rxr->rxtag, rxbuf->map,
4516				    BUS_DMASYNC_POSTREAD);
4517				bus_dmamap_unload(rxr->rxtag, rxbuf->map);
4518				bus_dmamap_destroy(rxr->rxtag, rxbuf->map);
4519			}
4520			if (rxbuf->m_head != NULL) {
4521				m_freem(rxbuf->m_head);
4522				rxbuf->m_head = NULL;
4523			}
4524		}
4525		free(rxr->rx_buffers, M_DEVBUF);
4526		rxr->rx_buffers = NULL;
4527		rxr->next_to_check = 0;
4528		rxr->next_to_refresh = 0;
4529	}
4530
4531	if (rxr->rxtag != NULL) {
4532		bus_dma_tag_destroy(rxr->rxtag);
4533		rxr->rxtag = NULL;
4534	}
4535
4536	return;
4537}
4538
4539
4540/*********************************************************************
4541 *
4542 *  Enable receive unit.
4543 *
4544 **********************************************************************/
4545
4546static void
4547em_initialize_receive_unit(struct adapter *adapter)
4548{
4549	struct rx_ring *rxr = adapter->rx_rings;
4550	if_t ifp = adapter->ifp;
4551	struct e1000_hw	*hw = &adapter->hw;
4552	u32	rctl, rxcsum, rfctl;
4553
4554	INIT_DEBUGOUT("em_initialize_receive_units: begin");
4555
4556	/*
4557	 * Make sure receives are disabled while setting
4558	 * up the descriptor ring
4559	 */
4560	rctl = E1000_READ_REG(hw, E1000_RCTL);
4561	/* Do not disable if ever enabled on this hardware */
4562	if ((hw->mac.type != e1000_82574) && (hw->mac.type != e1000_82583))
4563		E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
4564
4565	/* Setup the Receive Control Register */
4566	rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
4567	rctl |= E1000_RCTL_EN | E1000_RCTL_BAM |
4568	    E1000_RCTL_LBM_NO | E1000_RCTL_RDMTS_HALF |
4569	    (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
4570
4571	/* Do not store bad packets */
4572	rctl &= ~E1000_RCTL_SBP;
4573
4574	/* Enable Long Packet receive */
4575	if (if_getmtu(ifp) > ETHERMTU)
4576		rctl |= E1000_RCTL_LPE;
4577	else
4578		rctl &= ~E1000_RCTL_LPE;
4579
	/* Strip the CRC */
	if (!em_disable_crc_stripping)
		rctl |= E1000_RCTL_SECRC;
4583
4584	E1000_WRITE_REG(&adapter->hw, E1000_RADV,
4585	    adapter->rx_abs_int_delay.value);
4586
4587	E1000_WRITE_REG(&adapter->hw, E1000_RDTR,
4588	    adapter->rx_int_delay.value);
4589	/*
4590	 * Set the interrupt throttling rate. Value is calculated
4591	 * as DEFAULT_ITR = 1/(MAX_INTS_PER_SEC * 256ns)
4592	 */
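	/*
	 * Assuming MAX_INTS_PER_SEC is 8000, DEFAULT_ITR works out to
	 * 10^9 / (8000 * 256) =~ 488, the minimum gap between
	 * interrupts expressed in 256 ns units.
	 */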
4593	E1000_WRITE_REG(hw, E1000_ITR, DEFAULT_ITR);
4594
4595	/* Use extended rx descriptor formats */
4596	rfctl = E1000_READ_REG(hw, E1000_RFCTL);
4597	rfctl |= E1000_RFCTL_EXTEN;
4598	/*
4599	** When using MSIX interrupts we need to throttle
4600	** using the EITR register (82574 only)
4601	*/
4602	if (hw->mac.type == e1000_82574) {
4603		for (int i = 0; i < 4; i++)
4604			E1000_WRITE_REG(hw, E1000_EITR_82574(i),
4605			    DEFAULT_ITR);
4606		/* Disable accelerated acknowledge */
4607		rfctl |= E1000_RFCTL_ACK_DIS;
4608	}
4609	E1000_WRITE_REG(hw, E1000_RFCTL, rfctl);
4610
4611	rxcsum = E1000_READ_REG(hw, E1000_RXCSUM);
4612	if (if_getcapenable(ifp) & IFCAP_RXCSUM) {
4613#ifdef EM_MULTIQUEUE
4614		rxcsum |= E1000_RXCSUM_TUOFL |
4615			  E1000_RXCSUM_IPOFL |
4616			  E1000_RXCSUM_PCSD;
4617#else
4618		rxcsum |= E1000_RXCSUM_TUOFL;
4619#endif
4620	} else
4621		rxcsum &= ~E1000_RXCSUM_TUOFL;
4622
4623	E1000_WRITE_REG(hw, E1000_RXCSUM, rxcsum);
4624
4625#ifdef EM_MULTIQUEUE
4626#define RSSKEYLEN 10
4627	if (adapter->num_queues > 1) {
4628		uint8_t  rss_key[4 * RSSKEYLEN];
4629		uint32_t reta = 0;
4630		int i;
4631
		/*
		 * Configure the RSS key
		 */
4635		arc4rand(rss_key, sizeof(rss_key), 0);
4636		for (i = 0; i < RSSKEYLEN; ++i) {
4637			uint32_t rssrk = 0;
4638
4639			rssrk = EM_RSSRK_VAL(rss_key, i);
4640			E1000_WRITE_REG(hw,E1000_RSSRK(i), rssrk);
4641		}
4642
		/*
		 * Configure the RSS redirect table in the following fashion:
		 * (hash & ring_cnt_mask) == rdr_table[(hash & rdr_table_mask)]
		 */
4647		for (i = 0; i < sizeof(reta); ++i) {
4648			uint32_t q;
4649
4650			q = (i % adapter->num_queues) << 7;
4651			reta |= q << (8 * i);
4652		}
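		/*
		 * With two queues this yields reta == 0x80008000: the
		 * 82574 selects the queue via bit 7 of each table byte,
		 * so entries alternate between queue 0 and queue 1, and
		 * the same 4-entry pattern is replicated across all 32
		 * RETA registers below.
		 */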
4653
4654		for (i = 0; i < 32; ++i) {
4655			E1000_WRITE_REG(hw, E1000_RETA(i), reta);
4656		}
4657
4658		E1000_WRITE_REG(hw, E1000_MRQC, E1000_MRQC_RSS_ENABLE_2Q |
4659				E1000_MRQC_RSS_FIELD_IPV4_TCP |
4660				E1000_MRQC_RSS_FIELD_IPV4 |
4661				E1000_MRQC_RSS_FIELD_IPV6_TCP_EX |
4662				E1000_MRQC_RSS_FIELD_IPV6_EX |
4663				E1000_MRQC_RSS_FIELD_IPV6);
4664	}
4665#endif
4666	/*
4667	** XXX TEMPORARY WORKAROUND: on some systems with 82573
4668	** long latencies are observed, like Lenovo X60. This
4669	** change eliminates the problem, but since having positive
4670	** values in RDTR is a known source of problems on other
4671	** platforms another solution is being sought.
4672	*/
4673	if (hw->mac.type == e1000_82573)
4674		E1000_WRITE_REG(hw, E1000_RDTR, 0x20);
4675
4676	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4677		/* Setup the Base and Length of the Rx Descriptor Ring */
4678		u64 bus_addr = rxr->rxdma.dma_paddr;
4679		u32 rdt = adapter->num_rx_desc - 1; /* default */
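		/*
		 * Head at 0 with tail at num_rx_desc - 1 is the
		 * conventional "full ring" state: the hardware may use
		 * every descriptor up to, but not including, the tail,
		 * so head == tail unambiguously means it has none left.
		 */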
4680
4681		E1000_WRITE_REG(hw, E1000_RDLEN(i),
4682		    adapter->num_rx_desc * sizeof(union e1000_rx_desc_extended));
4683		E1000_WRITE_REG(hw, E1000_RDBAH(i), (u32)(bus_addr >> 32));
4684		E1000_WRITE_REG(hw, E1000_RDBAL(i), (u32)bus_addr);
4685		/* Setup the Head and Tail Descriptor Pointers */
4686		E1000_WRITE_REG(hw, E1000_RDH(i), 0);
4687#ifdef DEV_NETMAP
4688		/*
4689		 * an init() while a netmap client is active must
4690		 * preserve the rx buffers passed to userspace.
4691		 */
4692		if (if_getcapenable(ifp) & IFCAP_NETMAP) {
4693			struct netmap_adapter *na = netmap_getna(adapter->ifp);
4694			rdt -= nm_kr_rxspace(&na->rx_rings[i]);
4695		}
4696#endif /* DEV_NETMAP */
4697		E1000_WRITE_REG(hw, E1000_RDT(i), rdt);
4698	}
4699
4700	/*
4701	 * Set PTHRESH for improved jumbo performance
4702	 * According to 10.2.5.11 of Intel 82574 Datasheet,
4703	 * RXDCTL(1) is written whenever RXDCTL(0) is written.
4704	 * Only write to RXDCTL(1) if there is a need for different
4705	 * settings.
4706	 */
4707	if (((adapter->hw.mac.type == e1000_ich9lan) ||
4708	    (adapter->hw.mac.type == e1000_pch2lan) ||
4709	    (adapter->hw.mac.type == e1000_ich10lan)) &&
4710	    (if_getmtu(ifp) > ETHERMTU)) {
4711		u32 rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(0));
4712		E1000_WRITE_REG(hw, E1000_RXDCTL(0), rxdctl | 3);
4713	} else if (adapter->hw.mac.type == e1000_82574) {
4714		for (int i = 0; i < adapter->num_queues; i++) {
4715			u32 rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(i));
4716
4717			rxdctl |= 0x20; /* PTHRESH */
4718			rxdctl |= 4 << 8; /* HTHRESH */
4719			rxdctl |= 4 << 16;/* WTHRESH */
4720			rxdctl |= 1 << 24; /* Switch to granularity */
4721			E1000_WRITE_REG(hw, E1000_RXDCTL(i), rxdctl);
4722		}
4723	}
4724
4725	if (adapter->hw.mac.type >= e1000_pch2lan) {
4726		if (if_getmtu(ifp) > ETHERMTU)
4727			e1000_lv_jumbo_workaround_ich8lan(hw, TRUE);
4728		else
4729			e1000_lv_jumbo_workaround_ich8lan(hw, FALSE);
4730	}
4731
	/* Make sure VLAN Filters are off */
	rctl &= ~E1000_RCTL_VFE;
4734
4735	if (adapter->rx_mbuf_sz == MCLBYTES)
4736		rctl |= E1000_RCTL_SZ_2048;
4737	else if (adapter->rx_mbuf_sz == MJUMPAGESIZE)
4738		rctl |= E1000_RCTL_SZ_4096 | E1000_RCTL_BSEX;
4739	else if (adapter->rx_mbuf_sz > MJUMPAGESIZE)
4740		rctl |= E1000_RCTL_SZ_8192 | E1000_RCTL_BSEX;
4741
4742	/* ensure we clear use DTYPE of 00 here */
4743	rctl &= ~0x00000C00;
4744	/* Write out the settings */
4745	E1000_WRITE_REG(hw, E1000_RCTL, rctl);
4746
4747	return;
4748}
4749
4750
4751/*********************************************************************
4752 *
4753 *  This routine executes in interrupt context. It replenishes
4754 *  the mbufs in the descriptor and sends data which has been
4755 *  dma'ed into host memory to upper layer.
4756 *
4757 *  We loop at most count times if count is > 0, or until done if
4758 *  count < 0.
4759 *
4760 *  For polling we also now return the number of cleaned packets
4761 *********************************************************************/
4762static bool
4763em_rxeof(struct rx_ring *rxr, int count, int *done)
4764{
4765	struct adapter		*adapter = rxr->adapter;
4766	if_t ifp = adapter->ifp;
4767	struct mbuf		*mp, *sendmp;
4768	u32			status = 0;
4769	u16 			len;
4770	int			i, processed, rxdone = 0;
4771	bool			eop;
4772	union e1000_rx_desc_extended	*cur;
4773
4774	EM_RX_LOCK(rxr);
4775
4776	/* Sync the ring */
4777	bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4778	    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
4779
4780
4781#ifdef DEV_NETMAP
4782	if (netmap_rx_irq(ifp, rxr->me, &processed)) {
4783		EM_RX_UNLOCK(rxr);
4784		return (FALSE);
4785	}
4786#endif /* DEV_NETMAP */
4787
4788	for (i = rxr->next_to_check, processed = 0; count != 0;) {
4789		if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0)
4790			break;
4791
4792		cur = &rxr->rx_base[i];
4793		status = le32toh(cur->wb.upper.status_error);
4794		mp = sendmp = NULL;
4795
4796		if ((status & E1000_RXD_STAT_DD) == 0)
4797			break;
4798
4799		len = le16toh(cur->wb.upper.length);
4800		eop = (status & E1000_RXD_STAT_EOP) != 0;
4801
4802		if ((status & E1000_RXDEXT_ERR_FRAME_ERR_MASK) ||
4803		    (rxr->discard == TRUE)) {
4804			adapter->dropped_pkts++;
4805			++rxr->rx_discarded;
4806			if (!eop) /* Catch subsequent segs */
4807				rxr->discard = TRUE;
4808			else
4809				rxr->discard = FALSE;
4810			em_rx_discard(rxr, i);
4811			goto next_desc;
4812		}
4813		bus_dmamap_unload(rxr->rxtag, rxr->rx_buffers[i].map);
4814
4815		/* Assign correct length to the current fragment */
4816		mp = rxr->rx_buffers[i].m_head;
4817		mp->m_len = len;
4818
4819		/* Trigger for refresh */
4820		rxr->rx_buffers[i].m_head = NULL;
4821
4822		/* First segment? */
4823		if (rxr->fmp == NULL) {
4824			mp->m_pkthdr.len = len;
4825			rxr->fmp = rxr->lmp = mp;
4826		} else {
4827			/* Chain mbuf's together */
4828			mp->m_flags &= ~M_PKTHDR;
4829			rxr->lmp->m_next = mp;
4830			rxr->lmp = mp;
4831			rxr->fmp->m_pkthdr.len += len;
4832		}
4833
4834		if (eop) {
4835			--count;
4836			sendmp = rxr->fmp;
4837			if_setrcvif(sendmp, ifp);
4838			if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1);
4839			em_receive_checksum(status, sendmp);
4840#ifndef __NO_STRICT_ALIGNMENT
4841			if (adapter->hw.mac.max_frame_size >
4842			    (MCLBYTES - ETHER_ALIGN) &&
4843			    em_fixup_rx(rxr) != 0)
4844				goto skip;
4845#endif
4846			if (status & E1000_RXD_STAT_VP) {
4847				if_setvtag(sendmp,
4848				    le16toh(cur->wb.upper.vlan));
4849				sendmp->m_flags |= M_VLANTAG;
4850			}
4851#ifndef __NO_STRICT_ALIGNMENT
4852skip:
4853#endif
4854			rxr->fmp = rxr->lmp = NULL;
4855		}
4856next_desc:
4857		/* Sync the ring */
4858		bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4859	    		BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
4860
4861		/* Zero out the receive descriptors status. */
4862		cur->wb.upper.status_error &= htole32(~0xFF);
4863		++rxdone;	/* cumulative for POLL */
4864		++processed;
4865
4866		/* Advance our pointers to the next descriptor. */
4867		if (++i == adapter->num_rx_desc)
4868			i = 0;
4869
4870		/* Send to the stack */
4871		if (sendmp != NULL) {
4872			rxr->next_to_check = i;
4873			EM_RX_UNLOCK(rxr);
4874			if_input(ifp, sendmp);
4875			EM_RX_LOCK(rxr);
4876			i = rxr->next_to_check;
4877		}
4878
4879		/* Only refresh mbufs every 8 descriptors */
4880		if (processed == 8) {
4881			em_refresh_mbufs(rxr, i);
4882			processed = 0;
4883		}
4884	}
4885
4886	/* Catch any remaining refresh work */
4887	if (e1000_rx_unrefreshed(rxr))
4888		em_refresh_mbufs(rxr, i);
4889
4890	rxr->next_to_check = i;
4891	if (done != NULL)
4892		*done = rxdone;
4893	EM_RX_UNLOCK(rxr);
4894
4895	return ((status & E1000_RXD_STAT_DD) ? TRUE : FALSE);
4896}
4897
4898static __inline void
4899em_rx_discard(struct rx_ring *rxr, int i)
4900{
4901	struct em_rxbuffer	*rbuf;
4902
4903	rbuf = &rxr->rx_buffers[i];
4904	bus_dmamap_unload(rxr->rxtag, rbuf->map);
4905
4906	/* Free any previous pieces */
4907	if (rxr->fmp != NULL) {
4908		rxr->fmp->m_flags |= M_PKTHDR;
4909		m_freem(rxr->fmp);
4910		rxr->fmp = NULL;
4911		rxr->lmp = NULL;
4912	}
4913	/*
4914	** Free buffer and allow em_refresh_mbufs()
4915	** to clean up and recharge buffer.
4916	*/
4917	if (rbuf->m_head) {
4918		m_free(rbuf->m_head);
4919		rbuf->m_head = NULL;
4920	}
4921	return;
4922}
4923
4924#ifndef __NO_STRICT_ALIGNMENT
/*
 * When jumbo frames are enabled we should realign the entire payload on
 * architectures with strict alignment. This is a serious design mistake of
 * the 8254x as it nullifies DMA operations: the 8254x only allows the RX
 * buffer size to be 2048/4096/8192/16384 bytes, while what we really want
 * is 2048 - ETHER_ALIGN so the payload comes out aligned. On architectures
 * without strict alignment restrictions the 8254x still performs unaligned
 * memory accesses, which reduce performance as well. To avoid copying the
 * entire frame just to realign it, we allocate a new mbuf, copy the
 * ethernet header into it, and prepend the new mbuf to the existing mbuf
 * chain.
 *
 * Be aware that the best performance of the 8254x is achieved only when
 * jumbo frames are not used at all on architectures with strict alignment.
 */
4939static int
4940em_fixup_rx(struct rx_ring *rxr)
4941{
4942	struct adapter *adapter = rxr->adapter;
4943	struct mbuf *m, *n;
4944	int error;
4945
4946	error = 0;
4947	m = rxr->fmp;
4948	if (m->m_len <= (MCLBYTES - ETHER_HDR_LEN)) {
4949		bcopy(m->m_data, m->m_data + ETHER_HDR_LEN, m->m_len);
4950		m->m_data += ETHER_HDR_LEN;
4951	} else {
4952		MGETHDR(n, M_NOWAIT, MT_DATA);
4953		if (n != NULL) {
4954			bcopy(m->m_data, n->m_data, ETHER_HDR_LEN);
4955			m->m_data += ETHER_HDR_LEN;
4956			m->m_len -= ETHER_HDR_LEN;
4957			n->m_len = ETHER_HDR_LEN;
4958			M_MOVE_PKTHDR(n, m);
4959			n->m_next = m;
4960			rxr->fmp = n;
4961		} else {
4962			adapter->dropped_pkts++;
4963			m_freem(rxr->fmp);
4964			rxr->fmp = NULL;
4965			error = ENOMEM;
4966		}
4967	}
4968
4969	return (error);
4970}
4971#endif
4972
4973static void
4974em_setup_rxdesc(union e1000_rx_desc_extended *rxd, const struct em_rxbuffer *rxbuf)
4975{
4976	rxd->read.buffer_addr = htole64(rxbuf->paddr);
4977	/* DD bits must be cleared */
	rxd->wb.upper.status_error = 0;
4979}
4980
4981/*********************************************************************
4982 *
4983 *  Verify that the hardware indicated that the checksum is valid.
4984 *  Inform the stack about the status of checksum so that stack
4985 *  doesn't spend time verifying the checksum.
4986 *
4987 *********************************************************************/
4988static void
4989em_receive_checksum(uint32_t status, struct mbuf *mp)
4990{
4991	mp->m_pkthdr.csum_flags = 0;
4992
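	/*
	 * For a clean TCP/IPv4 frame the hardware sets IPCS and TCPCS
	 * with neither IPE nor TCPE, so the mbuf below ends up with
	 * CSUM_IP_CHECKED | CSUM_IP_VALID | CSUM_DATA_VALID |
	 * CSUM_PSEUDO_HDR and a csum_data of 0xffff.
	 */
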
4993	/* Ignore Checksum bit is set */
4994	if (status & E1000_RXD_STAT_IXSM)
4995		return;
4996
4997	/* If the IP checksum exists and there is no IP Checksum error */
4998	if ((status & (E1000_RXD_STAT_IPCS | E1000_RXDEXT_STATERR_IPE)) ==
4999		E1000_RXD_STAT_IPCS) {
5000		mp->m_pkthdr.csum_flags = (CSUM_IP_CHECKED | CSUM_IP_VALID);
5001	}
5002
5003	/* TCP or UDP checksum */
5004	if ((status & (E1000_RXD_STAT_TCPCS | E1000_RXDEXT_STATERR_TCPE)) ==
5005	    E1000_RXD_STAT_TCPCS) {
5006		mp->m_pkthdr.csum_flags |= (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
5007		mp->m_pkthdr.csum_data = htons(0xffff);
5008	}
5009	if (status & E1000_RXD_STAT_UDPCS) {
5010		mp->m_pkthdr.csum_flags |= (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
5011		mp->m_pkthdr.csum_data = htons(0xffff);
5012	}
5013}
5014
5015/*
 * This routine is run via a vlan
5017 * config EVENT
5018 */
5019static void
5020em_register_vlan(void *arg, if_t ifp, u16 vtag)
5021{
5022	struct adapter	*adapter = if_getsoftc(ifp);
5023	u32		index, bit;
5024
5025	if ((void*)adapter !=  arg)   /* Not our event */
5026		return;
5027
	if ((vtag == 0) || (vtag > 4095))	/* Invalid ID */
		return;
5030
5031	EM_CORE_LOCK(adapter);
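	/*
	 * The 4096 possible VLAN IDs map onto 128 32-bit VFTA words:
	 * vtag 100, for example, sets bit 4 (100 & 0x1f) of word 3
	 * (100 >> 5) in the shadow table.
	 */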
5032	index = (vtag >> 5) & 0x7F;
5033	bit = vtag & 0x1F;
5034	adapter->shadow_vfta[index] |= (1 << bit);
5035	++adapter->num_vlans;
5036	/* Re-init to load the changes */
5037	if (if_getcapenable(ifp) & IFCAP_VLAN_HWFILTER)
5038		em_init_locked(adapter);
5039	EM_CORE_UNLOCK(adapter);
5040}
5041
5042/*
 * This routine is run via a vlan
5044 * unconfig EVENT
5045 */
5046static void
5047em_unregister_vlan(void *arg, if_t ifp, u16 vtag)
5048{
5049	struct adapter	*adapter = if_getsoftc(ifp);
5050	u32		index, bit;
5051
5052	if (adapter != arg)
5053		return;
5054
	if ((vtag == 0) || (vtag > 4095))	/* Invalid */
		return;
5057
5058	EM_CORE_LOCK(adapter);
5059	index = (vtag >> 5) & 0x7F;
5060	bit = vtag & 0x1F;
5061	adapter->shadow_vfta[index] &= ~(1 << bit);
5062	--adapter->num_vlans;
5063	/* Re-init to load the changes */
5064	if (if_getcapenable(ifp) & IFCAP_VLAN_HWFILTER)
5065		em_init_locked(adapter);
5066	EM_CORE_UNLOCK(adapter);
5067}
5068
5069static void
5070em_setup_vlan_hw_support(struct adapter *adapter)
5071{
5072	struct e1000_hw *hw = &adapter->hw;
5073	u32             reg;
5074
	/*
	** We get here through init_locked, meaning
	** a soft reset, which has already cleared
	** the VFTA and other state; if no vlans
	** have been registered, do nothing.
	*/
	if (adapter->num_vlans == 0)
		return;
5083
5084	/*
	** A soft reset zeroes out the VFTA, so
5086	** we need to repopulate it now.
5087	*/
	for (int i = 0; i < EM_VFTA_SIZE; i++)
		if (adapter->shadow_vfta[i] != 0)
			E1000_WRITE_REG_ARRAY(hw, E1000_VFTA,
			    i, adapter->shadow_vfta[i]);
5092
5093	reg = E1000_READ_REG(hw, E1000_CTRL);
5094	reg |= E1000_CTRL_VME;
5095	E1000_WRITE_REG(hw, E1000_CTRL, reg);
5096
5097	/* Enable the Filter Table */
5098	reg = E1000_READ_REG(hw, E1000_RCTL);
5099	reg &= ~E1000_RCTL_CFIEN;
5100	reg |= E1000_RCTL_VFE;
5101	E1000_WRITE_REG(hw, E1000_RCTL, reg);
5102}
5103
5104static void
5105em_enable_intr(struct adapter *adapter)
5106{
5107	struct e1000_hw *hw = &adapter->hw;
5108	u32 ims_mask = IMS_ENABLE_MASK;
5109
5110	if (hw->mac.type == e1000_82574) {
5111		E1000_WRITE_REG(hw, EM_EIAC, adapter->ims);
5112		ims_mask |= adapter->ims;
5113	}
5114	E1000_WRITE_REG(hw, E1000_IMS, ims_mask);
5115}
5116
5117static void
5118em_disable_intr(struct adapter *adapter)
5119{
5120	struct e1000_hw *hw = &adapter->hw;
5121
5122	if (hw->mac.type == e1000_82574)
5123		E1000_WRITE_REG(hw, EM_EIAC, 0);
5124	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
5125}
5126
5127/*
5128 * Bit of a misnomer, what this really means is
5129 * to enable OS management of the system... aka
5130 * to disable special hardware management features
5131 */
5132static void
5133em_init_manageability(struct adapter *adapter)
5134{
5135	/* A shared code workaround */
5136#define E1000_82542_MANC2H E1000_MANC2H
5137	if (adapter->has_manage) {
5138		int manc2h = E1000_READ_REG(&adapter->hw, E1000_MANC2H);
5139		int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
5140
5141		/* disable hardware interception of ARP */
5142		manc &= ~(E1000_MANC_ARP_EN);
5143
		/* enable receiving management packets to the host */
5145		manc |= E1000_MANC_EN_MNG2HOST;
5146#define E1000_MNG2HOST_PORT_623 (1 << 5)
5147#define E1000_MNG2HOST_PORT_664 (1 << 6)
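		/* 623 and 664 are the RMCP and secure RMCP (ASF) management ports */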
5148		manc2h |= E1000_MNG2HOST_PORT_623;
5149		manc2h |= E1000_MNG2HOST_PORT_664;
5150		E1000_WRITE_REG(&adapter->hw, E1000_MANC2H, manc2h);
5151		E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
5152	}
5153}
5154
5155/*
5156 * Give control back to hardware management
5157 * controller if there is one.
5158 */
5159static void
5160em_release_manageability(struct adapter *adapter)
5161{
5162	if (adapter->has_manage) {
5163		int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
5164
5165		/* re-enable hardware interception of ARP */
5166		manc |= E1000_MANC_ARP_EN;
5167		manc &= ~E1000_MANC_EN_MNG2HOST;
5168
5169		E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
5170	}
5171}
5172
5173/*
5174 * em_get_hw_control sets the {CTRL_EXT|FWSM}:DRV_LOAD bit.
5175 * For ASF and Pass Through versions of f/w this means
5176 * that the driver is loaded. For AMT version type f/w
5177 * this means that the network i/f is open.
5178 */
5179static void
5180em_get_hw_control(struct adapter *adapter)
5181{
5182	u32 ctrl_ext, swsm;
5183
5184	if (adapter->hw.mac.type == e1000_82573) {
5185		swsm = E1000_READ_REG(&adapter->hw, E1000_SWSM);
5186		E1000_WRITE_REG(&adapter->hw, E1000_SWSM,
5187		    swsm | E1000_SWSM_DRV_LOAD);
5188		return;
5189	}
5190	/* else */
5191	ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
5192	E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
5193	    ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
5194	return;
5195}
5196
5197/*
5198 * em_release_hw_control resets {CTRL_EXT|FWSM}:DRV_LOAD bit.
5199 * For ASF and Pass Through versions of f/w this means that
5200 * the driver is no longer loaded. For AMT versions of the
5201 * f/w this means that the network i/f is closed.
5202 */
5203static void
5204em_release_hw_control(struct adapter *adapter)
5205{
5206	u32 ctrl_ext, swsm;
5207
5208	if (!adapter->has_manage)
5209		return;
5210
5211	if (adapter->hw.mac.type == e1000_82573) {
5212		swsm = E1000_READ_REG(&adapter->hw, E1000_SWSM);
5213		E1000_WRITE_REG(&adapter->hw, E1000_SWSM,
5214		    swsm & ~E1000_SWSM_DRV_LOAD);
5215		return;
5216	}
5217	/* else */
5218	ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
5219	E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
5220	    ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
5221	return;
5222}
5223
5224static int
5225em_is_valid_ether_addr(u8 *addr)
5226{
5227	char zero_addr[6] = { 0, 0, 0, 0, 0, 0 };
5228
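	/* Reject multicast/broadcast (low bit of first octet) and all-zero addresses */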
5229	if ((addr[0] & 1) || (!bcmp(addr, zero_addr, ETHER_ADDR_LEN))) {
5230		return (FALSE);
5231	}
5232
5233	return (TRUE);
5234}
5235
5236/*
5237** Parse the interface capabilities with regard
5238** to both system management and wake-on-lan for
5239** later use.
5240*/
5241static void
5242em_get_wakeup(device_t dev)
5243{
5244	struct adapter	*adapter = device_get_softc(dev);
5245	u16		eeprom_data = 0, device_id, apme_mask;
5246
5247	adapter->has_manage = e1000_enable_mng_pass_thru(&adapter->hw);
5248	apme_mask = EM_EEPROM_APME;
5249
5250	switch (adapter->hw.mac.type) {
5251	case e1000_82573:
5252	case e1000_82583:
5253		adapter->has_amt = TRUE;
5254		/* Falls thru */
5255	case e1000_82571:
5256	case e1000_82572:
5257	case e1000_80003es2lan:
5258		if (adapter->hw.bus.func == 1)
5259			e1000_read_nvm(&adapter->hw,
5260			    NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data);
5261		else
5262			e1000_read_nvm(&adapter->hw,
5263			    NVM_INIT_CONTROL3_PORT_A,
5264			    1, &eeprom_data);
5265		break;
5266	case e1000_ich8lan:
5267	case e1000_ich9lan:
5268	case e1000_ich10lan:
5269	case e1000_pchlan:
5270	case e1000_pch2lan:
5271		apme_mask = E1000_WUC_APME;
5272		adapter->has_amt = TRUE;
5273		eeprom_data = E1000_READ_REG(&adapter->hw, E1000_WUC);
5274		break;
5275	default:
5276		e1000_read_nvm(&adapter->hw,
5277		    NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
5278		break;
5279	}
5280	if (eeprom_data & apme_mask)
5281		adapter->wol = (E1000_WUFC_MAG | E1000_WUFC_MC);
5282	/*
5283	 * We have the eeprom settings, now apply the special cases
5284	 * where the eeprom may be wrong or the board won't support
5285	 * wake on lan on a particular port.
5286	 */
5287	device_id = pci_get_device(dev);
5288	switch (device_id) {
5289	case E1000_DEV_ID_82571EB_FIBER:
5290		/* Wake events only supported on port A for dual fiber
5291		 * regardless of eeprom setting */
5292		if (E1000_READ_REG(&adapter->hw, E1000_STATUS) &
5293		    E1000_STATUS_FUNC_1)
5294			adapter->wol = 0;
5295		break;
5296	case E1000_DEV_ID_82571EB_QUAD_COPPER:
5297	case E1000_DEV_ID_82571EB_QUAD_FIBER:
5298	case E1000_DEV_ID_82571EB_QUAD_COPPER_LP:
5299		/* if quad port adapter, disable WoL on all but port A */
5300		if (global_quad_port_a != 0)
5301			adapter->wol = 0;
5302		/* Reset for multiple quad port adapters */
5303		if (++global_quad_port_a == 4)
5304			global_quad_port_a = 0;
5305		break;
5306	}
5307	return;
5308}
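
/*
 * Caveat on the quad-port cases above: global_quad_port_a is shared
 * across all instances of the driver, so the "port A only" logic
 * implicitly assumes the four ports of one quad adapter attach
 * consecutively; nothing here enforces that ordering.
 */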
5309
5310
5311/*
5312 * Enable PCI Wake On Lan capability
5313 */
5314static void
5315em_enable_wakeup(device_t dev)
5316{
5317	struct adapter	*adapter = device_get_softc(dev);
5318	if_t ifp = adapter->ifp;
5319	u32		pmc, ctrl, ctrl_ext, rctl;
5320	u16		status;
5321
5322	if ((pci_find_cap(dev, PCIY_PMG, &pmc) != 0))
5323		return;
5324
5325	/* Advertise the wakeup capability */
5326	ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL);
5327	ctrl |= (E1000_CTRL_SWDPIN2 | E1000_CTRL_SWDPIN3);
5328	E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl);
5329	E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
5330
5331	if ((adapter->hw.mac.type == e1000_ich8lan) ||
5332	    (adapter->hw.mac.type == e1000_pchlan) ||
5333	    (adapter->hw.mac.type == e1000_ich9lan) ||
5334	    (adapter->hw.mac.type == e1000_ich10lan))
5335		e1000_suspend_workarounds_ich8lan(&adapter->hw);
5336
5337	/* Keep the laser running on Fiber adapters */
5338	if (adapter->hw.phy.media_type == e1000_media_type_fiber ||
5339	    adapter->hw.phy.media_type == e1000_media_type_internal_serdes) {
5340		ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
5341		ctrl_ext |= E1000_CTRL_EXT_SDP3_DATA;
5342		E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, ctrl_ext);
5343	}
5344
5345	/*
5346	** Determine the type of Wakeup: prune the wol bits seeded in
5347	** em_get_wakeup() down to the capabilities actually enabled.
5348	*/
5349	if ((if_getcapenable(ifp) & IFCAP_WOL_MAGIC) == 0)
5350		adapter->wol &= ~E1000_WUFC_MAG;
5351
5352	if ((if_getcapenable(ifp) & IFCAP_WOL_MCAST) == 0)
5353		adapter->wol &= ~E1000_WUFC_MC;
5354	else {
5355		rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
5356		rctl |= E1000_RCTL_MPE;
5357		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, rctl);
5358	}
5359
5360	if ((adapter->hw.mac.type == e1000_pchlan) ||
5361	    (adapter->hw.mac.type == e1000_pch2lan)) {
5362		if (em_enable_phy_wakeup(adapter))
5363			return;
5364	} else {
5365		E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
5366		E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
5367	}
5368
5369	if (adapter->hw.phy.type == e1000_phy_igp_3)
5370		e1000_igp3_phy_powerdown_workaround_ich8lan(&adapter->hw);
5371
5372	/* Request PME */
5373	status = pci_read_config(dev, pmc + PCIR_POWER_STATUS, 2);
5374	status &= ~(PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE);
5375	if (if_getcapenable(ifp) & IFCAP_WOL)
5376		status |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE;
5377	pci_write_config(dev, pmc + PCIR_POWER_STATUS, status, 2);
5378
5379	return;
5380}
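
/*
 * The IFCAP_WOL_* bits consulted above are the ones an administrator
 * toggles from userland; a typical usage sketch (interface name
 * assumed) is:
 *
 *	# ifconfig em0 wol_magic	# wake on magic packets only
 *	# ifconfig em0 -wol		# clear all wake-on-LAN options
 */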
5381
5382/*
5383** WOL in the newer chipset interfaces (pchlan)
5384** requires things to be copied into the PHY.
5385*/
5386static int
5387em_enable_phy_wakeup(struct adapter *adapter)
5388{
5389	struct e1000_hw *hw = &adapter->hw;
5390	u32 mreg, ret = 0;
5391	u16 preg;
5392
5393	/* copy MAC RARs to PHY RARs */
5394	e1000_copy_rx_addrs_to_phy_ich8lan(hw);
5395
5396	/* copy MAC MTA to PHY MTA */
5397	for (int i = 0; i < adapter->hw.mac.mta_reg_count; i++) {
5398		mreg = E1000_READ_REG_ARRAY(hw, E1000_MTA, i);
5399		e1000_write_phy_reg(hw, BM_MTA(i), (u16)(mreg & 0xFFFF));
5400		e1000_write_phy_reg(hw, BM_MTA(i) + 1,
5401		    (u16)((mreg >> 16) & 0xFFFF));
5402	}
5403
5404	/* configure PHY Rx Control register */
5405	e1000_read_phy_reg(&adapter->hw, BM_RCTL, &preg);
5406	mreg = E1000_READ_REG(hw, E1000_RCTL);
5407	if (mreg & E1000_RCTL_UPE)
5408		preg |= BM_RCTL_UPE;
5409	if (mreg & E1000_RCTL_MPE)
5410		preg |= BM_RCTL_MPE;
5411	preg &= ~(BM_RCTL_MO_MASK);
5412	if (mreg & E1000_RCTL_MO_3)
5413		preg |= (((mreg & E1000_RCTL_MO_3) >> E1000_RCTL_MO_SHIFT)
5414				<< BM_RCTL_MO_SHIFT);
5415	if (mreg & E1000_RCTL_BAM)
5416		preg |= BM_RCTL_BAM;
5417	if (mreg & E1000_RCTL_PMCF)
5418		preg |= BM_RCTL_PMCF;
5419	mreg = E1000_READ_REG(hw, E1000_CTRL);
5420	if (mreg & E1000_CTRL_RFCE)
5421		preg |= BM_RCTL_RFCE;
5422	e1000_write_phy_reg(&adapter->hw, BM_RCTL, preg);
5423
5424	/* enable PHY wakeup in MAC register */
5425	E1000_WRITE_REG(hw, E1000_WUC,
5426	    E1000_WUC_PHY_WAKE | E1000_WUC_PME_EN);
5427	E1000_WRITE_REG(hw, E1000_WUFC, adapter->wol);
5428
5429	/* configure and enable PHY wakeup in PHY registers */
5430	e1000_write_phy_reg(&adapter->hw, BM_WUFC, adapter->wol);
5431	e1000_write_phy_reg(&adapter->hw, BM_WUC, E1000_WUC_PME_EN);
5432
5433	/* activate PHY wakeup */
5434	ret = hw->phy.ops.acquire(hw);
5435	if (ret) {
5436		printf("Could not acquire PHY\n");
5437		return ret;
5438	}
5439	e1000_write_phy_reg_mdic(hw, IGP01E1000_PHY_PAGE_SELECT,
5440	                         (BM_WUC_ENABLE_PAGE << IGP_PAGE_SHIFT));
5441	ret = e1000_read_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, &preg);
5442	if (ret) {
5443		printf("Could not read PHY page 769\n");
5444		goto out;
5445	}
5446	preg |= BM_WUC_ENABLE_BIT | BM_WUC_HOST_WU_BIT;
5447	ret = e1000_write_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, preg);
5448	if (ret)
5449		printf("Could not set PHY Host Wakeup bit\n");
5450out:
5451	hw->phy.ops.release(hw);
5452
5453	return ret;
5454}
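
/*
 * Background on the raw MDIC accesses above: the wakeup enable bits
 * live on PHY page 769 (BM_WUC_ENABLE_PAGE), which has to be selected
 * by hand while the PHY semaphore is held; that is why the code uses
 * acquire / page-select / read-modify-write / release rather than the
 * ordinary e1000_read/write_phy_reg() wrappers.
 */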
5455
5456static void
5457em_led_func(void *arg, int onoff)
5458{
5459	struct adapter	*adapter = arg;
5460
5461	EM_CORE_LOCK(adapter);
5462	if (onoff) {
5463		e1000_setup_led(&adapter->hw);
5464		e1000_led_on(&adapter->hw);
5465	} else {
5466		e1000_led_off(&adapter->hw);
5467		e1000_cleanup_led(&adapter->hw);
5468	}
5469	EM_CORE_UNLOCK(adapter);
5470}
5471
5472/*
5473** Disable the L0s and L1 PCIe link states.
5474*/
5475static void
5476em_disable_aspm(struct adapter *adapter)
5477{
5478	int		base, reg;
5479	u16		link_cap, link_ctrl;
5480	device_t	dev = adapter->dev;
5481
5482	switch (adapter->hw.mac.type) {
5483	case e1000_82573:
5484	case e1000_82574:
5485	case e1000_82583:
5486		break;
5487	default:
5488		return;
5489	}
5490	if (pci_find_cap(dev, PCIY_EXPRESS, &base) != 0)
5491		return;
5492	reg = base + PCIER_LINK_CAP;
5493	link_cap = pci_read_config(dev, reg, 2);
5494	if ((link_cap & PCIEM_LINK_CAP_ASPM) == 0)
5495		return;
5496	reg = base + PCIER_LINK_CTL;
5497	link_ctrl = pci_read_config(dev, reg, 2);
5498	link_ctrl &= ~PCIEM_LINK_CTL_ASPMC;
5499	pci_write_config(dev, reg, link_ctrl, 2);
5500	return;
5501}
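
/*
 * PCIEM_LINK_CTL_ASPMC is the two-bit ASPM Control field at the
 * bottom of the PCIe Link Control register (L0s enable and L1
 * enable), so the single mask-and-write above turns off both link
 * power states at once.
 */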
5502
5503/**********************************************************************
5504 *
5505 *  Update the board statistics counters.
5506 *
5507 **********************************************************************/
5508static void
5509em_update_stats_counters(struct adapter *adapter)
5510{
5511
5512	if (adapter->hw.phy.media_type == e1000_media_type_copper ||
5513	   (E1000_READ_REG(&adapter->hw, E1000_STATUS) & E1000_STATUS_LU)) {
5514		adapter->stats.symerrs += E1000_READ_REG(&adapter->hw, E1000_SYMERRS);
5515		adapter->stats.sec += E1000_READ_REG(&adapter->hw, E1000_SEC);
5516	}
5517	adapter->stats.crcerrs += E1000_READ_REG(&adapter->hw, E1000_CRCERRS);
5518	adapter->stats.mpc += E1000_READ_REG(&adapter->hw, E1000_MPC);
5519	adapter->stats.scc += E1000_READ_REG(&adapter->hw, E1000_SCC);
5520	adapter->stats.ecol += E1000_READ_REG(&adapter->hw, E1000_ECOL);
5521
5522	adapter->stats.mcc += E1000_READ_REG(&adapter->hw, E1000_MCC);
5523	adapter->stats.latecol += E1000_READ_REG(&adapter->hw, E1000_LATECOL);
5524	adapter->stats.colc += E1000_READ_REG(&adapter->hw, E1000_COLC);
5525	adapter->stats.dc += E1000_READ_REG(&adapter->hw, E1000_DC);
5526	adapter->stats.rlec += E1000_READ_REG(&adapter->hw, E1000_RLEC);
5527	adapter->stats.xonrxc += E1000_READ_REG(&adapter->hw, E1000_XONRXC);
5528	adapter->stats.xontxc += E1000_READ_REG(&adapter->hw, E1000_XONTXC);
5529	adapter->stats.xoffrxc += E1000_READ_REG(&adapter->hw, E1000_XOFFRXC);
5530	adapter->stats.xofftxc += E1000_READ_REG(&adapter->hw, E1000_XOFFTXC);
5531	adapter->stats.fcruc += E1000_READ_REG(&adapter->hw, E1000_FCRUC);
5532	adapter->stats.prc64 += E1000_READ_REG(&adapter->hw, E1000_PRC64);
5533	adapter->stats.prc127 += E1000_READ_REG(&adapter->hw, E1000_PRC127);
5534	adapter->stats.prc255 += E1000_READ_REG(&adapter->hw, E1000_PRC255);
5535	adapter->stats.prc511 += E1000_READ_REG(&adapter->hw, E1000_PRC511);
5536	adapter->stats.prc1023 += E1000_READ_REG(&adapter->hw, E1000_PRC1023);
5537	adapter->stats.prc1522 += E1000_READ_REG(&adapter->hw, E1000_PRC1522);
5538	adapter->stats.gprc += E1000_READ_REG(&adapter->hw, E1000_GPRC);
5539	adapter->stats.bprc += E1000_READ_REG(&adapter->hw, E1000_BPRC);
5540	adapter->stats.mprc += E1000_READ_REG(&adapter->hw, E1000_MPRC);
5541	adapter->stats.gptc += E1000_READ_REG(&adapter->hw, E1000_GPTC);
5542
5543	/* For the 64-bit byte counters the low dword must be read first. */
5544	/* Both registers clear on the read of the high dword */
5545
5546	adapter->stats.gorc += E1000_READ_REG(&adapter->hw, E1000_GORCL) +
5547	    ((u64)E1000_READ_REG(&adapter->hw, E1000_GORCH) << 32);
5548	adapter->stats.gotc += E1000_READ_REG(&adapter->hw, E1000_GOTCL) +
5549	    ((u64)E1000_READ_REG(&adapter->hw, E1000_GOTCH) << 32);
5550
5551	adapter->stats.rnbc += E1000_READ_REG(&adapter->hw, E1000_RNBC);
5552	adapter->stats.ruc += E1000_READ_REG(&adapter->hw, E1000_RUC);
5553	adapter->stats.rfc += E1000_READ_REG(&adapter->hw, E1000_RFC);
5554	adapter->stats.roc += E1000_READ_REG(&adapter->hw, E1000_ROC);
5555	adapter->stats.rjc += E1000_READ_REG(&adapter->hw, E1000_RJC);
5556
5557	adapter->stats.tor += E1000_READ_REG(&adapter->hw, E1000_TORH);
5558	adapter->stats.tot += E1000_READ_REG(&adapter->hw, E1000_TOTH);
5559
5560	adapter->stats.tpr += E1000_READ_REG(&adapter->hw, E1000_TPR);
5561	adapter->stats.tpt += E1000_READ_REG(&adapter->hw, E1000_TPT);
5562	adapter->stats.ptc64 += E1000_READ_REG(&adapter->hw, E1000_PTC64);
5563	adapter->stats.ptc127 += E1000_READ_REG(&adapter->hw, E1000_PTC127);
5564	adapter->stats.ptc255 += E1000_READ_REG(&adapter->hw, E1000_PTC255);
5565	adapter->stats.ptc511 += E1000_READ_REG(&adapter->hw, E1000_PTC511);
5566	adapter->stats.ptc1023 += E1000_READ_REG(&adapter->hw, E1000_PTC1023);
5567	adapter->stats.ptc1522 += E1000_READ_REG(&adapter->hw, E1000_PTC1522);
5568	adapter->stats.mptc += E1000_READ_REG(&adapter->hw, E1000_MPTC);
5569	adapter->stats.bptc += E1000_READ_REG(&adapter->hw, E1000_BPTC);
5570
5571	/* Interrupt Counts */
5572
5573	adapter->stats.iac += E1000_READ_REG(&adapter->hw, E1000_IAC);
5574	adapter->stats.icrxptc += E1000_READ_REG(&adapter->hw, E1000_ICRXPTC);
5575	adapter->stats.icrxatc += E1000_READ_REG(&adapter->hw, E1000_ICRXATC);
5576	adapter->stats.ictxptc += E1000_READ_REG(&adapter->hw, E1000_ICTXPTC);
5577	adapter->stats.ictxatc += E1000_READ_REG(&adapter->hw, E1000_ICTXATC);
5578	adapter->stats.ictxqec += E1000_READ_REG(&adapter->hw, E1000_ICTXQEC);
5579	adapter->stats.ictxqmtc += E1000_READ_REG(&adapter->hw, E1000_ICTXQMTC);
5580	adapter->stats.icrxdmtc += E1000_READ_REG(&adapter->hw, E1000_ICRXDMTC);
5581	adapter->stats.icrxoc += E1000_READ_REG(&adapter->hw, E1000_ICRXOC);
5582
5583	if (adapter->hw.mac.type >= e1000_82543) {
5584		adapter->stats.algnerrc +=
5585		    E1000_READ_REG(&adapter->hw, E1000_ALGNERRC);
5586		adapter->stats.rxerrc +=
5587		    E1000_READ_REG(&adapter->hw, E1000_RXERRC);
5588		adapter->stats.tncrs +=
5589		    E1000_READ_REG(&adapter->hw, E1000_TNCRS);
5590		adapter->stats.cexterr +=
5591		    E1000_READ_REG(&adapter->hw, E1000_CEXTERR);
5592		adapter->stats.tsctc +=
5593		    E1000_READ_REG(&adapter->hw, E1000_TSCTC);
5594		adapter->stats.tsctfc +=
5595		    E1000_READ_REG(&adapter->hw, E1000_TSCTFC);
5596	}
5597}
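
/*
 * Note that the statistics registers read above are clear-on-read in
 * the hardware, which is why every line accumulates with "+=" into
 * the softc copy instead of overwriting it.
 */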
5598
5599static uint64_t
5600em_get_counter(if_t ifp, ift_counter cnt)
5601{
5602	struct adapter *adapter;
5603
5604	adapter = if_getsoftc(ifp);
5605
5606	switch (cnt) {
5607	case IFCOUNTER_COLLISIONS:
5608		return (adapter->stats.colc);
5609	case IFCOUNTER_IERRORS:
5610		return (adapter->dropped_pkts + adapter->stats.rxerrc +
5611		    adapter->stats.crcerrs + adapter->stats.algnerrc +
5612		    adapter->stats.ruc + adapter->stats.roc +
5613		    adapter->stats.mpc + adapter->stats.cexterr);
5614	case IFCOUNTER_OERRORS:
5615		return (adapter->stats.ecol + adapter->stats.latecol +
5616		    adapter->watchdog_events);
5617	default:
5618		return (if_get_counter_default(ifp, cnt));
5619	}
5620}
5621
5622/* Export a single 32-bit register via a read-only sysctl. */
5623static int
5624em_sysctl_reg_handler(SYSCTL_HANDLER_ARGS)
5625{
5626	struct adapter *adapter;
5627	u_int val;
5628
5629	adapter = oidp->oid_arg1;
5630	val = E1000_READ_REG(&adapter->hw, oidp->oid_arg2);
5631	return (sysctl_handle_int(oidp, &val, 0, req));
5632}
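
/*
 * This handler backs the register-dump sysctls created below
 * (device_control, rx_control, txd_head/txd_tail, rxd_head/rxd_tail).
 * A usage sketch, assuming unit 0:
 *
 *	# sysctl dev.em.0.device_control
 *
 * which returns the live contents of E1000_CTRL.
 */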
5633
5634/*
5635 * Add sysctl variables, one per statistic, to the system.
5636 */
5637static void
5638em_add_hw_stats(struct adapter *adapter)
5639{
5640	device_t dev = adapter->dev;
5641
5642	struct tx_ring *txr = adapter->tx_rings;
5643	struct rx_ring *rxr = adapter->rx_rings;
5644
5645	struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev);
5646	struct sysctl_oid *tree = device_get_sysctl_tree(dev);
5647	struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree);
5648	struct e1000_hw_stats *stats = &adapter->stats;
5649
5650	struct sysctl_oid *stat_node, *queue_node, *int_node;
5651	struct sysctl_oid_list *stat_list, *queue_list, *int_list;
5652
5653#define QUEUE_NAME_LEN 32
5654	char namebuf[QUEUE_NAME_LEN];
5655
5656	/* Driver Statistics */
5657	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "dropped",
5658			CTLFLAG_RD, &adapter->dropped_pkts,
5659			"Driver dropped packets");
5660	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "link_irq",
5661			CTLFLAG_RD, &adapter->link_irq,
5662			"Link MSIX IRQ Handled");
5663	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "mbuf_defrag_fail",
5664			 CTLFLAG_RD, &adapter->mbuf_defrag_failed,
5665			 "Defragmenting mbuf chain failed");
5666	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_dma_fail",
5667			CTLFLAG_RD, &adapter->no_tx_dma_setup,
5668			"Driver tx dma failure in xmit");
5669	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_overruns",
5670			CTLFLAG_RD, &adapter->rx_overruns,
5671			"RX overruns");
5672	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "watchdog_timeouts",
5673			CTLFLAG_RD, &adapter->watchdog_events,
5674			"Watchdog timeouts");
5675
5676	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "device_control",
5677			CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_CTRL,
5678			em_sysctl_reg_handler, "IU",
5679			"Device Control Register");
5680	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rx_control",
5681			CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_RCTL,
5682			em_sysctl_reg_handler, "IU",
5683			"Receiver Control Register");
5684	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_high_water",
5685			CTLFLAG_RD, &adapter->hw.fc.high_water, 0,
5686			"Flow Control High Watermark");
5687	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_low_water",
5688			CTLFLAG_RD, &adapter->hw.fc.low_water, 0,
5689			"Flow Control Low Watermark");
5690
5691	for (int i = 0; i < adapter->num_queues; i++, txr++, rxr++) {
5692		snprintf(namebuf, QUEUE_NAME_LEN, "queue_tx_%d", i);
5693		queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
5694					    CTLFLAG_RD, NULL, "TX Queue Name");
5695		queue_list = SYSCTL_CHILDREN(queue_node);
5696
5697		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_head",
5698				CTLTYPE_UINT | CTLFLAG_RD, adapter,
5699				E1000_TDH(txr->me),
5700				em_sysctl_reg_handler, "IU",
5701				"Transmit Descriptor Head");
5702		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_tail",
5703				CTLTYPE_UINT | CTLFLAG_RD, adapter,
5704				E1000_TDT(txr->me),
5705				em_sysctl_reg_handler, "IU",
5706				"Transmit Descriptor Tail");
5707		SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "tx_irq",
5708				CTLFLAG_RD, &txr->tx_irq,
5709				"Queue MSI-X Transmit Interrupts");
5710		SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "no_desc_avail",
5711				CTLFLAG_RD, &txr->no_desc_avail,
5712				"Queue No Descriptor Available");
5713
5714		snprintf(namebuf, QUEUE_NAME_LEN, "queue_rx_%d", i);
5715		queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
5716					    CTLFLAG_RD, NULL, "RX Queue Name");
5717		queue_list = SYSCTL_CHILDREN(queue_node);
5718
5719		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_head",
5720				CTLTYPE_UINT | CTLFLAG_RD, adapter,
5721				E1000_RDH(rxr->me),
5722				em_sysctl_reg_handler, "IU",
5723				"Receive Descriptor Head");
5724		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_tail",
5725				CTLTYPE_UINT | CTLFLAG_RD, adapter,
5726				E1000_RDT(rxr->me),
5727				em_sysctl_reg_handler, "IU",
5728				"Receive Descriptor Tail");
5729		SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "rx_irq",
5730				CTLFLAG_RD, &rxr->rx_irq,
5731				"Queue MSI-X Receive Interrupts");
5732	}
5733
5734	/* MAC stats get their own sub node */
5735
5736	stat_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "mac_stats",
5737				    CTLFLAG_RD, NULL, "Statistics");
5738	stat_list = SYSCTL_CHILDREN(stat_node);
5739
5740	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "excess_coll",
5741			CTLFLAG_RD, &stats->ecol,
5742			"Excessive collisions");
5743	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "single_coll",
5744			CTLFLAG_RD, &stats->scc,
5745			"Single collisions");
5746	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "multiple_coll",
5747			CTLFLAG_RD, &stats->mcc,
5748			"Multiple collisions");
5749	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "late_coll",
5750			CTLFLAG_RD, &stats->latecol,
5751			"Late collisions");
5752	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "collision_count",
5753			CTLFLAG_RD, &stats->colc,
5754			"Collision Count");
5755	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "symbol_errors",
5756			CTLFLAG_RD, &adapter->stats.symerrs,
5757			"Symbol Errors");
5758	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "sequence_errors",
5759			CTLFLAG_RD, &adapter->stats.sec,
5760			"Sequence Errors");
5761	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "defer_count",
5762			CTLFLAG_RD, &adapter->stats.dc,
5763			"Defer Count");
5764	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "missed_packets",
5765			CTLFLAG_RD, &adapter->stats.mpc,
5766			"Missed Packets");
5767	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_no_buff",
5768			CTLFLAG_RD, &adapter->stats.rnbc,
5769			"Receive No Buffers");
5770	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_undersize",
5771			CTLFLAG_RD, &adapter->stats.ruc,
5772			"Receive Undersize");
5773	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_fragmented",
5774			CTLFLAG_RD, &adapter->stats.rfc,
5775			"Fragmented Packets Received");
5776	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_oversize",
5777			CTLFLAG_RD, &adapter->stats.roc,
5778			"Oversized Packets Received");
5779	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_jabber",
5780			CTLFLAG_RD, &adapter->stats.rjc,
5781			"Received Jabber");
5782	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_errs",
5783			CTLFLAG_RD, &adapter->stats.rxerrc,
5784			"Receive Errors");
5785	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "crc_errs",
5786			CTLFLAG_RD, &adapter->stats.crcerrs,
5787			"CRC errors");
5788	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "alignment_errs",
5789			CTLFLAG_RD, &adapter->stats.algnerrc,
5790			"Alignment Errors");
5791	/* On 82575 these are collision counts */
5792	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "coll_ext_errs",
5793			CTLFLAG_RD, &adapter->stats.cexterr,
5794			"Collision/Carrier extension errors");
5795	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_recvd",
5796			CTLFLAG_RD, &adapter->stats.xonrxc,
5797			"XON Received");
5798	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_txd",
5799			CTLFLAG_RD, &adapter->stats.xontxc,
5800			"XON Transmitted");
5801	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_recvd",
5802			CTLFLAG_RD, &adapter->stats.xoffrxc,
5803			"XOFF Received");
5804	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_txd",
5805			CTLFLAG_RD, &adapter->stats.xofftxc,
5806			"XOFF Transmitted");
5807
5808	/* Packet Reception Stats */
5809	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_recvd",
5810			CTLFLAG_RD, &adapter->stats.tpr,
5811			"Total Packets Received");
5812	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_recvd",
5813			CTLFLAG_RD, &adapter->stats.gprc,
5814			"Good Packets Received");
5815	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_recvd",
5816			CTLFLAG_RD, &adapter->stats.bprc,
5817			"Broadcast Packets Received");
5818	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_recvd",
5819			CTLFLAG_RD, &adapter->stats.mprc,
5820			"Multicast Packets Received");
5821	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_64",
5822			CTLFLAG_RD, &adapter->stats.prc64,
5823			"64 byte frames received");
5824	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_65_127",
5825			CTLFLAG_RD, &adapter->stats.prc127,
5826			"65-127 byte frames received");
5827	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_128_255",
5828			CTLFLAG_RD, &adapter->stats.prc255,
5829			"128-255 byte frames received");
5830	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_256_511",
5831			CTLFLAG_RD, &adapter->stats.prc511,
5832			"256-511 byte frames received");
5833	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_512_1023",
5834			CTLFLAG_RD, &adapter->stats.prc1023,
5835			"512-1023 byte frames received");
5836	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_1024_1522",
5837			CTLFLAG_RD, &adapter->stats.prc1522,
5838			"1024-1522 byte frames received");
5839	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_recvd",
5840			CTLFLAG_RD, &adapter->stats.gorc,
5841			"Good Octets Received");
5842
5843	/* Packet Transmission Stats */
5844	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_txd",
5845			CTLFLAG_RD, &adapter->stats.gotc,
5846			"Good Octets Transmitted");
5847	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_txd",
5848			CTLFLAG_RD, &adapter->stats.tpt,
5849			"Total Packets Transmitted");
5850	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd",
5851			CTLFLAG_RD, &adapter->stats.gptc,
5852			"Good Packets Transmitted");
5853	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_txd",
5854			CTLFLAG_RD, &adapter->stats.bptc,
5855			"Broadcast Packets Transmitted");
5856	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_txd",
5857			CTLFLAG_RD, &adapter->stats.mptc,
5858			"Multicast Packets Transmitted");
5859	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_64",
5860			CTLFLAG_RD, &adapter->stats.ptc64,
5861			"64 byte frames transmitted");
5862	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_65_127",
5863			CTLFLAG_RD, &adapter->stats.ptc127,
5864			"65-127 byte frames transmitted");
5865	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_128_255",
5866			CTLFLAG_RD, &adapter->stats.ptc255,
5867			"128-255 byte frames transmitted");
5868	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_256_511",
5869			CTLFLAG_RD, &adapter->stats.ptc511,
5870			"256-511 byte frames transmitted");
5871	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_512_1023",
5872			CTLFLAG_RD, &adapter->stats.ptc1023,
5873			"512-1023 byte frames transmitted");
5874	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_1024_1522",
5875			CTLFLAG_RD, &adapter->stats.ptc1522,
5876			"1024-1522 byte frames transmitted");
5877	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tso_txd",
5878			CTLFLAG_RD, &adapter->stats.tsctc,
5879			"TSO Contexts Transmitted");
5880	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tso_ctx_fail",
5881			CTLFLAG_RD, &adapter->stats.tsctfc,
5882			"TSO Contexts Failed");
5883
5884
5885	/* Interrupt Stats */
5886
5887	int_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "interrupts",
5888				    CTLFLAG_RD, NULL, "Interrupt Statistics");
5889	int_list = SYSCTL_CHILDREN(int_node);
5890
5891	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "asserts",
5892			CTLFLAG_RD, &adapter->stats.iac,
5893			"Interrupt Assertion Count");
5894
5895	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_pkt_timer",
5896			CTLFLAG_RD, &adapter->stats.icrxptc,
5897			"Interrupt Cause Rx Pkt Timer Expire Count");
5898
5899	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_abs_timer",
5900			CTLFLAG_RD, &adapter->stats.icrxatc,
5901			"Interrupt Cause Rx Abs Timer Expire Count");
5902
5903	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_pkt_timer",
5904			CTLFLAG_RD, &adapter->stats.ictxptc,
5905			"Interrupt Cause Tx Pkt Timer Expire Count");
5906
5907	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_abs_timer",
5908			CTLFLAG_RD, &adapter->stats.ictxatc,
5909			"Interrupt Cause Tx Abs Timer Expire Count");
5910
5911	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_queue_empty",
5912			CTLFLAG_RD, &adapter->stats.ictxqec,
5913			"Interrupt Cause Tx Queue Empty Count");
5914
5915	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_queue_min_thresh",
5916			CTLFLAG_RD, &adapter->stats.ictxqmtc,
5917			"Interrupt Cause Tx Queue Min Thresh Count");
5918
5919	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_desc_min_thresh",
5920			CTLFLAG_RD, &adapter->stats.icrxdmtc,
5921			"Interrupt Cause Rx Desc Min Thresh Count");
5922
5923	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_overrun",
5924			CTLFLAG_RD, &adapter->stats.icrxoc,
5925			"Interrupt Cause Receiver Overrun Count");
5926}
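
/*
 * A usage sketch for the tree built above (unit number assumed):
 *
 *	# sysctl dev.em.0.mac_stats.good_pkts_recvd
 *	# sysctl dev.em.0.queue_tx_0.txd_head
 *	# sysctl dev.em.0.interrupts.asserts
 */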
5927
5928/**********************************************************************
5929 *
5930 *  This routine provides a way to dump out the adapter eeprom;
5931 *  it is often a useful debug/service tool. Only the first 32
5932 *  words are dumped, as the data that matters lives in that extent.
5933 *
5934 **********************************************************************/
5935static int
5936em_sysctl_nvm_info(SYSCTL_HANDLER_ARGS)
5937{
5938	struct adapter *adapter = (struct adapter *)arg1;
5939	int error;
5940	int result;
5941
5942	result = -1;
5943	error = sysctl_handle_int(oidp, &result, 0, req);
5944
5945	if (error || !req->newptr)
5946		return (error);
5947
5948	/*
5949	 * This value will cause a hex dump of the
5950	 * first 32 16-bit words of the EEPROM to
5951	 * the screen.
5952	 */
5953	if (result == 1)
5954		em_print_nvm_info(adapter);
5955
5956	return (error);
5957}
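
/*
 * Usage sketch: writing 1 to the backing sysctl triggers the dump to
 * the console; the OID name is chosen where the handler is registered
 * (elsewhere in this file), so assuming it is "nvm" on unit 0:
 *
 *	# sysctl dev.em.0.nvm=1
 */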
5958
5959static void
5960em_print_nvm_info(struct adapter *adapter)
5961{
5962	u16	eeprom_data;
5963	int	i, j, row = 0;
5964
5965	/* It's a bit crude, but it gets the job done. */
5966	printf("\nInterface EEPROM Dump:\n");
5967	printf("Offset\n0x0000  ");
5968	for (i = 0, j = 0; i < 32; i++, j++) {
5969		if (j == 8) { /* Make the offset block */
5970			j = 0; ++row;
5971			printf("\n0x00%x0  ", row);
5972		}
5973		e1000_read_nvm(&adapter->hw, i, 1, &eeprom_data);
5974		printf("%04x ", eeprom_data);
5975	}
5976	printf("\n");
5977}
5978
5979static int
5980em_sysctl_int_delay(SYSCTL_HANDLER_ARGS)
5981{
5982	struct em_int_delay_info *info;
5983	struct adapter *adapter;
5984	u32 regval;
5985	int error, usecs, ticks;
5986
5987	info = (struct em_int_delay_info *)arg1;
5988	usecs = info->value;
5989	error = sysctl_handle_int(oidp, &usecs, 0, req);
5990	if (error != 0 || req->newptr == NULL)
5991		return (error);
5992	if (usecs < 0 || usecs > EM_TICKS_TO_USECS(65535))
5993		return (EINVAL);
5994	info->value = usecs;
5995	ticks = EM_USECS_TO_TICKS(usecs);
5996	if (info->offset == E1000_ITR)	/* units are 256ns here */
5997		ticks *= 4;
5998
5999	adapter = info->adapter;
6000
6001	EM_CORE_LOCK(adapter);
6002	regval = E1000_READ_OFFSET(&adapter->hw, info->offset);
6003	regval = (regval & ~0xffff) | (ticks & 0xffff);
6004	/* Handle a few special cases. */
6005	switch (info->offset) {
6006	case E1000_RDTR:
6007		break;
6008	case E1000_TIDV:
6009		if (ticks == 0) {
6010			adapter->txd_cmd &= ~E1000_TXD_CMD_IDE;
6011			/* Don't write 0 into the TIDV register. */
6012			regval++;
6013		} else
6014			adapter->txd_cmd |= E1000_TXD_CMD_IDE;
6015		break;
6016	}
6017	E1000_WRITE_OFFSET(&adapter->hw, info->offset, regval);
6018	EM_CORE_UNLOCK(adapter);
6019	return (0);
6020}
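
/*
 * Unit-conversion note for the handler above: EM_USECS_TO_TICKS()
 * yields the 1.024 usec granularity the delay registers use, while
 * E1000_ITR counts in 256 ns increments; four ITR units make up one
 * such tick, hence the multiply by 4.
 */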
6021
6022static void
6023em_add_int_delay_sysctl(struct adapter *adapter, const char *name,
6024	const char *description, struct em_int_delay_info *info,
6025	int offset, int value)
6026{
6027	info->adapter = adapter;
6028	info->offset = offset;
6029	info->value = value;
6030	SYSCTL_ADD_PROC(device_get_sysctl_ctx(adapter->dev),
6031	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
6032	    OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW,
6033	    info, 0, em_sysctl_int_delay, "I", description);
6034}
6035
6036static void
6037em_set_sysctl_value(struct adapter *adapter, const char *name,
6038	const char *description, int *limit, int value)
6039{
6040	*limit = value;
6041	SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev),
6042	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
6043	    OID_AUTO, name, CTLFLAG_RW, limit, value, description);
6044}
6045
6046
6047/*
6048** Set flow control using sysctl:
6049** Flow control values:
6050**      0 - off
6051**      1 - rx pause
6052**      2 - tx pause
6053**      3 - full
6054*/
6055static int
6056em_set_flowcntl(SYSCTL_HANDLER_ARGS)
6057{
6058        int		error;
6059	int		error;
6060	static int	input = 3; /* default is full */
6061	struct adapter	*adapter = (struct adapter *) arg1;
6062
6063	error = sysctl_handle_int(oidp, &input, 0, req);
6064
6065	if ((error) || (req->newptr == NULL))
6066		return (error);
6067
6068	if (input == adapter->fc) /* no change? */
6069		return (error);
6070
6071	switch (input) {
6072	case e1000_fc_rx_pause:
6073	case e1000_fc_tx_pause:
6074	case e1000_fc_full:
6075	case e1000_fc_none:
6076		adapter->hw.fc.requested_mode = input;
6077		adapter->fc = input;
6078		break;
6079	default:
6080		/* Do nothing */
6081		return (error);
6082	}
6083
6084	adapter->hw.fc.current_mode = adapter->hw.fc.requested_mode;
6085	e1000_force_mac_fc(&adapter->hw);
6086	return (error);
6087
6088/*
6089** Manage Energy Efficient Ethernet:
6090** Control values:
6091**     0/1 - enabled/disabled
6092*/
6093static int
6094em_sysctl_eee(SYSCTL_HANDLER_ARGS)
6095{
6096	struct adapter *adapter = (struct adapter *) arg1;
6097	int		error, value;
6098
6099	value = adapter->hw.dev_spec.ich8lan.eee_disable;
6100	error = sysctl_handle_int(oidp, &value, 0, req);
6101	if (error || req->newptr == NULL)
6102		return (error);
6103	EM_CORE_LOCK(adapter);
6104	adapter->hw.dev_spec.ich8lan.eee_disable = (value != 0);
6105	em_init_locked(adapter);
6106	EM_CORE_UNLOCK(adapter);
6107	return (0);
6108}
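
/*
 * Note the inverted sense of the value above: it is stored in
 * eee_disable, so writing 1 turns Energy Efficient Ethernet off and
 * writing 0 turns it back on; either write reinitializes the
 * interface via em_init_locked().
 */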
6109
6110static int
6111em_sysctl_debug_info(SYSCTL_HANDLER_ARGS)
6112{
6113	struct adapter *adapter;
6114	int error;
6115	int result;
6116
6117	result = -1;
6118	error = sysctl_handle_int(oidp, &result, 0, req);
6119
6120	if (error || !req->newptr)
6121		return (error);
6122
6123	if (result == 1) {
6124		adapter = (struct adapter *)arg1;
6125		em_print_debug_info(adapter);
6126	}
6127
6128	return (error);
6129}
6130
6131/*
6132** This routine is meant to be fluid, add whatever is
6133** needed for debugging a problem.  -jfv
6134*/
6135static void
6136em_print_debug_info(struct adapter *adapter)
6137{
6138	device_t dev = adapter->dev;
6139	struct tx_ring *txr = adapter->tx_rings;
6140	struct rx_ring *rxr = adapter->rx_rings;
6141
6142	if (if_getdrvflags(adapter->ifp) & IFF_DRV_RUNNING)
6143		printf("Interface is RUNNING ");
6144	else
6145		printf("Interface is NOT RUNNING\n");
6146
6147	if (if_getdrvflags(adapter->ifp) & IFF_DRV_OACTIVE)
6148		printf("and INACTIVE\n");
6149	else
6150		printf("and ACTIVE\n");
6151
6152	for (int i = 0; i < adapter->num_queues; i++, txr++, rxr++) {
6153		device_printf(dev, "TX Queue %d ------\n", i);
6154		device_printf(dev, "hw tdh = %d, hw tdt = %d\n",
6155		    E1000_READ_REG(&adapter->hw, E1000_TDH(i)),
6156		    E1000_READ_REG(&adapter->hw, E1000_TDT(i)));
6157		device_printf(dev, "Tx Queue Status = %d\n", txr->busy);
6158		device_printf(dev, "TX descriptors avail = %d\n",
6159		    txr->tx_avail);
6160		device_printf(dev, "Tx Descriptors avail failure = %ld\n",
6161		    txr->no_desc_avail);
6162		device_printf(dev, "RX Queue %d ------\n", i);
6163		device_printf(dev, "hw rdh = %d, hw rdt = %d\n",
6164		    E1000_READ_REG(&adapter->hw, E1000_RDH(i)),
6165		    E1000_READ_REG(&adapter->hw, E1000_RDT(i)));
6166		device_printf(dev, "RX discarded packets = %ld\n",
6167		    rxr->rx_discarded);
6168		device_printf(dev, "RX Next to Check = %d\n", rxr->next_to_check);
6169		device_printf(dev, "RX Next to Refresh = %d\n", rxr->next_to_refresh);
6170	}
6171}
6172
6173#ifdef EM_MULTIQUEUE
6174/*
6175 * 82574 only:
6176 * Write a new value to the EEPROM increasing the number of MSIX
6177 * vectors from 3 to 5, for proper multiqueue support.
6178 */
6179static void
6180em_enable_vectors_82574(struct adapter *adapter)
6181{
6182	struct e1000_hw *hw = &adapter->hw;
6183	device_t dev = adapter->dev;
6184	u16 edata;
6185
6186	e1000_read_nvm(hw, EM_NVM_PCIE_CTRL, 1, &edata);
6187	printf("Current cap: %#06x\n", edata);
6188	if (((edata & EM_NVM_MSIX_N_MASK) >> EM_NVM_MSIX_N_SHIFT) != 4) {
6189		device_printf(dev, "Writing to eeprom: increasing "
6190		    "reported MSIX vectors from 3 to 5...\n");
6191		edata &= ~(EM_NVM_MSIX_N_MASK);
6192		edata |= 4 << EM_NVM_MSIX_N_SHIFT;
6193		e1000_write_nvm(hw, EM_NVM_PCIE_CTRL, 1, &edata);
6194		e1000_update_nvm_checksum(hw);
6195		device_printf(dev, "Writing to eeprom: done\n");
6196	}
6197}
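
/*
 * The MSIX_N field rewritten above appears to encode the advertised
 * vector count minus one: the test against 4 and the "3 to 5" message
 * imply the factory value encodes 3 vectors and the new value 4
 * encodes 5. The write is persistent (it lands in the NVM/EEPROM,
 * checksum included) and takes effect at the next device reset.
 */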
6198#endif
6199
6200#ifdef DDB
6201DB_COMMAND(em_reset_dev, em_ddb_reset_dev)
6202{
6203	devclass_t	dc;
6204	int max_em;
6205
6206	dc = devclass_find("em");
6207	max_em = devclass_get_maxunit(dc);
6208
6209	/* maxunit is one past the highest unit; units can be sparse */
6210	for (int index = 0; index < max_em; index++) {
6211		device_t dev;
6212		dev = devclass_get_device(dc, index);
6213		if (dev != NULL && device_get_driver(dev) == &em_driver) {
6213			struct adapter *adapter = device_get_softc(dev);
6214			EM_CORE_LOCK(adapter);
6215			em_init_locked(adapter);
6216			EM_CORE_UNLOCK(adapter);
6217		}
6218	}
6219}
6220DB_COMMAND(em_dump_queue, em_ddb_dump_queue)
6221{
6222	devclass_t	dc;
6223	int max_em;
6224
6225	dc = devclass_find("em");
6226	max_em = devclass_get_maxunit(dc);
6227
6228	/* maxunit is one past the highest unit; units can be sparse */
6229	for (int index = 0; index < max_em; index++) {
6230		device_t dev;
6231		dev = devclass_get_device(dc, index);
6232		if (dev != NULL && device_get_driver(dev) == &em_driver)
6232			em_print_debug_info(device_get_softc(dev));
6233	}
6234}
6236#endif
6237