/******************************************************************************

  Copyright (c) 2001-2013, Intel Corporation
  All rights reserved.

  Redistribution and use in source and binary forms, with or without
  modification, are permitted provided that the following conditions are met:

   1. Redistributions of source code must retain the above copyright notice,
      this list of conditions and the following disclaimer.

   2. Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.

   3. Neither the name of the Intel Corporation nor the names of its
      contributors may be used to endorse or promote products derived from
      this software without specific prior written permission.

  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  POSSIBILITY OF SUCH DAMAGE.

******************************************************************************/
/*$FreeBSD: stable/9/sys/dev/e1000/if_em.c 253374 2013-07-15 23:27:48Z jfv $*/

#include "opt_inet.h"
#include "opt_inet6.h"

#ifdef HAVE_KERNEL_OPTION_HEADERS
#include "opt_device_polling.h"
#endif

#include <sys/param.h>
#include <sys/systm.h>
#if __FreeBSD_version >= 800000
#include <sys/buf_ring.h>
#endif
#include <sys/bus.h>
#include <sys/endian.h>
#include <sys/kernel.h>
#include <sys/kthread.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/module.h>
#include <sys/rman.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/sysctl.h>
#include <sys/taskqueue.h>
#include <sys/eventhandler.h>
#include <machine/bus.h>
#include <machine/resource.h>

#include <net/bpf.h>
#include <net/ethernet.h>
#include <net/if.h>
#include <net/if_arp.h>
#include <net/if_dl.h>
#include <net/if_media.h>

#include <net/if_types.h>
#include <net/if_vlan_var.h>

#include <netinet/in_systm.h>
#include <netinet/in.h>
#include <netinet/if_ether.h>
#include <netinet/ip.h>
#include <netinet/ip6.h>
#include <netinet/tcp.h>
#include <netinet/udp.h>

#include <machine/in_cksum.h>
#include <dev/led/led.h>
#include <dev/pci/pcivar.h>
#include <dev/pci/pcireg.h>

#include "e1000_api.h"
#include "e1000_82571.h"
#include "if_em.h"

/*********************************************************************
 *  Set this to one to display debug statistics
 *********************************************************************/
int	em_display_debug_stats = 0;

/*********************************************************************
 *  Driver version:
 *********************************************************************/
char em_driver_version[] = "7.3.8";

/*********************************************************************
 *  PCI Device ID Table
 *
 *  Used by probe to select the devices to attach to.
 *  The last field stores an index into em_strings.
 *  The last entry must be all 0s.
 *
 *  { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index }
 *********************************************************************/

static em_vendor_info_t em_vendor_info_array[] =
{
	/* Intel(R) PRO/1000 Network Connection */
	{ 0x8086, E1000_DEV_ID_82571EB_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_SERDES_DUAL,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_SERDES_QUAD,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER_LP,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_QUAD_FIBER,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571PT_QUAD_COPPER,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82572EI_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82572EI_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82572EI_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82572EI,		PCI_ANY_ID, PCI_ANY_ID, 0},

	{ 0x8086, E1000_DEV_ID_82573E,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82573E_IAMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82573L,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82583V,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_SPT,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_SPT,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_DPT,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_DPT,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IGP_M_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IGP_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IGP_C,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IFE,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IFE_GT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IFE_G,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IGP_M,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_82567V_3,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IGP_M_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IGP_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IGP_C,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IGP_M,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IGP_M_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IFE,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IFE_GT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IFE_G,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_BM,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82574L,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82574LA,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH10_R_BM_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH10_R_BM_LF,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH10_R_BM_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH10_D_BM_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH10_D_BM_LF,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH10_D_BM_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_M_HV_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_M_HV_LC,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_D_HV_DM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_D_HV_DC,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH2_LV_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH2_LV_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_LPT_I217_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_LPT_I217_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_LPTLP_I218_LM,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_LPTLP_I218_V,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	/* required last entry */
	{ 0, 0, 0, 0, 0}
};

/*********************************************************************
 *  Table of branding strings for all supported NICs.
 *********************************************************************/

static char *em_strings[] = {
	"Intel(R) PRO/1000 Network Connection"
};

/*********************************************************************
 *  Function prototypes
 *********************************************************************/
static int	em_probe(device_t);
static int	em_attach(device_t);
static int	em_detach(device_t);
static int	em_shutdown(device_t);
static int	em_suspend(device_t);
static int	em_resume(device_t);
#ifdef EM_MULTIQUEUE
static int	em_mq_start(struct ifnet *, struct mbuf *);
static int	em_mq_start_locked(struct ifnet *,
		    struct tx_ring *, struct mbuf *);
static void	em_qflush(struct ifnet *);
#else
static void	em_start(struct ifnet *);
static void	em_start_locked(struct ifnet *, struct tx_ring *);
#endif
static int	em_ioctl(struct ifnet *, u_long, caddr_t);
static void	em_init(void *);
static void	em_init_locked(struct adapter *);
static void	em_stop(void *);
static void	em_media_status(struct ifnet *, struct ifmediareq *);
static int	em_media_change(struct ifnet *);
static void	em_identify_hardware(struct adapter *);
static int	em_allocate_pci_resources(struct adapter *);
static int	em_allocate_legacy(struct adapter *);
static int	em_allocate_msix(struct adapter *);
static int	em_allocate_queues(struct adapter *);
static int	em_setup_msix(struct adapter *);
static void	em_free_pci_resources(struct adapter *);
static void	em_local_timer(void *);
static void	em_reset(struct adapter *);
static int	em_setup_interface(device_t, struct adapter *);

static void	em_setup_transmit_structures(struct adapter *);
static void	em_initialize_transmit_unit(struct adapter *);
static int	em_allocate_transmit_buffers(struct tx_ring *);
static void	em_free_transmit_structures(struct adapter *);
static void	em_free_transmit_buffers(struct tx_ring *);

static int	em_setup_receive_structures(struct adapter *);
static int	em_allocate_receive_buffers(struct rx_ring *);
static void	em_initialize_receive_unit(struct adapter *);
static void	em_free_receive_structures(struct adapter *);
static void	em_free_receive_buffers(struct rx_ring *);

static void	em_enable_intr(struct adapter *);
static void	em_disable_intr(struct adapter *);
static void	em_update_stats_counters(struct adapter *);
static void	em_add_hw_stats(struct adapter *adapter);
static void	em_txeof(struct tx_ring *);
static bool	em_rxeof(struct rx_ring *, int, int *);
#ifndef __NO_STRICT_ALIGNMENT
static int	em_fixup_rx(struct rx_ring *);
#endif
static void	em_receive_checksum(struct e1000_rx_desc *, struct mbuf *);
static void	em_transmit_checksum_setup(struct tx_ring *, struct mbuf *, int,
		    struct ip *, u32 *, u32 *);
static void	em_tso_setup(struct tx_ring *, struct mbuf *, int, struct ip *,
		    struct tcphdr *, u32 *, u32 *);
static void	em_set_promisc(struct adapter *);
static void	em_disable_promisc(struct adapter *);
static void	em_set_multi(struct adapter *);
static void	em_update_link_status(struct adapter *);
static void	em_refresh_mbufs(struct rx_ring *, int);
static void	em_register_vlan(void *, struct ifnet *, u16);
static void	em_unregister_vlan(void *, struct ifnet *, u16);
static void	em_setup_vlan_hw_support(struct adapter *);
static int	em_xmit(struct tx_ring *, struct mbuf **);
static int	em_dma_malloc(struct adapter *, bus_size_t,
		    struct em_dma_alloc *, int);
static void	em_dma_free(struct adapter *, struct em_dma_alloc *);
static int	em_sysctl_nvm_info(SYSCTL_HANDLER_ARGS);
static void	em_print_nvm_info(struct adapter *);
static int	em_sysctl_debug_info(SYSCTL_HANDLER_ARGS);
static void	em_print_debug_info(struct adapter *);
static int	em_is_valid_ether_addr(u8 *);
static int	em_sysctl_int_delay(SYSCTL_HANDLER_ARGS);
static void	em_add_int_delay_sysctl(struct adapter *, const char *,
		    const char *, struct em_int_delay_info *, int, int);
/* Management and WOL Support */
static void	em_init_manageability(struct adapter *);
static void	em_release_manageability(struct adapter *);
static void	em_get_hw_control(struct adapter *);
static void	em_release_hw_control(struct adapter *);
static void	em_get_wakeup(device_t);
static void	em_enable_wakeup(device_t);
static int	em_enable_phy_wakeup(struct adapter *);
static void	em_led_func(void *, int);
static void	em_disable_aspm(struct adapter *);

static int	em_irq_fast(void *);

/* MSIX handlers */
static void	em_msix_tx(void *);
static void	em_msix_rx(void *);
static void	em_msix_link(void *);
static void	em_handle_tx(void *context, int pending);
static void	em_handle_rx(void *context, int pending);
static void	em_handle_link(void *context, int pending);

static void	em_set_sysctl_value(struct adapter *, const char *,
		    const char *, int *, int);
static int	em_set_flowcntl(SYSCTL_HANDLER_ARGS);
static int	em_sysctl_eee(SYSCTL_HANDLER_ARGS);

static __inline void em_rx_discard(struct rx_ring *, int);

#ifdef DEVICE_POLLING
static poll_handler_t em_poll;
#endif /* DEVICE_POLLING */

/*********************************************************************
 *  FreeBSD Device Interface Entry Points
 *********************************************************************/

static device_method_t em_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe, em_probe),
	DEVMETHOD(device_attach, em_attach),
	DEVMETHOD(device_detach, em_detach),
	DEVMETHOD(device_shutdown, em_shutdown),
	DEVMETHOD(device_suspend, em_suspend),
	DEVMETHOD(device_resume, em_resume),
	DEVMETHOD_END
};

static driver_t em_driver = {
	"em", em_methods, sizeof(struct adapter),
};

devclass_t em_devclass;
DRIVER_MODULE(em, pci, em_driver, em_devclass, 0, 0);
MODULE_DEPEND(em, pci, 1, 1, 1);
MODULE_DEPEND(em, ether, 1, 1, 1);

/*********************************************************************
 *  Tunable default values.
 *********************************************************************/

#define EM_TICKS_TO_USECS(ticks)	((1024 * (ticks) + 500) / 1000)
#define EM_USECS_TO_TICKS(usecs)	((1000 * (usecs) + 512) / 1024)
#define M_TSO_LEN			66

#define MAX_INTS_PER_SEC	8000
#define DEFAULT_ITR		(1000000000/(MAX_INTS_PER_SEC * 256))
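
/*
 * A worked example of the conversions above, assuming (per the e1000
 * documentation) that the interrupt delay registers count in 1.024 usec
 * ticks and that ITR counts in 256 nsec increments:
 *   EM_USECS_TO_TICKS(100) = (1000 * 100 + 512) / 1024 = 98 ticks
 *   DEFAULT_ITR = 1000000000 / (8000 * 256) = 488, which throttles the
 *   adapter to roughly MAX_INTS_PER_SEC interrupts per second.
 */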

/* Allow common code without TSO */
#ifndef CSUM_TSO
#define CSUM_TSO	0
#endif

static SYSCTL_NODE(_hw, OID_AUTO, em, CTLFLAG_RD, 0, "EM driver parameters");

static int em_tx_int_delay_dflt = EM_TICKS_TO_USECS(EM_TIDV);
static int em_rx_int_delay_dflt = EM_TICKS_TO_USECS(EM_RDTR);
TUNABLE_INT("hw.em.tx_int_delay", &em_tx_int_delay_dflt);
TUNABLE_INT("hw.em.rx_int_delay", &em_rx_int_delay_dflt);
SYSCTL_INT(_hw_em, OID_AUTO, tx_int_delay, CTLFLAG_RDTUN, &em_tx_int_delay_dflt,
    0, "Default transmit interrupt delay in usecs");
SYSCTL_INT(_hw_em, OID_AUTO, rx_int_delay, CTLFLAG_RDTUN, &em_rx_int_delay_dflt,
    0, "Default receive interrupt delay in usecs");

static int em_tx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_TADV);
static int em_rx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_RADV);
TUNABLE_INT("hw.em.tx_abs_int_delay", &em_tx_abs_int_delay_dflt);
TUNABLE_INT("hw.em.rx_abs_int_delay", &em_rx_abs_int_delay_dflt);
SYSCTL_INT(_hw_em, OID_AUTO, tx_abs_int_delay, CTLFLAG_RDTUN,
    &em_tx_abs_int_delay_dflt, 0,
    "Default transmit interrupt delay limit in usecs");
SYSCTL_INT(_hw_em, OID_AUTO, rx_abs_int_delay, CTLFLAG_RDTUN,
    &em_rx_abs_int_delay_dflt, 0,
    "Default receive interrupt delay limit in usecs");

static int em_rxd = EM_DEFAULT_RXD;
static int em_txd = EM_DEFAULT_TXD;
TUNABLE_INT("hw.em.rxd", &em_rxd);
TUNABLE_INT("hw.em.txd", &em_txd);
SYSCTL_INT(_hw_em, OID_AUTO, rxd, CTLFLAG_RDTUN, &em_rxd, 0,
    "Number of receive descriptors per queue");
SYSCTL_INT(_hw_em, OID_AUTO, txd, CTLFLAG_RDTUN, &em_txd, 0,
    "Number of transmit descriptors per queue");

static int em_smart_pwr_down = FALSE;
TUNABLE_INT("hw.em.smart_pwr_down", &em_smart_pwr_down);
SYSCTL_INT(_hw_em, OID_AUTO, smart_pwr_down, CTLFLAG_RDTUN, &em_smart_pwr_down,
    0, "Set to true to leave smart power down enabled on newer adapters");

/* Controls whether promiscuous also shows bad packets */
static int em_debug_sbp = FALSE;
TUNABLE_INT("hw.em.sbp", &em_debug_sbp);
SYSCTL_INT(_hw_em, OID_AUTO, sbp, CTLFLAG_RDTUN, &em_debug_sbp, 0,
    "Show bad packets in promiscuous mode");

static int em_enable_msix = TRUE;
TUNABLE_INT("hw.em.enable_msix", &em_enable_msix);
SYSCTL_INT(_hw_em, OID_AUTO, enable_msix, CTLFLAG_RDTUN, &em_enable_msix, 0,
    "Enable MSI-X interrupts");

/* How many packets rxeof tries to clean at a time */
static int em_rx_process_limit = 100;
TUNABLE_INT("hw.em.rx_process_limit", &em_rx_process_limit);
SYSCTL_INT(_hw_em, OID_AUTO, rx_process_limit, CTLFLAG_RDTUN,
    &em_rx_process_limit, 0,
    "Maximum number of received packets to process "
    "at a time, -1 means unlimited");
/* Energy efficient ethernet - default to OFF */
static int eee_setting = 1;
TUNABLE_INT("hw.em.eee_setting", &eee_setting);
SYSCTL_INT(_hw_em, OID_AUTO, eee_setting, CTLFLAG_RDTUN, &eee_setting, 0,
    "Disable Energy Efficient Ethernet");

/* Global used in WOL setup with multiport cards */
static int global_quad_port_a = 0;

#ifdef DEV_NETMAP	/* see ixgbe.c for details */
#include <dev/netmap/if_em_netmap.h>
#endif /* DEV_NETMAP */

/*********************************************************************
 *  Device identification routine
 *
 *  em_probe determines if the driver should be loaded on an
 *  adapter based on the PCI vendor/device ID of the adapter.
 *
 *  return BUS_PROBE_DEFAULT on success, positive on failure
 *********************************************************************/

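/*
 * For example, an 82574L (vendor 0x8086, device E1000_DEV_ID_82574L)
 * matches its table entry regardless of subsystem IDs, because that
 * entry uses PCI_ANY_ID for both the subvendor and subdevice fields;
 * an entry listing specific subsystem IDs would match only those values.
 */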
static int
em_probe(device_t dev)
{
	char		adapter_name[60];
	u16		pci_vendor_id = 0;
	u16		pci_device_id = 0;
	u16		pci_subvendor_id = 0;
	u16		pci_subdevice_id = 0;
	em_vendor_info_t *ent;

	INIT_DEBUGOUT("em_probe: begin");

	pci_vendor_id = pci_get_vendor(dev);
	if (pci_vendor_id != EM_VENDOR_ID)
		return (ENXIO);

	pci_device_id = pci_get_device(dev);
	pci_subvendor_id = pci_get_subvendor(dev);
	pci_subdevice_id = pci_get_subdevice(dev);

	ent = em_vendor_info_array;
	while (ent->vendor_id != 0) {
		if ((pci_vendor_id == ent->vendor_id) &&
		    (pci_device_id == ent->device_id) &&

		    ((pci_subvendor_id == ent->subvendor_id) ||
		    (ent->subvendor_id == PCI_ANY_ID)) &&

		    ((pci_subdevice_id == ent->subdevice_id) ||
		    (ent->subdevice_id == PCI_ANY_ID))) {
			sprintf(adapter_name, "%s %s",
				em_strings[ent->index],
				em_driver_version);
			device_set_desc_copy(dev, adapter_name);
			return (BUS_PROBE_DEFAULT);
		}
		ent++;
	}

	return (ENXIO);
}

/*********************************************************************
 *  Device initialization routine
 *
 *  The attach entry point is called when the driver is being loaded.
 *  This routine identifies the type of hardware, allocates all resources
 *  and initializes the hardware.
 *
 *  return 0 on success, positive on failure
 *********************************************************************/

static int
em_attach(device_t dev)
{
	struct adapter	*adapter;
	struct e1000_hw	*hw;
	int		error = 0;

	INIT_DEBUGOUT("em_attach: begin");

	if (resource_disabled("em", device_get_unit(dev))) {
		device_printf(dev, "Disabled by device hint\n");
		return (ENXIO);
	}

	adapter = device_get_softc(dev);
	adapter->dev = adapter->osdep.dev = dev;
	hw = &adapter->hw;
	EM_CORE_LOCK_INIT(adapter, device_get_nameunit(dev));

	/* SYSCTL stuff */
	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
	    OID_AUTO, "nvm", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
	    em_sysctl_nvm_info, "I", "NVM Information");

	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
	    OID_AUTO, "debug", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
	    em_sysctl_debug_info, "I", "Debug Information");

	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
	    OID_AUTO, "fc", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
	    em_set_flowcntl, "I", "Flow Control");

	callout_init_mtx(&adapter->timer, &adapter->core_mtx, 0);

	/* Determine hardware and mac info */
	em_identify_hardware(adapter);

	/* Setup PCI resources */
	if (em_allocate_pci_resources(adapter)) {
		device_printf(dev, "Allocation of PCI resources failed\n");
		error = ENXIO;
		goto err_pci;
	}

	/*
	** For ICH8 and family we need to
	** map the flash memory, and this
	** must happen after the MAC is
	** identified
	*/
	if ((hw->mac.type == e1000_ich8lan) ||
	    (hw->mac.type == e1000_ich9lan) ||
	    (hw->mac.type == e1000_ich10lan) ||
	    (hw->mac.type == e1000_pchlan) ||
	    (hw->mac.type == e1000_pch2lan) ||
	    (hw->mac.type == e1000_pch_lpt)) {
		int rid = EM_BAR_TYPE_FLASH;
		adapter->flash = bus_alloc_resource_any(dev,
		    SYS_RES_MEMORY, &rid, RF_ACTIVE);
		if (adapter->flash == NULL) {
			device_printf(dev, "Mapping of Flash failed\n");
			error = ENXIO;
			goto err_pci;
		}
		/* This is used in the shared code */
		hw->flash_address = (u8 *)adapter->flash;
		adapter->osdep.flash_bus_space_tag =
		    rman_get_bustag(adapter->flash);
		adapter->osdep.flash_bus_space_handle =
		    rman_get_bushandle(adapter->flash);
	}

	/* Do Shared Code initialization */
	if (e1000_setup_init_funcs(hw, TRUE)) {
		device_printf(dev, "Setup of Shared code failed\n");
		error = ENXIO;
		goto err_pci;
	}

	e1000_get_bus_info(hw);

	/* Set up some sysctls for the tunable interrupt delays */
	em_add_int_delay_sysctl(adapter, "rx_int_delay",
	    "receive interrupt delay in usecs", &adapter->rx_int_delay,
	    E1000_REGISTER(hw, E1000_RDTR), em_rx_int_delay_dflt);
	em_add_int_delay_sysctl(adapter, "tx_int_delay",
	    "transmit interrupt delay in usecs", &adapter->tx_int_delay,
	    E1000_REGISTER(hw, E1000_TIDV), em_tx_int_delay_dflt);
	em_add_int_delay_sysctl(adapter, "rx_abs_int_delay",
	    "receive interrupt delay limit in usecs",
	    &adapter->rx_abs_int_delay,
	    E1000_REGISTER(hw, E1000_RADV),
	    em_rx_abs_int_delay_dflt);
	em_add_int_delay_sysctl(adapter, "tx_abs_int_delay",
	    "transmit interrupt delay limit in usecs",
	    &adapter->tx_abs_int_delay,
	    E1000_REGISTER(hw, E1000_TADV),
	    em_tx_abs_int_delay_dflt);
	em_add_int_delay_sysctl(adapter, "itr",
	    "interrupt delay limit in usecs/4",
	    &adapter->tx_itr,
	    E1000_REGISTER(hw, E1000_ITR),
	    DEFAULT_ITR);

	/* Sysctl for limiting the amount of work done in the taskqueue */
	em_set_sysctl_value(adapter, "rx_processing_limit",
	    "max number of rx packets to process", &adapter->rx_process_limit,
	    em_rx_process_limit);

	/*
	 * Validate the number of transmit and receive descriptors. It
	 * must not exceed the hardware maximum, and must be a multiple
	 * of EM_DBA_ALIGN.
	 */
	if (((em_txd * sizeof(struct e1000_tx_desc)) % EM_DBA_ALIGN) != 0 ||
	    (em_txd > EM_MAX_TXD) || (em_txd < EM_MIN_TXD)) {
		device_printf(dev, "Using %d TX descriptors instead of %d!\n",
		    EM_DEFAULT_TXD, em_txd);
		adapter->num_tx_desc = EM_DEFAULT_TXD;
	} else
		adapter->num_tx_desc = em_txd;

	if (((em_rxd * sizeof(struct e1000_rx_desc)) % EM_DBA_ALIGN) != 0 ||
	    (em_rxd > EM_MAX_RXD) || (em_rxd < EM_MIN_RXD)) {
		device_printf(dev, "Using %d RX descriptors instead of %d!\n",
		    EM_DEFAULT_RXD, em_rxd);
		adapter->num_rx_desc = EM_DEFAULT_RXD;
	} else
		adapter->num_rx_desc = em_rxd;
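
	/*
	 * A worked example of the check above, assuming EM_DBA_ALIGN is
	 * 128 and a legacy descriptor is 16 bytes (as in if_em.h and the
	 * shared code): any count that is a multiple of 8 descriptors
	 * (8 * 16 = 128 bytes) passes the alignment test, so hw.em.txd=1024
	 * is accepted while hw.em.txd=100 (1600 bytes, 1600 % 128 == 64)
	 * falls back to EM_DEFAULT_TXD.
	 */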

	hw->mac.autoneg = DO_AUTO_NEG;
	hw->phy.autoneg_wait_to_complete = FALSE;
	hw->phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;

	/* Copper options */
	if (hw->phy.media_type == e1000_media_type_copper) {
		hw->phy.mdix = AUTO_ALL_MODES;
		hw->phy.disable_polarity_correction = FALSE;
		hw->phy.ms_type = EM_MASTER_SLAVE;
	}

	/*
	 * Set the frame limits assuming
	 * standard ethernet sized frames.
	 */
	adapter->hw.mac.max_frame_size =
	    ETHERMTU + ETHER_HDR_LEN + ETHERNET_FCS_SIZE;

	/*
	 * This controls when hardware reports transmit completion
	 * status.
	 */
	hw->mac.report_tx_early = 1;

	/*
	** Get queue/ring memory
	*/
	if (em_allocate_queues(adapter)) {
		error = ENOMEM;
		goto err_pci;
	}

	/* Allocate multicast array memory. */
	adapter->mta = malloc(sizeof(u8) * ETH_ADDR_LEN *
	    MAX_NUM_MULTICAST_ADDRESSES, M_DEVBUF, M_NOWAIT);
	if (adapter->mta == NULL) {
		device_printf(dev, "Can not allocate multicast setup array\n");
		error = ENOMEM;
		goto err_late;
	}

	/* Check SOL/IDER usage */
	if (e1000_check_reset_block(hw))
		device_printf(dev, "PHY reset is blocked"
		    " due to SOL/IDER session.\n");

	/* Sysctl for setting Energy Efficient Ethernet */
	hw->dev_spec.ich8lan.eee_disable = eee_setting;
	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
	    OID_AUTO, "eee_control", CTLTYPE_INT|CTLFLAG_RW,
	    adapter, 0, em_sysctl_eee, "I",
	    "Disable Energy Efficient Ethernet");
	/*
	** Start from a known state; this is
	** important for reading the NVM and
	** MAC address from it.
	*/
	e1000_reset_hw(hw);

	/* Make sure we have a good EEPROM before we read from it */
	if (e1000_validate_nvm_checksum(hw) < 0) {
		/*
		** Some PCI-E parts fail the first check due to
		** the link being in sleep state; call it again.
		** If it fails a second time, it is a real issue.
		*/
		if (e1000_validate_nvm_checksum(hw) < 0) {
			device_printf(dev,
			    "The EEPROM Checksum Is Not Valid\n");
			error = EIO;
			goto err_late;
		}
	}

	/* Copy the permanent MAC address out of the EEPROM */
	if (e1000_read_mac_addr(hw) < 0) {
		device_printf(dev, "EEPROM read error while reading MAC"
		    " address\n");
		error = EIO;
		goto err_late;
	}

	if (!em_is_valid_ether_addr(hw->mac.addr)) {
		device_printf(dev, "Invalid MAC address\n");
		error = EIO;
		goto err_late;
	}

	/*
	**  Do interrupt configuration
	*/
	if (adapter->msix > 1) /* Do MSIX */
		error = em_allocate_msix(adapter);
	else  /* MSI or Legacy */
		error = em_allocate_legacy(adapter);
	if (error)
		goto err_late;

	/*
	 * Get Wake-on-Lan and Management info for later use
	 */
	em_get_wakeup(dev);

	/* Setup OS specific network interface */
	if (em_setup_interface(dev, adapter) != 0)
		goto err_late;

	em_reset(adapter);

	/* Initialize statistics */
	em_update_stats_counters(adapter);

	hw->mac.get_link_status = 1;
	em_update_link_status(adapter);

	/* Register for VLAN events */
	adapter->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
	    em_register_vlan, adapter, EVENTHANDLER_PRI_FIRST);
	adapter->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
	    em_unregister_vlan, adapter, EVENTHANDLER_PRI_FIRST);

	em_add_hw_stats(adapter);

	/* Non-AMT based hardware can now take control from firmware */
	if (adapter->has_manage && !adapter->has_amt)
		em_get_hw_control(adapter);

	/* Tell the stack that the interface is not active */
	adapter->ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
	adapter->ifp->if_drv_flags |= IFF_DRV_OACTIVE;

	adapter->led_dev = led_create(em_led_func, adapter,
	    device_get_nameunit(dev));
#ifdef DEV_NETMAP
	em_netmap_attach(adapter);
#endif /* DEV_NETMAP */

	INIT_DEBUGOUT("em_attach: end");

	return (0);

err_late:
	em_free_transmit_structures(adapter);
	em_free_receive_structures(adapter);
	em_release_hw_control(adapter);
	if (adapter->ifp != NULL)
		if_free(adapter->ifp);
err_pci:
	em_free_pci_resources(adapter);
	free(adapter->mta, M_DEVBUF);
	EM_CORE_LOCK_DESTROY(adapter);

	return (error);
}

/*********************************************************************
 *  Device removal routine
 *
 *  The detach entry point is called when the driver is being removed.
 *  This routine stops the adapter and deallocates all the resources
 *  that were allocated for driver operation.
 *
 *  return 0 on success, positive on failure
 *********************************************************************/

static int
em_detach(device_t dev)
{
	struct adapter	*adapter = device_get_softc(dev);
	struct ifnet	*ifp = adapter->ifp;

	INIT_DEBUGOUT("em_detach: begin");

	/* Make sure VLANS are not using driver */
	if (adapter->ifp->if_vlantrunk != NULL) {
		device_printf(dev,"Vlan in use, detach first\n");
		return (EBUSY);
	}

#ifdef DEVICE_POLLING
	if (ifp->if_capenable & IFCAP_POLLING)
		ether_poll_deregister(ifp);
#endif

	if (adapter->led_dev != NULL)
		led_destroy(adapter->led_dev);

	EM_CORE_LOCK(adapter);
	adapter->in_detach = 1;
	em_stop(adapter);
	EM_CORE_UNLOCK(adapter);
	EM_CORE_LOCK_DESTROY(adapter);

	e1000_phy_hw_reset(&adapter->hw);

	em_release_manageability(adapter);
	em_release_hw_control(adapter);

	/* Unregister VLAN events */
	if (adapter->vlan_attach != NULL)
		EVENTHANDLER_DEREGISTER(vlan_config, adapter->vlan_attach);
	if (adapter->vlan_detach != NULL)
		EVENTHANDLER_DEREGISTER(vlan_unconfig, adapter->vlan_detach);

	ether_ifdetach(adapter->ifp);
	callout_drain(&adapter->timer);

#ifdef DEV_NETMAP
	netmap_detach(ifp);
#endif /* DEV_NETMAP */

	em_free_pci_resources(adapter);
	bus_generic_detach(dev);
	if_free(ifp);

	em_free_transmit_structures(adapter);
	em_free_receive_structures(adapter);

	em_release_hw_control(adapter);
	free(adapter->mta, M_DEVBUF);

	return (0);
}

/*********************************************************************
 *
 *  Shutdown entry point
 *
 **********************************************************************/

static int
em_shutdown(device_t dev)
{
	return em_suspend(dev);
}

/*
 * Suspend/resume device methods.
 */
static int
em_suspend(device_t dev)
{
	struct adapter *adapter = device_get_softc(dev);

	EM_CORE_LOCK(adapter);

	em_release_manageability(adapter);
	em_release_hw_control(adapter);
	em_enable_wakeup(dev);

	EM_CORE_UNLOCK(adapter);

	return bus_generic_suspend(dev);
}

static int
em_resume(device_t dev)
{
	struct adapter *adapter = device_get_softc(dev);
	struct tx_ring	*txr = adapter->tx_rings;
	struct ifnet *ifp = adapter->ifp;

	EM_CORE_LOCK(adapter);
	if (adapter->hw.mac.type == e1000_pch2lan)
		e1000_resume_workarounds_pchlan(&adapter->hw);
	em_init_locked(adapter);
	em_init_manageability(adapter);

	if ((ifp->if_flags & IFF_UP) &&
	    (ifp->if_drv_flags & IFF_DRV_RUNNING) && adapter->link_active) {
		for (int i = 0; i < adapter->num_queues; i++, txr++) {
			EM_TX_LOCK(txr);
#ifdef EM_MULTIQUEUE
			if (!drbr_empty(ifp, txr->br))
				em_mq_start_locked(ifp, txr, NULL);
#else
			if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
				em_start_locked(ifp, txr);
#endif
			EM_TX_UNLOCK(txr);
		}
	}
	EM_CORE_UNLOCK(adapter);

	return bus_generic_resume(dev);
}

#ifdef EM_MULTIQUEUE
/*********************************************************************
 *  Multiqueue Transmit routines
 *
 *  em_mq_start is called by the stack to initiate a transmit.
 *  However, if the hardware is busy the driver can queue the
 *  request rather than do an immediate send. That deferral,
 *  rather than multiple hardware TX queues, is what gives this
 *  driver its advantage.
 **********************************************************************/
static int
em_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr, struct mbuf *m)
{
	struct adapter	*adapter = txr->adapter;
	struct mbuf	*next;
	int		err = 0, enq = 0;

	if ((ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) !=
	    IFF_DRV_RUNNING || adapter->link_active == 0) {
		if (m != NULL)
			err = drbr_enqueue(ifp, txr->br, m);
		return (err);
	}

	enq = 0;
	if (m != NULL) {
		err = drbr_enqueue(ifp, txr->br, m);
		if (err)
			return (err);
	}

	/* Process the queue */
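	/*
	 * drbr_peek() leaves the mbuf at the head of the ring: on a
	 * successful send (or when em_xmit() consumed it and set the
	 * pointer to NULL) it is removed with drbr_advance(), while on
	 * failure drbr_putback() stores the possibly-modified mbuf back
	 * at the head so it can be retried later.
	 */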
	while ((next = drbr_peek(ifp, txr->br)) != NULL) {
		if ((err = em_xmit(txr, &next)) != 0) {
			if (next == NULL)
				drbr_advance(ifp, txr->br);
			else
				drbr_putback(ifp, txr->br, next);
			break;
		}
		drbr_advance(ifp, txr->br);
		enq++;
		ifp->if_obytes += next->m_pkthdr.len;
		if (next->m_flags & M_MCAST)
			ifp->if_omcasts++;
		ETHER_BPF_MTAP(ifp, next);
		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
			break;
	}

	if (enq > 0) {
		/* Set the watchdog */
		txr->queue_status = EM_QUEUE_WORKING;
		txr->watchdog_time = ticks;
	}

	if (txr->tx_avail < EM_MAX_SCATTER)
		em_txeof(txr);
	if (txr->tx_avail < EM_MAX_SCATTER)
		ifp->if_drv_flags |= IFF_DRV_OACTIVE;
	return (err);
}

/*
** Multiqueue capable stack interface
*/
static int
em_mq_start(struct ifnet *ifp, struct mbuf *m)
{
	struct adapter	*adapter = ifp->if_softc;
	struct tx_ring	*txr = adapter->tx_rings;
	int		error;

	if (EM_TX_TRYLOCK(txr)) {
		error = em_mq_start_locked(ifp, txr, m);
		EM_TX_UNLOCK(txr);
	} else
		error = drbr_enqueue(ifp, txr->br, m);

	return (error);
}

/*
** Flush all ring buffers
*/
static void
em_qflush(struct ifnet *ifp)
{
	struct adapter	*adapter = ifp->if_softc;
	struct tx_ring	*txr = adapter->tx_rings;
	struct mbuf	*m;

	for (int i = 0; i < adapter->num_queues; i++, txr++) {
		EM_TX_LOCK(txr);
		while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
			m_freem(m);
		EM_TX_UNLOCK(txr);
	}
	if_qflush(ifp);
}
#else  /* !EM_MULTIQUEUE */

static void
em_start_locked(struct ifnet *ifp, struct tx_ring *txr)
{
	struct adapter	*adapter = ifp->if_softc;
	struct mbuf	*m_head;

	EM_TX_LOCK_ASSERT(txr);

	if ((ifp->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) !=
	    IFF_DRV_RUNNING)
		return;

	if (!adapter->link_active)
		return;

	while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
		/* Call cleanup if number of TX descriptors low */
		if (txr->tx_avail <= EM_TX_CLEANUP_THRESHOLD)
			em_txeof(txr);
		if (txr->tx_avail < EM_MAX_SCATTER) {
			ifp->if_drv_flags |= IFF_DRV_OACTIVE;
			break;
		}
		IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
		if (m_head == NULL)
			break;
		/*
		 *  Encapsulation can modify our pointer, and/or make it
		 *  NULL on failure.  In that event, we can't requeue.
		 */
		if (em_xmit(txr, &m_head)) {
			if (m_head == NULL)
				break;
			IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
			break;
		}

		/* Send a copy of the frame to the BPF listener */
		ETHER_BPF_MTAP(ifp, m_head);

		/* Set timeout in case hardware has problems transmitting. */
		txr->watchdog_time = ticks;
		txr->queue_status = EM_QUEUE_WORKING;
	}

	return;
}

static void
em_start(struct ifnet *ifp)
{
	struct adapter	*adapter = ifp->if_softc;
	struct tx_ring	*txr = adapter->tx_rings;

	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
		EM_TX_LOCK(txr);
		em_start_locked(ifp, txr);
		EM_TX_UNLOCK(txr);
	}
	return;
}
#endif /* EM_MULTIQUEUE */

/*********************************************************************
 *  Ioctl entry point
 *
 *  em_ioctl is called when the user wants to configure the
 *  interface.
 *
 *  return 0 on success, positive on failure
 **********************************************************************/

static int
em_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
{
	struct adapter	*adapter = ifp->if_softc;
	struct ifreq	*ifr = (struct ifreq *)data;
#if defined(INET) || defined(INET6)
	struct ifaddr	*ifa = (struct ifaddr *)data;
#endif
	bool		avoid_reset = FALSE;
	int		error = 0;

	if (adapter->in_detach)
		return (error);

	switch (command) {
	case SIOCSIFADDR:
#ifdef INET
		if (ifa->ifa_addr->sa_family == AF_INET)
			avoid_reset = TRUE;
#endif
#ifdef INET6
		if (ifa->ifa_addr->sa_family == AF_INET6)
			avoid_reset = TRUE;
#endif
		/*
		** Calling init results in link renegotiation,
		** so we avoid doing it when possible.
		*/
		if (avoid_reset) {
			ifp->if_flags |= IFF_UP;
			if (!(ifp->if_drv_flags & IFF_DRV_RUNNING))
				em_init(adapter);
#ifdef INET
			if (!(ifp->if_flags & IFF_NOARP))
				arp_ifinit(ifp, ifa);
#endif
		} else
			error = ether_ioctl(ifp, command, data);
		break;
	case SIOCSIFMTU:
	    {
		int max_frame_size;

		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFMTU (Set Interface MTU)");

		EM_CORE_LOCK(adapter);
		switch (adapter->hw.mac.type) {
		case e1000_82571:
		case e1000_82572:
		case e1000_ich9lan:
		case e1000_ich10lan:
		case e1000_pch2lan:
		case e1000_pch_lpt:
		case e1000_82574:
		case e1000_82583:
		case e1000_80003es2lan:	/* 9K Jumbo Frame size */
			max_frame_size = 9234;
			break;
		case e1000_pchlan:
			max_frame_size = 4096;
			break;
			/* Adapters that do not support jumbo frames */
		case e1000_ich8lan:
			max_frame_size = ETHER_MAX_LEN;
			break;
		default:
			max_frame_size = MAX_JUMBO_FRAME_SIZE;
		}
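		/*
		 * The 9234-byte limit corresponds to a 9216-byte (9K)
		 * jumbo MTU plus the 14-byte Ethernet header and the
		 * 4-byte CRC.
		 */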
		if (ifr->ifr_mtu > max_frame_size - ETHER_HDR_LEN -
		    ETHER_CRC_LEN) {
			EM_CORE_UNLOCK(adapter);
			error = EINVAL;
			break;
		}

		ifp->if_mtu = ifr->ifr_mtu;
		adapter->hw.mac.max_frame_size =
		    ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
		em_init_locked(adapter);
		EM_CORE_UNLOCK(adapter);
		break;
	    }
	case SIOCSIFFLAGS:
		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFFLAGS"
		    " (Set Interface Flags)");
		EM_CORE_LOCK(adapter);
		if (ifp->if_flags & IFF_UP) {
			if ((ifp->if_drv_flags & IFF_DRV_RUNNING)) {
				if ((ifp->if_flags ^ adapter->if_flags) &
				    (IFF_PROMISC | IFF_ALLMULTI)) {
					em_disable_promisc(adapter);
					em_set_promisc(adapter);
				}
			} else
				em_init_locked(adapter);
		} else
			if (ifp->if_drv_flags & IFF_DRV_RUNNING)
				em_stop(adapter);
		adapter->if_flags = ifp->if_flags;
		EM_CORE_UNLOCK(adapter);
		break;
	case SIOCADDMULTI:
	case SIOCDELMULTI:
		IOCTL_DEBUGOUT("ioctl rcv'd: SIOC(ADD|DEL)MULTI");
		if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
			EM_CORE_LOCK(adapter);
			em_disable_intr(adapter);
			em_set_multi(adapter);
#ifdef DEVICE_POLLING
			if (!(ifp->if_capenable & IFCAP_POLLING))
#endif
				em_enable_intr(adapter);
			EM_CORE_UNLOCK(adapter);
		}
		break;
	case SIOCSIFMEDIA:
		/* Check SOL/IDER usage */
		EM_CORE_LOCK(adapter);
		if (e1000_check_reset_block(&adapter->hw)) {
			EM_CORE_UNLOCK(adapter);
			device_printf(adapter->dev, "Media change is"
			    " blocked due to SOL/IDER session.\n");
			break;
		}
		EM_CORE_UNLOCK(adapter);
		/* falls thru */
	case SIOCGIFMEDIA:
		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCxIFMEDIA"
		    " (Get/Set Interface Media)");
		error = ifmedia_ioctl(ifp, ifr, &adapter->media, command);
		break;
	case SIOCSIFCAP:
	    {
		int mask, reinit;

		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFCAP (Set Capabilities)");
		reinit = 0;
		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
#ifdef DEVICE_POLLING
		if (mask & IFCAP_POLLING) {
			if (ifr->ifr_reqcap & IFCAP_POLLING) {
				error = ether_poll_register(em_poll, ifp);
				if (error)
					return (error);
				EM_CORE_LOCK(adapter);
				em_disable_intr(adapter);
				ifp->if_capenable |= IFCAP_POLLING;
				EM_CORE_UNLOCK(adapter);
			} else {
				error = ether_poll_deregister(ifp);
				/* Enable interrupt even in error case */
				EM_CORE_LOCK(adapter);
				em_enable_intr(adapter);
				ifp->if_capenable &= ~IFCAP_POLLING;
				EM_CORE_UNLOCK(adapter);
			}
		}
#endif
		if (mask & IFCAP_HWCSUM) {
			ifp->if_capenable ^= IFCAP_HWCSUM;
			reinit = 1;
		}
		if (mask & IFCAP_TSO4) {
			ifp->if_capenable ^= IFCAP_TSO4;
			reinit = 1;
		}
		if (mask & IFCAP_VLAN_HWTAGGING) {
			ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
			reinit = 1;
		}
		if (mask & IFCAP_VLAN_HWFILTER) {
			ifp->if_capenable ^= IFCAP_VLAN_HWFILTER;
			reinit = 1;
		}
		if (mask & IFCAP_VLAN_HWTSO) {
			ifp->if_capenable ^= IFCAP_VLAN_HWTSO;
			reinit = 1;
		}
		if ((mask & IFCAP_WOL) &&
		    (ifp->if_capabilities & IFCAP_WOL) != 0) {
			if (mask & IFCAP_WOL_MCAST)
				ifp->if_capenable ^= IFCAP_WOL_MCAST;
			if (mask & IFCAP_WOL_MAGIC)
				ifp->if_capenable ^= IFCAP_WOL_MAGIC;
		}
		if (reinit && (ifp->if_drv_flags & IFF_DRV_RUNNING))
			em_init(adapter);
		VLAN_CAPABILITIES(ifp);
		break;
	    }

	default:
		error = ether_ioctl(ifp, command, data);
		break;
	}

	return (error);
}


/*********************************************************************
 *  Init entry point
 *
 *  This routine is used in two ways. It is used by the stack as
 *  the init entry point in the network interface structure. It is
 *  also used by the driver as a hw/sw initialization routine to
 *  get to a consistent state.
 *
 **********************************************************************/

static void
em_init_locked(struct adapter *adapter)
{
	struct ifnet	*ifp = adapter->ifp;
	device_t	dev = adapter->dev;

	INIT_DEBUGOUT("em_init: begin");

	EM_CORE_LOCK_ASSERT(adapter);

	em_disable_intr(adapter);
	callout_stop(&adapter->timer);

	/* Get the latest mac address, User can use a LAA */
	bcopy(IF_LLADDR(adapter->ifp), adapter->hw.mac.addr,
	    ETHER_ADDR_LEN);

	/* Put the address into the Receive Address Array */
	e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);

	/*
	 * With the 82571 adapter, RAR[0] may be overwritten
	 * when the other port is reset. We make a duplicate
	 * in RAR[14] for that eventuality, which assures
	 * the interface continues to function.
	 */
	if (adapter->hw.mac.type == e1000_82571) {
		e1000_set_laa_state_82571(&adapter->hw, TRUE);
		e1000_rar_set(&adapter->hw, adapter->hw.mac.addr,
		    E1000_RAR_ENTRIES - 1);
	}

	/* Initialize the hardware */
	em_reset(adapter);
	em_update_link_status(adapter);

	/* Setup VLAN support, basic and offload if available */
	E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);

	/* Set hardware offload abilities */
	ifp->if_hwassist = 0;
	if (ifp->if_capenable & IFCAP_TXCSUM)
		ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP);
	if (ifp->if_capenable & IFCAP_TSO4)
		ifp->if_hwassist |= CSUM_TSO;

	/* Configure for OS presence */
	em_init_manageability(adapter);

	/* Prepare transmit descriptors and buffers */
	em_setup_transmit_structures(adapter);
	em_initialize_transmit_unit(adapter);

	/* Setup Multicast table */
	em_set_multi(adapter);

	/*
	** Figure out the desired mbuf
	** pool for doing jumbos
	*/
	if (adapter->hw.mac.max_frame_size <= 2048)
		adapter->rx_mbuf_sz = MCLBYTES;
	else if (adapter->hw.mac.max_frame_size <= 4096)
		adapter->rx_mbuf_sz = MJUMPAGESIZE;
	else
		adapter->rx_mbuf_sz = MJUM9BYTES;
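
	/*
	 * For example, the standard 1500-byte MTU (1518-byte frames)
	 * uses plain 2K clusters here, while a 9000-byte jumbo MTU
	 * (9018-byte frames) selects 9K clusters.
	 */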

	/* Prepare receive descriptors and buffers */
	if (em_setup_receive_structures(adapter)) {
		device_printf(dev, "Could not setup receive structures\n");
		em_stop(adapter);
		return;
	}
	em_initialize_receive_unit(adapter);

	/* Use real VLAN Filter support? */
	if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) {
		if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
			/* Use real VLAN Filter support */
			em_setup_vlan_hw_support(adapter);
		else {
			u32 ctrl;
			ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL);
			ctrl |= E1000_CTRL_VME;
			E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl);
		}
	}
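
	/*
	 * The fallback path above only sets CTRL.VME, which enables
	 * 802.1Q tag stripping/insertion in hardware; programming the
	 * VFTA-based filter table is left to em_setup_vlan_hw_support().
	 */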

	/* Don't lose promiscuous settings */
	em_set_promisc(adapter);

	/* Set the interface as ACTIVE */
	ifp->if_drv_flags |= IFF_DRV_RUNNING;
	ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;

	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
	e1000_clear_hw_cntrs_base_generic(&adapter->hw);

	/* MSI/X configuration for 82574 */
	if (adapter->hw.mac.type == e1000_82574) {
		int tmp;
		tmp = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
		tmp |= E1000_CTRL_EXT_PBA_CLR;
		E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, tmp);
		/* Set the IVAR - interrupt vector routing. */
		E1000_WRITE_REG(&adapter->hw, E1000_IVAR, adapter->ivars);
	}

#ifdef DEVICE_POLLING
	/*
	 * Only enable interrupts if we are not polling, make sure
	 * they are off otherwise.
	 */
	if (ifp->if_capenable & IFCAP_POLLING)
		em_disable_intr(adapter);
	else
#endif /* DEVICE_POLLING */
		em_enable_intr(adapter);

	/* AMT based hardware can now take control from firmware */
	if (adapter->has_manage && adapter->has_amt)
		em_get_hw_control(adapter);
}

static void
em_init(void *arg)
{
	struct adapter *adapter = arg;

	EM_CORE_LOCK(adapter);
	em_init_locked(adapter);
	EM_CORE_UNLOCK(adapter);
}

#ifdef DEVICE_POLLING
/*********************************************************************
 *
 *  Legacy polling routine: note this only works with single queue
 *
 *********************************************************************/
static int
em_poll(struct ifnet *ifp, enum poll_cmd cmd, int count)
{
	struct adapter *adapter = ifp->if_softc;
	struct tx_ring	*txr = adapter->tx_rings;
	struct rx_ring	*rxr = adapter->rx_rings;
	u32		reg_icr;
	int		rx_done;

	EM_CORE_LOCK(adapter);
	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
		EM_CORE_UNLOCK(adapter);
		return (0);
	}

	if (cmd == POLL_AND_CHECK_STATUS) {
		reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
		if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
			callout_stop(&adapter->timer);
			adapter->hw.mac.get_link_status = 1;
			em_update_link_status(adapter);
			callout_reset(&adapter->timer, hz,
			    em_local_timer, adapter);
		}
	}
	EM_CORE_UNLOCK(adapter);

	em_rxeof(rxr, count, &rx_done);

	EM_TX_LOCK(txr);
	em_txeof(txr);
#ifdef EM_MULTIQUEUE
	if (!drbr_empty(ifp, txr->br))
		em_mq_start_locked(ifp, txr, NULL);
#else
	if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
		em_start_locked(ifp, txr);
#endif
	EM_TX_UNLOCK(txr);

	return (rx_done);
}
#endif /* DEVICE_POLLING */


/*********************************************************************
 *
 *  Fast Legacy/MSI Combined Interrupt Service routine
 *
 *********************************************************************/
static int
em_irq_fast(void *arg)
{
	struct adapter	*adapter = arg;
	struct ifnet	*ifp;
	u32		reg_icr;

	ifp = adapter->ifp;

	reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);

	/* Hot eject?  */
	if (reg_icr == 0xffffffff)
		return FILTER_STRAY;

	/* Definitely not our interrupt.  */
	if (reg_icr == 0x0)
		return FILTER_STRAY;

	/*
	 * Starting with the 82571 chip, bit 31 should be used to
	 * determine whether the interrupt belongs to us.
	 */
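	/*
	 * That bit is E1000_ICR_INT_ASSERTED (0x80000000); on a shared
	 * interrupt line, a read of ICR with it clear means another
	 * device raised the line, so the filter reports a stray.
	 */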
	if (adapter->hw.mac.type >= e1000_82571 &&
	    (reg_icr & E1000_ICR_INT_ASSERTED) == 0)
		return FILTER_STRAY;

	em_disable_intr(adapter);
	taskqueue_enqueue(adapter->tq, &adapter->que_task);

	/* Link status change */
	if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
		adapter->hw.mac.get_link_status = 1;
		taskqueue_enqueue(taskqueue_fast, &adapter->link_task);
	}

	if (reg_icr & E1000_ICR_RXO)
		adapter->rx_overruns++;
	return FILTER_HANDLED;
}

/* Combined RX/TX handler, used by Legacy and MSI */
static void
em_handle_que(void *context, int pending)
{
	struct adapter	*adapter = context;
	struct ifnet	*ifp = adapter->ifp;
	struct tx_ring	*txr = adapter->tx_rings;
	struct rx_ring	*rxr = adapter->rx_rings;

	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
		bool more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
		EM_TX_LOCK(txr);
		em_txeof(txr);
#ifdef EM_MULTIQUEUE
		if (!drbr_empty(ifp, txr->br))
			em_mq_start_locked(ifp, txr, NULL);
#else
		if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
			em_start_locked(ifp, txr);
#endif
		EM_TX_UNLOCK(txr);
		if (more) {
			taskqueue_enqueue(adapter->tq, &adapter->que_task);
			return;
		}
	}

	em_enable_intr(adapter);
	return;
}

/*********************************************************************
 *
 *  MSIX Interrupt Service Routines
 *
 **********************************************************************/
static void
em_msix_tx(void *arg)
{
	struct tx_ring *txr = arg;
	struct adapter *adapter = txr->adapter;
	struct ifnet	*ifp = adapter->ifp;

	++txr->tx_irq;
	EM_TX_LOCK(txr);
	em_txeof(txr);
#ifdef EM_MULTIQUEUE
	if (!drbr_empty(ifp, txr->br))
		em_mq_start_locked(ifp, txr, NULL);
#else
	if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
		em_start_locked(ifp, txr);
#endif
	/* Reenable this interrupt */
	E1000_WRITE_REG(&adapter->hw, E1000_IMS, txr->ims);
	EM_TX_UNLOCK(txr);
	return;
}

/*********************************************************************
 *
 *  MSIX RX Interrupt Service routine
 *
 **********************************************************************/

static void
em_msix_rx(void *arg)
{
	struct rx_ring	*rxr = arg;
	struct adapter	*adapter = rxr->adapter;
	bool		more;

	++rxr->rx_irq;
	if (!(adapter->ifp->if_drv_flags & IFF_DRV_RUNNING))
		return;
	more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
	if (more)
		taskqueue_enqueue(rxr->tq, &rxr->rx_task);
	else
		/* Reenable this interrupt */
		E1000_WRITE_REG(&adapter->hw, E1000_IMS, rxr->ims);
	return;
}

/*********************************************************************
 *
 *  MSIX Link Fast Interrupt Service routine
 *
 **********************************************************************/
static void
em_msix_link(void *arg)
{
	struct adapter	*adapter = arg;
	u32		reg_icr;

	++adapter->link_irq;
	reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);

	if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
		adapter->hw.mac.get_link_status = 1;
		em_handle_link(adapter, 0);
	} else
		E1000_WRITE_REG(&adapter->hw, E1000_IMS,
		    EM_MSIX_LINK | E1000_IMS_LSC);
	return;
}

static void
em_handle_rx(void *context, int pending)
{
	struct rx_ring	*rxr = context;
	struct adapter	*adapter = rxr->adapter;
	bool		more;

	more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
	if (more)
		taskqueue_enqueue(rxr->tq, &rxr->rx_task);
	else
		/* Reenable this interrupt */
		E1000_WRITE_REG(&adapter->hw, E1000_IMS, rxr->ims);
}

static void
em_handle_tx(void *context, int pending)
{
	struct tx_ring	*txr = context;
	struct adapter	*adapter = txr->adapter;
	struct ifnet	*ifp = adapter->ifp;

	EM_TX_LOCK(txr);
	em_txeof(txr);
#ifdef EM_MULTIQUEUE
	if (!drbr_empty(ifp, txr->br))
		em_mq_start_locked(ifp, txr, NULL);
#else
	if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
		em_start_locked(ifp, txr);
#endif
	E1000_WRITE_REG(&adapter->hw, E1000_IMS, txr->ims);
	EM_TX_UNLOCK(txr);
}

static void
em_handle_link(void *context, int pending)
{
	struct adapter	*adapter = context;
	struct tx_ring	*txr = adapter->tx_rings;
	struct ifnet *ifp = adapter->ifp;

	if (!(ifp->if_drv_flags & IFF_DRV_RUNNING))
		return;

	EM_CORE_LOCK(adapter);
	callout_stop(&adapter->timer);
	em_update_link_status(adapter);
	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
	E1000_WRITE_REG(&adapter->hw, E1000_IMS,
	    EM_MSIX_LINK | E1000_IMS_LSC);
	if (adapter->link_active) {
		for (int i = 0; i < adapter->num_queues; i++, txr++) {
			EM_TX_LOCK(txr);
#ifdef EM_MULTIQUEUE
			if (!drbr_empty(ifp, txr->br))
				em_mq_start_locked(ifp, txr, NULL);
#else
			if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
				em_start_locked(ifp, txr);
#endif
			EM_TX_UNLOCK(txr);
		}
	}
	EM_CORE_UNLOCK(adapter);
}


/*********************************************************************
 *
 *  Media Ioctl callback
 *
 *  This routine is called whenever the user queries the status of
 *  the interface using ifconfig.
 *
 **********************************************************************/
static void
em_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
{
	struct adapter *adapter = ifp->if_softc;
	u_char fiber_type = IFM_1000_SX;

	INIT_DEBUGOUT("em_media_status: begin");

	EM_CORE_LOCK(adapter);
	em_update_link_status(adapter);

	ifmr->ifm_status = IFM_AVALID;
	ifmr->ifm_active = IFM_ETHER;

	if (!adapter->link_active) {
		EM_CORE_UNLOCK(adapter);
		return;
	}

	ifmr->ifm_status |= IFM_ACTIVE;

	if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
	    (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
		ifmr->ifm_active |= fiber_type | IFM_FDX;
	} else {
		switch (adapter->link_speed) {
		case 10:
			ifmr->ifm_active |= IFM_10_T;
			break;
		case 100:
			ifmr->ifm_active |= IFM_100_TX;
			break;
		case 1000:
			ifmr->ifm_active |= IFM_1000_T;
			break;
		}
		if (adapter->link_duplex == FULL_DUPLEX)
			ifmr->ifm_active |= IFM_FDX;
		else
			ifmr->ifm_active |= IFM_HDX;
	}
	EM_CORE_UNLOCK(adapter);
}

1745/*********************************************************************
1746 *
1747 *  Media Ioctl callback
1748 *
1749 *  This routine is called when the user changes speed/duplex using
1750	 *  the media/mediaopt options of ifconfig.
1751 *
1752 **********************************************************************/
1753static int
1754em_media_change(struct ifnet *ifp)
1755{
1756	struct adapter *adapter = ifp->if_softc;
1757	struct ifmedia  *ifm = &adapter->media;
1758
1759	INIT_DEBUGOUT("em_media_change: begin");
1760
1761	if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
1762		return (EINVAL);
1763
1764	EM_CORE_LOCK(adapter);
1765	switch (IFM_SUBTYPE(ifm->ifm_media)) {
1766	case IFM_AUTO:
1767		adapter->hw.mac.autoneg = DO_AUTO_NEG;
1768		adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
1769		break;
1770	case IFM_1000_LX:
1771	case IFM_1000_SX:
1772	case IFM_1000_T:
1773		adapter->hw.mac.autoneg = DO_AUTO_NEG;
1774		adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
1775		break;
1776	case IFM_100_TX:
1777		adapter->hw.mac.autoneg = FALSE;
1778		adapter->hw.phy.autoneg_advertised = 0;
1779		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1780			adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_FULL;
1781		else
1782			adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_HALF;
1783		break;
1784	case IFM_10_T:
1785		adapter->hw.mac.autoneg = FALSE;
1786		adapter->hw.phy.autoneg_advertised = 0;
1787		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1788			adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_FULL;
1789		else
1790			adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_HALF;
1791		break;
1792	default:
1793		device_printf(adapter->dev, "Unsupported media type\n");
1794	}
1795
1796	em_init_locked(adapter);
1797	EM_CORE_UNLOCK(adapter);
1798
1799	return (0);
1800}
1801
1802/*********************************************************************
1803 *
1804 *  This routine maps the mbufs to tx descriptors.
1805 *
1806 *  return 0 on success, positive on failure
1807 **********************************************************************/
1808
1809static int
1810em_xmit(struct tx_ring *txr, struct mbuf **m_headp)
1811{
1812	struct adapter		*adapter = txr->adapter;
1813	bus_dma_segment_t	segs[EM_MAX_SCATTER];
1814	bus_dmamap_t		map;
1815	struct em_buffer	*tx_buffer, *tx_buffer_mapped;
1816	struct e1000_tx_desc	*ctxd = NULL;
1817	struct mbuf		*m_head;
1818	struct ether_header	*eh;
1819	struct ip		*ip = NULL;
1820	struct tcphdr		*tp = NULL;
1821	u32			txd_upper, txd_lower, txd_used, txd_saved;
1822	int			ip_off, poff;
1823	int			nsegs, i, j, first, last = 0;
1824	int			error, do_tso, tso_desc = 0, remap = 1;
1825
1826retry:
1827	m_head = *m_headp;
1828	txd_upper = txd_lower = txd_used = txd_saved = 0;
1829	do_tso = ((m_head->m_pkthdr.csum_flags & CSUM_TSO) != 0);
1830	ip_off = poff = 0;
1831
1832	/*
1833	 * Intel recommends entire IP/TCP header length reside in a single
1834	 * buffer. If multiple descriptors are used to describe the IP and
1835	 * TCP header, each descriptor should describe one or more
1836	 * complete headers; descriptors referencing only parts of headers
1837	 * are not supported. If all layer headers are not coalesced into
1838	 * a single buffer, each buffer should not cross a 4KB boundary,
1839	 * or be larger than the maximum read request size.
1840	 * The controller also requires modifying the IP/TCP header to make
1841	 * TSO work, so we first get a writable mbuf chain, then coalesce the
1842	 * ethernet/IP/TCP header into a single buffer to meet the
1843	 * controller's requirement. This also simplifies IP/TCP/UDP checksum
1844	 * offloading, which has similar restrictions.
1845	 */
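	/*
	 * For example, a VLAN-tagged TCP segment with no IP/TCP options
	 * needs 18 (ether_vlan_header) + 20 (ip) + 20 (tcphdr) = 58
	 * contiguous bytes pulled up, so that every layer header sits
	 * in a single buffer as required above.
	 */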
1846	if (do_tso || m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD) {
1847		if (do_tso || (m_head->m_next != NULL &&
1848		    m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD)) {
1849			if (M_WRITABLE(*m_headp) == 0) {
1850				m_head = m_dup(*m_headp, M_NOWAIT);
1851				m_freem(*m_headp);
1852				if (m_head == NULL) {
1853					*m_headp = NULL;
1854					return (ENOBUFS);
1855				}
1856				*m_headp = m_head;
1857			}
1858		}
1859		/*
1860		 * XXX
1861		 * Assume IPv4, we don't have TSO/checksum offload support
1862		 * for IPv6 yet.
1863		 */
1864		ip_off = sizeof(struct ether_header);
1865		m_head = m_pullup(m_head, ip_off);
1866		if (m_head == NULL) {
1867			*m_headp = NULL;
1868			return (ENOBUFS);
1869		}
1870		eh = mtod(m_head, struct ether_header *);
1871		if (eh->ether_type == htons(ETHERTYPE_VLAN)) {
1872			ip_off = sizeof(struct ether_vlan_header);
1873			m_head = m_pullup(m_head, ip_off);
1874			if (m_head == NULL) {
1875				*m_headp = NULL;
1876				return (ENOBUFS);
1877			}
1878		}
1879		m_head = m_pullup(m_head, ip_off + sizeof(struct ip));
1880		if (m_head == NULL) {
1881			*m_headp = NULL;
1882			return (ENOBUFS);
1883		}
1884		ip = (struct ip *)(mtod(m_head, char *) + ip_off);
1885		poff = ip_off + (ip->ip_hl << 2);
1886		if (do_tso) {
1887			m_head = m_pullup(m_head, poff + sizeof(struct tcphdr));
1888			if (m_head == NULL) {
1889				*m_headp = NULL;
1890				return (ENOBUFS);
1891			}
1892			tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
1893			/*
1894			 * TSO workaround: pull 4 more payload bytes in, so
1895			 * the last descriptor can be split into a 4-byte sentinel.
1896			 */
1897			m_head = m_pullup(m_head, poff + (tp->th_off << 2) + 4);
1898			if (m_head == NULL) {
1899				*m_headp = NULL;
1900				return (ENOBUFS);
1901			}
1902			ip = (struct ip *)(mtod(m_head, char *) + ip_off);
1903			ip->ip_len = 0;
1904			ip->ip_sum = 0;
1905			/*
1906			 * The pseudo TCP checksum does not include the TCP
1907			 * payload length, so the driver must recompute the
1908			 * checksum that the hardware expects to see, per
1909			 * Microsoft's Large Send specification.
1910			 */
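			/*
			 * in_pseudo() below yields the one's complement
			 * partial sum over source address, destination
			 * address and protocol with a zero length; the
			 * hardware folds the per-segment length in while
			 * segmenting.
			 */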
1911			tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
1912			tp->th_sum = in_pseudo(ip->ip_src.s_addr,
1913			    ip->ip_dst.s_addr, htons(IPPROTO_TCP));
1914		} else if (m_head->m_pkthdr.csum_flags & CSUM_TCP) {
1915			m_head = m_pullup(m_head, poff + sizeof(struct tcphdr));
1916			if (m_head == NULL) {
1917				*m_headp = NULL;
1918				return (ENOBUFS);
1919			}
1920			tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
1921			m_head = m_pullup(m_head, poff + (tp->th_off << 2));
1922			if (m_head == NULL) {
1923				*m_headp = NULL;
1924				return (ENOBUFS);
1925			}
1926			ip = (struct ip *)(mtod(m_head, char *) + ip_off);
1927			tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
1928		} else if (m_head->m_pkthdr.csum_flags & CSUM_UDP) {
1929			m_head = m_pullup(m_head, poff + sizeof(struct udphdr));
1930			if (m_head == NULL) {
1931				*m_headp = NULL;
1932				return (ENOBUFS);
1933			}
1934			ip = (struct ip *)(mtod(m_head, char *) + ip_off);
1935		}
1936		*m_headp = m_head;
1937	}
1938
1939	/*
1940	 * Map the packet for DMA
1941	 *
1942	 * Capture the first descriptor index,
1943	 * this descriptor will have the index
1944	 * of the EOP which is the only one that
1945	 * now gets a DONE bit writeback.
1946	 */
1947	first = txr->next_avail_desc;
1948	tx_buffer = &txr->tx_buffers[first];
1949	tx_buffer_mapped = tx_buffer;
1950	map = tx_buffer->map;
1951
1952	error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
1953	    *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
1954
1955	/*
1956	 * There are two types of errors we can (try) to handle:
1957	 * - EFBIG means the mbuf chain was too long and bus_dma ran
1958	 *   out of segments.  Defragment the mbuf chain and try again.
1959	 * - ENOMEM means bus_dma could not obtain enough bounce buffers
1960	 *   at this point in time.  Defer sending and try again later.
1961	 * All other errors, in particular EINVAL, are fatal and prevent the
1962	 * mbuf chain from ever going through.  Drop it and report error.
1963	 */
1964	if (error == EFBIG && remap) {
1965		struct mbuf *m;
1966
1967		m = m_defrag(*m_headp, M_NOWAIT);
1968		if (m == NULL) {
1969			adapter->mbuf_alloc_failed++;
1970			m_freem(*m_headp);
1971			*m_headp = NULL;
1972			return (ENOBUFS);
1973		}
1974		*m_headp = m;
1975
1976		/* Try it again, but only once */
1977		remap = 0;
1978		goto retry;
1979	} else if (error == ENOMEM) {
1980		adapter->no_tx_dma_setup++;
1981		return (error);
1982	} else if (error != 0) {
1983		adapter->no_tx_dma_setup++;
1984		m_freem(*m_headp);
1985		*m_headp = NULL;
1986		return (error);
1987	}
1988
1989	/*
1990	 * TSO Hardware workaround, if this packet is not
1991	 * TSO, and is only a single descriptor long, and
1992	 * it follows a TSO burst, then we need to add a
1993	 * sentinel descriptor to prevent premature writeback.
1994	 */
1995	if ((do_tso == 0) && (txr->tx_tso == TRUE)) {
1996		if (nsegs == 1)
1997			tso_desc = TRUE;
1998		txr->tx_tso = FALSE;
1999	}
2000
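	/*
	 * Two descriptors are held in reserve below: presumably one for
	 * the possible TSO sentinel and one so that a completely full
	 * ring is never indistinguishable from an empty one.
	 */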
2001	if (nsegs > (txr->tx_avail - 2)) {
2002		txr->no_desc_avail++;
2003		bus_dmamap_unload(txr->txtag, map);
2004		return (ENOBUFS);
2005	}
2006	m_head = *m_headp;
2007
2008	/* Do hardware assists */
2009	if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
2010		em_tso_setup(txr, m_head, ip_off, ip, tp,
2011		    &txd_upper, &txd_lower);
2012		/* we need to make a final sentinel transmit desc */
2013		tso_desc = TRUE;
2014	} else if (m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD)
2015		em_transmit_checksum_setup(txr, m_head,
2016		    ip_off, ip, &txd_upper, &txd_lower);
2017
2018	if (m_head->m_flags & M_VLANTAG) {
2019		/* Set the vlan id. */
2020		txd_upper |=
2021		    (htole16(m_head->m_pkthdr.ether_vtag) << 16);
2022		/* Tell hardware to add tag */
2023		txd_lower |= htole32(E1000_TXD_CMD_VLE);
2024	}
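	/*
	 * In the legacy TX descriptor the tag set above lands in the
	 * "special" field (bits 31:16 of the upper dword), and VLE asks
	 * the MAC to insert it into the frame on transmit.
	 */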
2025
2026	i = txr->next_avail_desc;
2027
2028	/* Set up our transmit descriptors */
2029	for (j = 0; j < nsegs; j++) {
2030		bus_size_t seg_len;
2031		bus_addr_t seg_addr;
2032
2033		tx_buffer = &txr->tx_buffers[i];
2034		ctxd = &txr->tx_base[i];
2035		seg_addr = segs[j].ds_addr;
2036		seg_len  = segs[j].ds_len;
2037		/*
2038		** TSO Workaround:
2039		** If this is the last descriptor, we want to
2040		** split it so we have a small final sentinel
2041		*/
2042		if (tso_desc && (j == (nsegs - 1)) && (seg_len > 8)) {
2043			seg_len -= 4;
2044			ctxd->buffer_addr = htole64(seg_addr);
2045			ctxd->lower.data = htole32(
2046			    adapter->txd_cmd | txd_lower | seg_len);
2047			ctxd->upper.data =
2048			    htole32(txd_upper);
2049			if (++i == adapter->num_tx_desc)
2050				i = 0;
2051			/* Now make the sentinel */
2052			++txd_used; /* using an extra txd */
2053			ctxd = &txr->tx_base[i];
2054			tx_buffer = &txr->tx_buffers[i];
2055			ctxd->buffer_addr =
2056			    htole64(seg_addr + seg_len);
2057			ctxd->lower.data = htole32(
2058			    adapter->txd_cmd | txd_lower | 4);
2059			ctxd->upper.data =
2060			    htole32(txd_upper);
2061			last = i;
2062			if (++i == adapter->num_tx_desc)
2063				i = 0;
2064		} else {
2065			ctxd->buffer_addr = htole64(seg_addr);
2066			ctxd->lower.data = htole32(
2067			    adapter->txd_cmd | txd_lower | seg_len);
2068			ctxd->upper.data =
2069			    htole32(txd_upper);
2070			last = i;
2071			if (++i == adapter->num_tx_desc)
2072				i = 0;
2073		}
2074		tx_buffer->m_head = NULL;
2075		tx_buffer->next_eop = -1;
2076	}
2077
2078	txr->next_avail_desc = i;
2079	txr->tx_avail -= nsegs;
2080	if (tso_desc) /* TSO used an extra for sentinel */
2081		txr->tx_avail -= txd_used;
2082
2083	tx_buffer->m_head = m_head;
2084	/*
2085	** Here we swap the map so the last descriptor,
2086	** which gets the completion interrupt, has the
2087	** real map, and the first descriptor gets the
2088	** unused map from this descriptor.
2089	*/
2090	tx_buffer_mapped->map = tx_buffer->map;
2091	tx_buffer->map = map;
2092	bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE);
2093
2094	/*
2095	 * Last Descriptor of Packet
2096	 * needs End Of Packet (EOP)
2097	 * and Report Status (RS)
2098	 */
2099	ctxd->lower.data |=
2100	    htole32(E1000_TXD_CMD_EOP | E1000_TXD_CMD_RS);
2101	/*
2102	 * Keep track in the first buffer which
2103	 * descriptor will be written back
2104	 */
2105	tx_buffer = &txr->tx_buffers[first];
2106	tx_buffer->next_eop = last;
2107	/* Update the watchdog time early and often */
2108	txr->watchdog_time = ticks;
2109
2110	/*
2111	 * Advance the Transmit Descriptor Tail (TDT), this tells the E1000
2112	 * that this frame is available to transmit.
2113	 */
2114	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
2115	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
2116	E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me), i);
2117
2118	return (0);
2119}
2120
2121static void
2122em_set_promisc(struct adapter *adapter)
2123{
2124	struct ifnet	*ifp = adapter->ifp;
2125	u32		reg_rctl;
2126
2127	reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2128
2129	if (ifp->if_flags & IFF_PROMISC) {
2130		reg_rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
2131		/* Turn this on if you want to see bad packets */
2132		if (em_debug_sbp)
2133			reg_rctl |= E1000_RCTL_SBP;
2134		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2135	} else if (ifp->if_flags & IFF_ALLMULTI) {
2136		reg_rctl |= E1000_RCTL_MPE;
2137		reg_rctl &= ~E1000_RCTL_UPE;
2138		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2139	}
2140}
2141
2142static void
2143em_disable_promisc(struct adapter *adapter)
2144{
2145	struct ifnet	*ifp = adapter->ifp;
2146	u32		reg_rctl;
2147	int		mcnt = 0;
2148
2149	reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2150	reg_rctl &=  (~E1000_RCTL_UPE);
2151	if (ifp->if_flags & IFF_ALLMULTI)
2152		mcnt = MAX_NUM_MULTICAST_ADDRESSES;
2153	else {
2154		struct  ifmultiaddr *ifma;
2155#if __FreeBSD_version < 800000
2156		IF_ADDR_LOCK(ifp);
2157#else
2158		if_maddr_rlock(ifp);
2159#endif
2160		TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
2161			if (ifma->ifma_addr->sa_family != AF_LINK)
2162				continue;
2163			if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
2164				break;
2165			mcnt++;
2166		}
2167#if __FreeBSD_version < 800000
2168		IF_ADDR_UNLOCK(ifp);
2169#else
2170		if_maddr_runlock(ifp);
2171#endif
2172	}
2173	/* Don't disable if in MAX groups */
2174	if (mcnt < MAX_NUM_MULTICAST_ADDRESSES)
2175		reg_rctl &=  (~E1000_RCTL_MPE);
2176	reg_rctl &=  (~E1000_RCTL_SBP);
2177	E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2178}
2179
2180
2181/*********************************************************************
2182 *  Multicast Update
2183 *
2184 *  This routine is called whenever multicast address list is updated.
2185 *
2186 **********************************************************************/
2187
2188static void
2189em_set_multi(struct adapter *adapter)
2190{
2191	struct ifnet	*ifp = adapter->ifp;
2192	struct ifmultiaddr *ifma;
2193	u32 reg_rctl = 0;
2194	u8  *mta; /* Multicast array memory */
2195	int mcnt = 0;
2196
2197	IOCTL_DEBUGOUT("em_set_multi: begin");
2198
2199	mta = adapter->mta;
2200	bzero(mta, sizeof(u8) * ETH_ADDR_LEN * MAX_NUM_MULTICAST_ADDRESSES);
2201
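	/*
	 * 82542 rev 2.0 parts apparently require the receiver to be held
	 * in reset (RCTL_RST) while the multicast table is rewritten,
	 * with memory-write-invalidate disabled around the sequence;
	 * both halves of that dance appear below.
	 */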
2202	if (adapter->hw.mac.type == e1000_82542 &&
2203	    adapter->hw.revision_id == E1000_REVISION_2) {
2204		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2205		if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
2206			e1000_pci_clear_mwi(&adapter->hw);
2207		reg_rctl |= E1000_RCTL_RST;
2208		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2209		msec_delay(5);
2210	}
2211
2212#if __FreeBSD_version < 800000
2213	IF_ADDR_LOCK(ifp);
2214#else
2215	if_maddr_rlock(ifp);
2216#endif
2217	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
2218		if (ifma->ifma_addr->sa_family != AF_LINK)
2219			continue;
2220
2221		if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
2222			break;
2223
2224		bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
2225		    &mta[mcnt * ETH_ADDR_LEN], ETH_ADDR_LEN);
2226		mcnt++;
2227	}
2228#if __FreeBSD_version < 800000
2229	IF_ADDR_UNLOCK(ifp);
2230#else
2231	if_maddr_runlock(ifp);
2232#endif
2233	if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES) {
2234		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2235		reg_rctl |= E1000_RCTL_MPE;
2236		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2237	} else
2238		e1000_update_mc_addr_list(&adapter->hw, mta, mcnt);
2239
2240	if (adapter->hw.mac.type == e1000_82542 &&
2241	    adapter->hw.revision_id == E1000_REVISION_2) {
2242		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2243		reg_rctl &= ~E1000_RCTL_RST;
2244		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2245		msec_delay(5);
2246		if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
2247			e1000_pci_set_mwi(&adapter->hw);
2248	}
2249}
2250
2251
2252/*********************************************************************
2253 *  Timer routine
2254 *
2255 *  This routine checks for link status and updates statistics.
2256 *
2257 **********************************************************************/
2258
2259static void
2260em_local_timer(void *arg)
2261{
2262	struct adapter	*adapter = arg;
2263	struct ifnet	*ifp = adapter->ifp;
2264	struct tx_ring	*txr = adapter->tx_rings;
2265	struct rx_ring	*rxr = adapter->rx_rings;
2266	u32		trigger;
2267
2268	EM_CORE_LOCK_ASSERT(adapter);
2269
2270	em_update_link_status(adapter);
2271	em_update_stats_counters(adapter);
2272
2273	/* Reset LAA into RAR[0] on 82571 */
2274	if ((adapter->hw.mac.type == e1000_82571) &&
2275	    e1000_get_laa_state_82571(&adapter->hw))
2276		e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);
2277
2278	/* Mask to use in the irq trigger */
2279	if (adapter->msix_mem)
2280		trigger = rxr->ims; /* RX for 82574 */
2281	else
2282		trigger = E1000_ICS_RXDMT0;
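	/*
	 * Writing a bit to ICS sets the matching cause in ICR and, if
	 * unmasked, fires the interrupt immediately; the write at the
	 * bottom of this routine uses that to force an RX pass.
	 */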
2283
2284	/*
2285	** Check on the state of the TX queue(s), this
2286	** can be done without the lock because it's RO
2287	** and the HUNG state will be static if set.
2288	*/
2289	for (int i = 0; i < adapter->num_queues; i++, txr++) {
2290		if ((txr->queue_status == EM_QUEUE_HUNG) &&
2291		    (adapter->pause_frames == 0))
2292			goto hung;
2293		/* Schedule a TX tasklet if needed */
2294		if (txr->tx_avail <= EM_MAX_SCATTER)
2295			taskqueue_enqueue(txr->tq, &txr->tx_task);
2296	}
2297
2298	adapter->pause_frames = 0;
2299	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
2300#ifndef DEVICE_POLLING
2301	/* Trigger an RX interrupt to guarantee mbuf refresh */
2302	E1000_WRITE_REG(&adapter->hw, E1000_ICS, trigger);
2303#endif
2304	return;
2305hung:
2306	/* Looks like we're hung */
2307	device_printf(adapter->dev, "Watchdog timeout -- resetting\n");
2308	device_printf(adapter->dev,
2309	    "Queue(%d) tdh = %d, hw tdt = %d\n", txr->me,
2310	    E1000_READ_REG(&adapter->hw, E1000_TDH(txr->me)),
2311	    E1000_READ_REG(&adapter->hw, E1000_TDT(txr->me)));
2312	device_printf(adapter->dev, "TX(%d) desc avail = %d, "
2313	    "Next TX to Clean = %d\n",
2314	    txr->me, txr->tx_avail, txr->next_to_clean);
2315	ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2316	adapter->watchdog_events++;
2317	adapter->pause_frames = 0;
2318	em_init_locked(adapter);
2319}
2320
2321
2322static void
2323em_update_link_status(struct adapter *adapter)
2324{
2325	struct e1000_hw *hw = &adapter->hw;
2326	struct ifnet *ifp = adapter->ifp;
2327	device_t dev = adapter->dev;
2328	struct tx_ring *txr = adapter->tx_rings;
2329	u32 link_check = 0;
2330
2331	/* Get the cached link value or read phy for real */
2332	switch (hw->phy.media_type) {
2333	case e1000_media_type_copper:
2334		if (hw->mac.get_link_status) {
2335			/* Do the work to read phy */
2336			e1000_check_for_link(hw);
2337			link_check = !hw->mac.get_link_status;
2338			if (link_check) /* ESB2 fix */
2339				e1000_cfg_on_link_up(hw);
2340		} else
2341			link_check = TRUE;
2342		break;
2343	case e1000_media_type_fiber:
2344		e1000_check_for_link(hw);
2345		link_check = (E1000_READ_REG(hw, E1000_STATUS) &
2346		    E1000_STATUS_LU);
2347		break;
2348	case e1000_media_type_internal_serdes:
2349		e1000_check_for_link(hw);
2350		link_check = adapter->hw.mac.serdes_has_link;
2351		break;
2352	default:
2353	case e1000_media_type_unknown:
2354		break;
2355	}
2356
2357	/* Now check for a transition */
2358	if (link_check && (adapter->link_active == 0)) {
2359		e1000_get_speed_and_duplex(hw, &adapter->link_speed,
2360		    &adapter->link_duplex);
2361		/* Check if we must disable SPEED_MODE bit on PCI-E */
2362		if ((adapter->link_speed != SPEED_1000) &&
2363		    ((hw->mac.type == e1000_82571) ||
2364		    (hw->mac.type == e1000_82572))) {
2365			int tarc0;
2366			tarc0 = E1000_READ_REG(hw, E1000_TARC(0));
2367			tarc0 &= ~SPEED_MODE_BIT;
2368			E1000_WRITE_REG(hw, E1000_TARC(0), tarc0);
2369		}
2370		if (bootverbose)
2371			device_printf(dev, "Link is up %d Mbps %s\n",
2372			    adapter->link_speed,
2373			    ((adapter->link_duplex == FULL_DUPLEX) ?
2374			    "Full Duplex" : "Half Duplex"));
2375		adapter->link_active = 1;
2376		adapter->smartspeed = 0;
2377		ifp->if_baudrate = adapter->link_speed * 1000000;
2378		if_link_state_change(ifp, LINK_STATE_UP);
2379	} else if (!link_check && (adapter->link_active == 1)) {
2380		ifp->if_baudrate = adapter->link_speed = 0;
2381		adapter->link_duplex = 0;
2382		if (bootverbose)
2383			device_printf(dev, "Link is Down\n");
2384		adapter->link_active = 0;
2385		/* Link down, disable watchdog */
2386		for (int i = 0; i < adapter->num_queues; i++, txr++)
2387			txr->queue_status = EM_QUEUE_IDLE;
2388		if_link_state_change(ifp, LINK_STATE_DOWN);
2389	}
2390}
2391
2392/*********************************************************************
2393 *
2394 *  This routine disables all traffic on the adapter by issuing a
2395 *  global reset on the MAC and deallocates TX/RX buffers.
2396 *
2397 *  This routine should always be called with BOTH the CORE
2398 *  and TX locks.
2399 **********************************************************************/
2400
2401static void
2402em_stop(void *arg)
2403{
2404	struct adapter	*adapter = arg;
2405	struct ifnet	*ifp = adapter->ifp;
2406	struct tx_ring	*txr = adapter->tx_rings;
2407
2408	EM_CORE_LOCK_ASSERT(adapter);
2409
2410	INIT_DEBUGOUT("em_stop: begin");
2411
2412	em_disable_intr(adapter);
2413	callout_stop(&adapter->timer);
2414
2415	/* Tell the stack that the interface is no longer active */
2416	ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2417	ifp->if_drv_flags |= IFF_DRV_OACTIVE;
2418
2419	/* Disarm the watchdog timer. */
2420	for (int i = 0; i < adapter->num_queues; i++, txr++) {
2421		EM_TX_LOCK(txr);
2422		txr->queue_status = EM_QUEUE_IDLE;
2423		EM_TX_UNLOCK(txr);
2424	}
2425
2426	e1000_reset_hw(&adapter->hw);
2427	E1000_WRITE_REG(&adapter->hw, E1000_WUC, 0);
2428
2429	e1000_led_off(&adapter->hw);
2430	e1000_cleanup_led(&adapter->hw);
2431}
2432
2433
2434/*********************************************************************
2435 *
2436 *  Determine hardware revision.
2437 *
2438 **********************************************************************/
2439static void
2440em_identify_hardware(struct adapter *adapter)
2441{
2442	device_t dev = adapter->dev;
2443
2444	/* Make sure our PCI config space has the necessary stuff set */
2445	adapter->hw.bus.pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2);
2446	if (!((adapter->hw.bus.pci_cmd_word & PCIM_CMD_BUSMASTEREN) &&
2447	    (adapter->hw.bus.pci_cmd_word & PCIM_CMD_MEMEN))) {
2448		device_printf(dev, "Memory Access and/or Bus Master bits "
2449		    "were not set!\n");
2450		adapter->hw.bus.pci_cmd_word |=
2451		(PCIM_CMD_BUSMASTEREN | PCIM_CMD_MEMEN);
2452		pci_write_config(dev, PCIR_COMMAND,
2453		    adapter->hw.bus.pci_cmd_word, 2);
2454	}
2455
2456	/* Save off the information about this board */
2457	adapter->hw.vendor_id = pci_get_vendor(dev);
2458	adapter->hw.device_id = pci_get_device(dev);
2459	adapter->hw.revision_id = pci_read_config(dev, PCIR_REVID, 1);
2460	adapter->hw.subsystem_vendor_id =
2461	    pci_read_config(dev, PCIR_SUBVEND_0, 2);
2462	adapter->hw.subsystem_device_id =
2463	    pci_read_config(dev, PCIR_SUBDEV_0, 2);
2464
2465	/* Do Shared Code Init and Setup */
2466	if (e1000_set_mac_type(&adapter->hw)) {
2467		device_printf(dev, "Setup init failure\n");
2468		return;
2469	}
2470}
2471
2472static int
2473em_allocate_pci_resources(struct adapter *adapter)
2474{
2475	device_t	dev = adapter->dev;
2476	int		rid;
2477
2478	rid = PCIR_BAR(0);
2479	adapter->memory = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
2480	    &rid, RF_ACTIVE);
2481	if (adapter->memory == NULL) {
2482		device_printf(dev, "Unable to allocate bus resource: memory\n");
2483		return (ENXIO);
2484	}
2485	adapter->osdep.mem_bus_space_tag =
2486	    rman_get_bustag(adapter->memory);
2487	adapter->osdep.mem_bus_space_handle =
2488	    rman_get_bushandle(adapter->memory);
2489	adapter->hw.hw_addr = (u8 *)&adapter->osdep.mem_bus_space_handle;
2490
2491	/* Default to a single queue */
2492	adapter->num_queues = 1;
2493
2494	/*
2495	 * Setup MSI/X or MSI if PCI Express
2496	 */
2497	adapter->msix = em_setup_msix(adapter);
2498
2499	adapter->hw.back = &adapter->osdep;
2500
2501	return (0);
2502}
2503
2504/*********************************************************************
2505 *
2506 *  Setup the Legacy or MSI Interrupt handler
2507 *
2508 **********************************************************************/
2509int
2510em_allocate_legacy(struct adapter *adapter)
2511{
2512	device_t dev = adapter->dev;
2513	struct tx_ring	*txr = adapter->tx_rings;
2514	int error, rid = 0;
2515
2516	/* Manually turn off all interrupts */
2517	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2518
2519	if (adapter->msix == 1) /* using MSI */
2520		rid = 1;
2521	/* We allocate a single interrupt resource */
2522	adapter->res = bus_alloc_resource_any(dev,
2523	    SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2524	if (adapter->res == NULL) {
2525		device_printf(dev, "Unable to allocate bus resource: "
2526		    "interrupt\n");
2527		return (ENXIO);
2528	}
2529
2530	/*
2531	 * Allocate a fast interrupt and the associated
2532	 * deferred processing contexts.
2533	 */
2534	TASK_INIT(&adapter->que_task, 0, em_handle_que, adapter);
2535	adapter->tq = taskqueue_create_fast("em_taskq", M_NOWAIT,
2536	    taskqueue_thread_enqueue, &adapter->tq);
2537	taskqueue_start_threads(&adapter->tq, 1, PI_NET, "%s que",
2538	    device_get_nameunit(adapter->dev));
2539	/* Use a TX only tasklet for local timer */
2540	TASK_INIT(&txr->tx_task, 0, em_handle_tx, txr);
2541	txr->tq = taskqueue_create_fast("em_txq", M_NOWAIT,
2542	    taskqueue_thread_enqueue, &txr->tq);
2543	taskqueue_start_threads(&txr->tq, 1, PI_NET, "%s txq",
2544	    device_get_nameunit(adapter->dev));
2545	TASK_INIT(&adapter->link_task, 0, em_handle_link, adapter);
2546	if ((error = bus_setup_intr(dev, adapter->res, INTR_TYPE_NET,
2547	    em_irq_fast, NULL, adapter, &adapter->tag)) != 0) {
2548		device_printf(dev, "Failed to register fast interrupt "
2549			    "handler: %d\n", error);
2550		taskqueue_free(adapter->tq);
2551		adapter->tq = NULL;
2552		return (error);
2553	}
2554
2555	return (0);
2556}
2557
2558/*********************************************************************
2559 *
2560 *  Setup the MSIX Interrupt handlers
2561 *   This is not really Multiqueue, rather
2562	 *   it's just separate interrupt vectors
2563 *   for TX, RX, and Link.
2564 *
2565 **********************************************************************/
2566int
2567em_allocate_msix(struct adapter *adapter)
2568{
2569	device_t	dev = adapter->dev;
2570	struct		tx_ring *txr = adapter->tx_rings;
2571	struct		rx_ring *rxr = adapter->rx_rings;
2572	int		error, rid, vector = 0;
2573
2574
2575	/* Make sure all interrupts are disabled */
2576	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2577
2578	/* First set up ring resources */
2579	for (int i = 0; i < adapter->num_queues; i++, txr++, rxr++) {
2580
2581		/* RX ring */
2582		rid = vector + 1;
2583
2584		rxr->res = bus_alloc_resource_any(dev,
2585		    SYS_RES_IRQ, &rid, RF_ACTIVE);
2586		if (rxr->res == NULL) {
2587			device_printf(dev,
2588			    "Unable to allocate bus resource: "
2589			    "RX MSIX Interrupt %d\n", i);
2590			return (ENXIO);
2591		}
2592		if ((error = bus_setup_intr(dev, rxr->res,
2593		    INTR_TYPE_NET | INTR_MPSAFE, NULL, em_msix_rx,
2594		    rxr, &rxr->tag)) != 0) {
2595			device_printf(dev, "Failed to register RX handler");
2596			return (error);
2597		}
2598#if __FreeBSD_version >= 800504
2599		bus_describe_intr(dev, rxr->res, rxr->tag, "rx %d", i);
2600#endif
2601		rxr->msix = vector++; /* NOTE increment vector for TX */
2602		TASK_INIT(&rxr->rx_task, 0, em_handle_rx, rxr);
2603		rxr->tq = taskqueue_create_fast("em_rxq", M_NOWAIT,
2604		    taskqueue_thread_enqueue, &rxr->tq);
2605		taskqueue_start_threads(&rxr->tq, 1, PI_NET, "%s rxq",
2606		    device_get_nameunit(adapter->dev));
2607		/*
2608		** Set the bit to enable interrupt
2609		** in E1000_IMS -- bits 20 and 21
2610		** are for RX0 and RX1, note this has
2611		** NOTHING to do with the MSIX vector
2612		*/
2613		rxr->ims = 1 << (20 + i);
2614		adapter->ivars |= (8 | rxr->msix) << (i * 4);
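		/*
		 * IVAR on the 82574 is built from 4-bit fields, one per
		 * interrupt cause: bits 2:0 select the MSIX vector and
		 * bit 3 (the "8" above) marks the entry valid. RxQ0/RxQ1
		 * occupy nibbles 0-1, TxQ0/TxQ1 nibbles 2-3 (hence the
		 * "8 +" shift below), and the link/other cause goes in
		 * nibble 4 (the "<< 16" near the end of this function).
		 */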
2615
2616		/* TX ring */
2617		rid = vector + 1;
2618		txr->res = bus_alloc_resource_any(dev,
2619		    SYS_RES_IRQ, &rid, RF_ACTIVE);
2620		if (txr->res == NULL) {
2621			device_printf(dev,
2622			    "Unable to allocate bus resource: "
2623			    "TX MSIX Interrupt %d\n", i);
2624			return (ENXIO);
2625		}
2626		if ((error = bus_setup_intr(dev, txr->res,
2627		    INTR_TYPE_NET | INTR_MPSAFE, NULL, em_msix_tx,
2628		    txr, &txr->tag)) != 0) {
2629			device_printf(dev, "Failed to register TX handler");
2630			return (error);
2631		}
2632#if __FreeBSD_version >= 800504
2633		bus_describe_intr(dev, txr->res, txr->tag, "tx %d", i);
2634#endif
2635		txr->msix = vector++; /* Increment vector for next pass */
2636		TASK_INIT(&txr->tx_task, 0, em_handle_tx, txr);
2637		txr->tq = taskqueue_create_fast("em_txq", M_NOWAIT,
2638		    taskqueue_thread_enqueue, &txr->tq);
2639		taskqueue_start_threads(&txr->tq, 1, PI_NET, "%s txq",
2640		    device_get_nameunit(adapter->dev));
2641		/*
2642		** Set the bit to enable interrupt
2643		** in E1000_IMS -- bits 22 and 23
2644		** are for TX0 and TX1, note this has
2645		** NOTHING to do with the MSIX vector
2646		*/
2647		txr->ims = 1 << (22 + i);
2648		adapter->ivars |= (8 | txr->msix) << (8 + (i * 4));
2649	}
2650
2651	/* Link interrupt */
2652	++rid;
2653	adapter->res = bus_alloc_resource_any(dev,
2654	    SYS_RES_IRQ, &rid, RF_ACTIVE);
2655	if (!adapter->res) {
2656		device_printf(dev, "Unable to allocate "
2657		    "bus resource: Link interrupt [%d]\n", rid);
2658		return (ENXIO);
2659	}
2660	/* Set the link handler function */
2661	error = bus_setup_intr(dev, adapter->res,
2662	    INTR_TYPE_NET | INTR_MPSAFE, NULL,
2663	    em_msix_link, adapter, &adapter->tag);
2664	if (error) {
2665		adapter->res = NULL;
2666		device_printf(dev, "Failed to register LINK handler");
2667		return (error);
2668	}
2669#if __FreeBSD_version >= 800504
2670	bus_describe_intr(dev, adapter->res, adapter->tag, "link");
2671#endif
2672	adapter->linkvec = vector;
2673	adapter->ivars |=  (8 | vector) << 16;
2674	adapter->ivars |= 0x80000000;
2675
2676	return (0);
2677}
2678
2679
2680static void
2681em_free_pci_resources(struct adapter *adapter)
2682{
2683	device_t	dev = adapter->dev;
2684	struct tx_ring	*txr;
2685	struct rx_ring	*rxr;
2686	int		rid;
2687
2688
2689	/*
2690	** Release all the queue interrupt resources:
2691	*/
2692	for (int i = 0; i < adapter->num_queues; i++) {
2693		txr = &adapter->tx_rings[i];
2694		rxr = &adapter->rx_rings[i];
2695		/* an early abort? */
2696		if ((txr == NULL) || (rxr == NULL))
2697			break;
2698		rid = txr->msix + 1;
2699		if (txr->tag != NULL) {
2700			bus_teardown_intr(dev, txr->res, txr->tag);
2701			txr->tag = NULL;
2702		}
2703		if (txr->res != NULL)
2704			bus_release_resource(dev, SYS_RES_IRQ,
2705			    rid, txr->res);
2706		rid = rxr->msix + 1;
2707		if (rxr->tag != NULL) {
2708			bus_teardown_intr(dev, rxr->res, rxr->tag);
2709			rxr->tag = NULL;
2710		}
2711		if (rxr->res != NULL)
2712			bus_release_resource(dev, SYS_RES_IRQ,
2713			    rid, rxr->res);
2714	}
2715
2716	if (adapter->linkvec) /* we are doing MSIX */
2717		rid = adapter->linkvec + 1;
2718	else
2719		rid = (adapter->msix != 0) ? 1 : 0;
2720
2721	if (adapter->tag != NULL) {
2722		bus_teardown_intr(dev, adapter->res, adapter->tag);
2723		adapter->tag = NULL;
2724	}
2725
2726	if (adapter->res != NULL)
2727		bus_release_resource(dev, SYS_RES_IRQ, rid, adapter->res);
2728
2729
2730	if (adapter->msix)
2731		pci_release_msi(dev);
2732
2733	if (adapter->msix_mem != NULL)
2734		bus_release_resource(dev, SYS_RES_MEMORY,
2735		    PCIR_BAR(EM_MSIX_BAR), adapter->msix_mem);
2736
2737	if (adapter->memory != NULL)
2738		bus_release_resource(dev, SYS_RES_MEMORY,
2739		    PCIR_BAR(0), adapter->memory);
2740
2741	if (adapter->flash != NULL)
2742		bus_release_resource(dev, SYS_RES_MEMORY,
2743		    EM_FLASH, adapter->flash);
2744}
2745
2746/*
2747 * Setup MSI or MSI/X
2748 */
2749static int
2750em_setup_msix(struct adapter *adapter)
2751{
2752	device_t dev = adapter->dev;
2753	int val = 0;
2754
2755	/*
2756	** Setup MSI/X for Hartwell: tests have shown
2757	** use of two queues to be unstable, and to
2758	** provide no great gain anyway, so we simply
2759	** separate the interrupts and use a single queue.
2760	*/
2761	if ((adapter->hw.mac.type == e1000_82574) &&
2762	    (em_enable_msix == TRUE)) {
2763		/* Map the MSIX BAR */
2764		int rid = PCIR_BAR(EM_MSIX_BAR);
2765		adapter->msix_mem = bus_alloc_resource_any(dev,
2766		    SYS_RES_MEMORY, &rid, RF_ACTIVE);
2767		if (!adapter->msix_mem) {
2768			/* May not be enabled */
2769			device_printf(adapter->dev,
2770			    "Unable to map MSIX table\n");
2771			goto msi;
2772		}
2773		val = pci_msix_count(dev);
2774		/* We only need 3 vectors */
2775		if (val > 3)
2776			val = 3;
2777		if (val != 3) {	/* val was clamped above */
2778			bus_release_resource(dev, SYS_RES_MEMORY,
2779			    PCIR_BAR(EM_MSIX_BAR), adapter->msix_mem);
2780			adapter->msix_mem = NULL;
2781			device_printf(adapter->dev,
2782			    "MSIX: incorrect vectors, using MSI\n");
2783			goto msi;
2784		}
2785
2786		if (pci_alloc_msix(dev, &val) == 0) {
2787			device_printf(adapter->dev,
2788			    "Using MSIX interrupts "
2789			    "with %d vectors\n", val);
2790		}
2791
2792		return (val);
2793	}
2794msi:
2795	val = pci_msi_count(dev);
2796	if (val == 1 && pci_alloc_msi(dev, &val) == 0) {
2797		adapter->msix = 1;
2798		device_printf(adapter->dev, "Using an MSI interrupt\n");
2799		return (val);
2800	}
2801	/* Should only happen due to manual configuration */
2802	device_printf(adapter->dev, "No MSI/MSIX, using a Legacy IRQ\n");
2803	return (0);
2804}
2805
2806
2807/*********************************************************************
2808 *
2809 *  Initialize the hardware to a configuration
2810 *  as specified by the adapter structure.
2811 *
2812 **********************************************************************/
2813static void
2814em_reset(struct adapter *adapter)
2815{
2816	device_t	dev = adapter->dev;
2817	struct ifnet	*ifp = adapter->ifp;
2818	struct e1000_hw	*hw = &adapter->hw;
2819	u16		rx_buffer_size;
2820	u32		pba;
2821
2822	INIT_DEBUGOUT("em_reset: begin");
2823
2824	/* Set up smart power down as default off on newer adapters. */
2825	if (!em_smart_pwr_down && (hw->mac.type == e1000_82571 ||
2826	    hw->mac.type == e1000_82572)) {
2827		u16 phy_tmp = 0;
2828
2829		/* Speed up time to link by disabling smart power down. */
2830		e1000_read_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, &phy_tmp);
2831		phy_tmp &= ~IGP02E1000_PM_SPD;
2832		e1000_write_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, phy_tmp);
2833	}
2834
2835	/*
2836	 * Packet Buffer Allocation (PBA)
2837	 * Writing PBA sets the receive portion of the buffer;
2838	 * the remainder is used for the transmit buffer.
2839	 */
2840	switch (hw->mac.type) {
2841	/* Total Packet Buffer on these is 48K */
2842	case e1000_82571:
2843	case e1000_82572:
2844	case e1000_80003es2lan:
2845			pba = E1000_PBA_32K; /* 32K for Rx, 16K for Tx */
2846		break;
2847	case e1000_82573: /* 82573: Total Packet Buffer is 32K */
2848			pba = E1000_PBA_12K; /* 12K for Rx, 20K for Tx */
2849		break;
2850	case e1000_82574:
2851	case e1000_82583:
2852			pba = E1000_PBA_20K; /* 20K for Rx, 20K for Tx */
2853		break;
2854	case e1000_ich8lan:
2855		pba = E1000_PBA_8K;
2856		break;
2857	case e1000_ich9lan:
2858	case e1000_ich10lan:
2859		/* Boost Receive side for jumbo frames */
2860		if (adapter->hw.mac.max_frame_size > 4096)
2861			pba = E1000_PBA_14K;
2862		else
2863			pba = E1000_PBA_10K;
2864		break;
2865	case e1000_pchlan:
2866	case e1000_pch2lan:
2867	case e1000_pch_lpt:
2868		pba = E1000_PBA_26K;
2869		break;
2870	default:
2871		if (adapter->hw.mac.max_frame_size > 8192)
2872			pba = E1000_PBA_40K; /* 40K for Rx, 24K for Tx */
2873		else
2874			pba = E1000_PBA_48K; /* 48K for Rx, 16K for Tx */
2875	}
2876	E1000_WRITE_REG(&adapter->hw, E1000_PBA, pba);
2877
2878	/*
2879	 * These parameters control the automatic generation (Tx) and
2880	 * response (Rx) to Ethernet PAUSE frames.
2881	 * - High water mark should allow for at least two frames to be
2882	 *   received after sending an XOFF.
2883	 * - Low water mark works best when it is very near the high water mark.
2884	 *   This allows the receiver to restart by sending XON when it has
2885	 *   drained a bit. Here we use an arbitrary value of 1500 which will
2886	 *   restart after one full frame is pulled from the buffer. There
2887	 *   could be several smaller frames in the buffer and if so they will
2888	 *   not trigger the XON until their total number reduces the buffer
2889	 *   by 1500.
2890	 * - The pause time is fairly large at 1000 x 512ns = 512 usec.
2891	 */
2892	rx_buffer_size = ((E1000_READ_REG(hw, E1000_PBA) & 0xffff) << 10 );
2893	hw->fc.high_water = rx_buffer_size -
2894	    roundup2(adapter->hw.mac.max_frame_size, 1024);
2895	hw->fc.low_water = hw->fc.high_water - 1500;
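	/*
	 * For example, with a 32KB RX allocation (E1000_PBA_32K) and,
	 * assuming a standard 1522-byte max frame: rx_buffer_size =
	 * 32 << 10 = 32768, roundup2(1522, 1024) = 2048, so
	 * high_water = 30720 and low_water = 29220.
	 */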
2896
2897	if (adapter->fc) /* locally set flow control value? */
2898		hw->fc.requested_mode = adapter->fc;
2899	else
2900		hw->fc.requested_mode = e1000_fc_full;
2901
2902	if (hw->mac.type == e1000_80003es2lan)
2903		hw->fc.pause_time = 0xFFFF;
2904	else
2905		hw->fc.pause_time = EM_FC_PAUSE_TIME;
2906
2907	hw->fc.send_xon = TRUE;
2908
2909	/* Device specific overrides/settings */
2910	switch (hw->mac.type) {
2911	case e1000_pchlan:
2912		/* Workaround: no TX flow ctrl for PCH */
2913		hw->fc.requested_mode = e1000_fc_rx_pause;
2914		hw->fc.pause_time = 0xFFFF; /* override */
2915		if (ifp->if_mtu > ETHERMTU) {
2916			hw->fc.high_water = 0x3500;
2917			hw->fc.low_water = 0x1500;
2918		} else {
2919			hw->fc.high_water = 0x5000;
2920			hw->fc.low_water = 0x3000;
2921		}
2922		hw->fc.refresh_time = 0x1000;
2923		break;
2924	case e1000_pch2lan:
2925	case e1000_pch_lpt:
2926		hw->fc.high_water = 0x5C20;
2927		hw->fc.low_water = 0x5048;
2928		hw->fc.pause_time = 0x0650;
2929		hw->fc.refresh_time = 0x0400;
2930		/* Jumbos need adjusted PBA */
2931		if (ifp->if_mtu > ETHERMTU)
2932			E1000_WRITE_REG(hw, E1000_PBA, 12);
2933		else
2934			E1000_WRITE_REG(hw, E1000_PBA, 26);
2935		break;
2936	case e1000_ich9lan:
2937	case e1000_ich10lan:
2938		if (ifp->if_mtu > ETHERMTU) {
2939			hw->fc.high_water = 0x2800;
2940			hw->fc.low_water = hw->fc.high_water - 8;
2941			break;
2942		}
2943		/* else fall thru */
2944	default:
2945		if (hw->mac.type == e1000_80003es2lan)
2946			hw->fc.pause_time = 0xFFFF;
2947		break;
2948	}
2949
2950	/* Issue a global reset */
2951	e1000_reset_hw(hw);
2952	E1000_WRITE_REG(hw, E1000_WUC, 0);
2953	em_disable_aspm(adapter);
2954	/* and a re-init */
2955	if (e1000_init_hw(hw) < 0) {
2956		device_printf(dev, "Hardware Initialization Failed\n");
2957		return;
2958	}
2959
2960	E1000_WRITE_REG(hw, E1000_VET, ETHERTYPE_VLAN);
2961	e1000_get_phy_info(hw);
2962	e1000_check_for_link(hw);
2963	return;
2964}
2965
2966/*********************************************************************
2967 *
2968 *  Setup networking device structure and register an interface.
2969 *
2970 **********************************************************************/
2971static int
2972em_setup_interface(device_t dev, struct adapter *adapter)
2973{
2974	struct ifnet   *ifp;
2975
2976	INIT_DEBUGOUT("em_setup_interface: begin");
2977
2978	ifp = adapter->ifp = if_alloc(IFT_ETHER);
2979	if (ifp == NULL) {
2980		device_printf(dev, "can not allocate ifnet structure\n");
2981		return (-1);
2982	}
2983	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
2984	ifp->if_init =  em_init;
2985	ifp->if_softc = adapter;
2986	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
2987	ifp->if_ioctl = em_ioctl;
2988#ifdef EM_MULTIQUEUE
2989	/* Multiqueue stack interface */
2990	ifp->if_transmit = em_mq_start;
2991	ifp->if_qflush = em_qflush;
2992#else
2993	ifp->if_start = em_start;
2994	IFQ_SET_MAXLEN(&ifp->if_snd, adapter->num_tx_desc - 1);
2995	ifp->if_snd.ifq_drv_maxlen = adapter->num_tx_desc - 1;
2996	IFQ_SET_READY(&ifp->if_snd);
2997#endif
2998
2999	ether_ifattach(ifp, adapter->hw.mac.addr);
3000
3001	ifp->if_capabilities = ifp->if_capenable = 0;
3002
3003
3004	ifp->if_capabilities |= IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM;
3005	ifp->if_capabilities |= IFCAP_TSO4;
3006	/*
3007	 * Tell the upper layer(s) we
3008	 * support full VLAN capability
3009	 */
3010	ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header);
3011	ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING
3012			     |  IFCAP_VLAN_HWTSO
3013			     |  IFCAP_VLAN_MTU;
3014	ifp->if_capenable = ifp->if_capabilities;
3015
3016	/*
3017	** Don't turn this on by default: if vlans are
3018	** created on another pseudo device (e.g. lagg),
3019	** vlan events are not passed through, breaking
3020	** operation, but with HW FILTER off it works. If
3021	** you use vlans directly on the em driver you can
3022	** enable this and get full hardware tag filtering.
3023	*/
3024	ifp->if_capabilities |= IFCAP_VLAN_HWFILTER;
3025
3026#ifdef DEVICE_POLLING
3027	ifp->if_capabilities |= IFCAP_POLLING;
3028#endif
3029
3030	/* Enable only WOL MAGIC by default */
3031	if (adapter->wol) {
3032		ifp->if_capabilities |= IFCAP_WOL;
3033		ifp->if_capenable |= IFCAP_WOL_MAGIC;
3034	}
3035
3036	/*
3037	 * Specify the media types supported by this adapter and register
3038	 * callbacks to update media and link information
3039	 */
3040	ifmedia_init(&adapter->media, IFM_IMASK,
3041	    em_media_change, em_media_status);
3042	if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
3043	    (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
3044		u_char fiber_type = IFM_1000_SX;	/* default type */
3045
3046		ifmedia_add(&adapter->media, IFM_ETHER | fiber_type | IFM_FDX,
3047			    0, NULL);
3048		ifmedia_add(&adapter->media, IFM_ETHER | fiber_type, 0, NULL);
3049	} else {
3050		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T, 0, NULL);
3051		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T | IFM_FDX,
3052			    0, NULL);
3053		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX,
3054			    0, NULL);
3055		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX | IFM_FDX,
3056			    0, NULL);
3057		if (adapter->hw.phy.type != e1000_phy_ife) {
3058			ifmedia_add(&adapter->media,
3059				IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
3060			ifmedia_add(&adapter->media,
3061				IFM_ETHER | IFM_1000_T, 0, NULL);
3062		}
3063	}
3064	ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL);
3065	ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO);
3066	return (0);
3067}
3068
3069
3070/*
3071 * Manage DMA'able memory.
3072 */
3073static void
3074em_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
3075{
3076	if (error)
3077		return;
3078	*(bus_addr_t *) arg = segs[0].ds_addr;
3079}
3080
3081static int
3082em_dma_malloc(struct adapter *adapter, bus_size_t size,
3083        struct em_dma_alloc *dma, int mapflags)
3084{
3085	int error;
3086
3087	error = bus_dma_tag_create(bus_get_dma_tag(adapter->dev), /* parent */
3088				EM_DBA_ALIGN, 0,	/* alignment, bounds */
3089				BUS_SPACE_MAXADDR,	/* lowaddr */
3090				BUS_SPACE_MAXADDR,	/* highaddr */
3091				NULL, NULL,		/* filter, filterarg */
3092				size,			/* maxsize */
3093				1,			/* nsegments */
3094				size,			/* maxsegsize */
3095				0,			/* flags */
3096				NULL,			/* lockfunc */
3097				NULL,			/* lockarg */
3098				&dma->dma_tag);
3099	if (error) {
3100		device_printf(adapter->dev,
3101		    "%s: bus_dma_tag_create failed: %d\n",
3102		    __func__, error);
3103		goto fail_0;
3104	}
3105
3106	error = bus_dmamem_alloc(dma->dma_tag, (void**) &dma->dma_vaddr,
3107	    BUS_DMA_NOWAIT | BUS_DMA_COHERENT, &dma->dma_map);
3108	if (error) {
3109		device_printf(adapter->dev,
3110		    "%s: bus_dmamem_alloc(%ju) failed: %d\n",
3111		    __func__, (uintmax_t)size, error);
3112		goto fail_2;
3113	}
3114
3115	dma->dma_paddr = 0;
3116	error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
3117	    size, em_dmamap_cb, &dma->dma_paddr, mapflags | BUS_DMA_NOWAIT);
3118	if (error || dma->dma_paddr == 0) {
3119		device_printf(adapter->dev,
3120		    "%s: bus_dmamap_load failed: %d\n",
3121		    __func__, error);
3122		goto fail_3;
3123	}
3124
3125	return (0);
3126
3127fail_3:
3128	bus_dmamap_unload(dma->dma_tag, dma->dma_map);
3129fail_2:
3130	bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
3131	bus_dma_tag_destroy(dma->dma_tag);
3132fail_0:
3133	dma->dma_map = NULL;
3134	dma->dma_tag = NULL;
3135
3136	return (error);
3137}
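/*
 * Typical use (see em_allocate_queues below): a descriptor ring of
 * "tsize" or "rsize" bytes is allocated with BUS_DMA_NOWAIT, after
 * which dma_vaddr is cast to the descriptor array and dma_paddr is
 * what gets programmed into the TDBAL/TDBAH (or RDBAL/RDBAH) base
 * registers.
 */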
3138
3139static void
3140em_dma_free(struct adapter *adapter, struct em_dma_alloc *dma)
3141{
3142	if (dma->dma_tag == NULL)
3143		return;
3144	if (dma->dma_map != NULL) {
3145		bus_dmamap_sync(dma->dma_tag, dma->dma_map,
3146		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
3147		bus_dmamap_unload(dma->dma_tag, dma->dma_map);
3148		bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
3149		dma->dma_map = NULL;
3150	}
3151	bus_dma_tag_destroy(dma->dma_tag);
3152	dma->dma_tag = NULL;
3153}
3154
3155
3156/*********************************************************************
3157 *
3158 *  Allocate memory for the transmit and receive rings, and then
3159 *  the descriptors associated with each, called only once at attach.
3160 *
3161 **********************************************************************/
3162static int
3163em_allocate_queues(struct adapter *adapter)
3164{
3165	device_t		dev = adapter->dev;
3166	struct tx_ring		*txr = NULL;
3167	struct rx_ring		*rxr = NULL;
3168	int rsize, tsize, error = E1000_SUCCESS;
3169	int txconf = 0, rxconf = 0;
3170
3171
3172	/* Allocate the TX ring struct memory */
3173	if (!(adapter->tx_rings =
3174	    (struct tx_ring *) malloc(sizeof(struct tx_ring) *
3175	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3176		device_printf(dev, "Unable to allocate TX ring memory\n");
3177		error = ENOMEM;
3178		goto fail;
3179	}
3180
3181	/* Now allocate the RX */
3182	if (!(adapter->rx_rings =
3183	    (struct rx_ring *) malloc(sizeof(struct rx_ring) *
3184	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3185		device_printf(dev, "Unable to allocate RX ring memory\n");
3186		error = ENOMEM;
3187		goto rx_fail;
3188	}
3189
3190	tsize = roundup2(adapter->num_tx_desc *
3191	    sizeof(struct e1000_tx_desc), EM_DBA_ALIGN);
3192	/*
3193	 * Now set up the TX queues, txconf is needed to handle the
3194	 * possibility that things fail midcourse and we need to
3195	 * undo memory gracefully
3196	 */
3197	for (int i = 0; i < adapter->num_queues; i++, txconf++) {
3198		/* Set up some basics */
3199		txr = &adapter->tx_rings[i];
3200		txr->adapter = adapter;
3201		txr->me = i;
3202
3203		/* Initialize the TX lock */
3204		snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
3205		    device_get_nameunit(dev), txr->me);
3206		mtx_init(&txr->tx_mtx, txr->mtx_name, NULL, MTX_DEF);
3207
3208		if (em_dma_malloc(adapter, tsize,
3209			&txr->txdma, BUS_DMA_NOWAIT)) {
3210			device_printf(dev,
3211			    "Unable to allocate TX Descriptor memory\n");
3212			error = ENOMEM;
3213			goto err_tx_desc;
3214		}
3215		txr->tx_base = (struct e1000_tx_desc *)txr->txdma.dma_vaddr;
3216		bzero((void *)txr->tx_base, tsize);
3217
3218		if (em_allocate_transmit_buffers(txr)) {
3219			device_printf(dev,
3220			    "Critical Failure setting up transmit buffers\n");
3221			error = ENOMEM;
3222			goto err_tx_desc;
3223		}
3224#if __FreeBSD_version >= 800000
3225		/* Allocate a buf ring */
3226		txr->br = buf_ring_alloc(4096, M_DEVBUF,
3227		    M_WAITOK, &txr->tx_mtx);
3228#endif
3229	}
3230
3231	/*
3232	 * Next the RX queues...
3233	 */
3234	rsize = roundup2(adapter->num_rx_desc *
3235	    sizeof(struct e1000_rx_desc), EM_DBA_ALIGN);
3236	for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
3237		rxr = &adapter->rx_rings[i];
3238		rxr->adapter = adapter;
3239		rxr->me = i;
3240
3241		/* Initialize the RX lock */
3242		snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
3243		    device_get_nameunit(dev), rxr->me);
3244		mtx_init(&rxr->rx_mtx, rxr->mtx_name, NULL, MTX_DEF);
3245
3246		if (em_dma_malloc(adapter, rsize,
3247			&rxr->rxdma, BUS_DMA_NOWAIT)) {
3248			device_printf(dev,
3249			    "Unable to allocate RxDescriptor memory\n");
3250			error = ENOMEM;
3251			goto err_rx_desc;
3252		}
3253		rxr->rx_base = (struct e1000_rx_desc *)rxr->rxdma.dma_vaddr;
3254		bzero((void *)rxr->rx_base, rsize);
3255
3256		/* Allocate receive buffers for the ring */
3257		if (em_allocate_receive_buffers(rxr)) {
3258			device_printf(dev,
3259			    "Critical Failure setting up receive buffers\n");
3260			error = ENOMEM;
3261			goto err_rx_desc;
3262		}
3263	}
3264
3265	return (0);
3266
3267err_rx_desc:
3268	for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
3269		em_dma_free(adapter, &rxr->rxdma);
3270err_tx_desc:
3271	for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
3272		em_dma_free(adapter, &txr->txdma);
3273	free(adapter->rx_rings, M_DEVBUF);
3274rx_fail:
3275#if __FreeBSD_version >= 800000
3276	buf_ring_free(txr->br, M_DEVBUF);
3277#endif
3278	free(adapter->tx_rings, M_DEVBUF);
3279fail:
3280	return (error);
3281}
3282
3283
3284/*********************************************************************
3285 *
3286 *  Allocate memory for tx_buffer structures. The tx_buffer stores all
3287 *  the information needed to transmit a packet on the wire. This is
3288 *  called only once at attach, setup is done every reset.
3289 *
3290 **********************************************************************/
3291static int
3292em_allocate_transmit_buffers(struct tx_ring *txr)
3293{
3294	struct adapter *adapter = txr->adapter;
3295	device_t dev = adapter->dev;
3296	struct em_buffer *txbuf;
3297	int error, i;
3298
3299	/*
3300	 * Setup DMA descriptor areas.
3301	 */
3302	if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
3303			       1, 0,			/* alignment, bounds */
3304			       BUS_SPACE_MAXADDR,	/* lowaddr */
3305			       BUS_SPACE_MAXADDR,	/* highaddr */
3306			       NULL, NULL,		/* filter, filterarg */
3307			       EM_TSO_SIZE,		/* maxsize */
3308			       EM_MAX_SCATTER,		/* nsegments */
3309			       PAGE_SIZE,		/* maxsegsize */
3310			       0,			/* flags */
3311			       NULL,			/* lockfunc */
3312			       NULL,			/* lockfuncarg */
3313			       &txr->txtag))) {
3314		device_printf(dev,"Unable to allocate TX DMA tag\n");
3315		goto fail;
3316	}
3317
3318	if (!(txr->tx_buffers =
3319	    (struct em_buffer *) malloc(sizeof(struct em_buffer) *
3320	    adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3321		device_printf(dev, "Unable to allocate tx_buffer memory\n");
3322		error = ENOMEM;
3323		goto fail;
3324	}
3325
3326	/* Create the descriptor buffer dma maps */
3327	txbuf = txr->tx_buffers;
3328	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3329		error = bus_dmamap_create(txr->txtag, 0, &txbuf->map);
3330		if (error != 0) {
3331			device_printf(dev, "Unable to create TX DMA map\n");
3332			goto fail;
3333		}
3334	}
3335
3336	return 0;
3337fail:
3338	/* We free everything; this handles the case where we failed partway */
3339	em_free_transmit_structures(adapter);
3340	return (error);
3341}
3342
3343/*********************************************************************
3344 *
3345 *  Initialize a transmit ring.
3346 *
3347 **********************************************************************/
3348static void
3349em_setup_transmit_ring(struct tx_ring *txr)
3350{
3351	struct adapter *adapter = txr->adapter;
3352	struct em_buffer *txbuf;
3353	int i;
3354#ifdef DEV_NETMAP
3355	struct netmap_adapter *na = NA(adapter->ifp);
3356	struct netmap_slot *slot;
3357#endif /* DEV_NETMAP */
3358
3359	/* Clear the old descriptor contents */
3360	EM_TX_LOCK(txr);
3361#ifdef DEV_NETMAP
3362	slot = netmap_reset(na, NR_TX, txr->me, 0);
3363#endif /* DEV_NETMAP */
3364
3365	bzero((void *)txr->tx_base,
3366	      (sizeof(struct e1000_tx_desc)) * adapter->num_tx_desc);
3367	/* Reset indices */
3368	txr->next_avail_desc = 0;
3369	txr->next_to_clean = 0;
3370
3371	/* Free any existing tx buffers. */
3372	txbuf = txr->tx_buffers;
3373	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3374		if (txbuf->m_head != NULL) {
3375			bus_dmamap_sync(txr->txtag, txbuf->map,
3376			    BUS_DMASYNC_POSTWRITE);
3377			bus_dmamap_unload(txr->txtag, txbuf->map);
3378			m_freem(txbuf->m_head);
3379			txbuf->m_head = NULL;
3380		}
3381#ifdef DEV_NETMAP
3382		if (slot) {
3383			int si = netmap_idx_n2k(&na->tx_rings[txr->me], i);
3384			uint64_t paddr;
3385			void *addr;
3386
3387			addr = PNMB(slot + si, &paddr);
3388			txr->tx_base[i].buffer_addr = htole64(paddr);
3389			/* reload the map for netmap mode */
3390			netmap_load_map(txr->txtag, txbuf->map, addr);
3391		}
3392#endif /* DEV_NETMAP */
3393
3394		/* clear the watch index */
3395		txbuf->next_eop = -1;
3396	}
3397
3398	/* Set number of descriptors available */
3399	txr->tx_avail = adapter->num_tx_desc;
3400	txr->queue_status = EM_QUEUE_IDLE;
3401
3402	/* Clear checksum offload context. */
3403	txr->last_hw_offload = 0;
3404	txr->last_hw_ipcss = 0;
3405	txr->last_hw_ipcso = 0;
3406	txr->last_hw_tucss = 0;
3407	txr->last_hw_tucso = 0;
3408
3409	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3410	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3411	EM_TX_UNLOCK(txr);
3412}
3413
3414/*********************************************************************
3415 *
3416 *  Initialize all transmit rings.
3417 *
3418 **********************************************************************/
3419static void
3420em_setup_transmit_structures(struct adapter *adapter)
3421{
3422	struct tx_ring *txr = adapter->tx_rings;
3423
3424	for (int i = 0; i < adapter->num_queues; i++, txr++)
3425		em_setup_transmit_ring(txr);
3426
3427	return;
3428}
3429
3430/*********************************************************************
3431 *
3432 *  Enable transmit unit.
3433 *
3434 **********************************************************************/
3435static void
3436em_initialize_transmit_unit(struct adapter *adapter)
3437{
3438	struct tx_ring	*txr = adapter->tx_rings;
3439	struct e1000_hw	*hw = &adapter->hw;
3440	u32	tctl, tarc, tipg = 0;
3441
3442	INIT_DEBUGOUT("em_initialize_transmit_unit: begin");
3443
3444	for (int i = 0; i < adapter->num_queues; i++, txr++) {
3445		u64 bus_addr = txr->txdma.dma_paddr;
3446		/* Base and Len of TX Ring */
3447		E1000_WRITE_REG(hw, E1000_TDLEN(i),
3448	    	    adapter->num_tx_desc * sizeof(struct e1000_tx_desc));
3449		E1000_WRITE_REG(hw, E1000_TDBAH(i),
3450	    	    (u32)(bus_addr >> 32));
3451		E1000_WRITE_REG(hw, E1000_TDBAL(i),
3452	    	    (u32)bus_addr);
3453		/* Init the HEAD/TAIL indices */
3454		E1000_WRITE_REG(hw, E1000_TDT(i), 0);
3455		E1000_WRITE_REG(hw, E1000_TDH(i), 0);
3456
3457		HW_DEBUGOUT2("Base = %x, Length = %x\n",
3458		    E1000_READ_REG(&adapter->hw, E1000_TDBAL(i)),
3459		    E1000_READ_REG(&adapter->hw, E1000_TDLEN(i)));
3460
3461		txr->queue_status = EM_QUEUE_IDLE;
3462	}
3463
3464	/* Set the default values for the Tx Inter Packet Gap timer */
3465	switch (adapter->hw.mac.type) {
3466	case e1000_80003es2lan:
3467		tipg = DEFAULT_82543_TIPG_IPGR1;
3468		tipg |= DEFAULT_80003ES2LAN_TIPG_IPGR2 <<
3469		    E1000_TIPG_IPGR2_SHIFT;
3470		break;
3471	default:
3472		if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
3473		    (adapter->hw.phy.media_type ==
3474		    e1000_media_type_internal_serdes))
3475			tipg = DEFAULT_82543_TIPG_IPGT_FIBER;
3476		else
3477			tipg = DEFAULT_82543_TIPG_IPGT_COPPER;
3478		tipg |= DEFAULT_82543_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT;
3479		tipg |= DEFAULT_82543_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT;
3480	}
3481
3482	E1000_WRITE_REG(&adapter->hw, E1000_TIPG, tipg);
3483	E1000_WRITE_REG(&adapter->hw, E1000_TIDV, adapter->tx_int_delay.value);
3484
3485	if (adapter->hw.mac.type >= e1000_82540)
3486		E1000_WRITE_REG(&adapter->hw, E1000_TADV,
3487		    adapter->tx_abs_int_delay.value);
3488
3489	if ((adapter->hw.mac.type == e1000_82571) ||
3490	    (adapter->hw.mac.type == e1000_82572)) {
3491		tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
3492		tarc |= SPEED_MODE_BIT;
3493		E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3494	} else if (adapter->hw.mac.type == e1000_80003es2lan) {
3495		tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
3496		tarc |= 1;
3497		E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3498		tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(1));
3499		tarc |= 1;
3500		E1000_WRITE_REG(&adapter->hw, E1000_TARC(1), tarc);
3501	}
3502
3503	adapter->txd_cmd = E1000_TXD_CMD_IFCS;
3504	if (adapter->tx_int_delay.value > 0)
3505		adapter->txd_cmd |= E1000_TXD_CMD_IDE;
3506
3507	/* Program the Transmit Control Register */
3508	tctl = E1000_READ_REG(&adapter->hw, E1000_TCTL);
3509	tctl &= ~E1000_TCTL_CT;
3510	tctl |= (E1000_TCTL_PSP | E1000_TCTL_RTLC | E1000_TCTL_EN |
3511		   (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT));
3512
3513	if (adapter->hw.mac.type >= e1000_82571)
3514		tctl |= E1000_TCTL_MULR;
3515
3516	/* This write will effectively turn on the transmit unit. */
3517	E1000_WRITE_REG(&adapter->hw, E1000_TCTL, tctl);
3518
3519}
3520
3521
3522/*********************************************************************
3523 *
3524 *  Free all transmit rings.
3525 *
3526 **********************************************************************/
3527static void
3528em_free_transmit_structures(struct adapter *adapter)
3529{
3530	struct tx_ring *txr = adapter->tx_rings;
3531
3532	for (int i = 0; i < adapter->num_queues; i++, txr++) {
3533		EM_TX_LOCK(txr);
3534		em_free_transmit_buffers(txr);
3535		em_dma_free(adapter, &txr->txdma);
3536		EM_TX_UNLOCK(txr);
3537		EM_TX_LOCK_DESTROY(txr);
3538	}
3539
3540	free(adapter->tx_rings, M_DEVBUF);
3541}
3542
3543/*********************************************************************
3544 *
3545 *  Free transmit ring related data structures.
3546 *
3547 **********************************************************************/
3548static void
3549em_free_transmit_buffers(struct tx_ring *txr)
3550{
3551	struct adapter		*adapter = txr->adapter;
3552	struct em_buffer	*txbuf;
3553
3554	INIT_DEBUGOUT("free_transmit_ring: begin");
3555
3556	if (txr->tx_buffers == NULL)
3557		return;
3558
3559	for (int i = 0; i < adapter->num_tx_desc; i++) {
3560		txbuf = &txr->tx_buffers[i];
3561		if (txbuf->m_head != NULL) {
3562			bus_dmamap_sync(txr->txtag, txbuf->map,
3563			    BUS_DMASYNC_POSTWRITE);
3564			bus_dmamap_unload(txr->txtag,
3565			    txbuf->map);
3566			m_freem(txbuf->m_head);
3567			txbuf->m_head = NULL;
3568			if (txbuf->map != NULL) {
3569				bus_dmamap_destroy(txr->txtag,
3570				    txbuf->map);
3571				txbuf->map = NULL;
3572			}
3573		} else if (txbuf->map != NULL) {
3574			bus_dmamap_unload(txr->txtag,
3575			    txbuf->map);
3576			bus_dmamap_destroy(txr->txtag,
3577			    txbuf->map);
3578			txbuf->map = NULL;
3579		}
3580	}
3581#if __FreeBSD_version >= 800000
3582	if (txr->br != NULL)
3583		buf_ring_free(txr->br, M_DEVBUF);
3584#endif
3585	if (txr->tx_buffers != NULL) {
3586		free(txr->tx_buffers, M_DEVBUF);
3587		txr->tx_buffers = NULL;
3588	}
3589	if (txr->txtag != NULL) {
3590		bus_dma_tag_destroy(txr->txtag);
3591		txr->txtag = NULL;
3592	}
3593	return;
3594}
3595
3596
3597/*********************************************************************
3598 *  The offload context is protocol specific (TCP/UDP) and thus
3599 *  only needs to be set when the protocol changes. The occasion
3600 *  of a context change can be a performance detriment, and
3601 *  might be better just disabled. The reason arises in the way
3602 *  in which the controller supports pipelined requests from the
3603 *  Tx data DMA. Up to four requests can be pipelined, and they may
3604 *  belong to the same packet or to multiple packets. However all
3605 *  requests for one packet are issued before a request is issued
3606 *  for a subsequent packet and if a request for the next packet
3607 *  requires a context change, that request will be stalled
3608 *  until the previous request completes. This means setting up
3609 *  a new context effectively disables pipelined Tx data DMA which
3610 *  in turn greatly slow down performance to send small sized
3611 *  frames.
3612 **********************************************************************/
3613static void
3614em_transmit_checksum_setup(struct tx_ring *txr, struct mbuf *mp, int ip_off,
3615    struct ip *ip, u32 *txd_upper, u32 *txd_lower)
3616{
3617	struct adapter			*adapter = txr->adapter;
3618	struct e1000_context_desc	*TXD = NULL;
3619	struct em_buffer		*tx_buffer;
3620	int				cur, hdr_len;
3621	u32				cmd = 0;
3622	u16				offload = 0;
3623	u8				ipcso, ipcss, tucso, tucss;
3624
3625	ipcss = ipcso = tucss = tucso = 0;
3626	hdr_len = ip_off + (ip->ip_hl << 2);
3627	cur = txr->next_avail_desc;
3628
3629	/* Setup of IP header checksum. */
3630	if (mp->m_pkthdr.csum_flags & CSUM_IP) {
3631		*txd_upper |= E1000_TXD_POPTS_IXSM << 8;
3632		offload |= CSUM_IP;
3633		ipcss = ip_off;
3634		ipcso = ip_off + offsetof(struct ip, ip_sum);
3635		/*
3636		 * Start offset for header checksum calculation.
3637		 * End offset for header checksum calculation.
3638		 * Offset of place to put the checksum.
3639		 */
3640		TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
3641		TXD->lower_setup.ip_fields.ipcss = ipcss;
3642		TXD->lower_setup.ip_fields.ipcse = htole16(hdr_len);
3643		TXD->lower_setup.ip_fields.ipcso = ipcso;
3644		cmd |= E1000_TXD_CMD_IP;
3645	}
3646
3647	if (mp->m_pkthdr.csum_flags & CSUM_TCP) {
3648 		*txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
3649 		*txd_upper |= E1000_TXD_POPTS_TXSM << 8;
3650 		offload |= CSUM_TCP;
3651 		tucss = hdr_len;
3652 		tucso = hdr_len + offsetof(struct tcphdr, th_sum);
3653 		/*
3654 		 * Setting up a new checksum offload context for every
3655 		 * frame takes a lot of processing time on the hardware.
3656 		 * This also reduces performance a lot for small-sized
3657 		 * frames, so avoid it if the driver can reuse a
3658 		 * previously configured checksum offload context.
3659 		 */
3660 		if (txr->last_hw_offload == offload) {
3661 			if (offload & CSUM_IP) {
3662 				if (txr->last_hw_ipcss == ipcss &&
3663 				    txr->last_hw_ipcso == ipcso &&
3664 				    txr->last_hw_tucss == tucss &&
3665 				    txr->last_hw_tucso == tucso)
3666 					return;
3667 			} else {
3668 				if (txr->last_hw_tucss == tucss &&
3669 				    txr->last_hw_tucso == tucso)
3670 					return;
3671 			}
3672  		}
3673 		txr->last_hw_offload = offload;
3674 		txr->last_hw_tucss = tucss;
3675 		txr->last_hw_tucso = tucso;
3676 		/*
3677 		 * Start offset for payload checksum calculation.
3678 		 * End offset for payload checksum calculation.
3679 		 * Offset of place to put the checksum.
3680 		 */
3681		TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
3682 		TXD->upper_setup.tcp_fields.tucss = hdr_len;
3683 		TXD->upper_setup.tcp_fields.tucse = htole16(0);
3684 		TXD->upper_setup.tcp_fields.tucso = tucso;
3685 		cmd |= E1000_TXD_CMD_TCP;
3686 	} else if (mp->m_pkthdr.csum_flags & CSUM_UDP) {
3687 		*txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
3688 		*txd_upper |= E1000_TXD_POPTS_TXSM << 8;
 		offload |= CSUM_UDP;
3689 		tucss = hdr_len;
3690 		tucso = hdr_len + offsetof(struct udphdr, uh_sum);
3691 		/*
3692 		 * Setting up a new checksum offload context for every
3693 		 * frame takes a lot of processing time on the hardware.
3694 		 * This also reduces performance a lot for small-sized
3695 		 * frames, so avoid it if the driver can reuse a
3696 		 * previously configured checksum offload context.
3697 		 */
3698 		if (txr->last_hw_offload == offload) {
3699 			if (offload & CSUM_IP) {
3700 				if (txr->last_hw_ipcss == ipcss &&
3701 				    txr->last_hw_ipcso == ipcso &&
3702 				    txr->last_hw_tucss == tucss &&
3703 				    txr->last_hw_tucso == tucso)
3704 					return;
3705 			} else {
3706 				if (txr->last_hw_tucss == tucss &&
3707 				    txr->last_hw_tucso == tucso)
3708 					return;
3709 			}
3710 		}
3711 		txr->last_hw_offload = offload;
3712 		txr->last_hw_tucss = tucss;
3713 		txr->last_hw_tucso = tucso;
3714 		/*
3715 		 * Start offset for header checksum calculation.
3716 		 * End offset for header checksum calculation.
3717 		 * Offset of place to put the checksum.
3718 		 */
3719		TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
3720 		TXD->upper_setup.tcp_fields.tucss = tucss;
3721 		TXD->upper_setup.tcp_fields.tucse = htole16(0);
3722 		TXD->upper_setup.tcp_fields.tucso = tucso;
3723  	}
3724
3725 	if (offload & CSUM_IP) {
3726 		txr->last_hw_ipcss = ipcss;
3727 		txr->last_hw_ipcso = ipcso;
3728  	}
3729
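	/* Finalize the context descriptor: it consumes one TX slot,
	 * and the packet's data descriptors will follow it. */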
3730	TXD->tcp_seg_setup.data = htole32(0);
3731	TXD->cmd_and_length =
3732	    htole32(adapter->txd_cmd | E1000_TXD_CMD_DEXT | cmd);
3733	tx_buffer = &txr->tx_buffers[cur];
3734	tx_buffer->m_head = NULL;
3735	tx_buffer->next_eop = -1;
3736
3737	if (++cur == adapter->num_tx_desc)
3738		cur = 0;
3739
3740	txr->tx_avail--;
3741	txr->next_avail_desc = cur;
3742}
3743
3744
3745/**********************************************************************
3746 *
3747 *  Setup work for hardware segmentation offload (TSO)
3748 *
3749 **********************************************************************/
3750static void
3751em_tso_setup(struct tx_ring *txr, struct mbuf *mp, int ip_off,
3752    struct ip *ip, struct tcphdr *tp, u32 *txd_upper, u32 *txd_lower)
3753{
3754	struct adapter			*adapter = txr->adapter;
3755	struct e1000_context_desc	*TXD;
3756	struct em_buffer		*tx_buffer;
3757	int cur, hdr_len;
3758
3759	/*
3760	 * In theory we can reuse the same TSO context if and only if
3761	 * the frame is the same type (IP/TCP) and has the same MSS.
3762	 * However, checking whether a frame has the same IP/TCP
3763	 * structure is a hard thing, so just ignore that and always
3764	 * establish a new TSO context.
3765	 */
3766	hdr_len = ip_off + (ip->ip_hl << 2) + (tp->th_off << 2);
3767	*txd_lower = (E1000_TXD_CMD_DEXT |	/* Extended descr type */
3768		      E1000_TXD_DTYP_D |	/* Data descr type */
3769		      E1000_TXD_CMD_TSE);	/* Do TSE on this packet */
3770
3771	/* IP and/or TCP header checksum calculation and insertion. */
3772	*txd_upper = (E1000_TXD_POPTS_IXSM | E1000_TXD_POPTS_TXSM) << 8;
3773
3774	cur = txr->next_avail_desc;
3775	tx_buffer = &txr->tx_buffers[cur];
3776	TXD = (struct e1000_context_desc *) &txr->tx_base[cur];
3777
3778	/*
3779	 * Start offset for header checksum calculation.
3780	 * End offset for header checksum calculation.
3781	 * Offset of place put the checksum.
3782	 * Offset of place to put the checksum.
3783	TXD->lower_setup.ip_fields.ipcss = ip_off;
3784	TXD->lower_setup.ip_fields.ipcse =
3785	    htole16(ip_off + (ip->ip_hl << 2) - 1);
3786	TXD->lower_setup.ip_fields.ipcso = ip_off + offsetof(struct ip, ip_sum);
3787	/*
3788	 * Start offset for payload checksum calculation.
3789	 * End offset for payload checksum calculation.
3790	 * Offset of place to put the checksum.
3791	 */
3792	TXD->upper_setup.tcp_fields.tucss = ip_off + (ip->ip_hl << 2);
3793	TXD->upper_setup.tcp_fields.tucse = 0;
3794	TXD->upper_setup.tcp_fields.tucso =
3795	    ip_off + (ip->ip_hl << 2) + offsetof(struct tcphdr, th_sum);
3796	/*
3797	 * Payload size per packet w/o any headers.
3798	 * Length of all headers up to payload.
3799	 */
3800	TXD->tcp_seg_setup.fields.mss = htole16(mp->m_pkthdr.tso_segsz);
3801	TXD->tcp_seg_setup.fields.hdr_len = hdr_len;
3802
3803	TXD->cmd_and_length = htole32(adapter->txd_cmd |
3804				E1000_TXD_CMD_DEXT |	/* Extended descr */
3805				E1000_TXD_CMD_TSE |	/* TSE context */
3806				E1000_TXD_CMD_IP |	/* Do IP csum */
3807				E1000_TXD_CMD_TCP |	/* Do TCP checksum */
3808				(mp->m_pkthdr.len - (hdr_len))); /* Total len */
3809
3810	tx_buffer->m_head = NULL;
3811	tx_buffer->next_eop = -1;
3812
3813	if (++cur == adapter->num_tx_desc)
3814		cur = 0;
3815
3816	txr->tx_avail--;
3817	txr->next_avail_desc = cur;
3818	txr->tx_tso = TRUE;
3819}
3820
3821
3822/**********************************************************************
3823 *
3824 *  Examine each tx_buffer in the used queue. If the hardware is done
3825 *  processing the packet then free associated resources. The
3826 *  tx_buffer is put back on the free queue.
3827 *
3828 **********************************************************************/
3829static void
3830em_txeof(struct tx_ring *txr)
3831{
3832	struct adapter	*adapter = txr->adapter;
3833        int first, last, done, processed;
3834        struct em_buffer *tx_buffer;
3835        struct e1000_tx_desc   *tx_desc, *eop_desc;
3836	struct ifnet   *ifp = adapter->ifp;
3837
3838	EM_TX_LOCK_ASSERT(txr);
3839#ifdef DEV_NETMAP
3840	if (netmap_tx_irq(ifp, txr->me |
3841	    (NETMAP_LOCKED_ENTER | NETMAP_LOCKED_EXIT)))
3842		return;
3843#endif /* DEV_NETMAP */
3844
3845	/* No work, make sure watchdog is off */
3846        if (txr->tx_avail == adapter->num_tx_desc) {
3847		txr->queue_status = EM_QUEUE_IDLE;
3848                return;
3849	}
3850
3851	processed = 0;
3852        first = txr->next_to_clean;
3853        tx_desc = &txr->tx_base[first];
3854        tx_buffer = &txr->tx_buffers[first];
3855	last = tx_buffer->next_eop;
3856        eop_desc = &txr->tx_base[last];
3857
3858	/*
3859	 * What this does is get the index of the
3860	 * first descriptor AFTER the EOP of the
3861	 * first packet, that way we can do the
3862	 * simple comparison on the inner while loop.
3863	 */
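	/* For example: with first = 3 and next_eop = 5, done below
	 * becomes 6 and the inner loop cleans descriptors 3, 4 and 5. */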
3864	if (++last == adapter->num_tx_desc)
3865 		last = 0;
3866	done = last;
3867
3868        bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3869            BUS_DMASYNC_POSTREAD);
3870
3871        while (eop_desc->upper.fields.status & E1000_TXD_STAT_DD) {
3872		/* We clean the range of the packet */
3873		while (first != done) {
3874                	tx_desc->upper.data = 0;
3875                	tx_desc->lower.data = 0;
3876                	tx_desc->buffer_addr = 0;
3877                	++txr->tx_avail;
3878			++processed;
3879
3880			if (tx_buffer->m_head) {
3881				bus_dmamap_sync(txr->txtag,
3882				    tx_buffer->map,
3883				    BUS_DMASYNC_POSTWRITE);
3884				bus_dmamap_unload(txr->txtag,
3885				    tx_buffer->map);
3886                        	m_freem(tx_buffer->m_head);
3887                        	tx_buffer->m_head = NULL;
3888                	}
3889			tx_buffer->next_eop = -1;
3890			txr->watchdog_time = ticks;
3891
3892	                if (++first == adapter->num_tx_desc)
3893				first = 0;
3894
3895	                tx_buffer = &txr->tx_buffers[first];
3896			tx_desc = &txr->tx_base[first];
3897		}
3898		++ifp->if_opackets;
3899		/* See if we can continue to the next packet */
3900		last = tx_buffer->next_eop;
3901		if (last != -1) {
3902        		eop_desc = &txr->tx_base[last];
3903			/* Get new done point */
3904			if (++last == adapter->num_tx_desc) last = 0;
3905			done = last;
3906		} else
3907			break;
3908        }
3909        bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3910            BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3911
3912        txr->next_to_clean = first;
3913
3914	/*
3915	** Watchdog calculation: we know there's work
3916	** outstanding, or the first return above would have
3917	** been taken, so nothing processed for too long
3918	** indicates a hang. The local timer will examine
3919	** this and do a reset if needed.
3920	*/
3921	if ((!processed) && ((ticks - txr->watchdog_time) > EM_WATCHDOG))
3922		txr->queue_status = EM_QUEUE_HUNG;
3923
3924        /*
3925         * If we have a minimum free, clear IFF_DRV_OACTIVE
3926         * to tell the stack that it is OK to send packets.
3927	 * Notice that all writes of OACTIVE happen under the
3928	 * TX lock which, with a single queue, guarantees
3929	 * sanity.
3930         */
3931        if (txr->tx_avail >= EM_MAX_SCATTER)
3932		ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
3933
3934	/* Disable watchdog if all clean */
3935	if (txr->tx_avail == adapter->num_tx_desc) {
3936		txr->queue_status = EM_QUEUE_IDLE;
3937	}
3938}
3939
3940
3941/*********************************************************************
3942 *
3943 *  Refresh RX descriptor mbufs from system mbuf buffer pool.
3944 *
3945 **********************************************************************/
3946static void
3947em_refresh_mbufs(struct rx_ring *rxr, int limit)
3948{
3949	struct adapter		*adapter = rxr->adapter;
3950	struct mbuf		*m;
3951	bus_dma_segment_t	segs[1];
3952	struct em_buffer	*rxbuf;
3953	int			i, j, error, nsegs;
3954	bool			cleaned = FALSE;
3955
3956	i = j = rxr->next_to_refresh;
3957	/*
3958	** Get one descriptor beyond
3959	** our work mark to control
3960	** the loop.
3961	*/
3962	if (++j == adapter->num_rx_desc)
3963		j = 0;
3964
3965	while (j != limit) {
3966		rxbuf = &rxr->rx_buffers[i];
3967		if (rxbuf->m_head == NULL) {
3968			m = m_getjcl(M_NOWAIT, MT_DATA,
3969			    M_PKTHDR, adapter->rx_mbuf_sz);
3970			/*
3971			** If we have a temporary resource shortage
3972			** that causes a failure, just abort the
3973			** refresh for now; we will return to this
3974			** point when reinvoked from em_rxeof.
3975			*/
3976			if (m == NULL)
3977				goto update;
3978		} else
3979			m = rxbuf->m_head;
3980
3981		m->m_len = m->m_pkthdr.len = adapter->rx_mbuf_sz;
3982		m->m_flags |= M_PKTHDR;
3983		m->m_data = m->m_ext.ext_buf;
3984
3985		/* Use bus_dma machinery to setup the memory mapping  */
3986		error = bus_dmamap_load_mbuf_sg(rxr->rxtag, rxbuf->map,
3987		    m, segs, &nsegs, BUS_DMA_NOWAIT);
3988		if (error != 0) {
3989			printf("Refresh mbufs: hdr dmamap load"
3990			    " failure - %d\n", error);
3991			m_free(m);
3992			rxbuf->m_head = NULL;
3993			goto update;
3994		}
3995		rxbuf->m_head = m;
3996		bus_dmamap_sync(rxr->rxtag,
3997		    rxbuf->map, BUS_DMASYNC_PREREAD);
3998		rxr->rx_base[i].buffer_addr = htole64(segs[0].ds_addr);
3999		cleaned = TRUE;
4000
4001		i = j; /* Next is precalculated for us */
4002		rxr->next_to_refresh = i;
4003		/* Calculate next controlling index */
4004		if (++j == adapter->num_rx_desc)
4005			j = 0;
4006	}
4007update:
4008	/*
4009	** Update the tail pointer only if, and only
4010	** as far as, we have refreshed.
4011	*/
4012	if (cleaned)
4013		E1000_WRITE_REG(&adapter->hw,
4014		    E1000_RDT(rxr->me), rxr->next_to_refresh);
4015
4016	return;
4017}
4018
4019
4020/*********************************************************************
4021 *
4022 *  Allocate memory for rx_buffer structures. Since we use one
4023 *  rx_buffer per received packet, the maximum number of rx_buffers
4024 *  that we'll need is equal to the number of receive descriptors
4025 *  that we've allocated.
4026 *
4027 **********************************************************************/
4028static int
4029em_allocate_receive_buffers(struct rx_ring *rxr)
4030{
4031	struct adapter		*adapter = rxr->adapter;
4032	device_t		dev = adapter->dev;
4033	struct em_buffer	*rxbuf;
4034	int			error;
4035
4036	rxr->rx_buffers = malloc(sizeof(struct em_buffer) *
4037	    adapter->num_rx_desc, M_DEVBUF, M_NOWAIT | M_ZERO);
4038	if (rxr->rx_buffers == NULL) {
4039		device_printf(dev, "Unable to allocate rx_buffer memory\n");
4040		return (ENOMEM);
4041	}
4042
4043	error = bus_dma_tag_create(bus_get_dma_tag(dev), /* parent */
4044				1, 0,			/* alignment, bounds */
4045				BUS_SPACE_MAXADDR,	/* lowaddr */
4046				BUS_SPACE_MAXADDR,	/* highaddr */
4047				NULL, NULL,		/* filter, filterarg */
4048				MJUM9BYTES,		/* maxsize */
4049				1,			/* nsegments */
4050				MJUM9BYTES,		/* maxsegsize */
4051				0,			/* flags */
4052				NULL,			/* lockfunc */
4053				NULL,			/* lockarg */
4054				&rxr->rxtag);
4055	if (error) {
4056		device_printf(dev, "%s: bus_dma_tag_create failed %d\n",
4057		    __func__, error);
4058		goto fail;
4059	}
4060
4061	/* Create a DMA map for each receive buffer. */
4062	for (int i = 0; i < adapter->num_rx_desc; i++) {
4063		rxbuf = &rxr->rx_buffers[i];
4064		error = bus_dmamap_create(rxr->rxtag, BUS_DMA_NOWAIT,
4065		    &rxbuf->map);
4066		if (error) {
4067			device_printf(dev, "%s: bus_dmamap_create failed: %d\n",
4068			    __func__, error);
4069			goto fail;
4070		}
4071	}
4072
4073	return (0);
4074
4075fail:
4076	em_free_receive_structures(adapter);
4077	return (error);
4078}
4079
4080
4081/*********************************************************************
4082 *
4083 *  Initialize a receive ring and its buffers.
4084 *
4085 **********************************************************************/
4086static int
4087em_setup_receive_ring(struct rx_ring *rxr)
4088{
4089	struct	adapter 	*adapter = rxr->adapter;
4090	struct em_buffer	*rxbuf;
4091	bus_dma_segment_t	seg[1];
4092	int			rsize, nsegs, error = 0;
4093#ifdef DEV_NETMAP
4094	struct netmap_adapter *na = NA(adapter->ifp);
4095	struct netmap_slot *slot;
4096#endif
4097
4098
4099	/* Clear the ring contents */
4100	EM_RX_LOCK(rxr);
4101	rsize = roundup2(adapter->num_rx_desc *
4102	    sizeof(struct e1000_rx_desc), EM_DBA_ALIGN);
4103	bzero((void *)rxr->rx_base, rsize);
4104#ifdef DEV_NETMAP
4105	slot = netmap_reset(na, NR_RX, 0, 0);
4106#endif
4107
4108	/*
4109	** Free current RX buffer structs and their mbufs
4110	*/
4111	for (int i = 0; i < adapter->num_rx_desc; i++) {
4112		rxbuf = &rxr->rx_buffers[i];
4113		if (rxbuf->m_head != NULL) {
4114			bus_dmamap_sync(rxr->rxtag, rxbuf->map,
4115			    BUS_DMASYNC_POSTREAD);
4116			bus_dmamap_unload(rxr->rxtag, rxbuf->map);
4117			m_freem(rxbuf->m_head);
4118			rxbuf->m_head = NULL; /* mark as freed */
4119		}
4120	}
4121
4122	/* Now replenish the mbufs */
4123        for (int j = 0; j != adapter->num_rx_desc; ++j) {
4124		rxbuf = &rxr->rx_buffers[j];
4125#ifdef DEV_NETMAP
4126		if (slot) {
4127			int si = netmap_idx_n2k(&na->rx_rings[rxr->me], j);
4128			uint64_t paddr;
4129			void *addr;
4130
4131			addr = PNMB(slot + si, &paddr);
4132			netmap_load_map(rxr->rxtag, rxbuf->map, addr);
4133			/* Update descriptor */
4134			rxr->rx_base[j].buffer_addr = htole64(paddr);
4135			continue;
4136		}
4137#endif /* DEV_NETMAP */
4138		rxbuf->m_head = m_getjcl(M_NOWAIT, MT_DATA,
4139		    M_PKTHDR, adapter->rx_mbuf_sz);
4140		if (rxbuf->m_head == NULL) {
4141			error = ENOBUFS;
4142			goto fail;
4143		}
4144		rxbuf->m_head->m_len = adapter->rx_mbuf_sz;
4145		rxbuf->m_head->m_flags &= ~M_HASFCS; /* we strip it */
4146		rxbuf->m_head->m_pkthdr.len = adapter->rx_mbuf_sz;
4147
4148		/* Get the memory mapping */
4149		error = bus_dmamap_load_mbuf_sg(rxr->rxtag,
4150		    rxbuf->map, rxbuf->m_head, seg,
4151		    &nsegs, BUS_DMA_NOWAIT);
4152		if (error != 0) {
4153			m_freem(rxbuf->m_head);
4154			rxbuf->m_head = NULL;
4155			goto fail;
4156		}
4157		bus_dmamap_sync(rxr->rxtag,
4158		    rxbuf->map, BUS_DMASYNC_PREREAD);
4159
4160		/* Update descriptor */
4161		rxr->rx_base[j].buffer_addr = htole64(seg[0].ds_addr);
4162	}
4163	rxr->next_to_check = 0;
4164	rxr->next_to_refresh = 0;
4165	bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4166	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4167
4168fail:
4169	EM_RX_UNLOCK(rxr);
4170	return (error);
4171}
4172
4173/*********************************************************************
4174 *
4175 *  Initialize all receive rings.
4176 *
4177 **********************************************************************/
4178static int
4179em_setup_receive_structures(struct adapter *adapter)
4180{
4181	struct rx_ring *rxr = adapter->rx_rings;
4182	int q;
4183
4184	for (q = 0; q < adapter->num_queues; q++, rxr++)
4185		if (em_setup_receive_ring(rxr))
4186			goto fail;
4187
4188	return (0);
4189fail:
4190	/*
4191	 * Free the RX buffers allocated so far; we only handle
4192	 * the rings that completed, since the failing ring will
4193	 * have cleaned up for itself. 'q' failed, so it's the terminus.
4194	 */
4195	for (int i = 0; i < q; ++i) {
4196		rxr = &adapter->rx_rings[i];
4197		for (int n = 0; n < adapter->num_rx_desc; n++) {
4198			struct em_buffer *rxbuf;
4199			rxbuf = &rxr->rx_buffers[n];
4200			if (rxbuf->m_head != NULL) {
4201				bus_dmamap_sync(rxr->rxtag, rxbuf->map,
4202			  	  BUS_DMASYNC_POSTREAD);
4203				bus_dmamap_unload(rxr->rxtag, rxbuf->map);
4204				m_freem(rxbuf->m_head);
4205				rxbuf->m_head = NULL;
4206			}
4207		}
4208		rxr->next_to_check = 0;
4209		rxr->next_to_refresh = 0;
4210	}
4211
4212	return (ENOBUFS);
4213}
4214
4215/*********************************************************************
4216 *
4217 *  Free all receive rings.
4218 *
4219 **********************************************************************/
4220static void
4221em_free_receive_structures(struct adapter *adapter)
4222{
4223	struct rx_ring *rxr = adapter->rx_rings;
4224
4225	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4226		em_free_receive_buffers(rxr);
4227		/* Free the ring memory as well */
4228		em_dma_free(adapter, &rxr->rxdma);
4229		EM_RX_LOCK_DESTROY(rxr);
4230	}
4231
4232	free(adapter->rx_rings, M_DEVBUF);
4233}
4234
4235
4236/*********************************************************************
4237 *
4238 *  Free receive ring data structures
4239 *
4240 **********************************************************************/
4241static void
4242em_free_receive_buffers(struct rx_ring *rxr)
4243{
4244	struct adapter		*adapter = rxr->adapter;
4245	struct em_buffer	*rxbuf = NULL;
4246
4247	INIT_DEBUGOUT("free_receive_buffers: begin");
4248
4249	if (rxr->rx_buffers != NULL) {
4250		for (int i = 0; i < adapter->num_rx_desc; i++) {
4251			rxbuf = &rxr->rx_buffers[i];
4252			if (rxbuf->map != NULL) {
4253				bus_dmamap_sync(rxr->rxtag, rxbuf->map,
4254				    BUS_DMASYNC_POSTREAD);
4255				bus_dmamap_unload(rxr->rxtag, rxbuf->map);
4256				bus_dmamap_destroy(rxr->rxtag, rxbuf->map);
4257			}
4258			if (rxbuf->m_head != NULL) {
4259				m_freem(rxbuf->m_head);
4260				rxbuf->m_head = NULL;
4261			}
4262		}
4263		free(rxr->rx_buffers, M_DEVBUF);
4264		rxr->rx_buffers = NULL;
4265		rxr->next_to_check = 0;
4266		rxr->next_to_refresh = 0;
4267	}
4268
4269	if (rxr->rxtag != NULL) {
4270		bus_dma_tag_destroy(rxr->rxtag);
4271		rxr->rxtag = NULL;
4272	}
4273
4274	return;
4275}
4276
4277
4278/*********************************************************************
4279 *
4280 *  Enable receive unit.
4281 *
4282 **********************************************************************/
4283
4284static void
4285em_initialize_receive_unit(struct adapter *adapter)
4286{
4287	struct rx_ring	*rxr = adapter->rx_rings;
4288	struct ifnet	*ifp = adapter->ifp;
4289	struct e1000_hw	*hw = &adapter->hw;
4290	u64	bus_addr;
4291	u32	rctl, rxcsum;
4292
4293	INIT_DEBUGOUT("em_initialize_receive_units: begin");
4294
4295	/*
4296	 * Make sure receives are disabled while setting
4297	 * up the descriptor ring
4298	 */
4299	rctl = E1000_READ_REG(hw, E1000_RCTL);
4300	/* Do not disable if ever enabled on this hardware */
4301	if ((hw->mac.type != e1000_82574) && (hw->mac.type != e1000_82583))
4302		E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
4303
4304	E1000_WRITE_REG(&adapter->hw, E1000_RADV,
4305	    adapter->rx_abs_int_delay.value);
4306	/*
4307	 * Set the interrupt throttling rate. Value is calculated
4308	 * as DEFAULT_ITR = 1/(MAX_INTS_PER_SEC * 256ns)
4309	 */
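	/* For example, a MAX_INTS_PER_SEC of 8000 yields
	 * 1000000000 / (8000 * 256) ~= 488 in the register's
	 * 256ns units. */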
4310	E1000_WRITE_REG(hw, E1000_ITR, DEFAULT_ITR);
4311
4312	/*
4313	** When using MSIX interrupts we need to throttle
4314	** using the EITR register (82574 only)
4315	*/
4316	if (hw->mac.type == e1000_82574) {
4317		for (int i = 0; i < 4; i++)
4318			E1000_WRITE_REG(hw, E1000_EITR_82574(i),
4319			    DEFAULT_ITR);
4320		/* Disable accelerated acknowledge */
4321		E1000_WRITE_REG(hw, E1000_RFCTL, E1000_RFCTL_ACK_DIS);
4322	}
4323
4324	rxcsum = E1000_READ_REG(hw, E1000_RXCSUM);
4325	if (ifp->if_capenable & IFCAP_RXCSUM)
4326		rxcsum |= E1000_RXCSUM_TUOFL;
4327	else
4328		rxcsum &= ~E1000_RXCSUM_TUOFL;
4329	E1000_WRITE_REG(hw, E1000_RXCSUM, rxcsum);
4330
4331	/*
4332	** XXX TEMPORARY WORKAROUND: on some systems with 82573
4333	** long latencies are observed, like Lenovo X60. This
4334	** change eliminates the problem, but since having positive
4335	** values in RDTR is a known source of problems on other
4336	** platforms another solution is being sought.
4337	*/
4338	if (hw->mac.type == e1000_82573)
4339		E1000_WRITE_REG(hw, E1000_RDTR, 0x20);
4340
4341	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4342		/* Setup the Base and Length of the Rx Descriptor Ring */
4343		u32 rdt = adapter->num_rx_desc - 1; /* default */
4344
4345		bus_addr = rxr->rxdma.dma_paddr;
4346		E1000_WRITE_REG(hw, E1000_RDLEN(i),
4347		    adapter->num_rx_desc * sizeof(struct e1000_rx_desc));
4348		E1000_WRITE_REG(hw, E1000_RDBAH(i), (u32)(bus_addr >> 32));
4349		E1000_WRITE_REG(hw, E1000_RDBAL(i), (u32)bus_addr);
4350		/* Setup the Head and Tail Descriptor Pointers */
4351		E1000_WRITE_REG(hw, E1000_RDH(i), 0);
4352#ifdef DEV_NETMAP
4353		/*
4354		 * an init() while a netmap client is active must
4355		 * preserve the rx buffers passed to userspace.
4356		 */
4357		if (ifp->if_capenable & IFCAP_NETMAP)
4358			rdt -= NA(adapter->ifp)->rx_rings[i].nr_hwavail;
4359#endif /* DEV_NETMAP */
4360		E1000_WRITE_REG(hw, E1000_RDT(i), rdt);
4361	}
4362
4363	/* Set PTHRESH for improved jumbo performance */
4364	if (((adapter->hw.mac.type == e1000_ich9lan) ||
4365	    (adapter->hw.mac.type == e1000_pch2lan) ||
4366	    (adapter->hw.mac.type == e1000_ich10lan)) &&
4367	    (ifp->if_mtu > ETHERMTU)) {
4368		u32 rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(0));
4369		E1000_WRITE_REG(hw, E1000_RXDCTL(0), rxdctl | 3);
4370	}
4371
4372	if (adapter->hw.mac.type >= e1000_pch2lan) {
4373		if (ifp->if_mtu > ETHERMTU)
4374			e1000_lv_jumbo_workaround_ich8lan(hw, TRUE);
4375		else
4376			e1000_lv_jumbo_workaround_ich8lan(hw, FALSE);
4377	}
4378
4379	/* Setup the Receive Control Register */
4380	rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
4381	rctl |= E1000_RCTL_EN | E1000_RCTL_BAM |
4382	    E1000_RCTL_LBM_NO | E1000_RCTL_RDMTS_HALF |
4383	    (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
4384
4385        /* Strip the CRC */
4386        rctl |= E1000_RCTL_SECRC;
4387
4388        /* Make sure VLAN Filters are off */
4389        rctl &= ~E1000_RCTL_VFE;
4390	rctl &= ~E1000_RCTL_SBP;
4391
4392	if (adapter->rx_mbuf_sz == MCLBYTES)
4393		rctl |= E1000_RCTL_SZ_2048;
4394	else if (adapter->rx_mbuf_sz == MJUMPAGESIZE)
4395		rctl |= E1000_RCTL_SZ_4096 | E1000_RCTL_BSEX;
4396	else if (adapter->rx_mbuf_sz > MJUMPAGESIZE)
4397		rctl |= E1000_RCTL_SZ_8192 | E1000_RCTL_BSEX;
4398
4399	if (ifp->if_mtu > ETHERMTU)
4400		rctl |= E1000_RCTL_LPE;
4401	else
4402		rctl &= ~E1000_RCTL_LPE;
4403
4404	/* Write out the settings */
4405	E1000_WRITE_REG(hw, E1000_RCTL, rctl);
4406
4407	return;
4408}
4409
4410
4411/*********************************************************************
4412 *
4413 *  This routine executes in interrupt context. It replenishes
4414 *  the mbufs in the descriptor ring and sends data which has
4415 *  been dma'ed into host memory to the upper layer.
4416 *
4417 *  We loop at most count times if count is > 0, or until done if
4418 *  count < 0.
4419 *
4420 *  For polling we also return the number of cleaned packets.
4421 *********************************************************************/
4422static bool
4423em_rxeof(struct rx_ring *rxr, int count, int *done)
4424{
4425	struct adapter		*adapter = rxr->adapter;
4426	struct ifnet		*ifp = adapter->ifp;
4427	struct mbuf		*mp, *sendmp;
4428	u8			status = 0;
4429	u16 			len;
4430	int			i, processed, rxdone = 0;
4431	bool			eop;
4432	struct e1000_rx_desc	*cur;
4433
4434	EM_RX_LOCK(rxr);
4435
4436#ifdef DEV_NETMAP
4437	if (netmap_rx_irq(ifp, rxr->me | NETMAP_LOCKED_ENTER, &processed))
4438		return (FALSE);
4439#endif /* DEV_NETMAP */
4440
4441	for (i = rxr->next_to_check, processed = 0; count != 0;) {
4442
4443		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
4444			break;
4445
4446		bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4447		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
4448
4449		cur = &rxr->rx_base[i];
4450		status = cur->status;
4451		mp = sendmp = NULL;
4452
4453		if ((status & E1000_RXD_STAT_DD) == 0)
4454			break;
4455
4456		len = le16toh(cur->length);
4457		eop = (status & E1000_RXD_STAT_EOP) != 0;
4458
4459		if ((cur->errors & E1000_RXD_ERR_FRAME_ERR_MASK) ||
4460		    (rxr->discard == TRUE)) {
4461			adapter->dropped_pkts++;
4462			++rxr->rx_discarded;
4463			if (!eop) /* Catch subsequent segs */
4464				rxr->discard = TRUE;
4465			else
4466				rxr->discard = FALSE;
4467			em_rx_discard(rxr, i);
4468			goto next_desc;
4469		}
4470
4471		/* Assign correct length to the current fragment */
4472		mp = rxr->rx_buffers[i].m_head;
4473		mp->m_len = len;
4474
4475		/* Trigger for refresh */
4476		rxr->rx_buffers[i].m_head = NULL;
4477
4478		/* First segment? */
4479		if (rxr->fmp == NULL) {
4480			mp->m_pkthdr.len = len;
4481			rxr->fmp = rxr->lmp = mp;
4482		} else {
4483			/* Chain mbuf's together */
4484			mp->m_flags &= ~M_PKTHDR;
4485			rxr->lmp->m_next = mp;
4486			rxr->lmp = mp;
4487			rxr->fmp->m_pkthdr.len += len;
4488		}
4489
4490		if (eop) {
4491			--count;
4492			sendmp = rxr->fmp;
4493			sendmp->m_pkthdr.rcvif = ifp;
4494			ifp->if_ipackets++;
4495			em_receive_checksum(cur, sendmp);
4496#ifndef __NO_STRICT_ALIGNMENT
4497			if (adapter->hw.mac.max_frame_size >
4498			    (MCLBYTES - ETHER_ALIGN) &&
4499			    em_fixup_rx(rxr) != 0)
4500				goto skip;
4501#endif
4502			if (status & E1000_RXD_STAT_VP) {
4503				sendmp->m_pkthdr.ether_vtag =
4504				    le16toh(cur->special);
4505				sendmp->m_flags |= M_VLANTAG;
4506			}
4507#ifndef __NO_STRICT_ALIGNMENT
4508skip:
4509#endif
4510			rxr->fmp = rxr->lmp = NULL;
4511		}
4512next_desc:
4513		/* Zero out the receive descriptors status. */
4514		cur->status = 0;
4515		++rxdone;	/* cumulative for POLL */
4516		++processed;
4517
4518		/* Advance our pointers to the next descriptor. */
4519		if (++i == adapter->num_rx_desc)
4520			i = 0;
4521
4522		/* Send to the stack */
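		/* Drop the RX lock around if_input() so the stack
		 * may safely re-enter the driver, then re-read our
		 * position in case it changed while unlocked. */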
4523		if (sendmp != NULL) {
4524			rxr->next_to_check = i;
4525			EM_RX_UNLOCK(rxr);
4526			(*ifp->if_input)(ifp, sendmp);
4527			EM_RX_LOCK(rxr);
4528			i = rxr->next_to_check;
4529		}
4530
4531		/* Only refresh mbufs every 8 descriptors */
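		/* (batching amortizes the RDT tail-register write
		 *  performed by em_refresh_mbufs) */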
4532		if (processed == 8) {
4533			em_refresh_mbufs(rxr, i);
4534			processed = 0;
4535		}
4536	}
4537
4538	/* Catch any remaining refresh work */
4539	if (e1000_rx_unrefreshed(rxr))
4540		em_refresh_mbufs(rxr, i);
4541
4542	rxr->next_to_check = i;
4543	if (done != NULL)
4544		*done = rxdone;
4545	EM_RX_UNLOCK(rxr);
4546
4547	return ((status & E1000_RXD_STAT_DD) ? TRUE : FALSE);
4548}
4549
4550static __inline void
4551em_rx_discard(struct rx_ring *rxr, int i)
4552{
4553	struct em_buffer	*rbuf;
4554
4555	rbuf = &rxr->rx_buffers[i];
4556	/* Free any previous pieces */
4557	if (rxr->fmp != NULL) {
4558		rxr->fmp->m_flags |= M_PKTHDR;
4559		m_freem(rxr->fmp);
4560		rxr->fmp = NULL;
4561		rxr->lmp = NULL;
4562	}
4563	/*
4564	** Free buffer and allow em_refresh_mbufs()
4565	** to clean up and recharge buffer.
4566	*/
4567	if (rbuf->m_head) {
4568		m_free(rbuf->m_head);
4569		rbuf->m_head = NULL;
4570	}
4571	return;
4572}
4573
4574#ifndef __NO_STRICT_ALIGNMENT
4575/*
4576 * When jumbo frames are enabled we should realign the entire payload on
4577 * architectures with strict alignment. This is a serious design mistake of
4578 * the 8254x, as it nullifies DMA operations. The 8254x only allows the RX
4579 * buffer size to be 2048/4096/8192/16384. What we really want is 2048 -
4580 * ETHER_ALIGN to align its payload. On architectures without strict
4581 * alignment restrictions the 8254x still performs unaligned memory accesses,
4582 * which reduce performance too. To avoid copying an entire frame just to
4583 * align it, we allocate a new mbuf and copy the ethernet header into it;
4584 * the new mbuf is then prepended to the existing mbuf chain.
4585 *
4586 * Be aware that the best performance of the 8254x is achieved only when
4587 * jumbo frames are not used at all on architectures with strict alignment.
4588 */
4589static int
4590em_fixup_rx(struct rx_ring *rxr)
4591{
4592	struct adapter *adapter = rxr->adapter;
4593	struct mbuf *m, *n;
4594	int error;
4595
4596	error = 0;
4597	m = rxr->fmp;
4598	if (m->m_len <= (MCLBYTES - ETHER_HDR_LEN)) {
4599		bcopy(m->m_data, m->m_data + ETHER_HDR_LEN, m->m_len);
4600		m->m_data += ETHER_HDR_LEN;
4601	} else {
4602		MGETHDR(n, M_NOWAIT, MT_DATA);
4603		if (n != NULL) {
4604			bcopy(m->m_data, n->m_data, ETHER_HDR_LEN);
4605			m->m_data += ETHER_HDR_LEN;
4606			m->m_len -= ETHER_HDR_LEN;
4607			n->m_len = ETHER_HDR_LEN;
4608			M_MOVE_PKTHDR(n, m);
4609			n->m_next = m;
4610			rxr->fmp = n;
4611		} else {
4612			adapter->dropped_pkts++;
4613			m_freem(rxr->fmp);
4614			rxr->fmp = NULL;
4615			error = ENOMEM;
4616		}
4617	}
4618
4619	return (error);
4620}
4621#endif
4622
4623/*********************************************************************
4624 *
4625 *  Verify that the hardware indicated that the checksum is valid.
4626 *  Inform the stack about the status of checksum so that stack
4627 *  doesn't spend time verifying the checksum.
4628 *
4629 *********************************************************************/
4630static void
4631em_receive_checksum(struct e1000_rx_desc *rx_desc, struct mbuf *mp)
4632{
4633	mp->m_pkthdr.csum_flags = 0;
4634
4635	/* Ignore Checksum bit is set */
4636	if (rx_desc->status & E1000_RXD_STAT_IXSM)
4637		return;
4638
4639	if (rx_desc->errors & (E1000_RXD_ERR_TCPE | E1000_RXD_ERR_IPE))
4640		return;
4641
4642	/* IP Checksum Good? */
4643	if (rx_desc->status & E1000_RXD_STAT_IPCS)
4644		mp->m_pkthdr.csum_flags = (CSUM_IP_CHECKED | CSUM_IP_VALID);
4645
4646	/* TCP or UDP checksum */
4647	if (rx_desc->status & (E1000_RXD_STAT_TCPCS | E1000_RXD_STAT_UDPCS)) {
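		/* A csum_data of 0xffff with CSUM_PSEUDO_HDR set tells
		 * the stack the full pseudo-header checksum has already
		 * been verified, so no software check is needed. */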
4648		mp->m_pkthdr.csum_flags |= (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
4649		mp->m_pkthdr.csum_data = htons(0xffff);
4650	}
4651}
4652
4653/*
4654 * This routine is run via a vlan
4655 * config EVENT
4656 */
4657static void
4658em_register_vlan(void *arg, struct ifnet *ifp, u16 vtag)
4659{
4660	struct adapter	*adapter = ifp->if_softc;
4661	u32		index, bit;
4662
4663	if (ifp->if_softc !=  arg)   /* Not our event */
4664		return;
4665
4666	if ((vtag == 0) || (vtag > 4095))       /* Invalid ID */
4667                return;
4668
4669	EM_CORE_LOCK(adapter);
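	/* The 4096-bit VFTA is laid out as 128 32-bit words:
	 * bits 11:5 of the tag select the word, bits 4:0 the bit. */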
4670	index = (vtag >> 5) & 0x7F;
4671	bit = vtag & 0x1F;
4672	adapter->shadow_vfta[index] |= (1 << bit);
4673	++adapter->num_vlans;
4674	/* Re-init to load the changes */
4675	if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
4676		em_init_locked(adapter);
4677	EM_CORE_UNLOCK(adapter);
4678}
4679
4680/*
4681 * This routine is run via a vlan
4682 * unconfig EVENT
4683 */
4684static void
4685em_unregister_vlan(void *arg, struct ifnet *ifp, u16 vtag)
4686{
4687	struct adapter	*adapter = ifp->if_softc;
4688	u32		index, bit;
4689
4690	if (ifp->if_softc !=  arg)
4691		return;
4692
4693	if ((vtag == 0) || (vtag > 4095))       /* Invalid */
4694                return;
4695
4696	EM_CORE_LOCK(adapter);
4697	index = (vtag >> 5) & 0x7F;
4698	bit = vtag & 0x1F;
4699	adapter->shadow_vfta[index] &= ~(1 << bit);
4700	--adapter->num_vlans;
4701	/* Re-init to load the changes */
4702	if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
4703		em_init_locked(adapter);
4704	EM_CORE_UNLOCK(adapter);
4705}
4706
4707static void
4708em_setup_vlan_hw_support(struct adapter *adapter)
4709{
4710	struct e1000_hw *hw = &adapter->hw;
4711	u32             reg;
4712
4713	/*
4714	** We get here thru init_locked, meaning
4715	** a soft reset; this has already cleared
4716	** the VFTA and other state, so if no vlans
4717	** have been registered, do nothing.
4718	*/
4719	if (adapter->num_vlans == 0)
4720                return;
4721
4722	/*
4723	** A soft reset zeroes out the VFTA, so
4724	** we need to repopulate it now.
4725	*/
4726	for (int i = 0; i < EM_VFTA_SIZE; i++)
4727                if (adapter->shadow_vfta[i] != 0)
4728			E1000_WRITE_REG_ARRAY(hw, E1000_VFTA,
4729                            i, adapter->shadow_vfta[i]);
4730
4731	reg = E1000_READ_REG(hw, E1000_CTRL);
4732	reg |= E1000_CTRL_VME;
4733	E1000_WRITE_REG(hw, E1000_CTRL, reg);
4734
4735	/* Enable the Filter Table */
4736	reg = E1000_READ_REG(hw, E1000_RCTL);
4737	reg &= ~E1000_RCTL_CFIEN;
4738	reg |= E1000_RCTL_VFE;
4739	E1000_WRITE_REG(hw, E1000_RCTL, reg);
4740}
4741
4742static void
4743em_enable_intr(struct adapter *adapter)
4744{
4745	struct e1000_hw *hw = &adapter->hw;
4746	u32 ims_mask = IMS_ENABLE_MASK;
4747
4748	if (hw->mac.type == e1000_82574) {
4749		E1000_WRITE_REG(hw, EM_EIAC, EM_MSIX_MASK);
4750		ims_mask |= EM_MSIX_MASK;
4751	}
4752	E1000_WRITE_REG(hw, E1000_IMS, ims_mask);
4753}
4754
4755static void
4756em_disable_intr(struct adapter *adapter)
4757{
4758	struct e1000_hw *hw = &adapter->hw;
4759
4760	if (hw->mac.type == e1000_82574)
4761		E1000_WRITE_REG(hw, EM_EIAC, 0);
4762	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
4763}
4764
4765/*
4766 * Bit of a misnomer, what this really means is
4767 * to enable OS management of the system... aka
4768 * to disable special hardware management features
4769 */
4770static void
4771em_init_manageability(struct adapter *adapter)
4772{
4773	/* A shared code workaround */
4774#define E1000_82542_MANC2H E1000_MANC2H
4775	if (adapter->has_manage) {
4776		int manc2h = E1000_READ_REG(&adapter->hw, E1000_MANC2H);
4777		int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
4778
4779		/* disable hardware interception of ARP */
4780		manc &= ~(E1000_MANC_ARP_EN);
4781
4782                /* enable receiving management packets to the host */
4783		manc |= E1000_MANC_EN_MNG2HOST;
4784#define E1000_MNG2HOST_PORT_623 (1 << 5)
4785#define E1000_MNG2HOST_PORT_664 (1 << 6)
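		/* 623 is the standard RMCP/ASF port, 664 the secure one */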
4786		manc2h |= E1000_MNG2HOST_PORT_623;
4787		manc2h |= E1000_MNG2HOST_PORT_664;
4788		E1000_WRITE_REG(&adapter->hw, E1000_MANC2H, manc2h);
4789		E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
4790	}
4791}
4792
4793/*
4794 * Give control back to hardware management
4795 * controller if there is one.
4796 */
4797static void
4798em_release_manageability(struct adapter *adapter)
4799{
4800	if (adapter->has_manage) {
4801		int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
4802
4803		/* re-enable hardware interception of ARP */
4804		manc |= E1000_MANC_ARP_EN;
4805		manc &= ~E1000_MANC_EN_MNG2HOST;
4806
4807		E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
4808	}
4809}
4810
4811/*
4812 * em_get_hw_control sets the {CTRL_EXT|FWSM}:DRV_LOAD bit.
4813 * For ASF and Pass Through versions of f/w this means
4814 * that the driver is loaded. For AMT versions of the f/w
4815 * this means that the network i/f is open.
4816 */
4817static void
4818em_get_hw_control(struct adapter *adapter)
4819{
4820	u32 ctrl_ext, swsm;
4821
4822	if (adapter->hw.mac.type == e1000_82573) {
4823		swsm = E1000_READ_REG(&adapter->hw, E1000_SWSM);
4824		E1000_WRITE_REG(&adapter->hw, E1000_SWSM,
4825		    swsm | E1000_SWSM_DRV_LOAD);
4826		return;
4827	}
4828	/* else */
4829	ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4830	E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
4831	    ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
4832	return;
4833}
4834
4835/*
4836 * em_release_hw_control resets {CTRL_EXT|FWSM}:DRV_LOAD bit.
4837 * For ASF and Pass Through versions of f/w this means that
4838 * the driver is no longer loaded. For AMT versions of the
4839 * f/w this means that the network i/f is closed.
4840 */
4841static void
4842em_release_hw_control(struct adapter *adapter)
4843{
4844	u32 ctrl_ext, swsm;
4845
4846	if (!adapter->has_manage)
4847		return;
4848
4849	if (adapter->hw.mac.type == e1000_82573) {
4850		swsm = E1000_READ_REG(&adapter->hw, E1000_SWSM);
4851		E1000_WRITE_REG(&adapter->hw, E1000_SWSM,
4852		    swsm & ~E1000_SWSM_DRV_LOAD);
4853		return;
4854	}
4855	/* else */
4856	ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4857	E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
4858	    ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
4859	return;
4860}
4861
4862static int
4863em_is_valid_ether_addr(u8 *addr)
4864{
4865	char zero_addr[6] = { 0, 0, 0, 0, 0, 0 };
4866
4867	if ((addr[0] & 1) || (!bcmp(addr, zero_addr, ETHER_ADDR_LEN))) {
4868		return (FALSE);
4869	}
4870
4871	return (TRUE);
4872}
4873
4874/*
4875** Parse the interface capabilities with regard
4876** to both system management and wake-on-lan for
4877** later use.
4878*/
4879static void
4880em_get_wakeup(device_t dev)
4881{
4882	struct adapter	*adapter = device_get_softc(dev);
4883	u16		eeprom_data = 0, device_id, apme_mask;
4884
4885	adapter->has_manage = e1000_enable_mng_pass_thru(&adapter->hw);
4886	apme_mask = EM_EEPROM_APME;
4887
4888	switch (adapter->hw.mac.type) {
4889	case e1000_82573:
4890	case e1000_82583:
4891		adapter->has_amt = TRUE;
4892		/* Falls thru */
4893	case e1000_82571:
4894	case e1000_82572:
4895	case e1000_80003es2lan:
4896		if (adapter->hw.bus.func == 1) {
4897			e1000_read_nvm(&adapter->hw,
4898			    NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data);
4899			break;
4900		} else
4901			e1000_read_nvm(&adapter->hw,
4902			    NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
4903		break;
4904	case e1000_ich8lan:
4905	case e1000_ich9lan:
4906	case e1000_ich10lan:
4907	case e1000_pchlan:
4908	case e1000_pch2lan:
4909		apme_mask = E1000_WUC_APME;
4910		adapter->has_amt = TRUE;
4911		eeprom_data = E1000_READ_REG(&adapter->hw, E1000_WUC);
4912		break;
4913	default:
4914		e1000_read_nvm(&adapter->hw,
4915		    NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
4916		break;
4917	}
4918	if (eeprom_data & apme_mask)
4919		adapter->wol = (E1000_WUFC_MAG | E1000_WUFC_MC);
4920	/*
4921         * We have the eeprom settings, now apply the special cases
4922         * where the eeprom may be wrong or the board won't support
4923         * wake on lan on a particular port
4924	 */
4925	device_id = pci_get_device(dev);
4926        switch (device_id) {
4927	case E1000_DEV_ID_82571EB_FIBER:
4928		/* Wake events only supported on port A for dual fiber
4929		 * regardless of eeprom setting */
4930		if (E1000_READ_REG(&adapter->hw, E1000_STATUS) &
4931		    E1000_STATUS_FUNC_1)
4932			adapter->wol = 0;
4933		break;
4934	case E1000_DEV_ID_82571EB_QUAD_COPPER:
4935	case E1000_DEV_ID_82571EB_QUAD_FIBER:
4936	case E1000_DEV_ID_82571EB_QUAD_COPPER_LP:
4937                /* if quad port adapter, disable WoL on all but port A */
4938		if (global_quad_port_a != 0)
4939			adapter->wol = 0;
4940		/* Reset for multiple quad port adapters */
4941		if (++global_quad_port_a == 4)
4942			global_quad_port_a = 0;
4943                break;
4944	}
4945	return;
4946}
4947
4948
4949/*
4950 * Enable PCI Wake On Lan capability
4951 */
4952static void
4953em_enable_wakeup(device_t dev)
4954{
4955	struct adapter	*adapter = device_get_softc(dev);
4956	struct ifnet	*ifp = adapter->ifp;
4957	u32		pmc, ctrl, ctrl_ext, rctl;
4958	u16     	status;
4959
4960	if ((pci_find_cap(dev, PCIY_PMG, &pmc) != 0))
4961		return;
4962
4963	/* Advertise the wakeup capability */
4964	ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL);
4965	ctrl |= (E1000_CTRL_SWDPIN2 | E1000_CTRL_SWDPIN3);
4966	E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl);
4967	E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
4968
4969	if ((adapter->hw.mac.type == e1000_ich8lan) ||
4970	    (adapter->hw.mac.type == e1000_pchlan) ||
4971	    (adapter->hw.mac.type == e1000_ich9lan) ||
4972	    (adapter->hw.mac.type == e1000_ich10lan))
4973		e1000_suspend_workarounds_ich8lan(&adapter->hw);
4974
4975	/* Keep the laser running on Fiber adapters */
4976	if (adapter->hw.phy.media_type == e1000_media_type_fiber ||
4977	    adapter->hw.phy.media_type == e1000_media_type_internal_serdes) {
4978		ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4979		ctrl_ext |= E1000_CTRL_EXT_SDP3_DATA;
4980		E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, ctrl_ext);
4981	}
4982
4983	/*
4984	** Determine type of Wakeup: note that wol
4985	** is set with all bits on by default.
4986	*/
4987	if ((ifp->if_capenable & IFCAP_WOL_MAGIC) == 0)
4988		adapter->wol &= ~E1000_WUFC_MAG;
4989
4990	if ((ifp->if_capenable & IFCAP_WOL_MCAST) == 0)
4991		adapter->wol &= ~E1000_WUFC_MC;
4992	else {
4993		rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
4994		rctl |= E1000_RCTL_MPE;
4995		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, rctl);
4996	}
4997
4998	if ((adapter->hw.mac.type == e1000_pchlan) ||
4999	    (adapter->hw.mac.type == e1000_pch2lan)) {
5000		if (em_enable_phy_wakeup(adapter))
5001			return;
5002	} else {
5003		E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
5004		E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
5005	}
5006
5007	if (adapter->hw.phy.type == e1000_phy_igp_3)
5008		e1000_igp3_phy_powerdown_workaround_ich8lan(&adapter->hw);
5009
5010        /* Request PME */
5011        status = pci_read_config(dev, pmc + PCIR_POWER_STATUS, 2);
5012	status &= ~(PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE);
5013	if (ifp->if_capenable & IFCAP_WOL)
5014		status |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE;
5015        pci_write_config(dev, pmc + PCIR_POWER_STATUS, status, 2);
5016
5017	return;
5018}
5019
5020/*
5021** WOL in the newer chipset interfaces (pchlan)
5022** requires things to be copied into the phy
5023*/
5024static int
5025em_enable_phy_wakeup(struct adapter *adapter)
5026{
5027	struct e1000_hw *hw = &adapter->hw;
5028	u32 mreg, ret = 0;
5029	u16 preg;
5030
5031	/* copy MAC RARs to PHY RARs */
5032	e1000_copy_rx_addrs_to_phy_ich8lan(hw);
5033
5034	/* copy MAC MTA to PHY MTA */
5035	for (int i = 0; i < adapter->hw.mac.mta_reg_count; i++) {
5036		mreg = E1000_READ_REG_ARRAY(hw, E1000_MTA, i);
5037		e1000_write_phy_reg(hw, BM_MTA(i), (u16)(mreg & 0xFFFF));
5038		e1000_write_phy_reg(hw, BM_MTA(i) + 1,
5039		    (u16)((mreg >> 16) & 0xFFFF));
5040	}
5041
5042	/* configure PHY Rx Control register */
5043	e1000_read_phy_reg(&adapter->hw, BM_RCTL, &preg);
5044	mreg = E1000_READ_REG(hw, E1000_RCTL);
5045	if (mreg & E1000_RCTL_UPE)
5046		preg |= BM_RCTL_UPE;
5047	if (mreg & E1000_RCTL_MPE)
5048		preg |= BM_RCTL_MPE;
5049	preg &= ~(BM_RCTL_MO_MASK);
5050	if (mreg & E1000_RCTL_MO_3)
5051		preg |= (((mreg & E1000_RCTL_MO_3) >> E1000_RCTL_MO_SHIFT)
5052				<< BM_RCTL_MO_SHIFT);
5053	if (mreg & E1000_RCTL_BAM)
5054		preg |= BM_RCTL_BAM;
5055	if (mreg & E1000_RCTL_PMCF)
5056		preg |= BM_RCTL_PMCF;
5057	mreg = E1000_READ_REG(hw, E1000_CTRL);
5058	if (mreg & E1000_CTRL_RFCE)
5059		preg |= BM_RCTL_RFCE;
5060	e1000_write_phy_reg(&adapter->hw, BM_RCTL, preg);
5061
5062	/* enable PHY wakeup in MAC register */
5063	E1000_WRITE_REG(hw, E1000_WUC,
5064	    E1000_WUC_PHY_WAKE | E1000_WUC_PME_EN);
5065	E1000_WRITE_REG(hw, E1000_WUFC, adapter->wol);
5066
5067	/* configure and enable PHY wakeup in PHY registers */
5068	e1000_write_phy_reg(&adapter->hw, BM_WUFC, adapter->wol);
5069	e1000_write_phy_reg(&adapter->hw, BM_WUC, E1000_WUC_PME_EN);
5070
5071	/* activate PHY wakeup */
5072	ret = hw->phy.ops.acquire(hw);
5073	if (ret) {
5074		printf("Could not acquire PHY\n");
5075		return ret;
5076	}
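	/* Select PHY page 769 (the wakeup-control page) so the
	 * enable register can be read and updated below. */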
5077	e1000_write_phy_reg_mdic(hw, IGP01E1000_PHY_PAGE_SELECT,
5078	                         (BM_WUC_ENABLE_PAGE << IGP_PAGE_SHIFT));
5079	ret = e1000_read_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, &preg);
5080	if (ret) {
5081		printf("Could not read PHY page 769\n");
5082		goto out;
5083	}
5084	preg |= BM_WUC_ENABLE_BIT | BM_WUC_HOST_WU_BIT;
5085	ret = e1000_write_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, preg);
5086	if (ret)
5087		printf("Could not set PHY Host Wakeup bit\n");
5088out:
5089	hw->phy.ops.release(hw);
5090
5091	return ret;
5092}
5093
5094static void
5095em_led_func(void *arg, int onoff)
5096{
5097	struct adapter	*adapter = arg;
5098
5099	EM_CORE_LOCK(adapter);
5100	if (onoff) {
5101		e1000_setup_led(&adapter->hw);
5102		e1000_led_on(&adapter->hw);
5103	} else {
5104		e1000_led_off(&adapter->hw);
5105		e1000_cleanup_led(&adapter->hw);
5106	}
5107	EM_CORE_UNLOCK(adapter);
5108}
5109
5110/*
5111** Disable the L0S and L1 LINK states
5112*/
5113static void
5114em_disable_aspm(struct adapter *adapter)
5115{
5116	int		base, reg;
5117	u16		link_cap, link_ctrl;
5118	device_t	dev = adapter->dev;
5119
5120	switch (adapter->hw.mac.type) {
5121		case e1000_82573:
5122		case e1000_82574:
5123		case e1000_82583:
5124			break;
5125		default:
5126			return;
5127	}
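	/* Locate the PCIe capability; if the link advertises ASPM
	 * support, clear the L0s/L1 enable bits in Link Control. */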
5128	if (pci_find_cap(dev, PCIY_EXPRESS, &base) != 0)
5129		return;
5130	reg = base + PCIER_LINK_CAP;
5131	link_cap = pci_read_config(dev, reg, 2);
5132	if ((link_cap & PCIEM_LINK_CAP_ASPM) == 0)
5133		return;
5134	reg = base + PCIER_LINK_CTL;
5135	link_ctrl = pci_read_config(dev, reg, 2);
5136	link_ctrl &= ~PCIEM_LINK_CTL_ASPMC;
5137	pci_write_config(dev, reg, link_ctrl, 2);
5138	return;
5139}
5140
5141/**********************************************************************
5142 *
5143 *  Update the board statistics counters.
5144 *
5145 **********************************************************************/
5146static void
5147em_update_stats_counters(struct adapter *adapter)
5148{
5149	struct ifnet   *ifp;
5150
5151	if (adapter->hw.phy.media_type == e1000_media_type_copper ||
5152	   (E1000_READ_REG(&adapter->hw, E1000_STATUS) & E1000_STATUS_LU)) {
5153		adapter->stats.symerrs += E1000_READ_REG(&adapter->hw, E1000_SYMERRS);
5154		adapter->stats.sec += E1000_READ_REG(&adapter->hw, E1000_SEC);
5155	}
5156	adapter->stats.crcerrs += E1000_READ_REG(&adapter->hw, E1000_CRCERRS);
5157	adapter->stats.mpc += E1000_READ_REG(&adapter->hw, E1000_MPC);
5158	adapter->stats.scc += E1000_READ_REG(&adapter->hw, E1000_SCC);
5159	adapter->stats.ecol += E1000_READ_REG(&adapter->hw, E1000_ECOL);
5160
5161	adapter->stats.mcc += E1000_READ_REG(&adapter->hw, E1000_MCC);
5162	adapter->stats.latecol += E1000_READ_REG(&adapter->hw, E1000_LATECOL);
5163	adapter->stats.colc += E1000_READ_REG(&adapter->hw, E1000_COLC);
5164	adapter->stats.dc += E1000_READ_REG(&adapter->hw, E1000_DC);
5165	adapter->stats.rlec += E1000_READ_REG(&adapter->hw, E1000_RLEC);
5166	adapter->stats.xonrxc += E1000_READ_REG(&adapter->hw, E1000_XONRXC);
5167	adapter->stats.xontxc += E1000_READ_REG(&adapter->hw, E1000_XONTXC);
5168	/*
5169	** For watchdog management we need to know if we have been
5170	** paused during the last interval, so capture that here.
5171	*/
5172	adapter->pause_frames = E1000_READ_REG(&adapter->hw, E1000_XOFFRXC);
5173	adapter->stats.xoffrxc += adapter->pause_frames;
5174	adapter->stats.xofftxc += E1000_READ_REG(&adapter->hw, E1000_XOFFTXC);
5175	adapter->stats.fcruc += E1000_READ_REG(&adapter->hw, E1000_FCRUC);
5176	adapter->stats.prc64 += E1000_READ_REG(&adapter->hw, E1000_PRC64);
5177	adapter->stats.prc127 += E1000_READ_REG(&adapter->hw, E1000_PRC127);
5178	adapter->stats.prc255 += E1000_READ_REG(&adapter->hw, E1000_PRC255);
5179	adapter->stats.prc511 += E1000_READ_REG(&adapter->hw, E1000_PRC511);
5180	adapter->stats.prc1023 += E1000_READ_REG(&adapter->hw, E1000_PRC1023);
5181	adapter->stats.prc1522 += E1000_READ_REG(&adapter->hw, E1000_PRC1522);
5182	adapter->stats.gprc += E1000_READ_REG(&adapter->hw, E1000_GPRC);
5183	adapter->stats.bprc += E1000_READ_REG(&adapter->hw, E1000_BPRC);
5184	adapter->stats.mprc += E1000_READ_REG(&adapter->hw, E1000_MPRC);
5185	adapter->stats.gptc += E1000_READ_REG(&adapter->hw, E1000_GPTC);
5186
5187	/* For the 64-bit byte counters the low dword must be read first. */
5188	/* Both registers clear on the read of the high dword */
5189
5190	adapter->stats.gorc += E1000_READ_REG(&adapter->hw, E1000_GORCL) +
5191	    ((u64)E1000_READ_REG(&adapter->hw, E1000_GORCH) << 32);
5192	adapter->stats.gotc += E1000_READ_REG(&adapter->hw, E1000_GOTCL) +
5193	    ((u64)E1000_READ_REG(&adapter->hw, E1000_GOTCH) << 32);
5194
5195	adapter->stats.rnbc += E1000_READ_REG(&adapter->hw, E1000_RNBC);
5196	adapter->stats.ruc += E1000_READ_REG(&adapter->hw, E1000_RUC);
5197	adapter->stats.rfc += E1000_READ_REG(&adapter->hw, E1000_RFC);
5198	adapter->stats.roc += E1000_READ_REG(&adapter->hw, E1000_ROC);
5199	adapter->stats.rjc += E1000_READ_REG(&adapter->hw, E1000_RJC);
5200
5201	adapter->stats.tor += E1000_READ_REG(&adapter->hw, E1000_TORH);
5202	adapter->stats.tot += E1000_READ_REG(&adapter->hw, E1000_TOTH);
5203
5204	adapter->stats.tpr += E1000_READ_REG(&adapter->hw, E1000_TPR);
5205	adapter->stats.tpt += E1000_READ_REG(&adapter->hw, E1000_TPT);
5206	adapter->stats.ptc64 += E1000_READ_REG(&adapter->hw, E1000_PTC64);
5207	adapter->stats.ptc127 += E1000_READ_REG(&adapter->hw, E1000_PTC127);
5208	adapter->stats.ptc255 += E1000_READ_REG(&adapter->hw, E1000_PTC255);
5209	adapter->stats.ptc511 += E1000_READ_REG(&adapter->hw, E1000_PTC511);
5210	adapter->stats.ptc1023 += E1000_READ_REG(&adapter->hw, E1000_PTC1023);
5211	adapter->stats.ptc1522 += E1000_READ_REG(&adapter->hw, E1000_PTC1522);
5212	adapter->stats.mptc += E1000_READ_REG(&adapter->hw, E1000_MPTC);
5213	adapter->stats.bptc += E1000_READ_REG(&adapter->hw, E1000_BPTC);
5214
5215	/* Interrupt Counts */
5216
5217	adapter->stats.iac += E1000_READ_REG(&adapter->hw, E1000_IAC);
5218	adapter->stats.icrxptc += E1000_READ_REG(&adapter->hw, E1000_ICRXPTC);
5219	adapter->stats.icrxatc += E1000_READ_REG(&adapter->hw, E1000_ICRXATC);
5220	adapter->stats.ictxptc += E1000_READ_REG(&adapter->hw, E1000_ICTXPTC);
5221	adapter->stats.ictxatc += E1000_READ_REG(&adapter->hw, E1000_ICTXATC);
5222	adapter->stats.ictxqec += E1000_READ_REG(&adapter->hw, E1000_ICTXQEC);
5223	adapter->stats.ictxqmtc += E1000_READ_REG(&adapter->hw, E1000_ICTXQMTC);
5224	adapter->stats.icrxdmtc += E1000_READ_REG(&adapter->hw, E1000_ICRXDMTC);
5225	adapter->stats.icrxoc += E1000_READ_REG(&adapter->hw, E1000_ICRXOC);
5226
5227	if (adapter->hw.mac.type >= e1000_82543) {
5228		adapter->stats.algnerrc +=
5229		E1000_READ_REG(&adapter->hw, E1000_ALGNERRC);
5230		adapter->stats.rxerrc +=
5231		E1000_READ_REG(&adapter->hw, E1000_RXERRC);
5232		adapter->stats.tncrs +=
5233		E1000_READ_REG(&adapter->hw, E1000_TNCRS);
5234		adapter->stats.cexterr +=
5235		E1000_READ_REG(&adapter->hw, E1000_CEXTERR);
5236		adapter->stats.tsctc +=
5237		E1000_READ_REG(&adapter->hw, E1000_TSCTC);
5238		adapter->stats.tsctfc +=
5239		E1000_READ_REG(&adapter->hw, E1000_TSCTFC);
5240	}
5241	ifp = adapter->ifp;
5242
5243	ifp->if_collisions = adapter->stats.colc;
5244
5245	/* Rx Errors */
5246	ifp->if_ierrors = adapter->dropped_pkts + adapter->stats.rxerrc +
5247	    adapter->stats.crcerrs + adapter->stats.algnerrc +
5248	    adapter->stats.ruc + adapter->stats.roc +
5249	    adapter->stats.mpc + adapter->stats.cexterr;
5250
5251	/* Tx Errors */
5252	ifp->if_oerrors = adapter->stats.ecol +
5253	    adapter->stats.latecol + adapter->watchdog_events;
5254}
5255
5256/* Export a single 32-bit register via a read-only sysctl. */
5257static int
5258em_sysctl_reg_handler(SYSCTL_HANDLER_ARGS)
5259{
5260	struct adapter *adapter;
5261	u_int val;
5262
5263	adapter = oidp->oid_arg1;
5264	val = E1000_READ_REG(&adapter->hw, oidp->oid_arg2);
5265	return (sysctl_handle_int(oidp, &val, 0, req));
5266}
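
/*
 * Usage sketch (hedged): each register exported through this handler
 * appears as a read-only unsigned int under the device's sysctl tree,
 * e.g. on a hypothetical first unit:
 *
 *	sysctl dev.em.0.device_control
 *	sysctl dev.em.0.queue0.txd_head
 *
 * The node names are the ones registered in em_add_hw_stats() below.
 */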
5267
5268/*
5269 * Add sysctl variables, one per statistic, to the system.
5270 */
5271static void
5272em_add_hw_stats(struct adapter *adapter)
5273{
5274	device_t dev = adapter->dev;
5275
5276	struct tx_ring *txr = adapter->tx_rings;
5277	struct rx_ring *rxr = adapter->rx_rings;
5278
5279	struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev);
5280	struct sysctl_oid *tree = device_get_sysctl_tree(dev);
5281	struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree);
5282	struct e1000_hw_stats *stats = &adapter->stats;
5283
5284	struct sysctl_oid *stat_node, *queue_node, *int_node;
5285	struct sysctl_oid_list *stat_list, *queue_list, *int_list;
5286
5287#define QUEUE_NAME_LEN 32
5288	char namebuf[QUEUE_NAME_LEN];
5289
5290	/* Driver Statistics */
5291	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "link_irq",
5292			CTLFLAG_RD, &adapter->link_irq,
5293			"Link MSIX IRQ Handled");
5294	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "mbuf_alloc_fail",
5295			 CTLFLAG_RD, &adapter->mbuf_alloc_failed,
5296			 "Std mbuf failed");
5297	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "cluster_alloc_fail",
5298			 CTLFLAG_RD, &adapter->mbuf_cluster_failed,
5299			 "Std mbuf cluster failed");
5300	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "dropped",
5301			CTLFLAG_RD, &adapter->dropped_pkts,
5302			"Driver dropped packets");
5303	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_dma_fail",
5304			CTLFLAG_RD, &adapter->no_tx_dma_setup,
5305			"Driver tx dma failure in xmit");
5306	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_overruns",
5307			CTLFLAG_RD, &adapter->rx_overruns,
5308			"RX overruns");
5309	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "watchdog_timeouts",
5310			CTLFLAG_RD, &adapter->watchdog_events,
5311			"Watchdog timeouts");
5312
5313	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "device_control",
5314			CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_CTRL,
5315			em_sysctl_reg_handler, "IU",
5316			"Device Control Register");
5317	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rx_control",
5318			CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_RCTL,
5319			em_sysctl_reg_handler, "IU",
5320			"Receiver Control Register");
5321	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_high_water",
5322			CTLFLAG_RD, &adapter->hw.fc.high_water, 0,
5323			"Flow Control High Watermark");
5324	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_low_water",
5325			CTLFLAG_RD, &adapter->hw.fc.low_water, 0,
5326			"Flow Control Low Watermark");
5327
5328	for (int i = 0; i < adapter->num_queues; i++, rxr++, txr++) {
5329		snprintf(namebuf, QUEUE_NAME_LEN, "queue%d", i);
5330		queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
5331					    CTLFLAG_RD, NULL, "Queue Name");
5332		queue_list = SYSCTL_CHILDREN(queue_node);
5333
5334		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_head",
5335				CTLTYPE_UINT | CTLFLAG_RD, adapter,
5336				E1000_TDH(txr->me),
5337				em_sysctl_reg_handler, "IU",
5338				"Transmit Descriptor Head");
5339		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_tail",
5340				CTLTYPE_UINT | CTLFLAG_RD, adapter,
5341				E1000_TDT(txr->me),
5342				em_sysctl_reg_handler, "IU",
5343				"Transmit Descriptor Tail");
5344		SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "tx_irq",
5345				CTLFLAG_RD, &txr->tx_irq,
5346				"Queue MSI-X Transmit Interrupts");
5347		SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "no_desc_avail",
5348				CTLFLAG_RD, &txr->no_desc_avail,
5349				"Queue No Descriptor Available");
5350
5351		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_head",
5352				CTLTYPE_UINT | CTLFLAG_RD, adapter,
5353				E1000_RDH(rxr->me),
5354				em_sysctl_reg_handler, "IU",
5355				"Receive Descriptor Head");
5356		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_tail",
5357				CTLTYPE_UINT | CTLFLAG_RD, adapter,
5358				E1000_RDT(rxr->me),
5359				em_sysctl_reg_handler, "IU",
5360				"Receive Descriptor Tail");
5361		SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "rx_irq",
5362				CTLFLAG_RD, &rxr->rx_irq,
5363				"Queue MSI-X Receive Interrupts");
5364	}
5365
5366	/* MAC stats get their own sub node */
5367
5368	stat_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "mac_stats",
5369				    CTLFLAG_RD, NULL, "Statistics");
5370	stat_list = SYSCTL_CHILDREN(stat_node);
5371
5372	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "excess_coll",
5373			CTLFLAG_RD, &stats->ecol,
5374			"Excessive collisions");
5375	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "single_coll",
5376			CTLFLAG_RD, &stats->scc,
5377			"Single collisions");
5378	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "multiple_coll",
5379			CTLFLAG_RD, &stats->mcc,
5380			"Multiple collisions");
5381	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "late_coll",
5382			CTLFLAG_RD, &stats->latecol,
5383			"Late collisions");
5384	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "collision_count",
5385			CTLFLAG_RD, &stats->colc,
5386			"Collision Count");
5387	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "symbol_errors",
5388			CTLFLAG_RD, &adapter->stats.symerrs,
5389			"Symbol Errors");
5390	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "sequence_errors",
5391			CTLFLAG_RD, &adapter->stats.sec,
5392			"Sequence Errors");
5393	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "defer_count",
5394			CTLFLAG_RD, &adapter->stats.dc,
5395			"Defer Count");
5396	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "missed_packets",
5397			CTLFLAG_RD, &adapter->stats.mpc,
5398			"Missed Packets");
5399	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_no_buff",
5400			CTLFLAG_RD, &adapter->stats.rnbc,
5401			"Receive No Buffers");
5402	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_undersize",
5403			CTLFLAG_RD, &adapter->stats.ruc,
5404			"Receive Undersize");
5405	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_fragmented",
5406			CTLFLAG_RD, &adapter->stats.rfc,
5407			"Fragmented Packets Received");
5408	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_oversize",
5409			CTLFLAG_RD, &adapter->stats.roc,
5410			"Oversized Packets Received");
5411	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_jabber",
5412			CTLFLAG_RD, &adapter->stats.rjc,
5413			"Received Jabber");
5414	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_errs",
5415			CTLFLAG_RD, &adapter->stats.rxerrc,
5416			"Receive Errors");
5417	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "crc_errs",
5418			CTLFLAG_RD, &adapter->stats.crcerrs,
5419			"CRC errors");
5420	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "alignment_errs",
5421			CTLFLAG_RD, &adapter->stats.algnerrc,
5422			"Alignment Errors");
5423	/* On 82575 these are collision counts */
5424	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "coll_ext_errs",
5425			CTLFLAG_RD, &adapter->stats.cexterr,
5426			"Collision/Carrier extension errors");
5427	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_recvd",
5428			CTLFLAG_RD, &adapter->stats.xonrxc,
5429			"XON Received");
5430	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_txd",
5431			CTLFLAG_RD, &adapter->stats.xontxc,
5432			"XON Transmitted");
5433	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_recvd",
5434			CTLFLAG_RD, &adapter->stats.xoffrxc,
5435			"XOFF Received");
5436	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_txd",
5437			CTLFLAG_RD, &adapter->stats.xofftxc,
5438			"XOFF Transmitted");
5439
5440	/* Packet Reception Stats */
5441	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_recvd",
5442			CTLFLAG_RD, &adapter->stats.tpr,
5443			"Total Packets Received");
5444	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_recvd",
5445			CTLFLAG_RD, &adapter->stats.gprc,
5446			"Good Packets Received");
5447	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_recvd",
5448			CTLFLAG_RD, &adapter->stats.bprc,
5449			"Broadcast Packets Received");
5450	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_recvd",
5451			CTLFLAG_RD, &adapter->stats.mprc,
5452			"Multicast Packets Received");
5453	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_64",
5454			CTLFLAG_RD, &adapter->stats.prc64,
5455			"64 byte frames received");
5456	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_65_127",
5457			CTLFLAG_RD, &adapter->stats.prc127,
5458			"65-127 byte frames received");
5459	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_128_255",
5460			CTLFLAG_RD, &adapter->stats.prc255,
5461			"128-255 byte frames received");
5462	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_256_511",
5463			CTLFLAG_RD, &adapter->stats.prc511,
5464			"256-511 byte frames received");
5465	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_512_1023",
5466			CTLFLAG_RD, &adapter->stats.prc1023,
5467			"512-1023 byte frames received");
5468	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_1024_1522",
5469			CTLFLAG_RD, &adapter->stats.prc1522,
5470			"1024-1522 byte frames received");
5471	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_recvd",
5472			CTLFLAG_RD, &adapter->stats.gorc,
5473			"Good Octets Received");
5474
5475	/* Packet Transmission Stats */
5476	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_txd",
5477			CTLFLAG_RD, &adapter->stats.gotc,
5478			"Good Octets Transmitted");
5479	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_txd",
5480			CTLFLAG_RD, &adapter->stats.tpt,
5481			"Total Packets Transmitted");
5482	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd",
5483			CTLFLAG_RD, &adapter->stats.gptc,
5484			"Good Packets Transmitted");
5485	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_txd",
5486			CTLFLAG_RD, &adapter->stats.bptc,
5487			"Broadcast Packets Transmitted");
5488	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_txd",
5489			CTLFLAG_RD, &adapter->stats.mptc,
5490			"Multicast Packets Transmitted");
5491	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_64",
5492			CTLFLAG_RD, &adapter->stats.ptc64,
5493			"64 byte frames transmitted");
5494	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_65_127",
5495			CTLFLAG_RD, &adapter->stats.ptc127,
5496			"65-127 byte frames transmitted");
5497	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_128_255",
5498			CTLFLAG_RD, &adapter->stats.ptc255,
5499			"128-255 byte frames transmitted");
5500	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_256_511",
5501			CTLFLAG_RD, &adapter->stats.ptc511,
5502			"256-511 byte frames transmitted");
5503	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_512_1023",
5504			CTLFLAG_RD, &adapter->stats.ptc1023,
5505			"512-1023 byte frames transmitted");
5506	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_1024_1522",
5507			CTLFLAG_RD, &adapter->stats.ptc1522,
5508			"1024-1522 byte frames transmitted");
5509	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tso_txd",
5510			CTLFLAG_RD, &adapter->stats.tsctc,
5511			"TSO Contexts Transmitted");
5512	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tso_ctx_fail",
5513			CTLFLAG_RD, &adapter->stats.tsctfc,
5514			"TSO Contexts Failed");
5515
5516
5517	/* Interrupt Stats */
5518
5519	int_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "interrupts",
5520				    CTLFLAG_RD, NULL, "Interrupt Statistics");
5521	int_list = SYSCTL_CHILDREN(int_node);
5522
5523	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "asserts",
5524			CTLFLAG_RD, &adapter->stats.iac,
5525			"Interrupt Assertion Count");
5526
5527	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_pkt_timer",
5528			CTLFLAG_RD, &adapter->stats.icrxptc,
5529			"Interrupt Cause Rx Pkt Timer Expire Count");
5530
5531	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_abs_timer",
5532			CTLFLAG_RD, &adapter->stats.icrxatc,
5533			"Interrupt Cause Rx Abs Timer Expire Count");
5534
5535	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_pkt_timer",
5536			CTLFLAG_RD, &adapter->stats.ictxptc,
5537			"Interrupt Cause Tx Pkt Timer Expire Count");
5538
5539	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_abs_timer",
5540			CTLFLAG_RD, &adapter->stats.ictxatc,
5541			"Interrupt Cause Tx Abs Timer Expire Count");
5542
5543	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_queue_empty",
5544			CTLFLAG_RD, &adapter->stats.ictxqec,
5545			"Interrupt Cause Tx Queue Empty Count");
5546
5547	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_queue_min_thresh",
5548			CTLFLAG_RD, &adapter->stats.ictxqmtc,
5549			"Interrupt Cause Tx Queue Min Thresh Count");
5550
5551	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_desc_min_thresh",
5552			CTLFLAG_RD, &adapter->stats.icrxdmtc,
5553			"Interrupt Cause Rx Desc Min Thresh Count");
5554
5555	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_overrun",
5556			CTLFLAG_RD, &adapter->stats.icrxoc,
5557			"Interrupt Cause Receiver Overrun Count");
5558}
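
/*
 * Resulting tree (a sketch, assuming unit 0): driver counters hang
 * directly off dev.em.0, per-queue registers off dev.em.0.queueN,
 * MAC counters off dev.em.0.mac_stats, and interrupt-cause counters
 * off dev.em.0.interrupts, for example:
 *
 *	sysctl dev.em.0.mac_stats.crc_errs
 *	sysctl dev.em.0.interrupts.asserts
 */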
5559
5560/**********************************************************************
5561 *
5562 *  This routine provides a way to dump out the adapter eeprom,
5563 *  often a useful debug/service tool. Only the first 32 words are
5564 *  dumped; the fields that matter live within that range.
5565 *
5566 **********************************************************************/
5567static int
5568em_sysctl_nvm_info(SYSCTL_HANDLER_ARGS)
5569{
5570	struct adapter *adapter = (struct adapter *)arg1;
5571	int error;
5572	int result;
5573
5574	result = -1;
5575	error = sysctl_handle_int(oidp, &result, 0, req);
5576
5577	if (error || !req->newptr)
5578		return (error);
5579
5580	/*
5581	 * This value will cause a hex dump of the
5582	 * first 32 16-bit words of the EEPROM to
5583	 * the screen.
5584	 */
5585	if (result == 1)
5586		em_print_nvm_info(adapter);
5587
5588	return (error);
5589}
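
/*
 * Example (hedged -- assumes the oid this handler is attached under
 * is named "nvm", as in the attach path): writing a 1 triggers the
 * dump, e.g. "sysctl dev.em.0.nvm=1".
 */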
5590
5591static void
5592em_print_nvm_info(struct adapter *adapter)
5593{
5594	u16	eeprom_data;
5595	int	i, j, row = 0;
5596
5597	/* It's a bit crude, but it gets the job done */
5598	printf("\nInterface EEPROM Dump:\n");
5599	printf("Offset\n0x0000  ");
5600	for (i = 0, j = 0; i < 32; i++, j++) {
5601		if (j == 8) { /* start a new row with its offset label */
5602			j = 0; ++row;
5603			printf("\n0x00%x0  ", row);
5604		}
5605		e1000_read_nvm(&adapter->hw, i, 1, &eeprom_data);
5606		printf("%04x ", eeprom_data);
5607	}
5608	printf("\n");
5609}
5610
5611static int
5612em_sysctl_int_delay(SYSCTL_HANDLER_ARGS)
5613{
5614	struct em_int_delay_info *info;
5615	struct adapter *adapter;
5616	u32 regval;
5617	int error, usecs, ticks;
5618
5619	info = (struct em_int_delay_info *)arg1;
5620	usecs = info->value;
5621	error = sysctl_handle_int(oidp, &usecs, 0, req);
5622	if (error != 0 || req->newptr == NULL)
5623		return (error);
5624	if (usecs < 0 || usecs > EM_TICKS_TO_USECS(65535))
5625		return (EINVAL);
5626	info->value = usecs;
5627	ticks = EM_USECS_TO_TICKS(usecs);
5628	if (info->offset == E1000_ITR)	/* units are 256ns here */
5629		ticks *= 4;
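	/*
	 * Worked example (assuming the usual 1.024us granularity of
	 * EM_USECS_TO_TICKS): a request of 100us becomes roughly 98
	 * ticks; for E1000_ITR, which counts 256ns units, the scaling
	 * above yields about 392.
	 */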
5630
5631	adapter = info->adapter;
5632
5633	EM_CORE_LOCK(adapter);
5634	regval = E1000_READ_OFFSET(&adapter->hw, info->offset);
5635	regval = (regval & ~0xffff) | (ticks & 0xffff);
5636	/* Handle a few special cases. */
5637	switch (info->offset) {
5638	case E1000_RDTR:
5639		break;
5640	case E1000_TIDV:
5641		if (ticks == 0) {
5642			adapter->txd_cmd &= ~E1000_TXD_CMD_IDE;
5643			/* Don't write 0 into the TIDV register. */
5644			regval++;
5645		} else
5646			adapter->txd_cmd |= E1000_TXD_CMD_IDE;
5647		break;
5648	}
5649	E1000_WRITE_OFFSET(&adapter->hw, info->offset, regval);
5650	EM_CORE_UNLOCK(adapter);
5651	return (0);
5652}
5653
5654static void
5655em_add_int_delay_sysctl(struct adapter *adapter, const char *name,
5656	const char *description, struct em_int_delay_info *info,
5657	int offset, int value)
5658{
5659	info->adapter = adapter;
5660	info->offset = offset;
5661	info->value = value;
5662	SYSCTL_ADD_PROC(device_get_sysctl_ctx(adapter->dev),
5663	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
5664	    OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW,
5665	    info, 0, em_sysctl_int_delay, "I", description);
5666}
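
/*
 * Attach-time usage (a sketch -- the oid name, field and default shown
 * here follow the em attach path and may differ by revision):
 *
 *	em_add_int_delay_sysctl(adapter, "rx_int_delay",
 *	    "receive interrupt delay in usecs", &adapter->rx_int_delay,
 *	    E1000_REGISTER(&adapter->hw, E1000_RDTR), em_rx_int_delay_dflt);
 */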
5667
5668static void
5669em_set_sysctl_value(struct adapter *adapter, const char *name,
5670	const char *description, int *limit, int value)
5671{
5672	*limit = value;
5673	SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev),
5674	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
5675	    OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW, limit, value, description);
5676}
5677
5678
5679/*
5680** Set flow control using sysctl:
5681** Flow control values:
5682**      0 - off
5683**      1 - rx pause
5684**      2 - tx pause
5685**      3 - full
5686*/
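/*
** Usage sketch (hedged -- assumes the oid is registered as "fc" on a
** hypothetical unit 0): "sysctl dev.em.0.fc=3" requests full flow
** control, which the handler forces onto the MAC immediately.
*/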
5687static int
5688em_set_flowcntl(SYSCTL_HANDLER_ARGS)
5689{
5690	int		error, input;
5691	struct adapter	*adapter = (struct adapter *) arg1;
5692
5693	input = adapter->fc;	/* report the current mode */
5694	error = sysctl_handle_int(oidp, &input, 0, req);
5695
5696	if ((error) || (req->newptr == NULL))
5697		return (error);
5698
5699	if (input == adapter->fc) /* no change? */
5700		return (error);
5701
5702	switch (input) {
5703	case e1000_fc_rx_pause:
5704	case e1000_fc_tx_pause:
5705	case e1000_fc_full:
5706	case e1000_fc_none:
5707		adapter->hw.fc.requested_mode = input;
5708		adapter->fc = input;
5709		break;
5710	default:
5711		/* Do nothing */
5712		return (error);
5713	}
5714
5715	adapter->hw.fc.current_mode = adapter->hw.fc.requested_mode;
5716	e1000_force_mac_fc(&adapter->hw);
5717	return (error);
5718}
5719
5720/*
5721** Manage Energy Efficient Ethernet:
5722** Control values:
5723**     0 - EEE enabled, 1 - EEE disabled
5724*/
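/*
** For example (hedged -- assumes the oid name "eee_control" on a
** hypothetical unit 0): "sysctl dev.em.0.eee_control=1" sets
** eee_disable and reinitializes the interface to apply it.
*/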
5725static int
5726em_sysctl_eee(SYSCTL_HANDLER_ARGS)
5727{
5728	struct adapter *adapter = (struct adapter *) arg1;
5729	int		error, value;
5730
5731	value = adapter->hw.dev_spec.ich8lan.eee_disable;
5732	error = sysctl_handle_int(oidp, &value, 0, req);
5733	if (error || req->newptr == NULL)
5734		return (error);
5735	EM_CORE_LOCK(adapter);
5736	adapter->hw.dev_spec.ich8lan.eee_disable = (value != 0);
5737	em_init_locked(adapter);
5738	EM_CORE_UNLOCK(adapter);
5739	return (0);
5740}
5741
5742static int
5743em_sysctl_debug_info(SYSCTL_HANDLER_ARGS)
5744{
5745	struct adapter *adapter;
5746	int error;
5747	int result;
5748
5749	result = -1;
5750	error = sysctl_handle_int(oidp, &result, 0, req);
5751
5752	if (error || !req->newptr)
5753		return (error);
5754
5755	if (result == 1) {
5756		adapter = (struct adapter *)arg1;
5757		em_print_debug_info(adapter);
5758	}
5759
5760	return (error);
5761}
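
/*
 * Example (hedged -- assumes this handler is attached under the oid
 * "debug"): "sysctl dev.em.0.debug=1" triggers em_print_debug_info().
 */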
5762
5763/*
5764** This routine is meant to be fluid, add whatever is
5765** needed for debugging a problem.  -jfv
5766*/
5767static void
5768em_print_debug_info(struct adapter *adapter)
5769{
5770	device_t dev = adapter->dev;
5771	struct tx_ring *txr = adapter->tx_rings;
5772	struct rx_ring *rxr = adapter->rx_rings;
5773
5774	if (adapter->ifp->if_drv_flags & IFF_DRV_RUNNING)
5775		printf("Interface is RUNNING ");
5776	else
5777		printf("Interface is NOT RUNNING ");
5778
5779	if (adapter->ifp->if_drv_flags & IFF_DRV_OACTIVE)
5780		printf("and INACTIVE\n");
5781	else
5782		printf("and ACTIVE\n");
5783
5784	device_printf(dev, "hw tdh = %d, hw tdt = %d\n",
5785	    E1000_READ_REG(&adapter->hw, E1000_TDH(0)),
5786	    E1000_READ_REG(&adapter->hw, E1000_TDT(0)));
5787	device_printf(dev, "hw rdh = %d, hw rdt = %d\n",
5788	    E1000_READ_REG(&adapter->hw, E1000_RDH(0)),
5789	    E1000_READ_REG(&adapter->hw, E1000_RDT(0)));
5790	device_printf(dev, "Tx Queue Status = %d\n", txr->queue_status);
5791	device_printf(dev, "TX descriptors avail = %d\n",
5792	    txr->tx_avail);
5793	device_printf(dev, "Tx Descriptors avail failure = %ld\n",
5794	    txr->no_desc_avail);
5795	device_printf(dev, "RX discarded packets = %ld\n",
5796	    rxr->rx_discarded);
5797	device_printf(dev, "RX Next to Check = %d\n", rxr->next_to_check);
5798	device_printf(dev, "RX Next to Refresh = %d\n", rxr->next_to_refresh);
5799}
5800