if_em.c revision 252899
1/******************************************************************************
2
3  Copyright (c) 2001-2013, Intel Corporation
4  All rights reserved.
5
6  Redistribution and use in source and binary forms, with or without
7  modification, are permitted provided that the following conditions are met:
8
9   1. Redistributions of source code must retain the above copyright notice,
10      this list of conditions and the following disclaimer.
11
12   2. Redistributions in binary form must reproduce the above copyright
13      notice, this list of conditions and the following disclaimer in the
14      documentation and/or other materials provided with the distribution.
15
16   3. Neither the name of the Intel Corporation nor the names of its
17      contributors may be used to endorse or promote products derived from
18      this software without specific prior written permission.
19
20  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
24  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30  POSSIBILITY OF SUCH DAMAGE.
31
32******************************************************************************/
33/*$FreeBSD: stable/9/sys/dev/e1000/if_em.c 252899 2013-07-06 22:34:42Z jfv $*/
34
35#ifdef HAVE_KERNEL_OPTION_HEADERS
36#include "opt_device_polling.h"
37#include "opt_inet.h"
38#include "opt_inet6.h"
39#endif
40
41#include <sys/param.h>
42#include <sys/systm.h>
43#if __FreeBSD_version >= 800000
44#include <sys/buf_ring.h>
45#endif
46#include <sys/bus.h>
47#include <sys/endian.h>
48#include <sys/kernel.h>
49#include <sys/kthread.h>
50#include <sys/malloc.h>
51#include <sys/mbuf.h>
52#include <sys/module.h>
53#include <sys/rman.h>
54#include <sys/socket.h>
55#include <sys/sockio.h>
56#include <sys/sysctl.h>
57#include <sys/taskqueue.h>
58#include <sys/eventhandler.h>
59#include <machine/bus.h>
60#include <machine/resource.h>
61
62#include <net/bpf.h>
63#include <net/ethernet.h>
64#include <net/if.h>
65#include <net/if_arp.h>
66#include <net/if_dl.h>
67#include <net/if_media.h>
68
69#include <net/if_types.h>
70#include <net/if_vlan_var.h>
71
72#include <netinet/in_systm.h>
73#include <netinet/in.h>
74#include <netinet/if_ether.h>
75#include <netinet/ip.h>
76#include <netinet/ip6.h>
77#include <netinet/tcp.h>
78#include <netinet/udp.h>
79
80#include <machine/in_cksum.h>
81#include <dev/led/led.h>
82#include <dev/pci/pcivar.h>
83#include <dev/pci/pcireg.h>
84
85#include "e1000_api.h"
86#include "e1000_82571.h"
87#include "if_em.h"
88
89/*********************************************************************
90 *  Set this to one to display debug statistics
91 *********************************************************************/
92int	em_display_debug_stats = 0;
93
94/*********************************************************************
95 *  Driver version:
96 *********************************************************************/
97char em_driver_version[] = "7.3.8";
98
99/*********************************************************************
100 *  PCI Device ID Table
101 *
102 *  Used by probe to select devices to load on
103 *  Last field stores an index into e1000_strings
104 *  Last entry must be all 0s
105 *
106 *  { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index }
107 *********************************************************************/
108
109static em_vendor_info_t em_vendor_info_array[] =
110{
111	/* Intel(R) PRO/1000 Network Connection */
112	{ 0x8086, E1000_DEV_ID_82571EB_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
113	{ 0x8086, E1000_DEV_ID_82571EB_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
114	{ 0x8086, E1000_DEV_ID_82571EB_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
115	{ 0x8086, E1000_DEV_ID_82571EB_SERDES_DUAL,
116						PCI_ANY_ID, PCI_ANY_ID, 0},
117	{ 0x8086, E1000_DEV_ID_82571EB_SERDES_QUAD,
118						PCI_ANY_ID, PCI_ANY_ID, 0},
119	{ 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER,
120						PCI_ANY_ID, PCI_ANY_ID, 0},
121	{ 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER_LP,
122						PCI_ANY_ID, PCI_ANY_ID, 0},
123	{ 0x8086, E1000_DEV_ID_82571EB_QUAD_FIBER,
124						PCI_ANY_ID, PCI_ANY_ID, 0},
125	{ 0x8086, E1000_DEV_ID_82571PT_QUAD_COPPER,
126						PCI_ANY_ID, PCI_ANY_ID, 0},
127	{ 0x8086, E1000_DEV_ID_82572EI_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
128	{ 0x8086, E1000_DEV_ID_82572EI_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
129	{ 0x8086, E1000_DEV_ID_82572EI_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
130	{ 0x8086, E1000_DEV_ID_82572EI,		PCI_ANY_ID, PCI_ANY_ID, 0},
131
132	{ 0x8086, E1000_DEV_ID_82573E,		PCI_ANY_ID, PCI_ANY_ID, 0},
133	{ 0x8086, E1000_DEV_ID_82573E_IAMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
134	{ 0x8086, E1000_DEV_ID_82573L,		PCI_ANY_ID, PCI_ANY_ID, 0},
135	{ 0x8086, E1000_DEV_ID_82583V,		PCI_ANY_ID, PCI_ANY_ID, 0},
136	{ 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_SPT,
137						PCI_ANY_ID, PCI_ANY_ID, 0},
138	{ 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_SPT,
139						PCI_ANY_ID, PCI_ANY_ID, 0},
140	{ 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_DPT,
141						PCI_ANY_ID, PCI_ANY_ID, 0},
142	{ 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_DPT,
143						PCI_ANY_ID, PCI_ANY_ID, 0},
144	{ 0x8086, E1000_DEV_ID_ICH8_IGP_M_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
145	{ 0x8086, E1000_DEV_ID_ICH8_IGP_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
146	{ 0x8086, E1000_DEV_ID_ICH8_IGP_C,	PCI_ANY_ID, PCI_ANY_ID, 0},
147	{ 0x8086, E1000_DEV_ID_ICH8_IFE,	PCI_ANY_ID, PCI_ANY_ID, 0},
148	{ 0x8086, E1000_DEV_ID_ICH8_IFE_GT,	PCI_ANY_ID, PCI_ANY_ID, 0},
149	{ 0x8086, E1000_DEV_ID_ICH8_IFE_G,	PCI_ANY_ID, PCI_ANY_ID, 0},
150	{ 0x8086, E1000_DEV_ID_ICH8_IGP_M,	PCI_ANY_ID, PCI_ANY_ID, 0},
151	{ 0x8086, E1000_DEV_ID_ICH8_82567V_3,	PCI_ANY_ID, PCI_ANY_ID, 0},
152	{ 0x8086, E1000_DEV_ID_ICH9_IGP_M_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
153	{ 0x8086, E1000_DEV_ID_ICH9_IGP_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
154	{ 0x8086, E1000_DEV_ID_ICH9_IGP_C,	PCI_ANY_ID, PCI_ANY_ID, 0},
155	{ 0x8086, E1000_DEV_ID_ICH9_IGP_M,	PCI_ANY_ID, PCI_ANY_ID, 0},
156	{ 0x8086, E1000_DEV_ID_ICH9_IGP_M_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
157	{ 0x8086, E1000_DEV_ID_ICH9_IFE,	PCI_ANY_ID, PCI_ANY_ID, 0},
158	{ 0x8086, E1000_DEV_ID_ICH9_IFE_GT,	PCI_ANY_ID, PCI_ANY_ID, 0},
159	{ 0x8086, E1000_DEV_ID_ICH9_IFE_G,	PCI_ANY_ID, PCI_ANY_ID, 0},
160	{ 0x8086, E1000_DEV_ID_ICH9_BM,		PCI_ANY_ID, PCI_ANY_ID, 0},
161	{ 0x8086, E1000_DEV_ID_82574L,		PCI_ANY_ID, PCI_ANY_ID, 0},
162	{ 0x8086, E1000_DEV_ID_82574LA,		PCI_ANY_ID, PCI_ANY_ID, 0},
163	{ 0x8086, E1000_DEV_ID_ICH10_R_BM_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
164	{ 0x8086, E1000_DEV_ID_ICH10_R_BM_LF,	PCI_ANY_ID, PCI_ANY_ID, 0},
165	{ 0x8086, E1000_DEV_ID_ICH10_R_BM_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
166	{ 0x8086, E1000_DEV_ID_ICH10_D_BM_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
167	{ 0x8086, E1000_DEV_ID_ICH10_D_BM_LF,	PCI_ANY_ID, PCI_ANY_ID, 0},
168	{ 0x8086, E1000_DEV_ID_ICH10_D_BM_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
169	{ 0x8086, E1000_DEV_ID_PCH_M_HV_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
170	{ 0x8086, E1000_DEV_ID_PCH_M_HV_LC,	PCI_ANY_ID, PCI_ANY_ID, 0},
171	{ 0x8086, E1000_DEV_ID_PCH_D_HV_DM,	PCI_ANY_ID, PCI_ANY_ID, 0},
172	{ 0x8086, E1000_DEV_ID_PCH_D_HV_DC,	PCI_ANY_ID, PCI_ANY_ID, 0},
173	{ 0x8086, E1000_DEV_ID_PCH2_LV_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
174	{ 0x8086, E1000_DEV_ID_PCH2_LV_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
175	{ 0x8086, E1000_DEV_ID_PCH_LPT_I217_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
176	{ 0x8086, E1000_DEV_ID_PCH_LPT_I217_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
177	{ 0x8086, E1000_DEV_ID_PCH_LPTLP_I218_LM,
178						PCI_ANY_ID, PCI_ANY_ID, 0},
179	{ 0x8086, E1000_DEV_ID_PCH_LPTLP_I218_V,
180						PCI_ANY_ID, PCI_ANY_ID, 0},
181	/* required last entry */
182	{ 0, 0, 0, 0, 0}
183};
184
185/*********************************************************************
186 *  Table of branding strings for all supported NICs.
187 *********************************************************************/
188
189static char *em_strings[] = {
190	"Intel(R) PRO/1000 Network Connection"
191};
192
193/*********************************************************************
194 *  Function prototypes
195 *********************************************************************/
196static int	em_probe(device_t);
197static int	em_attach(device_t);
198static int	em_detach(device_t);
199static int	em_shutdown(device_t);
200static int	em_suspend(device_t);
201static int	em_resume(device_t);
202#ifdef EM_MULTIQUEUE
203static int	em_mq_start(struct ifnet *, struct mbuf *);
204static int	em_mq_start_locked(struct ifnet *,
205		    struct tx_ring *, struct mbuf *);
206static void	em_qflush(struct ifnet *);
207#else
208static void	em_start(struct ifnet *);
209static void	em_start_locked(struct ifnet *, struct tx_ring *);
210#endif
211static int	em_ioctl(struct ifnet *, u_long, caddr_t);
212static void	em_init(void *);
213static void	em_init_locked(struct adapter *);
214static void	em_stop(void *);
215static void	em_media_status(struct ifnet *, struct ifmediareq *);
216static int	em_media_change(struct ifnet *);
217static void	em_identify_hardware(struct adapter *);
218static int	em_allocate_pci_resources(struct adapter *);
219static int	em_allocate_legacy(struct adapter *);
220static int	em_allocate_msix(struct adapter *);
221static int	em_allocate_queues(struct adapter *);
222static int	em_setup_msix(struct adapter *);
223static void	em_free_pci_resources(struct adapter *);
224static void	em_local_timer(void *);
225static void	em_reset(struct adapter *);
226static int	em_setup_interface(device_t, struct adapter *);
227
228static void	em_setup_transmit_structures(struct adapter *);
229static void	em_initialize_transmit_unit(struct adapter *);
230static int	em_allocate_transmit_buffers(struct tx_ring *);
231static void	em_free_transmit_structures(struct adapter *);
232static void	em_free_transmit_buffers(struct tx_ring *);
233
234static int	em_setup_receive_structures(struct adapter *);
235static int	em_allocate_receive_buffers(struct rx_ring *);
236static void	em_initialize_receive_unit(struct adapter *);
237static void	em_free_receive_structures(struct adapter *);
238static void	em_free_receive_buffers(struct rx_ring *);
239
240static void	em_enable_intr(struct adapter *);
241static void	em_disable_intr(struct adapter *);
242static void	em_update_stats_counters(struct adapter *);
243static void	em_add_hw_stats(struct adapter *adapter);
244static void	em_txeof(struct tx_ring *);
245static bool	em_rxeof(struct rx_ring *, int, int *);
246#ifndef __NO_STRICT_ALIGNMENT
247static int	em_fixup_rx(struct rx_ring *);
248#endif
249static void	em_receive_checksum(struct e1000_rx_desc *, struct mbuf *);
250static void	em_transmit_checksum_setup(struct tx_ring *, struct mbuf *, int,
251		    struct ip *, u32 *, u32 *);
252static void	em_tso_setup(struct tx_ring *, struct mbuf *, int, struct ip *,
253		    struct tcphdr *, u32 *, u32 *);
254static void	em_set_promisc(struct adapter *);
255static void	em_disable_promisc(struct adapter *);
256static void	em_set_multi(struct adapter *);
257static void	em_update_link_status(struct adapter *);
258static void	em_refresh_mbufs(struct rx_ring *, int);
259static void	em_register_vlan(void *, struct ifnet *, u16);
260static void	em_unregister_vlan(void *, struct ifnet *, u16);
261static void	em_setup_vlan_hw_support(struct adapter *);
262static int	em_xmit(struct tx_ring *, struct mbuf **);
263static int	em_dma_malloc(struct adapter *, bus_size_t,
264		    struct em_dma_alloc *, int);
265static void	em_dma_free(struct adapter *, struct em_dma_alloc *);
266static int	em_sysctl_nvm_info(SYSCTL_HANDLER_ARGS);
267static void	em_print_nvm_info(struct adapter *);
268static int	em_sysctl_debug_info(SYSCTL_HANDLER_ARGS);
269static void	em_print_debug_info(struct adapter *);
270static int 	em_is_valid_ether_addr(u8 *);
271static int	em_sysctl_int_delay(SYSCTL_HANDLER_ARGS);
272static void	em_add_int_delay_sysctl(struct adapter *, const char *,
273		    const char *, struct em_int_delay_info *, int, int);
274/* Management and WOL Support */
275static void	em_init_manageability(struct adapter *);
276static void	em_release_manageability(struct adapter *);
277static void     em_get_hw_control(struct adapter *);
278static void     em_release_hw_control(struct adapter *);
279static void	em_get_wakeup(device_t);
280static void     em_enable_wakeup(device_t);
281static int	em_enable_phy_wakeup(struct adapter *);
282static void	em_led_func(void *, int);
283static void	em_disable_aspm(struct adapter *);
284
285static int	em_irq_fast(void *);
286
287/* MSIX handlers */
288static void	em_msix_tx(void *);
289static void	em_msix_rx(void *);
290static void	em_msix_link(void *);
291static void	em_handle_tx(void *context, int pending);
292static void	em_handle_rx(void *context, int pending);
293static void	em_handle_link(void *context, int pending);
294
295static void	em_set_sysctl_value(struct adapter *, const char *,
296		    const char *, int *, int);
297static int	em_set_flowcntl(SYSCTL_HANDLER_ARGS);
298static int	em_sysctl_eee(SYSCTL_HANDLER_ARGS);
299
300static __inline void em_rx_discard(struct rx_ring *, int);
301
302#ifdef DEVICE_POLLING
303static poll_handler_t em_poll;
304#endif /* POLLING */
305
306/*********************************************************************
307 *  FreeBSD Device Interface Entry Points
308 *********************************************************************/
309
310static device_method_t em_methods[] = {
311	/* Device interface */
312	DEVMETHOD(device_probe, em_probe),
313	DEVMETHOD(device_attach, em_attach),
314	DEVMETHOD(device_detach, em_detach),
315	DEVMETHOD(device_shutdown, em_shutdown),
316	DEVMETHOD(device_suspend, em_suspend),
317	DEVMETHOD(device_resume, em_resume),
318	DEVMETHOD_END
319};
320
321static driver_t em_driver = {
322	"em", em_methods, sizeof(struct adapter),
323};
324
325devclass_t em_devclass;
326DRIVER_MODULE(em, pci, em_driver, em_devclass, 0, 0);
327MODULE_DEPEND(em, pci, 1, 1, 1);
328MODULE_DEPEND(em, ether, 1, 1, 1);
329
330/*********************************************************************
331 *  Tunable default values.
332 *********************************************************************/
333
334#define EM_TICKS_TO_USECS(ticks)	((1024 * (ticks) + 500) / 1000)
335#define EM_USECS_TO_TICKS(usecs)	((1000 * (usecs) + 512) / 1024)
336#define M_TSO_LEN			66
337
338#define MAX_INTS_PER_SEC	8000
339#define DEFAULT_ITR		(1000000000/(MAX_INTS_PER_SEC * 256))
340
341/* Allow common code without TSO */
342#ifndef CSUM_TSO
343#define CSUM_TSO	0
344#endif
345
346static SYSCTL_NODE(_hw, OID_AUTO, em, CTLFLAG_RD, 0, "EM driver parameters");
347
348static int em_tx_int_delay_dflt = EM_TICKS_TO_USECS(EM_TIDV);
349static int em_rx_int_delay_dflt = EM_TICKS_TO_USECS(EM_RDTR);
350TUNABLE_INT("hw.em.tx_int_delay", &em_tx_int_delay_dflt);
351TUNABLE_INT("hw.em.rx_int_delay", &em_rx_int_delay_dflt);
352SYSCTL_INT(_hw_em, OID_AUTO, tx_int_delay, CTLFLAG_RDTUN, &em_tx_int_delay_dflt,
353    0, "Default transmit interrupt delay in usecs");
354SYSCTL_INT(_hw_em, OID_AUTO, rx_int_delay, CTLFLAG_RDTUN, &em_rx_int_delay_dflt,
355    0, "Default receive interrupt delay in usecs");
356
357static int em_tx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_TADV);
358static int em_rx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_RADV);
359TUNABLE_INT("hw.em.tx_abs_int_delay", &em_tx_abs_int_delay_dflt);
360TUNABLE_INT("hw.em.rx_abs_int_delay", &em_rx_abs_int_delay_dflt);
361SYSCTL_INT(_hw_em, OID_AUTO, tx_abs_int_delay, CTLFLAG_RDTUN,
362    &em_tx_abs_int_delay_dflt, 0,
363    "Default transmit interrupt delay limit in usecs");
364SYSCTL_INT(_hw_em, OID_AUTO, rx_abs_int_delay, CTLFLAG_RDTUN,
365    &em_rx_abs_int_delay_dflt, 0,
366    "Default receive interrupt delay limit in usecs");
367
368static int em_rxd = EM_DEFAULT_RXD;
369static int em_txd = EM_DEFAULT_TXD;
370TUNABLE_INT("hw.em.rxd", &em_rxd);
371TUNABLE_INT("hw.em.txd", &em_txd);
372SYSCTL_INT(_hw_em, OID_AUTO, rxd, CTLFLAG_RDTUN, &em_rxd, 0,
373    "Number of receive descriptors per queue");
374SYSCTL_INT(_hw_em, OID_AUTO, txd, CTLFLAG_RDTUN, &em_txd, 0,
375    "Number of transmit descriptors per queue");
376
377static int em_smart_pwr_down = FALSE;
378TUNABLE_INT("hw.em.smart_pwr_down", &em_smart_pwr_down);
379SYSCTL_INT(_hw_em, OID_AUTO, smart_pwr_down, CTLFLAG_RDTUN, &em_smart_pwr_down,
380    0, "Set to true to leave smart power down enabled on newer adapters");
381
382/* Controls whether promiscuous also shows bad packets */
383static int em_debug_sbp = FALSE;
384TUNABLE_INT("hw.em.sbp", &em_debug_sbp);
385SYSCTL_INT(_hw_em, OID_AUTO, sbp, CTLFLAG_RDTUN, &em_debug_sbp, 0,
386    "Show bad packets in promiscuous mode");
387
388static int em_enable_msix = TRUE;
389TUNABLE_INT("hw.em.enable_msix", &em_enable_msix);
390SYSCTL_INT(_hw_em, OID_AUTO, enable_msix, CTLFLAG_RDTUN, &em_enable_msix, 0,
391    "Enable MSI-X interrupts");
392
393/* How many packets rxeof tries to clean at a time */
394static int em_rx_process_limit = 100;
395TUNABLE_INT("hw.em.rx_process_limit", &em_rx_process_limit);
396SYSCTL_INT(_hw_em, OID_AUTO, rx_process_limit, CTLFLAG_RDTUN,
397    &em_rx_process_limit, 0,
398    "Maximum number of received packets to process "
399    "at a time, -1 means unlimited");
400
401/* Energy efficient ethernet - default to OFF */
402static int eee_setting = 1;
403TUNABLE_INT("hw.em.eee_setting", &eee_setting);
404SYSCTL_INT(_hw_em, OID_AUTO, eee_setting, CTLFLAG_RDTUN, &eee_setting, 0,
405    "Enable Energy Efficient Ethernet");
406
407/* Global used in WOL setup with multiport cards */
408static int global_quad_port_a = 0;
409
410#ifdef DEV_NETMAP	/* see ixgbe.c for details */
411#include <dev/netmap/if_em_netmap.h>
412#endif /* DEV_NETMAP */
413
414/*********************************************************************
415 *  Device identification routine
416 *
417 *  em_probe determines if the driver should be loaded on
418 *  adapter based on PCI vendor/device id of the adapter.
419 *
420 *  return BUS_PROBE_DEFAULT on success, positive on failure
421 *********************************************************************/
422
423static int
424em_probe(device_t dev)
425{
426	char		adapter_name[60];
427	u16		pci_vendor_id = 0;
428	u16		pci_device_id = 0;
429	u16		pci_subvendor_id = 0;
430	u16		pci_subdevice_id = 0;
431	em_vendor_info_t *ent;
432
433	INIT_DEBUGOUT("em_probe: begin");
434
435	pci_vendor_id = pci_get_vendor(dev);
436	if (pci_vendor_id != EM_VENDOR_ID)
437		return (ENXIO);
438
439	pci_device_id = pci_get_device(dev);
440	pci_subvendor_id = pci_get_subvendor(dev);
441	pci_subdevice_id = pci_get_subdevice(dev);
442
443	ent = em_vendor_info_array;
444	while (ent->vendor_id != 0) {
445		if ((pci_vendor_id == ent->vendor_id) &&
446		    (pci_device_id == ent->device_id) &&
447
448		    ((pci_subvendor_id == ent->subvendor_id) ||
449		    (ent->subvendor_id == PCI_ANY_ID)) &&
450
451		    ((pci_subdevice_id == ent->subdevice_id) ||
452		    (ent->subdevice_id == PCI_ANY_ID))) {
453			sprintf(adapter_name, "%s %s",
454				em_strings[ent->index],
455				em_driver_version);
456			device_set_desc_copy(dev, adapter_name);
457			return (BUS_PROBE_DEFAULT);
458		}
459		ent++;
460	}
461
462	return (ENXIO);
463}
464
465/*********************************************************************
466 *  Device initialization routine
467 *
468 *  The attach entry point is called when the driver is being loaded.
469 *  This routine identifies the type of hardware, allocates all resources
470 *  and initializes the hardware.
471 *
472 *  return 0 on success, positive on failure
473 *********************************************************************/
474
475static int
476em_attach(device_t dev)
477{
478	struct adapter	*adapter;
479	struct e1000_hw	*hw;
480	int		error = 0;
481
482	INIT_DEBUGOUT("em_attach: begin");
483
484	if (resource_disabled("em", device_get_unit(dev))) {
485		device_printf(dev, "Disabled by device hint\n");
486		return (ENXIO);
487	}
488
489	adapter = device_get_softc(dev);
490	adapter->dev = adapter->osdep.dev = dev;
491	hw = &adapter->hw;
492	EM_CORE_LOCK_INIT(adapter, device_get_nameunit(dev));
493
494	/* SYSCTL stuff */
495	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
496	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
497	    OID_AUTO, "nvm", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
498	    em_sysctl_nvm_info, "I", "NVM Information");
499
500	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
501	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
502	    OID_AUTO, "debug", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
503	    em_sysctl_debug_info, "I", "Debug Information");
504
505	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
506	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
507	    OID_AUTO, "fc", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
508	    em_set_flowcntl, "I", "Flow Control");
509
510	callout_init_mtx(&adapter->timer, &adapter->core_mtx, 0);
511
512	/* Determine hardware and mac info */
513	em_identify_hardware(adapter);
514
515	/* Setup PCI resources */
516	if (em_allocate_pci_resources(adapter)) {
517		device_printf(dev, "Allocation of PCI resources failed\n");
518		error = ENXIO;
519		goto err_pci;
520	}
521
522	/*
523	** For ICH8 and family we need to
524	** map the flash memory, and this
525	** must happen after the MAC is
526	** identified
527	*/
528	if ((hw->mac.type == e1000_ich8lan) ||
529	    (hw->mac.type == e1000_ich9lan) ||
530	    (hw->mac.type == e1000_ich10lan) ||
531	    (hw->mac.type == e1000_pchlan) ||
532	    (hw->mac.type == e1000_pch2lan) ||
533	    (hw->mac.type == e1000_pch_lpt)) {
534		int rid = EM_BAR_TYPE_FLASH;
535		adapter->flash = bus_alloc_resource_any(dev,
536		    SYS_RES_MEMORY, &rid, RF_ACTIVE);
537		if (adapter->flash == NULL) {
538			device_printf(dev, "Mapping of Flash failed\n");
539			error = ENXIO;
540			goto err_pci;
541		}
542		/* This is used in the shared code */
543		hw->flash_address = (u8 *)adapter->flash;
544		adapter->osdep.flash_bus_space_tag =
545		    rman_get_bustag(adapter->flash);
546		adapter->osdep.flash_bus_space_handle =
547		    rman_get_bushandle(adapter->flash);
548	}
549
550	/* Do Shared Code initialization */
551	if (e1000_setup_init_funcs(hw, TRUE)) {
552		device_printf(dev, "Setup of Shared code failed\n");
553		error = ENXIO;
554		goto err_pci;
555	}
556
557	e1000_get_bus_info(hw);
558
559	/* Set up some sysctls for the tunable interrupt delays */
560	em_add_int_delay_sysctl(adapter, "rx_int_delay",
561	    "receive interrupt delay in usecs", &adapter->rx_int_delay,
562	    E1000_REGISTER(hw, E1000_RDTR), em_rx_int_delay_dflt);
563	em_add_int_delay_sysctl(adapter, "tx_int_delay",
564	    "transmit interrupt delay in usecs", &adapter->tx_int_delay,
565	    E1000_REGISTER(hw, E1000_TIDV), em_tx_int_delay_dflt);
566	em_add_int_delay_sysctl(adapter, "rx_abs_int_delay",
567	    "receive interrupt delay limit in usecs",
568	    &adapter->rx_abs_int_delay,
569	    E1000_REGISTER(hw, E1000_RADV),
570	    em_rx_abs_int_delay_dflt);
571	em_add_int_delay_sysctl(adapter, "tx_abs_int_delay",
572	    "transmit interrupt delay limit in usecs",
573	    &adapter->tx_abs_int_delay,
574	    E1000_REGISTER(hw, E1000_TADV),
575	    em_tx_abs_int_delay_dflt);
576	em_add_int_delay_sysctl(adapter, "itr",
577	    "interrupt delay limit in usecs/4",
578	    &adapter->tx_itr,
579	    E1000_REGISTER(hw, E1000_ITR),
580	    DEFAULT_ITR);
581
582	/* Sysctl for limiting the amount of work done in the taskqueue */
583	em_set_sysctl_value(adapter, "rx_processing_limit",
584	    "max number of rx packets to process", &adapter->rx_process_limit,
585	    em_rx_process_limit);
586
587	/*
588	 * Validate number of transmit and receive descriptors. It
589	 * must not exceed hardware maximum, and must be multiple
590	 * of E1000_DBA_ALIGN.
591	 */
592	if (((em_txd * sizeof(struct e1000_tx_desc)) % EM_DBA_ALIGN) != 0 ||
593	    (em_txd > EM_MAX_TXD) || (em_txd < EM_MIN_TXD)) {
594		device_printf(dev, "Using %d TX descriptors instead of %d!\n",
595		    EM_DEFAULT_TXD, em_txd);
596		adapter->num_tx_desc = EM_DEFAULT_TXD;
597	} else
598		adapter->num_tx_desc = em_txd;
599
600	if (((em_rxd * sizeof(struct e1000_rx_desc)) % EM_DBA_ALIGN) != 0 ||
601	    (em_rxd > EM_MAX_RXD) || (em_rxd < EM_MIN_RXD)) {
602		device_printf(dev, "Using %d RX descriptors instead of %d!\n",
603		    EM_DEFAULT_RXD, em_rxd);
604		adapter->num_rx_desc = EM_DEFAULT_RXD;
605	} else
606		adapter->num_rx_desc = em_rxd;
607
608	hw->mac.autoneg = DO_AUTO_NEG;
609	hw->phy.autoneg_wait_to_complete = FALSE;
610	hw->phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
611
612	/* Copper options */
613	if (hw->phy.media_type == e1000_media_type_copper) {
614		hw->phy.mdix = AUTO_ALL_MODES;
615		hw->phy.disable_polarity_correction = FALSE;
616		hw->phy.ms_type = EM_MASTER_SLAVE;
617	}
618
619	/*
620	 * Set the frame limits assuming
621	 * standard ethernet sized frames.
622	 */
623	adapter->hw.mac.max_frame_size =
624	    ETHERMTU + ETHER_HDR_LEN + ETHERNET_FCS_SIZE;
625
626	/*
627	 * This controls when hardware reports transmit completion
628	 * status.
629	 */
630	hw->mac.report_tx_early = 1;
631
632	/*
633	** Get queue/ring memory
634	*/
635	if (em_allocate_queues(adapter)) {
636		error = ENOMEM;
637		goto err_pci;
638	}
639
640	/* Allocate multicast array memory. */
641	adapter->mta = malloc(sizeof(u8) * ETH_ADDR_LEN *
642	    MAX_NUM_MULTICAST_ADDRESSES, M_DEVBUF, M_NOWAIT);
643	if (adapter->mta == NULL) {
644		device_printf(dev, "Can not allocate multicast setup array\n");
645		error = ENOMEM;
646		goto err_late;
647	}
648
649	/* Check SOL/IDER usage */
650	if (e1000_check_reset_block(hw))
651		device_printf(dev, "PHY reset is blocked"
652		    " due to SOL/IDER session.\n");
653
654	/* Sysctl for setting Energy Efficient Ethernet */
655	hw->dev_spec.ich8lan.eee_disable = eee_setting;
656	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
657	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
658	    OID_AUTO, "eee_control", CTLTYPE_INT|CTLFLAG_RW,
659	    adapter, 0, em_sysctl_eee, "I",
660	    "Disable Energy Efficient Ethernet");
661
662	/*
663	** Start from a known state, this is
664	** important in reading the nvm and
665	** mac from that.
666	*/
667	e1000_reset_hw(hw);
668
669
670	/* Make sure we have a good EEPROM before we read from it */
671	if (e1000_validate_nvm_checksum(hw) < 0) {
672		/*
673		** Some PCI-E parts fail the first check due to
674		** the link being in sleep state, call it again,
675		** if it fails a second time its a real issue.
676		*/
677		if (e1000_validate_nvm_checksum(hw) < 0) {
678			device_printf(dev,
679			    "The EEPROM Checksum Is Not Valid\n");
680			error = EIO;
681			goto err_late;
682		}
683	}
684
685	/* Copy the permanent MAC address out of the EEPROM */
686	if (e1000_read_mac_addr(hw) < 0) {
687		device_printf(dev, "EEPROM read error while reading MAC"
688		    " address\n");
689		error = EIO;
690		goto err_late;
691	}
692
693	if (!em_is_valid_ether_addr(hw->mac.addr)) {
694		device_printf(dev, "Invalid MAC address\n");
695		error = EIO;
696		goto err_late;
697	}
698
699	/*
700	**  Do interrupt configuration
701	*/
702	if (adapter->msix > 1) /* Do MSIX */
703		error = em_allocate_msix(adapter);
704	else  /* MSI or Legacy */
705		error = em_allocate_legacy(adapter);
706	if (error)
707		goto err_late;
708
709	/*
710	 * Get Wake-on-Lan and Management info for later use
711	 */
712	em_get_wakeup(dev);
713
714	/* Setup OS specific network interface */
715	if (em_setup_interface(dev, adapter) != 0)
716		goto err_late;
717
718	em_reset(adapter);
719
720	/* Initialize statistics */
721	em_update_stats_counters(adapter);
722
723	hw->mac.get_link_status = 1;
724	em_update_link_status(adapter);
725
726	/* Register for VLAN events */
727	adapter->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
728	    em_register_vlan, adapter, EVENTHANDLER_PRI_FIRST);
729	adapter->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
730	    em_unregister_vlan, adapter, EVENTHANDLER_PRI_FIRST);
731
732	em_add_hw_stats(adapter);
733
734	/* Non-AMT based hardware can now take control from firmware */
735	if (adapter->has_manage && !adapter->has_amt)
736		em_get_hw_control(adapter);
737
738	/* Tell the stack that the interface is not active */
739	adapter->ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
740	adapter->ifp->if_drv_flags |= IFF_DRV_OACTIVE;
741
742	adapter->led_dev = led_create(em_led_func, adapter,
743	    device_get_nameunit(dev));
744#ifdef DEV_NETMAP
745	em_netmap_attach(adapter);
746#endif /* DEV_NETMAP */
747
748	INIT_DEBUGOUT("em_attach: end");
749
750	return (0);
751
752err_late:
753	em_free_transmit_structures(adapter);
754	em_free_receive_structures(adapter);
755	em_release_hw_control(adapter);
756	if (adapter->ifp != NULL)
757		if_free(adapter->ifp);
758err_pci:
759	em_free_pci_resources(adapter);
760	free(adapter->mta, M_DEVBUF);
761	EM_CORE_LOCK_DESTROY(adapter);
762
763	return (error);
764}
765
766/*********************************************************************
767 *  Device removal routine
768 *
769 *  The detach entry point is called when the driver is being removed.
770 *  This routine stops the adapter and deallocates all the resources
771 *  that were allocated for driver operation.
772 *
773 *  return 0 on success, positive on failure
774 *********************************************************************/
775
776static int
777em_detach(device_t dev)
778{
779	struct adapter	*adapter = device_get_softc(dev);
780	struct ifnet	*ifp = adapter->ifp;
781
782	INIT_DEBUGOUT("em_detach: begin");
783
784	/* Make sure VLANS are not using driver */
785	if (adapter->ifp->if_vlantrunk != NULL) {
786		device_printf(dev,"Vlan in use, detach first\n");
787		return (EBUSY);
788	}
789
790#ifdef DEVICE_POLLING
791	if (ifp->if_capenable & IFCAP_POLLING)
792		ether_poll_deregister(ifp);
793#endif
794
795	if (adapter->led_dev != NULL)
796		led_destroy(adapter->led_dev);
797
798	EM_CORE_LOCK(adapter);
799	adapter->in_detach = 1;
800	em_stop(adapter);
801	EM_CORE_UNLOCK(adapter);
802	EM_CORE_LOCK_DESTROY(adapter);
803
804	e1000_phy_hw_reset(&adapter->hw);
805
806	em_release_manageability(adapter);
807	em_release_hw_control(adapter);
808
809	/* Unregister VLAN events */
810	if (adapter->vlan_attach != NULL)
811		EVENTHANDLER_DEREGISTER(vlan_config, adapter->vlan_attach);
812	if (adapter->vlan_detach != NULL)
813		EVENTHANDLER_DEREGISTER(vlan_unconfig, adapter->vlan_detach);
814
815	ether_ifdetach(adapter->ifp);
816	callout_drain(&adapter->timer);
817
818#ifdef DEV_NETMAP
819	netmap_detach(ifp);
820#endif /* DEV_NETMAP */
821
822	em_free_pci_resources(adapter);
823	bus_generic_detach(dev);
824	if_free(ifp);
825
826	em_free_transmit_structures(adapter);
827	em_free_receive_structures(adapter);
828
829	em_release_hw_control(adapter);
830	free(adapter->mta, M_DEVBUF);
831
832	return (0);
833}
834
835/*********************************************************************
836 *
837 *  Shutdown entry point
838 *
839 **********************************************************************/
840
841static int
842em_shutdown(device_t dev)
843{
844	return em_suspend(dev);
845}
846
847/*
848 * Suspend/resume device methods.
849 */
850static int
851em_suspend(device_t dev)
852{
853	struct adapter *adapter = device_get_softc(dev);
854
855	EM_CORE_LOCK(adapter);
856
857        em_release_manageability(adapter);
858	em_release_hw_control(adapter);
859	em_enable_wakeup(dev);
860
861	EM_CORE_UNLOCK(adapter);
862
863	return bus_generic_suspend(dev);
864}
865
866static int
867em_resume(device_t dev)
868{
869	struct adapter *adapter = device_get_softc(dev);
870	struct tx_ring	*txr = adapter->tx_rings;
871	struct ifnet *ifp = adapter->ifp;
872
873	EM_CORE_LOCK(adapter);
874	if (adapter->hw.mac.type == e1000_pch2lan)
875		e1000_resume_workarounds_pchlan(&adapter->hw);
876	em_init_locked(adapter);
877	em_init_manageability(adapter);
878
879	if ((ifp->if_flags & IFF_UP) &&
880	    (ifp->if_drv_flags & IFF_DRV_RUNNING) && adapter->link_active) {
881		for (int i = 0; i < adapter->num_queues; i++, txr++) {
882			EM_TX_LOCK(txr);
883#ifdef EM_MULTIQUEUE
884			if (!drbr_empty(ifp, txr->br))
885				em_mq_start_locked(ifp, txr, NULL);
886#else
887			if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
888				em_start_locked(ifp, txr);
889#endif
890			EM_TX_UNLOCK(txr);
891		}
892	}
893	EM_CORE_UNLOCK(adapter);
894
895	return bus_generic_resume(dev);
896}
897
898
899#ifdef EM_MULTIQUEUE
900/*********************************************************************
901 *  Multiqueue Transmit routines
902 *
903 *  em_mq_start is called by the stack to initiate a transmit.
904 *  however, if busy the driver can queue the request rather
905 *  than do an immediate send. It is this that is an advantage
906 *  in this driver, rather than also having multiple tx queues.
907 **********************************************************************/
908static int
909em_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr, struct mbuf *m)
910{
911	struct adapter  *adapter = txr->adapter;
912        struct mbuf     *next;
913        int             err = 0, enq = 0;
914
915	if ((ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) !=
916	    IFF_DRV_RUNNING || adapter->link_active == 0) {
917		if (m != NULL)
918			err = drbr_enqueue(ifp, txr->br, m);
919		return (err);
920	}
921
922	enq = 0;
923	if (m != NULL) {
924		err = drbr_enqueue(ifp, txr->br, m);
925		if (err)
926			return (err);
927	}
928
929	/* Process the queue */
930	while ((next = drbr_peek(ifp, txr->br)) != NULL) {
931		if ((err = em_xmit(txr, &next)) != 0) {
932			if (next == NULL)
933				drbr_advance(ifp, txr->br);
934			else
935				drbr_putback(ifp, txr->br, next);
936			break;
937		}
938		drbr_advance(ifp, txr->br);
939		enq++;
940		ifp->if_obytes += next->m_pkthdr.len;
941		if (next->m_flags & M_MCAST)
942			ifp->if_omcasts++;
943		ETHER_BPF_MTAP(ifp, next);
944		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
945                        break;
946	}
947
948	if (enq > 0) {
949                /* Set the watchdog */
950                txr->queue_status = EM_QUEUE_WORKING;
951		txr->watchdog_time = ticks;
952	}
953
954	if (txr->tx_avail < EM_MAX_SCATTER)
955		em_txeof(txr);
956	if (txr->tx_avail < EM_MAX_SCATTER)
957		ifp->if_drv_flags |= IFF_DRV_OACTIVE;
958	return (err);
959}
960
961/*
962** Multiqueue capable stack interface
963*/
964static int
965em_mq_start(struct ifnet *ifp, struct mbuf *m)
966{
967	struct adapter	*adapter = ifp->if_softc;
968	struct tx_ring	*txr = adapter->tx_rings;
969	int 		error;
970
971	if (EM_TX_TRYLOCK(txr)) {
972		error = em_mq_start_locked(ifp, txr, m);
973		EM_TX_UNLOCK(txr);
974	} else
975		error = drbr_enqueue(ifp, txr->br, m);
976
977	return (error);
978}
979
980/*
981** Flush all ring buffers
982*/
983static void
984em_qflush(struct ifnet *ifp)
985{
986	struct adapter  *adapter = ifp->if_softc;
987	struct tx_ring  *txr = adapter->tx_rings;
988	struct mbuf     *m;
989
990	for (int i = 0; i < adapter->num_queues; i++, txr++) {
991		EM_TX_LOCK(txr);
992		while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
993			m_freem(m);
994		EM_TX_UNLOCK(txr);
995	}
996	if_qflush(ifp);
997}
998#else  /* !EM_MULTIQUEUE */
999
1000static void
1001em_start_locked(struct ifnet *ifp, struct tx_ring *txr)
1002{
1003	struct adapter	*adapter = ifp->if_softc;
1004	struct mbuf	*m_head;
1005
1006	EM_TX_LOCK_ASSERT(txr);
1007
1008	if ((ifp->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) !=
1009	    IFF_DRV_RUNNING)
1010		return;
1011
1012	if (!adapter->link_active)
1013		return;
1014
1015	while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
1016        	/* Call cleanup if number of TX descriptors low */
1017		if (txr->tx_avail <= EM_TX_CLEANUP_THRESHOLD)
1018			em_txeof(txr);
1019		if (txr->tx_avail < EM_MAX_SCATTER) {
1020			ifp->if_drv_flags |= IFF_DRV_OACTIVE;
1021			break;
1022		}
1023                IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
1024		if (m_head == NULL)
1025			break;
1026		/*
1027		 *  Encapsulation can modify our pointer, and or make it
1028		 *  NULL on failure.  In that event, we can't requeue.
1029		 */
1030		if (em_xmit(txr, &m_head)) {
1031			if (m_head == NULL)
1032				break;
1033			IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
1034			break;
1035		}
1036
1037		/* Send a copy of the frame to the BPF listener */
1038		ETHER_BPF_MTAP(ifp, m_head);
1039
1040		/* Set timeout in case hardware has problems transmitting. */
1041		txr->watchdog_time = ticks;
1042                txr->queue_status = EM_QUEUE_WORKING;
1043	}
1044
1045	return;
1046}
1047
1048static void
1049em_start(struct ifnet *ifp)
1050{
1051	struct adapter	*adapter = ifp->if_softc;
1052	struct tx_ring	*txr = adapter->tx_rings;
1053
1054	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1055		EM_TX_LOCK(txr);
1056		em_start_locked(ifp, txr);
1057		EM_TX_UNLOCK(txr);
1058	}
1059	return;
1060}
1061#endif /* EM_MULTIQUEUE */
1062
1063/*********************************************************************
1064 *  Ioctl entry point
1065 *
1066 *  em_ioctl is called when the user wants to configure the
1067 *  interface.
1068 *
1069 *  return 0 on success, positive on failure
1070 **********************************************************************/
1071
1072static int
1073em_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
1074{
1075	struct adapter	*adapter = ifp->if_softc;
1076	struct ifreq	*ifr = (struct ifreq *)data;
1077#if defined(INET) || defined(INET6)
1078	struct ifaddr	*ifa = (struct ifaddr *)data;
1079#endif
1080	bool		avoid_reset = FALSE;
1081	int		error = 0;
1082
1083	if (adapter->in_detach)
1084		return (error);
1085
1086	switch (command) {
1087	case SIOCSIFADDR:
1088#ifdef INET
1089		if (ifa->ifa_addr->sa_family == AF_INET)
1090			avoid_reset = TRUE;
1091#endif
1092#ifdef INET6
1093		if (ifa->ifa_addr->sa_family == AF_INET6)
1094			avoid_reset = TRUE;
1095#endif
1096		/*
1097		** Calling init results in link renegotiation,
1098		** so we avoid doing it when possible.
1099		*/
1100		if (avoid_reset) {
1101			ifp->if_flags |= IFF_UP;
1102			if (!(ifp->if_drv_flags & IFF_DRV_RUNNING))
1103				em_init(adapter);
1104#ifdef INET
1105			if (!(ifp->if_flags & IFF_NOARP))
1106				arp_ifinit(ifp, ifa);
1107#endif
1108		} else
1109			error = ether_ioctl(ifp, command, data);
1110		break;
1111	case SIOCSIFMTU:
1112	    {
1113		int max_frame_size;
1114
1115		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFMTU (Set Interface MTU)");
1116
1117		EM_CORE_LOCK(adapter);
1118		switch (adapter->hw.mac.type) {
1119		case e1000_82571:
1120		case e1000_82572:
1121		case e1000_ich9lan:
1122		case e1000_ich10lan:
1123		case e1000_pch2lan:
1124		case e1000_pch_lpt:
1125		case e1000_82574:
1126		case e1000_82583:
1127		case e1000_80003es2lan:	/* 9K Jumbo Frame size */
1128			max_frame_size = 9234;
1129			break;
1130		case e1000_pchlan:
1131			max_frame_size = 4096;
1132			break;
1133			/* Adapters that do not support jumbo frames */
1134		case e1000_ich8lan:
1135			max_frame_size = ETHER_MAX_LEN;
1136			break;
1137		default:
1138			max_frame_size = MAX_JUMBO_FRAME_SIZE;
1139		}
1140		if (ifr->ifr_mtu > max_frame_size - ETHER_HDR_LEN -
1141		    ETHER_CRC_LEN) {
1142			EM_CORE_UNLOCK(adapter);
1143			error = EINVAL;
1144			break;
1145		}
1146
1147		ifp->if_mtu = ifr->ifr_mtu;
1148		adapter->hw.mac.max_frame_size =
1149		    ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
1150		em_init_locked(adapter);
1151		EM_CORE_UNLOCK(adapter);
1152		break;
1153	    }
1154	case SIOCSIFFLAGS:
1155		IOCTL_DEBUGOUT("ioctl rcv'd:\
1156		    SIOCSIFFLAGS (Set Interface Flags)");
1157		EM_CORE_LOCK(adapter);
1158		if (ifp->if_flags & IFF_UP) {
1159			if ((ifp->if_drv_flags & IFF_DRV_RUNNING)) {
1160				if ((ifp->if_flags ^ adapter->if_flags) &
1161				    (IFF_PROMISC | IFF_ALLMULTI)) {
1162					em_disable_promisc(adapter);
1163					em_set_promisc(adapter);
1164				}
1165			} else
1166				em_init_locked(adapter);
1167		} else
1168			if (ifp->if_drv_flags & IFF_DRV_RUNNING)
1169				em_stop(adapter);
1170		adapter->if_flags = ifp->if_flags;
1171		EM_CORE_UNLOCK(adapter);
1172		break;
1173	case SIOCADDMULTI:
1174	case SIOCDELMULTI:
1175		IOCTL_DEBUGOUT("ioctl rcv'd: SIOC(ADD|DEL)MULTI");
1176		if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1177			EM_CORE_LOCK(adapter);
1178			em_disable_intr(adapter);
1179			em_set_multi(adapter);
1180#ifdef DEVICE_POLLING
1181			if (!(ifp->if_capenable & IFCAP_POLLING))
1182#endif
1183				em_enable_intr(adapter);
1184			EM_CORE_UNLOCK(adapter);
1185		}
1186		break;
1187	case SIOCSIFMEDIA:
1188		/* Check SOL/IDER usage */
1189		EM_CORE_LOCK(adapter);
1190		if (e1000_check_reset_block(&adapter->hw)) {
1191			EM_CORE_UNLOCK(adapter);
1192			device_printf(adapter->dev, "Media change is"
1193			    " blocked due to SOL/IDER session.\n");
1194			break;
1195		}
1196		EM_CORE_UNLOCK(adapter);
1197		/* falls thru */
1198	case SIOCGIFMEDIA:
1199		IOCTL_DEBUGOUT("ioctl rcv'd: \
1200		    SIOCxIFMEDIA (Get/Set Interface Media)");
1201		error = ifmedia_ioctl(ifp, ifr, &adapter->media, command);
1202		break;
1203	case SIOCSIFCAP:
1204	    {
1205		int mask, reinit;
1206
1207		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFCAP (Set Capabilities)");
1208		reinit = 0;
1209		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
1210#ifdef DEVICE_POLLING
1211		if (mask & IFCAP_POLLING) {
1212			if (ifr->ifr_reqcap & IFCAP_POLLING) {
1213				error = ether_poll_register(em_poll, ifp);
1214				if (error)
1215					return (error);
1216				EM_CORE_LOCK(adapter);
1217				em_disable_intr(adapter);
1218				ifp->if_capenable |= IFCAP_POLLING;
1219				EM_CORE_UNLOCK(adapter);
1220			} else {
1221				error = ether_poll_deregister(ifp);
1222				/* Enable interrupt even in error case */
1223				EM_CORE_LOCK(adapter);
1224				em_enable_intr(adapter);
1225				ifp->if_capenable &= ~IFCAP_POLLING;
1226				EM_CORE_UNLOCK(adapter);
1227			}
1228		}
1229#endif
1230		if (mask & IFCAP_HWCSUM) {
1231			ifp->if_capenable ^= IFCAP_HWCSUM;
1232			reinit = 1;
1233		}
1234		if (mask & IFCAP_TSO4) {
1235			ifp->if_capenable ^= IFCAP_TSO4;
1236			reinit = 1;
1237		}
1238		if (mask & IFCAP_VLAN_HWTAGGING) {
1239			ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
1240			reinit = 1;
1241		}
1242		if (mask & IFCAP_VLAN_HWFILTER) {
1243			ifp->if_capenable ^= IFCAP_VLAN_HWFILTER;
1244			reinit = 1;
1245		}
1246		if (mask & IFCAP_VLAN_HWTSO) {
1247			ifp->if_capenable ^= IFCAP_VLAN_HWTSO;
1248			reinit = 1;
1249		}
1250		if ((mask & IFCAP_WOL) &&
1251		    (ifp->if_capabilities & IFCAP_WOL) != 0) {
1252			if (mask & IFCAP_WOL_MCAST)
1253				ifp->if_capenable ^= IFCAP_WOL_MCAST;
1254			if (mask & IFCAP_WOL_MAGIC)
1255				ifp->if_capenable ^= IFCAP_WOL_MAGIC;
1256		}
1257		if (reinit && (ifp->if_drv_flags & IFF_DRV_RUNNING))
1258			em_init(adapter);
1259		VLAN_CAPABILITIES(ifp);
1260		break;
1261	    }
1262
1263	default:
1264		error = ether_ioctl(ifp, command, data);
1265		break;
1266	}
1267
1268	return (error);
1269}
1270
1271
1272/*********************************************************************
1273 *  Init entry point
1274 *
1275 *  This routine is used in two ways. It is used by the stack as
1276 *  init entry point in network interface structure. It is also used
1277 *  by the driver as a hw/sw initialization routine to get to a
1278 *  consistent state.
1279 *
1280 *  return 0 on success, positive on failure
1281 **********************************************************************/
1282
1283static void
1284em_init_locked(struct adapter *adapter)
1285{
1286	struct ifnet	*ifp = adapter->ifp;
1287	device_t	dev = adapter->dev;
1288
1289	INIT_DEBUGOUT("em_init: begin");
1290
1291	EM_CORE_LOCK_ASSERT(adapter);
1292
1293	em_disable_intr(adapter);
1294	callout_stop(&adapter->timer);
1295
1296	/* Get the latest mac address, User can use a LAA */
1297        bcopy(IF_LLADDR(adapter->ifp), adapter->hw.mac.addr,
1298              ETHER_ADDR_LEN);
1299
1300	/* Put the address into the Receive Address Array */
1301	e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);
1302
1303	/*
1304	 * With the 82571 adapter, RAR[0] may be overwritten
1305	 * when the other port is reset, we make a duplicate
1306	 * in RAR[14] for that eventuality, this assures
1307	 * the interface continues to function.
1308	 */
1309	if (adapter->hw.mac.type == e1000_82571) {
1310		e1000_set_laa_state_82571(&adapter->hw, TRUE);
1311		e1000_rar_set(&adapter->hw, adapter->hw.mac.addr,
1312		    E1000_RAR_ENTRIES - 1);
1313	}
1314
1315	/* Initialize the hardware */
1316	em_reset(adapter);
1317	em_update_link_status(adapter);
1318
1319	/* Setup VLAN support, basic and offload if available */
1320	E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);
1321
1322	/* Set hardware offload abilities */
1323	ifp->if_hwassist = 0;
1324	if (ifp->if_capenable & IFCAP_TXCSUM)
1325		ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP);
1326	if (ifp->if_capenable & IFCAP_TSO4)
1327		ifp->if_hwassist |= CSUM_TSO;
1328
1329	/* Configure for OS presence */
1330	em_init_manageability(adapter);
1331
1332	/* Prepare transmit descriptors and buffers */
1333	em_setup_transmit_structures(adapter);
1334	em_initialize_transmit_unit(adapter);
1335
1336	/* Setup Multicast table */
1337	em_set_multi(adapter);
1338
1339	/*
1340	** Figure out the desired mbuf
1341	** pool for doing jumbos
1342	*/
1343	if (adapter->hw.mac.max_frame_size <= 2048)
1344		adapter->rx_mbuf_sz = MCLBYTES;
1345	else if (adapter->hw.mac.max_frame_size <= 4096)
1346		adapter->rx_mbuf_sz = MJUMPAGESIZE;
1347	else
1348		adapter->rx_mbuf_sz = MJUM9BYTES;
1349
1350	/* Prepare receive descriptors and buffers */
1351	if (em_setup_receive_structures(adapter)) {
1352		device_printf(dev, "Could not setup receive structures\n");
1353		em_stop(adapter);
1354		return;
1355	}
1356	em_initialize_receive_unit(adapter);
1357
1358	/* Use real VLAN Filter support? */
1359	if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) {
1360		if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
1361			/* Use real VLAN Filter support */
1362			em_setup_vlan_hw_support(adapter);
1363		else {
1364			u32 ctrl;
1365			ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL);
1366			ctrl |= E1000_CTRL_VME;
1367			E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl);
1368		}
1369	}
1370
1371	/* Don't lose promiscuous settings */
1372	em_set_promisc(adapter);
1373
1374	/* Set the interface as ACTIVE */
1375	ifp->if_drv_flags |= IFF_DRV_RUNNING;
1376	ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
1377
1378	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
1379	e1000_clear_hw_cntrs_base_generic(&adapter->hw);
1380
1381	/* MSI/X configuration for 82574 */
1382	if (adapter->hw.mac.type == e1000_82574) {
1383		int tmp;
1384		tmp = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
1385		tmp |= E1000_CTRL_EXT_PBA_CLR;
1386		E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, tmp);
1387		/* Set the IVAR - interrupt vector routing. */
1388		E1000_WRITE_REG(&adapter->hw, E1000_IVAR, adapter->ivars);
1389	}
1390
1391#ifdef DEVICE_POLLING
1392	/*
1393	 * Only enable interrupts if we are not polling, make sure
1394	 * they are off otherwise.
1395	 */
1396	if (ifp->if_capenable & IFCAP_POLLING)
1397		em_disable_intr(adapter);
1398	else
1399#endif /* DEVICE_POLLING */
1400		em_enable_intr(adapter);
1401
1402	/* AMT based hardware can now take control from firmware */
1403	if (adapter->has_manage && adapter->has_amt)
1404		em_get_hw_control(adapter);
1405}
1406
1407static void
1408em_init(void *arg)
1409{
1410	struct adapter *adapter = arg;
1411
1412	EM_CORE_LOCK(adapter);
1413	em_init_locked(adapter);
1414	EM_CORE_UNLOCK(adapter);
1415}
1416
1417
1418#ifdef DEVICE_POLLING
1419/*********************************************************************
1420 *
1421 *  Legacy polling routine: note this only works with single queue
1422 *
1423 *********************************************************************/
1424static int
1425em_poll(struct ifnet *ifp, enum poll_cmd cmd, int count)
1426{
1427	struct adapter *adapter = ifp->if_softc;
1428	struct tx_ring	*txr = adapter->tx_rings;
1429	struct rx_ring	*rxr = adapter->rx_rings;
1430	u32		reg_icr;
1431	int		rx_done;
1432
1433	EM_CORE_LOCK(adapter);
1434	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
1435		EM_CORE_UNLOCK(adapter);
1436		return (0);
1437	}
1438
1439	if (cmd == POLL_AND_CHECK_STATUS) {
1440		reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1441		if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
1442			callout_stop(&adapter->timer);
1443			adapter->hw.mac.get_link_status = 1;
1444			em_update_link_status(adapter);
1445			callout_reset(&adapter->timer, hz,
1446			    em_local_timer, adapter);
1447		}
1448	}
1449	EM_CORE_UNLOCK(adapter);
1450
1451	em_rxeof(rxr, count, &rx_done);
1452
1453	EM_TX_LOCK(txr);
1454	em_txeof(txr);
1455#ifdef EM_MULTIQUEUE
1456	if (!drbr_empty(ifp, txr->br))
1457		em_mq_start_locked(ifp, txr, NULL);
1458#else
1459	if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1460		em_start_locked(ifp, txr);
1461#endif
1462	EM_TX_UNLOCK(txr);
1463
1464	return (rx_done);
1465}
1466#endif /* DEVICE_POLLING */
1467
1468
1469/*********************************************************************
1470 *
1471 *  Fast Legacy/MSI Combined Interrupt Service routine
1472 *
1473 *********************************************************************/
1474static int
1475em_irq_fast(void *arg)
1476{
1477	struct adapter	*adapter = arg;
1478	struct ifnet	*ifp;
1479	u32		reg_icr;
1480
1481	ifp = adapter->ifp;
1482
1483	reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1484
1485	/* Hot eject?  */
1486	if (reg_icr == 0xffffffff)
1487		return FILTER_STRAY;
1488
1489	/* Definitely not our interrupt.  */
1490	if (reg_icr == 0x0)
1491		return FILTER_STRAY;
1492
1493	/*
1494	 * Starting with the 82571 chip, bit 31 should be used to
1495	 * determine whether the interrupt belongs to us.
1496	 */
1497	if (adapter->hw.mac.type >= e1000_82571 &&
1498	    (reg_icr & E1000_ICR_INT_ASSERTED) == 0)
1499		return FILTER_STRAY;
1500
1501	em_disable_intr(adapter);
1502	taskqueue_enqueue(adapter->tq, &adapter->que_task);
1503
1504	/* Link status change */
1505	if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
1506		adapter->hw.mac.get_link_status = 1;
1507		taskqueue_enqueue(taskqueue_fast, &adapter->link_task);
1508	}
1509
1510	if (reg_icr & E1000_ICR_RXO)
1511		adapter->rx_overruns++;
1512	return FILTER_HANDLED;
1513}
1514
1515/* Combined RX/TX handler, used by Legacy and MSI */
1516static void
1517em_handle_que(void *context, int pending)
1518{
1519	struct adapter	*adapter = context;
1520	struct ifnet	*ifp = adapter->ifp;
1521	struct tx_ring	*txr = adapter->tx_rings;
1522	struct rx_ring	*rxr = adapter->rx_rings;
1523
1524
1525	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1526		bool more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
1527		EM_TX_LOCK(txr);
1528		em_txeof(txr);
1529#ifdef EM_MULTIQUEUE
1530		if (!drbr_empty(ifp, txr->br))
1531			em_mq_start_locked(ifp, txr, NULL);
1532#else
1533		if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1534			em_start_locked(ifp, txr);
1535#endif
1536		EM_TX_UNLOCK(txr);
1537		if (more) {
1538			taskqueue_enqueue(adapter->tq, &adapter->que_task);
1539			return;
1540		}
1541	}
1542
1543	em_enable_intr(adapter);
1544	return;
1545}
1546
1547
1548/*********************************************************************
1549 *
1550 *  MSIX Interrupt Service Routines
1551 *
1552 **********************************************************************/
1553static void
1554em_msix_tx(void *arg)
1555{
1556	struct tx_ring *txr = arg;
1557	struct adapter *adapter = txr->adapter;
1558	struct ifnet	*ifp = adapter->ifp;
1559
1560	++txr->tx_irq;
1561	EM_TX_LOCK(txr);
1562	em_txeof(txr);
1563#ifdef EM_MULTIQUEUE
1564	if (!drbr_empty(ifp, txr->br))
1565		em_mq_start_locked(ifp, txr, NULL);
1566#else
1567	if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1568		em_start_locked(ifp, txr);
1569#endif
1570	/* Reenable this interrupt */
1571	E1000_WRITE_REG(&adapter->hw, E1000_IMS, txr->ims);
1572	EM_TX_UNLOCK(txr);
1573	return;
1574}
1575
1576/*********************************************************************
1577 *
1578 *  MSIX RX Interrupt Service routine
1579 *
1580 **********************************************************************/
1581
1582static void
1583em_msix_rx(void *arg)
1584{
1585	struct rx_ring	*rxr = arg;
1586	struct adapter	*adapter = rxr->adapter;
1587	bool		more;
1588
1589	++rxr->rx_irq;
1590	if (!(adapter->ifp->if_drv_flags & IFF_DRV_RUNNING))
1591		return;
1592	more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
1593	if (more)
1594		taskqueue_enqueue(rxr->tq, &rxr->rx_task);
1595	else
1596		/* Reenable this interrupt */
1597		E1000_WRITE_REG(&adapter->hw, E1000_IMS, rxr->ims);
1598	return;
1599}
1600
1601/*********************************************************************
1602 *
1603 *  MSIX Link Fast Interrupt Service routine
1604 *
1605 **********************************************************************/
1606static void
1607em_msix_link(void *arg)
1608{
1609	struct adapter	*adapter = arg;
1610	u32		reg_icr;
1611
1612	++adapter->link_irq;
1613	reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1614
1615	if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
1616		adapter->hw.mac.get_link_status = 1;
1617		em_handle_link(adapter, 0);
1618	} else
1619		E1000_WRITE_REG(&adapter->hw, E1000_IMS,
1620		    EM_MSIX_LINK | E1000_IMS_LSC);
1621	return;
1622}
1623
1624static void
1625em_handle_rx(void *context, int pending)
1626{
1627	struct rx_ring	*rxr = context;
1628	struct adapter	*adapter = rxr->adapter;
1629        bool            more;
1630
1631	more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
1632	if (more)
1633		taskqueue_enqueue(rxr->tq, &rxr->rx_task);
1634	else
1635		/* Reenable this interrupt */
1636		E1000_WRITE_REG(&adapter->hw, E1000_IMS, rxr->ims);
1637}
1638
1639static void
1640em_handle_tx(void *context, int pending)
1641{
1642	struct tx_ring	*txr = context;
1643	struct adapter	*adapter = txr->adapter;
1644	struct ifnet	*ifp = adapter->ifp;
1645
1646	EM_TX_LOCK(txr);
1647	em_txeof(txr);
1648#ifdef EM_MULTIQUEUE
1649	if (!drbr_empty(ifp, txr->br))
1650		em_mq_start_locked(ifp, txr, NULL);
1651#else
1652	if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1653		em_start_locked(ifp, txr);
1654#endif
1655	E1000_WRITE_REG(&adapter->hw, E1000_IMS, txr->ims);
1656	EM_TX_UNLOCK(txr);
1657}
1658
1659static void
1660em_handle_link(void *context, int pending)
1661{
1662	struct adapter	*adapter = context;
1663	struct tx_ring	*txr = adapter->tx_rings;
1664	struct ifnet *ifp = adapter->ifp;
1665
1666	if (!(ifp->if_drv_flags & IFF_DRV_RUNNING))
1667		return;
1668
1669	EM_CORE_LOCK(adapter);
1670	callout_stop(&adapter->timer);
1671	em_update_link_status(adapter);
1672	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
1673	E1000_WRITE_REG(&adapter->hw, E1000_IMS,
1674	    EM_MSIX_LINK | E1000_IMS_LSC);
1675	if (adapter->link_active) {
1676		for (int i = 0; i < adapter->num_queues; i++, txr++) {
1677			EM_TX_LOCK(txr);
1678#ifdef EM_MULTIQUEUE
1679			if (!drbr_empty(ifp, txr->br))
1680				em_mq_start_locked(ifp, txr, NULL);
1681#else
1682			if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1683				em_start_locked(ifp, txr);
1684#endif
1685			EM_TX_UNLOCK(txr);
1686		}
1687	}
1688	EM_CORE_UNLOCK(adapter);
1689}
1690
1691
1692/*********************************************************************
1693 *
1694 *  Media Ioctl callback
1695 *
1696 *  This routine is called whenever the user queries the status of
1697 *  the interface using ifconfig.
1698 *
1699 **********************************************************************/
1700static void
1701em_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
1702{
1703	struct adapter *adapter = ifp->if_softc;
1704	u_char fiber_type = IFM_1000_SX;
1705
1706	INIT_DEBUGOUT("em_media_status: begin");
1707
1708	EM_CORE_LOCK(adapter);
1709	em_update_link_status(adapter);
1710
1711	ifmr->ifm_status = IFM_AVALID;
1712	ifmr->ifm_active = IFM_ETHER;
1713
1714	if (!adapter->link_active) {
1715		EM_CORE_UNLOCK(adapter);
1716		return;
1717	}
1718
1719	ifmr->ifm_status |= IFM_ACTIVE;
1720
1721	if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
1722	    (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
1723		ifmr->ifm_active |= fiber_type | IFM_FDX;
1724	} else {
1725		switch (adapter->link_speed) {
1726		case 10:
1727			ifmr->ifm_active |= IFM_10_T;
1728			break;
1729		case 100:
1730			ifmr->ifm_active |= IFM_100_TX;
1731			break;
1732		case 1000:
1733			ifmr->ifm_active |= IFM_1000_T;
1734			break;
1735		}
1736		if (adapter->link_duplex == FULL_DUPLEX)
1737			ifmr->ifm_active |= IFM_FDX;
1738		else
1739			ifmr->ifm_active |= IFM_HDX;
1740	}
1741	EM_CORE_UNLOCK(adapter);
1742}
1743
1744/*********************************************************************
1745 *
1746 *  Media Ioctl callback
1747 *
1748 *  This routine is called when the user changes speed/duplex using
1749 *  media/mediopt option with ifconfig.
1750 *
1751 **********************************************************************/
1752static int
1753em_media_change(struct ifnet *ifp)
1754{
1755	struct adapter *adapter = ifp->if_softc;
1756	struct ifmedia  *ifm = &adapter->media;
1757
1758	INIT_DEBUGOUT("em_media_change: begin");
1759
1760	if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
1761		return (EINVAL);
1762
1763	EM_CORE_LOCK(adapter);
1764	switch (IFM_SUBTYPE(ifm->ifm_media)) {
1765	case IFM_AUTO:
1766		adapter->hw.mac.autoneg = DO_AUTO_NEG;
1767		adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
1768		break;
1769	case IFM_1000_LX:
1770	case IFM_1000_SX:
1771	case IFM_1000_T:
1772		adapter->hw.mac.autoneg = DO_AUTO_NEG;
1773		adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
1774		break;
1775	case IFM_100_TX:
1776		adapter->hw.mac.autoneg = FALSE;
1777		adapter->hw.phy.autoneg_advertised = 0;
1778		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1779			adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_FULL;
1780		else
1781			adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_HALF;
1782		break;
1783	case IFM_10_T:
1784		adapter->hw.mac.autoneg = FALSE;
1785		adapter->hw.phy.autoneg_advertised = 0;
1786		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1787			adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_FULL;
1788		else
1789			adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_HALF;
1790		break;
1791	default:
1792		device_printf(adapter->dev, "Unsupported media type\n");
1793	}
1794
1795	em_init_locked(adapter);
1796	EM_CORE_UNLOCK(adapter);
1797
1798	return (0);
1799}
1800
1801/*********************************************************************
1802 *
1803 *  This routine maps the mbufs to tx descriptors.
1804 *
1805 *  return 0 on success, positive on failure
1806 **********************************************************************/
1807
1808static int
1809em_xmit(struct tx_ring *txr, struct mbuf **m_headp)
1810{
1811	struct adapter		*adapter = txr->adapter;
1812	bus_dma_segment_t	segs[EM_MAX_SCATTER];
1813	bus_dmamap_t		map;
1814	struct em_buffer	*tx_buffer, *tx_buffer_mapped;
1815	struct e1000_tx_desc	*ctxd = NULL;
1816	struct mbuf		*m_head;
1817	struct ether_header	*eh;
1818	struct ip		*ip = NULL;
1819	struct tcphdr		*tp = NULL;
1820	u32			txd_upper, txd_lower, txd_used, txd_saved;
1821	int			ip_off, poff;
1822	int			nsegs, i, j, first, last = 0;
1823	int			error, do_tso, tso_desc = 0, remap = 1;
1824
1825retry:
1826	m_head = *m_headp;
1827	txd_upper = txd_lower = txd_used = txd_saved = 0;
1828	do_tso = ((m_head->m_pkthdr.csum_flags & CSUM_TSO) != 0);
1829	ip_off = poff = 0;
1830
1831	/*
1832	 * Intel recommends entire IP/TCP header length reside in a single
1833	 * buffer. If multiple descriptors are used to describe the IP and
1834	 * TCP header, each descriptor should describe one or more
1835	 * complete headers; descriptors referencing only parts of headers
1836	 * are not supported. If all layer headers are not coalesced into
1837	 * a single buffer, each buffer should not cross a 4KB boundary,
1838	 * or be larger than the maximum read request size.
1839	 * Controller also requires modifing IP/TCP header to make TSO work
1840	 * so we firstly get a writable mbuf chain then coalesce ethernet/
1841	 * IP/TCP header into a single buffer to meet the requirement of
1842	 * controller. This also simplifies IP/TCP/UDP checksum offloading
1843	 * which also has similiar restrictions.
1844	 */
1845	if (do_tso || m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD) {
1846		if (do_tso || (m_head->m_next != NULL &&
1847		    m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD)) {
1848			if (M_WRITABLE(*m_headp) == 0) {
1849				m_head = m_dup(*m_headp, M_NOWAIT);
1850				m_freem(*m_headp);
1851				if (m_head == NULL) {
1852					*m_headp = NULL;
1853					return (ENOBUFS);
1854				}
1855				*m_headp = m_head;
1856			}
1857		}
1858		/*
1859		 * XXX
1860		 * Assume IPv4, we don't have TSO/checksum offload support
1861		 * for IPv6 yet.
1862		 */
1863		ip_off = sizeof(struct ether_header);
1864		m_head = m_pullup(m_head, ip_off);
1865		if (m_head == NULL) {
1866			*m_headp = NULL;
1867			return (ENOBUFS);
1868		}
1869		eh = mtod(m_head, struct ether_header *);
1870		if (eh->ether_type == htons(ETHERTYPE_VLAN)) {
1871			ip_off = sizeof(struct ether_vlan_header);
1872			m_head = m_pullup(m_head, ip_off);
1873			if (m_head == NULL) {
1874				*m_headp = NULL;
1875				return (ENOBUFS);
1876			}
1877		}
1878		m_head = m_pullup(m_head, ip_off + sizeof(struct ip));
1879		if (m_head == NULL) {
1880			*m_headp = NULL;
1881			return (ENOBUFS);
1882		}
1883		ip = (struct ip *)(mtod(m_head, char *) + ip_off);
1884		poff = ip_off + (ip->ip_hl << 2);
1885		if (do_tso) {
1886			m_head = m_pullup(m_head, poff + sizeof(struct tcphdr));
1887			if (m_head == NULL) {
1888				*m_headp = NULL;
1889				return (ENOBUFS);
1890			}
1891			tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
1892			/*
1893			 * TSO workaround:
1894			 *   pull 4 more bytes of data into it.
1895			 */
1896			m_head = m_pullup(m_head, poff + (tp->th_off << 2) + 4);
1897			if (m_head == NULL) {
1898				*m_headp = NULL;
1899				return (ENOBUFS);
1900			}
1901			ip = (struct ip *)(mtod(m_head, char *) + ip_off);
1902			ip->ip_len = 0;
1903			ip->ip_sum = 0;
1904			/*
1905			 * The pseudo TCP checksum does not include TCP payload
1906			 * length so driver should recompute the checksum here
1907			 * what hardware expect to see. This is adherence of
1908			 * Microsoft's Large Send specification.
1909			 */
1910			tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
1911			tp->th_sum = in_pseudo(ip->ip_src.s_addr,
1912			    ip->ip_dst.s_addr, htons(IPPROTO_TCP));
1913		} else if (m_head->m_pkthdr.csum_flags & CSUM_TCP) {
1914			m_head = m_pullup(m_head, poff + sizeof(struct tcphdr));
1915			if (m_head == NULL) {
1916				*m_headp = NULL;
1917				return (ENOBUFS);
1918			}
1919			tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
1920			m_head = m_pullup(m_head, poff + (tp->th_off << 2));
1921			if (m_head == NULL) {
1922				*m_headp = NULL;
1923				return (ENOBUFS);
1924			}
1925			ip = (struct ip *)(mtod(m_head, char *) + ip_off);
1926			tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
1927		} else if (m_head->m_pkthdr.csum_flags & CSUM_UDP) {
1928			m_head = m_pullup(m_head, poff + sizeof(struct udphdr));
1929			if (m_head == NULL) {
1930				*m_headp = NULL;
1931				return (ENOBUFS);
1932			}
1933			ip = (struct ip *)(mtod(m_head, char *) + ip_off);
1934		}
1935		*m_headp = m_head;
1936	}
1937
1938	/*
1939	 * Map the packet for DMA
1940	 *
1941	 * Capture the first descriptor index,
1942	 * this descriptor will have the index
1943	 * of the EOP which is the only one that
1944	 * now gets a DONE bit writeback.
1945	 */
1946	first = txr->next_avail_desc;
1947	tx_buffer = &txr->tx_buffers[first];
1948	tx_buffer_mapped = tx_buffer;
1949	map = tx_buffer->map;
1950
1951	error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
1952	    *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
1953
1954	/*
1955	 * There are two types of errors we can (try) to handle:
1956	 * - EFBIG means the mbuf chain was too long and bus_dma ran
1957	 *   out of segments.  Defragment the mbuf chain and try again.
1958	 * - ENOMEM means bus_dma could not obtain enough bounce buffers
1959	 *   at this point in time.  Defer sending and try again later.
1960	 * All other errors, in particular EINVAL, are fatal and prevent the
1961	 * mbuf chain from ever going through.  Drop it and report error.
1962	 */
1963	if (error == EFBIG && remap) {
1964		struct mbuf *m;
1965
1966		m = m_defrag(*m_headp, M_NOWAIT);
1967		if (m == NULL) {
1968			adapter->mbuf_alloc_failed++;
1969			m_freem(*m_headp);
1970			*m_headp = NULL;
1971			return (ENOBUFS);
1972		}
1973		*m_headp = m;
1974
1975		/* Try it again, but only once */
1976		remap = 0;
1977		goto retry;
1978	} else if (error == ENOMEM) {
1979		adapter->no_tx_dma_setup++;
1980		return (error);
1981	} else if (error != 0) {
1982		adapter->no_tx_dma_setup++;
1983		m_freem(*m_headp);
1984		*m_headp = NULL;
1985		return (error);
1986	}
1987
1988	/*
1989	 * TSO Hardware workaround, if this packet is not
1990	 * TSO, and is only a single descriptor long, and
1991	 * it follows a TSO burst, then we need to add a
1992	 * sentinel descriptor to prevent premature writeback.
1993	 */
1994	if ((do_tso == 0) && (txr->tx_tso == TRUE)) {
1995		if (nsegs == 1)
1996			tso_desc = TRUE;
1997		txr->tx_tso = FALSE;
1998	}
1999
2000        if (nsegs > (txr->tx_avail - 2)) {
2001                txr->no_desc_avail++;
2002		bus_dmamap_unload(txr->txtag, map);
2003		return (ENOBUFS);
2004        }
2005	m_head = *m_headp;
2006
2007	/* Do hardware assists */
2008	if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
2009		em_tso_setup(txr, m_head, ip_off, ip, tp,
2010		    &txd_upper, &txd_lower);
2011		/* we need to make a final sentinel transmit desc */
2012		tso_desc = TRUE;
2013	} else if (m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD)
2014		em_transmit_checksum_setup(txr, m_head,
2015		    ip_off, ip, &txd_upper, &txd_lower);
2016
2017	if (m_head->m_flags & M_VLANTAG) {
2018		/* Set the vlan id. */
2019		txd_upper |=
2020		    (htole16(m_head->m_pkthdr.ether_vtag) << 16);
2021                /* Tell hardware to add tag */
2022                txd_lower |= htole32(E1000_TXD_CMD_VLE);
2023        }
2024
2025	i = txr->next_avail_desc;
2026
2027	/* Set up our transmit descriptors */
2028	for (j = 0; j < nsegs; j++) {
2029		bus_size_t seg_len;
2030		bus_addr_t seg_addr;
2031
2032		tx_buffer = &txr->tx_buffers[i];
2033		ctxd = &txr->tx_base[i];
2034		seg_addr = segs[j].ds_addr;
2035		seg_len  = segs[j].ds_len;
2036		/*
2037		** TSO Workaround:
2038		** If this is the last descriptor, we want to
2039		** split it so we have a small final sentinel
2040		*/
2041		if (tso_desc && (j == (nsegs -1)) && (seg_len > 8)) {
2042			seg_len -= 4;
2043			ctxd->buffer_addr = htole64(seg_addr);
2044			ctxd->lower.data = htole32(
2045			adapter->txd_cmd | txd_lower | seg_len);
2046			ctxd->upper.data =
2047			    htole32(txd_upper);
2048			if (++i == adapter->num_tx_desc)
2049				i = 0;
2050			/* Now make the sentinel */
2051			++txd_used; /* using an extra txd */
2052			ctxd = &txr->tx_base[i];
2053			tx_buffer = &txr->tx_buffers[i];
2054			ctxd->buffer_addr =
2055			    htole64(seg_addr + seg_len);
2056			ctxd->lower.data = htole32(
2057			adapter->txd_cmd | txd_lower | 4);
2058			ctxd->upper.data =
2059			    htole32(txd_upper);
2060			last = i;
2061			if (++i == adapter->num_tx_desc)
2062				i = 0;
2063		} else {
2064			ctxd->buffer_addr = htole64(seg_addr);
2065			ctxd->lower.data = htole32(
2066			adapter->txd_cmd | txd_lower | seg_len);
2067			ctxd->upper.data =
2068			    htole32(txd_upper);
2069			last = i;
2070			if (++i == adapter->num_tx_desc)
2071				i = 0;
2072		}
2073		tx_buffer->m_head = NULL;
2074		tx_buffer->next_eop = -1;
2075	}
2076
2077	txr->next_avail_desc = i;
2078	txr->tx_avail -= nsegs;
2079	if (tso_desc) /* TSO used an extra for sentinel */
2080		txr->tx_avail -= txd_used;
2081
2082        tx_buffer->m_head = m_head;
2083	/*
2084	** Here we swap the map so the last descriptor,
2085	** which gets the completion interrupt has the
2086	** real map, and the first descriptor gets the
2087	** unused map from this descriptor.
2088	*/
2089	tx_buffer_mapped->map = tx_buffer->map;
2090	tx_buffer->map = map;
2091        bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE);
2092
2093        /*
2094         * Last Descriptor of Packet
2095	 * needs End Of Packet (EOP)
2096	 * and Report Status (RS)
2097         */
2098        ctxd->lower.data |=
2099	    htole32(E1000_TXD_CMD_EOP | E1000_TXD_CMD_RS);
2100	/*
2101	 * Keep track in the first buffer which
2102	 * descriptor will be written back
2103	 */
2104	tx_buffer = &txr->tx_buffers[first];
2105	tx_buffer->next_eop = last;
2106	/* Update the watchdog time early and often */
2107	txr->watchdog_time = ticks;
2108
2109	/*
2110	 * Advance the Transmit Descriptor Tail (TDT), this tells the E1000
2111	 * that this frame is available to transmit.
2112	 */
2113	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
2114	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
2115	E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me), i);
2116
2117	return (0);
2118}
2119
2120static void
2121em_set_promisc(struct adapter *adapter)
2122{
2123	struct ifnet	*ifp = adapter->ifp;
2124	u32		reg_rctl;
2125
2126	reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2127
2128	if (ifp->if_flags & IFF_PROMISC) {
2129		reg_rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
2130		/* Turn this on if you want to see bad packets */
2131		if (em_debug_sbp)
2132			reg_rctl |= E1000_RCTL_SBP;
2133		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2134	} else if (ifp->if_flags & IFF_ALLMULTI) {
2135		reg_rctl |= E1000_RCTL_MPE;
2136		reg_rctl &= ~E1000_RCTL_UPE;
2137		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2138	}
2139}
2140
2141static void
2142em_disable_promisc(struct adapter *adapter)
2143{
2144	struct ifnet	*ifp = adapter->ifp;
2145	u32		reg_rctl;
2146	int		mcnt = 0;
2147
2148	reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2149	reg_rctl &=  (~E1000_RCTL_UPE);
2150	if (ifp->if_flags & IFF_ALLMULTI)
2151		mcnt = MAX_NUM_MULTICAST_ADDRESSES;
2152	else {
2153		struct  ifmultiaddr *ifma;
2154#if __FreeBSD_version < 800000
2155		IF_ADDR_LOCK(ifp);
2156#else
2157		if_maddr_rlock(ifp);
2158#endif
2159		TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
2160			if (ifma->ifma_addr->sa_family != AF_LINK)
2161				continue;
2162			if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
2163				break;
2164			mcnt++;
2165		}
2166#if __FreeBSD_version < 800000
2167		IF_ADDR_UNLOCK(ifp);
2168#else
2169		if_maddr_runlock(ifp);
2170#endif
2171	}
2172	/* Don't disable if in MAX groups */
2173	if (mcnt < MAX_NUM_MULTICAST_ADDRESSES)
2174		reg_rctl &=  (~E1000_RCTL_MPE);
2175	reg_rctl &=  (~E1000_RCTL_SBP);
2176	E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2177}
2178
2179
2180/*********************************************************************
2181 *  Multicast Update
2182 *
2183 *  This routine is called whenever multicast address list is updated.
2184 *
2185 **********************************************************************/
2186
2187static void
2188em_set_multi(struct adapter *adapter)
2189{
2190	struct ifnet	*ifp = adapter->ifp;
2191	struct ifmultiaddr *ifma;
2192	u32 reg_rctl = 0;
2193	u8  *mta; /* Multicast array memory */
2194	int mcnt = 0;
2195
2196	IOCTL_DEBUGOUT("em_set_multi: begin");
2197
2198	mta = adapter->mta;
2199	bzero(mta, sizeof(u8) * ETH_ADDR_LEN * MAX_NUM_MULTICAST_ADDRESSES);
2200
2201	if (adapter->hw.mac.type == e1000_82542 &&
2202	    adapter->hw.revision_id == E1000_REVISION_2) {
2203		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2204		if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
2205			e1000_pci_clear_mwi(&adapter->hw);
2206		reg_rctl |= E1000_RCTL_RST;
2207		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2208		msec_delay(5);
2209	}
2210
2211#if __FreeBSD_version < 800000
2212	IF_ADDR_LOCK(ifp);
2213#else
2214	if_maddr_rlock(ifp);
2215#endif
2216	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
2217		if (ifma->ifma_addr->sa_family != AF_LINK)
2218			continue;
2219
2220		if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
2221			break;
2222
2223		bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
2224		    &mta[mcnt * ETH_ADDR_LEN], ETH_ADDR_LEN);
2225		mcnt++;
2226	}
2227#if __FreeBSD_version < 800000
2228	IF_ADDR_UNLOCK(ifp);
2229#else
2230	if_maddr_runlock(ifp);
2231#endif
2232	if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES) {
2233		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2234		reg_rctl |= E1000_RCTL_MPE;
2235		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2236	} else
2237		e1000_update_mc_addr_list(&adapter->hw, mta, mcnt);
2238
2239	if (adapter->hw.mac.type == e1000_82542 &&
2240	    adapter->hw.revision_id == E1000_REVISION_2) {
2241		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2242		reg_rctl &= ~E1000_RCTL_RST;
2243		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2244		msec_delay(5);
2245		if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
2246			e1000_pci_set_mwi(&adapter->hw);
2247	}
2248}
2249
2250
2251/*********************************************************************
2252 *  Timer routine
2253 *
2254 *  This routine checks for link status and updates statistics.
2255 *
2256 **********************************************************************/
2257
2258static void
2259em_local_timer(void *arg)
2260{
2261	struct adapter	*adapter = arg;
2262	struct ifnet	*ifp = adapter->ifp;
2263	struct tx_ring	*txr = adapter->tx_rings;
2264	struct rx_ring	*rxr = adapter->rx_rings;
2265	u32		trigger;
2266
2267	EM_CORE_LOCK_ASSERT(adapter);
2268
2269	em_update_link_status(adapter);
2270	em_update_stats_counters(adapter);
2271
2272	/* Reset LAA into RAR[0] on 82571 */
2273	if ((adapter->hw.mac.type == e1000_82571) &&
2274	    e1000_get_laa_state_82571(&adapter->hw))
2275		e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);
2276
2277	/* Mask to use in the irq trigger */
2278	if (adapter->msix_mem)
2279		trigger = rxr->ims; /* RX for 82574 */
2280	else
2281		trigger = E1000_ICS_RXDMT0;
2282
2283	/*
2284	** Check on the state of the TX queue(s), this
2285	** can be done without the lock because its RO
2286	** and the HUNG state will be static if set.
2287	*/
2288	for (int i = 0; i < adapter->num_queues; i++, txr++) {
2289		if ((txr->queue_status == EM_QUEUE_HUNG) &&
2290		    (adapter->pause_frames == 0))
2291			goto hung;
2292		/* Schedule a TX tasklet if needed */
2293		if (txr->tx_avail <= EM_MAX_SCATTER)
2294			taskqueue_enqueue(txr->tq, &txr->tx_task);
2295	}
2296
2297	adapter->pause_frames = 0;
2298	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
2299#ifndef DEVICE_POLLING
2300	/* Trigger an RX interrupt to guarantee mbuf refresh */
2301	E1000_WRITE_REG(&adapter->hw, E1000_ICS, trigger);
2302#endif
2303	return;
2304hung:
2305	/* Looks like we're hung */
2306	device_printf(adapter->dev, "Watchdog timeout -- resetting\n");
2307	device_printf(adapter->dev,
2308	    "Queue(%d) tdh = %d, hw tdt = %d\n", txr->me,
2309	    E1000_READ_REG(&adapter->hw, E1000_TDH(txr->me)),
2310	    E1000_READ_REG(&adapter->hw, E1000_TDT(txr->me)));
2311	device_printf(adapter->dev,"TX(%d) desc avail = %d,"
2312	    "Next TX to Clean = %d\n",
2313	    txr->me, txr->tx_avail, txr->next_to_clean);
2314	ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2315	adapter->watchdog_events++;
2316	adapter->pause_frames = 0;
2317	em_init_locked(adapter);
2318}
2319
2320
2321static void
2322em_update_link_status(struct adapter *adapter)
2323{
2324	struct e1000_hw *hw = &adapter->hw;
2325	struct ifnet *ifp = adapter->ifp;
2326	device_t dev = adapter->dev;
2327	struct tx_ring *txr = adapter->tx_rings;
2328	u32 link_check = 0;
2329
2330	/* Get the cached link value or read phy for real */
2331	switch (hw->phy.media_type) {
2332	case e1000_media_type_copper:
2333		if (hw->mac.get_link_status) {
2334			/* Do the work to read phy */
2335			e1000_check_for_link(hw);
2336			link_check = !hw->mac.get_link_status;
2337			if (link_check) /* ESB2 fix */
2338				e1000_cfg_on_link_up(hw);
2339		} else
2340			link_check = TRUE;
2341		break;
2342	case e1000_media_type_fiber:
2343		e1000_check_for_link(hw);
2344		link_check = (E1000_READ_REG(hw, E1000_STATUS) &
2345                                 E1000_STATUS_LU);
2346		break;
2347	case e1000_media_type_internal_serdes:
2348		e1000_check_for_link(hw);
2349		link_check = adapter->hw.mac.serdes_has_link;
2350		break;
2351	default:
2352	case e1000_media_type_unknown:
2353		break;
2354	}
2355
2356	/* Now check for a transition */
2357	if (link_check && (adapter->link_active == 0)) {
2358		e1000_get_speed_and_duplex(hw, &adapter->link_speed,
2359		    &adapter->link_duplex);
2360		/* Check if we must disable SPEED_MODE bit on PCI-E */
2361		if ((adapter->link_speed != SPEED_1000) &&
2362		    ((hw->mac.type == e1000_82571) ||
2363		    (hw->mac.type == e1000_82572))) {
2364			int tarc0;
2365			tarc0 = E1000_READ_REG(hw, E1000_TARC(0));
2366			tarc0 &= ~SPEED_MODE_BIT;
2367			E1000_WRITE_REG(hw, E1000_TARC(0), tarc0);
2368		}
2369		if (bootverbose)
2370			device_printf(dev, "Link is up %d Mbps %s\n",
2371			    adapter->link_speed,
2372			    ((adapter->link_duplex == FULL_DUPLEX) ?
2373			    "Full Duplex" : "Half Duplex"));
2374		adapter->link_active = 1;
2375		adapter->smartspeed = 0;
2376		ifp->if_baudrate = adapter->link_speed * 1000000;
2377		if_link_state_change(ifp, LINK_STATE_UP);
2378	} else if (!link_check && (adapter->link_active == 1)) {
2379		ifp->if_baudrate = adapter->link_speed = 0;
2380		adapter->link_duplex = 0;
2381		if (bootverbose)
2382			device_printf(dev, "Link is Down\n");
2383		adapter->link_active = 0;
2384		/* Link down, disable watchdog */
2385		for (int i = 0; i < adapter->num_queues; i++, txr++)
2386			txr->queue_status = EM_QUEUE_IDLE;
2387		if_link_state_change(ifp, LINK_STATE_DOWN);
2388	}
2389}
2390
2391/*********************************************************************
2392 *
2393 *  This routine disables all traffic on the adapter by issuing a
2394 *  global reset on the MAC and deallocates TX/RX buffers.
2395 *
2396 *  This routine should always be called with BOTH the CORE
2397 *  and TX locks.
2398 **********************************************************************/
2399
2400static void
2401em_stop(void *arg)
2402{
2403	struct adapter	*adapter = arg;
2404	struct ifnet	*ifp = adapter->ifp;
2405	struct tx_ring	*txr = adapter->tx_rings;
2406
2407	EM_CORE_LOCK_ASSERT(adapter);
2408
2409	INIT_DEBUGOUT("em_stop: begin");
2410
2411	em_disable_intr(adapter);
2412	callout_stop(&adapter->timer);
2413
2414	/* Tell the stack that the interface is no longer active */
2415	ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2416	ifp->if_drv_flags |= IFF_DRV_OACTIVE;
2417
2418        /* Unarm watchdog timer. */
2419	for (int i = 0; i < adapter->num_queues; i++, txr++) {
2420		EM_TX_LOCK(txr);
2421		txr->queue_status = EM_QUEUE_IDLE;
2422		EM_TX_UNLOCK(txr);
2423	}
2424
2425	e1000_reset_hw(&adapter->hw);
2426	E1000_WRITE_REG(&adapter->hw, E1000_WUC, 0);
2427
2428	e1000_led_off(&adapter->hw);
2429	e1000_cleanup_led(&adapter->hw);
2430}
2431
2432
2433/*********************************************************************
2434 *
2435 *  Determine hardware revision.
2436 *
2437 **********************************************************************/
2438static void
2439em_identify_hardware(struct adapter *adapter)
2440{
2441	device_t dev = adapter->dev;
2442
2443	/* Make sure our PCI config space has the necessary stuff set */
2444	adapter->hw.bus.pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2);
2445	if (!((adapter->hw.bus.pci_cmd_word & PCIM_CMD_BUSMASTEREN) &&
2446	    (adapter->hw.bus.pci_cmd_word & PCIM_CMD_MEMEN))) {
2447		device_printf(dev, "Memory Access and/or Bus Master bits "
2448		    "were not set!\n");
2449		adapter->hw.bus.pci_cmd_word |=
2450		(PCIM_CMD_BUSMASTEREN | PCIM_CMD_MEMEN);
2451		pci_write_config(dev, PCIR_COMMAND,
2452		    adapter->hw.bus.pci_cmd_word, 2);
2453	}
2454
2455	/* Save off the information about this board */
2456	adapter->hw.vendor_id = pci_get_vendor(dev);
2457	adapter->hw.device_id = pci_get_device(dev);
2458	adapter->hw.revision_id = pci_read_config(dev, PCIR_REVID, 1);
2459	adapter->hw.subsystem_vendor_id =
2460	    pci_read_config(dev, PCIR_SUBVEND_0, 2);
2461	adapter->hw.subsystem_device_id =
2462	    pci_read_config(dev, PCIR_SUBDEV_0, 2);
2463
2464	/* Do Shared Code Init and Setup */
2465	if (e1000_set_mac_type(&adapter->hw)) {
2466		device_printf(dev, "Setup init failure\n");
2467		return;
2468	}
2469}
2470
2471static int
2472em_allocate_pci_resources(struct adapter *adapter)
2473{
2474	device_t	dev = adapter->dev;
2475	int		rid;
2476
2477	rid = PCIR_BAR(0);
2478	adapter->memory = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
2479	    &rid, RF_ACTIVE);
2480	if (adapter->memory == NULL) {
2481		device_printf(dev, "Unable to allocate bus resource: memory\n");
2482		return (ENXIO);
2483	}
2484	adapter->osdep.mem_bus_space_tag =
2485	    rman_get_bustag(adapter->memory);
2486	adapter->osdep.mem_bus_space_handle =
2487	    rman_get_bushandle(adapter->memory);
2488	adapter->hw.hw_addr = (u8 *)&adapter->osdep.mem_bus_space_handle;
2489
2490	/* Default to a single queue */
2491	adapter->num_queues = 1;
2492
2493	/*
2494	 * Setup MSI/X or MSI if PCI Express
2495	 */
2496	adapter->msix = em_setup_msix(adapter);
2497
2498	adapter->hw.back = &adapter->osdep;
2499
2500	return (0);
2501}
2502
2503/*********************************************************************
2504 *
2505 *  Setup the Legacy or MSI Interrupt handler
2506 *
2507 **********************************************************************/
2508int
2509em_allocate_legacy(struct adapter *adapter)
2510{
2511	device_t dev = adapter->dev;
2512	struct tx_ring	*txr = adapter->tx_rings;
2513	int error, rid = 0;
2514
2515	/* Manually turn off all interrupts */
2516	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2517
2518	if (adapter->msix == 1) /* using MSI */
2519		rid = 1;
2520	/* We allocate a single interrupt resource */
2521	adapter->res = bus_alloc_resource_any(dev,
2522	    SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2523	if (adapter->res == NULL) {
2524		device_printf(dev, "Unable to allocate bus resource: "
2525		    "interrupt\n");
2526		return (ENXIO);
2527	}
2528
2529	/*
2530	 * Allocate a fast interrupt and the associated
2531	 * deferred processing contexts.
2532	 */
2533	TASK_INIT(&adapter->que_task, 0, em_handle_que, adapter);
2534	adapter->tq = taskqueue_create_fast("em_taskq", M_NOWAIT,
2535	    taskqueue_thread_enqueue, &adapter->tq);
2536	taskqueue_start_threads(&adapter->tq, 1, PI_NET, "%s que",
2537	    device_get_nameunit(adapter->dev));
2538	/* Use a TX only tasklet for local timer */
2539	TASK_INIT(&txr->tx_task, 0, em_handle_tx, txr);
2540	txr->tq = taskqueue_create_fast("em_txq", M_NOWAIT,
2541	    taskqueue_thread_enqueue, &txr->tq);
2542	taskqueue_start_threads(&txr->tq, 1, PI_NET, "%s txq",
2543	    device_get_nameunit(adapter->dev));
2544	TASK_INIT(&adapter->link_task, 0, em_handle_link, adapter);
2545	if ((error = bus_setup_intr(dev, adapter->res, INTR_TYPE_NET,
2546	    em_irq_fast, NULL, adapter, &adapter->tag)) != 0) {
2547		device_printf(dev, "Failed to register fast interrupt "
2548			    "handler: %d\n", error);
2549		taskqueue_free(adapter->tq);
2550		adapter->tq = NULL;
2551		return (error);
2552	}
2553
2554	return (0);
2555}
2556
2557/*********************************************************************
2558 *
2559 *  Setup the MSIX Interrupt handlers
2560 *   This is not really Multiqueue, rather
2561 *   its just seperate interrupt vectors
2562 *   for TX, RX, and Link.
2563 *
2564 **********************************************************************/
2565int
2566em_allocate_msix(struct adapter *adapter)
2567{
2568	device_t	dev = adapter->dev;
2569	struct		tx_ring *txr = adapter->tx_rings;
2570	struct		rx_ring *rxr = adapter->rx_rings;
2571	int		error, rid, vector = 0;
2572
2573
2574	/* Make sure all interrupts are disabled */
2575	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2576
2577	/* First set up ring resources */
2578	for (int i = 0; i < adapter->num_queues; i++, txr++, rxr++) {
2579
2580		/* RX ring */
2581		rid = vector + 1;
2582
2583		rxr->res = bus_alloc_resource_any(dev,
2584		    SYS_RES_IRQ, &rid, RF_ACTIVE);
2585		if (rxr->res == NULL) {
2586			device_printf(dev,
2587			    "Unable to allocate bus resource: "
2588			    "RX MSIX Interrupt %d\n", i);
2589			return (ENXIO);
2590		}
2591		if ((error = bus_setup_intr(dev, rxr->res,
2592		    INTR_TYPE_NET | INTR_MPSAFE, NULL, em_msix_rx,
2593		    rxr, &rxr->tag)) != 0) {
2594			device_printf(dev, "Failed to register RX handler");
2595			return (error);
2596		}
2597#if __FreeBSD_version >= 800504
2598		bus_describe_intr(dev, rxr->res, rxr->tag, "rx %d", i);
2599#endif
2600		rxr->msix = vector++; /* NOTE increment vector for TX */
2601		TASK_INIT(&rxr->rx_task, 0, em_handle_rx, rxr);
2602		rxr->tq = taskqueue_create_fast("em_rxq", M_NOWAIT,
2603		    taskqueue_thread_enqueue, &rxr->tq);
2604		taskqueue_start_threads(&rxr->tq, 1, PI_NET, "%s rxq",
2605		    device_get_nameunit(adapter->dev));
2606		/*
2607		** Set the bit to enable interrupt
2608		** in E1000_IMS -- bits 20 and 21
2609		** are for RX0 and RX1, note this has
2610		** NOTHING to do with the MSIX vector
2611		*/
2612		rxr->ims = 1 << (20 + i);
2613		adapter->ivars |= (8 | rxr->msix) << (i * 4);
2614
2615		/* TX ring */
2616		rid = vector + 1;
2617		txr->res = bus_alloc_resource_any(dev,
2618		    SYS_RES_IRQ, &rid, RF_ACTIVE);
2619		if (txr->res == NULL) {
2620			device_printf(dev,
2621			    "Unable to allocate bus resource: "
2622			    "TX MSIX Interrupt %d\n", i);
2623			return (ENXIO);
2624		}
2625		if ((error = bus_setup_intr(dev, txr->res,
2626		    INTR_TYPE_NET | INTR_MPSAFE, NULL, em_msix_tx,
2627		    txr, &txr->tag)) != 0) {
2628			device_printf(dev, "Failed to register TX handler");
2629			return (error);
2630		}
2631#if __FreeBSD_version >= 800504
2632		bus_describe_intr(dev, txr->res, txr->tag, "tx %d", i);
2633#endif
2634		txr->msix = vector++; /* Increment vector for next pass */
2635		TASK_INIT(&txr->tx_task, 0, em_handle_tx, txr);
2636		txr->tq = taskqueue_create_fast("em_txq", M_NOWAIT,
2637		    taskqueue_thread_enqueue, &txr->tq);
2638		taskqueue_start_threads(&txr->tq, 1, PI_NET, "%s txq",
2639		    device_get_nameunit(adapter->dev));
2640		/*
2641		** Set the bit to enable interrupt
2642		** in E1000_IMS -- bits 22 and 23
2643		** are for TX0 and TX1, note this has
2644		** NOTHING to do with the MSIX vector
2645		*/
2646		txr->ims = 1 << (22 + i);
2647		adapter->ivars |= (8 | txr->msix) << (8 + (i * 4));
2648	}
2649
2650	/* Link interrupt */
2651	++rid;
2652	adapter->res = bus_alloc_resource_any(dev,
2653	    SYS_RES_IRQ, &rid, RF_ACTIVE);
2654	if (!adapter->res) {
2655		device_printf(dev,"Unable to allocate "
2656		    "bus resource: Link interrupt [%d]\n", rid);
2657		return (ENXIO);
2658        }
2659	/* Set the link handler function */
2660	error = bus_setup_intr(dev, adapter->res,
2661	    INTR_TYPE_NET | INTR_MPSAFE, NULL,
2662	    em_msix_link, adapter, &adapter->tag);
2663	if (error) {
2664		adapter->res = NULL;
2665		device_printf(dev, "Failed to register LINK handler");
2666		return (error);
2667	}
2668#if __FreeBSD_version >= 800504
2669		bus_describe_intr(dev, adapter->res, adapter->tag, "link");
2670#endif
2671	adapter->linkvec = vector;
2672	adapter->ivars |=  (8 | vector) << 16;
2673	adapter->ivars |= 0x80000000;
2674
2675	return (0);
2676}
2677
2678
2679static void
2680em_free_pci_resources(struct adapter *adapter)
2681{
2682	device_t	dev = adapter->dev;
2683	struct tx_ring	*txr;
2684	struct rx_ring	*rxr;
2685	int		rid;
2686
2687
2688	/*
2689	** Release all the queue interrupt resources:
2690	*/
2691	for (int i = 0; i < adapter->num_queues; i++) {
2692		txr = &adapter->tx_rings[i];
2693		rxr = &adapter->rx_rings[i];
2694		/* an early abort? */
2695		if ((txr == NULL) || (rxr == NULL))
2696			break;
2697		rid = txr->msix +1;
2698		if (txr->tag != NULL) {
2699			bus_teardown_intr(dev, txr->res, txr->tag);
2700			txr->tag = NULL;
2701		}
2702		if (txr->res != NULL)
2703			bus_release_resource(dev, SYS_RES_IRQ,
2704			    rid, txr->res);
2705		rid = rxr->msix +1;
2706		if (rxr->tag != NULL) {
2707			bus_teardown_intr(dev, rxr->res, rxr->tag);
2708			rxr->tag = NULL;
2709		}
2710		if (rxr->res != NULL)
2711			bus_release_resource(dev, SYS_RES_IRQ,
2712			    rid, rxr->res);
2713	}
2714
2715        if (adapter->linkvec) /* we are doing MSIX */
2716                rid = adapter->linkvec + 1;
2717        else
2718                (adapter->msix != 0) ? (rid = 1):(rid = 0);
2719
2720	if (adapter->tag != NULL) {
2721		bus_teardown_intr(dev, adapter->res, adapter->tag);
2722		adapter->tag = NULL;
2723	}
2724
2725	if (adapter->res != NULL)
2726		bus_release_resource(dev, SYS_RES_IRQ, rid, adapter->res);
2727
2728
2729	if (adapter->msix)
2730		pci_release_msi(dev);
2731
2732	if (adapter->msix_mem != NULL)
2733		bus_release_resource(dev, SYS_RES_MEMORY,
2734		    PCIR_BAR(EM_MSIX_BAR), adapter->msix_mem);
2735
2736	if (adapter->memory != NULL)
2737		bus_release_resource(dev, SYS_RES_MEMORY,
2738		    PCIR_BAR(0), adapter->memory);
2739
2740	if (adapter->flash != NULL)
2741		bus_release_resource(dev, SYS_RES_MEMORY,
2742		    EM_FLASH, adapter->flash);
2743}
2744
2745/*
2746 * Setup MSI or MSI/X
2747 */
2748static int
2749em_setup_msix(struct adapter *adapter)
2750{
2751	device_t dev = adapter->dev;
2752	int val = 0;
2753
2754	/*
2755	** Setup MSI/X for Hartwell: tests have shown
2756	** use of two queues to be unstable, and to
2757	** provide no great gain anyway, so we simply
2758	** seperate the interrupts and use a single queue.
2759	*/
2760	if ((adapter->hw.mac.type == e1000_82574) &&
2761	    (em_enable_msix == TRUE)) {
2762		/* Map the MSIX BAR */
2763		int rid = PCIR_BAR(EM_MSIX_BAR);
2764		adapter->msix_mem = bus_alloc_resource_any(dev,
2765		    SYS_RES_MEMORY, &rid, RF_ACTIVE);
2766       		if (!adapter->msix_mem) {
2767			/* May not be enabled */
2768               		device_printf(adapter->dev,
2769			    "Unable to map MSIX table \n");
2770			goto msi;
2771       		}
2772		val = pci_msix_count(dev);
2773		/* We only need 3 vectors */
2774		if (val > 3)
2775			val = 3;
2776		if ((val != 3) && (val != 5)) {
2777			bus_release_resource(dev, SYS_RES_MEMORY,
2778			    PCIR_BAR(EM_MSIX_BAR), adapter->msix_mem);
2779			adapter->msix_mem = NULL;
2780               		device_printf(adapter->dev,
2781			    "MSIX: incorrect vectors, using MSI\n");
2782			goto msi;
2783		}
2784
2785		if (pci_alloc_msix(dev, &val) == 0) {
2786			device_printf(adapter->dev,
2787			    "Using MSIX interrupts "
2788			    "with %d vectors\n", val);
2789		}
2790
2791		return (val);
2792	}
2793msi:
2794       	val = pci_msi_count(dev);
2795       	if (val == 1 && pci_alloc_msi(dev, &val) == 0) {
2796               	adapter->msix = 1;
2797               	device_printf(adapter->dev,"Using an MSI interrupt\n");
2798		return (val);
2799	}
2800	/* Should only happen due to manual configuration */
2801	device_printf(adapter->dev,"No MSI/MSIX using a Legacy IRQ\n");
2802	return (0);
2803}
2804
2805
2806/*********************************************************************
2807 *
2808 *  Initialize the hardware to a configuration
2809 *  as specified by the adapter structure.
2810 *
2811 **********************************************************************/
2812static void
2813em_reset(struct adapter *adapter)
2814{
2815	device_t	dev = adapter->dev;
2816	struct ifnet	*ifp = adapter->ifp;
2817	struct e1000_hw	*hw = &adapter->hw;
2818	u16		rx_buffer_size;
2819	u32		pba;
2820
2821	INIT_DEBUGOUT("em_reset: begin");
2822
2823	/* Set up smart power down as default off on newer adapters. */
2824	if (!em_smart_pwr_down && (hw->mac.type == e1000_82571 ||
2825	    hw->mac.type == e1000_82572)) {
2826		u16 phy_tmp = 0;
2827
2828		/* Speed up time to link by disabling smart power down. */
2829		e1000_read_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, &phy_tmp);
2830		phy_tmp &= ~IGP02E1000_PM_SPD;
2831		e1000_write_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, phy_tmp);
2832	}
2833
2834	/*
2835	 * Packet Buffer Allocation (PBA)
2836	 * Writing PBA sets the receive portion of the buffer
2837	 * the remainder is used for the transmit buffer.
2838	 */
2839	switch (hw->mac.type) {
2840	/* Total Packet Buffer on these is 48K */
2841	case e1000_82571:
2842	case e1000_82572:
2843	case e1000_80003es2lan:
2844			pba = E1000_PBA_32K; /* 32K for Rx, 16K for Tx */
2845		break;
2846	case e1000_82573: /* 82573: Total Packet Buffer is 32K */
2847			pba = E1000_PBA_12K; /* 12K for Rx, 20K for Tx */
2848		break;
2849	case e1000_82574:
2850	case e1000_82583:
2851			pba = E1000_PBA_20K; /* 20K for Rx, 20K for Tx */
2852		break;
2853	case e1000_ich8lan:
2854		pba = E1000_PBA_8K;
2855		break;
2856	case e1000_ich9lan:
2857	case e1000_ich10lan:
2858		/* Boost Receive side for jumbo frames */
2859		if (adapter->hw.mac.max_frame_size > 4096)
2860			pba = E1000_PBA_14K;
2861		else
2862			pba = E1000_PBA_10K;
2863		break;
2864	case e1000_pchlan:
2865	case e1000_pch2lan:
2866	case e1000_pch_lpt:
2867		pba = E1000_PBA_26K;
2868		break;
2869	default:
2870		if (adapter->hw.mac.max_frame_size > 8192)
2871			pba = E1000_PBA_40K; /* 40K for Rx, 24K for Tx */
2872		else
2873			pba = E1000_PBA_48K; /* 48K for Rx, 16K for Tx */
2874	}
2875	E1000_WRITE_REG(&adapter->hw, E1000_PBA, pba);
2876
2877	/*
2878	 * These parameters control the automatic generation (Tx) and
2879	 * response (Rx) to Ethernet PAUSE frames.
2880	 * - High water mark should allow for at least two frames to be
2881	 *   received after sending an XOFF.
2882	 * - Low water mark works best when it is very near the high water mark.
2883	 *   This allows the receiver to restart by sending XON when it has
2884	 *   drained a bit. Here we use an arbitary value of 1500 which will
2885	 *   restart after one full frame is pulled from the buffer. There
2886	 *   could be several smaller frames in the buffer and if so they will
2887	 *   not trigger the XON until their total number reduces the buffer
2888	 *   by 1500.
2889	 * - The pause time is fairly large at 1000 x 512ns = 512 usec.
2890	 */
2891	rx_buffer_size = ((E1000_READ_REG(hw, E1000_PBA) & 0xffff) << 10 );
2892	hw->fc.high_water = rx_buffer_size -
2893	    roundup2(adapter->hw.mac.max_frame_size, 1024);
2894	hw->fc.low_water = hw->fc.high_water - 1500;
2895
2896	if (adapter->fc) /* locally set flow control value? */
2897		hw->fc.requested_mode = adapter->fc;
2898	else
2899		hw->fc.requested_mode = e1000_fc_full;
2900
2901	if (hw->mac.type == e1000_80003es2lan)
2902		hw->fc.pause_time = 0xFFFF;
2903	else
2904		hw->fc.pause_time = EM_FC_PAUSE_TIME;
2905
2906	hw->fc.send_xon = TRUE;
2907
2908	/* Device specific overrides/settings */
2909	switch (hw->mac.type) {
2910	case e1000_pchlan:
2911		/* Workaround: no TX flow ctrl for PCH */
2912                hw->fc.requested_mode = e1000_fc_rx_pause;
2913		hw->fc.pause_time = 0xFFFF; /* override */
2914		if (ifp->if_mtu > ETHERMTU) {
2915			hw->fc.high_water = 0x3500;
2916			hw->fc.low_water = 0x1500;
2917		} else {
2918			hw->fc.high_water = 0x5000;
2919			hw->fc.low_water = 0x3000;
2920		}
2921		hw->fc.refresh_time = 0x1000;
2922		break;
2923	case e1000_pch2lan:
2924	case e1000_pch_lpt:
2925		hw->fc.high_water = 0x5C20;
2926		hw->fc.low_water = 0x5048;
2927		hw->fc.pause_time = 0x0650;
2928		hw->fc.refresh_time = 0x0400;
2929		/* Jumbos need adjusted PBA */
2930		if (ifp->if_mtu > ETHERMTU)
2931			E1000_WRITE_REG(hw, E1000_PBA, 12);
2932		else
2933			E1000_WRITE_REG(hw, E1000_PBA, 26);
2934		break;
2935        case e1000_ich9lan:
2936        case e1000_ich10lan:
2937		if (ifp->if_mtu > ETHERMTU) {
2938			hw->fc.high_water = 0x2800;
2939			hw->fc.low_water = hw->fc.high_water - 8;
2940			break;
2941		}
2942		/* else fall thru */
2943	default:
2944		if (hw->mac.type == e1000_80003es2lan)
2945			hw->fc.pause_time = 0xFFFF;
2946		break;
2947	}
2948
2949	/* Issue a global reset */
2950	e1000_reset_hw(hw);
2951	E1000_WRITE_REG(hw, E1000_WUC, 0);
2952	em_disable_aspm(adapter);
2953	/* and a re-init */
2954	if (e1000_init_hw(hw) < 0) {
2955		device_printf(dev, "Hardware Initialization Failed\n");
2956		return;
2957	}
2958
2959	E1000_WRITE_REG(hw, E1000_VET, ETHERTYPE_VLAN);
2960	e1000_get_phy_info(hw);
2961	e1000_check_for_link(hw);
2962	return;
2963}
2964
2965/*********************************************************************
2966 *
2967 *  Setup networking device structure and register an interface.
2968 *
2969 **********************************************************************/
2970static int
2971em_setup_interface(device_t dev, struct adapter *adapter)
2972{
2973	struct ifnet   *ifp;
2974
2975	INIT_DEBUGOUT("em_setup_interface: begin");
2976
2977	ifp = adapter->ifp = if_alloc(IFT_ETHER);
2978	if (ifp == NULL) {
2979		device_printf(dev, "can not allocate ifnet structure\n");
2980		return (-1);
2981	}
2982	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
2983	ifp->if_init =  em_init;
2984	ifp->if_softc = adapter;
2985	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
2986	ifp->if_ioctl = em_ioctl;
2987#ifdef EM_MULTIQUEUE
2988	/* Multiqueue stack interface */
2989	ifp->if_transmit = em_mq_start;
2990	ifp->if_qflush = em_qflush;
2991#else
2992	ifp->if_start = em_start;
2993	IFQ_SET_MAXLEN(&ifp->if_snd, adapter->num_tx_desc - 1);
2994	ifp->if_snd.ifq_drv_maxlen = adapter->num_tx_desc - 1;
2995	IFQ_SET_READY(&ifp->if_snd);
2996#endif
2997
2998	ether_ifattach(ifp, adapter->hw.mac.addr);
2999
3000	ifp->if_capabilities = ifp->if_capenable = 0;
3001
3002
3003	ifp->if_capabilities |= IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM;
3004	ifp->if_capabilities |= IFCAP_TSO4;
3005	/*
3006	 * Tell the upper layer(s) we
3007	 * support full VLAN capability
3008	 */
3009	ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header);
3010	ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING
3011			     |  IFCAP_VLAN_HWTSO
3012			     |  IFCAP_VLAN_MTU;
3013	ifp->if_capenable = ifp->if_capabilities;
3014
3015	/*
3016	** Don't turn this on by default, if vlans are
3017	** created on another pseudo device (eg. lagg)
3018	** then vlan events are not passed thru, breaking
3019	** operation, but with HW FILTER off it works. If
3020	** using vlans directly on the em driver you can
3021	** enable this and get full hardware tag filtering.
3022	*/
3023	ifp->if_capabilities |= IFCAP_VLAN_HWFILTER;
3024
3025#ifdef DEVICE_POLLING
3026	ifp->if_capabilities |= IFCAP_POLLING;
3027#endif
3028
3029	/* Enable only WOL MAGIC by default */
3030	if (adapter->wol) {
3031		ifp->if_capabilities |= IFCAP_WOL;
3032		ifp->if_capenable |= IFCAP_WOL_MAGIC;
3033	}
3034
3035	/*
3036	 * Specify the media types supported by this adapter and register
3037	 * callbacks to update media and link information
3038	 */
3039	ifmedia_init(&adapter->media, IFM_IMASK,
3040	    em_media_change, em_media_status);
3041	if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
3042	    (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
3043		u_char fiber_type = IFM_1000_SX;	/* default type */
3044
3045		ifmedia_add(&adapter->media, IFM_ETHER | fiber_type | IFM_FDX,
3046			    0, NULL);
3047		ifmedia_add(&adapter->media, IFM_ETHER | fiber_type, 0, NULL);
3048	} else {
3049		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T, 0, NULL);
3050		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T | IFM_FDX,
3051			    0, NULL);
3052		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX,
3053			    0, NULL);
3054		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX | IFM_FDX,
3055			    0, NULL);
3056		if (adapter->hw.phy.type != e1000_phy_ife) {
3057			ifmedia_add(&adapter->media,
3058				IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
3059			ifmedia_add(&adapter->media,
3060				IFM_ETHER | IFM_1000_T, 0, NULL);
3061		}
3062	}
3063	ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL);
3064	ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO);
3065	return (0);
3066}
3067
3068
3069/*
3070 * Manage DMA'able memory.
3071 */
3072static void
3073em_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
3074{
3075	if (error)
3076		return;
3077	*(bus_addr_t *) arg = segs[0].ds_addr;
3078}
3079
3080static int
3081em_dma_malloc(struct adapter *adapter, bus_size_t size,
3082        struct em_dma_alloc *dma, int mapflags)
3083{
3084	int error;
3085
3086	error = bus_dma_tag_create(bus_get_dma_tag(adapter->dev), /* parent */
3087				EM_DBA_ALIGN, 0,	/* alignment, bounds */
3088				BUS_SPACE_MAXADDR,	/* lowaddr */
3089				BUS_SPACE_MAXADDR,	/* highaddr */
3090				NULL, NULL,		/* filter, filterarg */
3091				size,			/* maxsize */
3092				1,			/* nsegments */
3093				size,			/* maxsegsize */
3094				0,			/* flags */
3095				NULL,			/* lockfunc */
3096				NULL,			/* lockarg */
3097				&dma->dma_tag);
3098	if (error) {
3099		device_printf(adapter->dev,
3100		    "%s: bus_dma_tag_create failed: %d\n",
3101		    __func__, error);
3102		goto fail_0;
3103	}
3104
3105	error = bus_dmamem_alloc(dma->dma_tag, (void**) &dma->dma_vaddr,
3106	    BUS_DMA_NOWAIT | BUS_DMA_COHERENT, &dma->dma_map);
3107	if (error) {
3108		device_printf(adapter->dev,
3109		    "%s: bus_dmamem_alloc(%ju) failed: %d\n",
3110		    __func__, (uintmax_t)size, error);
3111		goto fail_2;
3112	}
3113
3114	dma->dma_paddr = 0;
3115	error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
3116	    size, em_dmamap_cb, &dma->dma_paddr, mapflags | BUS_DMA_NOWAIT);
3117	if (error || dma->dma_paddr == 0) {
3118		device_printf(adapter->dev,
3119		    "%s: bus_dmamap_load failed: %d\n",
3120		    __func__, error);
3121		goto fail_3;
3122	}
3123
3124	return (0);
3125
3126fail_3:
3127	bus_dmamap_unload(dma->dma_tag, dma->dma_map);
3128fail_2:
3129	bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
3130	bus_dma_tag_destroy(dma->dma_tag);
3131fail_0:
3132	dma->dma_map = NULL;
3133	dma->dma_tag = NULL;
3134
3135	return (error);
3136}
3137
3138static void
3139em_dma_free(struct adapter *adapter, struct em_dma_alloc *dma)
3140{
3141	if (dma->dma_tag == NULL)
3142		return;
3143	if (dma->dma_map != NULL) {
3144		bus_dmamap_sync(dma->dma_tag, dma->dma_map,
3145		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
3146		bus_dmamap_unload(dma->dma_tag, dma->dma_map);
3147		bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
3148		dma->dma_map = NULL;
3149	}
3150	bus_dma_tag_destroy(dma->dma_tag);
3151	dma->dma_tag = NULL;
3152}
3153
3154
3155/*********************************************************************
3156 *
3157 *  Allocate memory for the transmit and receive rings, and then
3158 *  the descriptors associated with each, called only once at attach.
3159 *
3160 **********************************************************************/
3161static int
3162em_allocate_queues(struct adapter *adapter)
3163{
3164	device_t		dev = adapter->dev;
3165	struct tx_ring		*txr = NULL;
3166	struct rx_ring		*rxr = NULL;
3167	int rsize, tsize, error = E1000_SUCCESS;
3168	int txconf = 0, rxconf = 0;
3169
3170
3171	/* Allocate the TX ring struct memory */
3172	if (!(adapter->tx_rings =
3173	    (struct tx_ring *) malloc(sizeof(struct tx_ring) *
3174	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3175		device_printf(dev, "Unable to allocate TX ring memory\n");
3176		error = ENOMEM;
3177		goto fail;
3178	}
3179
3180	/* Now allocate the RX */
3181	if (!(adapter->rx_rings =
3182	    (struct rx_ring *) malloc(sizeof(struct rx_ring) *
3183	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3184		device_printf(dev, "Unable to allocate RX ring memory\n");
3185		error = ENOMEM;
3186		goto rx_fail;
3187	}
3188
3189	tsize = roundup2(adapter->num_tx_desc *
3190	    sizeof(struct e1000_tx_desc), EM_DBA_ALIGN);
3191	/*
3192	 * Now set up the TX queues, txconf is needed to handle the
3193	 * possibility that things fail midcourse and we need to
3194	 * undo memory gracefully
3195	 */
3196	for (int i = 0; i < adapter->num_queues; i++, txconf++) {
3197		/* Set up some basics */
3198		txr = &adapter->tx_rings[i];
3199		txr->adapter = adapter;
3200		txr->me = i;
3201
3202		/* Initialize the TX lock */
3203		snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
3204		    device_get_nameunit(dev), txr->me);
3205		mtx_init(&txr->tx_mtx, txr->mtx_name, NULL, MTX_DEF);
3206
3207		if (em_dma_malloc(adapter, tsize,
3208			&txr->txdma, BUS_DMA_NOWAIT)) {
3209			device_printf(dev,
3210			    "Unable to allocate TX Descriptor memory\n");
3211			error = ENOMEM;
3212			goto err_tx_desc;
3213		}
3214		txr->tx_base = (struct e1000_tx_desc *)txr->txdma.dma_vaddr;
3215		bzero((void *)txr->tx_base, tsize);
3216
3217        	if (em_allocate_transmit_buffers(txr)) {
3218			device_printf(dev,
3219			    "Critical Failure setting up transmit buffers\n");
3220			error = ENOMEM;
3221			goto err_tx_desc;
3222        	}
3223#if __FreeBSD_version >= 800000
3224		/* Allocate a buf ring */
3225		txr->br = buf_ring_alloc(4096, M_DEVBUF,
3226		    M_WAITOK, &txr->tx_mtx);
3227#endif
3228	}
3229
3230	/*
3231	 * Next the RX queues...
3232	 */
3233	rsize = roundup2(adapter->num_rx_desc *
3234	    sizeof(struct e1000_rx_desc), EM_DBA_ALIGN);
3235	for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
3236		rxr = &adapter->rx_rings[i];
3237		rxr->adapter = adapter;
3238		rxr->me = i;
3239
3240		/* Initialize the RX lock */
3241		snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
3242		    device_get_nameunit(dev), txr->me);
3243		mtx_init(&rxr->rx_mtx, rxr->mtx_name, NULL, MTX_DEF);
3244
3245		if (em_dma_malloc(adapter, rsize,
3246			&rxr->rxdma, BUS_DMA_NOWAIT)) {
3247			device_printf(dev,
3248			    "Unable to allocate RxDescriptor memory\n");
3249			error = ENOMEM;
3250			goto err_rx_desc;
3251		}
3252		rxr->rx_base = (struct e1000_rx_desc *)rxr->rxdma.dma_vaddr;
3253		bzero((void *)rxr->rx_base, rsize);
3254
3255        	/* Allocate receive buffers for the ring*/
3256		if (em_allocate_receive_buffers(rxr)) {
3257			device_printf(dev,
3258			    "Critical Failure setting up receive buffers\n");
3259			error = ENOMEM;
3260			goto err_rx_desc;
3261		}
3262	}
3263
3264	return (0);
3265
3266err_rx_desc:
3267	for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
3268		em_dma_free(adapter, &rxr->rxdma);
3269err_tx_desc:
3270	for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
3271		em_dma_free(adapter, &txr->txdma);
3272	free(adapter->rx_rings, M_DEVBUF);
3273rx_fail:
3274#if __FreeBSD_version >= 800000
3275	buf_ring_free(txr->br, M_DEVBUF);
3276#endif
3277	free(adapter->tx_rings, M_DEVBUF);
3278fail:
3279	return (error);
3280}
3281
3282
3283/*********************************************************************
3284 *
3285 *  Allocate memory for tx_buffer structures. The tx_buffer stores all
3286 *  the information needed to transmit a packet on the wire. This is
3287 *  called only once at attach, setup is done every reset.
3288 *
3289 **********************************************************************/
3290static int
3291em_allocate_transmit_buffers(struct tx_ring *txr)
3292{
3293	struct adapter *adapter = txr->adapter;
3294	device_t dev = adapter->dev;
3295	struct em_buffer *txbuf;
3296	int error, i;
3297
3298	/*
3299	 * Setup DMA descriptor areas.
3300	 */
3301	if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
3302			       1, 0,			/* alignment, bounds */
3303			       BUS_SPACE_MAXADDR,	/* lowaddr */
3304			       BUS_SPACE_MAXADDR,	/* highaddr */
3305			       NULL, NULL,		/* filter, filterarg */
3306			       EM_TSO_SIZE,		/* maxsize */
3307			       EM_MAX_SCATTER,		/* nsegments */
3308			       PAGE_SIZE,		/* maxsegsize */
3309			       0,			/* flags */
3310			       NULL,			/* lockfunc */
3311			       NULL,			/* lockfuncarg */
3312			       &txr->txtag))) {
3313		device_printf(dev,"Unable to allocate TX DMA tag\n");
3314		goto fail;
3315	}
3316
3317	if (!(txr->tx_buffers =
3318	    (struct em_buffer *) malloc(sizeof(struct em_buffer) *
3319	    adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3320		device_printf(dev, "Unable to allocate tx_buffer memory\n");
3321		error = ENOMEM;
3322		goto fail;
3323	}
3324
3325        /* Create the descriptor buffer dma maps */
3326	txbuf = txr->tx_buffers;
3327	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3328		error = bus_dmamap_create(txr->txtag, 0, &txbuf->map);
3329		if (error != 0) {
3330			device_printf(dev, "Unable to create TX DMA map\n");
3331			goto fail;
3332		}
3333	}
3334
3335	return 0;
3336fail:
3337	/* We free all, it handles case where we are in the middle */
3338	em_free_transmit_structures(adapter);
3339	return (error);
3340}
3341
3342/*********************************************************************
3343 *
3344 *  Initialize a transmit ring.
3345 *
3346 **********************************************************************/
3347static void
3348em_setup_transmit_ring(struct tx_ring *txr)
3349{
3350	struct adapter *adapter = txr->adapter;
3351	struct em_buffer *txbuf;
3352	int i;
3353#ifdef DEV_NETMAP
3354	struct netmap_adapter *na = NA(adapter->ifp);
3355	struct netmap_slot *slot;
3356#endif /* DEV_NETMAP */
3357
3358	/* Clear the old descriptor contents */
3359	EM_TX_LOCK(txr);
3360#ifdef DEV_NETMAP
3361	slot = netmap_reset(na, NR_TX, txr->me, 0);
3362#endif /* DEV_NETMAP */
3363
3364	bzero((void *)txr->tx_base,
3365	      (sizeof(struct e1000_tx_desc)) * adapter->num_tx_desc);
3366	/* Reset indices */
3367	txr->next_avail_desc = 0;
3368	txr->next_to_clean = 0;
3369
3370	/* Free any existing tx buffers. */
3371        txbuf = txr->tx_buffers;
3372	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3373		if (txbuf->m_head != NULL) {
3374			bus_dmamap_sync(txr->txtag, txbuf->map,
3375			    BUS_DMASYNC_POSTWRITE);
3376			bus_dmamap_unload(txr->txtag, txbuf->map);
3377			m_freem(txbuf->m_head);
3378			txbuf->m_head = NULL;
3379		}
3380#ifdef DEV_NETMAP
3381		if (slot) {
3382			int si = netmap_idx_n2k(&na->tx_rings[txr->me], i);
3383			uint64_t paddr;
3384			void *addr;
3385
3386			addr = PNMB(slot + si, &paddr);
3387			txr->tx_base[i].buffer_addr = htole64(paddr);
3388			/* reload the map for netmap mode */
3389			netmap_load_map(txr->txtag, txbuf->map, addr);
3390		}
3391#endif /* DEV_NETMAP */
3392
3393		/* clear the watch index */
3394		txbuf->next_eop = -1;
3395        }
3396
3397	/* Set number of descriptors available */
3398	txr->tx_avail = adapter->num_tx_desc;
3399	txr->queue_status = EM_QUEUE_IDLE;
3400
3401	/* Clear checksum offload context. */
3402	txr->last_hw_offload = 0;
3403	txr->last_hw_ipcss = 0;
3404	txr->last_hw_ipcso = 0;
3405	txr->last_hw_tucss = 0;
3406	txr->last_hw_tucso = 0;
3407
3408	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3409	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3410	EM_TX_UNLOCK(txr);
3411}
3412
3413/*********************************************************************
3414 *
3415 *  Initialize all transmit rings.
3416 *
3417 **********************************************************************/
3418static void
3419em_setup_transmit_structures(struct adapter *adapter)
3420{
3421	struct tx_ring *txr = adapter->tx_rings;
3422
3423	for (int i = 0; i < adapter->num_queues; i++, txr++)
3424		em_setup_transmit_ring(txr);
3425
3426	return;
3427}
3428
3429/*********************************************************************
3430 *
3431 *  Enable transmit unit.
3432 *
3433 **********************************************************************/
3434static void
3435em_initialize_transmit_unit(struct adapter *adapter)
3436{
3437	struct tx_ring	*txr = adapter->tx_rings;
3438	struct e1000_hw	*hw = &adapter->hw;
3439	u32	tctl, tarc, tipg = 0;
3440
3441	 INIT_DEBUGOUT("em_initialize_transmit_unit: begin");
3442
3443	for (int i = 0; i < adapter->num_queues; i++, txr++) {
3444		u64 bus_addr = txr->txdma.dma_paddr;
3445		/* Base and Len of TX Ring */
3446		E1000_WRITE_REG(hw, E1000_TDLEN(i),
3447	    	    adapter->num_tx_desc * sizeof(struct e1000_tx_desc));
3448		E1000_WRITE_REG(hw, E1000_TDBAH(i),
3449	    	    (u32)(bus_addr >> 32));
3450		E1000_WRITE_REG(hw, E1000_TDBAL(i),
3451	    	    (u32)bus_addr);
3452		/* Init the HEAD/TAIL indices */
3453		E1000_WRITE_REG(hw, E1000_TDT(i), 0);
3454		E1000_WRITE_REG(hw, E1000_TDH(i), 0);
3455
3456		HW_DEBUGOUT2("Base = %x, Length = %x\n",
3457		    E1000_READ_REG(&adapter->hw, E1000_TDBAL(i)),
3458		    E1000_READ_REG(&adapter->hw, E1000_TDLEN(i)));
3459
3460		txr->queue_status = EM_QUEUE_IDLE;
3461	}
3462
3463	/* Set the default values for the Tx Inter Packet Gap timer */
3464	switch (adapter->hw.mac.type) {
3465	case e1000_80003es2lan:
3466		tipg = DEFAULT_82543_TIPG_IPGR1;
3467		tipg |= DEFAULT_80003ES2LAN_TIPG_IPGR2 <<
3468		    E1000_TIPG_IPGR2_SHIFT;
3469		break;
3470	default:
3471		if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
3472		    (adapter->hw.phy.media_type ==
3473		    e1000_media_type_internal_serdes))
3474			tipg = DEFAULT_82543_TIPG_IPGT_FIBER;
3475		else
3476			tipg = DEFAULT_82543_TIPG_IPGT_COPPER;
3477		tipg |= DEFAULT_82543_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT;
3478		tipg |= DEFAULT_82543_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT;
3479	}
3480
3481	E1000_WRITE_REG(&adapter->hw, E1000_TIPG, tipg);
3482	E1000_WRITE_REG(&adapter->hw, E1000_TIDV, adapter->tx_int_delay.value);
3483
3484	if(adapter->hw.mac.type >= e1000_82540)
3485		E1000_WRITE_REG(&adapter->hw, E1000_TADV,
3486		    adapter->tx_abs_int_delay.value);
3487
3488	if ((adapter->hw.mac.type == e1000_82571) ||
3489	    (adapter->hw.mac.type == e1000_82572)) {
3490		tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
3491		tarc |= SPEED_MODE_BIT;
3492		E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3493	} else if (adapter->hw.mac.type == e1000_80003es2lan) {
3494		tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
3495		tarc |= 1;
3496		E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3497		tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(1));
3498		tarc |= 1;
3499		E1000_WRITE_REG(&adapter->hw, E1000_TARC(1), tarc);
3500	}
3501
3502	adapter->txd_cmd = E1000_TXD_CMD_IFCS;
3503	if (adapter->tx_int_delay.value > 0)
3504		adapter->txd_cmd |= E1000_TXD_CMD_IDE;
3505
3506	/* Program the Transmit Control Register */
3507	tctl = E1000_READ_REG(&adapter->hw, E1000_TCTL);
3508	tctl &= ~E1000_TCTL_CT;
3509	tctl |= (E1000_TCTL_PSP | E1000_TCTL_RTLC | E1000_TCTL_EN |
3510		   (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT));
3511
3512	if (adapter->hw.mac.type >= e1000_82571)
3513		tctl |= E1000_TCTL_MULR;
3514
3515	/* This write will effectively turn on the transmit unit. */
3516	E1000_WRITE_REG(&adapter->hw, E1000_TCTL, tctl);
3517
3518}
3519
3520
3521/*********************************************************************
3522 *
3523 *  Free all transmit rings.
3524 *
3525 **********************************************************************/
3526static void
3527em_free_transmit_structures(struct adapter *adapter)
3528{
3529	struct tx_ring *txr = adapter->tx_rings;
3530
3531	for (int i = 0; i < adapter->num_queues; i++, txr++) {
3532		EM_TX_LOCK(txr);
3533		em_free_transmit_buffers(txr);
3534		em_dma_free(adapter, &txr->txdma);
3535		EM_TX_UNLOCK(txr);
3536		EM_TX_LOCK_DESTROY(txr);
3537	}
3538
3539	free(adapter->tx_rings, M_DEVBUF);
3540}
3541
3542/*********************************************************************
3543 *
3544 *  Free transmit ring related data structures.
3545 *
3546 **********************************************************************/
3547static void
3548em_free_transmit_buffers(struct tx_ring *txr)
3549{
3550	struct adapter		*adapter = txr->adapter;
3551	struct em_buffer	*txbuf;
3552
3553	INIT_DEBUGOUT("free_transmit_ring: begin");
3554
3555	if (txr->tx_buffers == NULL)
3556		return;
3557
3558	for (int i = 0; i < adapter->num_tx_desc; i++) {
3559		txbuf = &txr->tx_buffers[i];
3560		if (txbuf->m_head != NULL) {
3561			bus_dmamap_sync(txr->txtag, txbuf->map,
3562			    BUS_DMASYNC_POSTWRITE);
3563			bus_dmamap_unload(txr->txtag,
3564			    txbuf->map);
3565			m_freem(txbuf->m_head);
3566			txbuf->m_head = NULL;
3567			if (txbuf->map != NULL) {
3568				bus_dmamap_destroy(txr->txtag,
3569				    txbuf->map);
3570				txbuf->map = NULL;
3571			}
3572		} else if (txbuf->map != NULL) {
3573			bus_dmamap_unload(txr->txtag,
3574			    txbuf->map);
3575			bus_dmamap_destroy(txr->txtag,
3576			    txbuf->map);
3577			txbuf->map = NULL;
3578		}
3579	}
3580#if __FreeBSD_version >= 800000
3581	if (txr->br != NULL)
3582		buf_ring_free(txr->br, M_DEVBUF);
3583#endif
3584	if (txr->tx_buffers != NULL) {
3585		free(txr->tx_buffers, M_DEVBUF);
3586		txr->tx_buffers = NULL;
3587	}
3588	if (txr->txtag != NULL) {
3589		bus_dma_tag_destroy(txr->txtag);
3590		txr->txtag = NULL;
3591	}
3592	return;
3593}
3594
3595
3596/*********************************************************************
3597 *  The offload context is protocol specific (TCP/UDP) and thus
3598 *  only needs to be set when the protocol changes. The occasion
3599 *  of a context change can be a performance detriment, and
3600 *  might be better just disabled. The reason arises in the way
3601 *  in which the controller supports pipelined requests from the
3602 *  Tx data DMA. Up to four requests can be pipelined, and they may
3603 *  belong to the same packet or to multiple packets. However all
3604 *  requests for one packet are issued before a request is issued
3605 *  for a subsequent packet and if a request for the next packet
3606 *  requires a context change, that request will be stalled
3607 *  until the previous request completes. This means setting up
3608 *  a new context effectively disables pipelined Tx data DMA which
3609 *  in turn greatly slow down performance to send small sized
3610 *  frames.
3611 **********************************************************************/
3612static void
3613em_transmit_checksum_setup(struct tx_ring *txr, struct mbuf *mp, int ip_off,
3614    struct ip *ip, u32 *txd_upper, u32 *txd_lower)
3615{
3616	struct adapter			*adapter = txr->adapter;
3617	struct e1000_context_desc	*TXD = NULL;
3618	struct em_buffer		*tx_buffer;
3619	int				cur, hdr_len;
3620	u32				cmd = 0;
3621	u16				offload = 0;
3622	u8				ipcso, ipcss, tucso, tucss;
3623
3624	ipcss = ipcso = tucss = tucso = 0;
3625	hdr_len = ip_off + (ip->ip_hl << 2);
3626	cur = txr->next_avail_desc;
3627
3628	/* Setup of IP header checksum. */
3629	if (mp->m_pkthdr.csum_flags & CSUM_IP) {
3630		*txd_upper |= E1000_TXD_POPTS_IXSM << 8;
3631		offload |= CSUM_IP;
3632		ipcss = ip_off;
3633		ipcso = ip_off + offsetof(struct ip, ip_sum);
3634		/*
3635		 * Start offset for header checksum calculation.
3636		 * End offset for header checksum calculation.
3637		 * Offset of place to put the checksum.
3638		 */
3639		TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
3640		TXD->lower_setup.ip_fields.ipcss = ipcss;
3641		TXD->lower_setup.ip_fields.ipcse = htole16(hdr_len);
3642		TXD->lower_setup.ip_fields.ipcso = ipcso;
3643		cmd |= E1000_TXD_CMD_IP;
3644	}
3645
3646	if (mp->m_pkthdr.csum_flags & CSUM_TCP) {
3647 		*txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
3648 		*txd_upper |= E1000_TXD_POPTS_TXSM << 8;
3649 		offload |= CSUM_TCP;
3650 		tucss = hdr_len;
3651 		tucso = hdr_len + offsetof(struct tcphdr, th_sum);
3652 		/*
3653 		 * Setting up new checksum offload context for every frames
3654 		 * takes a lot of processing time for hardware. This also
3655 		 * reduces performance a lot for small sized frames so avoid
3656 		 * it if driver can use previously configured checksum
3657 		 * offload context.
3658 		 */
3659 		if (txr->last_hw_offload == offload) {
3660 			if (offload & CSUM_IP) {
3661 				if (txr->last_hw_ipcss == ipcss &&
3662 				    txr->last_hw_ipcso == ipcso &&
3663 				    txr->last_hw_tucss == tucss &&
3664 				    txr->last_hw_tucso == tucso)
3665 					return;
3666 			} else {
3667 				if (txr->last_hw_tucss == tucss &&
3668 				    txr->last_hw_tucso == tucso)
3669 					return;
3670 			}
3671  		}
3672 		txr->last_hw_offload = offload;
3673 		txr->last_hw_tucss = tucss;
3674 		txr->last_hw_tucso = tucso;
3675 		/*
3676 		 * Start offset for payload checksum calculation.
3677 		 * End offset for payload checksum calculation.
3678 		 * Offset of place to put the checksum.
3679 		 */
3680		TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
3681 		TXD->upper_setup.tcp_fields.tucss = hdr_len;
3682 		TXD->upper_setup.tcp_fields.tucse = htole16(0);
3683 		TXD->upper_setup.tcp_fields.tucso = tucso;
3684 		cmd |= E1000_TXD_CMD_TCP;
3685 	} else if (mp->m_pkthdr.csum_flags & CSUM_UDP) {
3686 		*txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
3687 		*txd_upper |= E1000_TXD_POPTS_TXSM << 8;
3688 		tucss = hdr_len;
3689 		tucso = hdr_len + offsetof(struct udphdr, uh_sum);
3690 		/*
3691 		 * Setting up new checksum offload context for every frames
3692 		 * takes a lot of processing time for hardware. This also
3693 		 * reduces performance a lot for small sized frames so avoid
3694 		 * it if driver can use previously configured checksum
3695 		 * offload context.
3696 		 */
3697 		if (txr->last_hw_offload == offload) {
3698 			if (offload & CSUM_IP) {
3699 				if (txr->last_hw_ipcss == ipcss &&
3700 				    txr->last_hw_ipcso == ipcso &&
3701 				    txr->last_hw_tucss == tucss &&
3702 				    txr->last_hw_tucso == tucso)
3703 					return;
3704 			} else {
3705 				if (txr->last_hw_tucss == tucss &&
3706 				    txr->last_hw_tucso == tucso)
3707 					return;
3708 			}
3709 		}
3710 		txr->last_hw_offload = offload;
3711 		txr->last_hw_tucss = tucss;
3712 		txr->last_hw_tucso = tucso;
3713 		/*
3714 		 * Start offset for header checksum calculation.
3715 		 * End offset for header checksum calculation.
3716 		 * Offset of place to put the checksum.
3717 		 */
3718		TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
3719 		TXD->upper_setup.tcp_fields.tucss = tucss;
3720 		TXD->upper_setup.tcp_fields.tucse = htole16(0);
3721 		TXD->upper_setup.tcp_fields.tucso = tucso;
3722  	}
3723
3724 	if (offload & CSUM_IP) {
3725 		txr->last_hw_ipcss = ipcss;
3726 		txr->last_hw_ipcso = ipcso;
3727  	}
3728
3729	TXD->tcp_seg_setup.data = htole32(0);
3730	TXD->cmd_and_length =
3731	    htole32(adapter->txd_cmd | E1000_TXD_CMD_DEXT | cmd);
3732	tx_buffer = &txr->tx_buffers[cur];
3733	tx_buffer->m_head = NULL;
3734	tx_buffer->next_eop = -1;
3735
3736	if (++cur == adapter->num_tx_desc)
3737		cur = 0;
3738
3739	txr->tx_avail--;
3740	txr->next_avail_desc = cur;
3741}
3742
3743
3744/**********************************************************************
3745 *
3746 *  Setup work for hardware segmentation offload (TSO)
3747 *
3748 **********************************************************************/
3749static void
3750em_tso_setup(struct tx_ring *txr, struct mbuf *mp, int ip_off,
3751    struct ip *ip, struct tcphdr *tp, u32 *txd_upper, u32 *txd_lower)
3752{
3753	struct adapter			*adapter = txr->adapter;
3754	struct e1000_context_desc	*TXD;
3755	struct em_buffer		*tx_buffer;
3756	int cur, hdr_len;
3757
3758	/*
3759	 * In theory we can use the same TSO context if and only if
3760	 * frame is the same type(IP/TCP) and the same MSS. However
3761	 * checking whether a frame has the same IP/TCP structure is
3762	 * hard thing so just ignore that and always restablish a
3763	 * new TSO context.
3764	 */
3765	hdr_len = ip_off + (ip->ip_hl << 2) + (tp->th_off << 2);
3766	*txd_lower = (E1000_TXD_CMD_DEXT |	/* Extended descr type */
3767		      E1000_TXD_DTYP_D |	/* Data descr type */
3768		      E1000_TXD_CMD_TSE);	/* Do TSE on this packet */
3769
3770	/* IP and/or TCP header checksum calculation and insertion. */
3771	*txd_upper = (E1000_TXD_POPTS_IXSM | E1000_TXD_POPTS_TXSM) << 8;
3772
3773	cur = txr->next_avail_desc;
3774	tx_buffer = &txr->tx_buffers[cur];
3775	TXD = (struct e1000_context_desc *) &txr->tx_base[cur];
3776
3777	/*
3778	 * Start offset for header checksum calculation.
3779	 * End offset for header checksum calculation.
3780	 * Offset of place put the checksum.
3781	 */
3782	TXD->lower_setup.ip_fields.ipcss = ip_off;
3783	TXD->lower_setup.ip_fields.ipcse =
3784	    htole16(ip_off + (ip->ip_hl << 2) - 1);
3785	TXD->lower_setup.ip_fields.ipcso = ip_off + offsetof(struct ip, ip_sum);
3786	/*
3787	 * Start offset for payload checksum calculation.
3788	 * End offset for payload checksum calculation.
3789	 * Offset of place to put the checksum.
3790	 */
3791	TXD->upper_setup.tcp_fields.tucss = ip_off + (ip->ip_hl << 2);
3792	TXD->upper_setup.tcp_fields.tucse = 0;
3793	TXD->upper_setup.tcp_fields.tucso =
3794	    ip_off + (ip->ip_hl << 2) + offsetof(struct tcphdr, th_sum);
3795	/*
3796	 * Payload size per packet w/o any headers.
3797	 * Length of all headers up to payload.
3798	 */
3799	TXD->tcp_seg_setup.fields.mss = htole16(mp->m_pkthdr.tso_segsz);
3800	TXD->tcp_seg_setup.fields.hdr_len = hdr_len;
3801
3802	TXD->cmd_and_length = htole32(adapter->txd_cmd |
3803				E1000_TXD_CMD_DEXT |	/* Extended descr */
3804				E1000_TXD_CMD_TSE |	/* TSE context */
3805				E1000_TXD_CMD_IP |	/* Do IP csum */
3806				E1000_TXD_CMD_TCP |	/* Do TCP checksum */
3807				(mp->m_pkthdr.len - (hdr_len))); /* Total len */
3808
3809	tx_buffer->m_head = NULL;
3810	tx_buffer->next_eop = -1;
3811
3812	if (++cur == adapter->num_tx_desc)
3813		cur = 0;
3814
3815	txr->tx_avail--;
3816	txr->next_avail_desc = cur;
3817	txr->tx_tso = TRUE;
3818}
3819
3820
3821/**********************************************************************
3822 *
3823 *  Examine each tx_buffer in the used queue. If the hardware is done
3824 *  processing the packet then free associated resources. The
3825 *  tx_buffer is put back on the free queue.
3826 *
3827 **********************************************************************/
3828static void
3829em_txeof(struct tx_ring *txr)
3830{
3831	struct adapter	*adapter = txr->adapter;
3832        int first, last, done, processed;
3833        struct em_buffer *tx_buffer;
3834        struct e1000_tx_desc   *tx_desc, *eop_desc;
3835	struct ifnet   *ifp = adapter->ifp;
3836
3837	EM_TX_LOCK_ASSERT(txr);
3838#ifdef DEV_NETMAP
3839	if (netmap_tx_irq(ifp, txr->me |
3840	    (NETMAP_LOCKED_ENTER | NETMAP_LOCKED_EXIT)))
3841		return;
3842#endif /* DEV_NETMAP */
3843
3844	/* No work, make sure watchdog is off */
3845        if (txr->tx_avail == adapter->num_tx_desc) {
3846		txr->queue_status = EM_QUEUE_IDLE;
3847                return;
3848	}
3849
3850	processed = 0;
3851        first = txr->next_to_clean;
3852        tx_desc = &txr->tx_base[first];
3853        tx_buffer = &txr->tx_buffers[first];
3854	last = tx_buffer->next_eop;
3855        eop_desc = &txr->tx_base[last];
3856
3857	/*
3858	 * What this does is get the index of the
3859	 * first descriptor AFTER the EOP of the
3860	 * first packet, that way we can do the
3861	 * simple comparison on the inner while loop.
3862	 */
3863	if (++last == adapter->num_tx_desc)
3864 		last = 0;
3865	done = last;
3866
3867        bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3868            BUS_DMASYNC_POSTREAD);
3869
3870        while (eop_desc->upper.fields.status & E1000_TXD_STAT_DD) {
3871		/* We clean the range of the packet */
3872		while (first != done) {
3873                	tx_desc->upper.data = 0;
3874                	tx_desc->lower.data = 0;
3875                	tx_desc->buffer_addr = 0;
3876                	++txr->tx_avail;
3877			++processed;
3878
3879			if (tx_buffer->m_head) {
3880				bus_dmamap_sync(txr->txtag,
3881				    tx_buffer->map,
3882				    BUS_DMASYNC_POSTWRITE);
3883				bus_dmamap_unload(txr->txtag,
3884				    tx_buffer->map);
3885                        	m_freem(tx_buffer->m_head);
3886                        	tx_buffer->m_head = NULL;
3887                	}
3888			tx_buffer->next_eop = -1;
3889			txr->watchdog_time = ticks;
3890
3891	                if (++first == adapter->num_tx_desc)
3892				first = 0;
3893
3894	                tx_buffer = &txr->tx_buffers[first];
3895			tx_desc = &txr->tx_base[first];
3896		}
3897		++ifp->if_opackets;
3898		/* See if we can continue to the next packet */
3899		last = tx_buffer->next_eop;
3900		if (last != -1) {
3901        		eop_desc = &txr->tx_base[last];
3902			/* Get new done point */
3903			if (++last == adapter->num_tx_desc) last = 0;
3904			done = last;
3905		} else
3906			break;
3907        }
3908        bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3909            BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3910
3911        txr->next_to_clean = first;
3912
3913	/*
3914	** Watchdog calculation, we know there's
3915	** work outstanding or the first return
3916	** would have been taken, so none processed
3917	** for too long indicates a hang. local timer
3918	** will examine this and do a reset if needed.
3919	*/
3920	if ((!processed) && ((ticks - txr->watchdog_time) > EM_WATCHDOG))
3921		txr->queue_status = EM_QUEUE_HUNG;
3922
3923        /*
3924         * If we have a minimum free, clear IFF_DRV_OACTIVE
3925         * to tell the stack that it is OK to send packets.
3926	 * Notice that all writes of OACTIVE happen under the
3927	 * TX lock which, with a single queue, guarantees
3928	 * sanity.
3929         */
3930        if (txr->tx_avail >= EM_MAX_SCATTER)
3931		ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
3932
3933	/* Disable watchdog if all clean */
3934	if (txr->tx_avail == adapter->num_tx_desc) {
3935		txr->queue_status = EM_QUEUE_IDLE;
3936	}
3937}
3938
3939
3940/*********************************************************************
3941 *
3942 *  Refresh RX descriptor mbufs from system mbuf buffer pool.
3943 *
3944 **********************************************************************/
3945static void
3946em_refresh_mbufs(struct rx_ring *rxr, int limit)
3947{
3948	struct adapter		*adapter = rxr->adapter;
3949	struct mbuf		*m;
3950	bus_dma_segment_t	segs[1];
3951	struct em_buffer	*rxbuf;
3952	int			i, j, error, nsegs;
3953	bool			cleaned = FALSE;
3954
3955	i = j = rxr->next_to_refresh;
3956	/*
3957	** Get one descriptor beyond
3958	** our work mark to control
3959	** the loop.
3960	*/
3961	if (++j == adapter->num_rx_desc)
3962		j = 0;
3963
3964	while (j != limit) {
3965		rxbuf = &rxr->rx_buffers[i];
3966		if (rxbuf->m_head == NULL) {
3967			m = m_getjcl(M_NOWAIT, MT_DATA,
3968			    M_PKTHDR, adapter->rx_mbuf_sz);
3969			/*
3970			** If we have a temporary resource shortage
3971			** that causes a failure, just abort refresh
3972			** for now, we will return to this point when
3973			** reinvoked from em_rxeof.
3974			*/
3975			if (m == NULL)
3976				goto update;
3977		} else
3978			m = rxbuf->m_head;
3979
3980		m->m_len = m->m_pkthdr.len = adapter->rx_mbuf_sz;
3981		m->m_flags |= M_PKTHDR;
3982		m->m_data = m->m_ext.ext_buf;
3983
3984		/* Use bus_dma machinery to setup the memory mapping  */
3985		error = bus_dmamap_load_mbuf_sg(rxr->rxtag, rxbuf->map,
3986		    m, segs, &nsegs, BUS_DMA_NOWAIT);
3987		if (error != 0) {
3988			printf("Refresh mbufs: hdr dmamap load"
3989			    " failure - %d\n", error);
3990			m_free(m);
3991			rxbuf->m_head = NULL;
3992			goto update;
3993		}
3994		rxbuf->m_head = m;
3995		bus_dmamap_sync(rxr->rxtag,
3996		    rxbuf->map, BUS_DMASYNC_PREREAD);
3997		rxr->rx_base[i].buffer_addr = htole64(segs[0].ds_addr);
3998		cleaned = TRUE;
3999
4000		i = j; /* Next is precalulated for us */
4001		rxr->next_to_refresh = i;
4002		/* Calculate next controlling index */
4003		if (++j == adapter->num_rx_desc)
4004			j = 0;
4005	}
4006update:
4007	/*
4008	** Update the tail pointer only if,
4009	** and as far as we have refreshed.
4010	*/
4011	if (cleaned)
4012		E1000_WRITE_REG(&adapter->hw,
4013		    E1000_RDT(rxr->me), rxr->next_to_refresh);
4014
4015	return;
4016}
4017
4018
4019/*********************************************************************
4020 *
4021 *  Allocate memory for rx_buffer structures. Since we use one
4022 *  rx_buffer per received packet, the maximum number of rx_buffer's
4023 *  that we'll need is equal to the number of receive descriptors
4024 *  that we've allocated.
4025 *
4026 **********************************************************************/
4027static int
4028em_allocate_receive_buffers(struct rx_ring *rxr)
4029{
4030	struct adapter		*adapter = rxr->adapter;
4031	device_t		dev = adapter->dev;
4032	struct em_buffer	*rxbuf;
4033	int			error;
4034
4035	rxr->rx_buffers = malloc(sizeof(struct em_buffer) *
4036	    adapter->num_rx_desc, M_DEVBUF, M_NOWAIT | M_ZERO);
4037	if (rxr->rx_buffers == NULL) {
4038		device_printf(dev, "Unable to allocate rx_buffer memory\n");
4039		return (ENOMEM);
4040	}
4041
4042	error = bus_dma_tag_create(bus_get_dma_tag(dev), /* parent */
4043				1, 0,			/* alignment, bounds */
4044				BUS_SPACE_MAXADDR,	/* lowaddr */
4045				BUS_SPACE_MAXADDR,	/* highaddr */
4046				NULL, NULL,		/* filter, filterarg */
4047				MJUM9BYTES,		/* maxsize */
4048				1,			/* nsegments */
4049				MJUM9BYTES,		/* maxsegsize */
4050				0,			/* flags */
4051				NULL,			/* lockfunc */
4052				NULL,			/* lockarg */
4053				&rxr->rxtag);
4054	if (error) {
4055		device_printf(dev, "%s: bus_dma_tag_create failed %d\n",
4056		    __func__, error);
4057		goto fail;
4058	}
4059
4060	rxbuf = rxr->rx_buffers;
4061	for (int i = 0; i < adapter->num_rx_desc; i++, rxbuf++) {
4062		rxbuf = &rxr->rx_buffers[i];
4063		error = bus_dmamap_create(rxr->rxtag, BUS_DMA_NOWAIT,
4064		    &rxbuf->map);
4065		if (error) {
4066			device_printf(dev, "%s: bus_dmamap_create failed: %d\n",
4067			    __func__, error);
4068			goto fail;
4069		}
4070	}
4071
4072	return (0);
4073
4074fail:
4075	em_free_receive_structures(adapter);
4076	return (error);
4077}
4078
4079
4080/*********************************************************************
4081 *
4082 *  Initialize a receive ring and its buffers.
4083 *
4084 **********************************************************************/
4085static int
4086em_setup_receive_ring(struct rx_ring *rxr)
4087{
4088	struct	adapter 	*adapter = rxr->adapter;
4089	struct em_buffer	*rxbuf;
4090	bus_dma_segment_t	seg[1];
4091	int			rsize, nsegs, error = 0;
4092#ifdef DEV_NETMAP
4093	struct netmap_adapter *na = NA(adapter->ifp);
4094	struct netmap_slot *slot;
4095#endif
4096
4097
4098	/* Clear the ring contents */
4099	EM_RX_LOCK(rxr);
4100	rsize = roundup2(adapter->num_rx_desc *
4101	    sizeof(struct e1000_rx_desc), EM_DBA_ALIGN);
4102	bzero((void *)rxr->rx_base, rsize);
4103#ifdef DEV_NETMAP
4104	slot = netmap_reset(na, NR_RX, 0, 0);
4105#endif
4106
4107	/*
4108	** Free current RX buffer structs and their mbufs
4109	*/
4110	for (int i = 0; i < adapter->num_rx_desc; i++) {
4111		rxbuf = &rxr->rx_buffers[i];
4112		if (rxbuf->m_head != NULL) {
4113			bus_dmamap_sync(rxr->rxtag, rxbuf->map,
4114			    BUS_DMASYNC_POSTREAD);
4115			bus_dmamap_unload(rxr->rxtag, rxbuf->map);
4116			m_freem(rxbuf->m_head);
4117			rxbuf->m_head = NULL; /* mark as freed */
4118		}
4119	}
4120
4121	/* Now replenish the mbufs */
4122        for (int j = 0; j != adapter->num_rx_desc; ++j) {
4123		rxbuf = &rxr->rx_buffers[j];
4124#ifdef DEV_NETMAP
4125		if (slot) {
4126			int si = netmap_idx_n2k(&na->rx_rings[rxr->me], j);
4127			uint64_t paddr;
4128			void *addr;
4129
4130			addr = PNMB(slot + si, &paddr);
4131			netmap_load_map(rxr->rxtag, rxbuf->map, addr);
4132			/* Update descriptor */
4133			rxr->rx_base[j].buffer_addr = htole64(paddr);
4134			continue;
4135		}
4136#endif /* DEV_NETMAP */
4137		rxbuf->m_head = m_getjcl(M_NOWAIT, MT_DATA,
4138		    M_PKTHDR, adapter->rx_mbuf_sz);
4139		if (rxbuf->m_head == NULL) {
4140			error = ENOBUFS;
4141			goto fail;
4142		}
4143		rxbuf->m_head->m_len = adapter->rx_mbuf_sz;
4144		rxbuf->m_head->m_flags &= ~M_HASFCS; /* we strip it */
4145		rxbuf->m_head->m_pkthdr.len = adapter->rx_mbuf_sz;
4146
4147		/* Get the memory mapping */
4148		error = bus_dmamap_load_mbuf_sg(rxr->rxtag,
4149		    rxbuf->map, rxbuf->m_head, seg,
4150		    &nsegs, BUS_DMA_NOWAIT);
4151		if (error != 0) {
4152			m_freem(rxbuf->m_head);
4153			rxbuf->m_head = NULL;
4154			goto fail;
4155		}
4156		bus_dmamap_sync(rxr->rxtag,
4157		    rxbuf->map, BUS_DMASYNC_PREREAD);
4158
4159		/* Update descriptor */
4160		rxr->rx_base[j].buffer_addr = htole64(seg[0].ds_addr);
4161	}
4162	rxr->next_to_check = 0;
4163	rxr->next_to_refresh = 0;
4164	bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4165	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4166
4167fail:
4168	EM_RX_UNLOCK(rxr);
4169	return (error);
4170}
4171
4172/*********************************************************************
4173 *
4174 *  Initialize all receive rings.
4175 *
4176 **********************************************************************/
4177static int
4178em_setup_receive_structures(struct adapter *adapter)
4179{
4180	struct rx_ring *rxr = adapter->rx_rings;
4181	int q;
4182
4183	for (q = 0; q < adapter->num_queues; q++, rxr++)
4184		if (em_setup_receive_ring(rxr))
4185			goto fail;
4186
4187	return (0);
4188fail:
4189	/*
4190	 * Free RX buffers allocated so far, we will only handle
4191	 * the rings that completed, the failing case will have
4192	 * cleaned up for itself. 'q' failed, so its the terminus.
4193	 */
4194	for (int i = 0; i < q; ++i) {
4195		rxr = &adapter->rx_rings[i];
4196		for (int n = 0; n < adapter->num_rx_desc; n++) {
4197			struct em_buffer *rxbuf;
4198			rxbuf = &rxr->rx_buffers[n];
4199			if (rxbuf->m_head != NULL) {
4200				bus_dmamap_sync(rxr->rxtag, rxbuf->map,
4201			  	  BUS_DMASYNC_POSTREAD);
4202				bus_dmamap_unload(rxr->rxtag, rxbuf->map);
4203				m_freem(rxbuf->m_head);
4204				rxbuf->m_head = NULL;
4205			}
4206		}
4207		rxr->next_to_check = 0;
4208		rxr->next_to_refresh = 0;
4209	}
4210
4211	return (ENOBUFS);
4212}
4213
4214/*********************************************************************
4215 *
4216 *  Free all receive rings.
4217 *
4218 **********************************************************************/
4219static void
4220em_free_receive_structures(struct adapter *adapter)
4221{
4222	struct rx_ring *rxr = adapter->rx_rings;
4223
4224	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4225		em_free_receive_buffers(rxr);
4226		/* Free the ring memory as well */
4227		em_dma_free(adapter, &rxr->rxdma);
4228		EM_RX_LOCK_DESTROY(rxr);
4229	}
4230
4231	free(adapter->rx_rings, M_DEVBUF);
4232}
4233
4234
4235/*********************************************************************
4236 *
4237 *  Free receive ring data structures
4238 *
4239 **********************************************************************/
4240static void
4241em_free_receive_buffers(struct rx_ring *rxr)
4242{
4243	struct adapter		*adapter = rxr->adapter;
4244	struct em_buffer	*rxbuf = NULL;
4245
4246	INIT_DEBUGOUT("free_receive_buffers: begin");
4247
4248	if (rxr->rx_buffers != NULL) {
4249		for (int i = 0; i < adapter->num_rx_desc; i++) {
4250			rxbuf = &rxr->rx_buffers[i];
4251			if (rxbuf->map != NULL) {
4252				bus_dmamap_sync(rxr->rxtag, rxbuf->map,
4253				    BUS_DMASYNC_POSTREAD);
4254				bus_dmamap_unload(rxr->rxtag, rxbuf->map);
4255				bus_dmamap_destroy(rxr->rxtag, rxbuf->map);
4256			}
4257			if (rxbuf->m_head != NULL) {
4258				m_freem(rxbuf->m_head);
4259				rxbuf->m_head = NULL;
4260			}
4261		}
4262		free(rxr->rx_buffers, M_DEVBUF);
4263		rxr->rx_buffers = NULL;
4264		rxr->next_to_check = 0;
4265		rxr->next_to_refresh = 0;
4266	}
4267
4268	if (rxr->rxtag != NULL) {
4269		bus_dma_tag_destroy(rxr->rxtag);
4270		rxr->rxtag = NULL;
4271	}
4272
4273	return;
4274}
4275
4276
4277/*********************************************************************
4278 *
4279 *  Enable receive unit.
4280 *
4281 **********************************************************************/
4282
4283static void
4284em_initialize_receive_unit(struct adapter *adapter)
4285{
4286	struct rx_ring	*rxr = adapter->rx_rings;
4287	struct ifnet	*ifp = adapter->ifp;
4288	struct e1000_hw	*hw = &adapter->hw;
4289	u64	bus_addr;
4290	u32	rctl, rxcsum;
4291
4292	INIT_DEBUGOUT("em_initialize_receive_units: begin");
4293
4294	/*
4295	 * Make sure receives are disabled while setting
4296	 * up the descriptor ring
4297	 */
4298	rctl = E1000_READ_REG(hw, E1000_RCTL);
4299	/* Do not disable if ever enabled on this hardware */
4300	if ((hw->mac.type != e1000_82574) && (hw->mac.type != e1000_82583))
4301		E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
4302
4303	E1000_WRITE_REG(&adapter->hw, E1000_RADV,
4304	    adapter->rx_abs_int_delay.value);
4305	/*
4306	 * Set the interrupt throttling rate. Value is calculated
4307	 * as DEFAULT_ITR = 1/(MAX_INTS_PER_SEC * 256ns)
4308	 */
4309	E1000_WRITE_REG(hw, E1000_ITR, DEFAULT_ITR);
4310
4311	/*
4312	** When using MSIX interrupts we need to throttle
4313	** using the EITR register (82574 only)
4314	*/
4315	if (hw->mac.type == e1000_82574) {
4316		for (int i = 0; i < 4; i++)
4317			E1000_WRITE_REG(hw, E1000_EITR_82574(i),
4318			    DEFAULT_ITR);
4319		/* Disable accelerated acknowledge */
4320		E1000_WRITE_REG(hw, E1000_RFCTL, E1000_RFCTL_ACK_DIS);
4321	}
4322
4323	rxcsum = E1000_READ_REG(hw, E1000_RXCSUM);
4324	if (ifp->if_capenable & IFCAP_RXCSUM)
4325		rxcsum |= E1000_RXCSUM_TUOFL;
4326	else
4327		rxcsum &= ~E1000_RXCSUM_TUOFL;
4328	E1000_WRITE_REG(hw, E1000_RXCSUM, rxcsum);
4329
4330	/*
4331	** XXX TEMPORARY WORKAROUND: on some systems with 82573
4332	** long latencies are observed, like Lenovo X60. This
4333	** change eliminates the problem, but since having positive
4334	** values in RDTR is a known source of problems on other
4335	** platforms another solution is being sought.
4336	*/
4337	if (hw->mac.type == e1000_82573)
4338		E1000_WRITE_REG(hw, E1000_RDTR, 0x20);
4339
4340	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4341		/* Setup the Base and Length of the Rx Descriptor Ring */
4342		u32 rdt = adapter->num_rx_desc - 1; /* default */
4343
4344		bus_addr = rxr->rxdma.dma_paddr;
4345		E1000_WRITE_REG(hw, E1000_RDLEN(i),
4346		    adapter->num_rx_desc * sizeof(struct e1000_rx_desc));
4347		E1000_WRITE_REG(hw, E1000_RDBAH(i), (u32)(bus_addr >> 32));
4348		E1000_WRITE_REG(hw, E1000_RDBAL(i), (u32)bus_addr);
4349		/* Setup the Head and Tail Descriptor Pointers */
4350		E1000_WRITE_REG(hw, E1000_RDH(i), 0);
4351#ifdef DEV_NETMAP
4352		/*
4353		 * an init() while a netmap client is active must
4354		 * preserve the rx buffers passed to userspace.
4355		 */
4356		if (ifp->if_capenable & IFCAP_NETMAP)
4357			rdt -= NA(adapter->ifp)->rx_rings[i].nr_hwavail;
4358#endif /* DEV_NETMAP */
4359		E1000_WRITE_REG(hw, E1000_RDT(i), rdt);
4360	}
4361
4362	/* Set PTHRESH for improved jumbo performance */
4363	if (((adapter->hw.mac.type == e1000_ich9lan) ||
4364	    (adapter->hw.mac.type == e1000_pch2lan) ||
4365	    (adapter->hw.mac.type == e1000_ich10lan)) &&
4366	    (ifp->if_mtu > ETHERMTU)) {
4367		u32 rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(0));
4368		E1000_WRITE_REG(hw, E1000_RXDCTL(0), rxdctl | 3);
4369	}
4370
4371	if (adapter->hw.mac.type >= e1000_pch2lan) {
4372		if (ifp->if_mtu > ETHERMTU)
4373			e1000_lv_jumbo_workaround_ich8lan(hw, TRUE);
4374		else
4375			e1000_lv_jumbo_workaround_ich8lan(hw, FALSE);
4376	}
4377
4378	/* Setup the Receive Control Register */
4379	rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
4380	rctl |= E1000_RCTL_EN | E1000_RCTL_BAM |
4381	    E1000_RCTL_LBM_NO | E1000_RCTL_RDMTS_HALF |
4382	    (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
4383
4384        /* Strip the CRC */
4385        rctl |= E1000_RCTL_SECRC;
4386
4387        /* Make sure VLAN Filters are off */
4388        rctl &= ~E1000_RCTL_VFE;
4389	rctl &= ~E1000_RCTL_SBP;
4390
4391	if (adapter->rx_mbuf_sz == MCLBYTES)
4392		rctl |= E1000_RCTL_SZ_2048;
4393	else if (adapter->rx_mbuf_sz == MJUMPAGESIZE)
4394		rctl |= E1000_RCTL_SZ_4096 | E1000_RCTL_BSEX;
4395	else if (adapter->rx_mbuf_sz > MJUMPAGESIZE)
4396		rctl |= E1000_RCTL_SZ_8192 | E1000_RCTL_BSEX;
4397
4398	if (ifp->if_mtu > ETHERMTU)
4399		rctl |= E1000_RCTL_LPE;
4400	else
4401		rctl &= ~E1000_RCTL_LPE;
4402
4403	/* Write out the settings */
4404	E1000_WRITE_REG(hw, E1000_RCTL, rctl);
4405
4406	return;
4407}
4408
4409
4410/*********************************************************************
4411 *
4412 *  This routine executes in interrupt context. It replenishes
4413 *  the mbufs in the descriptor and sends data which has been
4414 *  dma'ed into host memory to upper layer.
4415 *
4416 *  We loop at most count times if count is > 0, or until done if
4417 *  count < 0.
4418 *
4419 *  For polling we also now return the number of cleaned packets
4420 *********************************************************************/
4421static bool
4422em_rxeof(struct rx_ring *rxr, int count, int *done)
4423{
4424	struct adapter		*adapter = rxr->adapter;
4425	struct ifnet		*ifp = adapter->ifp;
4426	struct mbuf		*mp, *sendmp;
4427	u8			status = 0;
4428	u16 			len;
4429	int			i, processed, rxdone = 0;
4430	bool			eop;
4431	struct e1000_rx_desc	*cur;
4432
4433	EM_RX_LOCK(rxr);
4434
4435#ifdef DEV_NETMAP
4436	if (netmap_rx_irq(ifp, rxr->me | NETMAP_LOCKED_ENTER, &processed))
4437		return (FALSE);
4438#endif /* DEV_NETMAP */
4439
4440	for (i = rxr->next_to_check, processed = 0; count != 0;) {
4441
4442		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
4443			break;
4444
4445		bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4446		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
4447
4448		cur = &rxr->rx_base[i];
4449		status = cur->status;
4450		mp = sendmp = NULL;
4451
4452		if ((status & E1000_RXD_STAT_DD) == 0)
4453			break;
4454
4455		len = le16toh(cur->length);
4456		eop = (status & E1000_RXD_STAT_EOP) != 0;
4457
4458		if ((cur->errors & E1000_RXD_ERR_FRAME_ERR_MASK) ||
4459		    (rxr->discard == TRUE)) {
4460			adapter->dropped_pkts++;
4461			++rxr->rx_discarded;
4462			if (!eop) /* Catch subsequent segs */
4463				rxr->discard = TRUE;
4464			else
4465				rxr->discard = FALSE;
4466			em_rx_discard(rxr, i);
4467			goto next_desc;
4468		}
4469
4470		/* Assign correct length to the current fragment */
4471		mp = rxr->rx_buffers[i].m_head;
4472		mp->m_len = len;
4473
4474		/* Trigger for refresh */
4475		rxr->rx_buffers[i].m_head = NULL;
4476
4477		/* First segment? */
4478		if (rxr->fmp == NULL) {
4479			mp->m_pkthdr.len = len;
4480			rxr->fmp = rxr->lmp = mp;
4481		} else {
4482			/* Chain mbuf's together */
4483			mp->m_flags &= ~M_PKTHDR;
4484			rxr->lmp->m_next = mp;
4485			rxr->lmp = mp;
4486			rxr->fmp->m_pkthdr.len += len;
4487		}
4488
4489		if (eop) {
4490			--count;
4491			sendmp = rxr->fmp;
4492			sendmp->m_pkthdr.rcvif = ifp;
4493			ifp->if_ipackets++;
4494			em_receive_checksum(cur, sendmp);
4495#ifndef __NO_STRICT_ALIGNMENT
4496			if (adapter->hw.mac.max_frame_size >
4497			    (MCLBYTES - ETHER_ALIGN) &&
4498			    em_fixup_rx(rxr) != 0)
4499				goto skip;
4500#endif
4501			if (status & E1000_RXD_STAT_VP) {
4502				sendmp->m_pkthdr.ether_vtag =
4503				    le16toh(cur->special);
4504				sendmp->m_flags |= M_VLANTAG;
4505			}
4506#ifndef __NO_STRICT_ALIGNMENT
4507skip:
4508#endif
4509			rxr->fmp = rxr->lmp = NULL;
4510		}
4511next_desc:
4512		/* Zero out the receive descriptors status. */
4513		cur->status = 0;
4514		++rxdone;	/* cumulative for POLL */
4515		++processed;
4516
4517		/* Advance our pointers to the next descriptor. */
4518		if (++i == adapter->num_rx_desc)
4519			i = 0;
4520
4521		/* Send to the stack */
4522		if (sendmp != NULL) {
4523			rxr->next_to_check = i;
4524			EM_RX_UNLOCK(rxr);
4525			(*ifp->if_input)(ifp, sendmp);
4526			EM_RX_LOCK(rxr);
4527			i = rxr->next_to_check;
4528		}
4529
4530		/* Only refresh mbufs every 8 descriptors */
4531		if (processed == 8) {
4532			em_refresh_mbufs(rxr, i);
4533			processed = 0;
4534		}
4535	}
4536
4537	/* Catch any remaining refresh work */
4538	if (e1000_rx_unrefreshed(rxr))
4539		em_refresh_mbufs(rxr, i);
4540
4541	rxr->next_to_check = i;
4542	if (done != NULL)
4543		*done = rxdone;
4544	EM_RX_UNLOCK(rxr);
4545
4546	return ((status & E1000_RXD_STAT_DD) ? TRUE : FALSE);
4547}
4548
4549static __inline void
4550em_rx_discard(struct rx_ring *rxr, int i)
4551{
4552	struct em_buffer	*rbuf;
4553
4554	rbuf = &rxr->rx_buffers[i];
4555	/* Free any previous pieces */
4556	if (rxr->fmp != NULL) {
4557		rxr->fmp->m_flags |= M_PKTHDR;
4558		m_freem(rxr->fmp);
4559		rxr->fmp = NULL;
4560		rxr->lmp = NULL;
4561	}
4562	/*
4563	** Free buffer and allow em_refresh_mbufs()
4564	** to clean up and recharge buffer.
4565	*/
4566	if (rbuf->m_head) {
4567		m_free(rbuf->m_head);
4568		rbuf->m_head = NULL;
4569	}
4570	return;
4571}
4572
4573#ifndef __NO_STRICT_ALIGNMENT
4574/*
4575 * When jumbo frames are enabled we should realign entire payload on
4576 * architecures with strict alignment. This is serious design mistake of 8254x
4577 * as it nullifies DMA operations. 8254x just allows RX buffer size to be
4578 * 2048/4096/8192/16384. What we really want is 2048 - ETHER_ALIGN to align its
4579 * payload. On architecures without strict alignment restrictions 8254x still
4580 * performs unaligned memory access which would reduce the performance too.
4581 * To avoid copying over an entire frame to align, we allocate a new mbuf and
4582 * copy ethernet header to the new mbuf. The new mbuf is prepended into the
4583 * existing mbuf chain.
4584 *
4585 * Be aware, best performance of the 8254x is achived only when jumbo frame is
4586 * not used at all on architectures with strict alignment.
4587 */
4588static int
4589em_fixup_rx(struct rx_ring *rxr)
4590{
4591	struct adapter *adapter = rxr->adapter;
4592	struct mbuf *m, *n;
4593	int error;
4594
4595	error = 0;
4596	m = rxr->fmp;
4597	if (m->m_len <= (MCLBYTES - ETHER_HDR_LEN)) {
4598		bcopy(m->m_data, m->m_data + ETHER_HDR_LEN, m->m_len);
4599		m->m_data += ETHER_HDR_LEN;
4600	} else {
4601		MGETHDR(n, M_NOWAIT, MT_DATA);
4602		if (n != NULL) {
4603			bcopy(m->m_data, n->m_data, ETHER_HDR_LEN);
4604			m->m_data += ETHER_HDR_LEN;
4605			m->m_len -= ETHER_HDR_LEN;
4606			n->m_len = ETHER_HDR_LEN;
4607			M_MOVE_PKTHDR(n, m);
4608			n->m_next = m;
4609			rxr->fmp = n;
4610		} else {
4611			adapter->dropped_pkts++;
4612			m_freem(rxr->fmp);
4613			rxr->fmp = NULL;
4614			error = ENOMEM;
4615		}
4616	}
4617
4618	return (error);
4619}
4620#endif
4621
4622/*********************************************************************
4623 *
4624 *  Verify that the hardware indicated that the checksum is valid.
4625 *  Inform the stack about the status of checksum so that stack
4626 *  doesn't spend time verifying the checksum.
4627 *
4628 *********************************************************************/
4629static void
4630em_receive_checksum(struct e1000_rx_desc *rx_desc, struct mbuf *mp)
4631{
4632	mp->m_pkthdr.csum_flags = 0;
4633
4634	/* Ignore Checksum bit is set */
4635	if (rx_desc->status & E1000_RXD_STAT_IXSM)
4636		return;
4637
4638	if (rx_desc->errors & (E1000_RXD_ERR_TCPE | E1000_RXD_ERR_IPE))
4639		return;
4640
4641	/* IP Checksum Good? */
4642	if (rx_desc->status & E1000_RXD_STAT_IPCS)
4643		mp->m_pkthdr.csum_flags = (CSUM_IP_CHECKED | CSUM_IP_VALID);
4644
4645	/* TCP or UDP checksum */
4646	if (rx_desc->status & (E1000_RXD_STAT_TCPCS | E1000_RXD_STAT_UDPCS)) {
4647		mp->m_pkthdr.csum_flags |= (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
4648		mp->m_pkthdr.csum_data = htons(0xffff);
4649	}
4650}
4651
4652/*
4653 * This routine is run via an vlan
4654 * config EVENT
4655 */
4656static void
4657em_register_vlan(void *arg, struct ifnet *ifp, u16 vtag)
4658{
4659	struct adapter	*adapter = ifp->if_softc;
4660	u32		index, bit;
4661
4662	if (ifp->if_softc !=  arg)   /* Not our event */
4663		return;
4664
4665	if ((vtag == 0) || (vtag > 4095))       /* Invalid ID */
4666                return;
4667
4668	EM_CORE_LOCK(adapter);
4669	index = (vtag >> 5) & 0x7F;
4670	bit = vtag & 0x1F;
4671	adapter->shadow_vfta[index] |= (1 << bit);
4672	++adapter->num_vlans;
4673	/* Re-init to load the changes */
4674	if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
4675		em_init_locked(adapter);
4676	EM_CORE_UNLOCK(adapter);
4677}
4678
4679/*
4680 * This routine is run via an vlan
4681 * unconfig EVENT
4682 */
4683static void
4684em_unregister_vlan(void *arg, struct ifnet *ifp, u16 vtag)
4685{
4686	struct adapter	*adapter = ifp->if_softc;
4687	u32		index, bit;
4688
4689	if (ifp->if_softc !=  arg)
4690		return;
4691
4692	if ((vtag == 0) || (vtag > 4095))       /* Invalid */
4693                return;
4694
4695	EM_CORE_LOCK(adapter);
4696	index = (vtag >> 5) & 0x7F;
4697	bit = vtag & 0x1F;
4698	adapter->shadow_vfta[index] &= ~(1 << bit);
4699	--adapter->num_vlans;
4700	/* Re-init to load the changes */
4701	if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
4702		em_init_locked(adapter);
4703	EM_CORE_UNLOCK(adapter);
4704}
4705
4706static void
4707em_setup_vlan_hw_support(struct adapter *adapter)
4708{
4709	struct e1000_hw *hw = &adapter->hw;
4710	u32             reg;
4711
4712	/*
4713	** We get here thru init_locked, meaning
4714	** a soft reset, this has already cleared
4715	** the VFTA and other state, so if there
4716	** have been no vlan's registered do nothing.
4717	*/
4718	if (adapter->num_vlans == 0)
4719                return;
4720
4721	/*
4722	** A soft reset zero's out the VFTA, so
4723	** we need to repopulate it now.
4724	*/
4725	for (int i = 0; i < EM_VFTA_SIZE; i++)
4726                if (adapter->shadow_vfta[i] != 0)
4727			E1000_WRITE_REG_ARRAY(hw, E1000_VFTA,
4728                            i, adapter->shadow_vfta[i]);
4729
4730	reg = E1000_READ_REG(hw, E1000_CTRL);
4731	reg |= E1000_CTRL_VME;
4732	E1000_WRITE_REG(hw, E1000_CTRL, reg);
4733
4734	/* Enable the Filter Table */
4735	reg = E1000_READ_REG(hw, E1000_RCTL);
4736	reg &= ~E1000_RCTL_CFIEN;
4737	reg |= E1000_RCTL_VFE;
4738	E1000_WRITE_REG(hw, E1000_RCTL, reg);
4739}
4740
4741static void
4742em_enable_intr(struct adapter *adapter)
4743{
4744	struct e1000_hw *hw = &adapter->hw;
4745	u32 ims_mask = IMS_ENABLE_MASK;
4746
4747	if (hw->mac.type == e1000_82574) {
4748		E1000_WRITE_REG(hw, EM_EIAC, EM_MSIX_MASK);
4749		ims_mask |= EM_MSIX_MASK;
4750	}
4751	E1000_WRITE_REG(hw, E1000_IMS, ims_mask);
4752}
4753
4754static void
4755em_disable_intr(struct adapter *adapter)
4756{
4757	struct e1000_hw *hw = &adapter->hw;
4758
4759	if (hw->mac.type == e1000_82574)
4760		E1000_WRITE_REG(hw, EM_EIAC, 0);
4761	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
4762}
4763
4764/*
4765 * Bit of a misnomer, what this really means is
4766 * to enable OS management of the system... aka
4767 * to disable special hardware management features
4768 */
4769static void
4770em_init_manageability(struct adapter *adapter)
4771{
4772	/* A shared code workaround */
4773#define E1000_82542_MANC2H E1000_MANC2H
4774	if (adapter->has_manage) {
4775		int manc2h = E1000_READ_REG(&adapter->hw, E1000_MANC2H);
4776		int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
4777
4778		/* disable hardware interception of ARP */
4779		manc &= ~(E1000_MANC_ARP_EN);
4780
4781                /* enable receiving management packets to the host */
4782		manc |= E1000_MANC_EN_MNG2HOST;
4783#define E1000_MNG2HOST_PORT_623 (1 << 5)
4784#define E1000_MNG2HOST_PORT_664 (1 << 6)
4785		manc2h |= E1000_MNG2HOST_PORT_623;
4786		manc2h |= E1000_MNG2HOST_PORT_664;
4787		E1000_WRITE_REG(&adapter->hw, E1000_MANC2H, manc2h);
4788		E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
4789	}
4790}
4791
4792/*
4793 * Give control back to hardware management
4794 * controller if there is one.
4795 */
4796static void
4797em_release_manageability(struct adapter *adapter)
4798{
4799	if (adapter->has_manage) {
4800		int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
4801
4802		/* re-enable hardware interception of ARP */
4803		manc |= E1000_MANC_ARP_EN;
4804		manc &= ~E1000_MANC_EN_MNG2HOST;
4805
4806		E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
4807	}
4808}
4809
4810/*
4811 * em_get_hw_control sets the {CTRL_EXT|FWSM}:DRV_LOAD bit.
4812 * For ASF and Pass Through versions of f/w this means
4813 * that the driver is loaded. For AMT version type f/w
4814 * this means that the network i/f is open.
4815 */
4816static void
4817em_get_hw_control(struct adapter *adapter)
4818{
4819	u32 ctrl_ext, swsm;
4820
4821	if (adapter->hw.mac.type == e1000_82573) {
4822		swsm = E1000_READ_REG(&adapter->hw, E1000_SWSM);
4823		E1000_WRITE_REG(&adapter->hw, E1000_SWSM,
4824		    swsm | E1000_SWSM_DRV_LOAD);
4825		return;
4826	}
4827	/* else */
4828	ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4829	E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
4830	    ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
4831	return;
4832}
4833
4834/*
4835 * em_release_hw_control resets {CTRL_EXT|FWSM}:DRV_LOAD bit.
4836 * For ASF and Pass Through versions of f/w this means that
4837 * the driver is no longer loaded. For AMT versions of the
4838 * f/w this means that the network i/f is closed.
4839 */
4840static void
4841em_release_hw_control(struct adapter *adapter)
4842{
4843	u32 ctrl_ext, swsm;
4844
4845	if (!adapter->has_manage)
4846		return;
4847
4848	if (adapter->hw.mac.type == e1000_82573) {
4849		swsm = E1000_READ_REG(&adapter->hw, E1000_SWSM);
4850		E1000_WRITE_REG(&adapter->hw, E1000_SWSM,
4851		    swsm & ~E1000_SWSM_DRV_LOAD);
4852		return;
4853	}
4854	/* else */
4855	ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4856	E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
4857	    ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
4858	return;
4859}
4860
4861static int
4862em_is_valid_ether_addr(u8 *addr)
4863{
4864	char zero_addr[6] = { 0, 0, 0, 0, 0, 0 };
4865
4866	if ((addr[0] & 1) || (!bcmp(addr, zero_addr, ETHER_ADDR_LEN))) {
4867		return (FALSE);
4868	}
4869
4870	return (TRUE);
4871}
4872
4873/*
4874** Parse the interface capabilities with regard
4875** to both system management and wake-on-lan for
4876** later use.
4877*/
4878static void
4879em_get_wakeup(device_t dev)
4880{
4881	struct adapter	*adapter = device_get_softc(dev);
4882	u16		eeprom_data = 0, device_id, apme_mask;
4883
4884	adapter->has_manage = e1000_enable_mng_pass_thru(&adapter->hw);
4885	apme_mask = EM_EEPROM_APME;
4886
4887	switch (adapter->hw.mac.type) {
4888	case e1000_82573:
4889	case e1000_82583:
4890		adapter->has_amt = TRUE;
4891		/* Falls thru */
4892	case e1000_82571:
4893	case e1000_82572:
4894	case e1000_80003es2lan:
4895		if (adapter->hw.bus.func == 1) {
4896			e1000_read_nvm(&adapter->hw,
4897			    NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data);
4898			break;
4899		} else
4900			e1000_read_nvm(&adapter->hw,
4901			    NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
4902		break;
4903	case e1000_ich8lan:
4904	case e1000_ich9lan:
4905	case e1000_ich10lan:
4906	case e1000_pchlan:
4907	case e1000_pch2lan:
4908		apme_mask = E1000_WUC_APME;
4909		adapter->has_amt = TRUE;
4910		eeprom_data = E1000_READ_REG(&adapter->hw, E1000_WUC);
4911		break;
4912	default:
4913		e1000_read_nvm(&adapter->hw,
4914		    NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
4915		break;
4916	}
4917	if (eeprom_data & apme_mask)
4918		adapter->wol = (E1000_WUFC_MAG | E1000_WUFC_MC);
4919	/*
4920         * We have the eeprom settings, now apply the special cases
4921         * where the eeprom may be wrong or the board won't support
4922         * wake on lan on a particular port
4923	 */
4924	device_id = pci_get_device(dev);
4925        switch (device_id) {
4926	case E1000_DEV_ID_82571EB_FIBER:
4927		/* Wake events only supported on port A for dual fiber
4928		 * regardless of eeprom setting */
4929		if (E1000_READ_REG(&adapter->hw, E1000_STATUS) &
4930		    E1000_STATUS_FUNC_1)
4931			adapter->wol = 0;
4932		break;
4933	case E1000_DEV_ID_82571EB_QUAD_COPPER:
4934	case E1000_DEV_ID_82571EB_QUAD_FIBER:
4935	case E1000_DEV_ID_82571EB_QUAD_COPPER_LP:
4936                /* if quad port adapter, disable WoL on all but port A */
4937		if (global_quad_port_a != 0)
4938			adapter->wol = 0;
4939		/* Reset for multiple quad port adapters */
4940		if (++global_quad_port_a == 4)
4941			global_quad_port_a = 0;
4942                break;
4943	}
4944	return;
4945}
4946
4947
4948/*
4949 * Enable PCI Wake On Lan capability
4950 */
4951static void
4952em_enable_wakeup(device_t dev)
4953{
4954	struct adapter	*adapter = device_get_softc(dev);
4955	struct ifnet	*ifp = adapter->ifp;
4956	u32		pmc, ctrl, ctrl_ext, rctl;
4957	u16     	status;
4958
4959	if ((pci_find_cap(dev, PCIY_PMG, &pmc) != 0))
4960		return;
4961
4962	/* Advertise the wakeup capability */
4963	ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL);
4964	ctrl |= (E1000_CTRL_SWDPIN2 | E1000_CTRL_SWDPIN3);
4965	E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl);
4966	E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
4967
4968	if ((adapter->hw.mac.type == e1000_ich8lan) ||
4969	    (adapter->hw.mac.type == e1000_pchlan) ||
4970	    (adapter->hw.mac.type == e1000_ich9lan) ||
4971	    (adapter->hw.mac.type == e1000_ich10lan))
4972		e1000_suspend_workarounds_ich8lan(&adapter->hw);
4973
4974	/* Keep the laser running on Fiber adapters */
4975	if (adapter->hw.phy.media_type == e1000_media_type_fiber ||
4976	    adapter->hw.phy.media_type == e1000_media_type_internal_serdes) {
4977		ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4978		ctrl_ext |= E1000_CTRL_EXT_SDP3_DATA;
4979		E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, ctrl_ext);
4980	}
4981
4982	/*
4983	** Determine type of Wakeup: note that wol
4984	** is set with all bits on by default.
4985	*/
4986	if ((ifp->if_capenable & IFCAP_WOL_MAGIC) == 0)
4987		adapter->wol &= ~E1000_WUFC_MAG;
4988
4989	if ((ifp->if_capenable & IFCAP_WOL_MCAST) == 0)
4990		adapter->wol &= ~E1000_WUFC_MC;
4991	else {
4992		rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
4993		rctl |= E1000_RCTL_MPE;
4994		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, rctl);
4995	}
4996
4997	if ((adapter->hw.mac.type == e1000_pchlan) ||
4998	    (adapter->hw.mac.type == e1000_pch2lan)) {
4999		if (em_enable_phy_wakeup(adapter))
5000			return;
5001	} else {
5002		E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
5003		E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
5004	}
5005
5006	if (adapter->hw.phy.type == e1000_phy_igp_3)
5007		e1000_igp3_phy_powerdown_workaround_ich8lan(&adapter->hw);
5008
5009        /* Request PME */
5010        status = pci_read_config(dev, pmc + PCIR_POWER_STATUS, 2);
5011	status &= ~(PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE);
5012	if (ifp->if_capenable & IFCAP_WOL)
5013		status |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE;
5014        pci_write_config(dev, pmc + PCIR_POWER_STATUS, status, 2);
5015
5016	return;
5017}
5018
5019/*
5020** WOL in the newer chipset interfaces (pchlan)
5021** require thing to be copied into the phy
5022*/
5023static int
5024em_enable_phy_wakeup(struct adapter *adapter)
5025{
5026	struct e1000_hw *hw = &adapter->hw;
5027	u32 mreg, ret = 0;
5028	u16 preg;
5029
5030	/* copy MAC RARs to PHY RARs */
5031	e1000_copy_rx_addrs_to_phy_ich8lan(hw);
5032
5033	/* copy MAC MTA to PHY MTA */
5034	for (int i = 0; i < adapter->hw.mac.mta_reg_count; i++) {
5035		mreg = E1000_READ_REG_ARRAY(hw, E1000_MTA, i);
5036		e1000_write_phy_reg(hw, BM_MTA(i), (u16)(mreg & 0xFFFF));
5037		e1000_write_phy_reg(hw, BM_MTA(i) + 1,
5038		    (u16)((mreg >> 16) & 0xFFFF));
5039	}
5040
5041	/* configure PHY Rx Control register */
5042	e1000_read_phy_reg(&adapter->hw, BM_RCTL, &preg);
5043	mreg = E1000_READ_REG(hw, E1000_RCTL);
5044	if (mreg & E1000_RCTL_UPE)
5045		preg |= BM_RCTL_UPE;
5046	if (mreg & E1000_RCTL_MPE)
5047		preg |= BM_RCTL_MPE;
5048	preg &= ~(BM_RCTL_MO_MASK);
5049	if (mreg & E1000_RCTL_MO_3)
5050		preg |= (((mreg & E1000_RCTL_MO_3) >> E1000_RCTL_MO_SHIFT)
5051				<< BM_RCTL_MO_SHIFT);
5052	if (mreg & E1000_RCTL_BAM)
5053		preg |= BM_RCTL_BAM;
5054	if (mreg & E1000_RCTL_PMCF)
5055		preg |= BM_RCTL_PMCF;
5056	mreg = E1000_READ_REG(hw, E1000_CTRL);
5057	if (mreg & E1000_CTRL_RFCE)
5058		preg |= BM_RCTL_RFCE;
5059	e1000_write_phy_reg(&adapter->hw, BM_RCTL, preg);
5060
5061	/* enable PHY wakeup in MAC register */
5062	E1000_WRITE_REG(hw, E1000_WUC,
5063	    E1000_WUC_PHY_WAKE | E1000_WUC_PME_EN);
5064	E1000_WRITE_REG(hw, E1000_WUFC, adapter->wol);
5065
5066	/* configure and enable PHY wakeup in PHY registers */
5067	e1000_write_phy_reg(&adapter->hw, BM_WUFC, adapter->wol);
5068	e1000_write_phy_reg(&adapter->hw, BM_WUC, E1000_WUC_PME_EN);
5069
5070	/* activate PHY wakeup */
5071	ret = hw->phy.ops.acquire(hw);
5072	if (ret) {
5073		printf("Could not acquire PHY\n");
5074		return ret;
5075	}
5076	e1000_write_phy_reg_mdic(hw, IGP01E1000_PHY_PAGE_SELECT,
5077	                         (BM_WUC_ENABLE_PAGE << IGP_PAGE_SHIFT));
5078	ret = e1000_read_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, &preg);
5079	if (ret) {
5080		printf("Could not read PHY page 769\n");
5081		goto out;
5082	}
5083	preg |= BM_WUC_ENABLE_BIT | BM_WUC_HOST_WU_BIT;
5084	ret = e1000_write_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, preg);
5085	if (ret)
5086		printf("Could not set PHY Host Wakeup bit\n");
5087out:
5088	hw->phy.ops.release(hw);
5089
5090	return ret;
5091}
5092
5093static void
5094em_led_func(void *arg, int onoff)
5095{
5096	struct adapter	*adapter = arg;
5097
5098	EM_CORE_LOCK(adapter);
5099	if (onoff) {
5100		e1000_setup_led(&adapter->hw);
5101		e1000_led_on(&adapter->hw);
5102	} else {
5103		e1000_led_off(&adapter->hw);
5104		e1000_cleanup_led(&adapter->hw);
5105	}
5106	EM_CORE_UNLOCK(adapter);
5107}
5108
5109/*
5110** Disable the L0S and L1 LINK states
5111*/
5112static void
5113em_disable_aspm(struct adapter *adapter)
5114{
5115	int		base, reg;
5116	u16		link_cap,link_ctrl;
5117	device_t	dev = adapter->dev;
5118
5119	switch (adapter->hw.mac.type) {
5120		case e1000_82573:
5121		case e1000_82574:
5122		case e1000_82583:
5123			break;
5124		default:
5125			return;
5126	}
5127	if (pci_find_cap(dev, PCIY_EXPRESS, &base) != 0)
5128		return;
5129	reg = base + PCIER_LINK_CAP;
5130	link_cap = pci_read_config(dev, reg, 2);
5131	if ((link_cap & PCIEM_LINK_CAP_ASPM) == 0)
5132		return;
5133	reg = base + PCIER_LINK_CTL;
5134	link_ctrl = pci_read_config(dev, reg, 2);
5135	link_ctrl &= ~PCIEM_LINK_CTL_ASPMC;
5136	pci_write_config(dev, reg, link_ctrl, 2);
5137	return;
5138}
5139
5140/**********************************************************************
5141 *
5142 *  Update the board statistics counters.
5143 *
5144 **********************************************************************/
5145static void
5146em_update_stats_counters(struct adapter *adapter)
5147{
5148	struct ifnet   *ifp;
5149
5150	if(adapter->hw.phy.media_type == e1000_media_type_copper ||
5151	   (E1000_READ_REG(&adapter->hw, E1000_STATUS) & E1000_STATUS_LU)) {
5152		adapter->stats.symerrs += E1000_READ_REG(&adapter->hw, E1000_SYMERRS);
5153		adapter->stats.sec += E1000_READ_REG(&adapter->hw, E1000_SEC);
5154	}
5155	adapter->stats.crcerrs += E1000_READ_REG(&adapter->hw, E1000_CRCERRS);
5156	adapter->stats.mpc += E1000_READ_REG(&adapter->hw, E1000_MPC);
5157	adapter->stats.scc += E1000_READ_REG(&adapter->hw, E1000_SCC);
5158	adapter->stats.ecol += E1000_READ_REG(&adapter->hw, E1000_ECOL);
5159
5160	adapter->stats.mcc += E1000_READ_REG(&adapter->hw, E1000_MCC);
5161	adapter->stats.latecol += E1000_READ_REG(&adapter->hw, E1000_LATECOL);
5162	adapter->stats.colc += E1000_READ_REG(&adapter->hw, E1000_COLC);
5163	adapter->stats.dc += E1000_READ_REG(&adapter->hw, E1000_DC);
5164	adapter->stats.rlec += E1000_READ_REG(&adapter->hw, E1000_RLEC);
5165	adapter->stats.xonrxc += E1000_READ_REG(&adapter->hw, E1000_XONRXC);
5166	adapter->stats.xontxc += E1000_READ_REG(&adapter->hw, E1000_XONTXC);
5167	/*
5168	** For watchdog management we need to know if we have been
5169	** paused during the last interval, so capture that here.
5170	*/
5171	adapter->pause_frames = E1000_READ_REG(&adapter->hw, E1000_XOFFRXC);
5172	adapter->stats.xoffrxc += adapter->pause_frames;
5173	adapter->stats.xofftxc += E1000_READ_REG(&adapter->hw, E1000_XOFFTXC);
5174	adapter->stats.fcruc += E1000_READ_REG(&adapter->hw, E1000_FCRUC);
5175	adapter->stats.prc64 += E1000_READ_REG(&adapter->hw, E1000_PRC64);
5176	adapter->stats.prc127 += E1000_READ_REG(&adapter->hw, E1000_PRC127);
5177	adapter->stats.prc255 += E1000_READ_REG(&adapter->hw, E1000_PRC255);
5178	adapter->stats.prc511 += E1000_READ_REG(&adapter->hw, E1000_PRC511);
5179	adapter->stats.prc1023 += E1000_READ_REG(&adapter->hw, E1000_PRC1023);
5180	adapter->stats.prc1522 += E1000_READ_REG(&adapter->hw, E1000_PRC1522);
5181	adapter->stats.gprc += E1000_READ_REG(&adapter->hw, E1000_GPRC);
5182	adapter->stats.bprc += E1000_READ_REG(&adapter->hw, E1000_BPRC);
5183	adapter->stats.mprc += E1000_READ_REG(&adapter->hw, E1000_MPRC);
5184	adapter->stats.gptc += E1000_READ_REG(&adapter->hw, E1000_GPTC);
5185
5186	/* For the 64-bit byte counters the low dword must be read first. */
5187	/* Both registers clear on the read of the high dword */
5188
5189	adapter->stats.gorc += E1000_READ_REG(&adapter->hw, E1000_GORCL) +
5190	    ((u64)E1000_READ_REG(&adapter->hw, E1000_GORCH) << 32);
5191	adapter->stats.gotc += E1000_READ_REG(&adapter->hw, E1000_GOTCL) +
5192	    ((u64)E1000_READ_REG(&adapter->hw, E1000_GOTCH) << 32);
5193
5194	adapter->stats.rnbc += E1000_READ_REG(&adapter->hw, E1000_RNBC);
5195	adapter->stats.ruc += E1000_READ_REG(&adapter->hw, E1000_RUC);
5196	adapter->stats.rfc += E1000_READ_REG(&adapter->hw, E1000_RFC);
5197	adapter->stats.roc += E1000_READ_REG(&adapter->hw, E1000_ROC);
5198	adapter->stats.rjc += E1000_READ_REG(&adapter->hw, E1000_RJC);
5199
5200	adapter->stats.tor += E1000_READ_REG(&adapter->hw, E1000_TORH);
5201	adapter->stats.tot += E1000_READ_REG(&adapter->hw, E1000_TOTH);
5202
5203	adapter->stats.tpr += E1000_READ_REG(&adapter->hw, E1000_TPR);
5204	adapter->stats.tpt += E1000_READ_REG(&adapter->hw, E1000_TPT);
5205	adapter->stats.ptc64 += E1000_READ_REG(&adapter->hw, E1000_PTC64);
5206	adapter->stats.ptc127 += E1000_READ_REG(&adapter->hw, E1000_PTC127);
5207	adapter->stats.ptc255 += E1000_READ_REG(&adapter->hw, E1000_PTC255);
5208	adapter->stats.ptc511 += E1000_READ_REG(&adapter->hw, E1000_PTC511);
5209	adapter->stats.ptc1023 += E1000_READ_REG(&adapter->hw, E1000_PTC1023);
5210	adapter->stats.ptc1522 += E1000_READ_REG(&adapter->hw, E1000_PTC1522);
5211	adapter->stats.mptc += E1000_READ_REG(&adapter->hw, E1000_MPTC);
5212	adapter->stats.bptc += E1000_READ_REG(&adapter->hw, E1000_BPTC);
5213
5214	/* Interrupt Counts */
5215
5216	adapter->stats.iac += E1000_READ_REG(&adapter->hw, E1000_IAC);
5217	adapter->stats.icrxptc += E1000_READ_REG(&adapter->hw, E1000_ICRXPTC);
5218	adapter->stats.icrxatc += E1000_READ_REG(&adapter->hw, E1000_ICRXATC);
5219	adapter->stats.ictxptc += E1000_READ_REG(&adapter->hw, E1000_ICTXPTC);
5220	adapter->stats.ictxatc += E1000_READ_REG(&adapter->hw, E1000_ICTXATC);
5221	adapter->stats.ictxqec += E1000_READ_REG(&adapter->hw, E1000_ICTXQEC);
5222	adapter->stats.ictxqmtc += E1000_READ_REG(&adapter->hw, E1000_ICTXQMTC);
5223	adapter->stats.icrxdmtc += E1000_READ_REG(&adapter->hw, E1000_ICRXDMTC);
5224	adapter->stats.icrxoc += E1000_READ_REG(&adapter->hw, E1000_ICRXOC);
5225
5226	if (adapter->hw.mac.type >= e1000_82543) {
5227		adapter->stats.algnerrc +=
5228		E1000_READ_REG(&adapter->hw, E1000_ALGNERRC);
5229		adapter->stats.rxerrc +=
5230		E1000_READ_REG(&adapter->hw, E1000_RXERRC);
5231		adapter->stats.tncrs +=
5232		E1000_READ_REG(&adapter->hw, E1000_TNCRS);
5233		adapter->stats.cexterr +=
5234		E1000_READ_REG(&adapter->hw, E1000_CEXTERR);
5235		adapter->stats.tsctc +=
5236		E1000_READ_REG(&adapter->hw, E1000_TSCTC);
5237		adapter->stats.tsctfc +=
5238		E1000_READ_REG(&adapter->hw, E1000_TSCTFC);
5239	}
5240	ifp = adapter->ifp;
5241
5242	ifp->if_collisions = adapter->stats.colc;
5243
5244	/* Rx Errors */
5245	ifp->if_ierrors = adapter->dropped_pkts + adapter->stats.rxerrc +
5246	    adapter->stats.crcerrs + adapter->stats.algnerrc +
5247	    adapter->stats.ruc + adapter->stats.roc +
5248	    adapter->stats.mpc + adapter->stats.cexterr;
5249
5250	/* Tx Errors */
5251	ifp->if_oerrors = adapter->stats.ecol +
5252	    adapter->stats.latecol + adapter->watchdog_events;
5253}
5254
5255/* Export a single 32-bit register via a read-only sysctl. */
5256static int
5257em_sysctl_reg_handler(SYSCTL_HANDLER_ARGS)
5258{
5259	struct adapter *adapter;
5260	u_int val;
5261
5262	adapter = oidp->oid_arg1;
5263	val = E1000_READ_REG(&adapter->hw, oidp->oid_arg2);
5264	return (sysctl_handle_int(oidp, &val, 0, req));
5265}
5266
5267/*
5268 * Add sysctl variables, one per statistic, to the system.
5269 */
5270static void
5271em_add_hw_stats(struct adapter *adapter)
5272{
5273	device_t dev = adapter->dev;
5274
5275	struct tx_ring *txr = adapter->tx_rings;
5276	struct rx_ring *rxr = adapter->rx_rings;
5277
5278	struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev);
5279	struct sysctl_oid *tree = device_get_sysctl_tree(dev);
5280	struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree);
5281	struct e1000_hw_stats *stats = &adapter->stats;
5282
5283	struct sysctl_oid *stat_node, *queue_node, *int_node;
5284	struct sysctl_oid_list *stat_list, *queue_list, *int_list;
5285
5286#define QUEUE_NAME_LEN 32
5287	char namebuf[QUEUE_NAME_LEN];
5288
5289	/* Driver Statistics */
5290	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "link_irq",
5291			CTLFLAG_RD, &adapter->link_irq,
5292			"Link MSIX IRQ Handled");
5293	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "mbuf_alloc_fail",
5294			 CTLFLAG_RD, &adapter->mbuf_alloc_failed,
5295			 "Std mbuf failed");
5296	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "cluster_alloc_fail",
5297			 CTLFLAG_RD, &adapter->mbuf_cluster_failed,
5298			 "Std mbuf cluster failed");
5299	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "dropped",
5300			CTLFLAG_RD, &adapter->dropped_pkts,
5301			"Driver dropped packets");
5302	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_dma_fail",
5303			CTLFLAG_RD, &adapter->no_tx_dma_setup,
5304			"Driver tx dma failure in xmit");
5305	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_overruns",
5306			CTLFLAG_RD, &adapter->rx_overruns,
5307			"RX overruns");
5308	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "watchdog_timeouts",
5309			CTLFLAG_RD, &adapter->watchdog_events,
5310			"Watchdog timeouts");
5311
5312	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "device_control",
5313			CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_CTRL,
5314			em_sysctl_reg_handler, "IU",
5315			"Device Control Register");
5316	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rx_control",
5317			CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_RCTL,
5318			em_sysctl_reg_handler, "IU",
5319			"Receiver Control Register");
5320	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_high_water",
5321			CTLFLAG_RD, &adapter->hw.fc.high_water, 0,
5322			"Flow Control High Watermark");
5323	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_low_water",
5324			CTLFLAG_RD, &adapter->hw.fc.low_water, 0,
5325			"Flow Control Low Watermark");
5326
5327	for (int i = 0; i < adapter->num_queues; i++, rxr++, txr++) {
5328		snprintf(namebuf, QUEUE_NAME_LEN, "queue%d", i);
5329		queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
5330					    CTLFLAG_RD, NULL, "Queue Name");
5331		queue_list = SYSCTL_CHILDREN(queue_node);
5332
5333		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_head",
5334				CTLTYPE_UINT | CTLFLAG_RD, adapter,
5335				E1000_TDH(txr->me),
5336				em_sysctl_reg_handler, "IU",
5337 				"Transmit Descriptor Head");
5338		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_tail",
5339				CTLTYPE_UINT | CTLFLAG_RD, adapter,
5340				E1000_TDT(txr->me),
5341				em_sysctl_reg_handler, "IU",
5342 				"Transmit Descriptor Tail");
5343		SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "tx_irq",
5344				CTLFLAG_RD, &txr->tx_irq,
5345				"Queue MSI-X Transmit Interrupts");
5346		SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "no_desc_avail",
5347				CTLFLAG_RD, &txr->no_desc_avail,
5348				"Queue No Descriptor Available");
5349
5350		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_head",
5351				CTLTYPE_UINT | CTLFLAG_RD, adapter,
5352				E1000_RDH(rxr->me),
5353				em_sysctl_reg_handler, "IU",
5354				"Receive Descriptor Head");
5355		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_tail",
5356				CTLTYPE_UINT | CTLFLAG_RD, adapter,
5357				E1000_RDT(rxr->me),
5358				em_sysctl_reg_handler, "IU",
5359				"Receive Descriptor Tail");
5360		SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "rx_irq",
5361				CTLFLAG_RD, &rxr->rx_irq,
5362				"Queue MSI-X Receive Interrupts");
5363	}
5364
5365	/* MAC stats get their own sub node */
5366
5367	stat_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "mac_stats",
5368				    CTLFLAG_RD, NULL, "Statistics");
5369	stat_list = SYSCTL_CHILDREN(stat_node);
5370
5371	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "excess_coll",
5372			CTLFLAG_RD, &stats->ecol,
5373			"Excessive collisions");
5374	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "single_coll",
5375			CTLFLAG_RD, &stats->scc,
5376			"Single collisions");
5377	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "multiple_coll",
5378			CTLFLAG_RD, &stats->mcc,
5379			"Multiple collisions");
5380	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "late_coll",
5381			CTLFLAG_RD, &stats->latecol,
5382			"Late collisions");
5383	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "collision_count",
5384			CTLFLAG_RD, &stats->colc,
5385			"Collision Count");
5386	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "symbol_errors",
5387			CTLFLAG_RD, &adapter->stats.symerrs,
5388			"Symbol Errors");
5389	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "sequence_errors",
5390			CTLFLAG_RD, &adapter->stats.sec,
5391			"Sequence Errors");
5392	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "defer_count",
5393			CTLFLAG_RD, &adapter->stats.dc,
5394			"Defer Count");
5395	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "missed_packets",
5396			CTLFLAG_RD, &adapter->stats.mpc,
5397			"Missed Packets");
5398	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_no_buff",
5399			CTLFLAG_RD, &adapter->stats.rnbc,
5400			"Receive No Buffers");
5401	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_undersize",
5402			CTLFLAG_RD, &adapter->stats.ruc,
5403			"Receive Undersize");
5404	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_fragmented",
5405			CTLFLAG_RD, &adapter->stats.rfc,
5406			"Fragmented Packets Received ");
5407	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_oversize",
5408			CTLFLAG_RD, &adapter->stats.roc,
5409			"Oversized Packets Received");
5410	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_jabber",
5411			CTLFLAG_RD, &adapter->stats.rjc,
5412			"Recevied Jabber");
5413	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_errs",
5414			CTLFLAG_RD, &adapter->stats.rxerrc,
5415			"Receive Errors");
5416	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "crc_errs",
5417			CTLFLAG_RD, &adapter->stats.crcerrs,
5418			"CRC errors");
5419	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "alignment_errs",
5420			CTLFLAG_RD, &adapter->stats.algnerrc,
5421			"Alignment Errors");
5422	/* On 82575 these are collision counts */
5423	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "coll_ext_errs",
5424			CTLFLAG_RD, &adapter->stats.cexterr,
5425			"Collision/Carrier extension errors");
5426	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_recvd",
5427			CTLFLAG_RD, &adapter->stats.xonrxc,
5428			"XON Received");
5429	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_txd",
5430			CTLFLAG_RD, &adapter->stats.xontxc,
5431			"XON Transmitted");
5432	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_recvd",
5433			CTLFLAG_RD, &adapter->stats.xoffrxc,
5434			"XOFF Received");
5435	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_txd",
5436			CTLFLAG_RD, &adapter->stats.xofftxc,
5437			"XOFF Transmitted");
5438
5439	/* Packet Reception Stats */
5440	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_recvd",
5441			CTLFLAG_RD, &adapter->stats.tpr,
5442			"Total Packets Received ");
5443	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_recvd",
5444			CTLFLAG_RD, &adapter->stats.gprc,
5445			"Good Packets Received");
5446	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_recvd",
5447			CTLFLAG_RD, &adapter->stats.bprc,
5448			"Broadcast Packets Received");
5449	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_recvd",
5450			CTLFLAG_RD, &adapter->stats.mprc,
5451			"Multicast Packets Received");
5452	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_64",
5453			CTLFLAG_RD, &adapter->stats.prc64,
5454			"64 byte frames received ");
5455	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_65_127",
5456			CTLFLAG_RD, &adapter->stats.prc127,
5457			"65-127 byte frames received");
5458	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_128_255",
5459			CTLFLAG_RD, &adapter->stats.prc255,
5460			"128-255 byte frames received");
5461	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_256_511",
5462			CTLFLAG_RD, &adapter->stats.prc511,
5463			"256-511 byte frames received");
5464	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_512_1023",
5465			CTLFLAG_RD, &adapter->stats.prc1023,
5466			"512-1023 byte frames received");
5467	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_1024_1522",
5468			CTLFLAG_RD, &adapter->stats.prc1522,
5469			"1023-1522 byte frames received");
5470 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_recvd",
5471 			CTLFLAG_RD, &adapter->stats.gorc,
5472 			"Good Octets Received");
5473
5474	/* Packet Transmission Stats */
5475 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_txd",
5476 			CTLFLAG_RD, &adapter->stats.gotc,
5477 			"Good Octets Transmitted");
5478	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_txd",
5479			CTLFLAG_RD, &adapter->stats.tpt,
5480			"Total Packets Transmitted");
5481	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd",
5482			CTLFLAG_RD, &adapter->stats.gptc,
5483			"Good Packets Transmitted");
5484	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_txd",
5485			CTLFLAG_RD, &adapter->stats.bptc,
5486			"Broadcast Packets Transmitted");
5487	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_txd",
5488			CTLFLAG_RD, &adapter->stats.mptc,
5489			"Multicast Packets Transmitted");
5490	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_64",
5491			CTLFLAG_RD, &adapter->stats.ptc64,
5492			"64 byte frames transmitted ");
5493	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_65_127",
5494			CTLFLAG_RD, &adapter->stats.ptc127,
5495			"65-127 byte frames transmitted");
5496	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_128_255",
5497			CTLFLAG_RD, &adapter->stats.ptc255,
5498			"128-255 byte frames transmitted");
5499	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_256_511",
5500			CTLFLAG_RD, &adapter->stats.ptc511,
5501			"256-511 byte frames transmitted");
5502	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_512_1023",
5503			CTLFLAG_RD, &adapter->stats.ptc1023,
5504			"512-1023 byte frames transmitted");
5505	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_1024_1522",
5506			CTLFLAG_RD, &adapter->stats.ptc1522,
5507			"1024-1522 byte frames transmitted");
5508	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tso_txd",
5509			CTLFLAG_RD, &adapter->stats.tsctc,
5510			"TSO Contexts Transmitted");
5511	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tso_ctx_fail",
5512			CTLFLAG_RD, &adapter->stats.tsctfc,
5513			"TSO Contexts Failed");
5514
5515
5516	/* Interrupt Stats */
5517
5518	int_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "interrupts",
5519				    CTLFLAG_RD, NULL, "Interrupt Statistics");
5520	int_list = SYSCTL_CHILDREN(int_node);
5521
5522	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "asserts",
5523			CTLFLAG_RD, &adapter->stats.iac,
5524			"Interrupt Assertion Count");
5525
5526	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_pkt_timer",
5527			CTLFLAG_RD, &adapter->stats.icrxptc,
5528			"Interrupt Cause Rx Pkt Timer Expire Count");
5529
5530	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_abs_timer",
5531			CTLFLAG_RD, &adapter->stats.icrxatc,
5532			"Interrupt Cause Rx Abs Timer Expire Count");
5533
5534	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_pkt_timer",
5535			CTLFLAG_RD, &adapter->stats.ictxptc,
5536			"Interrupt Cause Tx Pkt Timer Expire Count");
5537
5538	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_abs_timer",
5539			CTLFLAG_RD, &adapter->stats.ictxatc,
5540			"Interrupt Cause Tx Abs Timer Expire Count");
5541
5542	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_queue_empty",
5543			CTLFLAG_RD, &adapter->stats.ictxqec,
5544			"Interrupt Cause Tx Queue Empty Count");
5545
5546	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_queue_min_thresh",
5547			CTLFLAG_RD, &adapter->stats.ictxqmtc,
5548			"Interrupt Cause Tx Queue Min Thresh Count");
5549
5550	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_desc_min_thresh",
5551			CTLFLAG_RD, &adapter->stats.icrxdmtc,
5552			"Interrupt Cause Rx Desc Min Thresh Count");
5553
5554	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_overrun",
5555			CTLFLAG_RD, &adapter->stats.icrxoc,
5556			"Interrupt Cause Receiver Overrun Count");
5557}
5558
5559/**********************************************************************
5560 *
5561 *  This routine provides a way to dump out the adapter eeprom,
5562 *  often a useful debug/service tool. This only dumps the first
5563 *  32 words, stuff that matters is in that extent.
5564 *
5565 **********************************************************************/
5566static int
5567em_sysctl_nvm_info(SYSCTL_HANDLER_ARGS)
5568{
5569	struct adapter *adapter = (struct adapter *)arg1;
5570	int error;
5571	int result;
5572
5573	result = -1;
5574	error = sysctl_handle_int(oidp, &result, 0, req);
5575
5576	if (error || !req->newptr)
5577		return (error);
5578
5579	/*
5580	 * This value will cause a hex dump of the
5581	 * first 32 16-bit words of the EEPROM to
5582	 * the screen.
5583	 */
5584	if (result == 1)
5585		em_print_nvm_info(adapter);
5586
5587	return (error);
5588}
5589
5590static void
5591em_print_nvm_info(struct adapter *adapter)
5592{
5593	u16	eeprom_data;
5594	int	i, j, row = 0;
5595
5596	/* Its a bit crude, but it gets the job done */
5597	printf("\nInterface EEPROM Dump:\n");
5598	printf("Offset\n0x0000  ");
5599	for (i = 0, j = 0; i < 32; i++, j++) {
5600		if (j == 8) { /* Make the offset block */
5601			j = 0; ++row;
5602			printf("\n0x00%x0  ",row);
5603		}
5604		e1000_read_nvm(&adapter->hw, i, 1, &eeprom_data);
5605		printf("%04x ", eeprom_data);
5606	}
5607	printf("\n");
5608}
5609
5610static int
5611em_sysctl_int_delay(SYSCTL_HANDLER_ARGS)
5612{
5613	struct em_int_delay_info *info;
5614	struct adapter *adapter;
5615	u32 regval;
5616	int error, usecs, ticks;
5617
5618	info = (struct em_int_delay_info *)arg1;
5619	usecs = info->value;
5620	error = sysctl_handle_int(oidp, &usecs, 0, req);
5621	if (error != 0 || req->newptr == NULL)
5622		return (error);
5623	if (usecs < 0 || usecs > EM_TICKS_TO_USECS(65535))
5624		return (EINVAL);
5625	info->value = usecs;
5626	ticks = EM_USECS_TO_TICKS(usecs);
5627	if (info->offset == E1000_ITR)	/* units are 256ns here */
5628		ticks *= 4;
5629
5630	adapter = info->adapter;
5631
5632	EM_CORE_LOCK(adapter);
5633	regval = E1000_READ_OFFSET(&adapter->hw, info->offset);
5634	regval = (regval & ~0xffff) | (ticks & 0xffff);
5635	/* Handle a few special cases. */
5636	switch (info->offset) {
5637	case E1000_RDTR:
5638		break;
5639	case E1000_TIDV:
5640		if (ticks == 0) {
5641			adapter->txd_cmd &= ~E1000_TXD_CMD_IDE;
5642			/* Don't write 0 into the TIDV register. */
5643			regval++;
5644		} else
5645			adapter->txd_cmd |= E1000_TXD_CMD_IDE;
5646		break;
5647	}
5648	E1000_WRITE_OFFSET(&adapter->hw, info->offset, regval);
5649	EM_CORE_UNLOCK(adapter);
5650	return (0);
5651}
5652
5653static void
5654em_add_int_delay_sysctl(struct adapter *adapter, const char *name,
5655	const char *description, struct em_int_delay_info *info,
5656	int offset, int value)
5657{
5658	info->adapter = adapter;
5659	info->offset = offset;
5660	info->value = value;
5661	SYSCTL_ADD_PROC(device_get_sysctl_ctx(adapter->dev),
5662	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
5663	    OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW,
5664	    info, 0, em_sysctl_int_delay, "I", description);
5665}
5666
5667static void
5668em_set_sysctl_value(struct adapter *adapter, const char *name,
5669	const char *description, int *limit, int value)
5670{
5671	*limit = value;
5672	SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev),
5673	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
5674	    OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW, limit, value, description);
5675}
5676
5677
5678/*
5679** Set flow control using sysctl:
5680** Flow control values:
5681**      0 - off
5682**      1 - rx pause
5683**      2 - tx pause
5684**      3 - full
5685*/
5686static int
5687em_set_flowcntl(SYSCTL_HANDLER_ARGS)
5688{
5689        int		error;
5690	static int	input = 3; /* default is full */
5691        struct adapter	*adapter = (struct adapter *) arg1;
5692
5693        error = sysctl_handle_int(oidp, &input, 0, req);
5694
5695        if ((error) || (req->newptr == NULL))
5696                return (error);
5697
5698	if (input == adapter->fc) /* no change? */
5699		return (error);
5700
5701        switch (input) {
5702                case e1000_fc_rx_pause:
5703                case e1000_fc_tx_pause:
5704                case e1000_fc_full:
5705                case e1000_fc_none:
5706                        adapter->hw.fc.requested_mode = input;
5707			adapter->fc = input;
5708                        break;
5709                default:
5710			/* Do nothing */
5711			return (error);
5712        }
5713
5714        adapter->hw.fc.current_mode = adapter->hw.fc.requested_mode;
5715        e1000_force_mac_fc(&adapter->hw);
5716        return (error);
5717}
5718
5719/*
5720** Manage Energy Efficient Ethernet:
5721** Control values:
5722**     0/1 - enabled/disabled
5723*/
5724static int
5725em_sysctl_eee(SYSCTL_HANDLER_ARGS)
5726{
5727       struct adapter *adapter = (struct adapter *) arg1;
5728       int             error, value;
5729
5730       value = adapter->hw.dev_spec.ich8lan.eee_disable;
5731       error = sysctl_handle_int(oidp, &value, 0, req);
5732       if (error || req->newptr == NULL)
5733               return (error);
5734       EM_CORE_LOCK(adapter);
5735       adapter->hw.dev_spec.ich8lan.eee_disable = (value != 0);
5736       em_init_locked(adapter);
5737       EM_CORE_UNLOCK(adapter);
5738       return (0);
5739}
5740
5741static int
5742em_sysctl_debug_info(SYSCTL_HANDLER_ARGS)
5743{
5744	struct adapter *adapter;
5745	int error;
5746	int result;
5747
5748	result = -1;
5749	error = sysctl_handle_int(oidp, &result, 0, req);
5750
5751	if (error || !req->newptr)
5752		return (error);
5753
5754	if (result == 1) {
5755		adapter = (struct adapter *)arg1;
5756		em_print_debug_info(adapter);
5757        }
5758
5759	return (error);
5760}
5761
5762/*
5763** This routine is meant to be fluid, add whatever is
5764** needed for debugging a problem.  -jfv
5765*/
5766static void
5767em_print_debug_info(struct adapter *adapter)
5768{
5769	device_t dev = adapter->dev;
5770	struct tx_ring *txr = adapter->tx_rings;
5771	struct rx_ring *rxr = adapter->rx_rings;
5772
5773	if (adapter->ifp->if_drv_flags & IFF_DRV_RUNNING)
5774		printf("Interface is RUNNING ");
5775	else
5776		printf("Interface is NOT RUNNING\n");
5777
5778	if (adapter->ifp->if_drv_flags & IFF_DRV_OACTIVE)
5779		printf("and INACTIVE\n");
5780	else
5781		printf("and ACTIVE\n");
5782
5783	device_printf(dev, "hw tdh = %d, hw tdt = %d\n",
5784	    E1000_READ_REG(&adapter->hw, E1000_TDH(0)),
5785	    E1000_READ_REG(&adapter->hw, E1000_TDT(0)));
5786	device_printf(dev, "hw rdh = %d, hw rdt = %d\n",
5787	    E1000_READ_REG(&adapter->hw, E1000_RDH(0)),
5788	    E1000_READ_REG(&adapter->hw, E1000_RDT(0)));
5789	device_printf(dev, "Tx Queue Status = %d\n", txr->queue_status);
5790	device_printf(dev, "TX descriptors avail = %d\n",
5791	    txr->tx_avail);
5792	device_printf(dev, "Tx Descriptors avail failure = %ld\n",
5793	    txr->no_desc_avail);
5794	device_printf(dev, "RX discarded packets = %ld\n",
5795	    rxr->rx_discarded);
5796	device_printf(dev, "RX Next to Check = %d\n", rxr->next_to_check);
5797	device_printf(dev, "RX Next to Refresh = %d\n", rxr->next_to_refresh);
5798}
5799